Viewing file: Psyco.py (34.26 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
# -*- coding: ISO-8859-1 -*- ######################################################################## # $Header: /var/local/cvsroot/4Suite/Ft/Rdf/Drivers/Psyco.py,v 1.12 2005/03/29 00:30:48 mbrown Exp $ """ A persistent RDF model driver using the PsycoPG adapter for PostgreSQL
See http://initd.org/projects/psycopg1
Copyright 2005 Fourthought, Inc. (USA). Detailed license and copyright information: http://4suite.org/COPYRIGHT Project home, documentation, distributions: http://4suite.org/ """
import re, codecs
from Ft.Rdf import RDF_MS_BASE, OBJECT_TYPE_UNKNOWN, OBJECT_TYPE_LITERAL, OBJECT_TYPE_RESOURCE from Ft.Rdf import Model from Ft.Rdf.Drivers import DataBaseExceptions, PROPERTIES
CREATE_DDL = """\ CREATE TABLE %(model)sUriHead ( ID integer primary key, value varchar(2000) unique not null );
CREATE TABLE %(model)sRdfResource ( ID integer primary key, UriHeadID integer references %(model)sUriHead(ID) not null, tail varchar(2000) not null );
CREATE UNIQUE INDEX %(model)sRdfResourceIdx ON %(model)sRdfResource (UriHeadId, tail);
CREATE TABLE %(model)sStatement ( id integer primary key, subjectID integer references %(model)sRdfResource(ID) not null, predicateID integer references %(model)sRdfResource(ID) not null, objectID integer not null, --0 for resource, 1 for (string) literal objectTypeCode integer NOT NULL, domainID integer references %(model)sRdfResource(ID) );
CREATE TABLE %(model)sStringLiteral ( id integer primary key, value varchar(2000) );
CREATE TABLE %(model)sNumericLiteral ( id integer primary key, value integer );
CREATE SEQUENCE %(model)sObjectSeq START 1; CREATE SEQUENCE %(model)sStatementSeq START 1; CREATE SEQUENCE %(model)sUriHeadSeq START 1;
CREATE UNIQUE INDEX %(model)sStringLiteralIDX ON %(model)sUriHead (value); CREATE UNIQUE INDEX %(model)sUriHeadValue ON %(model)sUriHead (value); --CREATE INDEX %(model)sStatementIDX ON %(model)sStatement (subjectId, predicateId, objectId, domainId);
CREATE TABLE %(model)sAclIdent ( ID integer primary key, name varchar(2000) not null );
CREATE TABLE %(model)sAclAssoc ( ACLIdentId integer references %(model)sAclIdent(ID), RdfResourceId integer references %(model)sRdfResource(ID) );
CREATE INDEX %(model)sAclIdentName ON %(model)sAclIdent (Name); COMMIT; VACUUM ANALYZE; BEGIN;
"""
DESTROY_DDL = """\ DROP TABLE %(model)sUriHead; DROP TABLE %(model)sRdfResource; DROP TABLE %(model)sStatement; DROP TABLE %(model)sStringLiteral; DROP TABLE %(model)sNumericLiteral; DROP TABLE %(model)sACLIdent; DROP TABLE %(model)sACLAssoc;
DROP SEQUENCE %(model)sObjectSeq; DROP SEQUENCE %(model)sStatementSeq; DROP SEQUENCE %(model)sUriHeadSeq;
DROP FUNCTION %(model)s_rlookup_func (int4); DROP FUNCTION %(model)s_getobj_func (int4, int4);
--Might not need to explicitly drop the indices --DROP INDEX %(model)sUriHeadValue; --DROP INDEX %(model)sRdfResourceIdx; --DROP INDEX %(model)sStatementIDX; --DROP INDEX %(model)sStringLiteralIDX; --DROP INDEX %(model)sACLIdentName; """
CREATE_FUNCS = """\ CREATE FUNCTION %(model)s_rlookup_func (int4) -- Params: resource ID RETURNS varchar(4000) AS 'SELECT CASE WHEN $1 = NULL THEN NULL ELSE UH.value || RES.tail END FROM %(model)sUriHead AS UH, %(model)sRdfResource AS RES WHERE RES.id = $1 AND RES.uriHeadId = UH.id;' LANGUAGE 'sql';
CREATE FUNCTION %(model)s_getobj_func (int4, int4) -- Params: object type, object ID -- The OR is probably bad for performance :-( RETURNS varchar(4000) AS 'SELECT CASE WHEN $1 = 0 AND R.id = $2 THEN %(model)s_rlookup_func($2) WHEN $1 = 1 AND STR.id = $2 THEN STR.value END FROM %(model)sStringLiteral AS STR, %(model)sRdfResource AS R WHERE STR.id = $2 OR R.id = $2;' LANGUAGE 'sql' """
UUID_PATTERN = re.compile(u'(?P<head>urn:uuid:)(?P<tail>.+)') HTTPPLUS_PATTERN = re.compile(u'(?P<head>(http|ftp)://.+[/#])(?P<tail>.+)') MAILTO_PATTERN = re.compile(u'(?P<head>mailto:)(?P<tail>.+)')
FLAG_DICT = {Model.NORMAL: '=', Model.REGEX: '~', Model.REGEX | Model.IGNORE_CASE: '~*'}
enc, dec, srdr, swtr = codecs.lookup('utf-8') dec_utf8 = lambda x: dec(x)[0]
def ProcessFlags(flags): """ flags is a dict. keys are subjectFlags, predicateFlags, objectFlags, statementUriFlags and domainFlags values are Model.NORMAL, Model.IGNORE_CASE and Model.REGEX """ ops = (FLAG_DICT[flags.get('subjectFlags', Model.NORMAL)], FLAG_DICT[flags.get('predicateFlags', Model.NORMAL)], FLAG_DICT[flags.get('objectFlags', Model.NORMAL)], FLAG_DICT[flags.get('statementFlags', Model.NORMAL)], FLAG_DICT[flags.get('domainFlags', Model.NORMAL)], ) return ops
def MapObjectType(ot): #FIXME: This mapping should really be stored in the DBMS, to minimize #breakage across 4Suite versions
#return as is, for now return ot
def SplitUri(uri): #FIXME: Soon, hopefully, we can expect unicode if not isinstance(uri, unicode): uri = unicode(str(uri), 'utf-8') #Note: this function does not have to recognize URI syntax/semantics #Since the results are just stitched back together #But it would be friendly if it did match = HTTPPLUS_PATTERN.match(uri) if not match: match = UUID_PATTERN.match(uri) if not match: match = MAILTO_PATTERN.match(uri) if not match: split = len(uri)/2 return (uri[:split], uri[split:]) return (match.group('head'), match.group('tail'))
def ConnectStringToDict(connStr): fields = connStr.split() dict = {} for field in fields: k, v = field.split('=') dict[k] = v return dict
def InitializeModule(): """ Post-import hook to initialize module's runtime variables that are not required at import time, but will be needed before the module-level functions are called. """ global psycopg import psycopg return
def GetDb(connStr, modelName='default'): return DbAdapter(connStr, modelName)
def CreateDb(connString, modelName='default'): if connString.find('=') == -1: connString = 'dbname=' + connString params = ConnectStringToDict(connString) connect = "dbname=template1" conn = psycopg.connect(connect) cur = conn.cursor() cur.execute("select datname from pg_database where datname = %s", [str(params['dbname'])]) res = cur.fetchone() conn.rollback() if not res: #Must rollback to avoid "ERROR: CREATE DATABASE: may not be called in a transaction block" cur.execute("ROLLBACK; CREATE DATABASE " + str(params['dbname'])) conn.close()
conn = psycopg.connect(connString) cur = conn.cursor() cur.execute(CREATE_DDL%{'model': modelName}) cur.execute(CREATE_FUNCS%{'model': modelName})
conn.commit() conn.close() return DbAdapter(connString)
def DestroyDb(connString, modelName='default'): if connString.find('=') == -1: connString = 'dbname=' + connString params = ConnectStringToDict(connString) conn = psycopg.connect(connString) cur = conn.cursor() cur.execute(DESTROY_DDL%{'model': modelName}) conn.commit() conn.close() return
def ExistsDb(connString, modelName='default'): if connString.find('=') == -1: connString = 'dbname=' + connString params = ConnectStringToDict(connString) connect = "dbname=template1" conn = psycopg.connect(connect) cur = conn.cursor() cur.execute("select datname from pg_database where datname = %s", (str(params['dbname']),)) res = cur.fetchone() conn.close() if not res: return 0
conn = psycopg.connect(connString) cur = conn.cursor() cur.execute("select tablename from pg_tables where tablename = %s", (str(modelName)+'rdfresource',)) res = cur.fetchone() conn.close() if not res: return 0 return 1
class DbAdapter: def __init__(self, connString, modelName='default', db=None): if connString.find('=') == -1: self.connString = 'dbname=' + connString else: self.connString = connString self.params = ConnectStringToDict(self.connString) self._model = str(modelName) self._urihT = str(modelName + 'UriHead') self._stmtT = str(modelName + 'Statement') self._resT = str(modelName + 'RdfResource') self._strT = str(modelName + 'StringLiteral') self._urihS = str(modelName + 'UriHeadSeq') self._stmtS = str(modelName + 'StatementSeq') self._objS = str(modelName + 'ObjectSeq') self.props = {PROPERTIES.OBJECT_TYPE_SUPPORTED: 1} return
def begin(self): self._db = psycopg.connect(self.connString) self._uriHeadCache = {} self._resourceCache = {} #self._stringLiteralCache = {} return
def commit(self): ##print self._uriHeadCache ##print self._resourceCache self._db.commit() self._db.close() self._db = None return
def rollback(self): #self._db.rollback() #Not needed: is the default self._db.close() self._db = None return
####### Legacy API ####### def add(self, statements): if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() for (subj, pred, obj, suri, domain, obj_type) in statements: if obj_type == OBJECT_TYPE_UNKNOWN: obj_type = OBJECT_TYPE_LITERAL subj = SplitUri(subj) subj_res = self._addResource(cur, subj[0], subj[1]) pred = SplitUri(pred) pred_res = self._addResource(cur, pred[0], pred[1]) #FIXME: for now, every object is a string literal if suri: suri = SplitUri(suri) if domain: domain = SplitUri(domain) domain_res = self._addResource(cur, domain[0], domain[1]) else: domain_res = None if obj_type == OBJECT_TYPE_RESOURCE: obj = SplitUri(obj) obj_res = self._addResource(cur, obj[0], obj[1]) #print "INSERT into %s VALUES (nextval(%s), %i, %i, %i, %i, '%s')"%(self._stmtT, self._stmtS, subj_res, pred_res, obj_res, obj_type, domain_res) cur.execute( ("INSERT into %s VALUES (nextval(%%s), %%i, %%i, %%i, %%i, %s)"%(self._stmtT, domain_res and "%i" or "%s")).encode('utf-8'), (self._stmtS, subj_res, pred_res, obj_res, obj_type, domain_res) ) else: #print "INSERT into %s VALUES (nextval(%s), '%s')"%(self._strT.encode('utf-8'), self._objS, obj.encode('utf-8')) cur.execute( ("INSERT into %s VALUES (nextval(%%s), %%s)"%(self._strT)).encode('utf-8'), (self._objS, obj.encode('utf-8')) ) #print "INSERT into %s VALUES (nextval(%s), %i, %i, currval(%s), %i, '%s')"%(self._stmtT.encode('utf-8'), self._stmtS, subj_res, pred_res, self._objS, obj_type, domain_res or 0) cur.execute( ("INSERT into %s VALUES (nextval(%%s), %%i, %%i, currval(%%s), %%i, %s)"%(self._stmtT, domain_res and "%i" or "%s")).encode('utf-8'), (self._stmtS, subj_res, pred_res, self._objS, obj_type, domain_res) ) return
def _addResource(self, cur, head, tail): #NOTE: A stored proc version of all this would be so much more efficient tres = self._resourceCache.get((head, tail)) if tres: return tres hres = self._uriHeadCache.get(head) if not hres: cur.execute("SELECT id from %s WHERE value = %%s"%self._urihT, (head.encode('utf-8'),)) hres = cur.fetchone() if hres: hres = hres[0] tres = None if hres: #Already have the URI head self._uriHeadCache[head] = hres cur.execute("SELECT id from %s WHERE UriHeadId = %%i AND tail = %%s"%self._resT, (hres, tail.encode('utf-8'))) tres = cur.fetchone() if tres: tres = tres[0] else: #Resource tail not yet added #print "INSERT into %s VALUES (nextval(%s), %i, '%s')"%(self._resT, self._objS, hres, tail.encode('utf-8')) cur.execute( "INSERT into %s VALUES (nextval(%%s), %%i, %%s)"%(self._resT), (self._objS, hres, tail.encode('utf-8')) ) cur.execute("SELECT id from %s WHERE UriHeadId = %%i AND tail = %%s"%self._resT, (hres, tail.encode('utf-8'))) tres = cur.fetchone()[0] else: #print "INSERT into %s VALUES (nextval(%s), %s)"%(self._urihT, self._urihS, head.encode('utf-8')) cur.execute( "INSERT into %s VALUES (nextval(%%s), %%s)"%self._urihT, (self._urihS, head.encode('utf-8')) ) #print "INSERT into %s VALUES (nextval(%s), currval(%s), '%s')"%(self._resT, self._objS, self._urihS, tail.encode('utf-8')) cur.execute( "INSERT into %s VALUES (nextval(%%s), currval(%%s), %%s)"%self._resT, (self._objS, self._urihS, tail.encode('utf-8')) ) cur.execute("SELECT id from %s WHERE UriHeadId = currval(%%s) AND tail = %%s"%self._resT, (self._urihS, tail.encode('utf-8')) ) tres = cur.fetchone()[0] self._resourceCache[(head, tail)] = tres return tres
def _stmtSelect(self, subject, predicate, object, statementUri, domain, flags, idsOnly=0): if not self._db: raise DataBaseExceptions.NoTransaction ops = ProcessFlags(flags) cur = self._db.cursor() operators = ProcessFlags(flags) if idsOnly: select = "SELECT S.id" else: select = """\ SELECT %(model)s_rlookup_func(S.SubjectID), %(model)s_rlookup_func(S.PredicateID), %(model)s_getobj_func(S.objectTypeCode, S.ObjectID), NULL, %(model)s_rlookup_func(S.DomainID), S.objectTypeCode"""%{'model': self._model} from_ = " FROM %s AS S, "%self._stmtT where = "" where_params = [] if subject: #select += "R_S.ID, R_S.UriHeadID, SU_S.ID, SU_S.value, R_S.tail, " from_ += "%s AS SU_S, %s AS R_S, "%(self._urihT, self._resT) if where: where += "AND " where += "S.subjectID = R_S.ID AND R_S.UriHeadID = SU_S.ID AND SU_S.value || R_S.tail %s %%s "%(ops[0]) where_params.extend([subject.encode('utf-8')]) if predicate: from_ += "%s AS SU_P, %s AS R_P, "%(self._urihT, self._resT) if where: where += "AND " where += "S.predicateID = R_P.ID AND R_P.uriHeadID = SU_P.ID AND SU_P.value || R_P.tail %s %%s "%(ops[1]) where_params.extend([predicate.encode('utf-8')]) if domain: from_ += "%s AS SU_D, %s AS R_D, "%(self._urihT, self._resT) if where: where += "AND " where += "S.subjectID = R_D.ID AND R_D.uriHeadID = SU_D.ID AND SU_D.value || R_D.tail %s %%s "%(ops[4]) where_params.extend([domain.encode('utf-8')]) if object: from1 = from_ + "%s AS SU_O, %s AS R_O"%(self._urihT, self._resT) from2 = from_ + "%s AS STR"%(self._strT,) if where: where += "AND " where2 = where + "S.objectTypeCode = 1 AND S.objectID = STR.ID AND STR.value %s %%s "%(ops[2]) where += "S.objectTypeCode = 0 AND S.objectID = R_O.ID AND R_O.UriHeadID = SU_O.ID AND SU_O.value || R_O.tail %s %%s "%(ops[2]) obj_where_params = [object.encode('utf-8')] query = select + from1 + (" WHERE " + where) #print query cur.execute(query, where_params + obj_where_params) #print "done" result = cur.fetchall() #print "results fetched" query = select + from2 + (" WHERE " + where2) #print query, obj_where_params cur.execute(query, where_params + obj_where_params) result += cur.fetchall() #print result return result query = select + from_[:-2] + (where and " WHERE " + where or "") qparams = where_params #print query, qparams cur.execute(query, qparams) result = cur.fetchall() #print result return result
def complete(self, subject, predicate, object, statementUri, domain, flags): result = self._stmtSelect(subject, predicate, object, statementUri, domain, flags, 0) return [ (dec_utf8(x[0]), dec_utf8(x[1]), dec_utf8(x[2]), dec_utf8(x[3]), dec_utf8(x[4]), x[5]) for x in (result or []) ]
def contains(self, subject, predicate, object, statementUri, domain, flags): return len(self.complete(subject, predicate, object, statementUri, domain, flags)) > 0
def remove(self, statements): for s in statements: self.removePattern(s[0], s[1], s[2], s[3], s[4], {}) return
def removePattern(self, subject, predicate, object, statementUri, domain, flags): ids = self._stmtSelect(subject, predicate, object, statementUri, domain, flags, 1) cur = self._db.cursor() if ids: for id_row in ids: #print ("DELETE FROM %s WHERE id = %%i"%self._stmtT, id_row[0]) cur.execute("DELETE FROM %s WHERE id = %%i"%self._stmtT, (id_row[0],)) ## if ids: ## ids = tuple([ i[0] for i in ids ]) ## print ids ## cur = self._db.cursor() ## cur.executemany("DELETE FROM %s WHERE id = %%i"%self._stmtT, (ids,)) return
def size(self, domain=None): cur = self._db.cursor() #cur.execute("SELECT COUNT(id) FROM %s WHERE domain='%(domain)s'" cur.execute("SELECT COUNT(id) FROM %s"%(self._stmtT)) return cur.fetchone()[0]
####### Versa API ####### def subjectsFromPredsAndObj(self, predicates, object, scope=None): #FIXME: support scope from Ft.Rdf.Parsers.Versa import DataTypes #FIXME: could overflow query length limits if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() select = "SELECT DISTINCT %(model)s_rlookup_func(S.SubjectID)"%{'model': self._model} from_ = " FROM %s AS S, "%self._stmtT where = "" where_params = [] if object is not None: obj_type_code = 1 if isinstance(object, DataTypes.Resource): obj_type_code = 0 object = unicode(str(object), 'utf-8') elif not isinstance(object, unicode): object = unicode(str(object), 'utf-8') if where: where += "AND " if obj_type_code == 0: from_ += "%s AS SU_O, %s AS R_O, "%(self._urihT, self._resT) where += "S.objectID = R_O.ID AND R_O.UriHeadID = SU_O.ID AND SU_O.value || R_O.tail = %s " else: from_ += "%s AS STR, "%(self._strT) where += "S.objectID = STR.ID AND STR.value = %s " where_params.extend([object.encode('utf-8')]) for i in range(len(predicates)): p = predicates[i] if not isinstance(p, unicode): p = unicode(str(p), 'utf-8') from_ += "%s AS SU_P%i, %s AS R_P%i, "%(self._urihT, i, self._resT, i) if where: where += "AND " where += "S.predicateID = R_P%i.ID AND R_P%i.uriHeadID = SU_P%i.ID AND SU_P%i.value || R_P%i.tail = %%s "%(i, i, i, i, i) where_params.extend([p.encode('utf-8')]) query = select + from_[:-2] + (where and " WHERE " + where or "") qparams = where_params #print query, qparams cur.execute(query, qparams) #print "done" return map(lambda x: dec_utf8(x[0]), cur.fetchall())
def subjectsFromPredAndObjs(self, predicate, objects, scope=None): #FIXME: support scope from Ft.Rdf.Parsers.Versa import DataTypes #FIXME: could overflow query length limits if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() select = "SELECT DISTINCT %(model)s_rlookup_func(S.SubjectID)"%{'model': self._model} from_ = " FROM %s AS S, "%(self._stmtT) where = "" where_params = [] if predicate is not None: from_ += "%s AS SU_P, %s AS R_P, "%(self._urihT, self._resT) if where: where += "AND " where += "S.predicateID = R_P.ID AND R_P.uriHeadID = SU_P.ID AND SU_P.value || R_P.tail = %s " where_params.extend([predicate.encode('utf-8')]) #print objects for i in range(len(objects)): o = objects[i] obj_type_code = 1 if isinstance(o, DataTypes.Resource): obj_type_code = 0 o = unicode(str(o), 'utf-8') elif not isinstance(o, unicode): o = unicode(str(o), 'utf-8') if where: where += "AND " if obj_type_code == 0: from_ += "%s AS SU_O%i, %s AS R_O%i, "%(self._urihT, i, self._resT, i) where += "S.objectID = R_O%i.ID AND R_O%i.UriHeadID = SU_O%i.ID AND SU_O%i.value || R_O%i.tail = %%s "%(i, i, i, i, i) else: from_ += "%s AS STR%i, "%(self._strT, i) where += "S.objectID = STR%i.ID AND STR%i.value = %%s "%(i, i) #where += "S.objectID = STR%i.ID AND STR%i.value = %%s AND S.objectTypeCode = 1 AND S1.SubjectID = R_O%i.ID AND R_O%i.UriHeadID = SU_O%i.ID AND SU_O%i.value = %%s AND R_O%i.tail = %%s AND S1.objectTypeCode = 0 "%(i, i, i, i, i, i, i) where_params.extend([o.encode('utf-8')]) #where += "S.objectID = STR%i.ID AND STR%i.value = %%s "%(i, i) #where_params.extend([o.encode('utf-8')]) query = select + from_[:-2] + (where and " WHERE " + where or "") qparams = where_params #print query, qparams cur.execute(query, qparams) #print "done" return map(lambda x: dec_utf8(x[0]), cur.fetchall())
def objectsFromSubAndPreds(self, subject, predicates, scope=None): #FIXME: support scope #FIXME: could overflow query length limits if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() if not isinstance(subject, unicode): subject = unicode(str(subject), 'utf-8') select = "SELECT DISTINCT %(model)s_getobj_func(S.objectTypeCode, S.ObjectID), S.objectTypeCode"%{'model': self._model} from_ = " FROM %s AS S, "%(self._stmtT) where = "" where_params = [] if subject is not None: from_ += "%s AS SU_S, %s AS R_S, "%(self._urihT, self._resT) if where: where += "AND " where += "S.subjectID = R_S.ID AND R_S.uriHeadID = SU_S.ID AND SU_S.value || R_S.tail = %s " where_params.extend([subject.encode('utf-8')]) for i in range(len(predicates)): p = predicates[i] if not isinstance(p, unicode): p = unicode(str(p), 'utf-8') from_ += "%s AS SU_P%i, %s AS R_P%i, "%(self._urihT, i, self._resT, i) if where: where += "AND " where += "S.predicateID = R_P%i.ID AND R_P%i.uriHeadID = SU_P%i.ID AND SU_P%i.value || R_P%i.tail = %%s "%(i, i, i, i, i) where_params.extend([p.encode('utf-8')]) query = select + from_[:-2] + (where and " WHERE " + where or "") qparams = where_params #print query, qparams cur.execute(query, qparams) #print "done" return map(lambda x: (dec_utf8(x[0]), x[1]), cur.fetchall())
def isResource(self, res): if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() if not isinstance(res, unicode): res = unicode(str(res), 'utf-8') query = "SELECT COUNT(R.id) FROM %s AS R, %s AS SU WHERE R.uriHeadID = SU.id AND SU.value || R.tail = %%s"%(self._resT, self._urihT) cur.execute(query, [res.encode('utf-8')]) return cur.fetchone()[0] > 0
def resources(self): if not self._db: raise DataBaseExceptions.NoTransaction cur = self._db.cursor() query = "SELECT SU.value, R.tail FROM %s AS R, %s AS SU WHERE R.uriHeadID = SU.id"%(self._resT, self._urihT) cur.execute(query) #return map(lambda x: x[0]+x[1], cur.fetchall()) return [ dec_utf8(s[0] + s[1]) for s in cur.fetchall() ]
############################################################################# # Documentation, examples and other miscellany #############################################################################
######### SQL EXAMPLES ########## ######### ADD TRIPLE ########## ## Add s='spam', p='eggs', o='ni' domain='monty' ## Just fudge and assume URIs are split first char, rest
##INSERT into UriHead VALUES (nextval('UriHeadSeq'), 's') ##INSERT into RdfResource VALUES (nextval('ObjectSeq'), currval('UriHeadSeq'), 'pam') ##INSERT into UriHead VALUES (nextval('UriHeadSeq'), 'e') ##INSERT into RdfResource VALUES (nextval('ObjectSeq'), currval('UriHeadSeq'), 'ggs') ##INSERT into UriHead VALUES (nextval('UriHeadSeq'), 'm') ##INSERT into RdfResource VALUES (nextval('ObjectSeq'), currval('UriHeadSeq'), 'onty') ##INSERT into StringLiteral VALUES (nextval('ObjectSeq'), 'ni') ##INSERT into Statement VALUES (nextval('StatementSeq'), currval('ObjectSeq')-3, currval('ObjectSeq')-2, currval('ObjectSeq')-1, 1, currval('ObjectSeq'))
######### COMPLETE TRIPLE ########## ## get just the matching subjects ##Note that in some cases, it would be more efficient not to do this join ##Mostly when one is using the same criteria over and over for query ##In which case, it's faster to just look up the corresponding resource ##Sequence numbers and plug them right in
##SELECT UriHead.ID, UriHead.value, ## RdfResource.ID, RdfResource.UriHeadID, RdfResource.tail, ## Statement.SubjectID, Statement.PredicateID, ## Statement.ObjectID, Statement.DomainID ##FROM UriHead, RdfResource, Statement ##WHERE Statement.SubjectID = RdfResource.ID ##AND RdfResource.UriHeadID = UriHead.ID ##AND UriHead.value = 'http://foo.com/' ##AND RdfResource.tail = 'bar'
##Here is a code snippet to select everything that matches certain subject and predicates(untested)
##SELECT S.SUBJECTID, RS.NAME, RS.SPLITURIID, ## S.PREDICATEID, RP.NAME, RP.SPLITURIID, ## S.OBJECTID, S.OBJECTTYPECODE ##FROM STATEMENT S, RESOURCE RS, RESOURCE RP ##WHERE S.SUBJECTID = ? AND S.PREDICATEID = ? AND S.SUBJECT = RS.ID ##AND S.PREDICATE = RP.ID
##From: Uche Ogbuji ##Date: 30 Jan 2002 20:31:44 -0700
##OK. I've heard nothing but praise for this, so I'm giving it a try.
##First a coupla links
##http://www.rons.net.cn/english/FSM/fog ##http://initd.org/Software/psycopg
##Here's what I'm doing
##First of all requires mxDateTime:
##Grab egenix-mx-base-2.0.3.zip from
##http://www.egenix.com/files/python/eGenix-mx-Extensions.html#Download-mxBASE
##(tgz seems to be corrupted)
##Unpack to ~/src, cd ~/src and ./setup.py install
##Then grab Psyco (psycopg-1.0.1.tar.gz)
##Unpack to ~/src and cd ~/src/psycopg-1.0.1
##$ ./configure ##--with-mxdatetime-includes=../egenix-mx-base-2.0.3/mx/DateTime/mxDateTime/ --with-postgres-includes=/usr/include/pgsql/ ##$ make ##$ make install
##Try her out:
##$ createdb psy
##Then in interactive Python:
##>>> import psycopg ##>>> connect = "dbname=psy" ##>>> conn = psycopg.connect(connect) ##>>> curs = conn.cursor() ##>>> curs.execute("CREATE TABLE stmt (s text, p text, o text)") ##>>> conn.commit() ##>>> curs.execute("INSERT INTO stmt VALUES ('urn:faux:uche', ##'urn:faux:fname', 'Uche Ogbuji')")
###This part is cool:
##>>> curs.executemany("INSERT INTO stmt VALUES (%s, %s, %s)", ##(('urn:faux:jeremy', 'urn:faux:fname', 'Jeremy Kloth'), ##('urn:faux:mike', 'urn:faux:fname', 'Mike Olson')))
##>>> conn.commit() ##>>> curs.execute("SELECT * FROM stmt") ##>>> row = curs.fetchone() ##>>> print repr(row) ##('urn:faux:uche', 'urn:faux:fname', 'Uche Ogbuji') ##>>> rows = curs.fetchall() ##>>> print [ repr(r) for r in rows ] ##["('urn:faux:jeremy', 'urn:faux:fname', 'Jeremy Kloth')", ##"('urn:faux:mike', 'urn:faux:fname', 'Mike Olson')"] ##>>> ##However, we might have a problem:
##>>> curs.execute((u"INSERT INTO stmt VALUES ('%s', '%s', '%s')"%(u'a', ##unicode('ê', "iso8859-1"), u'ê')).encode("utf-8")) ##>>> conn.commit() ##>>> rows = curs.fetchall() ##>>> curs.execute("SELECT * FROM stmt") ##>>> rows = curs.fetchall() ##>>> print [ repr(r) for r in rows ] ##["('urn:faux:uche', 'urn:faux:fname', 'Uche Ogbuji')", ##"('urn:faux:jeremy', 'urn:faux:fname', 'Jeremy Kloth')", ##"('urn:faux:mike', 'urn:faux:fname', 'Mike Olson')", "('a', ##'\\xc3\\xaa', '\\xc3\\xaa')"]
##I think we can get around this, though.
##Also see http://www.python.org/topics/database/DatabaseAPI-2.0.html
QUICK_TEST = """\ RDF_FILE = 'w3c1.rdf' from Ft.Rdf.Drivers import Psyco #Psyco.CreateDb('dbname=ftrdf_test', 'default') from Ft.Rdf import Model, Util model, db = Util.DeserializeFromUri(RDF_FILE, driver=Psyco, dbName='dbname=ftrdf_test', create=1) db.begin() model.complete(None, None, None) model.complete("http://www.dlib.org", None, None) model.complete(None, "http://purl.org/metadata/dublin_core#Title", None) model.complete(None, None, "World Wide Web Home Page") model.complete(None, None, "http://www.w3.org/1999/02/22-rdf-syntax-ns#Bag") model.complete("http://www.dlib.org", "http://purl.org/metadata/dublin_core#Title", None) model.complete("http://www.dlib.org", "http://purl.org/metadata/dublin_core#Title", "D-Lib Program - Research in Digital Libraries") model.complete(None, "http://purl.org/metadata/dublin_core#Title", "D-Lib Program - Research in Digital Libraries") model.complete("http://www.dlib.org", None, "D-Lib Program - Research in Digital Libraries") model.complete("http://www.dlib.org", "http://purl.org/metadata/dublin_core#Title", "foo")
model.complete(None, ".*purl.org.*", None, predicateFlags=Model.REGEX) model.complete(None, None, ".*ib.*", objectFlags=Model.REGEX) model.complete(".*lib.*", None, None, subjectFlags=Model.REGEX)
#Versa NSMAP = {"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "dc":"http://purl.org/metadata/dublin_core#"} r = Util.VersaQuery("all()", model, nsMap=NSMAP) print r print Util.VersaDataToXml(r) r = Util.VersaQuery("all() - dc:Title -> *", model, nsMap=NSMAP) print r print Util.VersaDataToXml(r) r = Util.VersaQuery("all() - rdf:type -> *", model, nsMap=NSMAP) print r print Util.VersaDataToXml(r) r = Util.VersaQuery("rdf:Bag <- rdf:type - *", model, nsMap=NSMAP) print r print Util.VersaDataToXml(r)
"""
##SQL Warning: spurious FROM source tables that are not narrowed down ##in the WHERE clause can cause weird duplicate rows in the result because of the cartesian product ##For instance, if there are 4 rows in the defaultUriHead table and ##15 in defaultRdfResource, the following query will return 60 (4*15) ##duplicates of each resultant row in the defaultStatement table:
##SELECT S.* ## FROM defaultStatement AS S, defaultStringLiteral AS STR, ## defaultUriHead AS SU_O, defaultRdfResource AS R_O ## WHERE S.objectTypeCode = 1 AND S.objectID = STR.ID AND ## STR.value = 'World Wide Web Home Page'
##Simple fix is to eliminate the spurious source tables:
##SELECT S.* ## FROM defaultStatement AS S, defaultStringLiteral AS STR ## WHERE S.objectTypeCode = 1 AND S.objectID = STR.ID AND ## STR.value = 'World Wide Web Home Page'
##NOTE complete() query plan:
##xmlserver=# explain SELECT system_rlookup_func(S.SubjectID),system_rlookup_func(S.PredicateID),system_getobj_func(S.objectTypeCode, S.ObjectID),NULL,system_rlookup_func(S.DomainID),S.objectTypeCode FROM systemStatement AS S, systemUriHead AS SU_S, systemRdfResource AS R_S, systemUriHead AS SU_P, systemRdfResource AS R_P WHERE S.subjectID = R_S.ID AND R_S.UriHeadID = SU_S.ID AND SU_S.value || R_S.tail = '/ftss/demos/images/cards' AND S.predicateID = R_P.ID AND R_P.uriHeadID = SU_P.ID AND SU_P.value || R_P.tail = 'http://schemas.4suite.org/4ss#type'; ##NOTICE: QUERY PLAN:
##Nested Loop (cost=126.51..199.96 rows=1 width=2108) ## -> Nested Loop (cost=126.51..175.75 rows=5 width=1588) ## -> Hash Join (cost=126.51..151.57 rows=5 width=1064) ## -> Seq Scan on systemstatement s (cost=0.00..20.00 rows=1000 width=20) ## -> Hash (cost=126.50..126.50 rows=5 width=1044) ## -> Merge Join (cost=0.00..126.50 rows=5 width=1044) ## -> Index Scan using systemurihead_pkey on systemurihead su_s (cost=0.00..52.00 rows=1000 width=520) ## -> Index Scan using systemrdfresourceidx on systemrdfresource r_s (cost=0.00..52.00 rows=1000 width=524) ## -> Index Scan using systemrdfresource_pkey on systemrdfresource r_p (cost=0.00..4.82 rows=1 width=524) ## -> Index Scan using systemurihead_pkey on systemurihead su_p (cost=0.00..4.82 rows=1 width=520)
##Then after VACUUM ANALYZE:
##Hash Join (cost=68.96..71.65 rows=1 width=122) ## -> Seq Scan on systemurihead su_p (cost=0.00..1.86 rows=86 width=24) ## -> Hash (cost=68.93..68.93 rows=11 width=98) ## -> Hash Join (cost=60.41..68.93 rows=11 width=98) ## -> Seq Scan on systemrdfresource r_p (cost=0.00..6.08 rows=308 width=27) ## -> Hash (cost=60.38..60.38 rows=11 width=71) ## -> Nested Loop (cost=2.08..60.38 rows=11 width=71) ## -> Hash Join (cost=2.08..15.09 rows=2 width=51) ## -> Seq Scan on systemrdfresource r_s (cost=0.00..6.08 rows=308 width=27) ## -> Hash (cost=1.86..1.86 rows=86 width=24) ## -> Seq Scan on systemurihead su_s (cost=0.00..1.86 rows=86 width=24) ## -> Index Scan using systemstatementidx on systemstatement s (cost=0.00..29.32 rows=7 width=20)
|