Viewing file: CoreFunctions.py (25.13 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Rdf/Parsers/Versa/CoreFunctions.py,v 1.27 2005/03/02 19:31:11 cogbuji Exp $
"""
Core Versa functions

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""
from Ft.Lib import boolean, number from Ft.Rdf import RDF_MS_BASE, RDF_SCHEMA_BASE from Ft.Rdf.Parsers.Versa import Constants from Ft.Rdf.Parsers.Versa import Compile from Ft.Rdf.Parsers.Versa import RDF_TYPE_PROP, RDF_SUBCLASSOF_PROP, RDF_SUBPROPOF_PROP
import DataTypes import ResourceExpressions, Traversal, Literals, Optimizer
# Property URIs built from the RDF and RDF Schema namespace bases.
# _TYPE is used below when asking the driver for type statements;
# _SUBCLASSOF is used when looking up sub classes of a type.
_TYPE = RDF_MS_BASE + 'type'
_SUBCLASSOF = RDF_SCHEMA_BASE + 'subClassOf'
def Current(con):
    """
    Give back whatever value the context currently holds.

    NOTE: this module defines Current a second time further down (in the
    conversions section); that later definition is the one the name is
    bound to once the module has finished loading.
    """
    current_value = con.current
    return current_value
# # List and set functions #
def List(con, *args):
    """
    Build a fresh list holding the given arguments, in order.

    con - the evaluation context (not used)
    """
    items = []
    items.extend(args)
    return items
def Set(con, *args):
    """
    Create a new set.

    If the first argument is a list, it is converted to a set with the
    same members.
    If the arguments are one or more scalar values, the result is a set
    with those values.
    With no arguments at all, the result is an empty set.
    Otherwise, it is an error, and you may have meant to use the
    flatten() function
    """
    if not args:
        # Previously args[0] raised IndexError for a bare set() call
        return DataTypes.Set()
    if isinstance(args[0], list):
        return DataTypes.Set(args[0])
    return DataTypes.Set(list(args))
def Length(con, obj):
    """
    Convert the argument to a list and report how many items it has.
    """
    as_list = DataTypes.ToList(obj)
    return len(as_list)
def Join(con, *args):
    """
    Concatenate the arguments into a single new list.

    Every argument (including the first) is converted to a list and the
    resulting lists are appended to each other in argument order.
    With no arguments the result is an empty list.
    """
    # The original reduce() call had no initial value, so the first
    # argument was used unconverted (and returned as-is when it was the
    # only argument), and an empty argument list raised TypeError.
    joined = []
    for arg in args:
        joined.extend(DataTypes.ToList(arg))
    return joined
def Item(con, l, num=1):
    """
    Converts the first argument to a list L, and the second to a number N.
    Returns the Nth item in L (counting from 1).  N defaults to 1.
    If N is less than 1, or exceeds the length of the list, an empty
    list is the result.
    """
    index = int(DataTypes.ToNumber(num))
    if index < 1:
        # Without this guard index-1 would be negative and Python would
        # silently count from the end of the list (N=0 gave the last item)
        return []
    try:
        return DataTypes.ToList(l)[index - 1]
    except IndexError:
        return []
def Head(con, l, num=1):
    """
    Converts the first argument to a list L, and the second to a number N.
    Returns a list consisting of the first N items in L.  N defaults to 1.
    If N is negative, or exceeds the length of the list, the entire list
    is the result.
    """
    items = DataTypes.ToList(l)
    count = int(DataTypes.ToNumber(num))
    if count < 0:
        # A negative slice bound would drop items from the end instead
        # of returning the entire list as documented
        return items[:]
    return items[:count]
def Tail(con, l, num=1):
    """
    Converts the first argument to a list L, and the second to a number N.
    Returns a list consisting of the last N items in L.  N defaults to 1.
    If N is zero or negative, an empty list is the result.
    If N exceeds the length of the list, the entire list is the result.
    """
    # NOTE(review): the earlier docstring said an N larger than the list
    # yields an empty list, but the slice has always returned the whole
    # list in that case; that behavior is kept -- confirm against the
    # Versa specification.
    items = DataTypes.ToList(l)
    count = int(DataTypes.ToNumber(num))
    if count <= 0:
        # items[-0:] is the whole list and a negative N turns into a
        # positive start index, neither of which is "the last N items"
        return []
    return items[-count:]
def Rest(con, l, num=1):
    """
    Converts the first argument to a list L, and the second to a number N.
    Returns a list consisting of all items in L after position N.
    N defaults to 1.
    If N is negative, or exceeds the length of the list, an empty list is
    the result.

    The following expression is presumably meant to return the same list
    as L, regardless of the value of N (the original note was truncated):
        join(head(L, N), rest(L, N))
    """
    items = DataTypes.ToList(l)
    count = int(DataTypes.ToNumber(num))
    if count < 0:
        # A negative start index would return the last |N| items rather
        # than the documented empty list
        return []
    return items[count:]
#Instead of these forms, add a "flatten" function ##def List(con, *args): ## """ ## Create a new list ## """ ## finalValues = [] ## for arg in args: ## if DataTypes.IsList(arg): ## finalValues.extend(arg) ## elif DataTypes.IsSet(arg): ## finalValues.extend(arg._d.keys()) ## else: ## finalValues.append(arg) ## return finalValues
##def Set(con, *args): ## """ ## Create a new set ## """ ## finalValues = [] ## for arg in args: ## if DataTypes.IsList(arg): ## finalValues.extend(arg) ## elif DataTypes.IsSet(arg): ## finalValues.extend(arg._d.keys()) ## else: ## finalValues.append(arg) ## rt = DataTypes.Set(finalValues) ## return rt
def Distribute(con, sourceList, *exprs):
    """
    Apply a list of expressions to a list of values, returning a list of
    lists of the results.

    sourceList - a list of values (converted to a list)
    exprs - the remaining arguments are all converted to strings and
            treated as Versa expressions which are applied to each item
            of sourceList

    The result has one entry per item of sourceList; each entry is the
    list of results of the expressions, in the order given.
    The context's current value is restored before returning, even if
    one of the expressions raises.
    """
    sq_funcs = [
        Optimizer.SubQueryFunction(Compile(DataTypes.ToString(e)), con)
        for e in exprs
    ]
    sourceList = DataTypes.ToList(sourceList)
    res = []
    orig = con.current
    try:
        for obj in sourceList:
            #FIXME: ShiftContext is used in case a sub-expression mutates
            #the context, but we need to fix it so that subexpressions
            #don't mutate the context: side-effect free, right?
            res.append([f(ShiftContext(con, obj)) for f in sq_funcs])
    finally:
        # ShiftContext overwrites con.current; put it back no matter how
        # the loop ends
        con.current = orig
    return res
def Map(con, expr, *argLists):
    """
    Placeholder for the Versa map() function.

    Intended contract: expr is converted to a string and treated as a
    Versa expression; the remaining arguments are lists that are iterated
    over to supply the arguments for each evaluation, and the result is a
    list of lists of results.

    Not implemented pending spec issues: the arguments are ignored and
    None is returned.
    """
    return None
def Filter(con, sourceList, *filters):
    """
    Keep the items of a list for which every filter expression is true.

    sourceList - converted to a list of candidate items
    filters - the remaining arguments are converted to strings and
              treated as Versa boolean expressions, each evaluated with
              the candidate item as the current context value

    Returns the list of items for which all filters evaluated to true.
    The context's current value is restored before returning, even if
    one of the filters raises.
    """
    sq_funcs = [
        Optimizer.SubQueryFunction(Compile(DataTypes.ToString(f)), con)
        for f in filters
    ]
    sourceList = DataTypes.ToList(sourceList)
    res = []
    orig = con.current
    try:
        for obj in sourceList:
            #FIXME: ShiftContext is used in case a sub-expression mutates
            #the context, but we need to fix it so that subexpressions
            #don't mutate the context: side-effect free, right?
            passed = [
                1 for f in sq_funcs
                if DataTypes.ToBoolean(f(ShiftContext(con, obj)))
            ]
            if len(passed) == len(sq_funcs):
                res.append(obj)
    finally:
        # ShiftContext overwrites con.current; put it back no matter how
        # the loop ends
        con.current = orig
    return res
def Sort(con, sourceList=None, type_=Constants.SORT_STRING, order=Constants.SORT_ASCENDING):
    """
    Sort the given set or list.

    If there are no arguments, the context is sorted using default params
    The first argument is converted to a list
    The second argument is the optional sort type (number or string)
    The third argument is the optional sort order

    The result is the list of the items converted to the sort type
    (numbers or strings), ordered by that converted value.
    """
    # NOTE(review): as before, the converted values are returned rather
    # than the original items (SortQ returns the original items) --
    # confirm which one the Versa specification intends.
    if sourceList is None:
        items = DataTypes.ToList(con.current)
    else:
        items = DataTypes.ToList(sourceList)

    if DataTypes.Cmp(Constants.SORT_NUMBER, type_) == 0:
        keys = [DataTypes.ToNumber(i) for i in items]
    else:
        keys = [DataTypes.ToString(i) for i in items]

    # The original decorated each item as (item, converted) and sorted
    # the tuples, which ordered by the raw item first; a numeric sort of
    # numeric strings therefore came out in string order.  Sorting the
    # converted values directly orders by the requested type.
    keys.sort()
    if DataTypes.Cmp(Constants.SORT_DESCENDING, order) == 0:
        keys.reverse()
    return keys
def SortQ(con, sourceList, expr, type_=Constants.SORT_STRING, order=Constants.SORT_ASCENDING):
    """
    Dynamically sort the given set or list.

    The first argument is converted to a list (if it is None, the
    current context value is used instead)
    The second argument is the expression to be dynamically evaluated per
    list item, with that item as the current context value, to obtain
    its sort key
    The third argument is the optional sort type (number or string)
    The fourth argument is the optional sort order

    The result is the list of original items ordered by their keys.
    The context's current value is restored before returning.
    """
    orig = con.current
    if sourceList is None:
        # Convert the context value too, as Sort does
        sourceList = DataTypes.ToList(con.current)
    else:
        sourceList = DataTypes.ToList(sourceList)
    expr = Compile(DataTypes.ToString(expr))
    key_func = Optimizer.SubQueryFunction(expr, con)

    if DataTypes.Cmp(Constants.SORT_NUMBER, type_) == 0:
        convert = DataTypes.ToNumber
    else:
        convert = DataTypes.ToString

    try:
        keyList = [
            (i, convert(key_func(ShiftContext(con, i))))
            for i in sourceList
        ]
    finally:
        # ShiftContext overwrites con.current; the original left it
        # pointing at the last item evaluated
        con.current = orig

    # Compare on the computed key only, never on the items themselves
    sort_func = lambda a, b: cmp(a[1], b[1])
    keyList.sort(sort_func)
    if DataTypes.Cmp(Constants.SORT_DESCENDING, order) == 0:
        keyList.reverse()
    return [i[0] for i in keyList]
def ShiftContext(con, new):
    """
    Point the context's current value at `new` and hand the same context
    object back (the context is mutated in place, not copied).
    """
    #Not a Versa core function, but rather a utility routine
    #FIXME: Should probably have leading _
    setattr(con, 'current', new)
    return con
def ScopedSubquery(con, expr, scope):
    '''
    Evaluate a subexpression within the limits of a given scope

    expr - converted to a string and compiled as a Versa expression
    scope - converted with str() and installed as con.scope while the
            subexpression is compiled and evaluated

    Returns the result of the subexpression.  The context's previous
    scope is restored afterwards, even if evaluation raises.
    '''
    #FIXME: should probably be in 4Suite-specific NS
    saved_scope = con.scope
    con.scope = str(scope)
    try:
        f = Optimizer.SubQueryFunction(
            Compile(DataTypes.ToString(expr)), con
        )
        # The original restored the scope before calling f(con), so the
        # evaluation itself saw the old scope.
        # NOTE(review): assumes SubQueryFunction reads con.scope at call
        # time -- if it captures the scope at construction this is
        # equivalent; confirm against Optimizer.
        return f(con)
    finally:
        con.scope = saved_scope
def All(con):
    """
    Get all resources the driver reports for the context's scope.

    Returns a list with each one converted to a resource.
    Note, if this was all()[] then a FilterFunction is created
    """
    found = []
    for r in con.driver.resources(con.scope):
        found.append(DataTypes.ToResource(r))
    return found
def Type(con, types):
    """
    Get all resources of the given type(s), as a set.

    The argument is converted to a list of types; an empty list gives an
    empty set.  Sub classes are taken into account, following RDFS
    entailment rule rdfs9 (http://www.w3.org/TR/rdf-mt/#RDFRules,
    7.3 RDFS Entailment Rules):

        uuu rdfs:subClassOf xxx .
        vvv rdf:type uuu .
        --> vvv rdf:type xxx .

    Other rdf:type entailments that could be applied here but are not:

      rdfs2
        aaa rdfs:domain xxx .
        uuu aaa yyy .
        --> uuu rdf:type xxx .
        Versa solution (? - skolem variable):
          inferred-member |- (list(types) <- rdfs:domain - ?) -> ?

      rdfs3
        aaa rdfs:range xxx .
        uuu aaa vvv .
        --> vvv rdf:type xxx .
        Versa solution (? - skolem variable):
          ? - (list(types) <- rdfs:range - ?) -> inferred-member

    where inferred-member is a resource returned amongst the list
    resulting from this evaluation of type()
    """
    requested = DataTypes.ToList(types)
    if not requested:
        return DataTypes.Set()

    # Widen the requested types with whatever the subClassOf relation
    # map lists under each of them
    subclass_map = ResourceExpressions.GetRelations(
        None, RDF_SUBCLASSOF_PROP, con, 1
    )
    candidates = list(requested)
    for t in requested:
        candidates.extend(subclass_map.get(t, []))

    #FIXME: The following line can be a performance pig
    members_by_type = ResourceExpressions.GetRelations(
        candidates, RDF_TYPE_PROP, con, 1
    )
    members = []
    for key in members_by_type.keys():
        for m in members_by_type[key]:
            members.append(m)
    return DataTypes.Set([DataTypes.ToResource(m) for m in members])
def Intersection(con, *args):
    """
    Set intersection

    Every argument is converted to a set; the result is a new set holding
    the members of the first set that are present in all of the others.
    With no arguments the result is an empty set.
    """
    set_args = [DataTypes.ToSet(e) for e in args]
    res = DataTypes.Set()
    if not set_args:
        return res
    #FIXME: optimize by using the shortest
    first = set_args[0]
    others = set_args[1:]
    for i in first._d.keys():
        # The original appended i as soon as ANY operand contained it --
        # which always includes the first operand itself -- so the result
        # was simply the first set.  Require membership in every operand.
        for other in others:
            if i not in other._d:
                break
        else:
            res.append(i)
    return res
def Union(con, *args):
    """
    Set union: a new set holding every member of every argument, each
    argument being converted to a set first.
    """
    operands = [DataTypes.ToSet(e) for e in args]
    merged = DataTypes.Set()
    #FIXME: optimize by using the shortest
    for operand in operands:
        members = operand._d.keys()
        for member in members:
            merged.append(member)
    return merged
def Difference(con, arg1, arg2):
    """
    Set difference: the members of the first argument that are not
    members of the second, both being converted to sets first.
    """
    source = DataTypes.ToSet(arg1)
    excluded = DataTypes.ToSet(arg2)._d
    remaining = DataTypes.Set()
    #FIXME: optimize by using the shortest
    for member in source._d.keys():
        if member in excluded:
            continue
        remaining.append(member)
    return remaining
# # Resource functions #
def Traverse(con, subjs, preds, dirn=Constants.TRAVERSE_FWD, trans=Constants.TRAVERSE_SIMPLE):
    """
    Follow the given predicates from the given starting resources and
    return the set of nodes reached.

    subjs - starting resources; converted to a set, then a list
    preds - predicates to follow; converted to a set, then a list
    dirn - traversal direction; the relations are requested with the
           inverse flag set when this equals Constants.TRAVERSE_INV
    trans - when this equals Constants.TRAVERSE_TRANS the relations are
            requested with forceTransitive set and without restricting
            them to the starting resources

    Returns a DataTypes.Set of the nodes found by the nested extract()
    helper.
    """
    preds = DataTypes.ToList(DataTypes.ToSet(preds))
    start_nodes = DataTypes.ToList(DataTypes.ToSet(subjs))
    # and/or idiom: the start nodes for a non-transitive traversal, else
    # None (also None when start_nodes is empty)
    restrict_start = trans != Constants.TRAVERSE_TRANS and start_nodes or None
    # One relation lookup per predicate; each is indexed by node below
    relations = [
        ResourceExpressions.GetRelations(
            restrict_start, p, con, (dirn == Constants.TRAVERSE_INV),
            forceTransitive=(trans == Constants.TRAVERSE_TRANS)
        )
        for p in preds
    ]
    # Shared across the recursive extract() calls
    handled_starting_points = []

    def extract(start_with):
        #Extract from the raw set of relation results
        #A set of just the nodes along the traversal chain
        result = DataTypes.Set()
        for n in start_with:
            if n in handled_starting_points:
                #We could see a node in the start list multiple times
                #In case of circular relationships
                #Break the chain to prevent infinite recursion
                continue
            #FIXME: Outer loop over the smaller of result and r.keys()
            for r in relations:
                if n in r:
                    for obj in r[n]:
                        result.append(obj)
        handled_starting_points.extend(start_with)
        if result._d:
            # Continue from the nodes just found and merge what they reach
            plus = extract(result._d.keys())
            result._d.update(plus._d)
        return result

    return extract(start_nodes)
def Order(con, subjs, pred, dirn=Constants.TRAVERSE_FWD):
    """
    For each starting resource, list the nodes met on a depth-first walk
    along a single predicate.

    subjs - starting resources; converted to a set, then a list
    pred - the predicate to follow; converted to a resource
    dirn - the relation is requested with the inverse flag set when this
           equals Constants.TRAVERSE_INV

    Returns a list with one sub-list per starting resource.
    """
    pred = DataTypes.ToResource(pred)
    inverse = (dirn == Constants.TRAVERSE_INV)
    relation = ResourceExpressions.GetRelations(
        None, pred, con, inverse, forceTransitive=0
    )
    chains = []
    for start in DataTypes.ToList(DataTypes.ToSet(subjs)):
        walk = []
        _depthFirst(start, relation, walk, {})
        chains.append(walk)
    return chains
def _depthFirst(curr, relation, subresult, seen): #subresult and seen are mutated in place subresult.append(curr) if curr in seen: #To prevent infinite loops return seen[curr] = None if curr in relation: arcs = relation[curr] #arcs = DataTypes.ToSet(relation[curr]) for next in arcs: _depthFirst(next, relation, subresult, seen) return
def Triples(con, matchSubj, matchPred, matchObj):
    """
    Return a list of triples matching the given criteria

    Not implemented yet: always raises NotImplementedError (a subclass
    of Exception, so existing handlers that catch Exception still work).
    """
    raise NotImplementedError("Not implemented")
# # Conversions #
def Resource(con, obj):
    """
    Convert the argument to a resource.

    con - the evaluation context (not used)
    """
    # The original also asked con.driver.isResource(str(r)) but returned
    # r on both branches, so the lookup only cost a driver round trip
    # (and str() could fail on non-ASCII text).  It has been dropped.
    return DataTypes.ToResource(obj)
def String(con, obj):
    """
    Convert the argument to a string.
    """
    return DataTypes.ToString(obj)
def Number(con, obj):
    """
    Convert the argument to a number.
    """
    return DataTypes.ToNumber(obj)
def Boolean(con, obj):
    """
    Convert the argument to a boolean.
    """
    return DataTypes.ToBoolean(obj)
#def List(con, obj): # s = DataTypes.ToList(obj) # return s
#def Set(con, obj): # s = DataTypes.ToSet(obj) # return s
def Current(con):
    """
    Return the current value, if there is one

    This definition replaces the one near the top of the module, so it is
    the function registered for 'current' in FUNCTIONS.
    """
    # The original evaluated con.current without returning it, so the
    # function always produced None
    return con.current
# # Boolean functions #
def And(con, *args):
    """
    Logical conjunction: true when every argument converts to a true
    boolean (which includes the case of no arguments at all).
    """
    truthy = [1 for arg in args if DataTypes.ToBoolean(arg)]
    if len(truthy) == len(args):
        return boolean.true
    return boolean.false
def Or(con, *args):
    """
    Logical disjunction: true when at least one argument converts to a
    true boolean.
    """
    truthy = [1 for arg in args if DataTypes.ToBoolean(arg)]
    if truthy:
        return boolean.true
    return boolean.false
def Not(con, arg):
    """
    Logical negation of the argument's boolean conversion.
    """
    if not DataTypes.ToBoolean(arg):
        return boolean.true
    return boolean.false
def IsResource(con, arg):
    """
    Return true if the item is of type resource, or can be successfully
    converted to resource

    For a list the first item is tested; for a set, one of its members.
    An empty list or set gives false.  A string is a resource when the
    driver says so.
    """
    if DataTypes.IsList(arg):
        if not arg:
            # Previously arg[0] raised IndexError here
            return boolean.false
        arg = arg[0]
    if DataTypes.IsSet(arg):
        # _d is the set's member dictionary; the original arg._d[0]
        # looked up the KEY 0 instead of taking a member
        members = list(arg._d.keys())
        if not members:
            return boolean.false
        arg = members[0]
    if DataTypes.IsResource(arg):
        return boolean.true
    if DataTypes.IsString(arg):
        if con.driver.isResource(arg):
            return boolean.true
        return boolean.false
    return boolean.false
def Contains(con, arg1, arg2=None):
    """
    contains(outer, inner)
    Return true if inner (converted to a string) occurs within outer.

    With a single argument, that argument is inner and the current
    context value is outer.  outer must be a string or a resource;
    anything else gives false.
    """
    if arg2 is None:
        outer, inner = con.current, arg1
    else:
        outer, inner = arg1, arg2

    if DataTypes.IsString(outer):
        haystack = outer
    elif DataTypes.IsResource(outer):
        haystack = DataTypes.ToString(outer)
    else:
        return boolean.false

    needle = DataTypes.ToString(inner)
    if haystack.find(needle) != -1:
        return boolean.true
    return boolean.false
def ContainsCi(con, arg1, arg2=None):
    """
    contains-ci(outer, inner)
    Case insensitive variation of contains: both sides are lower-cased
    before looking for inner (converted to a string) within outer.

    With a single argument, that argument is inner and the current
    context value is outer.  outer must be a string or a resource;
    anything else gives false.
    """
    if arg2 is None:
        outer, inner = con.current, arg1
    else:
        outer, inner = arg1, arg2

    if DataTypes.IsString(outer):
        haystack = outer.lower()
    elif DataTypes.IsResource(outer):
        haystack = DataTypes.ToString(outer).lower()
    else:
        return boolean.false

    needle = DataTypes.ToString(inner).lower()
    if haystack.find(needle) != -1:
        return boolean.true
    return boolean.false
def StartsWith(con, arg1, arg2=None):
    """
    starts-with(outer, inner)
    Return true if outer begins with inner (converted to a string).

    With a single argument, that argument is inner and the current
    context value is outer.  outer must be a string or a resource;
    anything else gives false.
    """
    if arg2 is None:
        outer, inner = con.current, arg1
    else:
        outer, inner = arg1, arg2

    if DataTypes.IsString(outer):
        text = outer
    elif DataTypes.IsResource(outer):
        text = DataTypes.ToString(outer)
    else:
        return boolean.false

    prefix = DataTypes.ToString(inner)
    if text.startswith(prefix):
        return boolean.true
    return boolean.false
def Member(con, arg1, arg2=None):
    """
    member(outer, inner)
    Return true if inner is a member of outer.

    With a single argument, that argument is inner and the current
    context value is outer.  Lists are searched with DataTypes.Cmp, sets
    by key lookup; any other outer gives false.
    """
    if arg2 is None:
        outer, inner = con.current, arg1
    else:
        outer, inner = arg1, arg2

    if DataTypes.IsList(outer):
        matches = [i for i in outer if DataTypes.Cmp(i, inner) == 0]
        if matches:
            return boolean.true
        return boolean.false
    if DataTypes.IsSet(outer):
        if inner in outer._d:
            return boolean.true
        return boolean.false
    return boolean.false
def IsType(con, obj, type_):
    """
    Return true if obj (or, when obj resolves to a list, any member of
    it) is of one of the specified types or of one of their sub classes.

    obj - converted to a list of items to test
    type_ - converted to a list of acceptable types
    """
    candidates = DataTypes.ToList(obj)

    #Turn the type into a set of possible types (taking into account
    #SubClass)
    #FIXME: also take into account sameClassAs/samePropertyAs
    accepted = DataTypes.Set()
    for base in DataTypes.ToList(type_):
        accepted.append(base)
        _expandSubTypes(con, base, accepted)

    for item in candidates:
        for candidate_type in accepted._d:
            stmts = con.driver.complete(
                item, _TYPE, candidate_type, None, con.scope, {}
            )
            if stmts:
                return boolean.true
    return boolean.false
def _expandSubTypes(con, baseType, found):
    """
    Add every (direct or indirect) sub class of baseType to `found`,
    which is mutated in place.

    The driver is asked for the subjects of statements with
        p = rdfs:subClassOf
        o = baseType
    and each subject not yet in `found` is added and expanded in turn.
    """
    subjects = con.driver.subjectsFromPredAndObjs(
        _SUBCLASSOF, [baseType], con.scope
    )
    for sub in subjects:
        if sub in found._d:
            # Already handled; also stops cycles in the class hierarchy
            continue
        found.append(sub)
        #See if it has any sub types
        _expandSubTypes(con, sub, found)
def IsLiteral(con, obj):
    """
    Return true if the argument is a literal.

    For a list the first item is tested; for a set, one of its members.
    An empty list or set gives false.  A resource is never a literal; a
    string is a literal unless the driver knows it as a resource; any
    other value is a literal.
    """
    if DataTypes.IsList(obj):
        if not obj:
            # Previously obj[0] raised IndexError here
            return boolean.false
        obj = obj[0]
    if DataTypes.IsSet(obj):
        # _d is the set's member dictionary; the original obj._d[0]
        # looked up the KEY 0 instead of taking a member
        members = list(obj._d.keys())
        if not members:
            return boolean.false
        obj = members[0]
    if DataTypes.IsResource(obj):
        return boolean.false
    if DataTypes.IsString(obj):
        if con.driver.isResource(obj):
            return boolean.false
        return boolean.true
    return boolean.true
# # Relational functions #
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
# This does slow down these functions, but to an extent that is drowned out
# by the general slowness of function dispatch in Versa
class curry:
    """
    Callable that remembers a function together with some leading
    positional arguments and some keyword arguments, and supplies them on
    every call.  Keyword arguments given at call time win over the
    remembered ones.
    """
    def __init__(self, fun, *args, **kwargs):
        self.fun = fun
        self.pending = tuple(args)
        self.kwargs = dict(kwargs)

    def __call__(self, *args, **kwargs):
        merged = dict(self.kwargs)
        merged.update(kwargs)
        all_args = self.pending + args
        return self.fun(*all_args, **merged)
def RelFunc(con, left, right=None, cmp_results=None):
    """
    Shared implementation of the relational functions.

    left and right are compared with DataTypes.Cmp (data type ordering)
    and the result is true when the comparison outcome is one of
    cmp_results.  The bindings in this module use -1, 0 and 1: [1] asks
    whether left is bigger than right, [0] whether they are equal.

    With a single operand, that operand is compared against the current
    context value (the context value being the left-hand side).
    """
    if right is None:
        left, right = con.current, left
    outcome = DataTypes.Cmp(left, right)
    if outcome in cmp_results:
        return boolean.true
    return boolean.false
# The relational functions: each is RelFunc with the list of accepted
# DataTypes.Cmp outcomes bound in advance
Gt = curry(RelFunc, cmp_results=[1])
Lt = curry(RelFunc, cmp_results=[-1])
Gte = curry(RelFunc, cmp_results=[0, 1])
Lte = curry(RelFunc, cmp_results=[0, -1])
Eq = curry(RelFunc, cmp_results=[0])
def Evaluate(con, expr):
    """
    Convert the argument to a string, compile it as a Versa expression
    and evaluate it against the given context.
    """
    source = DataTypes.ToString(expr)
    compiled = Compile(source)
    return compiled.evaluate(con)
def Properties(con, res=None):
    """
    Return the set of properties used in statements about a resource.

    res - the subject (converted to a resource); defaults to the current
          context value.  The driver is asked for all statements with
          that subject in the context's scope, and the second field of
          each statement is collected as a resource.

    Note: This could take advantage of RDF/RDFS/OWL entailment rules to
    *bypass* doing a brute force search on the model or to augment the
    result with other properties inferred via rdfs:subPropertyOf and
    rdfs7

    http://www.w3.org/TR/rdf-mt/#RDFRules [7.3 RDFS Entailment Rules]
    Transitivity of rdfs:subPropertyOf (rdfs5) could be used to return
    additional properties related to those returned

    Assuming res has a defining OWL ontology, the rdfs:domain
    relationships stated in that ontology could be used to deduce
    properties of res. So

      scoped-subquery('res - rdfs:domain -> inferred-property',<ontology>)

    where <ontology> is a defining model for res and other related
    resources

    In addition, under extensional semantic conditions
    (http://www.w3.org/TR/2004/REC-rdf-mt-20040210/#RDFSExtRules)
    additional rdfs:domain relationships can be deduced from within this
    defining ontology: ext1, ext3,
    """
    if res is None:
        res = con.current
    subject = str(DataTypes.ToResource(res))
    predicates = []
    for stmt in con.driver.complete(subject, None, None, None, con.scope, {}):
        predicates.append(DataTypes.ToResource(stmt[1]))
    return DataTypes.Set(predicates)
# # String functions #
def Concat(con, *args):
    """
    Convert every argument to a string and glue the strings together in
    argument order.
    """
    pieces = []
    for s in args:
        pieces.append(DataTypes.ToString(s))
    return ''.join(pieces)
def StringLength(con, obj):
    """
    Convert the argument to a string and report its length.
    """
    text = DataTypes.ToString(obj)
    return len(text)
def SubstringBefore(con, arg1, arg2=None):
    """
    substring-before(outer, inner)
    Return the part of outer that precedes the first occurrence of inner,
    or the empty string if inner does not occur in outer.

    Both values are converted to strings.  With a single argument, that
    argument is inner and the current context value is outer.
    """
    if arg2 is None:
        inner = arg1
        outer = con.current
    else:
        outer = arg1
        inner = arg2
    # The original called .find() on the raw values, which raised
    # AttributeError for anything that was not already a string
    outer = DataTypes.ToString(outer)
    inner = DataTypes.ToString(inner)
    index = outer.find(inner)
    if index == -1:
        return ""
    return outer[:index]
def SubstringAfter(con, arg1, arg2=None):
    """
    substring-after(outer, inner)
    Return the part of outer that follows the first occurrence of inner,
    or the empty string if inner does not occur in outer.

    Both values are converted to strings.  With a single argument, that
    argument is inner and the current context value is outer.
    """
    if arg2 is None:
        inner = arg1
        outer = con.current
    else:
        outer = arg1
        inner = arg2
    # The original called .find() on the raw values, which raised
    # AttributeError for anything that was not already a string
    outer = DataTypes.ToString(outer)
    inner = DataTypes.ToString(inner)
    index = outer.find(inner)
    if index == -1:
        return ""
    return outer[index + len(inner):]
def Scope(con,res):
    """
    Returns the scope(s) in which the given typed subject(s) is/are
    defined, i.e. the scope of each subject's rdf:type statement(s), if
    any exist.

    res - a single subject, or a list or set of subjects
    The result is a set of the scopes found.
    """
    #Normalize to a list of subjects
    subjects = res
    if type(subjects) is not list:
        if DataTypes.IsSet(subjects):
            subjects = DataTypes.ToList(subjects)
        elif not DataTypes.IsList(subjects):
            subjects = [subjects]

    collected = []
    for subj in subjects:
        #For each subject, collect the scope (fifth field) of its
        #rdf:type statements, searching without a scope restriction
        type_stmts = con.driver.complete(
            DataTypes.ToString(subj), _TYPE, None, None, None, {}
        )
        for stmt in type_stmts:
            collected.append(stmt[4])

    #Return a set of scopes
    return DataTypes.Set(collected)
# Dispatch table for the core functions: maps a (namespace, local name)
# pair to the implementing callable.  Every core function uses None as
# its namespace.  'ge' and 'le' are aliases of 'gte' and 'lte'.
# Map is defined in this module but is not registered here.
FUNCTIONS = {
    (None, 'distribute'): Distribute,
    (None, 'traverse'): Traverse,
    (None, 'filter'): Filter,
    (None, 'all'): All,
    (None, 'sort'): Sort,
    (None, 'sortq'): SortQ,
    (None, 'type'): Type,
    (None, 'resource'): Resource,
    (None, 'string'): String,
    (None, 'number'): Number,
    (None, 'list'): List,
    (None, 'set'): Set,
    (None, 'join'): Join,
    (None, 'boolean'): Boolean,
    (None, 'and'): And,
    (None, 'or'): Or,
    (None, 'not'): Not,
    (None, 'is-resource'): IsResource,
    (None, 'contains'): Contains,
    (None, 'contains-ci'): ContainsCi,
    (None, 'starts-with'): StartsWith,
    (None, 'member'): Member,
    (None, 'is-type'): IsType,
    (None, 'is-literal'): IsLiteral,
    (None, 'gt'): Gt,
    (None, 'lt'): Lt,
    (None, 'eq'): Eq,
    (None, 'gte'): Gte,
    (None, 'lte'): Lte,
    (None, 'ge'): Gte,
    (None, 'le'): Lte,
    (None, 'evaluate'): Evaluate,
    (None, 'properties'): Properties,
    (None, 'current'): Current,
    (None, 'intersection'): Intersection,
    (None, 'union'): Union,
    (None, 'difference'): Difference,
    (None, 'substring-after'): SubstringAfter,
    (None, 'substring-before'): SubstringBefore,
    (None, 'concat'): Concat,
    (None, 'string-length'): StringLength,
    (None, 'length'): Length,
    (None, 'triples'): Triples,
    (None, 'head'): Head,
    (None, 'tail'): Tail,
    (None, 'rest'): Rest,
    (None, 'item'): Item,
    (None, 'order'): Order,
    (None, 'scoped-subquery'): ScopedSubquery,
    (None, 'scope'): Scope
    }
|