
Author: lgiessmann Date: Wed Sep 2 06:58:33 2009 New Revision: 128 Log: rdf-importer: added handling for the isidorus-types Topic, Name and Variant; currently importing isidorus:Association and isidorus:Role is missing Modified: trunk/src/constants.lisp trunk/src/unit_tests/rdf_importer_test.lisp trunk/src/xml/rdf/importer.lisp trunk/src/xml/rdf/rdf_tools.lisp Modified: trunk/src/constants.lisp ============================================================================== --- trunk/src/constants.lisp (original) +++ trunk/src/constants.lisp Wed Sep 2 06:58:33 2009 @@ -53,7 +53,14 @@ :*tm2rdf-associaiton-property* :*tm2rdf-subjectIdentifier-property* :*tm2rdf-itemIdentity-property* - :*tm2rdf-subjectLocator-property*)) + :*tm2rdf-subjectLocator-property* + :*tm2rdf-value-property* + :*tm2rdf-nametype-property* + :*tm2rdf-scope-property* + :*tm2rdf-varianttype-property* + :*tm2rdf-occurrencetype-property* + :*tm2rdf-roletype-property* + :*tm2rdf-associationtype-property*)) (in-package :constants) @@ -144,3 +151,17 @@ (defparameter *tm2rdf-subjectLocator-property* (concatenate 'string *tm2rdf-ns* "subjectLocator")) (defparameter *tm2rdf-itemIdentity-property* (concatenate 'string *tm2rdf-ns* "itemIdentity")) + +(defparameter *tm2rdf-value-property* (concatenate 'string *tm2rdf-ns* "value")) + +(defparameter *tm2rdf-nametype-property* (concatenate 'string *tm2rdf-ns* "nametype")) + +(defparameter *tm2rdf-scope-property* (concatenate 'string *tm2rdf-ns* "scope")) + +(defparameter *tm2rdf-varianttype-property* (concatenate 'string *tm2rdf-ns* "varianttype")) + +(defparameter *tm2rdf-occurrencetype-property* (concatenate 'string *tm2rdf-ns* "occurrencetype")) + +(defparameter *tm2rdf-roletype-property* (concatenate 'string *tm2rdf-ns* "roletype")) + +(defparameter *tm2rdf-associationtype-property* (concatenate 'string *tm2rdf-ns* "associationtype")) Modified: trunk/src/unit_tests/rdf_importer_test.lisp ============================================================================== --- trunk/src/unit_tests/rdf_importer_test.lisp (original) +++ trunk/src/unit_tests/rdf_importer_test.lisp Wed Sep 2 06:58:33 2009 @@ -21,6 +21,7 @@ *tm2rdf-ns* *xml-ns* *xml-string* + *xml-uri* *instance-psi* *type-psi* *type-instance-psi* @@ -69,7 +70,9 @@ :test-xml-base :test-get-type-psis :test-get-all-type-psis - :test-isidorus-type-p)) + :test-isidorus-type-p + :test-get-all-isidorus-nodes-by-id + :test-import-isidorus-name)) (declaim (optimize (debug 3) (speed 0) (safety 3) (space 0) (compilation-speed 0))) @@ -3256,6 +3259,227 @@ 'rdf-importer::occurrence))))))) +(test test-get-all-isidorus-nodes-by-id + "Tests the function get-all-isidorus-nodes-by-id." + (let ((doc-1 + (concatenate 'string "<rdf:RDF xmlns:rdf=\"" *rdf-ns* "\" " + "xmlns:sw=\"http://test/arcs/\">" + " <rdf:Description rdf:nodeID=\"node-id-1\"/>" + " <rdf:Description rdf:nodeID=\"node-id-2\"/>" + " <rdf:Description rdf:nodeID=\"node-id-1\">" + " <sw:arc rdf:nodeID=\"node-id-2\"/>" + " </rdf:Description>" + " <rdf:Description rdf:nodeID=\"node-id-3\">" + " <sw:arc rdf:nodeID=\"node-id-1\"/>" + " <sw:arc rdf:nodeID=\"node-id-4\"/>" + " </rdf:Description>" + " <sw:Node rdf:nodeID=\"node-id-4\" " + " xml:base=\"http://base/\">" + " <sw:arc>" + " <rdf:Description rdf:nodeID=\"node-id-1\" " + " xml:base=\"suffix\"/>" + " </sw:arc>" + " </sw:Node>" + "</rdf:RDF>"))) + (let ((root (elt (dom:child-nodes (cxml:parse doc-1 + (cxml-dom:make-dom-builder))) + 0)) + (description (concatenate 'string *rdf-ns* "Description")) + (sw-node "http://test/arcs/Node")) + (let ((node-id-1 (list + (list :elem (elt (rdf-importer::child-nodes-or-text + root) 0) + :xml-base nil) + (list :elem (elt (rdf-importer::child-nodes-or-text + root) 2) + :xml-base nil) + (list :elem (elt + (rdf-importer::child-nodes-or-text + (elt + (rdf-importer::child-nodes-or-text + (elt (rdf-importer::child-nodes-or-text + root) 4)) 0)) 0) + :xml-base "http://base/suffix"))) + (node-id-2 (elt (rdf-importer::child-nodes-or-text root) 1)) + (node-id-3 (elt (rdf-importer::child-nodes-or-text root) 3)) + (node-id-4 (elt (rdf-importer::child-nodes-or-text root) 4))) + (is (= (length (rdf-importer::child-nodes-or-text root)) 5)) + (is (= (length (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-3" root nil)) 1)) + (is (eql (getf (first (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-3" root nil)) :elem) + node-id-3)) + (is (= (length (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-2" root nil)) 1)) + (is (eql (getf (first (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-2" root description)) :elem) + node-id-2)) + (is (eql (getf (first (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-4" root sw-node)) :elem) + node-id-4)) + (is (string= (getf (first (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-4" root sw-node)) :xml-base) + "http://base/")) + (is (= (length (intersection + node-id-1 + (rdf-importer::get-all-isidorus-nodes-by-id + "node-id-1" root description) + :test #'(lambda(x y) + (and (eql (getf x :elem) (getf y :elem)) + (string= (getf x :xml-base) + (getf y :xml-base)))))) + (length node-id-1))))))) + + +(test test-import-isidorus-name + "Tests all functions that are responsible to import a resource + representing isidorus:Name." + (let ((revision-1 100) + (tm-id "http://test/tm-id") + (document-id "doc-id") + (db-dir "./data_base") + (doc-1 + (concatenate 'string "<rdf:RDF xmlns:rdf=\"" *rdf-ns* "\" " + " xmlns:sw=\"http://test/arcs/\"" + " xmlns:isi=\"" *tm2rdf-ns* "\">" + " <rdf:Description rdf:about=\"http://node-1\">" + " <isi:subjectIdentifier>http://topic-psi-1</isi:subjectIdentifier>" + " <isi:subjectLocator>http://topic-sl-1</isi:subjectLocator>" + " <isi:itemIdentity>http://topic-ii-1</isi:itemIdentity>" + " <sw:arc rdf:resource=\"http://resource-1\"/>" + " <isi:name>" + " <isi:Name>" + " <isi:itemIdentity>http://itemIdentity-1</isi:itemIdentity>" + " <isi:itemIdentity>http://itemIdentity-2</isi:itemIdentity>" + " <isi:scope rdf:resource=\"http://scope-1\"/>" + " <isi:scope rdf:resource=\"http://scope-2\"/>" + " <isi:value rdf:datatype=\"anyDatatype\">value-1</isi:value>" + " <isi:nametype rdf:resource=\"http://nametype-1\"/>" + " <isi:variant rdf:nodeID=\"variant-1\"/>" + " </isi:Name>" + " </isi:name>" + " <isi:name rdf:parseType=\"Resource\">" + " <rdf:type rdf:resource=\"" *tm2rdf-name-type-uri* "\"/>" + " <isi:itemIdentity>http://itemIdentity-4</isi:itemIdentity>" + " <isi:value rdf:datatype=\"anyDatatype\">value-3</isi:value>" + " <isi:nametype rdf:resource=\"http://nametype-2\"/>" + " <isi:variant rdf:parseType=\"Resource\">" + " <rdf:type>" + " <rdf:Description rdf:about=\"" *tm2rdf-variant-type-uri* "\"/>" + " </rdf:type>" + " <isi:value>value-4</isi:value>" + " <isi:scope>" + " <rdf:Description rdf:about=\"http://scope-3\"/>" + " </isi:scope>" + " </isi:variant>" + " </isi:name>" + " </rdf:Description>" + + " <rdf:Description rdf:nodeID=\"variant-1\">" + " <isi:scope rdf:resource=\"http://scope-3\"/>" + " <isi:value rdf:datatype=\"dt-2\">value-2</isi:value>" + " </rdf:Description>" + + " <rdf:Description rdf:nodeID=\"variant-1\">" + " <isi:itemIdentity rdf:datatype=\"" *xml-uri* "\">http://itemIdentity-3</isi:itemIdentity>" + " <rdf:type rdf:resource=\"" *tm2rdf-variant-type-uri* "\"/>" + " <isi:scope rdf:resource=\"http://scope-4\"/>" + " </rdf:Description>" + "</rdf:RDF>"))) + (let ((root (elt (dom:child-nodes (cxml:parse doc-1 + (cxml-dom:make-dom-builder))) + 0))) + (is (= (length (rdf-importer::child-nodes-or-text root)) 3)) + (rdf-init-db :db-dir db-dir :start-revision revision-1) + (rdf-importer::import-dom root revision-1 :tm-id tm-id + :document-id document-id) + (is (= (length (elephant:get-instances-by-class 'd:NameC)) 2)) + (is (= (length (elephant:get-instances-by-class 'd:VariantC)) 2)) + (is (= (length (elephant:get-instances-by-class 'd:TopicC)) 27)) + (is-false (find-if #'(lambda(x) + (not (d:psis x))) + (elephant:get-instances-by-class 'd:TopicC))) + (is-true (d:get-item-by-psi "http://node-1")) + (is-true (d:get-item-by-psi "http://topic-psi-1")) + (is-true (d:get-item-by-psi "http://resource-1")) + (is-true (d:get-item-by-psi "http://scope-1")) + (is-true (d:get-item-by-psi "http://scope-2")) + (is-true (d:get-item-by-psi "http://scope-3")) + (is-true (d:get-item-by-psi "http://scope-4")) + (is-true (d:get-item-by-psi "http://nametype-1")) + (is-true (d:get-item-by-psi "http://nametype-1")) + (is-true (d:get-item-by-psi "http://test/arcs/arc")) + (let ((top (d:get-item-by-psi "http://node-1")) + (nt-1 (d:get-item-by-psi "http://nametype-1")) + (nt-2 (d:get-item-by-psi "http://nametype-2")) + (scope-1 (d:get-item-by-psi "http://scope-1")) + (scope-2 (d:get-item-by-psi "http://scope-2")) + (scope-3 (d:get-item-by-psi "http://scope-3")) + (scope-4 (d:get-item-by-psi "http://scope-4"))) + (is (= (length (d:psis top)) 2)) + (is-true (find (elephant:get-instance-by-value 'd:PersistentIdC 'd:uri + "http://topic-psi-1") + (d:psis top))) + (is (= (length (d:item-identifiers top)) 1)) + (is (string= (d:uri (first (d:item-identifiers top))) + "http://topic-ii-1")) + (is (= (length (d:locators top)) 1)) + (is (string= (d:uri (first (d:locators top))) + "http://topic-sl-1")) + (is (= (length (d:names top)) 2)) + (let ((name-1 (find-if #'(lambda(x) + (eql (d:instance-of x) nt-1)) + (d:names top))) + (name-2 (find-if #'(lambda(x) + (eql (d:instance-of x) nt-2)) + (d:names top)))) + (is-true name-1) + (is-true name-2) + (is (= (length (d:item-identifiers name-1)) 2)) + (is (= (length + (intersection + (d:item-identifiers name-1) + (list (elephant:get-instance-by-value + 'd:ItemIdentifierC 'd:uri "http://itemIdentity-1") + (elephant:get-instance-by-value + 'd:ItemIdentifierC 'd:uri "http://itemIdentity-2")))) + 2)) + (is (= (length (d:item-identifiers name-2)) 1)) + (is (string= (d:uri (first (d:item-identifiers name-2))) + "http://itemIdentity-4")) + (is (= (length (d:themes name-1)) 2)) + (is (= (length (intersection (list scope-1 scope-2) + (d:themes name-1))) + 2)) + (is-false (d:themes name-2)) + (is (string= (d:charvalue name-1) "value-1")) + (is (string= (d:charvalue name-2) "value-3")) + (is (= (length (d:variants name-1)) 1)) + (is (= (length (d:variants name-2)) 1)) + (let ((variant-1 (first (d:variants name-1))) + (variant-2 (first (d:variants name-2)))) + (is (= (length (d:item-identifiers variant-1)) 1)) + (is (string= (d:uri (first (d:item-identifiers variant-1))) + "http://itemIdentity-3")) + (is-false (d:item-identifiers variant-2)) + (is (= (length (d:themes variant-1)) 4)) + (is (= (length (intersection (list scope-3 scope-4 + scope-1 scope-2) + (d:themes variant-1))) + 4)) + (is (= (length (d:themes variant-2)) 1)) + (is (eql scope-3 (first (d:themes variant-2)))) + (is (string= (d:charvalue variant-1) + "value-2")) + (is (string= (d:charvalue variant-2) + "value-4")) + (is (string= (d:datatype variant-1) + (concatenate 'string tm-id "/dt-2"))) + (is (string= (d:datatype variant-2) + *xml-string*)))))))) + + + (defun run-rdf-importer-tests() "Runs all defined tests." (when elephant:*store-controller* @@ -3281,4 +3505,6 @@ (it.bese.fiveam:run! 'test-xml-base) (it.bese.fiveam:run! 'test-get-type-psis) (it.bese.fiveam:run! 'test-get-all-type-psis) - (it.bese.fiveam:run! 'test-isidorus-type-p)) \ No newline at end of file + (it.bese.fiveam:run! 'test-isidorus-type-p) + (it.bese.fiveam:run! 'test-get-all-isidorus-nodes-by-id) + (it.bese.fiveam:run! 'test-import-isidorus-name)) \ No newline at end of file Modified: trunk/src/xml/rdf/importer.lisp ============================================================================== --- trunk/src/xml/rdf/importer.lisp (original) +++ trunk/src/xml/rdf/importer.lisp Wed Sep 2 06:58:33 2009 @@ -84,6 +84,7 @@ (let ((children (child-nodes-or-text rdf-dom :trim t))) (when children (loop for child across children + when (non-isidorus-type-p child tm-id :parent-xml-base xml-base) do (import-node child tm-id start-revision :document-id document-id :xml-base xml-base :xml-lang xml-lang)))) (import-node rdf-dom tm-id start-revision :document-id document-id @@ -96,31 +97,37 @@ (format t ">> import-node: ~a <<~%" (dom:node-name elem)) ;TODO: remove (tm-id-p tm-id "import-node") (parse-node elem) - (let ((fn-xml-lang (get-xml-lang elem :old-lang xml-lang))) + (let ((fn-xml-lang (get-xml-lang elem :old-lang xml-lang)) + (fn-xml-base (get-xml-base elem :old-base xml-base))) (let ((about (get-absolute-attribute elem tm-id xml-base "about")) (nodeID (get-ns-attribute elem "nodeID")) (ID (get-absolute-attribute elem tm-id xml-base "ID")) (UUID (get-ns-attribute elem "UUID" :ns-uri *rdf2tm-ns*))) (parse-properties-of-node elem (or about nodeID ID UUID)) - + ;TODO: create associaitons and roles (let ((literals (append (get-literals-of-node elem fn-xml-lang) (get-literals-of-node-content elem tm-id xml-base fn-xml-lang))) (associations (get-associations-of-node-content elem tm-id xml-base)) (types (get-types-of-node elem tm-id :parent-xml-base xml-base)) (super-classes - (get-super-classes-of-node-content elem tm-id xml-base))) - ;TODO: collect isidorus' subjectIdentifiers, itemIdentities, - ; subjectLocators, names and occurrences - ; add the collected constructs to the topic-stub - - ;TODO: collect associations and association roles and create the - ; corresponding constructs and stops the recusrion + (get-super-classes-of-node-content elem tm-id xml-base)) + (subject-identities (make-isidorus-identifiers + elem start-revision :what "subjectIdentifier")) + (item-identifiers (make-isidorus-identifiers elem start-revision)) + (subject-locators (make-isidorus-identifiers elem start-revision + :what "subjectLocator"))) (with-tm (start-revision document-id tm-id) (let ((this (make-topic-stub about ID nodeID UUID start-revision xml-importer::tm - :document-id document-id))) + :document-id document-id + :additional-subject-identifiers subject-identities + :item-identifiers item-identifiers + :subject-locators subject-locators))) + (make-isidorus-names elem this tm-id start-revision + :owner-xml-base fn-xml-base) + ;TODO: create topic occurrences (make-literals this literals tm-id start-revision :document-id document-id) (make-associations this associations xml-importer::tm @@ -136,6 +143,257 @@ this)))))) + +(defun make-isidorus-names (owner-elem owner-topic tm-id start-revision + &key (owner-xml-base nil) + (document-id *document-id*)) + "Creates all names of a resource node that are in a property isidorus:name + and have the type isidorus:Name." + (declare (dom:element owner-elem)) + (declare (string tm-id)) + (declare (TopicC owner-topic)) + (let ((content (child-nodes-or-text owner-elem :trim t)) + (root (elt (dom:child-nodes (dom:owner-document owner-elem)) 0))) + (when (and (not (stringp content)) + (> (length content) 0)) + (loop for property across content + when (isidorus-type-p property tm-id 'name + :parent-xml-base owner-xml-base) + collect + (let ((xml-base (get-xml-base property + :old-base owner-xml-base))) + (let ((nodes + (let ((nodeID (get-ns-attribute property "nodeID"))) + (if nodeID + (get-all-isidorus-nodes-by-id + nodeID root *tm2rdf-name-type-uri*) + (list (self-or-child-node + property *tm2rdf-name-type-uri* + :xml-base xml-base)))))) + (let ((item-identities + (remove-if #'null + (loop for node in nodes + append (make-isidorus-identifiers + (getf node :elem) start-revision)))) + (name-type (make-name-type nodes tm-id start-revision + :document-id document-id)) + (name-value (getf (make-value nodes tm-id) :value)) + (name-scopes (make-scopes nodes tm-id start-revision + :document-id document-id))) + ;(format t "ii: ~a~%type: ~a~%value: ~a~%scopes: ~a~%~%" + ; item-identities name-type name-value name-scopes) + (let ((this + (make-construct 'NameC + :start-revision start-revision + :topic owner-topic + :charvalue name-value + :instance-of name-type + :item-identifiers item-identities + :themes name-scopes))) + (make-isidorus-variants nodes this tm-id start-revision + :document-id document-id))))))))) + + +(defun make-isidorus-variants (name-nodes owner-name tm-id start-revision + &key (document-id *document-id*)) + "Creates name variants of the passed name-nodes." + (declare (NameC owner-name)) + (declare (string tm-id)) + (let ((root + (when name-nodes + (elt (dom:child-nodes + (dom:owner-document (getf (first name-nodes) :elem))) 0)))) + (remove-if + #'null + (loop for name-node in name-nodes + collect (let ((content (child-nodes-or-text (getf name-node :elem)))) + (when (and (not (stringp content)) + (> (length content) 0)) + (loop for property across content + when (isidorus-type-p + property tm-id 'variant + :parent-xml-base (getf name-node :xml-base)) + collect + (let ((nodes + (let ((nodeID + (get-ns-attribute property "nodeID"))) + (if nodeID + (get-all-isidorus-nodes-by-id + nodeID root *tm2rdf-name-type-uri*) + (list (self-or-child-node + property + *tm2rdf-variant-type-uri* + :xml-base + (get-xml-base + property + :old-base + (getf name-node :xml-base)))))))) + (let ((item-identities + (remove-if + #'null + (loop for node in nodes + append (make-isidorus-identifiers + (getf node :elem) start-revision)))) + (variant-scopes + (append + (make-scopes nodes tm-id start-revision + :document-id document-id) + (themes owner-name))) ;XTM 2.0: 4.12 + (value-and-type (make-value nodes tm-id))) + (make-construct 'VariantC + :start-revision start-revision + :item-identifiers item-identities + :themes variant-scopes + :charvalue + (getf value-and-type :value) + :datatype + (getf value-and-type :datatype) + :name owner-name)))))))))) + + +(defun self-or-child-node (property-node type-uri &key (xml-base)) + "Returns either the passed node or the child-node when it is + rdf:Description." + (declare (dom:element property-node)) + (let ((content (child-nodes-or-text property-node :trim t))) + (if (and (= (length content) 1) + (or (and (string= (dom:namespace-uri (elt content 0)) *rdf-ns*) + (string= (get-node-name (elt content 0)) "Description")) + (string= (concatenate-uri (dom:namespace-uri (elt content 0)) + (get-node-name (elt content 0))) + type-uri))) + (list :elem (elt content 0) + :xml-base (get-xml-base (elt content 0) :old-base xml-base)) + (list :elem property-node + :xml-base xml-base)))) + + +(defun make-scopes (node-list tm-id start-revision + &key (document-id *document-id*)) + "Creates for every found scope a corresponding topic stub." + (let ((properties + (remove-if + #'null + (loop for node in node-list + append (let ((content (child-nodes-or-text (getf node :elem) + :trim t))) + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property))) + (string= (concatenate-uri prop-ns prop-name) + *tm2rdf-scope-property*)) + collect (list :elem property + :xml-base (get-xml-base + property + :old-base + (getf node :xml-base))))))))) + (let ((scope-uris + (remove-if #'null + (map 'list #'(lambda(x) + (get-ref-of-property (getf x :elem) tm-id + (getf x :xml-base))) + properties)))) + (with-tm (start-revision document-id tm-id) + (map 'list #'(lambda(x) + (let ((topicid (getf x :topicid)) + (psi (getf x :psi))) + (make-topic-stub psi nil topicid nil start-revision + xml-importer::tm + :document-id document-id))) + scope-uris))))) + + +(defun make-value (node-list tm-id) + "Returns the literal value of a property of the type isidorus:value." + (let ((property + (loop for node in node-list + when (or (let ((content (child-nodes-or-text (getf node :elem) + :trim t))) + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property))) + (string= (concatenate-uri prop-ns prop-name) + *tm2rdf-value-property*)) + return property)) + (get-ns-attribute (getf node :elem) + "value" :ns-uri *tm2rdf-ns*)) + return (or (let ((content (child-nodes-or-text (getf node :elem) + :trim t))) + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property))) + (string= (concatenate-uri prop-ns prop-name) + *tm2rdf-value-property*)) + return property)) + (get-ns-attribute (getf node :elem) + "value" :ns-uri *tm2rdf-ns*))))) + (if property + (if (stringp property) + (list :value property :datatype *xml-string*) + (let ((prop-content (child-nodes-or-text property)) + (type (let ((dt + (get-datatype + property tm-id + (find-if #'(lambda(x) + (eql property (getf x :elem))) + node-list)))) + (if dt dt *xml-string*)))) + (cond + ((= (length prop-content) 0) + (list :value "" :datatype type)) + ((not (stringp prop-content)) ;must be an element + (let ((text-val "")) + (when (dom:child-nodes property) + (loop for content-node across + (dom:child-nodes property) + do (push-string + (node-to-string content-node) + text-val))) + (list :value text-val :datatype type))) + (t (list :value prop-content :datatype type))))) + (list :value "" :datatype *xml-string*)))) + + + +(defun make-name-type (node-list tm-id start-revision + &key (document-id *document-id*)) + "Creates a topic stub that is the type of the name represented by the + passed nodes." + (let ((property + (loop for node in node-list + when (let ((content (child-nodes-or-text (getf node :elem) + :trim t))) + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property))) + (string= (concatenate-uri prop-ns prop-name) + *tm2rdf-nametype-property*)) + return property)) + return (let ((content (child-nodes-or-text (getf node :elem) + :trim t))) + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property))) + (string= (concatenate-uri prop-ns prop-name) + *tm2rdf-nametype-property*)) + return (list + :elem property + :xml-base (get-xml-base property + :old-base + (getf + node + :xml-base)))))))) + (when property + (let ((type-uri (get-ref-of-property (getf property :elem) tm-id + (getf property :xml-base)))) + (unless type-uri + (error "From make-name-type(): type-uri is missing!")) + (with-tm (start-revision document-id tm-id) + (make-topic-stub (getf type-uri :psi) nil + (getf type-uri :topicid) nil start-revision + xml-importer::tm :document-id document-id)))))) + + (defun import-arc (elem tm-id start-revision &key (document-id *document-id*) (xml-base nil) (xml-lang nil)) @@ -144,7 +402,6 @@ (declare (dom:element elem)) (format t ">> import-arc: ~a <<~%" (dom:node-name elem)) ;TODO: remove (let ((fn-xml-lang (get-xml-lang elem :old-lang xml-lang)) - (fn-xml-base (get-xml-base elem :old-base xml-base)) (UUID (get-ns-attribute elem "UUID" :ns-uri *rdf2tm-ns*)) (parseType (get-ns-attribute elem "parseType")) (content (child-nodes-or-text elem :trim t))) @@ -159,42 +416,51 @@ (string/= parseType "Collection"))) (when UUID (parse-properties-of-node elem UUID) - (let ((this - (get-item-by-id UUID :xtm-id document-id - :revision start-revision))) - (let ((literals - (append (get-literals-of-property - elem fn-xml-lang) - (get-literals-of-node-content - elem tm-id xml-base fn-xml-lang))) - (associations - (get-associations-of-node-content - elem tm-id xml-base)) - (types - (remove-if - #'null - (append - (get-types-of-node-content elem tm-id fn-xml-base) - (when (get-ns-attribute elem "type") - (list :ID nil - :topicid (get-ns-attribute elem "type") - :psi (get-ns-attribute elem "type")))))) - (super-classes - (get-super-classes-of-node-content - elem tm-id xml-base))) - ;TODO: collect isidorus' subjectIdentifiers, itemIdentities, - ; subjectLocators, names and occurrences - ; add the collected constructs to the topic-stub - (make-literals this literals tm-id start-revision - :document-id document-id) - (make-associations this associations xml-importer::tm - start-revision :document-id document-id) - (make-types this types xml-importer::tm start-revision - :document-id document-id) - (make-super-classes - this super-classes xml-importer::tm - start-revision :document-id document-id)) - this))))) + (let ((subject-identifiers + (make-isidorus-identifiers + elem start-revision :what "subjectIdentifier")) + (item-identities + (make-isidorus-identifiers elem start-revision)) + (subject-locators + (make-isidorus-identifiers elem start-revision + :what "subjectLocator"))) + (let ((this + (make-topic-stub + nil nil nil UUID start-revision xml-importer::tm + :additional-subject-identifiers + subject-identifiers + :item-identifiers item-identities + :subject-locators subject-locators + :document-id document-id))) + (let ((literals + (append (get-literals-of-property + elem fn-xml-lang) + (get-literals-of-node-content + elem tm-id xml-base fn-xml-lang))) + (associations + (get-associations-of-node-content + elem tm-id xml-base)) + (types (get-types-of-property + elem tm-id + :parent-xml-base xml-base)) + (super-classes + (get-super-classes-of-node-content + elem tm-id xml-base))) + (make-isidorus-names elem this tm-id start-revision + :owner-xml-base xml-base + :document-id document-id) + ;TDOD: create topic occurrences + (make-literals this literals tm-id start-revision + :document-id document-id) + (make-associations + this associations xml-importer::tm + start-revision :document-id document-id) + (make-types this types xml-importer::tm start-revision + :document-id document-id) + (make-super-classes + this super-classes xml-importer::tm + start-revision :document-id document-id)) + this)))))) (make-recursion-from-arc elem tm-id start-revision :document-id document-id :xml-base xml-base :xml-lang xml-lang) @@ -276,7 +542,7 @@ (map 'list #'(lambda(literal) (make-occurrence owner-top literal start-revision tm-id :document-id document-id)) - literals)) + (filter-isidorus-literals literals))) (defun make-associations (owner-top associations tm start-revision @@ -408,7 +674,9 @@ (defun make-topic-stub (about ID nodeId UUID start-revision - tm &key (document-id *document-id*)) + tm &key (document-id *document-id*) + (additional-subject-identifiers nil) + (item-identifiers nil) (subject-locators nil)) "Returns a topic corresponding to the passed parameters. When the searched topic does not exist there will be created one. If about or ID is set there will also be created a new PSI." @@ -429,15 +697,23 @@ inner-top)))) (if top top - (let ((psi (when psi-uri - (make-instance 'PersistentIdC - :uri psi-uri - :start-revision start-revision)))) + (let ((psis (if psi-uri + (remove-if + #'null + (append + (list + (make-instance 'PersistentIdC + :uri psi-uri + :start-revision start-revision)) + additional-subject-identifiers)) + additional-subject-identifiers))) (handler-case (add-to-topicmap tm (make-construct 'TopicC :topicid topic-id - :psis (when psi (list psi)) + :psis psis + :item-identifiers item-identifiers + :locators subject-locators :xtm-id document-id :start-revision start-revision)) (Condition (err)(error "Creating topic ~a failed: ~a" @@ -917,4 +1193,46 @@ collect (import-node item tm-id start-revision :document-id document-id :xml-base xml-base - :xml-lang xml-lang)))))))) \ No newline at end of file + :xml-lang xml-lang)))))))) + + +(defun make-isidorus-identifiers (owner-elem start-revision &key (what "itemIdentity")) + "Returns a list oc created identifier objects that can be + used directly in make-topic-stub." + (declare (dom:element owner-elem)) + (declare (string what)) + (when (and (string/= what "itemIdentity") + (string/= what "subjectIdentifier") + (string/= what "subjectLocator")) + (error "From make-identifiers(): what must be set to: ~a but is ~a" + (list "itemIdentity" "subjectIdentifiers" "subjectLocator") + what)) + (let ((content (child-nodes-or-text owner-elem :trim t)) + (class-symbol (cond + ((string= what "itemIdentity") + 'ItemIdentifierC) + ((string= what "subjectIdentifier") + 'PersistentIdC) + ((string= what "subjectLocator") + 'SubjectLocatorC)))) + (unless (stringp content) + (let ((identifiers + (loop for property across content + when (let ((prop-ns (dom:namespace-uri property)) + (prop-name (get-node-name property)) + (prop-content (child-nodes-or-text property :trim t))) + (and (string= prop-ns *tm2rdf-ns*) + (string= prop-name what) + (stringp prop-content) + (> (length prop-content) 0))) + collect (let ((uri (child-nodes-or-text property :trim t))) + (make-instance class-symbol + :uri uri + :start-revision start-revision)))) + (identifier-attr + (let ((attr (get-ns-attribute owner-elem what :ns-uri *tm2rdf-ns*))) + (when attr + (list (make-instance class-symbol + :uri attr + :start-revision start-revision)))))) + (remove-if #'null (append identifiers identifier-attr)))))) \ No newline at end of file Modified: trunk/src/xml/rdf/rdf_tools.lisp ============================================================================== --- trunk/src/xml/rdf/rdf_tools.lisp (original) +++ trunk/src/xml/rdf/rdf_tools.lisp Wed Sep 2 06:58:33 2009 @@ -45,7 +45,15 @@ *tm2rdf-association-property* *tm2rdf-subjectIdentifier-property* *tm2rdf-itemIdentity-property* - *tm2rdf-subjectLocator-property*) + *tm2rdf-subjectLocator-property* + *tm2rdf-ns* + *tm2rdf-value-property* + *tm2rdf-nametype-property* + *tm2rdf-scope-property* + *tm2rdf-varianttype-property* + *tm2rdf-occurrencetype-property* + *tm2rdf-roletype-property* + *tm2rdf-associationtype-property*) (:import-from :xml-constants *rdf_core_psis.xtm* *core_psis.xtm*) @@ -290,6 +298,29 @@ :psi (or ID about))))))) +(defun get-ref-of-property (property-elem tm-id xml-base) + "Returns a plist of the form (:topicid <string> :psi <string>). + That contains the property's value." + (declare (dom:element property-elem)) + (declare (string tm-id)) + (let ((nodeId (get-ns-attribute property-elem "nodeID")) + (resource (get-ns-attribute property-elem "resource")) + (content (let ((node-refs + (get-node-refs (child-nodes-or-text property-elem) + tm-id xml-base))) + (when node-refs + (first node-refs))))) + (cond + (nodeID + (list :topicid nodeID + :psi nil)) + (resource + (list :topicid resource + :psi resource)) + (content + content)))) + + (defun parse-property-name (property owner-identifier) "Parses the given property's name to the known rdf/rdfs nodes and arcs. If the given name es equal to an node an error is thrown otherwise @@ -501,6 +532,19 @@ (get-types-of-node-content elem tm-id xml-base))))) +(defun get-types-of-property (elem tm-id &key (parent-xml-base nil)) + "Returns a plist of all property's types of the form + (:topicid <string> :psi <string> :ID <string>)." + (let ((xml-base (get-xml-base elem :old-base parent-xml-base))) + (remove-if #'null + (append + (get-types-of-node-content elem tm-id xml-base) + (when (get-ns-attribute elem "type") + (list :ID nil + :topicid (get-ns-attribute elem "type") + :psi (get-ns-attribute elem "type"))))))) + + (defun get-type-psis (elem tm-id &key (parent-xml-base nil)) "Returns a list of type-uris of the passed node." @@ -617,6 +661,34 @@ (string= uri property-name-uri)))) +(defun non-isidorus-type-p (elem tm-id &key (parent-xml-base nil) + (ignore-topic nil)) + "Returns t if the passed element is not of an isidorus' type. + The environmental property is not analysed by this function!" + (declare (dom:element elem)) + (declare (string tm-id)) + (let ((nodeID (get-ns-attribute elem "nodeID")) + (document (dom:owner-document elem)) + (types + (let ((b-types + (list + *tm2rdf-name-type-uri* *tm2rdf-variant-type-uri* + *tm2rdf-occurrence-type-uri* *tm2rdf-association-type-uri* + *tm2rdf-role-type-uri*)) + (a-types (list *tm2rdf-topic-type-uri*))) + (if ignore-topic + b-types + (append a-types b-types))))) + (if nodeID + (not (loop for type in types + when (type-of-id-p nodeId type tm-id document) + return t)) + (not (loop for type in types + when (type-p elem type tm-id + :parent-xml-base parent-xml-base) + return t))))) + + (defun isidorus-type-p (property-elem-or-node-elem tm-id what &key(parent-xml-base nil)) "Returns t if the node elem is of the type isidorus:<Type> and is @@ -654,7 +726,16 @@ property-elem-or-node-elem) (get-node-name property-elem-or-node-elem)))) (if (or (string= type *tm2rdf-topic-type-uri*) - (string= type *tm2rdf-association-type-uri*)) + (string= type *tm2rdf-association-type-uri*) + (let ((parseType (get-ns-attribute property-elem-or-node-elem + "parseType"))) + (and parseType + (string= parseType "Resource"))) + (get-ns-attribute property-elem-or-node-elem "type") + (get-ns-attribute property-elem-or-node-elem "value" + :ns-uri *tm2rdf-ns*) + (get-ns-attribute property-elem-or-node-elem "itemIdentity" + :ns-uri *tm2rdf-ns*)) (type-p property-elem-or-node-elem type tm-id :parent-xml-base parent-xml-base) (when (string= elem-uri property) @@ -686,5 +767,85 @@ (string= x-uri *tm2rdf-role-property*) (string= x-uri *tm2rdf-subjectIdentifier-property*) (string= x-uri *tm2rdf-itemIdentity-property*) + (string= x-uri *tm2rdf-value-property*) + (string= x-uri *tm2rdf-scope-property*) + (string= x-uri *tm2rdf-nametype-property*) + (string= x-uri *tm2rdf-varianttype-property*) + (string= x-uri *tm2rdf-associationtype-property*) + (string= x-uri *tm2rdf-occurrencetype-property*) + (string= x-uri *tm2rdf-roletype-property*) (string= x-uri *tm2rdf-subjectLocator-property*)))) - content)))) \ No newline at end of file + content)))) + + +(defun get-all-isidorus-nodes-by-id (node-id current-node type-uri + &key (parent-xml-base nil) + (collected-nodes nil)) + "Returns a list of all nodes that own the given nodeID and are of + type type-uri, rdf:Description or when the rdf:parseType is set to + Resource or the isidorus:value attribute is set." + (declare (dom:element current-node)) + (declare (string node-id)) + (let ((datatype (when (get-ns-attribute current-node "datatype") + t)) + (parseType (let ((attr (get-ns-attribute current-node "parseType"))) + (when (and attr + (string= attr "Literal")) + t))) + (content (child-nodes-or-text current-node :trim t)) + (xml-base (get-xml-base current-node :old-base parent-xml-base)) + (nodeID (get-ns-attribute current-node "nodeID")) + (node-uri-p (let ((node-uri + (concatenate-uri (dom:namespace-uri current-node) + (get-node-name current-node))) + (description (concatenate 'string *rdf-ns* + "Description"))) + (or (string= node-uri (if type-uri type-uri "")) + (string= node-uri description) + (get-ns-attribute current-node "type") + (get-ns-attribute current-node "value" + :ns-uri *tm2rdf-ns*) + (get-ns-attribute current-node "itemIdentity" + :ns-uri *tm2rdf-ns*) + (let ((parseType (get-ns-attribute current-node + "parseType"))) + (when parseType + (string= parseType "Resource"))))))) + (remove-duplicates + (remove-if + #'null + (if (or datatype parseType (stringp content) (not content)) + (if (and (string= nodeID node-id) node-uri-p) + (append (list (list :elem current-node + :xml-base xml-base)) + collected-nodes) + collected-nodes) + (if (and (string= nodeID node-id) node-uri-p) + (loop for item across content + append (get-all-isidorus-nodes-by-id + node-id item type-uri + :collected-nodes (append + (list (list :elem current-node + :xml-base xml-base)) + collected-nodes) + :parent-xml-base xml-base)) + (loop for item across content + append (get-all-isidorus-nodes-by-id + node-id item type-uri + :collected-nodes collected-nodes + :parent-xml-base xml-base))))) + :test #'(lambda(x y) + (eql (getf x :elem) (getf y :elem)))))) + + +(defun filter-isidorus-literals (literals) + "Removes all literals that are known isidorus properties which + are able to contain literal data." + (remove-if #'(lambda(x) + (or (string= (getf x :type) + *tm2rdf-subjectIdentifier-property*) + (string= (getf x :type) + *tm2rdf-itemIdentity-property*) + (string= (getf x :type) + *tm2rdf-subjectLocator-property*))) + literals)) \ No newline at end of file