Author: lgiessmann Date: Wed Jul 29 10:53:52 2009 New Revision: 97
Log: added some basic functions and unit tests for the rdf-importer
Modified: trunk/docs/json.ebnf trunk/src/isidorus.asd trunk/src/unit_tests/rdf_importer_test.lisp trunk/src/xml/rdf/importer.lisp trunk/src/xml/rdf/rdf_tools.lisp trunk/src/xml/xtm/tools.lisp
Modified: trunk/docs/json.ebnf ============================================================================== --- trunk/docs/json.ebnf (original) +++ trunk/docs/json.ebnf Wed Jul 29 10:53:52 2009 @@ -41,14 +41,14 @@ Scope = ""scopes":" (DblList | Null) InstanceOf = ""instanceOfs":" (DblList | Null) Type = ""type":" List -ID = "\id":" string +ID = "\id":" String TopicRef = ""topicRef":" List
Variant = "{" ItemIdentity "," Scope "," RData "}" Variants = ""variants":" (("[" Variant+ "]") | Null)
Name = "{" ItemIdentity "," Type "," Scope "," Value "," Variants "}" -Names = ""names":" ("[" Name+ "]") | Null +Names = ""names":" (("[" Name+ "]") | Null)
Occurrence = "{" ItemIdentity "," Type "," Scope "," RData "}" Occurrences = ""occurrences":" (("[" Occurrence+ "]") | Null) @@ -60,7 +60,7 @@ Roles = ""roles":" (("[" Role+ "]") | Null)
Association = "{" ItemIdentity "," Type "," Scope "," Roles "}" -Associations ""associations":" (("[" Association "]") | Null) +Associations = ""associations":" (("[" Association "]") | Null)
TopicStub = "{" ID "," ItemIdentity "," SubjectLocator "," SubjectIdentifier "}" TopicStubs = ""topicStubs":" (("[" TopicStub+ "]") | Null)
Modified: trunk/src/isidorus.asd ============================================================================== --- trunk/src/isidorus.asd (original) +++ trunk/src/isidorus.asd Wed Jul 29 10:53:52 2009 @@ -134,7 +134,8 @@ (:file "json_test" :depends-on ("fixtures")) (:file "threading_test") - (:file "rdf_importer_test")) + (:file "rdf_importer_test" + :depends-on ("fixtures"))) :depends-on ("atom" "constants" "model"
Modified: trunk/src/unit_tests/rdf_importer_test.lisp ============================================================================== --- trunk/src/unit_tests/rdf_importer_test.lisp (original) +++ trunk/src/unit_tests/rdf_importer_test.lisp Wed Jul 29 10:53:52 2009 @@ -13,17 +13,19 @@ :xml-importer :datamodel :it.bese.FiveAM - :unittests-constants :fixtures) (:import-from :constants *rdf-ns* *rdfs-ns* - *rdf2tm-ns*) + *rdf2tm-ns* + *xml-ns* + *xml-string*) (:import-from :xml-tools xpath-child-elems-by-qname xpath-single-child-elem-by-qname xpath-select-location-path - get-ns-attribute) + get-ns-attribute + absolute-uri-p) (:export :test-get-literals-of-node :test-parse-node :run-rdf-importer-tests)) @@ -46,53 +48,97 @@ "xmlns:isi="http://isidorus/test#%5C" " "rdf:type="rdfType" rdf:ID="rdfID" rdf:nodeID="" "rdfNodeID" rdf:unknown="rdfUnknown" " - "isi:ID="isiID" isi:arc="isiArc"/>")) - (doc-2 - (concatenate 'string "<rdf:Description xmlns:rdf="" *rdf-ns* "" " - "xmlns:rdfs="" *rdfs-ns* "" " - "rdfs:subClassOf="rdfsSubClassOf" />"))) - (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder))) - (dom-2 (cxml:parse doc-2 (cxml-dom:make-dom-builder)))) + "isi:ID="isiID" isi:arc="isiArc" " + "isi:empty=""/>"))) + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) + (is (= (length (dom:child-nodes dom-1)) 1)) + (let ((literals (rdf-importer::get-literals-of-node + (elt (dom:child-nodes dom-1) 0) nil))) + (is-true literals) + (is (= (length literals) 4)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "rdfUnknown") + (string= (getf x :type) + (concatenate 'string *rdf-ns* "unknown")) + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "isiID") + (string= (getf x :type) + "http://isidorus/test#ID") + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "isiArc") + (string= (getf x :type) + "http://isidorus/test#arc") + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "") + (string= (getf x :type) + "http://isidorus/test#empty") + (not (getf x :ID)))) + literals)) + (map 'list #'(lambda(x) (is-false (getf x :lang))) + literals))) + + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) (is (= (length (dom:child-nodes dom-1)) 1)) - (is (= (length (dom:child-nodes dom-2)) 1)) + (dom:set-attribute-ns (elt (dom:child-nodes dom-1) 0) + *xml-ns* "lang" "de") (let ((literals (rdf-importer::get-literals-of-node - (elt (dom:child-nodes dom-1) 0)))) + (elt (dom:child-nodes dom-1) 0) "en"))) (is-true literals) - (is (= (length literals) 3)) + (is (= (length literals) 4)) (is-true (find-if #'(lambda(x) (and (string= (getf x :value) "rdfUnknown") (string= (getf x :type) - (concatenate 'string *rdf-ns* "unknown")))) + (concatenate 'string *rdf-ns* "unknown")) + (not (getf x :ID)))) literals)) (is-true (find-if #'(lambda(x) (and (string= (getf x :value) "isiID") (string= (getf x :type) - "http://isidorus/test#ID"))) + "http://isidorus/test#ID") + (not (getf x :ID)))) literals)) (is-true (find-if #'(lambda(x) (and (string= (getf x :value) "isiArc") (string= (getf x :type) - "http://isidorus/test#arc"))) - literals))) - (signals error (rdf-importer::get-literals-of-node - (elt (dom:child-nodes dom-2) 0)))))) + "http://isidorus/test#arc") + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "") + (string= (getf x :type) + "http://isidorus/test#empty") + (not (getf x :ID)))) + literals)) + (map 'list #'(lambda(x) (is-true (string= (getf x :lang) "de"))) + literals)))))
(test test-parse-node "Tests the parse-node function." (let ((doc-1 - (concatenate 'string "<rdf:Description xmlns:rdf="" *rdf-ns* "" " + (concatenate 'string "<rdf:UnknownType xmlns:rdf="" *rdf-ns* "" " "xmlns:isi="" *rdf2tm-ns* "" " "xmlns:arcs="http://test/arcs/%5C" " "rdf:ID="rdfID" xml:base="xmlBase" " "arcs:arc="arcsArc">" "arcs:rel" - "<rdf:Element rdf:about="element"/>" + "<rdf:Description rdf:about="element"/>" "</arcs:rel>" - "</rdf:Description>"))) + "</rdf:UnknownType>"))) (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) (is (length (dom:child-nodes dom-1)) 1) (let ((node (elt (dom:child-nodes dom-1) 0))) @@ -113,16 +159,400 @@ (dom:remove-attribute-ns node *rdf-ns* "nodeID") (is-true (rdf-importer::parse-node node)) (is-true (get-ns-attribute node "UUID" :ns-uri *rdf2tm-ns*)) + (dom:set-attribute-ns node *rdf-ns* "resource" "rdfResource") + (signals error (rdf-importer::parse-node node)) + (dom:set-attribute-ns node *rdf-ns* "resource" "") + (is-true (rdf-importer::parse-node node)) (dom:replace-child node (dom:create-text-node dom-1 "anyText") (xpath-single-child-elem-by-qname node "http://test/arcs/" "rel")) (signals error (rdf-importer::parse-node node))))))
+(test test-get-literals-of-property + "Tests the function get-literals-or-property." + (let ((doc-1 + (concatenate 'string "<prop:property xmlns:prop="http://props/%5C" " + "xmlns:rdf="" *rdf-ns* "" " + "xmlns:rdfs="" *rdfs-ns* "" " + "rdf:type="rdfType" rdf:resource="rdfResource" " + "rdf:nodeID="rdfNodeID" " + "prop:prop1="http://should/be/a/literal%5C" " + "prop:prop2="prop-2" " + "prop:prop3="">content-text</prop:property>"))) + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) + (is-true dom-1) + (is (= (length (dom:child-nodes dom-1)) 1)) + (let ((property (elt (dom:child-nodes dom-1) 0))) + (let ((literals (rdf-importer::get-literals-of-property property nil))) + (is (= (length literals) 3)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) + "http://should/be/a/literal") + (string= (getf x :type) "http://props/prop1") + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "prop-2") + (string= (getf x :type) "http://props/prop2") + (not (getf x :ID)))) + literals)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) "") + (string= (getf x :type) "http://props/prop3") + (not (getf x :ID)))) + literals))))))) + + +(test test-parse-property + "Tests the function parse-property." + (let ((doc-1 + (concatenate 'string "<rdf:Description xmlns:rdf="" *rdf-ns* "" " + "xmlns:rdfs="" *rdfs-ns* "" " + "xmlns:prop="http://isidorus/props/%5C%22%3E" + "<prop:prop0 rdf:parseType="Resource" />" + "<prop:prop1 rdf:parseType="Resource">" + "<prop:prop1_0 rdf:resource="prop21" />" + "</prop:prop1>" + "<prop:prop2 rdf:parseType="Literal">" + "<content_root>content-text</content_root>" + "</prop:prop2>" + "<prop:prop3 rdf:parseType="Collection" />" + "<prop:prop4 rdf:parseType="Collection">" + "<prop:prop4_0 rdf:resource="prop5_1" />" + "<prop:prop4_1 rdf:nodeID="prop5_2" />" + "prop:prop4_2/" + "</prop:prop4>" + "<prop:prop5 />" + "prop:prop6prop6</prop:prop6>" + "<prop:prop7 rdf:nodeID="prop7"/>" + "<prop:prop8 rdf:resource="prop8" />" + "<prop:prop9 rdf:type="typeProp9"> </prop:prop9>" + "<prop:prop10 rdf:datatype="datatypeProp10" />" + "<prop:prop11 rdf:ID="IDProp11"> </prop:prop11>" + "<prop:prop12 rdf:ID="IDprop12" rdf:nodeID="prop12">" + " </prop:prop12>" + "<prop:prop13 />" + "prop:prop14prop14</prop:prop14>" + "<prop:prop15 rdf:nodeID="prop15"/>" + "<prop:prop16 rdf:resource="prop16" />" + "<prop:prop17 rdf:type="typeProp17"> </prop:prop17>" + "<prop:prop18 rdf:ID="IDprop18" rdf:nodeID="prop18">" + " </prop:prop18>" + "</rdf:Description>"))) + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) + (is-true dom-1) + (is (= (length (dom:child-nodes dom-1)) 1)) + (let ((child (elt (dom:child-nodes dom-1) 0))) + (let ((children (rdf-importer::child-nodes-or-text child)) + (text-node (dom:create-text-node dom-1 "new text node"))) + (is (= (length children) 19)) + (loop for property across children + do (is-true (rdf-importer::parse-property property))) + (dotimes (i (length children)) + (if (or (= i 0) (= i 1) (= i 3) (= i 4) (= i 9) (= i 17)) + (is-true (get-ns-attribute (elt children i) "UUID" + :ns-uri *rdf2tm-ns*)) + (is-false (get-ns-attribute (elt children i) "UUID" + :ns-uri *rdf2tm-ns*)))) + (let ((prop (elt children 0))) + (dom:set-attribute-ns prop *rdf-ns* "parseType" "Unknown") + (signals error (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "parseType" "Resource") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "ID" "newID") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "bad" "bad") + (signals error (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "bad") + (is-true (rdf-importer::parse-property prop)) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop))) + (let ((prop (elt children 1))) + (dom:set-attribute-ns prop *rdf-ns* "nodeID" "bad") + (signals error (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "nodeID") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "ID" "newID") + (is-true (rdf-importer::parse-property prop)) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop))) + (let ((prop (elt children 3))) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop))) + (let ((prop (elt children 4))) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop))) + (let ((prop (elt children 5))) + (dom:set-attribute-ns prop *rdf-ns* "type" "newType") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "unknown" "unknown") + (is-true (rdf-importer::parse-property prop)) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "unknown") + (is-true (rdf-importer::parse-property prop)) + (dom:append-child prop text-node) + (signals error (rdf-importer::parse-property prop)) + (dom:remove-child prop text-node) + (is-true (rdf-importer::parse-property prop))) + (let ((prop (elt children 10))) + (dom:set-attribute-ns prop *rdf-ns* "type" "newType") + (signals error (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "type") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "nodeID" "newNodeID") + (signals error (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "nodeID") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "resource" "newResource") + (signals error (rdf-importer::parse-property prop)) + (dom:remove-attribute-ns prop *rdf-ns* "resource") + (is-true (rdf-importer::parse-property prop)) + (dom:set-attribute-ns prop *rdf-ns* "ID" "newID") + (is-true (rdf-importer::parse-property prop)))))))) + + +(test test-get-types + "Tests the functions get-type-of-node-name, get-types-of-content, + get-node-rerfs, absolute-uri-p, absolutize-value and absolutize-id." + (let ((tm-id "http://test-tm") + (doc-1 + (concatenate 'string "<rdf:anyType xmlns:rdf="" *rdf-ns* "" " + "xmlns:isi="" *rdf2tm-ns* "" " + "xmlns:arcs="http://test/arcs/%5C" " + "xml:base="xml-base/first" " + "rdf:about="resource" rdf:type="attr-type">" + "<rdf:type rdf:ID="rdfID" " + "rdf:resource="content-type-1"/>" + "<rdf:type /><!-- blank_node -->" + "<rdf:type arcs:arc="literalArc"/>" + "<rdf:type rdf:parseType="Collection" " + " xml:base="http://xml-base/absolute/%5C%22%3E" + "<!-- blank_node that is a list -->" + "<rdf:Description rdf:about="c-about-type"/>" + "<rdf:Description rdf:ID="c-id-type"/>" + "<rdf:Description rdf:nodeID="c-nodeID-type"/>" + "rdf:Description/<!-- blank_node -->" + "</rdf:type>" + "<rdf:type rdf:ID="rdfID2">" + "<rdf:Description rdf:about="c-about-type-2"/>" + "</rdf:type>" + + "rdf:type" + "<rdf:Description rdf:nodeID="c-nodeID-type-2"/>" + "</rdf:type>" + "<rdf:type xml:base="http://new-base/%5C%22%3E" + "<rdf:Description rdf:ID="c-ID-type-2"/>" + "</rdf:type>" + "<rdf:type rdf:ID="rdfID3">" + "rdf:Description/" + "</rdf:type>" + + "<arcs:arc rdf:resource="anyArc"/>" + "rdf:arc" + "<rdf:Description rdf:about="anyResource"/>" + "</rdf:arc>" + "</rdf:anyType>"))) + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder)))) + (is-true dom-1) + (is (= (length (dom:child-nodes dom-1)) 1)) + (is-true (absolute-uri-p tm-id)) + (is-false (absolute-uri-p "http//bad")) + (is-false (absolute-uri-p "")) + (is-false (absolute-uri-p " ")) + (is-false (absolute-uri-p nil)) + (let ((node (elt (dom:child-nodes dom-1) 0))) + (loop for property across (rdf-importer::child-nodes-or-text node) + do (rdf-importer::parse-property property)) + (let ((types + (append + (list (list + :value (rdf-importer::get-type-of-node-name node) + :ID nil)) + (rdf-importer::get-types-of-node-content node tm-id nil))) + (node-uuid (get-ns-attribute + (elt (rdf-importer::child-nodes-or-text + (elt (rdf-importer::child-nodes-or-text node) 7)) + 0) + "UUID" :ns-uri *rdf2tm-ns*))) + (is (= (length types) 10)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) + (concatenate + 'string *rdf-ns* "anyType")) + (not (getf x :ID)))) + types)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) + (concatenate + 'string tm-id + "/xml-base/first/attr-type")) + (not (getf x :ID)))) + types)) + (is-true (find-if #'(lambda(x) + (and (string= + (getf x :value) + (concatenate + 'string tm-id + "/xml-base/first/content-type-1")) + (string= (getf x :ID) + "rdfID"))) + types)) + (is-true (find-if #'(lambda(x) + (and (string= + (getf x :value) + (concatenate + 'string tm-id + "/xml-base/first/c-about-type-2")) + (string= (getf x :ID) + "rdfID2"))) + types)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) + "c-nodeID-type-2") + (not (getf x :ID)))) + types)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) + "http://new-base#c-ID-type-2") + (not (getf x :ID)))) + types)) + (is-true (find-if #'(lambda(x) + (and + (string= (getf x :value) node-uuid) + (string= (getf x :ID) + "rdfID3"))) + types)) + (is-true (= 10 (count-if #'(lambda(x) + (> (length (getf x :value)) 0)) + types))))))))
+(test test-get-literals-of-content + (let ((doc-1 + (concatenate 'string "<rdf:Description xmlns:rdf="" *rdf-ns* "" " + "xmlns:rdfs="" *rdfs-ns* "" " + "xmlns:prop="http://isidorus/props/%5C" " + "xml:base="base/first" xml:lang="de" >" + "prop:lit0text0</prop:lit0>" + "<prop:lit1 rdf:parseType="Literal">text1</prop:lit1>" + "<prop:lit2 xml:base="http://base/absolute%5C" " + "rdf:datatype="dType1">text2</prop:lit2>" + "<prop:arc rdf:parseType="Collection"/>" + "<prop:lit3 xml:lang="en" rdf:datatype="dType2">" + "<![CDATA[text3]]></prop:lit3>" + "<prop:lit4 rdf:datatype="dType2"><root><child/></root>" + " </prop:lit4>" + "<prop:lit5 rdf:ID="rdfID" " + "rdf:parseType="Literal"><root><child>" + "childText5</child> </root></prop:lit5>" + "<prop:lit6 xml:lang="" rdf:parseType="Literal">" + " <![CDATA[text6]]> abc " + "</prop:lit6>" + "</rdf:Description>"))) + (let ((dom-1 (cxml:parse doc-1 (cxml-dom:make-dom-builder))) + (tm-id "http://test-tm")) + (is-true dom-1) + (is (= (length (dom:child-nodes dom-1)) 1)) + (let ((node (elt (dom:child-nodes dom-1) 0))) + (dotimes (iter (length (dom:child-nodes node))) + (is-true (rdf-importer::parse-property + (elt (dom:child-nodes node) iter)))) + (let ((literals (rdf-importer::get-literals-of-node-content + node tm-id nil nil))) + (is (= (length literals) 7)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) "text0") + (string= (getf x :type) + "http://isidorus/props/lit0") + (not (getf x :ID)) + (string= (getf x :lang) "de") + (string= (getf x :datatype) *xml-string*))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) "text1") + (string= (getf x :type) + "http://isidorus/props/lit1") + (not (getf x :ID)) + (string= (getf x :lang) "de") + (string= (getf x :datatype) *xml-string*))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) "text2") + (string= (getf x :type) + "http://isidorus/props/lit2") + (not (getf x :ID)) + (string= (getf x :lang) "de") + (string= (getf x :datatype) + "http://base/absolute/dType1"))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) "text3") + (string= (getf x :type) + "http://isidorus/props/lit3") + (not (getf x :ID)) + (string= (getf x :lang) "en") + (string= (getf x :datatype) + "http://test-tm/base/first/dType2"))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) + "<root><child></child></root> ") + (string= (getf x :type) + "http://isidorus/props/lit4") + (not (getf x :ID)) + (string= (getf x :lang) "de") + (string= (getf x :datatype) + "http://test-tm/base/first/dType2"))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) + "<root><child>childText5</child> </root>") + (string= (getf x :type) + "http://isidorus/props/lit5") + (string= (getf x :ID) "rdfID") + (string= (getf x :lang) "de") + (string= (getf x :datatype) *xml-string*))) + literals)) + (is-true (find-if + #'(lambda(x) + (and (string= (getf x :value) " text6 abc ") + (string= (getf x :type) + "http://isidorus/props/lit6") + (not (getf x :ID)) + (not (getf x :lang)) + (string= (getf x :datatype) *xml-string*))) + literals)))))))
(defun run-rdf-importer-tests() (it.bese.fiveam:run! 'test-get-literals-of-node) - (it.bese.fiveam:run! 'test-parse-node)) \ No newline at end of file + (it.bese.fiveam:run! 'test-parse-node) + (it.bese.fiveam:run! 'test-get-literals-of-property) + (it.bese.fiveam:run! 'test-parse-property) + (it.bese.fiveam:run! 'test-get-types) + (it.bese.fiveam:run! 'test-get-literals-of-content)) \ No newline at end of file
Modified: trunk/src/xml/rdf/importer.lisp ============================================================================== --- trunk/src/xml/rdf/importer.lisp (original) +++ trunk/src/xml/rdf/importer.lisp Wed Jul 29 10:53:52 2009 @@ -54,5 +54,217 @@
(defun import-node (elem tm-id &key (document-id *document-id*) (xml-base nil) (xml-lang nil)) + (declare (ignorable document-id)) ;TODO: remove + (tm-id-p tm-id "import-node") (parse-node elem) - ) \ No newline at end of file + (let ((fn-xml-base (get-xml-base elem :old-base xml-base))) + (loop for property across (child-nodes-or-text elem) + do (parse-property property)) + (let ((about + (if (get-ns-attribute elem "about") + (absolutize-value (get-ns-attribute elem "about") + fn-xml-base tm-id) + nil)) + (nodeID (get-ns-attribute elem "nodeID")) + (ID (get-ns-attribute elem "ID")) + (UUID (get-ns-attribute elem "UUID" :ns-uri *rdf2tm-ns*)) + (literals (append (get-literals-of-node elem xml-lang) + (get-literals-of-node-content elem tm-id + xml-base xml-lang))) + (associations nil) + (types (append (list + (list :value (get-type-of-node-name elem) :ID nil)) + (get-types-of-node-content elem tm-id fn-xml-base))) + (super-classes nil)) ;TODO: implement + (declare (ignorable about nodeID ID UUID literals associations ;TODO: remove + types super-classes))))) + + +(defun get-literals-of-node-content (node tm-id xml-base xml-lang) + "Returns a list of literals that is produced of a node's content." + (declare (dom:element node)) + (tm-id-p tm-id "get-literals-of-content") + (let ((properties (child-nodes-or-text node)) + (fn-xml-base (get-xml-base node :old-base xml-base)) + (fn-xml-lang (get-xml-lang node :old-lang xml-lang))) + (let ((literals + (loop for property across properties + when (let ((prp-xml-base (get-xml-base property + :old-base fn-xml-base))) + (let ((datatype + (when (get-ns-attribute property "datatype") + (absolutize-value + (get-ns-attribute property "datatype") + prp-xml-base tm-id))) + (parseType (get-ns-attribute property "parseType")) + (nodeID (get-ns-attribute property "nodeID")) + (resource (get-ns-attribute property "resource")) + (UUID (get-ns-attribute property "UUID" + :ns-uri *rdf2tm-ns*))) + (or (or datatype + (string= parseType "Literal")) + (not (or nodeID resource UUID parseType))))) + collect (let ((content (child-nodes-or-text property)) + (prp-xml-base (get-xml-base property + :old-base fn-xml-base)) + (ID (get-ns-attribute property "ID")) + (prp-name (get-node-name property)) + (prp-ns (dom:namespace-uri property)) + (child-xml-lang + (get-xml-lang property :old-lang fn-xml-lang))) + (let ((full-name (concatenate-uri prp-ns prp-name)) + (datatype + (if (get-ns-attribute property "datatype") + (absolutize-value + (get-ns-attribute property "datatype") + prp-xml-base tm-id) + *xml-string*)) + (text + (cond + ((= (length content) 0) + "") + ((not (stringp content)) ;must be an element + (let ((text-val "")) + (loop for content-node across + (dom:child-nodes property) + do (push-string + (node-to-string content-node) + text-val)) + text-val)) + (t content)))) + (list :type full-name + :value text + :ID ID + :lang child-xml-lang + :datatype datatype)))))) + + literals))) + + +(defun get-type-of-node-name (node) + "Returns the type of the node name (namespace + tagname)." + (let ((node-name (get-node-name node)) + (node-ns (dom:namespace-uri node))) + (concatenate-uri node-ns node-name))) + + +(defun get-types-of-node-content (node tm-id xml-base) + "Returns a list of type-uris that corresponds to the node's content + or attributes." + (let ((fn-xml-base (get-xml-base node :old-base xml-base))) + (let ((attr-type + (if (get-ns-attribute node "type") + (list + (list :value (absolutize-value (get-ns-attribute node "type") + fn-xml-base tm-id) + :ID nil)) + nil)) + (content-types + (loop for child across (child-nodes-or-text node) + when (and (string= (dom:namespace-uri child) *rdf-ns*) + (string= (get-node-name child) "type")) + collect (let ((nodeID (get-ns-attribute child "nodeID")) + (resource (if (get-ns-attribute child "resource") + (absolutize-value + (get-ns-attribute child "resource") + fn-xml-base tm-id))) + (UUID (get-ns-attribute child "UUID" + :ns-uri *rdf2tm-ns*)) + (ID (get-ns-attribute child "ID"))) + (if (or nodeID resource UUID) + (list :value (or nodeID resource UUID) + :ID ID) + (let ((child-xml-base + (get-xml-base child :old-base fn-xml-base))) + (loop for ref in + (get-node-refs (child-nodes-or-text child) + tm-id child-xml-base) + append (list :value ref + :ID ID)))))))) + (remove-if #'null (append attr-type content-types))))) + + +(defun get-literals-of-property (property xml-lang) + "Returns a list of attributes that are treated as literal nodes." + (let ((fn-xml-lang (get-xml-lang property :old-lang xml-lang)) + (attributes nil)) + (dom:map-node-map + #'(lambda(attr) + (let ((attr-ns (dom:namespace-uri attr)) + (attr-name (get-node-name attr))) + (let ((l-type (concatenate-uri attr-ns attr-name)) + (l-value (if (get-ns-attribute property attr-name + :ns-uri attr-ns) + (get-ns-attribute property attr-name + :ns-uri attr-ns) + ""))) + (cond + ((string= attr-ns *rdf-ns*) + (unless (or (string= attr-name "ID") + (string= attr-name "resource") + (string= attr-name "nodeID") + (string= attr-name "type") + (string= attr-name "parseType") + (string= attr-name "datatype")) + (push (list :type l-type + :value l-value + :ID nil + :lang fn-xml-lang + :datatype *xml-string*) + attributes))) + ((or (string= attr-ns *xml-ns*) + (string= attr-ns *xmlns-ns*)) + nil);;do nothing, all xml-attributes are no literals + (t + (unless (and (string= attr-ns *rdf2tm-ns*) + (string= attr-name "UUID")) + (push (list :type l-type + :value l-value + :ID nil + :lang fn-xml-lang + :datatype *xml-string*) + attributes))))))) + (dom:attributes property)) + attributes)) + + +(defun get-literals-of-node (node xml-lang) + "Returns alist of attributes that are treated as literal nodes." + (let ((fn-xml-lang (get-xml-lang node :old-lang xml-lang)) + (attributes nil)) + (dom:map-node-map + #'(lambda(attr) + (let ((attr-ns (dom:namespace-uri attr)) + (attr-name (get-node-name attr))) + (let ((l-type (concatenate-uri attr-ns attr-name)) + (l-value (if (get-ns-attribute node attr-name :ns-uri attr-ns) + (get-ns-attribute node attr-name :ns-uri attr-ns) + ""))) + (cond + ((string= attr-ns *rdf-ns*) + (unless (or (string= attr-name "ID") + (string= attr-name "about") + (string= attr-name "nodeID") + (string= attr-name "type")) + (push (list :type l-type + :value l-value + :ID nil + :lang fn-xml-lang + :datatype *xml-string*) + attributes))) + ((or (string= attr-ns *xml-ns*) + (string= attr-ns *xmlns-ns*)) + nil);;do nothing, all xml-attributes are no literals + (t + (unless (and (string= attr-ns *rdf2tm-ns*) + (string= attr-name "UUID")) + (push (list :type l-type + :value l-value + :ID nil + :lang fn-xml-lang + :datatype *xml-string*) + attributes))))))) + (dom:attributes node)) + attributes)) + +
Modified: trunk/src/xml/rdf/rdf_tools.lisp ============================================================================== --- trunk/src/xml/rdf/rdf_tools.lisp (original) +++ trunk/src/xml/rdf/rdf_tools.lisp Wed Jul 29 10:53:52 2009 @@ -33,8 +33,10 @@ get-xml-lang get-xml-base absolutize-value + absolutize-id concatenate-uri - push-string) + push-string + node-to-string) (:import-from :xml-importer get-uuid get-store-spec) @@ -44,6 +46,18 @@
(in-package :rdf-importer)
+(defvar *rdf-types* (list "Description" "List" "Alt" "Bag" "Seq" + "Statement" "Property" "XMLLiteral")) + +(defvar *rdf-properties* (list "type" "first" "rest" "subject" "predicate" + "object")) + +(defvar *rdfs-types* (list "Resource" "Literal" "Class" "Datatype" + "Container" "ContainerMembershipProperty")) + +(defvar *rdfs-properties* (list "subClassOf" "subPropertyOf" "domain" + "range" "range" "label" "comment" + "member" "seeAlso" "isDefinedBy"))
(defun _n-p (node-name) "Returns t if the given value is of the form _[0-9]+" @@ -62,51 +76,28 @@ (defun parse-node-name (node) "Parses the given node's name to the known rdf/rdfs nodes and arcs. If the given name es equal to a property an error is thrown otherwise - there is displayed a warning." + there is displayed a warning when the rdf ord rdfs namespace is used." (declare (dom:element node)) (let ((node-name (get-node-name node)) - (node-ns (dom:namespace-uri node))) + (node-ns (dom:namespace-uri node)) + (err-pref "From parse-node-name(): ")) (when (string= node-ns *rdf-ns*) - (when (or (string= node-name "type") - (string= node-name "first") - (string= node-name "rest") - (string= node-name "subject") - (string= node-name "predicate") - (string= node-name "object")) - (error "From parse-node-name(): rdf:~a is a property and not allowed here!" - node-name)) + (when (find node-name *rdf-properties* :test #'string=) + (error "~ardf:~a is a property and not allowed here!" + err-pref node-name)) (when (string= node-name "RDF") - (error "From parse-node-name(): rdf:RDF not allowed here!")) - (unless (or (string= node-name "Description") - (string= node-name "List") - (string= node-name "Alt") - (string= node-name "Bag") - (string= node-name "Seq") - (string= node-name "Statement") - (string= node-name "Property") - (string= node-name "XMLLiteral")) - (format t "From parse-node-name(): Warning: ~a is not a known rdf:type!~%" - node-name))) + (error "~ardf:RDF not allowed here!" + err-pref)) + (unless (find node-name *rdf-types* :test #'string=) + (format t "~aWarning: ~a is not a known RDF type!~%" + err-pref node-name))) (when (string= node-ns *rdfs-ns*) - (when (or (string= node-name "subClassOf") - (string= node-name "subPropertyOf") - (string= node-name "domain") - (string= node-name "range") - (string= node-name "label") - (string= node-name "comment") - (string= node-name "member") - (string= node-name "seeAlso") - (string= node-name "isDefinedBy")) - (error "From parse-node-name(): rdfs:~a is a property and not allowed here!" - node-name)) - (unless (and (string= node-name "Resource") - (string= node-name "Literal") - (string= node-name "Class") - (string= node-name "Datatype") - (string= node-name "Cotnainer") - (string= node-name "ContainerMembershipProperty")) - (format t "From parse-node-name(): Warning: rdfs:~a is not a known rdfs:type!~%" - node-name)))) + (when (find node-name *rdfs-properties* :test #'string=) + (error "~ardfs:~a is a property and not allowed here!" + err-pref node-name)) + (unless (find node-name *rdfs-types* :test #'string=) + (format t "~aWarning: rdfs:~a is not a known rdfs:type!~%" + err-pref node-name)))) t)
@@ -117,7 +108,12 @@ (let ((ID (get-ns-attribute node "ID")) (nodeID (get-ns-attribute node "nodeID")) (about (get-ns-attribute node "about")) - (err-pref "From parse-node(): ")) + (err-pref "From parse-node(): ") + (resource (get-ns-attribute node "resource")) + (datatype (get-ns-attribute node "datatype")) + (parseType (get-ns-attribute node "parseType")) + (class (get-ns-attribute node "Class" :ns-uri *rdfs-ns*)) + (subClassOf (get-ns-attribute node "subClassOf" :ns-uri *rdfs-ns*))) (when (and about nodeID) (error "~ardf:about and rdf:nodeID are not allowed in parallel use: (~a) (~a)!" err-pref about nodeID)) @@ -130,43 +126,161 @@ (handler-case (let ((content (child-nodes-or-text node :trim t))) (when (stringp content) (error "text-content not allowed here!"))) - (condition (err) (error "~a~a" err-pref err)))) + (condition (err) (error "~a~a" err-pref err))) + (when (or resource datatype parseType class subClassOf) + (error "~a~a is not allowed here!" + err-pref (cond + (resource (concatenate 'string "resource(" + resource ")")) + (datatype (concatenate 'string "datatype(" + datatype ")")) + (parseType (concatenate 'string "parseType(" + parseType ")")) + (class (concatenate 'string "Class(" class ")")) + (subClassOf (concatenate 'string "subClassOf(" + subClassOf ")"))))) + (dolist (item *rdf-types*) + (when (get-ns-attribute node item) + (error "~ardf:~a is a type and not allowed here!" + err-pref item))) + (dolist (item *rdfs-types*) + (when (get-ns-attribute node item :ns-uri *rdfs-ns*) + (error "~ardfs:~a is a type and not allowed here!" + err-pref item)))) t)
+(defun get-node-refs (nodes tm-id xml-base) + "Returns a list of node references that can be used as topic IDs." + (when (and nodes + (> (length nodes) 0)) + (loop for node across nodes + collect (let ((fn-xml-base (get-xml-base node :old-base xml-base))) + (parse-node node) + (let ((ID (when (get-ns-attribute node "ID") + (absolutize-id (get-ns-attribute node "ID") + fn-xml-base tm-id))) + (nodeID (get-ns-attribute node "nodeID")) + (about (when (get-ns-attribute node "about") + (absolutize-value + (get-ns-attribute node "about") + fn-xml-base tm-id))) + (UUID (get-ns-attribute node "UUID" :ns-uri *rdf2tm-ns*))) + (or ID nodeID about UUID)))))) + + +(defun parse-property-name (property) + "Parses the given property's name to the known rdf/rdfs nodes and arcs. + If the given name es equal to an node an error is thrown otherwise + there is displayed a warning when the rdf ord rdfs namespace is used." + (declare (dom:element property)) + (let ((property-name (get-node-name property)) + (property-ns (dom:namespace-uri property)) + (err-pref "From parse-property-name(): ")) + (when (string= property-ns *rdf-ns*) + (when (find property-name *rdf-types* :test #'string=) + (error "~ardf:~a is a node and not allowed here!" + err-pref property-name)) + (when (string= property-name "RDF") + (error "~ardf:RDF not allowed here!" + err-pref)) + (unless (find property-name *rdf-properties* :test #'string=) + (format t "~aWarning: ~a is not a known RDF property!~%" + err-pref property-name))) + (when (string= property-ns *rdfs-ns*) + (when (find property-name *rdfs-types* :test #'string=) + (error "~ardfs:~a is a type and not allowed here!" + err-pref property-name)) + (unless (find property-name *rdfs-properties* :test #'string=) + (format t "~aWarning: rdfs:~a is not a known rdfs:type!~%" + err-pref property-name)))) + t) + + +(defun parse-property (property) + "Parses a property that represents a rdf-arc." + (declare (dom:element property)) + (let ((err-pref "From parse-property(): ") + (node-name (get-node-name property)) + (node-ns (dom:namespace-uri property)) + (nodeID (get-ns-attribute property "nodeID")) + (resource (get-ns-attribute property "resource")) + (datatype (get-ns-attribute property "datatype")) + (type (get-ns-attribute property "type")) + (parseType (get-ns-attribute property "parseType")) + (about (get-ns-attribute property "about")) + (subClassOf (get-ns-attribute property "subClassOf" :ns-uri *rdfs-ns*)) + (literals (get-literals-of-property property nil)) + (content (child-nodes-or-text property :trim t))) + (when (and parseType + (or nodeID resource datatype type literals)) + (error "~awhen rdf:parseType is set the attributes: ~a => ~a are not allowed!" + err-pref + (append (list (cond (nodeID "rdf:nodeID") + (resource "rdf:resource") + (datatype "rdf:datatype") + (type "rdf:type"))) + (map 'list #'(lambda(x)(getf x :type)) literals)) + (append (list (or nodeID resource datatype type)) + (map 'list #'(lambda(x)(getf x :value)) literals)))) + (when (and parseType + (not (or (string= parseType "Resource") + (string= parseType "Literal") + (string= parseType "Collection")))) + (error "~aunknown rdf:parseType: ~a" + err-pref parseType)) + (when (and parseType + (or (string= parseType "Resource") + (string= parseType "Collection"))) + (dom:set-attribute-ns property *rdf2tm-ns* "UUID" (get-uuid))) + (when (and parseType (string= parseType "Resource") (stringp content)) + (error "~ardf:parseType is set to 'Resource' expecting xml content: ~a!" + err-pref content)) + (when (and parseType + (string= parseType "Collection") + (stringp content)) + (error "~ardf:parseType is set to 'Collection' expecting resource content: ~a" + err-pref content)) + (when (and nodeID resource) + (error "~aondly one of rdf:nodeID and rdf:resource is allowed: (~a) (~a)!" + err-pref nodeID resource)) + (when (and (or nodeID resource type) + datatype) + (error "~aonly one of ~a and rdf:datatype (~a) is allowed!" + err-pref + (cond + (nodeID (concatenate 'string "rdf:nodeID (" nodeID ")")) + (resource (concatenate 'string "rdf:resource (" resource ")")) + (type (concatenate 'string "rdf:type (" type ")"))) + datatype)) + (when (and (or type nodeID resource) + (> (length content) 0)) + (error "~awhen ~a is set no content is allowed: ~a!" + err-pref + (cond + (type (concatenate 'string "rdf:type (" type ")")) + (nodeID (concatenate 'string "rdf:nodeID (" nodeID ")")) + (resource (concatenate 'string "rdf:resource (" resource ")"))) + content)) + (when (and (or type + (and (string= node-name "type") + (string= node-ns *rdf-ns*))) + (not (or nodeID resource)) + (not content)) + (dom:set-attribute-ns property *rdf2tm-ns* "UUID" (get-uuid))) + (when (or about subClassOf) + (error "~a~a not allowed here!" + err-pref + (if about + (concatenate 'string "rdf:about (" about ")") + (concatenate 'string "rdfs:subClassOf (" subClassOf ")")))) + (dolist (item *rdf-types*) + (when (get-ns-attribute property item) + (error "~ardf:~a is a type and not allowed here!" + err-pref item))) + (dolist (item *rdfs-types*) + (when (get-ns-attribute property item :ns-uri *rdfs-ns*) + (error "~ardfs:~a is a type and not allowed here!" + err-pref item)))) + t)
-(defun get-literals-of-node (node) - "Returns alist of attributes that are treated as literal nodes." - (let ((attributes nil)) - (dom:map-node-map - #'(lambda(attr) - (let ((attr-ns (dom:namespace-uri attr)) - (attr-name (get-node-name attr))) - (cond - ((string= attr-ns *rdf-ns*) - (unless (or (string= attr-name "ID") - (string= attr-name "about") - (string= attr-name "nodeID") - (string= attr-name "type")) - (push (list :type (concatenate-uri attr-ns attr-name) - :value (get-ns-attribute node attr-name)) - attributes))) - ((or (string= attr-ns *xml-ns*) - (string= attr-ns *xmlns-ns*)) - nil);;do nothing, all xml-attributes are no literals - ((string= attr-ns *rdfs-ns*) - (if (or (string= attr-name "subClassOf") - (string= attr-name "Class")) - (error "From get-literals-of-node(): rdfs:~a is not allowed here" - attr-name) - (push (list :type (concatenate-uri attr-ns attr-name) - :value (get-ns-attribute node attr-name - :ns-uri attr-ns)) - attributes))) - (t - (push (list :type (concatenate-uri attr-ns attr-name) - :value (get-ns-attribute node attr-name - :ns-uri attr-ns)) - attributes))))) - (dom:attributes node)) - attributes)) \ No newline at end of file
Modified: trunk/src/xml/xtm/tools.lisp ============================================================================== --- trunk/src/xml/xtm/tools.lisp (original) +++ trunk/src/xml/xtm/tools.lisp Wed Jul 29 10:53:52 2009 @@ -28,7 +28,8 @@ :get-xml-base :absolutize-value :concatenate-uri - :push-string)) + :push-string + :node-to-string))
(in-package :xml-tools)
@@ -65,13 +66,24 @@ (concatenate 'string prep-ns separator value))))
-(defun absolutize-value(value base tm-id) - "Returns the passed value as an absolute uri computed +(defun absolutize-id (id xml-base tm-id) + "Returns the passed id as an absolute uri computed with the given base and tm-id." + (declare (string id tm-id)) + (let ((prep-id (if (and (> (length id) 0) + (eql (elt id 0) ##)) + id + (concatenate 'string "#" (string-left-trim "/" id))))) + (absolutize-value prep-id xml-base tm-id))) + + +(defun absolutize-value(value xml-base tm-id) + "Returns the passed value as an absolute uri computed + with the given xml-base and tm-id." (declare (string value tm-id)) (unless (absolute-uri-p tm-id) (error "From absolutize-value(): you must provide a stable identifier (PSI-style) for this TM: ~a" tm-id)) - (when (> (count ## value) 2) + (when (> (count ## value) 1) (error "From absolutize-value(): value is allowed to have only one "#": ~a" value)) (if (absolute-uri-p value) value @@ -80,8 +92,8 @@ (string-left-trim "/" value) "")) (prep-base - (if (> (length base) 0) - (string-right-trim "/" base) + (if (> (length xml-base) 0) + (string-right-trim "/" xml-base) ""))) (let ((fragment (if (and (> (length prep-value) 0) @@ -323,4 +335,27 @@ (loop for child-node across (dom:child-nodes elem) unless (or (dom:text-node-p child-node) (dom:comment-p child-node)) - collect child-node)) \ No newline at end of file + collect child-node)) + + +(defun node-to-string (elem) + "Transforms the passed node element recursively to a string." + (if (dom:text-node-p elem) + (dom:node-value elem) + (let ((node-name (dom:node-name elem)) + (attributes (dom:attributes elem)) + (child-nodes (dom:child-nodes elem)) + (elem-string "")) + (push-string (concatenate 'string "<" node-name) elem-string) + (dom:map-node-map + #'(lambda(attr) + (let ((attr-name (dom:node-name attr)) + (attr-value (dom:node-value attr))) + (push-string (concatenate 'string " " attr-name "="" + attr-value """) + elem-string))) + attributes) + (push-string ">" elem-string) + (loop for child-node across child-nodes + do (push-string (node-to-string child-node) elem-string)) + (push-string (concatenate 'string "</" node-name ">") elem-string)))) \ No newline at end of file