[![flake8 Lint](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/lint.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/lint.yml)
[![Test](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/test.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/test.yml)
[![codecov](https://codecov.io/gh/acdh-oeaw/acdh-cidoc-pyutils/branch/main/graph/badge.svg?token=XRF7ANN1TM)](https://codecov.io/gh/acdh-oeaw/acdh-cidoc-pyutils)
[![PyPI version](https://badge.fury.io/py/acdh-cidoc-pyutils.svg)](https://badge.fury.io/py/acdh-cidoc-pyutils)
# acdh-cidoc-pyutils
Helper functions for the generation of CIDOC CRMish RDF (from XML/TEI data)
## Installation
* install via `pip install acdh-cidoc-pyutils`
## Examples
* For 'real-world-examples' see e.g. [semantic-kraus project](https://github.com/semantic-kraus/lk-data/blob/main/scripts/make_rdf.py)
* also take a look into [test_cidoc_pyutils.py](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/blob/main/tests/test_cidoc_pyutils.py)
### extract `cidoc:P14i_performed FRBROO:F51_Pursuit` triples from `tei:person/tei:occupation` nodes
```python
import lxml.etree as ET
from rdflib import URIRef
from acdh_cidoc_pyutils import make_occupations, NSMAP
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<person xml:id="DWpers0091" sortKey="Gulbransson_Olaf_Leonhard">
<persName type="pref">Gulbransson, Olaf</persName>
<occupation notBefore="1900-12" notAfter="2000" key="#hansi" xml:lang="it">Bürgermeister</occupation>
<occupation from="1233-02-03" key="#sumsi">Tischlermeister/Fleischhauer</occupation>
<occupation key="franzi">Sängerin</occupation>
<occupation>Bäckerin</occupation>
</person>
</TEI>"""
g, uris = make_occupations(subj, x, "https://foo.bar", id_xpath="@key")
print(g.serialize())
# returns
```
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<https://foo/bar/DWpers0091> ns1:P14i_performed <https://foo/bar/DWpers0091/occupation/3>,
<https://foo/bar/DWpers0091/occupation/franzi>,
<https://foo/bar/DWpers0091/occupation/hansi>,
<https://foo/bar/DWpers0091/occupation/sumsi> .
<https://foo/bar/DWpers0091/occupation/3> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;
rdfs:label "Bäckerin"@de .
<https://foo/bar/DWpers0091/occupation/franzi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;
rdfs:label "Sängerin"@de .
<https://foo/bar/DWpers0091/occupation/hansi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;
rdfs:label "Bürgermeister"@it ;
ns1:P4_has_time-span <https://foo/bar/DWpers0091/occupation/hansi/time-span> .
<https://foo/bar/DWpers0091/occupation/hansi/time-span> a ns1:E52_Time-Span ;
rdfs:label "1900-12 - 2000"^^xsd:string ;
ns1:P82a_begin_of_the_begin "1900-12"^^xsd:gYearMonth ;
ns1:P82b_end_of_the_end "2000"^^xsd:gYear .
<https://foo/bar/DWpers0091/occupation/sumsi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;
rdfs:label "Tischlermeister/Fleischhauer"@de ;
ns1:P4_has_time-span <https://foo/bar/DWpers0091/occupation/sumsi/time-span> .
<https://foo/bar/DWpers0091/occupation/sumsi/time-span> a ns1:E52_Time-Span ;
rdfs:label "1233-02-03 - 1233-02-03"^^xsd:string ;
ns1:P82a_begin_of_the_begin "1233-02-03"^^xsd:date ;
ns1:P82b_end_of_the_end "1233-02-03"^^xsd:date .
```
### extract birth/death triples from `tei:person`
```python
import lxml.etree as ET
from rdflib import URIRef
from acdh_cidoc_pyutils import make_birth_death_entities, NSMAP
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<person xml:id="DWpers0091" sortKey="Gulbransson_Olaf_Leonhard">
<persName type="pref">Gulbransson, Olaf</persName>
<birth when="1873-05-26">
26. 5. 1873<placeName key="#DWplace00139">Christiania (Oslo)</placeName>
</birth>
<death>
<date notBefore-iso="1905-07-04" when="1955" to="2000">04.07.1905</date>
<settlement key="pmb50">
<placeName type="pref">Wien</placeName>
<location><geo>48.2066 16.37341</geo></location>
</settlement>
</death>
</person>
</TEI>"""
doc = ET.fromstring(sample)
x = doc.xpath(".//tei:person[1]", namespaces=NSMAP)[0]
xml_id = x.attrib["{http://www.w3.org/XML/1998/namespace}id"].lower()
item_id = f"https://foo/bar/{xml_id}"
subj = URIRef(item_id)
event_graph, birth_uri, birth_timestamp = make_birth_death_entities(
subj, x, place_id_xpath="//tei:placeName[1]/@key"
)
event_graph, birth_uri, birth_timestamp = make_birth_death_entities(
subj, x, event_type="death", verbose=True, date_node_xpath="/tei:date[1]",
place_id_xpath="//tei:settlement[1]/@key"
)
event_graph.serialize(format="turtle")
# returns
```
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
# birth example
<https://foo/bar/dwpers0091/birth> a ns1:E67_Birth ;
rdfs:label "Geburt von Gulbransson, Olaf Leonhard"@fr ;
ns1:P4_has_time-span <https://foo/bar/dwpers0091/birth/time-span> ;
ns1:P7_took_place_at <https://foo/bar/DWplace00139> ;
ns1:P98_brought_into_life <https://foo/bar/dwpers0091> .
<https://foo/bar/dwpers0091/birth/time-span> a ns1:E52_Time-Span ;
rdfs:label "1873-05-26 - 1873-05-26"^^xsd:string ;
ns1:P82a_begin_of_the_begin "1873-05-26"^^xsd:date ;
ns1:P82b_end_of_the_end "1873-05-26"^^xsd:date .
# death example
<https://foo/bar/dwpers0091/death> a ns1:E69_Death ;
rdfs:label "Geburt von Gulbransson, Olaf Leonhard"@fr ;
ns1:P100_was_death_of <https://foo/bar/dwpers0091> ;
ns1:P7_took_place_at <https://foo/bar/pmb50> ;
ns1:P4_has_time-span <https://foo/bar/dwpers0091/death/time-span> .
<https://foo/bar/dwpers0091/death/time-span> a ns1:E52_Time-Span ;
rdfs:label "1905-07-04 - 2000"^^xsd:string ;
ns1:P82a_begin_of_the_begin "1905-07-04"^^xsd:date ;
ns1:P82b_end_of_the_end "2000"^^xsd:gYear .
```
### create `ns1:P168_place_is_defined_by "Point(456 123)"^^<geo:wktLiteral> .` from tei:coords
```python
import lxml.etree as ET
from rdflib import Graph, URIRef, RDF
from acdh_cidoc_pyutils import coordinates_to_p168, NSMAP, CIDOC
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="DWplace00092">
<placeName type="orig_name">Reval (Tallinn)</placeName>
<location><geo>123 456</geo></location>
</place>
</TEI>"""
doc = ET.fromstring(sample)
g = Graph()
for x in doc.xpath(".//tei:place", namespaces=NSMAP):
xml_id = x.attrib["{http://www.w3.org/XML/1998/namespace}id"].lower()
item_id = f"https://foo/bar/{xml_id}"
subj = URIRef(item_id)
g.add((subj, RDF.type, CIDOC["E53_Place"]))
g += coordinates_to_p168(subj, x)
print(g.serialize())
# returns
```
```ttl
...
ns1:P168_place_is_defined_by "Point(456 123)"^^<geo:wktLiteral> .
...
```
* Function parameter `verbose` prints information in case the given xpath does not return the expected result, i.e. a text node with two numbers separated by a given separator (default value is `separator=" "`)
* Function parameter `inverse` (default: `inverse=False`) changes the order of the coordinates.
### date-like-string to casted rdflib.Literal
```python
from acdh_cidoc_pyutils import date_to_literal
dates = [
"1900",
"1900-01",
"1901-01-01",
"foo",
]
for x in dates:
date_literal = date_to_literal(x)
print((date_literal.datatype))
# returns
# http://www.w3.org/2001/XMLSchema#gYear
# http://www.w3.org/2001/XMLSchema#gYearMonth
# http://www.w3.org/2001/XMLSchema#date
# http://www.w3.org/2001/XMLSchema#string
```
### make some random URI
```python
from acdh_cidoc_pyutils import make_uri
domain = "https://hansi4ever.com/"
version = "1"
prefix = "sumsi"
uri = make_uri(domain=domain, version=version, prefix=prefix)
print(uri)
# https://hansi4ever.com/1/sumsi/6ead32b8-9713-11ed-8065-65787314013c
uri = make_uri(domain=domain)
print(uri)
# https://hansi4ever.com/8b912e66-9713-11ed-8065-65787314013c
```
### create an E52_Time-Span graph
```python
from acdh_cidoc_pyutils import create_e52, make_uri
uri = make_uri()
e52 = create_e52(uri, begin_of_begin="1800-12-12", end_of_end="1900-01")
print(e52.serialize())
# returns
```
```ttl
# @prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
# @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
# @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
# <https://hansi4ever.com/387fb457-971b-11ed-8065-65787314013c> a ns1:E52_Time-Span ;
# rdfs:label "1800-12-12 - 1900-01"^^xsd:string ;
# ns1:P82a_begin_of_the_begin "1800-12-12"^^xsd:date ;
# ns1:P82b_end_of_the_end "1900-01"^^xsd:gYearMonth .
```
### creates E42 from tei:org|place|person
takes a tei:person|place|org node, extracts their `@xml:id` and all `tei:idno` elements, derives `cidoc:E42_Identifier` triples and relates them to a passed-in subject via `cidoc:P1_is_identified_by`
```python
import lxml.etree as ET
from rdflib import Graph, URIRef, RDF
from acdh_cidoc_pyutils import make_e42_identifiers, NSMAP, CIDOC
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="DWplace00092">
<placeName type="orig_name">Reval (Tallinn)</placeName>
<placeName xml:lang="de" type="simple_name">Reval</placeName>
<placeName xml:lang="und" type="alt_label">Tallinn</placeName>
<idno type="pmb">https://pmb.acdh.oeaw.ac.at/entity/42085/</idno>
<idno type="URI" subtype="geonames">https://www.geonames.org/588409</idno>
<idno subtype="foobarid">12345</idno>
</place>
</TEI>"""
doc = ET.fromstring(sample)
g = Graph()
for x in doc.xpath(".//tei:place|tei:org|tei:person|tei:bibl", namespaces=NSMAP):
xml_id = x.attrib["{http://www.w3.org/XML/1998/namespace}id"].lower()
item_id = f"https://foo/bar/{xml_id}"
subj = URIRef(item_id)
g.add((subj, RDF.type, CIDOC["E53_Place"]))
g += make_e42_identifiers(
subj, x, type_domain="http://hansi/4/ever", default_lang="it",
)
print(g.serialize(format="turtle"))
# returns
```
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
<https://foo/bar/dwplace00092> a ns1:E53_Place ;
ns1:P1_is_identified_by <https://foo/bar/dwplace00092/identifier/DWplace00092>,
<https://foo/bar/dwplace00092/identifier/idno/0>,
<https://foo/bar/dwplace00092/identifier/idno/1>,
<https://foo/bar/dwplace00092/identifier/idno/2> ;
owl:sameAs <https://pmb.acdh.oeaw.ac.at/entity/42085/>,
<https://www.geonames.org/588409> .
<http://hansi/4/ever/idno/URI/geonames> a ns1:E55_Type .
<http://hansi/4/ever/idno/foobarid> a ns1:E55_Type .
<http://hansi/4/ever/idno/pmb> a ns1:E55_Type .
<http://hansi/4/ever/xml-id> a ns1:E55_Type .
<https://foo/bar/dwplace00092/identifier/DWplace00092> a ns1:E42_Identifier ;
rdfs:label "Identifier: DWplace00092"@it ;
rdf:value "DWplace00092";
ns1:P2_has_type <http://hansi/4/ever/xml-id> .
<https://foo/bar/dwplace00092/identifier/idno/0> a ns1:E42_Identifier ;
rdfs:label "Identifier: https://pmb.acdh.oeaw.ac.at/entity/42085/"@it ;
rdf:value "https://pmb.acdh.oeaw.ac.at/entity/42085/";
ns1:P2_has_type <http://hansi/4/ever/idno/pmb> .
<https://foo/bar/dwplace00092/identifier/idno/1> a ns1:E42_Identifier ;
rdfs:label "Identifier: https://www.geonames.org/588409"@it ;
rdf:value "https://www.geonames.org/588409" ;
ns1:P2_has_type <http://hansi/4/ever/idno/URI/geonames> .
<https://foo/bar/dwplace00092/identifier/idno/2> a ns1:E42_Identifier ;
rdfs:label "Identifier: 12345"@it ;
rdf:value "12345";
ns1:P2_has_type <http://hansi/4/ever/idno/foobarid> .
```
### creates appellations from tei:org|place|person
takes a tei:person|place|org node, extracts `persName, placeName and orgName` texts, `@xml:lang` and custom type values and returns `cidoc:E33_41` and `cidoc:E55` nodes linked via `cidoc:P1_is_identified_by` and `cidoc:P2_has_type`
```python
import lxml.etree as ET
from rdflib import Graph, URIRef, RDF
from acdh_cidoc_pyutils import make_appellations, NSMAP, CIDOC
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="DWplace00092">
<placeName type="orig_name">Reval (Tallinn)</placeName>
<placeName xml:lang="de" type="simple_name">Reval</placeName>
<placeName xml:lang="und" type="alt_label">Tallinn</placeName>
<idno type="pmb">https://pmb.acdh.oeaw.ac.at/entity/42085/</idno>
</place>
</TEI>"""
doc = ET.fromstring(sample)
g = Graph()
for x in doc.xpath(".//tei:place|tei:org|tei:person|tei:bibl", namespaces=NSMAP):
xml_id = x.attrib["{http://www.w3.org/XML/1998/namespace}id"].lower()
item_id = f"https://foo/bar/{xml_id}"
subj = URIRef(item_id)
g.add((subj, RDF.type, CIDOC["E53_Place"]))
g += make_appellations(
subj, x, type_domain="http://hansi/4/ever", default_lang="it"
)
g.serialize(format="ttl")
# returns
```
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
<https://foo/bar/dwplace00092> a ns1:E53_Place ;
ns1:P1_is_identified_by <https://foo/bar/dwplace00092/appellation/0>,
<https://foo/bar/dwplace00092/appellation/1>,
<https://foo/bar/dwplace00092/appellation/2> .
<http://hansi/4/ever/alt-label> a ns1:E55_Type ;
rdfs:label "alt_label" .
<http://hansi/4/ever/orig-name> a ns1:E55_Type ;
rdfs:label "orig_name" .
<http://hansi/4/ever/simple-name> a ns1:E55_Type ;
rdfs:label "simple_name" .
<https://foo/bar/dwplace00092/appellation/0> a ns1:E33_E41_Linguistic_Appellation ;
rdfs:label "Reval (Tallinn)"@it ;
ns1:P2_has_type <http://hansi/4/ever/orig-name> .
<https://foo/bar/dwplace00092/appellation/1> a ns1:E33_E41_Linguistic_Appellation ;
rdfs:label "Reval"@de ;
ns1:P2_has_type <http://hansi/4/ever/simple-name> .
<https://foo/bar/dwplace00092/appellation/2> a ns1:E33_E41_Linguistic_Appellation ;
rdfs:label "Tallinn"@und ;
ns1:P2_has_type <http://hansi/4/ever/alt-label> .
```
### connects two places (E53_Place) with P89_falls_within
```python
domain = "https://foo/bar/"
subj = URIRef(f"{domain}place__237979")
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="place__237979">
<placeName>Lerchenfelder Gürtel 48</placeName>
<desc type="entity_type">Wohngebäude (K.WHS)</desc>
<desc type="entity_type_id">36</desc>
<location type="coords">
<geo>48,209035 16,339257</geo>
</location>
<location>
<placeName ref="place__50">Wien</placeName>
<geo>48,208333 16,373056</geo>
</location>
</place>
</TEI>"""
doc = ET.fromstring(sample)
node = doc.xpath(".//tei:place[1]", namespaces=NSMAP)[0]
g = p89_falls_within(
subj, node, domain, location_id_xpath="./tei:location/tei:placeName/@ref"
)
result = g.serialize(format="ttl")
```
returns
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
<https://foo/bar/place__237979> ns1:P89_falls_within <https://foo/bar/place__50> .
```
### creates E66_Formation and E68_Dissolution events
```python
from acdh_cidoc_pyutils import p95i_was_formed_by
from rdflib import Graph, URIRef
g = Graph()
subj = URIRef("https://wienerschnitzler.org")
label = "Wiener Moderne Verein"
g += p95i_was_formed_by(
subj, start_date="2023-10-14", end_date="2025-12-31", label=f"{label} wurde gegründet", label_lang="de"
)
result = g.serialize(format="ttl")
```
returns
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<https://wienerschnitzler.org> ns1:P95i_was_formed_by <https://wienerschnitzler.org/formation-event> .
<https://wienerschnitzler.org/dissolution-event> a ns1:E68_Dissolution ;
rdfs:label "Institution wurde aufgelöst"@de ;
ns1:P4_has_time-span <https://wienerschnitzler.org/dissolution-event/dissolution-time-span> .
<https://wienerschnitzler.org/dissolution-event/dissolution-time-span> a ns1:E52_Time-Span ;
rdfs:label "2025-12-31"^^xsd:string ;
ns1:P82a_begin_of_the_begin "2025-12-31"^^xsd:date ;
ns1:P82b_end_of_the_end "2025-12-31"^^xsd:date .
<https://wienerschnitzler.org/formation-event> a ns1:E66_Formation ;
rdfs:label "Wiener Moderne Verein wurde gegründet"@de ;
ns1:P4_has_time-span <https://wienerschnitzler.org/formation-event/formation-time-span> .
<https://wienerschnitzler.org/formation-event/formation-time-span> a ns1:E52_Time-Span ;
rdfs:label "2023-10-14"^^xsd:string ;
ns1:P82a_begin_of_the_begin "2023-10-14"^^xsd:date ;
ns1:P82b_end_of_the_end "2023-10-14"^^xsd:date .
```
### normalize_string
```python
from acdh_cidoc_pyutils import normalize_string
string = """\n\nhallo
mein schatz ich liebe dich
du bist die einzige für mich
"""
print(normalize_string(string))
# returns
# hallo mein schatz ich liebe dich du bist die einzige für mich
```
### extract date attributes (begin, end)
expects typical TEI date attributes like `@when, @when-iso, @notBefore, @notAfter, @from, @to, ...` and returns a tuple containing start and end date values. If only `@when or @when-iso` or only `@notBefore or @notAfter` are provided, the returned values are the same, unless the default parameter `fill_missing` is set to `False`.
```python
from lxml.etree import Element
from acdh_cidoc_pyutils import extract_begin_end
date_string = "1900-12-12"
date_object = Element("{http://www.tei-c.org/ns/1.0}tei")
date_object.attrib["when-iso"] = date_string
print(extract_begin_end(date_object))
# returns
# ('1900-12-12', '1900-12-12')
date_string = "1900-12-12"
date_object = Element("{http://www.tei-c.org/ns/1.0}tei")
date_object.attrib["when-iso"] = date_string
print(extract_begin_end(date_object, fill_missing=False))
# returns
# ('1900-12-12', None)
date_object = Element("{http://www.tei-c.org/ns/1.0}tei")
date_object.attrib["notAfter"] = "1900-12-12"
date_object.attrib["notBefore"] = "1800"
print(extract_begin_end(date_object))
# returns
# ('1800', '1900-12-12')
```
### Convert a TEI document into an RDF graph representing a CIDOC CRM F24 Publication Expression.
```python
from acdh_cidoc_pyutils import teidoc_as_f24_publication_expression
file_path = "L02643.xml"
domain = "https://schnitzler-briefe.acdh.oeaw.ac.at"
uri, g, mentions = teidoc_as_f24_publication_expression(
file_path, domain, ".//tei:titleStmt/tei:title[@level='a']"
)
g.serialize(file_path.replace(".xml", ".ttl"))
```
returns
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
<https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml> a <http://iflastandards.info/ns/fr/frbr/frbroo/F24_Publication_Expression> ;
rdfs:label "Paul Goldmann an Arthur Schnitzler, 6. 8. 1889"@de ;
ns1:P1_is_identified_by <https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml/appellation> ;
ns1:P67_refers_to <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb11485>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb12698>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb169237>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb2121>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb213>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb29698>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb50>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb52510>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb53101>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb53104>,
<https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb88392> .
<https://pfp-schema.acdh.oeaw.ac.at/types/tei-document> a ns1:E55_Type ;
rdfs:label "A TEI/XML encoded text"@en .
<https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml/appellation> a ns1:E33_E41_Linguistic_Appellation ;
rdfs:label "Paul Goldmann an Arthur Schnitzler, 6. 8. 1889"@de ;
ns1:P2_has_type <https://pfp-schema.acdh.oeaw.ac.at/types/tei-document> .
```
## development
* `pip install -r requirements_dev.txt`
* `flake8` -> linting
* `coverage run -m pytest` -> runs tests and creates coverage stats
Raw data
{
"_id": null,
"home_page": "https://github.com/acdh-oeaw/acdh-cidoc-pyutils",
"name": "acdh-cidoc-pyutils",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": null,
"keywords": null,
"author": "Peter Andorfer",
"author_email": "peter.andorfer@oeaw.ac.at",
"download_url": "https://files.pythonhosted.org/packages/44/69/d74e3312f701e241393d6c0546faf51e7a3d31a636b6bc8f45d9c53dec99/acdh_cidoc_pyutils-1.6.2.tar.gz",
"platform": null,
"description": "[![flake8 Lint](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/lint.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/lint.yml)\n[![Test](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/test.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/actions/workflows/test.yml)\n[![codecov](https://codecov.io/gh/acdh-oeaw/acdh-cidoc-pyutils/branch/main/graph/badge.svg?token=XRF7ANN1TM)](https://codecov.io/gh/acdh-oeaw/acdh-cidoc-pyutils)\n[![PyPI version](https://badge.fury.io/py/acdh-cidoc-pyutils.svg)](https://badge.fury.io/py/acdh-cidoc-pyutils)\n\n# acdh-cidoc-pyutils\nHelper functions for the generation of CIDOC CRMish RDF (from XML/TEI data)\n\n## Installation\n\n* install via `pip install acdh-cidoc-pyutils`\n\n## Examples\n\n* For 'real-world-examples' see e.g. [semantic-kraus project](https://github.com/semantic-kraus/lk-data/blob/main/scripts/make_rdf.py)\n* also take a look into [test_cidoc_pyutils.py](https://github.com/acdh-oeaw/acdh-cidoc-pyutils/blob/main/tests/test_cidoc_pyutils.py)\n\n### extract `cidoc:P14i_performed FRBROO:F51_ Pursuit` triples from `tei:person/tei:occupation` nodes\n```python\nimport lxml.etree as ET\nfrom rdflib import URIRef\nrom acdh_cidoc_pyutils import make_occupations, NSMAP\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <person xml:id=\"DWpers0091\" sortKey=\"Gulbransson_Olaf_Leonhard\">\n <persName type=\"pref\">Gulbransson, Olaf</persName>\n <occupation notBefore=\"1900-12\" notAfter=\"2000\" key=\"#hansi\" xml:lang=\"it\">B\u00fcrgermeister</occupation>\n <occupation from=\"1233-02-03\" key=\"#sumsi\">Tischlermeister/Fleischhauer</occupation>\n <occupation key=\"franzi\">S\u00e4ngerin</occupation>\n <occupation>B\u00e4ckerin</occupation>\n </person>\n</TEI>\"\"\"\ng, uris = make_occupations(subj, x, \"https://foo.bar\", id_xpath=\"@key\")\nprint(g.serialize())\n# returns\n```\n```ttl\n@prefix ns1: 
<http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n<https://foo/bar/DWpers0091> ns1:P14i_performed <https://foo/bar/DWpers0091/occupation/3>,\n <https://foo/bar/DWpers0091/occupation/franzi>,\n <https://foo/bar/DWpers0091/occupation/hansi>,\n <https://foo/bar/DWpers0091/occupation/sumsi> .\n\n<https://foo/bar/DWpers0091/occupation/3> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;\n rdfs:label \"B\u00e4ckerin\"@de .\n\n<https://foo/bar/DWpers0091/occupation/franzi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;\n rdfs:label \"S\u00e4ngerin\"@de .\n\n<https://foo/bar/DWpers0091/occupation/hansi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;\n rdfs:label \"B\u00fcrgermeister\"@it ;\n ns1:P4_has_time-span <https://foo/bar/DWpers0091/occupation/hansi/time-span> .\n\n<https://foo/bar/DWpers0091/occupation/hansi/time-span> a ns1:E52_Time-Span ;\n rdfs:label \"1900-12 - 2000\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"1900-12\"^^xsd:gYearMonth ;\n ns1:P82b_end_of_the_end \"2000\"^^xsd:gYear .\n\n<https://foo/bar/DWpers0091/occupation/sumsi> a <http://iflastandards.info/ns/fr/frbr/frbroo#F51> ;\n rdfs:label \"Tischlermeister/Fleischhauer\"@de ;\n ns1:P4_has_time-span <https://foo/bar/DWpers0091/occupation/sumsi/time-span> .\n\n<https://foo/bar/DWpers0091/occupation/sumsi/time-span> a ns1:E52_Time-Span ;\n rdfs:label \"1233-02-03 - 1233-02-03\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"1233-02-03\"^^xsd:date ;\n ns1:P82b_end_of_the_end \"1233-02-03\"^^xsd:date .\n```\n\n### extract birth/death triples from `tei:person`\n\n```python\nimport lxml.etree as ET\nfrom rdflib import URIRef\nfrom acdh_cidoc_pyutils import make_birth_death_entities, NSMAP\n\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <person xml:id=\"DWpers0091\" sortKey=\"Gulbransson_Olaf_Leonhard\">\n <persName type=\"pref\">Gulbransson, Olaf</persName>\n <birth 
when=\"1873-05-26\">\n 26. 5. 1873<placeName key=\"#DWplace00139\">Christiania (Oslo)</placeName>\n </birth>\n <death>\n <date notBefore-iso=\"1905-07-04\" when=\"1955\" to=\"2000\">04.07.1905</date>\n <settlement key=\"pmb50\">\n <placeName type=\"pref\">Wien</placeName>\n <location><geo>48.2066 16.37341</geo></location>\n </settlement>\n </death>\n </person>\n</TEI>\"\"\"\n\ndoc = ET.fromstring(sample)\nx = doc.xpath(\".//tei:person[1]\", namespaces=NSMAP)[0]\nxml_id = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"].lower()\nitem_id = f\"https://foo/bar/{xml_id}\"\nsubj = URIRef(item_id)\nevent_graph, birth_uri, birth_timestamp = make_birth_death_entities(\n subj, x, place_id_xpath=\"//tei:placeName[1]/@key\n)\nevent_graph, birth_uri, birth_timestamp = make_birth_death_entities(\n subj, x, event_type=\"death\", verbose=True, date_node_xpath=\"/tei:date[1]\",\n place_id_xpath=\"//tei:settlement[1]/@key\"\n)\nevent_graph.serialize(format=\"turtle\")\n# returns\n```\n```ttl\n@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n# birth example\n\n<https://foo/bar/dwpers0091/birth> a ns1:E67_Birth ;\n rdfs:label \"Geburt von Gulbransson, Olaf Leonhard\"@fr ;\n ns1:P4_has_time-span <https://foo/bar/dwpers0091/birth/time-span> ;\n ns1:P7_took_place_at <https://foo/bar/DWplace00139> ;\n ns1:P98_brought_into_life <https://foo/bar/dwpers0091> .\n\n<https://foo/bar/dwpers0091/birth/time-span> a ns1:E52_Time-Span ;\n rdfs:label \"1873-05-26 - 1873-05-26\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"1873-05-26\"^^xsd:date ;\n ns1:P82b_end_of_the_end \"1873-05-26\"^^xsd:date .\n\n# death example\n\n<https://foo/bar/dwpers0091/death> a ns1:E69_Death ;\n rdfs:label \"Geburt von Gulbransson, Olaf Leonhard\"@fr ;\n ns1:P100_was_death_of <https://foo/bar/dwpers0091> ;\n ns1:P7_took_place_at <https://foo/bar/pmb50>\n ns1:P4_has_time-span <https://foo/bar/dwpers0091/death/time-span> 
.\n\n<https://foo/bar/dwpers0091/death/time-span> a ns1:E52_Time-Span ;\n rdfs:label \"1905-07-04 - 2000\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"1905-07-04\"^^xsd:date ;\n ns1:P82b_end_of_the_end \"2000\"^^xsd:gYear .\n```\n\n\n### create `ns1:P168_place_is_defined_by \"Point(456 123)\"^^<geo:wktLiteral> .` from tei:coords\n```python\nimport lxml.etree as ET\nfrom rdflib import Graph, URIRef, RDF\nfrom acdh_cidoc_pyutils import coordinates_to_p168, NSMAP, CIDOC\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <place xml:id=\"DWplace00092\">\n <placeName type=\"orig_name\">Reval (Tallinn)</placeName>\n <location><geo>123 456</geo></location>\n </place>\n</TEI>\"\"\"\n\ndoc = ET.fromstring(sample)\ng = Graph()\nfor x in doc.xpath(\".//tei:place\", namespaces=NSMAP):\n xml_id = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"].lower()\n item_id = f\"https://foo/bar/{xml_id}\"\n subj = URIRef(item_id)\n g.add((subj, RDF.type, CIDOC[\"E53_Place\"]))\n g += coordinates_to_p168(subj, x)\nprint(g.serialize())\n# returns\n```\n```ttl\n...\n ns1:P168_place_is_defined_by \"Point(456 123)\"^^<geo:wktLiteral> .\n...\n```\n* Function parameter `verbose` prints information in case the given xpath does not return expected results which is a text node with two numbers separated by a given separator (default value is `separator=\" \"`)\n* Function parameter `inverse` (default: `inverse=False`) changes the order of the coordinates.\n\n\n\n### date-like-string to casted rdflib.Literal\n\n```python\nfrom acdh_cidoc_pyutils import date_to_literal d\ndates = [\n \"1900\",\n \"1900-01\",\n \"1901-01-01\",\n \"foo\",\n]\nfor x in dates:\n date_literal = date_to_literal(x)\n print((date_literal.datatype))\n\n# returns\n# http://www.w3.org/2001/XMLSchema#gYear\n# http://www.w3.org/2001/XMLSchema#gYearMonth\n# http://www.w3.org/2001/XMLSchema#date\n# http://www.w3.org/2001/XMLSchema#string\n```\n\n### make some random URI\n\n```python\nfrom acdh_cidoc_pyutils 
import make_uri\n\ndomain = \"https://hansi4ever.com/\"\nversion = \"1\"\nprefix = \"sumsi\"\nuri = make_uri(domain=domain, version=version, prefix=prefix)\nprint(uri)\n# https://hansi4ever.com/1/sumsi/6ead32b8-9713-11ed-8065-65787314013c\n\nuri = make_uri(domain=domain)\nprint(uri)\n# https://hansi4ever.com/8b912e66-9713-11ed-8065-65787314013c\n```\n\n### create an E52_Time-Span graph\n\n```python\nfrom acdh_cidoc_pyutils import create_e52, make_uri\nuri = make_uri()\ne52 = create_e52(uri, begin_of_begin=\"1800-12-12\", end_of_end=\"1900-01\")\nprint(e52.serialize())\n# returns\n```\n```ttl\n# @prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n# @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n# @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n# <https://hansi4ever.com/387fb457-971b-11ed-8065-65787314013c> a ns1:E52_Time-Span ;\n# rdfs:label \"1800-12-12 - 1900-01\"^^xsd:string ;\n# ns1:P82a_begin_of_the_begin \"1800-12-12\"^^xsd:date ;\n# ns1:P82b_end_of_the_end \"1900-01\"^^xsd:gYearMonth .\n```\n### creates E42 from tei:org|place|person\n\nTakes a tei:person|place|org node, extracts its `@xml:id` and all `tei:idno` elements, derives `cidoc:E42_Identifier` triples and relates them to a passed-in subject via `cidoc:P1_is_identified_by`.\n\n```python\nimport lxml.etree as ET\nfrom rdflib import Graph, URIRef, RDF\nfrom acdh_cidoc_pyutils import make_e42_identifiers, NSMAP, CIDOC\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <place xml:id=\"DWplace00092\">\n <placeName type=\"orig_name\">Reval (Tallinn)</placeName>\n <placeName xml:lang=\"de\" type=\"simple_name\">Reval</placeName>\n <placeName xml:lang=\"und\" type=\"alt_label\">Tallinn</placeName>\n <idno type=\"pmb\">https://pmb.acdh.oeaw.ac.at/entity/42085/</idno>\n <idno type=\"URI\" subtype=\"geonames\">https://www.geonames.org/588409</idno>\n <idno subtype=\"foobarid\">12345</idno>\n </place>\n</TEI>\"\"\"\n\ndoc = ET.fromstring(sample)\ng = Graph()\nfor x in 
doc.xpath(\".//tei:place|tei:org|tei:person|tei:bibl\", namespaces=NSMAP):\n xml_id = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"].lower()\n item_id = f\"https://foo/bar/{xml_id}\"\n subj = URIRef(item_id)\n g.add((subj, RDF.type, CIDOC[\"E53_Place\"]))\n g += make_e42_identifiers(\n subj, x, type_domain=\"http://hansi/4/ever\", default_lang=\"it\",\n )\nprint(g.serialize(format=\"turtle\"))\n# returns\n```\n```ttl\n@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<https://foo/bar/dwplace00092> a ns1:E53_Place ;\n ns1:P1_is_identified_by <https://foo/bar/dwplace00092/identifier/DWplace00092>,\n <https://foo/bar/dwplace00092/identifier/idno/0>,\n <https://foo/bar/dwplace00092/identifier/idno/1>,\n <https://foo/bar/dwplace00092/identifier/idno/2> ;\n owl:sameAs <https://pmb.acdh.oeaw.ac.at/entity/42085/>,\n <https://www.geonames.org/588409> .\n\n<http://hansi/4/ever/idno/URI/geonames> a ns1:E55_Type .\n\n<http://hansi/4/ever/idno/foobarid> a ns1:E55_Type .\n\n<http://hansi/4/ever/idno/pmb> a ns1:E55_Type .\n\n<http://hansi/4/ever/xml-id> a ns1:E55_Type .\n\n<https://foo/bar/dwplace00092/identifier/DWplace00092> a ns1:E42_Identifier ;\n rdfs:label \"Identifier: DWplace00092\"@it ;\n rdf:value \"DWplace00092\";\n ns1:P2_has_type <http://hansi/4/ever/xml-id> .\n\n<https://foo/bar/dwplace00092/identifier/idno/0> a ns1:E42_Identifier ;\n rdfs:label \"Identifier: https://pmb.acdh.oeaw.ac.at/entity/42085/\"@it ;\n rdf:value \"https://pmb.acdh.oeaw.ac.at/entity/42085/\";\n ns1:P2_has_type <http://hansi/4/ever/idno/pmb> .\n\n<https://foo/bar/dwplace00092/identifier/idno/1> a ns1:E42_Identifier ;\n rdfs:label \"Identifier: https://www.geonames.org/588409\"@it ;\n rdf:value \"https://www.geonames.org/588409\";\n ns1:P2_has_type <http://hansi/4/ever/idno/URI/geonames> .\n\n<https://foo/bar/dwplace00092/identifier/idno/2> a ns1:E42_Identifier ;\n rdfs:label 
\"Identifier: 12345\"@it ;\n rdf:value \"12345\";\n ns1:P2_has_type <http://hansi/4/ever/idno/foobarid> .\n```\n\n### creates appellations from tei:org|place|person\n\ntakes a tei:person|place|org node, extracts `persName, placeName and orgName` texts, `@xml:lang` and custom type values and returns `cidoc:E33_41` and `cidoc:E55` nodes linked via `cidoc:P1_is_identified_by` and `cidoc:P2_has_type`\n\n```python\nimport lxml.etree as ET\nfrom rdflib import Graph, URIRef, RDF\nfrom acdh_cidoc_pyutils import make_appellations, NSMAP, CIDOC\n\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <place xml:id=\"DWplace00092\">\n <placeName type=\"orig_name\">Reval (Tallinn)</placeName>\n <placeName xml:lang=\"de\" type=\"simple_name\">Reval</placeName>\n <placeName xml:lang=\"und\" type=\"alt_label\">Tallinn</placeName>\n <idno type=\"pmb\">https://pmb.acdh.oeaw.ac.at/entity/42085/</idno>\n </place>\n</TEI>\"\"\"\n\ndoc = ET.fromstring(sample)\ng = Graph()\nfor x in doc.xpath(\".//tei:place|tei:org|tei:person|tei:bibl\", namespaces=NSMAP):\n xml_id = x.attrib[\"{http://www.w3.org/XML/1998/namespace}id\"].lower()\n item_id = f\"https://foo/bar/{xml_id}\"\n subj = URIRef(item_id)\n g.add((subj, RDF.type, CIDOC[\"E53_Place\"]))\n g += make_appellations(\n subj, x, type_domain=\"http://hansi/4/ever\", default_lang=\"it\"\n )\n\ng.serialize(format=\"ttl\")\n# returns\n```\n```ttl\n@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<https://foo/bar/dwplace00092> a ns1:E53_Place ;\n ns1:P1_is_identified_by <https://foo/bar/dwplace00092/appellation/0>,\n <https://foo/bar/dwplace00092/appellation/1>,\n <https://foo/bar/dwplace00092/appellation/2> .\n\n<http://hansi/4/ever/alt-label> a ns1:E55_Type ;\n rdfs:label \"alt_label\" .\n\n<http://hansi/4/ever/orig-name> a ns1:E55_Type ;\n rdfs:label \"orig_name\" .\n\n<http://hansi/4/ever/simple-name> a ns1:E55_Type ;\n rdfs:label \"simple_name\" 
.\n\n<https://foo/bar/dwplace00092/appellation/0> a ns1:E33_E41_Linguistic_Appellation ;\n rdfs:label \"Reval (Tallinn)\"@it ;\n ns1:P2_has_type <http://hansi/4/ever/orig-name> .\n\n<https://foo/bar/dwplace00092/appellation/1> a ns1:E33_E41_Linguistic_Appellation ;\n rdfs:label \"Reval\"@de ;\n ns1:P2_has_type <http://hansi/4/ever/simple-name> .\n\n<https://foo/bar/dwplace00092/appellation/2> a ns1:E33_E41_Linguistic_Appellation ;\n rdfs:label \"Tallinn\"@und ;\n ns1:P2_has_type <http://hansi/4/ever/alt-label> .\n```\n\n### connects to places (E53_Place) with P89_falls_within\n\n```python\ndomain = \"https://foo/bar/\"\nsubj = URIRef(f\"{domain}place__237979\")\nsample = \"\"\"\n<TEI xmlns=\"http://www.tei-c.org/ns/1.0\">\n <place xml:id=\"place__237979\">\n <placeName>Lerchenfelder Gürtel 48</placeName>\n <desc type=\"entity_type\">Wohngebäude (K.WHS)</desc>\n <desc type=\"entity_type_id\">36</desc>\n <location type=\"coords\">\n <geo>48,209035 16,339257</geo>\n </location>\n <location>\n <placeName ref=\"place__50\">Wien</placeName>\n <geo>48,208333 16,373056</geo>\n </location>\n </place>\n</TEI>\"\"\"\ndoc = ET.fromstring(sample)\nnode = doc.xpath(\".//tei:place[1]\", namespaces=NSMAP)[0]\ng = p89_falls_within(\n subj, node, domain, location_id_xpath=\"./tei:location/tei:placeName/@ref\"\n)\nresult = g.serialize(format=\"ttl\")\n```\nreturns\n```ttl\n@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n\n<https://foo/bar/place__237979> ns1:P89_falls_within <https://foo/bar/place__50> .\n```\n\n### creates E66_Formation and E68_Dissolution events\n```python\nfrom acdh_cidoc_pyutils import p95i_was_formed_by\nfrom rdflib import Graph, URIRef\n\n\ng = Graph()\nsubj = URIRef(\"https://wienerschnitzler.org\")\nlabel = \"Wiener Moderne Verein\"\ng += p95i_was_formed_by(\n subj, start_date=\"2023-10-14\", end_date=\"2025-12-31\", label=f\"{label} wurde gegr\u00fcndet\", label_lang=\"de\"\n)\nresult = g.serialize(format=\"ttl\")\n```\nreturns\n```ttl\n@prefix ns1: 
<http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n<https://wienerschnitzler.org> ns1:P95i_was_formed_by <https://wienerschnitzler.org/formation-event> .\n\n<https://wienerschnitzler.org/dissolution-event> a ns1:E68_Dissolution ;\n rdfs:label \"Institution wurde aufgel\u00f6st\"@de ;\n ns1:P4_has_time-span <https://wienerschnitzler.org/dissolution-event/dissolution-time-span> .\n\n<https://wienerschnitzler.org/dissolution-event/dissolution-time-span> a ns1:E52_Time-Span ;\n rdfs:label \"2025-12-31\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"2025-12-31\"^^xsd:date ;\n ns1:P82b_end_of_the_end \"2025-12-31\"^^xsd:date .\n\n<https://wienerschnitzler.org/formation-event> a ns1:E66_Formation ;\n rdfs:label \"Wiener Moderne Verein wurde gegr\u00fcndet\"@de ;\n ns1:P4_has_time-span <https://wienerschnitzler.org/formation-event/formation-time-span> .\n\n<https://wienerschnitzler.org/formation-event/formation-time-span> a ns1:E52_Time-Span ;\n rdfs:label \"2023-10-14\"^^xsd:string ;\n ns1:P82a_begin_of_the_begin \"2023-10-14\"^^xsd:date ;\n ns1:P82b_end_of_the_end \"2023-10-14\"^^xsd:date .\n```\n\n### normalize_string\n\n```python\nfrom acdh_cidoc_pyutils import normalize_string\nstring = \"\"\"\\n\\nhallo\nmein schatz ich liebe dich\n du bist die einzige f\u00fcr mich\n \"\"\"\nprint(normalize_string(string))\n# returns\n# hallo mein schatz ich liebe dich du bist die einzige f\u00fcr mich\n```\n\n### extract date attributes (begin, end)\n\nExpects typical TEI date attributes like `@when, @when-iso, @notBefore, @notAfter, @from, @to, ...` and returns a tuple containing start and end date values. If only `@when or @when-iso` or only `@notBefore or @notAfter` are provided, the returned values are the same, unless the parameter `fill_missing` is set to `False`. 
\n\n```python\nfrom lxml.etree import Element\nfrom acdh_cidoc_pyutils import extract_begin_end\n\ndate_string = \"1900-12-12\"\ndate_object = Element(\"{http://www.tei-c.org/ns/1.0}tei\")\ndate_object.attrib[\"when-iso\"] = date_string\nprint(extract_begin_end(date_object))\n\n# returns\n# ('1900-12-12', '1900-12-12')\n\ndate_string = \"1900-12-12\"\ndate_object = Element(\"{http://www.tei-c.org/ns/1.0}tei\")\ndate_object.attrib[\"when-iso\"] = date_string\nprint(extract_begin_end(date_object, fill_missing=False))\n\n# returns\n# ('1900-12-12', None)\n\ndate_object = Element(\"{http://www.tei-c.org/ns/1.0}tei\")\ndate_object.attrib[\"notAfter\"] = \"1900-12-12\"\ndate_object.attrib[\"notBefore\"] = \"1800\"\nprint(extract_begin_end(date_object))\n\n# returns\n# ('1800', '1900-12-12')\n```\n\n### Convert a TEI document into an RDF graph representing a CIDOC CRM F24 Publication Expression.\n\n```python\nfrom acdh_cidoc_pyutils import teidoc_as_f24_publication_expression\n\nfile_path = \"L02643.xml\"\ndomain = \"https://schnitzler-briefe.acdh.oeaw.ac.at\"\n\nuri, g, mentions = teidoc_as_f24_publication_expression(\n file_path, domain, \".//tei:titleStmt/tei:title[@level='a']\"\n)\ng.serialize(file_path.replace(\".xml\", \".ttl\"))\n```\nreturns \n```ttl\n@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n\n<https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml> a <http://iflastandards.info/ns/fr/frbr/frbroo/F24_Publication_Expression> ;\n rdfs:label \"Paul Goldmann an Arthur Schnitzler, 6. 8. 
1889\"@de ;\n ns1:P1_is_identified_by <https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml/appellation> ;\n ns1:P67_refers_to <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb11485>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb12698>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb169237>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb2121>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb213>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb29698>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb50>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb52510>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb53101>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb53104>,\n <https://schnitzler-briefe.acdh.oeaw.ac.at/#pmb88392> .\n\n<https://pfp-schema.acdh.oeaw.ac.at/types/tei-document> a ns1:E55_Type ;\n rdfs:label \"A TEI/XML encoded text\"@en .\n\n<https://schnitzler-briefe.acdh.oeaw.ac.at/L02643.xml/appellation> a ns1:E33_E41_Linguistic_Appellation ;\n rdfs:label \"Paul Goldmann an Arthur Schnitzler, 6. 8. 1889\"@de ;\n ns1:P2_has_type <https://pfp-schema.acdh.oeaw.ac.at/types/tei-document> .\n```\n\n## development\n\n* `pip install -r requirements_dev.txt`\n* `flake8` -> linting\n* `coverage run -m pytest` -> runs tests and creates coverage stats\n",
"bugtrack_url": null,
"license": "MIT license",
"summary": "Helper functions for the generation of CIDOC CRMish RDF (from XML/TEI data)",
"version": "1.6.2",
"project_urls": {
"Homepage": "https://github.com/acdh-oeaw/acdh-cidoc-pyutils"
},
"split_keywords": [],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "c20f8b3738900560025db1bdee0a87929ef277dbddf1165779f74e56779e7fc3",
"md5": "a07aae3ff300b821de64adba0a9113d8",
"sha256": "1f3b3f73bddd52b86430c1ae210727a50004cebdab46934e69d47de801c99eea"
},
"downloads": -1,
"filename": "acdh_cidoc_pyutils-1.6.2-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a07aae3ff300b821de64adba0a9113d8",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.8",
"size": 14239,
"upload_time": "2025-01-31T12:31:51",
"upload_time_iso_8601": "2025-01-31T12:31:51.029741Z",
"url": "https://files.pythonhosted.org/packages/c2/0f/8b3738900560025db1bdee0a87929ef277dbddf1165779f74e56779e7fc3/acdh_cidoc_pyutils-1.6.2-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "4469d74e3312f701e241393d6c0546faf51e7a3d31a636b6bc8f45d9c53dec99",
"md5": "6c5bf5969dbfef0f2a85fcddc0ca89d4",
"sha256": "ae314d67593b1b3be8df652e8d5f1909c1eb9f379c3ef336e38331fbf0c26fae"
},
"downloads": -1,
"filename": "acdh_cidoc_pyutils-1.6.2.tar.gz",
"has_sig": false,
"md5_digest": "6c5bf5969dbfef0f2a85fcddc0ca89d4",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 17982,
"upload_time": "2025-01-31T12:31:53",
"upload_time_iso_8601": "2025-01-31T12:31:53.116659Z",
"url": "https://files.pythonhosted.org/packages/44/69/d74e3312f701e241393d6c0546faf51e7a3d31a636b6bc8f45d9c53dec99/acdh_cidoc_pyutils-1.6.2.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-01-31 12:31:53",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "acdh-oeaw",
"github_project": "acdh-cidoc-pyutils",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "acdh-cidoc-pyutils"
}