-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_rdf.py
83 lines (71 loc) · 1.96 KB
/
make_rdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
from tqdm import tqdm
from acdh_cidoc_pyutils import (
make_e42_identifiers,
make_appellations,
make_birth_death_entities,
)
from acdh_cidoc_pyutils.namespaces import CIDOC
from acdh_tei_pyutils.tei import TeiReader
from acdh_tei_pyutils.utils import get_xmlid
from rdflib import Graph, Namespace, URIRef
from rdflib.namespace import RDF
# Graph that accumulates every triple produced by this script.
g = Graph()

# Base URI used as the namespace for the resources minted below.
domain = "https://pmb.acdh.oeaw.ac.at/"
PU = Namespace(domain)

# NOTE(review): LIMIT is False whether or not NO_LIMIT is set, so the
# environment variable currently only controls the message below and the
# item list is never truncated. Assign an integer here to cap the number of
# processed items -- TODO confirm the intended default.
LIMIT = False
if os.environ.get("NO_LIMIT"):
    print("no limit")

# Output directory for the serialized graph; created if it does not exist.
rdf_dir = "./rdf"
os.makedirs(rdf_dir, exist_ok=True)

# TEI index to read and the TEI element name of the entries to convert.
index_file = "./to_ingest/listperson.xml"
entity_type = "person"
# Parse the TEI index and select every element of the configured type that
# carries an @xml:id attribute.
doc = TeiReader(index_file)
items = doc.any_xpath(f".//tei:{entity_type}[@xml:id]")

# A falsy LIMIT keeps the full selection; otherwise keep the first LIMIT items.
items = items[:LIMIT] if LIMIT else items
# Convert every selected TEI entry into triples and merge them into `g`.

# Loop-invariant arguments, built once instead of on every iteration.
place_domain = f"{PU}place__"
type_domain = "http://hansi/4/ever"

# (event_type, default_prefix) pairs handed to make_birth_death_entities.
# Both events share every other argument, so they are driven from one table
# instead of two copy-pasted calls.
life_events = (
    ("birth", "Geburt von"),
    ("death", "Tod von"),
)

for x in tqdm(items, total=len(items)):
    xml_id = get_xmlid(x)
    subj = URIRef(f"{PU}{xml_id}")
    g.add((subj, RDF.type, CIDOC["E21_Person"]))

    # ids
    g += make_e42_identifiers(
        subj,
        x,
        type_domain=type_domain,
        default_lang="de",
    )

    # names
    g += make_appellations(
        subj,
        x,
        type_domain=type_domain,
        default_lang="de",
    )

    # birth and death
    for event_type, default_prefix in life_events:
        # The call returns three values; only the graph is used here, so the
        # other two are discarded rather than bound to misleading names.
        event_graph, _, _ = make_birth_death_entities(
            subj,
            x,
            place_domain,
            event_type=event_type,
            verbose=True,
            default_prefix=default_prefix,
            date_node_xpath="/tei:date[1]",
            place_id_xpath="//tei:settlement[1]/@key",
        )
        g += event_graph
# Serialize the accumulated graph to <rdf_dir>/<entity_type>.ttl.
save_path = os.path.join(rdf_dir, f"{entity_type}.ttl")
print(f"saving graph as {save_path}")
# Request Turtle explicitly so the file content matches its .ttl extension
# regardless of the installed rdflib's default serialization format (older
# rdflib releases default to RDF/XML).
g.serialize(destination=save_path, format="turtle")