Bottler API
Overview
The gkc.bottler module provides canonical Wikibase JSON construction primitives. This is where distilled and validated data is transformed into the precise structure required by Wikidata: labels, descriptions, aliases, claims with qualifiers, references, and related item components.
Key principle: All code that produces Wikibase JSON structures should use bottler primitives, not build JSON inline. This ensures determinism, consistency, and maintainability across the entire pipeline.
The module provides:
- DataTypeTransformer: Static methods to convert source data values to Wikibase datatypes (items, quantities, times, etc.)
- SnakBuilder: Construct individual snaks (the property-value pairs that form claims)
- ClaimBuilder: Build complete claim structures with qualifiers and references
- LanguageBuilder: Construct multilingual label/description/alias blocks
- EntityShellBuilder: Build blank Wikibase entity scaffolds from profile metadata
- Utility functions: Helpers for value normalization and claim construction
Quick Start: Build a Complete Entity Shell
from gkc.bottler import EntityShellBuilder
metadata = {
"labels": {"en": "Example Item", "de": "Beispiel Artikel"},
"descriptions": {"en": "An example Wikidata item"},
"aliases": {"en": ["Example", "Test Item"]},
"statement_pids": ["P31", "P17", "P625"],
}
builder = EntityShellBuilder()
shell = builder.build_entity_shell(metadata)
# Result is a valid Wikibase entity structure ready for use in packets
print(shell)
# Output:
# {
# "labels": {"en": {"value": "Example Item", "language": "en"}, ...},
# "descriptions": {...},
# "aliases": {...},
# "claims": {"P17": [], "P31": [], "P625": []} # sorted deterministically
# }
Complete Example: Build a Statement with Qualifier and Reference
from gkc.bottler import DataTypeTransformer, SnakBuilder, ClaimBuilder
# 1. Create transformer
transformer = DataTypeTransformer()
# 2. Create snak builder
snak_builder = SnakBuilder(transformer)
# 3. Create claim builder
claim_builder = ClaimBuilder(snak_builder)
# 4. Build a claim with qualifiers and references
claim = claim_builder.create_claim(
property_id="P31", # instance of
value="Q5", # human
datatype="wikibase-item",
qualifiers=[
{
"property": "P585", # point in time
"value": "2005-06-15",
"datatype": "time",
}
],
references=[
{
"P248": {"value": "Q5", "datatype": "wikibase-item"}, # stated in
"P813": {"value": "2005-06-15", "datatype": "time"}, # retrieved
}
],
rank="preferred",
)
print(claim)
# Returns a complete Wikibase statement structure
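SnakBuilder.create_snak()
from gkc.bottler import DataTypeTransformer, SnakBuilder
builder = SnakBuilder(DataTypeTransformer())
item_snak = builder.create_snak("P31", "Q5", "wikibase-item")
quantity_snak = builder.create_snak("P1082", 1000, "quantity", {"unit": "1"})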
time_snak = builder.create_snak("P571", "2005-01-15", "time")
text_snak = builder.create_snak(
"P1476",
"Sample title",
"monolingualtext",
{"language": "en"},
)
coord_snak = builder.create_snak("P625", {"lat": 51.5, "lon": -0.12}, "globe-coordinate")
url_snak = builder.create_snak("P856", "https://example.org", "url")
string_snak = builder.create_snak("P1477", "Example string", "string")
print(item_snak)
print(quantity_snak)
print(time_snak)
print(text_snak)
print(coord_snak)
print(url_snak)
print(string_snak)
### `ClaimBuilder.create_claim()`
```python
from gkc.bottler import DataTypeTransformer, SnakBuilder, ClaimBuilder
claim_builder = ClaimBuilder(SnakBuilder(DataTypeTransformer()))
claim = claim_builder.create_claim(
property_id="P31",
value="Q5",
datatype="wikibase-item",
qualifiers=[
{"property": "P580", "value": "2005-01-15", "datatype": "time"},
],
references=[
{
"P248": {"value": "Q123", "datatype": "wikibase-item"},
"P854": {"value": "https://example.org", "datatype": "url"},
}
],
rank="normal",
)
print(claim)
```
Distillate.__init__() and Distillate.from_file()
import json
import tempfile
from pathlib import Path
from gkc.bottler import Distillate
config = {
"reference_library": {
"official_source": [
{"property": "P248", "value": "Q123", "datatype": "wikibase-item"}
]
},
"qualifier_library": {
"start_date": [
{"property": "P580", "value": "2005-01-15", "datatype": "time"}
]
},
"mappings": {
"claims": [
{
"property": "P31",
"references": [
{"name": "inline_ref", "property": "P248", "value": "Q123", "datatype": "wikibase-item"}
],
"qualifiers": [
{"name": "inline_qual", "property": "P580", "value": "2005-01-15", "datatype": "time"}
],
}
]
},
}
# Direct initialization
bottler = Distillate(config)
print(sorted(bottler.reference_library.keys()))
print(sorted(bottler.qualifier_library.keys()))
# File-based initialization
with tempfile.TemporaryDirectory() as tmpdir:
path = Path(tmpdir) / "distillate.json"
path.write_text(json.dumps(config), encoding="utf-8")
loaded = Distillate.from_file(str(path))
print(isinstance(loaded, Distillate))
API Reference (mkdocstrings)
DataTypeTransformer
Transforms source data values to Wikidata datavalue structures.
Source code in gkc/bottler.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192 | class DataTypeTransformer:
"""Transforms source data values to Wikidata datavalue structures."""
@staticmethod
def to_wikibase_item(qid: str) -> dict:
"""Convert a QID string to wikibase-entityid datavalue."""
return DataTypeTransformer.to_wikibase_entity_reference(
qid,
entity_type="item",
)
@staticmethod
def to_wikibase_entity_reference(
entity_id: str,
*,
entity_type: Optional[str] = None,
) -> dict:
"""Convert a resolved or symbolic entity identifier to a datavalue.
Real Wikibase IDs such as ``Q5`` and ``P31`` include ``numeric-id``.
Symbolic pre-resolution identifiers such as ``_instance_of`` omit it.
"""
normalized_entity_id = str(entity_id).strip()
if not normalized_entity_id:
raise ValueError("entity_id must be a non-empty string")
inferred_entity_type = entity_type
numeric_id: int | None = None
if normalized_entity_id.startswith("Q") and normalized_entity_id[1:].isdigit():
inferred_entity_type = inferred_entity_type or "item"
numeric_id = int(normalized_entity_id[1:])
elif (
normalized_entity_id.startswith("P") and normalized_entity_id[1:].isdigit()
):
inferred_entity_type = inferred_entity_type or "property"
numeric_id = int(normalized_entity_id[1:])
if inferred_entity_type not in {"item", "property"}:
raise ValueError(
"entity_type must be 'item' or 'property' for symbolic entity references"
)
value = {
"entity-type": inferred_entity_type,
"id": normalized_entity_id,
}
if numeric_id is not None:
value["numeric-id"] = numeric_id
return {
"value": value,
"type": "wikibase-entityid",
}
@staticmethod
def to_quantity(value: Union[float, int], unit: str = "1") -> dict:
"""Convert a number to quantity datavalue."""
return {
"value": {"amount": f"+{value}", "unit": unit},
"type": "quantity",
}
@staticmethod
def to_time(
date_input: Union[str, int],
precision: Optional[int] = None,
calendar: str = "Q1985727",
) -> dict:
"""Convert date input to Wikidata time datavalue.
Args:
date_input: Year (2005), partial date (2005-01),
or full ISO date (2005-01-15)
precision: Explicit precision (9=year, 10=month, 11=day)
or None to auto-detect
calendar: Calendar model QID (default: Q1985727 = Gregorian)
Returns:
Wikidata time datavalue structure
"""
# Convert int to string
date_str = str(date_input).strip()
# Parse the date and determine precision
if precision is None:
# Auto-detect precision from format
if "-" not in date_str:
# Just a year: 2005
precision = 9
time_str = f"+{date_str.zfill(4)}-00-00T00:00:00Z"
else:
parts = date_str.split("-")
if len(parts) == 2:
# Year-month: 2005-01
precision = 10
year, month = parts
time_str = f"+{year.zfill(4)}-{month.zfill(2)}-00T00:00:00Z"
elif len(parts) == 3:
# Full date: 2005-01-15
precision = 11
year, month, day = parts
# Handle time portion if present
if "T" in day:
day = day.split("T")[0]
time_str = (
f"+{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}T00:00:00Z"
)
else:
# Fallback for unexpected format
precision = 11
time_str = (
f"+{date_str}T00:00:00Z"
if "T" not in date_str
else f"+{date_str}"
)
else:
# Use explicit precision
if precision == 9:
# Year precision: use -00-00
year = date_str.split("-")[0]
time_str = f"+{year.zfill(4)}-00-00T00:00:00Z"
elif precision == 10:
# Month precision: use -00 for day
parts = date_str.split("-")
year = parts[0]
month = parts[1] if len(parts) > 1 else "01"
time_str = f"+{year.zfill(4)}-{month.zfill(2)}-00T00:00:00Z"
else:
# Day precision (11) or other
if "T" not in date_str:
time_str = f"+{date_str}T00:00:00Z"
else:
time_str = (
f"+{date_str}" if date_str.startswith("+") else f"+{date_str}"
)
return {
"value": {
"time": time_str,
"timezone": 0,
"before": 0,
"after": 0,
"precision": precision,
"calendarmodel": f"http://www.wikidata.org/entity/{calendar}",
},
"type": "time",
}
@staticmethod
def to_monolingualtext(text: str, language: str) -> dict:
"""Convert text to monolingualtext datavalue."""
return {
"value": {"text": text, "language": language},
"type": "monolingualtext",
}
@staticmethod
def to_globe_coordinate(lat: float, lon: float, precision: float = 0.0001) -> dict:
"""Convert latitude/longitude to globe-coordinate datavalue."""
return {
"value": {
"latitude": lat,
"longitude": lon,
"precision": precision,
"globe": "http://www.wikidata.org/entity/Q2",
},
"type": "globecoordinate",
}
@staticmethod
def to_url(url: str) -> dict:
"""Convert URL string to url datavalue."""
return {"value": url, "type": "string"}
|
to_globe_coordinate(lat, lon, precision=0.0001)
staticmethod
Convert latitude/longitude to globe-coordinate datavalue.
Source code in gkc/bottler.py
176
177
178
179
180
181
182
183
184
185
186
187 | @staticmethod
def to_globe_coordinate(lat: float, lon: float, precision: float = 0.0001) -> dict:
"""Convert latitude/longitude to globe-coordinate datavalue."""
return {
"value": {
"latitude": lat,
"longitude": lon,
"precision": precision,
"globe": "http://www.wikidata.org/entity/Q2",
},
"type": "globecoordinate",
}
|
to_monolingualtext(text, language)
staticmethod
Convert text to monolingualtext datavalue.
Source code in gkc/bottler.py
168
169
170
171
172
173
174 | @staticmethod
def to_monolingualtext(text: str, language: str) -> dict:
"""Convert text to monolingualtext datavalue."""
return {
"value": {"text": text, "language": language},
"type": "monolingualtext",
}
|
to_quantity(value, unit='1')
staticmethod
Convert a number to quantity datavalue.
Source code in gkc/bottler.py
| @staticmethod
def to_quantity(value: Union[float, int], unit: str = "1") -> dict:
"""Convert a number to quantity datavalue."""
return {
"value": {"amount": f"+{value}", "unit": unit},
"type": "quantity",
}
|
to_time(date_input, precision=None, calendar='Q1985727')
staticmethod
Convert date input to Wikidata time datavalue.
Parameters:
| Name |
Type |
Description |
Default |
date_input
|
Union[str, int]
|
Year (2005), partial date (2005-01),
or full ISO date (2005-01-15)
|
required
|
precision
|
Optional[int]
|
Explicit precision (9=year, 10=month, 11=day)
or None to auto-detect
|
None
|
calendar
|
str
|
Calendar model QID (default: Q1985727 = Gregorian)
|
'Q1985727'
|
Returns:
| Type |
Description |
dict
|
Wikidata time datavalue structure
|
Source code in gkc/bottler.py
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166 | @staticmethod
def to_time(
date_input: Union[str, int],
precision: Optional[int] = None,
calendar: str = "Q1985727",
) -> dict:
"""Convert date input to Wikidata time datavalue.
Args:
date_input: Year (2005), partial date (2005-01),
or full ISO date (2005-01-15)
precision: Explicit precision (9=year, 10=month, 11=day)
or None to auto-detect
calendar: Calendar model QID (default: Q1985727 = Gregorian)
Returns:
Wikidata time datavalue structure
"""
# Convert int to string
date_str = str(date_input).strip()
# Parse the date and determine precision
if precision is None:
# Auto-detect precision from format
if "-" not in date_str:
# Just a year: 2005
precision = 9
time_str = f"+{date_str.zfill(4)}-00-00T00:00:00Z"
else:
parts = date_str.split("-")
if len(parts) == 2:
# Year-month: 2005-01
precision = 10
year, month = parts
time_str = f"+{year.zfill(4)}-{month.zfill(2)}-00T00:00:00Z"
elif len(parts) == 3:
# Full date: 2005-01-15
precision = 11
year, month, day = parts
# Handle time portion if present
if "T" in day:
day = day.split("T")[0]
time_str = (
f"+{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}T00:00:00Z"
)
else:
# Fallback for unexpected format
precision = 11
time_str = (
f"+{date_str}T00:00:00Z"
if "T" not in date_str
else f"+{date_str}"
)
else:
# Use explicit precision
if precision == 9:
# Year precision: use -00-00
year = date_str.split("-")[0]
time_str = f"+{year.zfill(4)}-00-00T00:00:00Z"
elif precision == 10:
# Month precision: use -00 for day
parts = date_str.split("-")
year = parts[0]
month = parts[1] if len(parts) > 1 else "01"
time_str = f"+{year.zfill(4)}-{month.zfill(2)}-00T00:00:00Z"
else:
# Day precision (11) or other
if "T" not in date_str:
time_str = f"+{date_str}T00:00:00Z"
else:
time_str = (
f"+{date_str}" if date_str.startswith("+") else f"+{date_str}"
)
return {
"value": {
"time": time_str,
"timezone": 0,
"before": 0,
"after": 0,
"precision": precision,
"calendarmodel": f"http://www.wikidata.org/entity/{calendar}",
},
"type": "time",
}
|
to_url(url)
staticmethod
Convert URL string to url datavalue.
Source code in gkc/bottler.py
| @staticmethod
def to_url(url: str) -> dict:
"""Convert URL string to url datavalue."""
return {"value": url, "type": "string"}
|
to_wikibase_entity_reference(entity_id, *, entity_type=None)
staticmethod
Convert a resolved or symbolic entity identifier to a datavalue.
Real Wikibase IDs such as Q5 and P31 include numeric-id.
Symbolic pre-resolution identifiers such as _instance_of omit it.
Source code in gkc/bottler.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72 | @staticmethod
def to_wikibase_entity_reference(
entity_id: str,
*,
entity_type: Optional[str] = None,
) -> dict:
"""Convert a resolved or symbolic entity identifier to a datavalue.
Real Wikibase IDs such as ``Q5`` and ``P31`` include ``numeric-id``.
Symbolic pre-resolution identifiers such as ``_instance_of`` omit it.
"""
normalized_entity_id = str(entity_id).strip()
if not normalized_entity_id:
raise ValueError("entity_id must be a non-empty string")
inferred_entity_type = entity_type
numeric_id: int | None = None
if normalized_entity_id.startswith("Q") and normalized_entity_id[1:].isdigit():
inferred_entity_type = inferred_entity_type or "item"
numeric_id = int(normalized_entity_id[1:])
elif (
normalized_entity_id.startswith("P") and normalized_entity_id[1:].isdigit()
):
inferred_entity_type = inferred_entity_type or "property"
numeric_id = int(normalized_entity_id[1:])
if inferred_entity_type not in {"item", "property"}:
raise ValueError(
"entity_type must be 'item' or 'property' for symbolic entity references"
)
value = {
"entity-type": inferred_entity_type,
"id": normalized_entity_id,
}
if numeric_id is not None:
value["numeric-id"] = numeric_id
return {
"value": value,
"type": "wikibase-entityid",
}
|
to_wikibase_item(qid)
staticmethod
Convert a QID string to wikibase-entityid datavalue.
Source code in gkc/bottler.py
| @staticmethod
def to_wikibase_item(qid: str) -> dict:
"""Convert a QID string to wikibase-entityid datavalue."""
return DataTypeTransformer.to_wikibase_entity_reference(
qid,
entity_type="item",
)
|
SnakBuilder
Builds snak structures (the building blocks of claims).
Source code in gkc/bottler.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244 | class SnakBuilder:
"""Builds snak structures (the building blocks of claims)."""
def __init__(self, transformer: DataTypeTransformer):
self.transformer = transformer
def create_snak(
self, property_id: str, value: Any, datatype: str, transform_config: dict = None
) -> dict:
"""Create a snak with the appropriate datavalue."""
canonical_datatype = canonicalize_wikibase_datatype(datatype)
# Apply transformations based on datatype
if canonical_datatype == "wikibase-item":
datavalue = self.transformer.to_wikibase_item(value)
elif canonical_datatype == "quantity":
unit = transform_config.get("unit", "1") if transform_config else "1"
datavalue = self.transformer.to_quantity(value, unit)
elif canonical_datatype == "time":
# Get precision from transform_config or auto-detect
precision = None
if transform_config:
precision = transform_config.get("precision")
datavalue = self.transformer.to_time(value, precision)
elif canonical_datatype == "monolingualtext":
language = (
transform_config.get("language", "en") if transform_config else "en"
)
datavalue = self.transformer.to_monolingualtext(value, language)
elif canonical_datatype == "globe-coordinate":
datavalue = self.transformer.to_globe_coordinate(value["lat"], value["lon"])
elif canonical_datatype == "url":
datavalue = self.transformer.to_url(value)
else:
spec = get_wikibase_datatype_spec(canonical_datatype)
datavalue = {"value": value, "type": spec.datavalue_type}
return {
"snaktype": "value",
"property": property_id,
"datavalue": datavalue,
}
def create_snak_from_datavalue(self, property_id: str, datavalue: dict) -> dict:
"""Create a snak from a prebuilt datavalue structure."""
return {
"snaktype": "value",
"property": property_id,
"datavalue": datavalue,
}
|
create_snak(property_id, value, datatype, transform_config=None)
Create a snak with the appropriate datavalue.
Source code in gkc/bottler.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236 | def create_snak(
self, property_id: str, value: Any, datatype: str, transform_config: dict = None
) -> dict:
"""Create a snak with the appropriate datavalue."""
canonical_datatype = canonicalize_wikibase_datatype(datatype)
# Apply transformations based on datatype
if canonical_datatype == "wikibase-item":
datavalue = self.transformer.to_wikibase_item(value)
elif canonical_datatype == "quantity":
unit = transform_config.get("unit", "1") if transform_config else "1"
datavalue = self.transformer.to_quantity(value, unit)
elif canonical_datatype == "time":
# Get precision from transform_config or auto-detect
precision = None
if transform_config:
precision = transform_config.get("precision")
datavalue = self.transformer.to_time(value, precision)
elif canonical_datatype == "monolingualtext":
language = (
transform_config.get("language", "en") if transform_config else "en"
)
datavalue = self.transformer.to_monolingualtext(value, language)
elif canonical_datatype == "globe-coordinate":
datavalue = self.transformer.to_globe_coordinate(value["lat"], value["lon"])
elif canonical_datatype == "url":
datavalue = self.transformer.to_url(value)
else:
spec = get_wikibase_datatype_spec(canonical_datatype)
datavalue = {"value": value, "type": spec.datavalue_type}
return {
"snaktype": "value",
"property": property_id,
"datavalue": datavalue,
}
|
create_snak_from_datavalue(property_id, datavalue)
Create a snak from a prebuilt datavalue structure.
Source code in gkc/bottler.py
238
239
240
241
242
243
244 | def create_snak_from_datavalue(self, property_id: str, datavalue: dict) -> dict:
"""Create a snak from a prebuilt datavalue structure."""
return {
"snaktype": "value",
"property": property_id,
"datavalue": datavalue,
}
|
ClaimBuilder
Builds complete claim structures with qualifiers and references.
Source code in gkc/bottler.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346 | class ClaimBuilder:
"""Builds complete claim structures with qualifiers and references."""
def __init__(self, snak_builder: SnakBuilder):
self.snak_builder = snak_builder
def create_claim(
self,
property_id: str,
value: Any,
datatype: str,
transform_config: dict = None,
qualifiers: list[dict] = None,
references: list[dict] = None,
rank: str = "normal",
) -> dict:
"""Create a complete claim structure."""
claim = {
"mainsnak": self.snak_builder.create_snak(
property_id, value, datatype, transform_config
),
"type": "statement",
"rank": rank,
}
return self._attach_claim_metadata(
claim,
qualifiers=qualifiers,
references=references,
)
def create_claim_from_datavalue(
self,
property_id: str,
datavalue: dict,
*,
qualifiers: list[dict] = None,
references: list[dict] = None,
rank: str = "normal",
) -> dict:
"""Create a complete claim structure from a prebuilt datavalue."""
claim = {
"mainsnak": self.snak_builder.create_snak_from_datavalue(
property_id,
datavalue,
),
"type": "statement",
"rank": rank,
}
return self._attach_claim_metadata(
claim,
qualifiers=qualifiers,
references=references,
)
def _attach_claim_metadata(
self,
claim: dict,
*,
qualifiers: list[dict] = None,
references: list[dict] = None,
) -> dict:
"""Attach qualifier and reference groups to an existing claim."""
# Add qualifiers if provided
if qualifiers:
claim["qualifiers"] = {}
claim["qualifiers-order"] = []
for qual in qualifiers:
qual_prop = qual["property"]
qual_snak = self.snak_builder.create_snak(
qual_prop,
qual["value"],
qual["datatype"],
qual.get("transform"),
)
claim["qualifiers"][qual_prop] = [qual_snak]
claim["qualifiers-order"].append(qual_prop)
# Add references if provided
if references:
claim["references"] = []
for ref_group in references:
ref_snaks = {}
ref_order = []
for ref_prop, ref_config in ref_group.items():
ref_snak = self.snak_builder.create_snak(
ref_prop,
ref_config["value"],
ref_config.get("datatype", "wikibase-item"),
ref_config.get("transform"),
)
ref_snaks[ref_prop] = [ref_snak]
ref_order.append(ref_prop)
claim["references"].append(
{"snaks": ref_snaks, "snaks-order": ref_order}
)
return claim
|
create_claim(property_id, value, datatype, transform_config=None, qualifiers=None, references=None, rank='normal')
Create a complete claim structure.
Source code in gkc/bottler.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276 | def create_claim(
self,
property_id: str,
value: Any,
datatype: str,
transform_config: dict = None,
qualifiers: list[dict] = None,
references: list[dict] = None,
rank: str = "normal",
) -> dict:
"""Create a complete claim structure."""
claim = {
"mainsnak": self.snak_builder.create_snak(
property_id, value, datatype, transform_config
),
"type": "statement",
"rank": rank,
}
return self._attach_claim_metadata(
claim,
qualifiers=qualifiers,
references=references,
)
|
create_claim_from_datavalue(property_id, datavalue, *, qualifiers=None, references=None, rank='normal')
Create a complete claim structure from a prebuilt datavalue.
Source code in gkc/bottler.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301 | def create_claim_from_datavalue(
self,
property_id: str,
datavalue: dict,
*,
qualifiers: list[dict] = None,
references: list[dict] = None,
rank: str = "normal",
) -> dict:
"""Create a complete claim structure from a prebuilt datavalue."""
claim = {
"mainsnak": self.snak_builder.create_snak_from_datavalue(
property_id,
datavalue,
),
"type": "statement",
"rank": rank,
}
return self._attach_claim_metadata(
claim,
qualifiers=qualifiers,
references=references,
)
|
LanguageBuilder
Builds Wikibase language-keyed structures for labels, descriptions, and aliases.
Source code in gkc/bottler.py
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423 | class LanguageBuilder:
"""Builds Wikibase language-keyed structures for labels, descriptions, and aliases."""
@staticmethod
def build_label_block(language_dict: dict[str, str]) -> dict[str, dict[str, str]]:
"""Build a Wikibase labels block from language-to-value mapping.
Args:
language_dict: Dictionary mapping language codes to label text
(e.g., {"en": "Example", "mul": "Beispiel"})
Returns:
Wikibase labels structure: {lang: {value: text, language: lang}}
"""
if not isinstance(language_dict, dict):
return {}
labels: dict[str, dict[str, str]] = {}
for lang, value in sorted(language_dict.items()):
if isinstance(lang, str) and lang and isinstance(value, str) and value:
labels[lang] = {"value": value, "language": lang}
return labels
@staticmethod
def build_description_block(
language_dict: dict[str, str],
) -> dict[str, dict[str, str]]:
"""Build a Wikibase descriptions block from language-to-value mapping.
Args:
language_dict: Dictionary mapping language codes to description text
Returns:
Wikibase descriptions structure: {lang: {value: text, language: lang}}
"""
if not isinstance(language_dict, dict):
return {}
descriptions: dict[str, dict[str, str]] = {}
for lang, value in sorted(language_dict.items()):
if isinstance(lang, str) and lang and isinstance(value, str) and value:
descriptions[lang] = {"value": value, "language": lang}
return descriptions
@staticmethod
def build_alias_block(
language_dict: dict[str, Union[str, list[str]]],
) -> dict[str, list[dict[str, str]]]:
"""Build a Wikibase aliases block from language-to-values mapping.
Args:
language_dict: Dictionary mapping language codes to alias text(s)
(e.g., {"en": ["Alias1", "Alias2"]})
Returns:
Wikibase aliases structure: {lang: [{value: text, language: lang}, ...]}
"""
if not isinstance(language_dict, dict):
return {}
aliases: dict[str, list[dict[str, str]]] = {}
for lang, values in sorted(language_dict.items()):
if not isinstance(lang, str) or not lang:
continue
alias_list: list[str] = []
if isinstance(values, str) and values:
alias_list = [values]
elif isinstance(values, list):
alias_list = [v for v in values if isinstance(v, str) and v]
if alias_list:
aliases[lang] = [{"value": v, "language": lang} for v in alias_list]
return aliases
|
build_alias_block(language_dict)
staticmethod
Build a Wikibase aliases block from language-to-values mapping.
Parameters:
| Name |
Type |
Description |
Default |
language_dict
|
dict[str, Union[str, list[str]]]
|
Dictionary mapping language codes to alias text(s)
(e.g., {"en": ["Alias1", "Alias2"]})
|
required
|
Returns:
| Type |
Description |
dict[str, list[dict[str, str]]]
|
Wikibase aliases structure: {lang: [{value: text, language: lang}, ...]}
|
Source code in gkc/bottler.py
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423 | @staticmethod
def build_alias_block(
language_dict: dict[str, Union[str, list[str]]],
) -> dict[str, list[dict[str, str]]]:
"""Build a Wikibase aliases block from language-to-values mapping.
Args:
language_dict: Dictionary mapping language codes to alias text(s)
(e.g., {"en": ["Alias1", "Alias2"]})
Returns:
Wikibase aliases structure: {lang: [{value: text, language: lang}, ...]}
"""
if not isinstance(language_dict, dict):
return {}
aliases: dict[str, list[dict[str, str]]] = {}
for lang, values in sorted(language_dict.items()):
if not isinstance(lang, str) or not lang:
continue
alias_list: list[str] = []
if isinstance(values, str) and values:
alias_list = [values]
elif isinstance(values, list):
alias_list = [v for v in values if isinstance(v, str) and v]
if alias_list:
aliases[lang] = [{"value": v, "language": lang} for v in alias_list]
return aliases
|
build_description_block(language_dict)
staticmethod
Build a Wikibase descriptions block from language-to-value mapping.
Parameters:
| Name |
Type |
Description |
Default |
language_dict
|
dict[str, str]
|
Dictionary mapping language codes to description text
|
required
|
Returns:
| Type |
Description |
dict[str, dict[str, str]]
|
Wikibase descriptions structure: {lang: {value: text, language: lang}}
|
Source code in gkc/bottler.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 | @staticmethod
def build_description_block(
language_dict: dict[str, str],
) -> dict[str, dict[str, str]]:
"""Build a Wikibase descriptions block from language-to-value mapping.
Args:
language_dict: Dictionary mapping language codes to description text
Returns:
Wikibase descriptions structure: {lang: {value: text, language: lang}}
"""
if not isinstance(language_dict, dict):
return {}
descriptions: dict[str, dict[str, str]] = {}
for lang, value in sorted(language_dict.items()):
if isinstance(lang, str) and lang and isinstance(value, str) and value:
descriptions[lang] = {"value": value, "language": lang}
return descriptions
|
build_label_block(language_dict)
staticmethod
Build a Wikibase labels block from language-to-value mapping.
Parameters:
| Name |
Type |
Description |
Default |
language_dict
|
dict[str, str]
|
Dictionary mapping language codes to label text
(e.g., {"en": "Example", "mul": "Beispiel"})
|
required
|
Returns:
| Type |
Description |
dict[str, dict[str, str]]
|
Wikibase labels structure: {lang: {value: text, language: lang}}
|
Source code in gkc/bottler.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370 | @staticmethod
def build_label_block(language_dict: dict[str, str]) -> dict[str, dict[str, str]]:
"""Build a Wikibase labels block from language-to-value mapping.
Args:
language_dict: Dictionary mapping language codes to label text
(e.g., {"en": "Example", "mul": "Beispiel"})
Returns:
Wikibase labels structure: {lang: {value: text, language: lang}}
"""
if not isinstance(language_dict, dict):
return {}
labels: dict[str, dict[str, str]] = {}
for lang, value in sorted(language_dict.items()):
if isinstance(lang, str) and lang and isinstance(value, str) and value:
labels[lang] = {"value": value, "language": lang}
return labels
|
EntityShellBuilder
Builds Wikibase entity shells from profile metadata.
Source code in gkc/bottler.py
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
class EntityShellBuilder:
    """Builds Wikibase entity shells from profile metadata."""

    def __init__(self, language_builder: Optional["LanguageBuilder"] = None):
        """Store the builder used for label/description/alias blocks.

        Args:
            language_builder: Builder to delegate to. A new LanguageBuilder
                is created when this is omitted (or otherwise falsy).
        """
        self.language_builder = language_builder or LanguageBuilder()

    def build_entity_shell(
        self,
        entity_metadata: dict[str, Any],
    ) -> dict[str, Any]:
        """Build a blank Wikibase entity shell from profile entity metadata.

        Args:
            entity_metadata: Dictionary containing:
                - labels: {lang: value}
                - descriptions: {lang: value}
                - aliases: {lang: [values]}
                - statement_pids: [list of property IDs to pre-stage]

        Returns:
            Wikibase entity structure with labels/descriptions/aliases + empty
            claims. Sections that end up empty are omitted, so the result may
            be an empty dict.
        """
        builder = self.language_builder
        labels = builder.build_label_block(entity_metadata.get("labels", {}))
        descriptions = builder.build_description_block(
            entity_metadata.get("descriptions", {})
        )
        aliases = builder.build_alias_block(entity_metadata.get("aliases", {}))

        # Build empty claims structure with deterministic property ordering.
        statement_pids = entity_metadata.get("statement_pids", [])
        claims: dict[str, list] = {}
        if isinstance(statement_pids, list):
            # Filter before sorting: sorting a list that mixes strings with
            # other types raises TypeError, which would defeat the per-item
            # validity check.
            valid_pids = [
                pid for pid in statement_pids if isinstance(pid, str) and pid
            ]
            # Plain string ordering (e.g. "P17" < "P31" < "P625").
            claims = {pid: [] for pid in sorted(valid_pids)}

        sections = (
            ("labels", labels),
            ("descriptions", descriptions),
            ("aliases", aliases),
            ("claims", claims),
        )
        return {key: block for key, block in sections if block}
|
build_entity_shell(entity_metadata)
Build a blank Wikibase entity shell from profile entity metadata.
Parameters:
| Name |
Type |
Description |
Default |
entity_metadata
|
dict[str, Any]
|
Dictionary containing:
- labels: {lang: value}
- descriptions: {lang: value}
- aliases: {lang: [values]}
- statement_pids: [list of property IDs to pre-stage]
|
required
|
Returns:
| Type |
Description |
dict[str, Any]
|
Wikibase entity structure with labels/descriptions/aliases + empty claims
|
Source code in gkc/bottler.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def build_entity_shell(
    self,
    entity_metadata: dict[str, Any],
) -> dict[str, Any]:
    """Build a blank Wikibase entity shell from profile entity metadata.

    Args:
        entity_metadata: Dictionary containing:
            - labels: {lang: value}
            - descriptions: {lang: value}
            - aliases: {lang: [values]}
            - statement_pids: [list of property IDs to pre-stage]

    Returns:
        Wikibase entity structure with labels/descriptions/aliases + empty
        claims. Sections that end up empty are omitted, so the result may
        be an empty dict.
    """
    builder = self.language_builder
    labels = builder.build_label_block(entity_metadata.get("labels", {}))
    descriptions = builder.build_description_block(
        entity_metadata.get("descriptions", {})
    )
    aliases = builder.build_alias_block(entity_metadata.get("aliases", {}))

    # Build empty claims structure with deterministic property ordering.
    statement_pids = entity_metadata.get("statement_pids", [])
    claims: dict[str, list] = {}
    if isinstance(statement_pids, list):
        # Filter before sorting: sorting a list that mixes strings with
        # other types raises TypeError, which would defeat the per-item
        # validity check.
        valid_pids = [pid for pid in statement_pids if isinstance(pid, str) and pid]
        # Plain string ordering (e.g. "P17" < "P31" < "P625").
        claims = {pid: [] for pid in sorted(valid_pids)}

    sections = (
        ("labels", labels),
        ("descriptions", descriptions),
        ("aliases", aliases),
        ("claims", claims),
    )
    return {key: block for key, block in sections if block}
|
Utility Functions
normalize_claim_datavalue()
Determine the appropriate Wikibase datatype and formatted value from raw input.
Returns a tuple of (datatype, value) suitable for use in a datavalue block,
or None if the value cannot be transformed.
Supports:
- Entity references (Q###, P###)
- Strings
- Booleans
- Numbers (int, float)
- Dictionaries with id/value fields
Source code in gkc/bottler.py
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
def normalize_claim_datavalue(value: Any) -> Optional[tuple[str, Any]]:
    """Determine the appropriate Wikibase datatype and formatted value from raw input.

    Returns a tuple of (datatype, value) suitable for use in a datavalue block,
    or None if the value cannot be transformed.

    Supports:
        - Entity references (Q###, P###)
        - Strings
        - Booleans
        - Numbers (int, float)
        - Dictionaries with id/value fields

    Floats are written as plain decimal text: values Python would print in
    scientific notation (e.g. 1e-05) are expanded, and non-finite floats
    (NaN, +/-inf) yield None because they have no decimal representation.

    Args:
        value: Raw value to classify.

    Returns:
        (datatype, value) tuple, or None when the input is unsupported.
    """
    if isinstance(value, str):
        if validate_entity_reference(value):
            entity_id = value.upper()
            entity_type = "item" if entity_id.startswith("Q") else "property"
            return (
                "wikibase-entityid",
                {
                    "entity-type": entity_type,
                    "id": entity_id,
                    "numeric-id": int(entity_id[1:]),
                },
            )
        return ("string", value)

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return ("boolean", value)

    # NOTE(review): Wikibase's own JSON output writes quantity amounts with an
    # explicit leading sign ("+5"); positive numbers are emitted unsigned here,
    # as before - confirm whether a later stage adds the sign.
    if isinstance(value, int):
        return ("quantity", {"amount": str(value), "unit": "1"})

    if isinstance(value, float):
        from decimal import Decimal

        amount = Decimal(str(value))
        if not amount.is_finite():
            return None
        # The "f" format never uses an exponent, unlike str(float).
        return ("quantity", {"amount": format(amount, "f"), "unit": "1"})

    if isinstance(value, dict):
        # An entity-reference "id" wins over a generic "value" field.
        candidate_id = value.get("id")
        if isinstance(candidate_id, str) and validate_entity_reference(candidate_id):
            return normalize_claim_datavalue(candidate_id)
        if "value" in value:
            return normalize_claim_datavalue(value["value"])

    return None
|
build_claim_from_property_and_value()
Build a Wikibase statement structure from a property ID and raw value.
This is a convenience function that uses normalize_claim_datavalue to convert
the raw value and builds a complete statement structure.
Parameters:
| Name |
Type |
Description |
Default |
property_id
|
str
|
Wikidata property ID (e.g., "P31")
|
required
|
raw_value
|
Any
|
The value to build into a claim
|
required
|
Returns:
| Type |
Description |
Optional[dict[str, Any]]
|
A Wikibase statement structure or None if the value cannot be transformed
|
Source code in gkc/bottler.py
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
def build_claim_from_property_and_value(
    property_id: str, raw_value: Any
) -> Optional[dict[str, Any]]:
    """Wrap a raw value in a normal-rank Wikibase statement for one property.

    The raw value is first classified by normalize_claim_datavalue; the
    resulting datatype and value become the main snak's datavalue.

    Args:
        property_id: Wikidata property ID (e.g., "P31")
        raw_value: The value to build into a claim

    Returns:
        A Wikibase statement structure, or None if the value cannot be
        transformed
    """
    normalized = normalize_claim_datavalue(raw_value)
    if normalized is None:
        return None

    kind, payload = normalized
    main_snak: dict[str, Any] = {
        "snaktype": "value",
        "property": property_id,
        "datavalue": {"type": kind, "value": payload},
    }
    return {"mainsnak": main_snak, "type": "statement", "rank": "normal"}
|
Distillate
Distillate: Final product of the distillation process.
A Distillate is the fully configured transformer ready to convert source data
into Wikidata claims. It knows how to handle properties, qualifiers, references,
and all the complex datatype transformations needed.
Plain meaning: A fully configured data transformer ready to produce output.
Source code in gkc/bottler.py
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
class Distillate:
    """
    Distillate: Final product of the distillation process.

    A Distillate is the fully configured transformer ready to convert source data
    into Wikidata claims. It knows how to handle properties, qualifiers, references,
    and all the complex datatype transformations needed.

    Plain meaning: A fully configured data transformer ready to produce output.
    """

    def __init__(self, mapping_config: dict):
        """Initialize with a transformation recipe configuration.

        Args:
            mapping_config: Recipe dictionary. The optional
                "reference_library" and "qualifier_library" entries are
                shallow-copied, and inline named references/qualifiers found
                under mappings -> claims are merged into those copies.
        """
        self.config = mapping_config
        self.transformer = DataTypeTransformer()
        self.snak_builder = SnakBuilder(self.transformer)
        self.claim_builder = ClaimBuilder(self.snak_builder)
        # Load explicit reference and qualifier libraries (copied so that the
        # merge below does not add keys to the caller's dictionaries).
        self.reference_library = mapping_config.get("reference_library", {}).copy()
        self.qualifier_library = mapping_config.get("qualifier_library", {}).copy()
        # Extract and merge inline named references/qualifiers from claims
        self._extract_inline_named_elements()

    @classmethod
    def from_file(cls, file_path: str) -> "Distillate":
        """Load distillate configuration from a JSON file.

        Args:
            file_path: Path of the JSON configuration file, read as UTF-8.

        Returns:
            A new instance built from the parsed configuration.
        """
        import json

        # JSON interchange text is UTF-8; do not depend on the platform's
        # locale encoding, which differs between systems.
        with open(file_path, encoding="utf-8") as f:
            config = json.load(f)
        return cls(config)

    def _extract_inline_named_elements(self):
        """
        Scan all claims for inline named references and qualifiers.

        Merge them into the reference_library and qualifier_library.
        Explicit library entries take precedence over inline named elements.

        New consistent structure: references/qualifiers use "property" field,
        not property-as-key. Named references are defined inline with "name" field.
        """
        claims = self.config.get("mappings", {}).get("claims", [])
        for claim in claims:
            self._merge_inline_named(
                claim.get("references", []), self.reference_library
            )
            self._merge_inline_named(
                claim.get("qualifiers", []), self.qualifier_library
            )

    @staticmethod
    def _merge_inline_named(elements: Any, library: dict) -> None:
        """Register one claim's reference/qualifier array under its inline name.

        The array defines a reusable entry when at least one of its dict
        elements carries both "name" and "property"; the first such element
        supplies the name. Nothing happens when no element is named or when
        the library already holds that name.
        """
        name_holder = next(
            (
                element
                for element in elements
                if isinstance(element, dict)
                and "name" in element
                and "property" in element
            ),
            None,
        )
        if name_holder is None:
            return
        name = name_holder["name"]
        # Don't override explicit (or earlier) library entries
        if name in library:
            return
        # Store all property objects (without "name" key) as the library entry
        library[name] = [
            {key: val for key, val in element.items() if key != "name"}
            for element in elements
            if isinstance(element, dict) and "property" in element
        ]

    @staticmethod
    def _is_empty_value(value: Any) -> bool:
        """Return True when *value* should be treated as missing.

        Missing means: None, a float NaN, anything pandas reports as NA (when
        pandas is importable), any object that compares unequal to itself,
        or a string that is empty after stripping whitespace.
        """
        if value is None:
            return True
        if isinstance(value, float) and math.isnan(value):
            return True
        # pandas is optional. Any failure to import it (including a broken
        # installation) simply disables the pandas-specific check.
        try:
            import pandas as pd
        except Exception:
            pd = None
        if pd is not None:
            na_result = pd.isna(value)
            # For list/array input pd.isna answers element-wise; using that
            # array as a condition raises. Only a scalar answer describes the
            # value as a whole.
            if getattr(na_result, "ndim", 0) == 0 and bool(na_result):
                return True
        try:
            nan_check = value != value
        except Exception:
            nan_check = False
        # Element-wise comparisons return non-bool objects; ignore those.
        if isinstance(nan_check, bool) and nan_check:
            return True
        if isinstance(value, str) and not value.strip():
            return True
        return False

    @staticmethod
    def _split_values(value: Any, separator: Optional[str] = None) -> list[str]:
        """Flatten a raw value into a list of stripped, non-empty strings.

        A list or tuple is processed element by element; any other value is
        treated as a single element. Elements considered empty by
        _is_empty_value are dropped, the rest are converted to text, and text
        containing *separator* is split on it with blank parts discarded.
        """
        values = value if isinstance(value, (list, tuple)) else [value]
        result: list[str] = []
        for val in values:
            if Distillate._is_empty_value(val):
                continue
            text = val if isinstance(val, str) else str(val)
            text = text.strip()
            if separator and separator in text:
                # Split and filter
                parts = [p.strip() for p in text.split(separator)]
                result.extend([p for p in parts if p])
            else:
                result.append(text)
        return result
|
__init__(mapping_config)
Initialize with a transformation recipe configuration.
Source code in gkc/bottler.py
571
572
573
574
575
576
577
578
579
580
581
582
def __init__(self, mapping_config: dict):
    """Set up the builder chain and named libraries from a recipe dictionary.

    Args:
        mapping_config: Transformation recipe configuration. Its optional
            "reference_library" and "qualifier_library" entries are
            shallow-copied before inline named elements are merged in.
    """
    self.config = mapping_config

    # Each builder wraps the previous one: transformer -> snaks -> claims.
    transformer = DataTypeTransformer()
    snak_builder = SnakBuilder(transformer)
    self.transformer = transformer
    self.snak_builder = snak_builder
    self.claim_builder = ClaimBuilder(snak_builder)

    # Explicit libraries come first; copies keep the merge below from adding
    # keys to the dictionaries held by the caller's configuration.
    explicit_references = mapping_config.get("reference_library", {})
    self.reference_library = explicit_references.copy()
    explicit_qualifiers = mapping_config.get("qualifier_library", {})
    self.qualifier_library = explicit_qualifiers.copy()

    # Inline named references/qualifiers declared on claims are added last.
    self._extract_inline_named_elements()
|
from_file(file_path)
classmethod
Load distillate configuration from a JSON file.
Source code in gkc/bottler.py
585
586
587
588
589
590
591
592 | @classmethod
def from_file(cls, file_path: str) -> "Distillate":
    """Load distillate configuration from a JSON file.

    Args:
        file_path: Path of the JSON configuration file, read as UTF-8.

    Returns:
        A new instance built from the parsed configuration.
    """
    import json

    # JSON interchange text is UTF-8; do not depend on the platform's locale
    # encoding, which differs between systems.
    with open(file_path, encoding="utf-8") as f:
        config = json.load(f)
    return cls(config)
|
Design Principles
-
Determinism: All builders produce byte-identical output for identical input, enabling stable packet digests and test assertions.
-
Composition: Builders are composable—SnakBuilder uses DataTypeTransformer; ClaimBuilder uses SnakBuilder, etc.
-
Flexibility: Configuration is passed as simple dictionaries, allowing future extensibility without API churn.
-
Validation-Agnostic: Bottler focuses on structure production, not validation. Validation is handled by fermenter.
-
Profile-Aware: When integrated with still_charger and EntityShellBuilder, bottler generates profile-compliant packet shells.
Integration: Using Bottler in still_charger
Profile-only curation packets now include Wikibase JSON entity shells:
from gkc.still_charger import create_curation_packet
packet = create_curation_packet("Q4", operation_mode="single")
# Each entity in data.entities now includes an "entity" field with canonical Wikibase JSON
for entity in packet["data"]["entities"]:
print(entity["entity"]) # Fully formed Wikibase entity shell
# {
# "labels": {...},
# "descriptions": {...},
# "aliases": {...},
# "claims": {...}
# }
This ensures profile-only packets have shape-consistent, deterministic Wikibase JSON scaffolds ready for charging and validation.
See Also