Skip to content

Bottler API

Overview

The bottler module transforms distilled source values into Wikibase/Wikidata claim structures.

It provides:

  • Datatype transformation helpers (DataTypeTransformer)
  • Snak construction (SnakBuilder)
  • Claim construction with qualifiers/references (ClaimBuilder)
  • End-to-end mapping configuration container (Distillate)

Quick Start

from gkc.bottler import DataTypeTransformer, SnakBuilder, ClaimBuilder

# Compose the pipeline in one expression: the transformer feeds the snak
# builder, which in turn feeds the claim builder.
claim_builder = ClaimBuilder(SnakBuilder(DataTypeTransformer()))

claim = claim_builder.create_claim(property_id="P31", value="Q5", datatype="wikibase-item")

# Show the main snak's property alongside the claim's type and rank.
mainsnak = claim["mainsnak"]
print(mainsnak["property"], claim["type"], claim["rank"])

Public API Quick Starts

DataTypeTransformer.to_wikibase_item()

from gkc.bottler import DataTypeTransformer

# Build the datavalue for item Q42 and print it directly.
print(DataTypeTransformer.to_wikibase_item("Q42"))

DataTypeTransformer.to_quantity()

from gkc.bottler import DataTypeTransformer

# Build a quantity datavalue with amount 42 and unit "1", then print it.
print(DataTypeTransformer.to_quantity(42, unit="1"))

DataTypeTransformer.to_time()

from gkc.bottler import DataTypeTransformer

# One datavalue per input form: bare year, year-month, and a full date with
# an explicit precision.
time_datavalues = [
    DataTypeTransformer.to_time(2005),
    DataTypeTransformer.to_time("2005-01"),
    DataTypeTransformer.to_time("2005-01-15", precision=11),
]

for time_datavalue in time_datavalues:
    print(time_datavalue)

DataTypeTransformer.to_monolingualtext()

from gkc.bottler import DataTypeTransformer

# Pair the text "Hello" with the language code "en" and print the datavalue.
print(DataTypeTransformer.to_monolingualtext("Hello", "en"))

DataTypeTransformer.to_globe_coordinate()

from gkc.bottler import DataTypeTransformer

# Latitude and longitude are positional; precision is passed by keyword.
latitude, longitude = 51.5074, -0.1278
print(DataTypeTransformer.to_globe_coordinate(latitude, longitude, precision=0.0001))

DataTypeTransformer.to_url()

from gkc.bottler import DataTypeTransformer

# Wrap the URL in a datavalue and print it.
print(DataTypeTransformer.to_url("https://example.org"))

SnakBuilder.create_snak()

from gkc.bottler import DataTypeTransformer, SnakBuilder

builder = SnakBuilder(DataTypeTransformer())

# One row per snak: (property, value, datatype, transform config or None).
snak_specs = [
    ("P31", "Q5", "wikibase-item", None),
    ("P1082", 1000, "quantity", {"unit": "1"}),
    ("P571", "2005-01-15", "time", None),
    ("P1476", "Sample title", "monolingualtext", {"language": "en"}),
    ("P625", {"lat": 51.5, "lon": -0.12}, "globe-coordinate", None),
    ("P856", "https://example.org", "url", None),
    ("P1477", "Example string", "string", None),
]

for property_id, value, datatype, transform_config in snak_specs:
    print(builder.create_snak(property_id, value, datatype, transform_config))

ClaimBuilder.create_claim()

from gkc.bottler import DataTypeTransformer, SnakBuilder, ClaimBuilder

transformer = DataTypeTransformer()
snak_builder = SnakBuilder(transformer)
claim_builder = ClaimBuilder(snak_builder)

# Qualifiers are passed as a list of {"property", "value", "datatype"} dicts.
start_qualifier = {"property": "P580", "value": "2005-01-15", "datatype": "time"}

# Each reference group maps a property ID to its value and datatype.
source_reference = {
    "P248": {"value": "Q123", "datatype": "wikibase-item"},
    "P854": {"value": "https://example.org", "datatype": "url"},
}

claim = claim_builder.create_claim(
    "P31",
    "Q5",
    "wikibase-item",
    qualifiers=[start_qualifier],
    references=[source_reference],
    rank="normal",
)

print(claim)

Distillate.__init__() and Distillate.from_file()

import json
import tempfile
from pathlib import Path

from gkc.bottler import Distillate

# Explicit, reusable libraries keyed by name.
reference_library = {
    "official_source": [
        {"property": "P248", "value": "Q123", "datatype": "wikibase-item"}
    ]
}
qualifier_library = {
    "start_date": [
        {"property": "P580", "value": "2005-01-15", "datatype": "time"}
    ]
}

# A claim mapping whose inline reference/qualifier carry a "name" field.
claim_mapping = {
    "property": "P31",
    "references": [
        {"name": "inline_ref", "property": "P248", "value": "Q123", "datatype": "wikibase-item"}
    ],
    "qualifiers": [
        {"name": "inline_qual", "property": "P580", "value": "2005-01-15", "datatype": "time"}
    ],
}

config = {
    "reference_library": reference_library,
    "qualifier_library": qualifier_library,
    "mappings": {"claims": [claim_mapping]},
}

# Direct initialization
distillate = Distillate(config)
for library in (distillate.reference_library, distillate.qualifier_library):
    # Sorting a dict sorts its keys.
    print(sorted(library))

# File-based initialization
with tempfile.TemporaryDirectory() as tmpdir:
    config_path = Path(tmpdir, "distillate.json")
    config_path.write_text(json.dumps(config), encoding="utf-8")
    from_disk = Distillate.from_file(str(config_path))
    print(isinstance(from_disk, Distillate))

API Reference (mkdocstrings)

DataTypeTransformer

Transforms source data values to Wikidata datavalue structures.

Source code in gkc/bottler.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
class DataTypeTransformer:
    """Transforms source data values to Wikidata datavalue structures.

    Every method is a static method that returns a dict with a ``"value"``
    entry and a ``"type"`` entry.
    """

    @staticmethod
    def to_wikibase_item(qid: str) -> dict:
        """Convert a QID string to wikibase-entityid datavalue.

        Args:
            qid: Item identifier such as ``"Q42"``. The first character is
                dropped and the remainder is parsed as an integer.

        Returns:
            Datavalue dict of type ``"wikibase-entityid"``.

        Raises:
            ValueError: If the text after the first character is not an
                integer.
        """
        numeric_id = int(qid[1:])  # Remove 'Q' prefix
        return {
            "value": {
                "entity-type": "item",
                "numeric-id": numeric_id,
                "id": qid,
            },
            "type": "wikibase-entityid",
        }

    @staticmethod
    def to_quantity(value: Union[float, int], unit: str = "1") -> dict:
        """Convert a number to quantity datavalue.

        Args:
            value: Amount to encode. It is rendered with ``str()`` and given
                an explicit sign.
            unit: Unit string stored verbatim (default ``"1"``).

        Returns:
            Datavalue dict of type ``"quantity"`` whose ``amount`` is a signed
            string such as ``"+42"`` or ``"-5"``.
        """
        amount = str(value).strip()
        # Only add "+" when no sign is present; unconditionally prefixing it
        # turned negative numbers into the malformed "+-5".
        if not amount.startswith(("+", "-")):
            amount = f"+{amount}"
        return {
            "value": {"amount": amount, "unit": unit},
            "type": "quantity",
        }

    @staticmethod
    def to_time(
        date_input: Union[str, int],
        precision: Optional[int] = None,
        calendar: str = "Q1985727",
    ) -> dict:
        """Convert date input to Wikidata time datavalue.

        Args:
            date_input: Year (2005), partial date (2005-01),
                or full ISO date (2005-01-15). A leading ``+`` or ``-`` sign
                and a trailing ``T...`` time portion are accepted.
            precision: Explicit precision (9=year, 10=month, 11=day)
                or None to auto-detect from the number of date fields.
            calendar: Calendar model QID (default: Q1985727 = Gregorian)

        Returns:
            Wikidata time datavalue structure
        """
        text = str(date_input).strip()

        # Peel off an optional sign first so that a negative year ("-0500")
        # is not mistaken for a "-" field separator and an input that already
        # starts with "+" is not signed twice.
        sign = "+"
        if text.startswith(("+", "-")):
            sign, text = text[0], text[1:]

        # Separate the calendar date from any time-of-day portion, then split
        # the date into year / month / day fields.
        date_part, _, clock = text.partition("T")
        fields = date_part.split("-")
        year = fields[0].zfill(4)
        month = fields[1].zfill(2) if len(fields) > 1 else None
        day = fields[2].zfill(2) if len(fields) > 2 else None

        explicit = precision is not None
        if not explicit:
            # Auto-detect precision from how many date fields were supplied.
            if month is None:
                precision = 9
            elif day is None:
                precision = 10
            else:
                precision = 11

        if precision == 9:
            # Year precision: month and day are zeroed.
            month, day = "00", "00"
        elif precision == 10:
            # Month precision: day is zeroed; a missing month falls back to 01.
            month, day = month or "01", "00"
        else:
            # Day precision (11) or other: keep the supplied fields and fill
            # any missing one with 01 so the timestamp stays well-formed.
            month, day = month or "01", day or "01"

        # A supplied time of day is kept only for an explicit precision other
        # than year/month/day; otherwise the time is zeroed.
        keep_clock = explicit and bool(clock) and precision not in (9, 10, 11)
        time_part = clock if keep_clock else "00:00:00Z"
        time_str = f"{sign}{year}-{month}-{day}T{time_part}"

        return {
            "value": {
                "time": time_str,
                "timezone": 0,
                "before": 0,
                "after": 0,
                "precision": precision,
                "calendarmodel": f"http://www.wikidata.org/entity/{calendar}",
            },
            "type": "time",
        }

    @staticmethod
    def to_monolingualtext(text: str, language: str) -> dict:
        """Convert text to monolingualtext datavalue.

        Args:
            text: The text content.
            language: Language code stored next to the text.

        Returns:
            Datavalue dict of type ``"monolingualtext"``.
        """
        return {
            "value": {"text": text, "language": language},
            "type": "monolingualtext",
        }

    @staticmethod
    def to_globe_coordinate(lat: float, lon: float, precision: float = 0.0001) -> dict:
        """Convert latitude/longitude to globe-coordinate datavalue.

        Args:
            lat: Latitude, stored unchanged.
            lon: Longitude, stored unchanged.
            precision: Coordinate precision (default ``0.0001``).

        Returns:
            Datavalue dict of type ``"globecoordinate"`` whose globe is fixed
            to ``http://www.wikidata.org/entity/Q2``.
        """
        return {
            "value": {
                "latitude": lat,
                "longitude": lon,
                "precision": precision,
                "globe": "http://www.wikidata.org/entity/Q2",
            },
            "type": "globecoordinate",
        }

    @staticmethod
    def to_url(url: str) -> dict:
        """Convert URL string to url datavalue.

        Args:
            url: The URL, stored unchanged.

        Returns:
            Datavalue dict whose ``type`` is ``"string"``.
        """
        return {"value": url, "type": "string"}

to_globe_coordinate(lat, lon, precision=0.0001) staticmethod

Convert latitude/longitude to globe-coordinate datavalue.

Source code in gkc/bottler.py
134
135
136
137
138
139
140
141
142
143
144
145
@staticmethod
def to_globe_coordinate(lat: float, lon: float, precision: float = 0.0001) -> dict:
    """Wrap a latitude/longitude pair in a globe-coordinate datavalue.

    Args:
        lat: Latitude, stored unchanged.
        lon: Longitude, stored unchanged.
        precision: Coordinate precision (default ``0.0001``).

    Returns:
        Datavalue dict of type ``"globecoordinate"``; the globe is always
        ``http://www.wikidata.org/entity/Q2``.
    """
    coordinate = {"latitude": lat, "longitude": lon}
    coordinate["precision"] = precision
    coordinate["globe"] = "http://www.wikidata.org/entity/Q2"
    return {"value": coordinate, "type": "globecoordinate"}

to_monolingualtext(text, language) staticmethod

Convert text to monolingualtext datavalue.

Source code in gkc/bottler.py
126
127
128
129
130
131
132
@staticmethod
def to_monolingualtext(text: str, language: str) -> dict:
    """Wrap a text and its language code in a monolingualtext datavalue.

    Args:
        text: The text content.
        language: Language code stored next to the text.

    Returns:
        Datavalue dict of type ``"monolingualtext"``.
    """
    payload = {"text": text, "language": language}
    return {"value": payload, "type": "monolingualtext"}

to_quantity(value, unit='1') staticmethod

Convert a number to quantity datavalue.

Source code in gkc/bottler.py
32
33
34
35
36
37
38
@staticmethod
def to_quantity(value: Union[float, int], unit: str = "1") -> dict:
    """Convert a number to quantity datavalue.

    Args:
        value: Amount to encode. It is rendered with ``str()`` and given an
            explicit sign.
        unit: Unit string stored verbatim (default ``"1"``).

    Returns:
        Datavalue dict of type ``"quantity"`` whose ``amount`` is a signed
        string such as ``"+42"`` or ``"-5"``.
    """
    amount = str(value).strip()
    # Only add "+" when no sign is present; unconditionally prefixing it
    # turned negative numbers into the malformed "+-5".
    if not amount.startswith(("+", "-")):
        amount = f"+{amount}"
    return {
        "value": {"amount": amount, "unit": unit},
        "type": "quantity",
    }

to_time(date_input, precision=None, calendar='Q1985727') staticmethod

Convert date input to Wikidata time datavalue.

Parameters:

Name Type Description Default
date_input Union[str, int]

Year (2005), partial date (2005-01), or full ISO date (2005-01-15)

required
precision Optional[int]

Explicit precision (9=year, 10=month, 11=day) or None to auto-detect

None
calendar str

Calendar model QID (default: Q1985727 = Gregorian)

'Q1985727'

Returns:

Type Description
dict

Wikidata time datavalue structure

Source code in gkc/bottler.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
@staticmethod
def to_time(
    date_input: Union[str, int],
    precision: Optional[int] = None,
    calendar: str = "Q1985727",
) -> dict:
    """Convert date input to Wikidata time datavalue.

    Args:
        date_input: Year (2005), partial date (2005-01),
            or full ISO date (2005-01-15). A leading ``+`` or ``-`` sign
            and a trailing ``T...`` time portion are accepted.
        precision: Explicit precision (9=year, 10=month, 11=day)
            or None to auto-detect from the number of date fields.
        calendar: Calendar model QID (default: Q1985727 = Gregorian)

    Returns:
        Wikidata time datavalue structure
    """
    text = str(date_input).strip()

    # Peel off an optional sign first so that a negative year ("-0500") is
    # not mistaken for a "-" field separator and an input that already starts
    # with "+" is not signed twice.
    sign = "+"
    if text.startswith(("+", "-")):
        sign, text = text[0], text[1:]

    # Separate the calendar date from any time-of-day portion, then split the
    # date into year / month / day fields.
    date_part, _, clock = text.partition("T")
    fields = date_part.split("-")
    year = fields[0].zfill(4)
    month = fields[1].zfill(2) if len(fields) > 1 else None
    day = fields[2].zfill(2) if len(fields) > 2 else None

    explicit = precision is not None
    if not explicit:
        # Auto-detect precision from how many date fields were supplied.
        if month is None:
            precision = 9
        elif day is None:
            precision = 10
        else:
            precision = 11

    if precision == 9:
        # Year precision: month and day are zeroed.
        month, day = "00", "00"
    elif precision == 10:
        # Month precision: day is zeroed; a missing month falls back to 01.
        month, day = month or "01", "00"
    else:
        # Day precision (11) or other: keep the supplied fields and fill any
        # missing one with 01 so the timestamp stays well-formed.
        month, day = month or "01", day or "01"

    # A supplied time of day is kept only for an explicit precision other
    # than year/month/day; otherwise the time is zeroed.
    keep_clock = explicit and bool(clock) and precision not in (9, 10, 11)
    time_part = clock if keep_clock else "00:00:00Z"
    time_str = f"{sign}{year}-{month}-{day}T{time_part}"

    return {
        "value": {
            "time": time_str,
            "timezone": 0,
            "before": 0,
            "after": 0,
            "precision": precision,
            "calendarmodel": f"http://www.wikidata.org/entity/{calendar}",
        },
        "type": "time",
    }

to_url(url) staticmethod

Convert URL string to url datavalue.

Source code in gkc/bottler.py
147
148
149
150
@staticmethod
def to_url(url: str) -> dict:
    """Wrap a URL string in a datavalue.

    Args:
        url: The URL, stored unchanged.

    Returns:
        Datavalue dict whose ``type`` is ``"string"``.
    """
    datavalue = {"value": url}
    datavalue["type"] = "string"
    return datavalue

to_wikibase_item(qid) staticmethod

Convert a QID string to wikibase-entityid datavalue.

Source code in gkc/bottler.py
19
20
21
22
23
24
25
26
27
28
29
30
@staticmethod
def to_wikibase_item(qid: str) -> dict:
    """Build a wikibase-entityid datavalue from a QID string.

    Args:
        qid: Item identifier such as ``"Q42"``. The first character is
            dropped and the remainder is parsed as an integer.

    Returns:
        Datavalue dict of type ``"wikibase-entityid"``.

    Raises:
        ValueError: If the text after the first character is not an integer.
    """
    # Everything after the leading letter is the numeric part of the ID.
    digits = qid[1:]
    entity = {
        "entity-type": "item",
        "numeric-id": int(digits),
        "id": qid,
    }
    return {"value": entity, "type": "wikibase-entityid"}

SnakBuilder

Builds snak structures (the building blocks of claims).

Source code in gkc/bottler.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
class SnakBuilder:
    """Builds snak structures (the building blocks of claims)."""

    def __init__(self, transformer: DataTypeTransformer):
        # Supplies the to_* converters used by create_snak.
        self.transformer = transformer

    def create_snak(
        self, property_id: str, value: Any, datatype: str, transform_config: dict = None
    ) -> dict:
        """Build a ``"value"`` snak for a property from a raw value.

        Args:
            property_id: Property the snak belongs to.
            value: Raw value; for ``"globe-coordinate"`` it is indexed with
                ``"lat"`` and ``"lon"``.
            datatype: Selects the converter; unrecognised datatypes are
                stored as plain strings.
            transform_config: Optional settings: ``"unit"`` for quantities,
                ``"precision"`` for times, ``"language"`` for monolingual
                text.

        Returns:
            Dict with ``snaktype``, ``property`` and ``datavalue`` entries.
        """

        def option(key, fallback=None):
            # A missing or empty transform_config always yields the fallback.
            if not transform_config:
                return fallback
            return transform_config.get(key, fallback)

        if datatype == "wikibase-item":
            payload = self.transformer.to_wikibase_item(value)
        elif datatype == "quantity":
            payload = self.transformer.to_quantity(value, option("unit", "1"))
        elif datatype == "time":
            # A precision of None lets the transformer auto-detect it.
            payload = self.transformer.to_time(value, option("precision"))
        elif datatype == "monolingualtext":
            payload = self.transformer.to_monolingualtext(
                value, option("language", "en")
            )
        elif datatype == "globe-coordinate":
            payload = self.transformer.to_globe_coordinate(
                value["lat"], value["lon"]
            )
        elif datatype == "url":
            payload = self.transformer.to_url(value)
        else:
            # Anything else is passed through as a string datavalue.
            payload = {"value": value, "type": "string"}

        snak = {"snaktype": "value", "property": property_id}
        snak["datavalue"] = payload
        return snak

create_snak(property_id, value, datatype, transform_config=None)

Create a snak with the appropriate datavalue.

Source code in gkc/bottler.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def create_snak(
    self, property_id: str, value: Any, datatype: str, transform_config: dict = None
) -> dict:
    """Build a ``"value"`` snak for a property from a raw value.

    Args:
        property_id: Property the snak belongs to.
        value: Raw value; for ``"globe-coordinate"`` it is indexed with
            ``"lat"`` and ``"lon"``.
        datatype: Selects the converter; unrecognised datatypes are stored
            as plain strings.
        transform_config: Optional settings: ``"unit"`` for quantities,
            ``"precision"`` for times, ``"language"`` for monolingual text.

    Returns:
        Dict with ``snaktype``, ``property`` and ``datavalue`` entries.
    """

    def option(key, fallback=None):
        # A missing or empty transform_config always yields the fallback.
        if not transform_config:
            return fallback
        return transform_config.get(key, fallback)

    if datatype == "wikibase-item":
        payload = self.transformer.to_wikibase_item(value)
    elif datatype == "quantity":
        payload = self.transformer.to_quantity(value, option("unit", "1"))
    elif datatype == "time":
        # A precision of None lets the transformer auto-detect it.
        payload = self.transformer.to_time(value, option("precision"))
    elif datatype == "monolingualtext":
        payload = self.transformer.to_monolingualtext(
            value, option("language", "en")
        )
    elif datatype == "globe-coordinate":
        payload = self.transformer.to_globe_coordinate(value["lat"], value["lon"])
    elif datatype == "url":
        payload = self.transformer.to_url(value)
    else:
        # Anything else is passed through as a string datavalue.
        payload = {"value": value, "type": "string"}

    snak = {"snaktype": "value", "property": property_id}
    snak["datavalue"] = payload
    return snak

ClaimBuilder

Builds complete claim structures with qualifiers and references.

Source code in gkc/bottler.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
class ClaimBuilder:
    """Builds complete claim structures with qualifiers and references."""

    def __init__(self, snak_builder: SnakBuilder):
        # Used for the main snak as well as qualifier and reference snaks.
        self.snak_builder = snak_builder

    def create_claim(
        self,
        property_id: str,
        value: Any,
        datatype: str,
        transform_config: dict = None,
        qualifiers: list[dict] = None,
        references: list[dict] = None,
        rank: str = "normal",
    ) -> dict:
        """Create a complete claim structure.

        Args:
            property_id: Property of the main snak.
            value: Raw value of the main snak.
            datatype: Datatype of the main snak.
            transform_config: Optional transform settings for the main snak.
            qualifiers: Optional list of dicts with ``"property"``,
                ``"value"``, ``"datatype"`` and an optional ``"transform"``.
                Several entries may share one property.
            references: Optional list of reference groups. Each group maps a
                property ID to a dict with ``"value"`` and optional
                ``"datatype"`` (default ``"wikibase-item"``) and
                ``"transform"``.
            rank: Claim rank (default ``"normal"``).

        Returns:
            Claim dict with ``mainsnak``, ``type`` and ``rank``. The
            ``qualifiers``/``qualifiers-order`` and ``references`` entries
            are added only when the matching argument is non-empty.

        Raises:
            KeyError: If a qualifier lacks ``"property"``, ``"value"`` or
                ``"datatype"``, or a reference entry lacks ``"value"``.
        """
        claim = {
            "mainsnak": self.snak_builder.create_snak(
                property_id, value, datatype, transform_config
            ),
            "type": "statement",
            "rank": rank,
        }

        # Add qualifiers if provided
        if qualifiers:
            qualifier_snaks: dict = {}
            qualifier_order: list = []
            for qual in qualifiers:
                qual_prop = qual["property"]
                qual_snak = self.snak_builder.create_snak(
                    qual_prop,
                    qual["value"],
                    qual["datatype"],
                    qual.get("transform"),
                )
                # Group snaks by property: a repeated property extends its
                # list instead of replacing the earlier snak, and is listed
                # only once in the order.
                if qual_prop not in qualifier_snaks:
                    qualifier_snaks[qual_prop] = []
                    qualifier_order.append(qual_prop)
                qualifier_snaks[qual_prop].append(qual_snak)
            claim["qualifiers"] = qualifier_snaks
            claim["qualifiers-order"] = qualifier_order

        # Add references if provided
        if references:
            claim["references"] = []
            for ref_group in references:
                ref_snaks = {}
                ref_order = []
                for ref_prop, ref_config in ref_group.items():
                    ref_snak = self.snak_builder.create_snak(
                        ref_prop,
                        ref_config["value"],
                        ref_config.get("datatype", "wikibase-item"),
                        ref_config.get("transform"),
                    )
                    ref_snaks[ref_prop] = [ref_snak]
                    ref_order.append(ref_prop)
                claim["references"].append(
                    {"snaks": ref_snaks, "snaks-order": ref_order}
                )

        return claim

create_claim(property_id, value, datatype, transform_config=None, qualifiers=None, references=None, rank='normal')

Create a complete claim structure.

Source code in gkc/bottler.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def create_claim(
    self,
    property_id: str,
    value: Any,
    datatype: str,
    transform_config: dict = None,
    qualifiers: list[dict] = None,
    references: list[dict] = None,
    rank: str = "normal",
) -> dict:
    """Create a complete claim structure.

    Args:
        property_id: Property of the main snak.
        value: Raw value of the main snak.
        datatype: Datatype of the main snak.
        transform_config: Optional transform settings for the main snak.
        qualifiers: Optional list of dicts with ``"property"``, ``"value"``,
            ``"datatype"`` and an optional ``"transform"``. Several entries
            may share one property.
        references: Optional list of reference groups. Each group maps a
            property ID to a dict with ``"value"`` and optional
            ``"datatype"`` (default ``"wikibase-item"``) and ``"transform"``.
        rank: Claim rank (default ``"normal"``).

    Returns:
        Claim dict with ``mainsnak``, ``type`` and ``rank``. The
        ``qualifiers``/``qualifiers-order`` and ``references`` entries are
        added only when the matching argument is non-empty.

    Raises:
        KeyError: If a qualifier lacks ``"property"``, ``"value"`` or
            ``"datatype"``, or a reference entry lacks ``"value"``.
    """
    claim = {
        "mainsnak": self.snak_builder.create_snak(
            property_id, value, datatype, transform_config
        ),
        "type": "statement",
        "rank": rank,
    }

    # Add qualifiers if provided
    if qualifiers:
        qualifier_snaks: dict = {}
        qualifier_order: list = []
        for qual in qualifiers:
            qual_prop = qual["property"]
            qual_snak = self.snak_builder.create_snak(
                qual_prop,
                qual["value"],
                qual["datatype"],
                qual.get("transform"),
            )
            # Group snaks by property: a repeated property extends its list
            # instead of replacing the earlier snak, and is listed only once
            # in the order.
            if qual_prop not in qualifier_snaks:
                qualifier_snaks[qual_prop] = []
                qualifier_order.append(qual_prop)
            qualifier_snaks[qual_prop].append(qual_snak)
        claim["qualifiers"] = qualifier_snaks
        claim["qualifiers-order"] = qualifier_order

    # Add references if provided
    if references:
        claim["references"] = []
        for ref_group in references:
            ref_snaks = {}
            ref_order = []
            for ref_prop, ref_config in ref_group.items():
                ref_snak = self.snak_builder.create_snak(
                    ref_prop,
                    ref_config["value"],
                    ref_config.get("datatype", "wikibase-item"),
                    ref_config.get("transform"),
                )
                ref_snaks[ref_prop] = [ref_snak]
                ref_order.append(ref_prop)
            claim["references"].append(
                {"snaks": ref_snaks, "snaks-order": ref_order}
            )

    return claim

Distillate

Distillate: Final product of the distillation process.

A Distillate is the fully configured transformer ready to convert source data into Wikidata claims. It knows how to handle properties, qualifiers, references, and all the complex datatype transformations needed.

Plain meaning: A fully configured data transformer ready to produce output.

Source code in gkc/bottler.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
class Distillate:
    """
    Distillate: Final product of the distillation process.

    A Distillate is the fully configured transformer ready to convert source data
    into Wikidata claims. It knows how to handle properties, qualifiers, references,
    and all the complex datatype transformations needed.

    Plain meaning: A fully configured data transformer ready to produce output.
    """

    def __init__(self, mapping_config: dict):
        """Initialize with a transformation recipe configuration.

        Args:
            mapping_config: Configuration dict. The keys read here are
                ``"reference_library"``, ``"qualifier_library"`` and
                ``"mappings"`` (with its ``"claims"`` list).
        """
        self.config = mapping_config
        self.transformer = DataTypeTransformer()
        self.snak_builder = SnakBuilder(self.transformer)
        self.claim_builder = ClaimBuilder(self.snak_builder)

        # Load explicit reference and qualifier libraries. They are shallow
        # copies so inline entries merged below do not alter the caller's
        # config; `or {}` tolerates a library explicitly set to null.
        self.reference_library = dict(mapping_config.get("reference_library") or {})
        self.qualifier_library = dict(mapping_config.get("qualifier_library") or {})

        # Extract and merge inline named references/qualifiers from claims
        self._extract_inline_named_elements()

    @classmethod
    def from_file(cls, file_path: str) -> "Distillate":
        """Load distillate configuration from a JSON file.

        Args:
            file_path: Path of the JSON configuration file.

        Returns:
            A new instance built from the parsed configuration.
        """
        import json

        # Read as UTF-8 explicitly rather than relying on the platform's
        # default encoding.
        with open(file_path, encoding="utf-8") as f:
            config = json.load(f)
        return cls(config)

    @staticmethod
    def _merge_named_group(entries: Any, library: dict) -> None:
        """Register one claim's inline named group in ``library``.

        The name comes from the first dict entry that has both ``"name"``
        and ``"property"``. The stored value is every dict entry that has a
        ``"property"``, with the ``"name"`` key removed. An existing library
        entry of the same name is left untouched.
        """
        group_name = None
        for entry in entries:
            if isinstance(entry, dict) and "name" in entry and "property" in entry:
                group_name = entry["name"]
                break
        else:
            # No named entry: nothing to register.
            return

        # Don't override explicit library entries
        if group_name in library:
            return

        library[group_name] = [
            {key: val for key, val in entry.items() if key != "name"}
            for entry in entries
            if isinstance(entry, dict) and "property" in entry
        ]

    def _extract_inline_named_elements(self):
        """
        Scan all claims for inline named references and qualifiers.
        Merge them into the reference_library and qualifier_library.
        Explicit library entries take precedence over inline named elements.

        New consistent structure: references/qualifiers use "property" field,
        not property-as-key. Named references are defined inline with "name" field.
        """
        mappings = self.config.get("mappings") or {}
        claims = mappings.get("claims") or []

        for claim in claims:
            # `or []` tolerates a missing or null references/qualifiers key.
            self._merge_named_group(
                claim.get("references") or [], self.reference_library
            )
            self._merge_named_group(
                claim.get("qualifiers") or [], self.qualifier_library
            )

    @staticmethod
    def _is_empty_value(value: Any) -> bool:
        """Return True for None, NaN-like values and blank strings."""
        if value is None:
            return True
        if isinstance(value, float) and math.isnan(value):
            return True
        # pandas is optional; any failure to import it just skips this check.
        try:
            import pandas as pd
        except Exception:
            pd = None
        if pd is not None:
            try:
                if pd.isna(value):
                    return True
            except (TypeError, ValueError):
                # For list-like input pd.isna returns an array whose truth
                # value is ambiguous; fall through to the remaining checks.
                pass
        # NaN-like objects are the ones that compare unequal to themselves.
        try:
            nan_check = value != value
        except Exception:
            nan_check = False
        if isinstance(nan_check, bool) and nan_check:
            return True
        if isinstance(value, str) and not value.strip():
            return True
        return False

    @staticmethod
    def _split_values(value: Any, separator: Optional[str] = None) -> list[str]:
        """Normalise a value, or a list/tuple of values, to stripped strings.

        Empty values are skipped. When ``separator`` is given and occurs in
        an item, the item is split on it and blank parts are dropped.
        """
        values = value if isinstance(value, (list, tuple)) else [value]
        result: list[str] = []

        for val in values:
            if Distillate._is_empty_value(val):
                continue
            text = val if isinstance(val, str) else str(val)
            text = text.strip()

            if separator and separator in text:
                # Split and filter
                parts = [p.strip() for p in text.split(separator)]
                result.extend([p for p in parts if p])
            else:
                result.append(text)

        return result

__init__(mapping_config)

Initialize with a transformation recipe configuration.

Source code in gkc/bottler.py
268
269
270
271
272
273
274
275
276
277
278
279
280
def __init__(self, mapping_config: dict):
    """Set up the builders and libraries from a mapping configuration.

    Args:
        mapping_config: Configuration dict. ``"reference_library"`` and
            ``"qualifier_library"`` are read here, each defaulting to an
            empty dict.
    """
    self.config = mapping_config

    # Builder chain: transformer -> snak builder -> claim builder.
    transformer = DataTypeTransformer()
    snak_builder = SnakBuilder(transformer)
    self.transformer = transformer
    self.snak_builder = snak_builder
    self.claim_builder = ClaimBuilder(snak_builder)

    # Shallow copies of the explicitly configured libraries.
    explicit_references = mapping_config.get("reference_library", {})
    explicit_qualifiers = mapping_config.get("qualifier_library", {})
    self.reference_library = explicit_references.copy()
    self.qualifier_library = explicit_qualifiers.copy()

    # Merge in named references/qualifiers that claims define inline.
    self._extract_inline_named_elements()

from_file(file_path) classmethod

Load distillate configuration from a JSON file.

Source code in gkc/bottler.py
282
283
284
285
286
287
288
289
@classmethod
def from_file(cls, file_path: str) -> "Distillate":
    """Load distillate configuration from a JSON file.

    Args:
        file_path: Path of the JSON configuration file.

    Returns:
        A new instance built from the parsed configuration.
    """
    import json

    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding.
    with open(file_path, encoding="utf-8") as f:
        config = json.load(f)
    return cls(config)

See Also