Skip to content

Cooperage API

Overview

The cooperage module is the active packet-to-operation barreling layer between still_charger and shipper.

Its primary function today is to convert the entities of a charged curation packet into operation payloads compatible with WikibaseShipper.plan_batch and the WikibaseShipper write methods.

Legacy schema/RDF helpers remain available as compatibility re-exports.

Quick Start

# Minimal end-to-end example: build a one-entity packet and barrel it into
# Wikibase plan operations.
from gkc.cooperage import barrel_curation_packet_to_wikibase_plan

packet = {
    # Copied into every operation's metadata as "packet_id".
    "packet_id": "pkt-demo",
    "entities": [
        {
            # Surfaces as "packet_entity_id" in operation metadata and as
            # entity_id on any reported issue.
            "id": "ent-001",
            "profile": "TribalGovernmentUS",
            # NOTE(review): presumably consulted to resolve the "instance_of"
            # statement id to P31 -- confirm against the per-entity mapping helper.
            "profile_structure": {
                "statements": [{"id": "instance_of", "io_map": [{"to": "https://www.wikidata.org/entity/P31"}]}]
            },
            # An entity whose "data" is missing, empty, or not a dict is skipped.
            "data": {
                "labels": {"en": "Cherokee Nation"},
                # Keys are statement ids; each value may be one value or a list.
                "statements": {"instance_of": [{"value": "Q7840353"}]},
            },
        }
    ],
}

# Returns the list of operation dicts together with a BarrelPlanReport.
operations, report = barrel_curation_packet_to_wikibase_plan(packet)

# Number of operations that were added to the plan.
print(report.operations_created)
# "kind" is always "item"; "label" comes from the normalized English label
# when one is available, otherwise it is an empty string.
print(operations[0]["kind"], operations[0]["label"])

Public API Quick Starts

barrel_curation_packet_to_wikibase_plan()

# `packet` is a charged curation packet dict such as the one built in the
# Quick Start section.
from gkc.cooperage import barrel_curation_packet_to_wikibase_plan

operations, report = barrel_curation_packet_to_wikibase_plan(
    packet,
    # Keyword-only; used as the global statement-id -> property-id mapping.
    property_id_map={"instance_of": "P31"},
)

# Operations added to the plan.
print(report.operations_created)
# Entities that had no usable data or produced an empty payload.
print(report.entities_skipped)
# Warnings for statements or values that had to be skipped.
print([issue.message for issue in report.issues])

BarrelIssue and BarrelPlanReport

# Both report types are plain dataclasses and can be constructed directly.
from gkc.cooperage import BarrelIssue, BarrelPlanReport

# All four BarrelIssue fields are required strings.
issue = BarrelIssue(
    severity="warning",
    entity_id="ent-001",
    field="statements.instance_of",
    message="No property mapping found",
)

# The counters default to 0 and issues to an empty list when omitted.
report = BarrelPlanReport(operations_created=0, entities_skipped=1, issues=[issue])
print(report.entities_skipped, report.issues[0].severity)

Compatibility re-exports

# NOTE(review): the reference section shows the underlying function defined as
# fetch_entity_schema_specification -- confirm fetch_schema_specification is the
# name actually re-exported by gkc.cooperage.
from gkc.cooperage import fetch_entity_rdf, fetch_schema_specification

# fetch_entity_rdf performs a live HTTP request to wikidata.org; format must be
# one of "ttl", "rdf" or "nt" ("ttl" is the default).
rdf_ttl = fetch_entity_rdf("Q42", format="ttl")
schema_text = fetch_schema_specification("E502")

# Both results are text, so len() reports the number of characters received.
print(len(rdf_ttl), len(schema_text))

API Reference (mkdocstrings)

BarrelIssue

Source code in gkc/cooperage.py
37
38
39
40
41
42
@dataclass
class BarrelIssue:
    """A single problem recorded while barreling a packet entity.

    Instances are collected in ``BarrelPlanReport.issues``.
    """

    # Severity label; barrel_curation_packet_to_wikibase_plan records "warning".
    severity: str
    # Identifier of the packet entity the issue refers to.
    entity_id: str
    # Location inside the entity data, e.g. "statements.<statement_id>".
    field: str
    # Human-readable explanation of what was skipped and why.
    message: str

BarrelPlanReport

Source code in gkc/cooperage.py
45
46
47
48
49
@dataclass
class BarrelPlanReport:
    """Summary of one barreling run: two counters plus the issues encountered."""

    # Number of operations added to the returned plan.
    operations_created: int = 0
    # Number of packet entities that produced no operation.
    entities_skipped: int = 0
    # Issues gathered during the run; default_factory gives each report its own list.
    issues: list[BarrelIssue] = field(default_factory=list)

barrel_curation_packet_to_wikibase_plan()

Convert charged curation packet content into Wikibase plan operations.

Output operations are compatible with WikibaseShipper.plan_batch.

Source code in gkc/cooperage.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
def barrel_curation_packet_to_wikibase_plan(
    packet: dict[str, Any],
    *,
    property_id_map: Optional[dict[str, str]] = None,
) -> tuple[list[dict[str, Any]], BarrelPlanReport]:
    """Turn the entities of a charged curation packet into Wikibase plan operations.

    The produced operations are compatible with ``WikibaseShipper.plan_batch``.

    Args:
        packet: Packet dictionary. Its ``"entities"`` list is iterated and its
            ``"packet_id"`` is copied into each operation's metadata.
        property_id_map: Optional global statement-id to property-id mapping,
            handed to the property resolver alongside the per-entity mapping.

    Returns:
        ``(operations, report)``. One ``"item"`` operation is produced per
        entity that yields a non-empty payload. Entities whose ``"data"`` is
        missing, empty or not a dict, or whose payload ends up empty, are
        counted in ``report.entities_skipped``. Statements without a resolved
        property ID, and values for which no claim could be built, are recorded
        as ``"warning"`` issues on the report.
    """
    plan: list[dict[str, Any]] = []
    report = BarrelPlanReport()

    def warn(entity_ref: str, location: str, text: str) -> None:
        # Every issue raised by this function is a warning tied to one statement.
        report.issues.append(
            BarrelIssue(
                severity="warning",
                entity_id=entity_ref,
                field=location,
                message=text,
            )
        )

    for entity in packet.get("entities", []):
        entity_id = str(entity.get("id", ""))
        data = entity.get("data")
        if not (isinstance(data, dict) and data):
            # Nothing usable to barrel for this entity.
            report.entities_skipped += 1
            continue

        labels = _normalize_mono_map(data.get("labels"))
        descriptions = _normalize_mono_map(data.get("descriptions"))
        aliases = _normalize_aliases(data.get("aliases"))

        pid_lookup = _statement_pid_map(entity)
        raw_statements = data.get("statements", {})
        claims: list[dict[str, Any]] = []

        # A non-dict "statements" value contributes no claims at all.
        statement_items = (
            raw_statements.items() if isinstance(raw_statements, dict) else ()
        )
        for statement_id, values in statement_items:
            property_id = _resolve_property_id(
                statement_id,
                per_entity_map=pid_lookup,
                global_map=property_id_map,
            )
            if not property_id:
                warn(
                    entity_id,
                    f"statements.{statement_id}",
                    "No property ID mapping found; statement skipped.",
                )
                continue

            # A bare value is treated as a one-element list.
            for value in values if isinstance(values, list) else [values]:
                statement = _claim_statement(property_id, value)
                if statement is None:
                    warn(
                        entity_id,
                        f"statements.{statement_id}",
                        "Unsupported statement value shape; value skipped.",
                    )
                else:
                    claims.append(statement)

        # Only non-empty sections make it into the payload, in this fixed order.
        payload: dict[str, Any] = {
            section: content
            for section, content in (
                ("labels", labels),
                ("descriptions", descriptions),
                ("aliases", aliases),
                ("claims", claims),
            )
            if content
        }
        if not payload:
            report.entities_skipped += 1
            continue

        target_id = data.get("wikibase_id") or data.get("entity_id")
        if not isinstance(target_id, str):
            target_id = None

        english = labels.get("en")
        operation_label = english.get("value", "") if isinstance(english, dict) else ""

        plan.append(
            {
                "kind": "item",
                "label": operation_label,
                "entity_id": target_id,
                "payload": payload,
                "metadata": {
                    "packet_id": packet.get("packet_id"),
                    "packet_entity_id": entity_id,
                    "profile_entity": entity.get("profile_entity"),
                    "profile": entity.get("profile"),  # backward compat
                },
            }
        )
        report.operations_created += 1

    return plan, report

CooperageError (compatibility)

Bases: Exception

DEPRECATED: Use RuntimeError instead.

Raised when entity/schema fetch operations fail. This exception is provided for backward compatibility but new code should catch RuntimeError instead.

See: gkc.mash for fetch functions that raise RuntimeError.

Source code in gkc/cooperage.py
24
25
26
27
28
29
30
31
32
33
34
class CooperageError(Exception):
    """
    DEPRECATED: catch RuntimeError instead.

    Legacy exception type for failed entity/schema fetch operations. It is
    retained purely for backward compatibility; new code should catch
    RuntimeError.

    See: gkc.mash for fetch functions that raise RuntimeError.
    """

fetch_entity_rdf() (compatibility)

Fetch RDF data for a Wikidata entity.

Retrieves entity data in RDF format using Wikibase's Special:EntityData endpoint, which supports multiple RDF serialization formats (Turtle, RDF/XML, N-Triples).

Parameters:

Name Type Description Default
qid str

Wikidata entity ID (e.g., 'Q42', 'P31')

required
format str

RDF format - 'ttl' (Turtle), 'rdf' (RDF/XML), 'nt' (N-Triples)

'ttl'
user_agent Optional[str]

Custom user agent string

None

Returns:

Type Description
str

RDF data as string

Raises:

Type Description
RuntimeError

If fetch fails

Plain meaning: Download entity data in RDF format.

Example

rdf = fetch_entity_rdf('Q42')  # Get Douglas Adams RDF
rdf = fetch_entity_rdf('P31', format='nt')  # Get property in N-Triples

Source code in gkc/mash/core.py
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
def fetch_entity_rdf(
    qid: str, format: str = "ttl", user_agent: Optional[str] = None
) -> str:
    """
    Fetch RDF data for a Wikidata entity.

    Retrieves entity data in RDF format using Wikibase's Special:EntityData endpoint,
    which supports multiple RDF serialization formats (Turtle, RDF/XML, N-Triples).

    Args:
        qid: Wikidata entity ID (e.g., 'Q42', 'P31')
        format: RDF format - 'ttl' (Turtle), 'rdf' (RDF/XML), 'nt' (N-Triples)
        user_agent: Custom user agent string; DEFAULT_USER_AGENT is used when omitted

    Returns:
        RDF data as string

    Raises:
        ValueError: If qid is empty or format is not one of the supported values
        RuntimeError: If fetch fails

    Plain meaning: Download entity data in RDF format.

    Example:
        >>> rdf = fetch_entity_rdf('Q42')  # Get Douglas Adams RDF
        >>> rdf = fetch_entity_rdf('P31', format='nt')  # Get property in N-Triples
    """
    if not qid:
        raise ValueError("Entity ID (qid) is required")

    # Validate format. A tuple keeps the wording of the error message stable;
    # interpolating a set would list the options in a different order per run.
    valid_formats = ("ttl", "rdf", "nt")
    if format not in valid_formats:
        raise ValueError(
            f"Invalid format '{format}'. Must be one of: {', '.join(valid_formats)}"
        )

    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.{format}"
    headers = {"User-Agent": user_agent or DEFAULT_USER_AGENT}

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.text
    except requests.RequestException as exc:
        # Surface transport/HTTP errors as RuntimeError, keeping the cause chained.
        raise RuntimeError(
            f"Failed to fetch RDF for {qid} from {url}: {exc}"
        ) from exc

fetch_schema_specification() (compatibility)

Fetch Wikidata EntitySchema specification text (ShExC format).

Retrieves a Wikidata EntitySchema's schemaText from the raw action endpoint. EntitySchemas define the shape and structure constraints that form part of Wikibase's validation schema (along with property constraints).

Parameters:

Name Type Description Default
eid str

EntitySchema ID (e.g., 'E502')

required
user_agent Optional[str]

Custom user agent string

None

Returns:

Type Description
str

ShExC schema text as string

Raises:

Type Description
RuntimeError

If fetch fails

Plain meaning: Get the shape/structure specification for a Wikibase entity type.

Example

schema = fetch_entity_schema_specification('E502') # Schema for tribes

Source code in gkc/mash/core.py
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
def fetch_entity_schema_specification(
    eid: str, user_agent: Optional[str] = None
) -> str:
    """
    Return the ShExC specification text of a Wikidata EntitySchema.

    The schemaText embedded in the EntitySchema JSON document is used when it
    is a non-blank string. If the JSON lookup fails with RuntimeError, or the
    document carries no usable schemaText, the text is downloaded from the
    Special:EntitySchemaText endpoint instead.

    Args:
        eid: EntitySchema ID (e.g., 'E502')
        user_agent: Custom user agent string

    Returns:
        ShExC schema text as string

    Raises:
        ValueError: If eid is empty
        RuntimeError: If the fallback download fails

    Plain meaning: Get the shape/structure specification for a Wikibase entity type.

    Example:
        >>> schema = fetch_entity_schema_specification('E502')  # Schema for tribes
    """
    if not eid:
        raise ValueError("EntitySchema ID (eid) is required")

    # First choice: the JSON document (action=raw), which embeds schemaText.
    try:
        schema_document = fetch_entity_schema_json(eid, user_agent=user_agent)
    except RuntimeError:
        # JSON route unavailable; continue with the plain-text endpoint below.
        pass
    else:
        embedded_text = schema_document.get("schemaText")
        if isinstance(embedded_text, str) and embedded_text.strip():
            return embedded_text

    url = f"https://www.wikidata.org/wiki/Special:EntitySchemaText/{eid}"
    request_headers = {"User-Agent": user_agent or DEFAULT_USER_AGENT}

    try:
        reply = requests.get(url, headers=request_headers, timeout=30)
        reply.raise_for_status()
        return reply.text
    except requests.RequestException as exc:
        raise RuntimeError(
            f"Failed to fetch EntitySchema {eid} from {url}: {str(exc)}"
        ) from exc

fetch_entity_schema_json() (compatibility)

Fetch the JSON content for a Wikidata EntitySchema.

Uses the MediaWiki raw action endpoint to retrieve the full EntitySchema JSON, which includes labels, descriptions, aliases, and schemaText.

Parameters:

Name Type Description Default
eid str

EntitySchema ID (e.g., 'E502')

required
user_agent Optional[str]

Custom user agent string

None

Returns:

Type Description
dict[str, Any]

Parsed JSON dictionary for the EntitySchema

Raises:

Type Description
RuntimeError

If fetch or parsing fails

Plain meaning: Retrieve an EntitySchema JSON document from Wikibase.

Source code in gkc/mash/core.py
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
def fetch_entity_schema_json(
    eid: str, user_agent: Optional[str] = None
) -> dict[str, Any]:
    """
    Fetch the JSON content for a Wikidata EntitySchema.

    Uses the MediaWiki raw action endpoint to retrieve the full EntitySchema
    JSON, which includes labels, descriptions, aliases, and schemaText.

    Args:
        eid: EntitySchema ID (e.g., 'E502')
        user_agent: Custom user agent string; DEFAULT_USER_AGENT is used when omitted

    Returns:
        Parsed JSON dictionary for the EntitySchema

    Raises:
        ValueError: If eid is empty
        RuntimeError: If the request fails, the body is not valid JSON, or the
            decoded JSON is not an object

    Plain meaning: Retrieve an EntitySchema JSON document from Wikibase.
    """
    if not eid:
        raise ValueError("EntitySchema ID (eid) is required")

    url = f"https://www.wikidata.org/wiki/EntitySchema:{eid}?action=raw"
    headers = {"User-Agent": user_agent or DEFAULT_USER_AGENT}

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        raise RuntimeError(
            f"Failed to fetch EntitySchema JSON for {eid} from {url}: {exc}"
        ) from exc

    # Decode in its own step. In recent requests releases the JSON decode error
    # is both a RequestException and a ValueError, so a shared try block would
    # report malformed JSON as a fetch failure and never reach the parse handler.
    try:
        data = response.json()
    except ValueError as exc:
        raise RuntimeError(
            f"Failed to parse EntitySchema JSON for {eid}: {exc}"
        ) from exc

    # Valid JSON that is not an object (e.g. a list) is not an EntitySchema document.
    if not isinstance(data, dict):
        raise RuntimeError(f"Unexpected EntitySchema JSON content for {eid}")
    return data

fetch_entity_schema_metadata() (compatibility)

DEPRECATED: Fetch metadata for a Wikidata EntitySchema.

This function is no longer actively maintained. For basic entity schema retrieval, use fetch_entity_schema_json() instead.

To be removed in v0.3.0. Note that the current implementation already raises NotImplementedError on every call.

Source code in gkc/cooperage.py
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
def fetch_entity_schema_metadata(
    eid: str, language: str = "en", user_agent: Optional[str] = None
) -> dict:
    """
    REMOVED: Formerly fetched metadata for a Wikidata EntitySchema.

    This compatibility stub no longer does any work: every call raises
    NotImplementedError. Use fetch_entity_schema_json() instead and read the
    metadata from the dictionary it returns.

    The stub itself is to be removed in v0.3.0.

    Args:
        eid: EntitySchema ID (e.g., 'E502'); unused
        language: Language code; unused
        user_agent: Custom user agent string; unused

    Raises:
        NotImplementedError: Always
    """
    # The old signature is kept so legacy call sites fail with an explicit
    # migration hint instead of an argument error.
    raise NotImplementedError(
        "fetch_entity_schema_metadata has been removed. "
        "Use fetch_entity_schema_json() and extract metadata directly."
    )

get_entity_uri() (compatibility)

Get the full URI for a Wikidata entity.

Parameters:

Name Type Description Default
entity_id str

Entity ID (e.g., 'Q42', 'P31', 'L1', 'E502')

required

Returns:

Type Description
str

Full URI string

Example

>>> get_entity_uri('Q42')
'http://www.wikidata.org/entity/Q42'
>>> get_entity_uri('P31')
'http://www.wikidata.org/entity/P31'

Source code in gkc/utilities.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def get_entity_uri(entity_id: str) -> str:
    """
    Build the canonical Wikidata entity URI for an entity ID.

    Args:
        entity_id: Entity ID (e.g., 'Q42', 'P31', 'L1', 'E502')

    Returns:
        The ID appended to 'http://www.wikidata.org/entity/'

    Raises:
        ValueError: If entity_id is empty

    Example:
        >>> get_entity_uri('Q42')
        'http://www.wikidata.org/entity/Q42'
        >>> get_entity_uri('P31')
        'http://www.wikidata.org/entity/P31'
    """
    if entity_id:
        return f"http://www.wikidata.org/entity/{entity_id}"

    raise ValueError("Entity ID is required")

validate_entity_reference() (compatibility)

Validate that a string looks like a Wikidata entity ID.

Plain meaning: Check if an ID is in valid Wikidata format.

Parameters:

Name Type Description Default
entity_id str

String to validate

required

Returns:

Type Description
bool

True if valid format, False otherwise

Example

>>> validate_entity_reference('Q42')
True
>>> validate_entity_reference('P31')
True
>>> validate_entity_reference('E502')
True
>>> validate_entity_reference('invalid')
False

Source code in gkc/utilities.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def validate_entity_reference(entity_id: str) -> bool:
    """
    Validate that a string looks like a Wikidata entity ID.

    Plain meaning: Check if an ID is in valid Wikidata format.

    A valid ID is one of the letters Q, P, L or E (matched case-insensitively)
    followed by one or more ASCII digits. Only the format is checked; no lookup
    is performed.

    Args:
        entity_id: String to validate

    Returns:
        True if valid format, False otherwise (including for non-string input)

    Example:
        >>> validate_entity_reference('Q42')
        True
        >>> validate_entity_reference('P31')
        True
        >>> validate_entity_reference('E502')
        True
        >>> validate_entity_reference('invalid')
        False
        >>> validate_entity_reference('Q²')
        False
    """
    if not isinstance(entity_id, str):
        return False

    # Slicing (rather than indexing) makes the empty string fall through to False.
    prefix = entity_id[:1].upper()
    rest = entity_id[1:]

    # str.isdigit() alone also accepts non-ASCII digit characters such as
    # superscripts and Arabic-Indic numerals, which never occur in entity IDs.
    return prefix in ("Q", "P", "L", "E") and rest.isascii() and rest.isdigit()

See Also