Skip to content

Mash Formatters API

Overview

Convert Wikidata templates to different output formats for bulk editing, validation, or export. Currently supports QuickStatements V1 format for creating and updating items on Wikidata.

Quick Start

from gkc.mash import (
    WikibaseLoader,
    apply_item_property_filters,
    apply_template_language_filter,
)
from gkc.mash_formatters import QSV1Formatter

# Load the template for item Q42
template = WikibaseLoader().load("Q42")

# Render it as QuickStatements V1 text that creates a new item, then show it
print(QSV1Formatter().format(template, for_new_item=True))

Classes

QSV1Formatter

Format a WikibaseItemTemplate as QuickStatements V1 syntax.

Plain meaning: Convert a template to bulk-edit format for Wikidata.

Source code in gkc/mash_formatters.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class QSV1Formatter:
    """Format a WikibaseItemTemplate as QuickStatements V1 syntax.

    Every output line is a tab-separated command whose first column is the
    subject: "LAST" for a newly created item, or the item's QID for updates.

    Plain meaning: Convert a template to bulk-edit format for Wikidata.
    """

    def __init__(
        self,
        exclude_properties: list[str] | None = None,
        exclude_qualifiers: bool = False,
        exclude_references: bool = False,
        entity_labels: dict[str, str] | None = None,
    ):
        """Initialize the formatter.

        Args:
            exclude_properties: List of property IDs to skip (e.g., ['P31']).
            exclude_qualifiers: If True, omit all qualifiers.
            exclude_references: If True, omit all references. Reference output
                          is not implemented yet, so none are emitted either way.
            entity_labels: Dict mapping entity IDs (properties and items) to labels
                          (e.g., {'P31': 'instance of', 'Q5': 'human'}) for comments.

        Plain meaning: Configure what to include or exclude from output.
        """

        # Falsy collection arguments (None or empty) become fresh containers.
        self.entity_labels = entity_labels if entity_labels else {}
        self.exclude_properties = exclude_properties if exclude_properties else []
        self.exclude_references = exclude_references
        self.exclude_qualifiers = exclude_qualifiers

    def format(self, template: WikibaseItemTemplate, for_new_item: bool = True) -> str:
        """Convert template to QuickStatements V1 format.

        Emits label (L<lang>), description (D<lang>) and alias (A<lang>)
        commands followed by one line per claim. Both modes produce the same
        commands; only the subject column differs.

        Args:
            template: The WikibaseItemTemplate to format.
            for_new_item: If True, emit "CREATE" and use "LAST" as the subject
                         of every line. If False, use template.qid as the
                         subject so the lines update an existing item.

        Returns:
            QuickStatements V1 text, one command per line.

        Raises:
            ValueError: If for_new_item is False and the template has no QID.

        Plain meaning: Generate editable QS text from the template.
        """

        lines: list[str] = []

        if for_new_item:
            lines.append("CREATE")
            subject = "LAST"
        else:
            subject = template.qid
            if not subject:
                # Without a QID every line would start with "None".
                raise ValueError("template.qid is required when for_new_item=False")

        # Terms: the command column is a one-letter prefix plus language code.
        for lang, text in template.labels.items():
            lines.append(f'{subject}\tL{lang}\t"{text}"')

        for lang, text in template.descriptions.items():
            lines.append(f'{subject}\tD{lang}\t"{text}"')

        for lang, alias_list in template.aliases.items():
            for alias in alias_list:
                lines.append(f'{subject}\tA{lang}\t"{alias}"')

        # Claims, with inline comments when labels are available
        for claim in template.claims:
            if claim.property_id in self.exclude_properties:
                continue

            line = self._claim_to_qs_line(subject, claim)
            if line:
                lines.append(line)

        return "\n".join(lines)

    def _claim_to_qs_line(self, subject: str, claim) -> str:
        """Convert a single claim to a QS V1 line with optional comment.

        Layout of the returned line (columns are joined with tabs):
        - subject, property, value
        - one property/value column pair per qualifier
        - Comments: /* comment text */ as the last column
        - Time values with precision: +2001-01-15T00:00:00Z/11
          (where /11 is day precision)

        References are not emitted yet.

        Returns:
            The formatted line, or "" when the claim has no value (the caller
            drops empty lines).

        Plain meaning: Format one statement with qualifiers/comments.
        """

        value = claim.value
        if value is None or value == "":
            return ""

        parts: list[str] = [
            subject,
            claim.property_id,
            self._with_precision(value, getattr(claim, "value_metadata", None)),
        ]

        # Comment fragments for the main claim and each qualifier
        comment_parts: list[str] = []
        main_comment = self._comment_for(claim.property_id, value)
        if main_comment:
            comment_parts.append(main_comment)

        # Qualifiers follow the main value as extra tab-separated columns
        if not self.exclude_qualifiers and claim.qualifiers:
            for qual in claim.qualifiers:
                prop = qual.get("property", "")
                qual_value = qual.get("value", "")
                if not (prop and qual_value):
                    continue

                parts.append(prop)
                parts.append(self._with_precision(qual_value, qual.get("metadata")))

                qual_comment = self._comment_for(prop, qual_value)
                if qual_comment:
                    comment_parts.append(qual_comment)

        # TODO: emit references (QS V1 uses an S prefix, e.g. S248 for
        # 'stated in'); until then exclude_references has no visible effect.

        line = "\t".join(parts)
        if comment_parts:
            comment = "; ".join(comment_parts)
            line += f"\t/* {comment} */"

        return line

    def _comment_for(self, prop: str, value) -> str | None:
        """Return '<property label> is <value or item label>', or None.

        None is returned when no label is known for the property.
        """
        prop_label = self.entity_labels.get(prop)
        if not prop_label:
            return None
        if self._is_item_id(value):
            # Show the item's label when known, otherwise the bare Q-ID
            value = self.entity_labels.get(value, value)
        return f"{prop_label} is {value}"

    @staticmethod
    def _is_item_id(value) -> bool:
        """Return True when value is a string of the form Q<digits>."""
        return (
            isinstance(value, str) and value.startswith("Q") and value[1:].isdigit()
        )

    @staticmethod
    def _with_precision(value, metadata) -> str:
        """Return value as text, suffixed with /<precision> when metadata has one."""
        if metadata and "precision" in metadata:
            return f"{value}/{metadata['precision']}"
        return str(value)

__init__(exclude_properties=None, exclude_qualifiers=False, exclude_references=False, entity_labels=None)

Initialize the formatter.

Parameters:

Name Type Description Default
exclude_properties list[str] | None

List of property IDs to skip (e.g., ['P31']).

None
exclude_qualifiers bool

If True, omit all qualifiers.

False
exclude_references bool

If True, omit all references.

False
entity_labels dict[str, str] | None

Dict mapping entity IDs (properties and items) to labels (e.g., {'P31': 'instance of', 'Q5': 'human'}) for comments.

None

Plain meaning: Configure what to include or exclude from output.

Source code in gkc/mash_formatters.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self,
    exclude_properties: list[str] | None = None,
    exclude_qualifiers: bool = False,
    exclude_references: bool = False,
    entity_labels: dict[str, str] | None = None,
):
    """Record the formatter's output options on the instance.

    Args:
        exclude_properties: Property IDs to leave out (e.g., ['P31']).
        exclude_qualifiers: When True, qualifiers are left out.
        exclude_references: When True, references are left out.
        entity_labels: Labels keyed by entity ID, for properties and items
                      (e.g., {'P31': 'instance of', 'Q5': 'human'}); used
                      for comments.

    Plain meaning: Configure what to include or exclude from output.
    """

    # Falsy collection arguments (None or empty) become fresh containers.
    self.entity_labels = entity_labels if entity_labels else {}
    self.exclude_properties = exclude_properties if exclude_properties else []
    self.exclude_references = exclude_references
    self.exclude_qualifiers = exclude_qualifiers

format(template, for_new_item=True)

Convert template to QuickStatements V1 format.

Parameters:

Name Type Description Default
template WikibaseItemTemplate

The WikibaseItemTemplate to format.

required
for_new_item bool

If True, use "CREATE" and "LAST" syntax for new items. If False, use the template's QID as the subject of each line, for updating an existing item.

True

Returns:

Type Description
str

QuickStatements V1 text.

Plain meaning: Generate editable QS text from the template.

Source code in gkc/mash_formatters.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def format(self, template: WikibaseItemTemplate, for_new_item: bool = True) -> str:
    """Convert template to QuickStatements V1 format.

    Emits label (L<lang>), description (D<lang>) and alias (A<lang>)
    commands followed by one line per claim. Both modes produce the same
    commands; only the subject column differs.

    Args:
        template: The WikibaseItemTemplate to format.
        for_new_item: If True, emit "CREATE" and use "LAST" as the subject
                     of every line. If False, use template.qid as the
                     subject so the lines update an existing item.

    Returns:
        QuickStatements V1 text, one command per line.

    Raises:
        ValueError: If for_new_item is False and the template has no QID.

    Plain meaning: Generate editable QS text from the template.
    """

    lines: list[str] = []

    if for_new_item:
        lines.append("CREATE")
        subject = "LAST"
    else:
        subject = template.qid
        if not subject:
            # Without a QID every line would start with "None".
            raise ValueError("template.qid is required when for_new_item=False")

    # Terms: the command column is a one-letter prefix plus language code.
    for lang, text in template.labels.items():
        lines.append(f'{subject}\tL{lang}\t"{text}"')

    for lang, text in template.descriptions.items():
        lines.append(f'{subject}\tD{lang}\t"{text}"')

    for lang, alias_list in template.aliases.items():
        for alias in alias_list:
            lines.append(f'{subject}\tA{lang}\t"{alias}"')

    # Claims, with inline comments when labels are available
    for claim in template.claims:
        if claim.property_id in self.exclude_properties:
            continue

        line = self._claim_to_qs_line(subject, claim)
        if line:
            lines.append(line)

    return "\n".join(lines)

Examples

Format for new item creation

from gkc.mash import (
    WikibaseLoader,
    apply_item_property_filters,
    apply_template_language_filter,
)
from gkc.mash_formatters import QSV1Formatter

loader = WikibaseLoader()
template = loader.load("Q42")

# Filter to simplify output
apply_template_language_filter(template, "en")
apply_item_property_filters(template, exclude_properties=["P18", "P373"])

# Format with CREATE/LAST syntax
formatter = QSV1Formatter()
qs_text = formatter.format(template, for_new_item=True)

print(qs_text)
# CREATE
# LAST  Len "Douglas Adams"
# LAST  Den "English science fiction writer"
# LAST  P31 Q5
# ...

Format for updating existing item

from gkc.mash import WikibaseLoader
from gkc.mash_formatters import QSV1Formatter

loader = WikibaseLoader()
template = loader.load("Q42")

# Format with QID syntax for updates
formatter = QSV1Formatter()
qs_text = formatter.format(template, for_new_item=False)

print(qs_text)
# Intended output: the same L<lang>/D<lang> commands as for a new item,
# with the QID in place of LAST.
# Q42   Len "Douglas Adams"
# Q42   Den "English science fiction writer"
# Q42   P31 Q5
# ...

Add human-readable comments

from gkc.mash import WikibaseLoader, fetch_property_labels
from gkc.mash_formatters import QSV1Formatter
from gkc.sparql import fetch_entity_labels

loader = WikibaseLoader()
template = loader.load("Q42")

# Collect every property ID, plus each claim value that is an item ID.
# The "Q followed by digits" check is the same one the formatter uses, so
# plain strings that merely start with "Q" are not sent to the label lookup.
entity_ids = set()
for claim in template.claims:
    entity_ids.add(claim.property_id)
    value = claim.value
    if isinstance(value, str) and value.startswith("Q") and value[1:].isdigit():
        entity_ids.add(value)

# Fetch labels for comments
entity_labels = fetch_entity_labels(list(entity_ids), languages=["en"])

# Format with inline comments
formatter = QSV1Formatter(entity_labels=entity_labels)
qs_text = formatter.format(template, for_new_item=True)

print(qs_text)
# CREATE
# LAST  Len "Douglas Adams"
# LAST  Den "English science fiction writer"
# LAST  P31 Q5  /* instance of is human */
# LAST  P21 Q6581097    /* sex or gender is male */
# ...

Exclude qualifiers and references

from gkc.mash import WikibaseLoader
from gkc.mash_formatters import QSV1Formatter

# Load the template for item Q42
template = WikibaseLoader().load("Q42")

# Switch off both qualifier and reference output
formatter = QSV1Formatter(exclude_qualifiers=True, exclude_references=True)

# The result contains main statements only, no qualifiers/references
qs_text = formatter.format(template, for_new_item=True)

Exclude specific properties

from gkc.mash import WikibaseLoader
from gkc.mash_formatters import QSV1Formatter

# Properties that aren't relevant here: image, commons cat, website
skipped_properties = ["P18", "P373", "P856"]

template = WikibaseLoader().load("Q42")

# Claims for the skipped properties are left out of the output
formatter = QSV1Formatter(exclude_properties=skipped_properties)
qs_text = formatter.format(template, for_new_item=True)

Complete workflow with all options

from gkc.mash import (
    WikibaseLoader,
    apply_item_property_filters,
    apply_template_language_filter,
    fetch_property_labels,
)
from gkc.mash_formatters import QSV1Formatter
from gkc.sparql import fetch_entity_labels

# Load template
loader = WikibaseLoader()
template = loader.load("Q42")

# Filter template
apply_template_language_filter(template, "en")
apply_item_property_filters(template, exclude_properties=["P18", "P373"])

# Fetch entity labels for comments: every property ID, plus each claim value
# that is an item ID. The "Q followed by digits" check is the same one the
# formatter uses, so plain strings starting with "Q" are not looked up.
entity_ids = set()
for claim in template.claims:
    entity_ids.add(claim.property_id)
    value = claim.value
    if isinstance(value, str) and value.startswith("Q") and value[1:].isdigit():
        entity_ids.add(value)

entity_labels = fetch_entity_labels(list(entity_ids), languages=["en"])

# Format with all options
formatter = QSV1Formatter(
    exclude_properties=["P31"],  # Skip 'instance of' for this example
    exclude_qualifiers=False,
    exclude_references=True,
    entity_labels=entity_labels
)

qs_text = formatter.format(template, for_new_item=True)
print(qs_text)

QuickStatements V1 Format Notes

Syntax Overview

  • CREATE: Start a new item
  • LAST: Refer to the most recently created item
  • Labels: LAST\tLen\t"English label"
  • Descriptions: LAST\tDen\t"English description"
  • Aliases: LAST\tAen\t"English alias"
  • Statements: LAST\tP31\tQ5 (property, value)
  • Qualifiers: LAST\tP31\tQ5\tP580\t+1952-03-11T00:00:00Z/11 (append each qualifier as an extra tab-separated property/value pair)
  • Comments: /* human-readable note */ at end of line

Precision in Time Values

Time values include precision indicators:

  • /9 - year precision
  • /10 - month precision
  • /11 - day precision
  • /14 - second precision

Example: +1952-03-11T00:00:00Z/11 (precise to day)

Entity References

  • Items: Q-IDs (e.g., Q42)
  • Properties: P-IDs (e.g., P31)
  • Strings: Quoted values (e.g., "Douglas Adams")
  • Quantities: Numeric values with optional units
  • Coordinates: @lat/lon format

See Also