213 lines
7.4 KiB
Python
213 lines
7.4 KiB
Python
# %% [markdown]
|
|
# Build the Lego Knowledge Graph using the sources in `/data`.
|
|
|
|
# %%
|
|
from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
|
|
# %% [markdown]
|
|
# Set up the requirements for building a knowledge graph
|
|
|
|
# %%
|
|
# Two namespaces are used throughout the script:
#   * `thm` mints the IRIs of individual resources (thm["colors/..."], ...),
#   * `THM` provides the ontology terms used as predicates (THM.color, ...).
thm = Namespace("https://thm.de/")
THM = Namespace("https://thm.de/ont/")

# The single in-memory graph that all following cells add their triples to.
g = Graph()
|
|
|
|
# %% [markdown]
|
|
# # Rebrickable
|
|
|
|
# %% [markdown]
|
|
# 
|
|
|
|
# %%
|
|
# Load every Rebrickable CSV export into its own DataFrame. The reads are
# independent of each other; they are grouped here by the role the table
# plays in the cells below.

# Catalog tables: the things that exist.
re_colors = pd.read_csv("data/rebrickable/colors.csv")
re_part_categories = pd.read_csv("data/rebrickable/part_categories.csv")
re_parts = pd.read_csv("data/rebrickable/parts.csv")
re_elements = pd.read_csv("data/rebrickable/elements.csv")
re_part_relationships = pd.read_csv("data/rebrickable/part_relationships.csv")
re_themes = pd.read_csv("data/rebrickable/themes.csv")
re_sets = pd.read_csv("data/rebrickable/sets.csv")
re_minifigs = pd.read_csv("data/rebrickable/minifigs.csv")

# Inventory tables: which parts / sets / minifigs an inventory contains.
re_inventories = pd.read_csv("data/rebrickable/inventories.csv")
re_inventory_parts = pd.read_csv("data/rebrickable/inventory_parts.csv")
re_inventory_sets = pd.read_csv("data/rebrickable/inventory_sets.csv")
re_inventory_minifigs = pd.read_csv("data/rebrickable/inventory_minifigs.csv")
|
|
|
|
# %% [markdown]
|
|
# Colors
|
|
|
|
# %%
|
|
# Describe every color: label, RGB value, transparency flag and, when known,
# the years of its first and last appearance.
for color in re_colors.itertuples(index=False):
    color_ref = thm[f"colors/{color.id}"]

    g.add((color_ref, RDFS.label, Literal(color.name, lang="en")))
    g.add((color_ref, THM.color, Literal(color.rgb)))
    # NOTE(review): assumes pandas parsed `is_trans` as a real boolean -
    # confirm against the CSV before relying on the xsd:boolean literal.
    g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))

    # Years that are NaN are skipped; a present year is stored as a datetime
    # pinned to 1 January of that year.
    for year_predicate, year_value in (
        (THM.first_year, color.y1),  # first appearance
        (THM.last_year, color.y2),  # last appearance
    ):
        if pd.notna(year_value):
            g.add((color_ref, year_predicate, Literal(datetime(year=int(year_value), month=1, day=1))))
|
|
|
|
|
|
# %% [markdown]
|
|
# Part Categories
|
|
|
|
# %%
|
|
# Describe every part category with a human-readable English label.
for part_category in re_part_categories.itertuples(index=False):
    part_category_ref = thm[f"part_category/{part_category.id}"]

    # The label must be the category's name. Previously the resource's own IRI
    # was passed to Literal(), so every category was "labelled" with its URL.
    g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang="en")))
|
|
|
|
# %% [markdown]
|
|
# Parts
|
|
|
|
# %%
|
|
# Describe every part: its label, the category it belongs to and its material.
for part in re_parts.itertuples(index=False):
    part_ref = thm[f"part/{part.part_num}"]
    category_ref = thm[f"part_category/{part.part_cat_id}"]
    material = Literal(part.part_material)

    g.add((part_ref, RDFS.label, Literal(part.name, lang="en")))
    g.add((part_ref, THM.part_category, category_ref))
    g.add((part_ref, THM.part_material, material))
|
|
|
|
# %% [markdown]
|
|
# Elements
|
|
|
|
# %%
|
|
# An element row pairs a part with a color; record that pairing as a
# part -> has_color -> color edge.
for element in re_elements.itertuples(index=False):
    g.add(
        (
            thm[f"part/{element.part_num}"],
            THM.has_color,
            thm[f"colors/{element.color_id}"],
        )
    )
|
|
|
|
# %% [markdown]
|
|
# Part Relationships
|
|
|
|
# %%
|
|
# Link parent parts to their child parts. Only the parent and child part
# numbers of each row are read here; every row becomes a `has_child` edge.
for part_relationship in re_part_relationships.itertuples(index=False):
    parent_ref = thm[f"part/{part_relationship.parent_part_num}"]
    child_ref = thm[f"part/{part_relationship.child_part_num}"]
    g.add((parent_ref, THM.has_child, child_ref))
|
|
|
|
# %% [markdown]
|
|
# Themes
|
|
|
|
# %%
|
|
# Describe every theme and, where a parent id is present, link it to its
# parent theme.
for theme in re_themes.itertuples(index=False):
    theme_ref = thm[f"theme/{int(theme.id)}"]
    g.add((theme_ref, RDFS.label, Literal(theme.name, lang="en")))

    if pd.isna(theme.parent_id):
        # No parent recorded for this theme; nothing more to add.
        continue

    # The id is cast to int so the IRI does not carry a float suffix.
    parent_theme_ref = thm[f"theme/{int(theme.parent_id)}"]
    g.add((theme_ref, THM.parent_theme, parent_theme_ref))
|
|
|
|
# %% [markdown]
|
|
# Sets
|
|
|
|
# %%
|
|
# Describe every set: label, release year (stored as 1 January of that year),
# theme, number of parts and the fixed brand "Lego".
for lego_set in re_sets.itertuples(index=False):
    set_ref = thm[f"set/lego/{lego_set.set_num}"]

    set_facts = (
        (RDFS.label, Literal(lego_set.name, lang="en")),
        (THM.year, Literal(datetime(int(lego_set.year), 1, 1))),
        (THM.theme, thm[f"theme/{int(lego_set.theme_id)}"]),
        (THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)),
        (THM.brand, Literal("Lego")),
    )
    for predicate, value in set_facts:
        g.add((set_ref, predicate, value))
|
|
|
|
# %% [markdown]
|
|
# Minifigures
|
|
|
|
# %%
|
|
# Describe every minifigure: its label and its number of parts.
for minifig in re_minifigs.itertuples(index=False):
    minifig_ref = thm[f"minifig/{minifig.fig_num}"]

    # The subject must be the minifigure itself. Previously these triples were
    # attached to `set_ref`, a variable left over from an earlier cell, so no
    # minifigure was ever described and one unrelated set collected every
    # minifigure's label and part count.
    g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang="en")))
    g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))
|
|
|
|
# %% [markdown]
|
|
# Now the ugly part: Inventories
|
|
|
|
# %%
|
|
# Link every inventory to the set it belongs to.
for inventory in re_inventories.itertuples(index=False):
    inventory_ref = thm[f"inventory/{inventory.id}"]

    # Set IRIs are minted as "set/lego/<set_num>" everywhere else in this
    # script; the former "sets/lego/..." spelling pointed at IRIs that no
    # other triple describes.
    # NOTE(review): if inventories.csv also lists minifig ids in `set_num`,
    # those rows would need a minifig IRI instead - confirm against the data.
    g.add((inventory_ref, THM.set, thm[f"set/lego/{inventory.set_num}"]))
|
|
|
|
# %% [markdown]
|
|
# Inventories relate sets, minifigures and parts to each other, creating a kind of "top level set"
|
|
# (this takes a lot of time)
|
|
|
|
# %%
|
|
# Model every inventory -> part membership as its own node, so that quantity,
# spare flag and color can be attached to the membership itself.
for inventory_part in re_inventory_parts.itertuples(index=False):
    # NOTE(review): the node IRI is keyed on (inventory_id, part_num) only, so
    # rows that differ just in color or spare flag would land on the same node
    # and accumulate several quantity / color values - confirm this is intended.
    inventory_part_ref = thm[f"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}"]

    inventory_ref = thm[f"inventory/{inventory_part.inventory_id}"]
    part_ref = thm[f"part/{inventory_part.part_num}"]

    # The membership node is typed as a property running from the inventory
    # (domain) to the part (range).
    g.add((inventory_part_ref, RDFS.domain, inventory_ref))
    g.add((inventory_part_ref, RDFS.range, part_ref))
    g.add((inventory_part_ref, RDF.type, RDF.Property))

    g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))
    g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))
    # Color IRIs are minted as "colors/<id>" elsewhere in this script; the
    # former "color/<id>" spelling referenced IRIs that carry no label or RGB.
    g.add((inventory_part_ref, THM.color, thm[f"colors/{inventory_part.color_id}"]))
|
|
|
|
# %%
|
|
# Model every inventory -> set membership as its own node carrying the
# quantity of that set inside the inventory.
for inventory_set in re_inventory_sets.itertuples(index=False):
    inventory_ref = thm[f"inventory/{inventory_set.inventory_id}"]
    set_ref = thm[f"set/lego/{inventory_set.set_num}"]
    inventory_set_ref = thm[f"inventory_set/{inventory_set.inventory_id}/{inventory_set.set_num}"]
    quantity = Literal(int(inventory_set.quantity), datatype=XSD.integer)

    # The membership node is typed as a property from inventory to set.
    for predicate, value in (
        (RDFS.domain, inventory_ref),
        (RDFS.range, set_ref),
        (RDF.type, RDF.Property),
        (THM.quantity, quantity),
    ):
        g.add((inventory_set_ref, predicate, value))
|
|
|
|
# %%
|
|
# Model every inventory -> minifigure membership as its own node carrying the
# quantity of that minifigure inside the inventory.
for inventory_minifig in re_inventory_minifigs.itertuples(index=False):
    inventory_minifig_ref = thm[f"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}"]

    inventory_ref = thm[f"inventory/{inventory_minifig.inventory_id}"]
    # Minifigure IRIs are minted as "minifig/<fig_num>" in the minifigures
    # cell; the former "minifig/lego/<fig_num>" spelling never matched them.
    minifig_ref = thm[f"minifig/{inventory_minifig.fig_num}"]

    # The membership node is typed as a property from inventory to minifigure.
    g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))
    g.add((inventory_minifig_ref, RDFS.range, minifig_ref))
    g.add((inventory_minifig_ref, RDF.type, RDF.Property))

    g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))
|
|
|
|
# %% [markdown]
|
|
# Serialize the graph in turtle format
|
|
|
|
# %% [markdown]
|
|
# ```
|
|
# ___-------___
|
|
# _-~~ ~~-_
|
|
# _-~ /~-_
|
|
# /^\__/^\ /~ \ / \
|
|
# /| O|| O| / \_______________/ \
|
|
# | |___||__| / / \ \
|
|
# | \ / / \ \
|
|
# | (_______) /______/ \_________ \
|
|
# | / / \ / \
|
|
# \ \^\\ \ / \ /
|
|
# \ || \______________/ _-_ //\__//
|
|
# \ ||------_-~~-_ ------------- \ --/~ ~\ || __/
|
|
# ~-----||====/~ |==================| |/~~~~~
|
|
# (_(__/ ./ / \_\ \.
|
|
# (_(___/ \_____)_)
|
|
# ```
|
|
|
|
# %%
|
|
# Register a readable prefix for the ontology namespace so the Turtle output
# uses `thmont:` instead of full IRIs for predicates.
g.bind("thmont", THM)

# Write the whole graph to disk in Turtle syntax.
g.serialize(destination="lego_graph_rebrickable.ttl", format="turtle")
|
|
|
|
|