# %% [markdown]
# Build the Lego Knowledge Graph using the sources in `/data`.

# %%
from datetime import datetime

import pandas as pd
from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal

# %% [markdown]
# Set up the requirements for building a knowledge graph

# %%
g = Graph()
# `thm` mints instance IRIs (colors, parts, sets, ...); `THM` mints the
# ontology terms (predicates) used to describe them.
thm = Namespace("https://thm.de/")
THM = Namespace("https://thm.de/ont/")

# %% [markdown]
# # Rebrickable

# %% [markdown]
# ![Rebrickable](data/rebrickable/downloads_schema_v3.png)

# %%
re_colors = pd.read_csv("data/rebrickable/colors.csv")
re_elements = pd.read_csv("data/rebrickable/elements.csv")
re_inventories = pd.read_csv("data/rebrickable/inventories.csv")
re_inventory_minifigs = pd.read_csv("data/rebrickable/inventory_minifigs.csv")
re_inventory_parts = pd.read_csv("data/rebrickable/inventory_parts.csv")
re_inventory_sets = pd.read_csv("data/rebrickable/inventory_sets.csv")
re_minifigs = pd.read_csv("data/rebrickable/minifigs.csv")
re_part_categories = pd.read_csv("data/rebrickable/part_categories.csv")
re_part_relationships = pd.read_csv("data/rebrickable/part_relationships.csv")
re_parts = pd.read_csv("data/rebrickable/parts.csv")
re_sets = pd.read_csv("data/rebrickable/sets.csv")
re_themes = pd.read_csv("data/rebrickable/themes.csv")

# %% [markdown]
# Colors

# %%
for color in re_colors.itertuples(index=False):
    color_ref = thm[f"colors/{color.id}"]
    g.add((color_ref, RDFS.label, Literal(color.name, lang="en")))
    g.add((color_ref, THM.color, Literal(color.rgb)))
    g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))
    # The year columns may be empty (NaN); only emit a triple when a value exists.
    if not pd.isna(color.y1):  # First appearance
        g.add((color_ref, THM.first_year, Literal(datetime(year=int(color.y1), month=1, day=1))))
    if not pd.isna(color.y2):  # Last appearance
        g.add((color_ref, THM.last_year, Literal(datetime(year=int(color.y2), month=1, day=1))))

# %% [markdown]
# Part Categories

# %%
for part_category in re_part_categories.itertuples(index=False):
    part_category_ref = thm[f"part_category/{part_category.id}"]
    # Label with the category's name, not with the IRI of the node itself.
    g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang="en")))

# %% [markdown]
# Parts

# %%
for part in re_parts.itertuples(index=False):
    part_ref = thm[f"part/{part.part_num}"]
    g.add((part_ref, RDFS.label, Literal(part.name, lang="en")))
    g.add((part_ref, THM.part_category, thm[f"part_category/{part.part_cat_id}"]))
    g.add((part_ref, THM.part_material, Literal(part.part_material)))

# %% [markdown]
# Elements

# %%
# An element row pairs a part with a color; record it as a part -> color edge.
for element in re_elements.itertuples(index=False):
    part_ref = thm[f"part/{element.part_num}"]
    color_ref = thm[f"colors/{element.color_id}"]
    g.add((part_ref, THM.has_color, color_ref))

# %% [markdown]
# Part Relationships

# %%
for part_relationship in re_part_relationships.itertuples(index=False):
    part_ref_parent = thm[f"part/{part_relationship.parent_part_num}"]
    part_ref_child = thm[f"part/{part_relationship.child_part_num}"]
    g.add((part_ref_parent, THM.has_child, part_ref_child))

# %% [markdown]
# Themes

# %%
for theme in re_themes.itertuples(index=False):
    theme_ref = thm[f"theme/{int(theme.id)}"]
    g.add((theme_ref, RDFS.label, Literal(theme.name, lang="en")))
    # Themes without a parent have an empty (NaN) parent_id.
    if not pd.isna(theme.parent_id):
        g.add((theme_ref, THM.parent_theme, thm[f"theme/{int(theme.parent_id)}"]))

# %% [markdown]
# Sets

# %%
for lego_set in re_sets.itertuples(index=False):
    set_ref = thm[f"set/lego/{lego_set.set_num}"]
    g.add((set_ref, RDFS.label, Literal(lego_set.name, lang="en")))
    g.add((set_ref, THM.year, Literal(datetime(int(lego_set.year), 1, 1))))
    g.add((set_ref, THM.theme, thm[f"theme/{int(lego_set.theme_id)}"]))
    g.add((set_ref, THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)))
    g.add((set_ref, THM.brand, Literal("Lego")))

# %% [markdown]
# Minifigures

# %%
for minifig in re_minifigs.itertuples(index=False):
    minifig_ref = thm[f"minifig/{minifig.fig_num}"]
    # Describe the minifigure node itself; `set_ref` still holds the last set
    # from the previous cell and must not be used here.
    g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang="en")))
    g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))

# %% [markdown]
# Now the ugly part: Inventories

# %%
for inventory in re_inventories.itertuples(index=False):
    inventory_ref = thm[f"inventory/{inventory.id}"]
    # Use the same "set/lego/<set_num>" IRI scheme as the Sets cell so the
    # link resolves to an existing set node.
    # NOTE(review): if inventories.csv also lists minifigure inventories
    # (set_num values that are fig numbers), those rows will point at set
    # IRIs that have no description - confirm against the data.
    g.add((inventory_ref, THM.set, thm[f"set/lego/{inventory.set_num}"]))

# %% [markdown]
# Inventories relate sets, minifigures and parts to each other, creating a kind of "top level set"
# (this takes a lot of time)

# %%
for inventory_part in re_inventory_parts.itertuples(index=False):
    # NOTE(review): the IRI is keyed on inventory id and part number only, so
    # rows that differ only in color_id or is_spare share one node and their
    # quantity/color triples merge - confirm whether that is intended.
    inventory_part_ref = thm[f"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}"]
    inventory_ref = thm[f"inventory/{inventory_part.inventory_id}"]
    part_ref = thm[f"part/{inventory_part.part_num}"]
    g.add((inventory_part_ref, RDFS.domain, inventory_ref))
    g.add((inventory_part_ref, RDFS.range, part_ref))
    g.add((inventory_part_ref, RDF.type, RDF.Property))
    g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))
    g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))
    # Colors are minted under "colors/<id>" (see the Colors cell).
    g.add((inventory_part_ref, THM.color, thm[f"colors/{inventory_part.color_id}"]))

# %%
for inventory_set in re_inventory_sets.itertuples(index=False):
    inventory_set_ref = thm[f"inventory_set/{inventory_set.inventory_id}/{inventory_set.set_num}"]
    inventory_ref = thm[f"inventory/{inventory_set.inventory_id}"]
    set_ref = thm[f"set/lego/{inventory_set.set_num}"]
    g.add((inventory_set_ref, RDFS.domain, inventory_ref))
    g.add((inventory_set_ref, RDFS.range, set_ref))
    g.add((inventory_set_ref, RDF.type, RDF.Property))
    g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))

# %%
for inventory_minifig in re_inventory_minifigs.itertuples(index=False):
    inventory_minifig_ref = thm[f"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}"]
    inventory_ref = thm[f"inventory/{inventory_minifig.inventory_id}"]
    # Minifigures are minted under "minifig/<fig_num>" (see the Minifigures cell).
    minifig_ref = thm[f"minifig/{inventory_minifig.fig_num}"]
    g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))
    g.add((inventory_minifig_ref, RDFS.range, minifig_ref))
    g.add((inventory_minifig_ref, RDF.type, RDF.Property))
    g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))

# %% [markdown]
# Serialize the graph in turtle format

# %% [markdown]
# ```
#                     ___-------___
#                 _-~~             ~~-_
#              _-~                    /~-_
#           /^\__/^\         /~  \   /    \
#         /|  O|| O|        /      \_______________/        \
#        | |___||__|      /       /                \          \
#        |          \    /      /                    \          \
#        |   (_______) /______/                        \_________ \
#        |         / /         \                      /            \
#         \         \^\\         \                  /               \     /
#           \         ||           \______________/      _-_       //\__//
#             \       ||------_-~~-_ ------------- \ --/~   ~\    || __/
#               ~-----||====/~     |==================|       |/~~~~~
#                (_(__/  ./     /                    \_\      \.
#                       (_(___/                         \_____)_)
# ```

# %%
# Bind a readable prefix for the ontology namespace before writing the file.
g.bind("thmont", THM)
g.serialize("lego_graph_rebrickable.ttl", format="turtle")