diff --git a/lego/lego_graph_rebrickable.ipynb b/lego/lego_graph_rebrickable.ipynb index 5ee1ad2..23f3e93 100644 --- a/lego/lego_graph_rebrickable.ipynb +++ b/lego/lego_graph_rebrickable.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 2, "id": "90209948", "metadata": {}, "outputs": [], @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 3, "id": "8e573135", "metadata": {}, "outputs": [], @@ -41,6 +41,61 @@ "THM = Namespace(\"https://thm.de/ont/\")" ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "class_hierarchy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ")>" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Class hierarchy\n", + "g.add((THM.LegoEntity, RDF.type, OWL.Class))\n", + "g.add((THM.LegoEntity, RDFS.subClassOf, OWL.Thing))\n", + "\n", + "g.add((THM.PhysicalItem, RDF.type, OWL.Class))\n", + "g.add((THM.PhysicalItem, RDFS.subClassOf, THM.LegoEntity))\n", + "g.add((THM.Set, RDF.type, OWL.Class))\n", + "g.add((THM.Set, RDFS.subClassOf, THM.PhysicalItem))\n", + "g.add((THM.Part, RDF.type, OWL.Class))\n", + "g.add((THM.Part, RDFS.subClassOf, THM.PhysicalItem))\n", + "g.add((THM.Minifigure, RDF.type, OWL.Class))\n", + "g.add((THM.Minifigure, RDFS.subClassOf, THM.PhysicalItem))\n", + "\n", + "g.add((THM.CatalogEntry, RDF.type, OWL.Class))\n", + "g.add((THM.CatalogEntry, RDFS.subClassOf, THM.LegoEntity))\n", + "g.add((THM.Color, RDF.type, OWL.Class))\n", + "g.add((THM.Color, RDFS.subClassOf, THM.CatalogEntry))\n", + "g.add((THM.PartCategory, RDF.type, OWL.Class))\n", + "g.add((THM.PartCategory, RDFS.subClassOf, THM.CatalogEntry))\n", + "g.add((THM.Theme, RDF.type, OWL.Class))\n", + "g.add((THM.Theme, RDFS.subClassOf, THM.CatalogEntry))\n", + "\n", + "g.add((THM.Inventory, RDF.type, OWL.Class))\n", + "g.add((THM.Inventory, RDFS.subClassOf, THM.LegoEntity))\n", + "g.add((THM.InventoryEntry, RDF.type, OWL.Class))\n", + "g.add((THM.InventoryEntry, RDFS.subClassOf, THM.LegoEntity))\n", + "g.add((THM.InventoryPart, RDF.type, OWL.Class))\n", + "g.add((THM.InventoryPart, RDFS.subClassOf, THM.InventoryEntry))\n", + "g.add((THM.InventorySet, RDF.type, OWL.Class))\n", + "g.add((THM.InventorySet, RDFS.subClassOf, THM.InventoryEntry))\n", + "g.add((THM.InventoryMinifig, RDF.type, OWL.Class))\n", + "g.add((THM.InventoryMinifig, RDFS.subClassOf, THM.InventoryEntry))\n", + "\n", + "g.add((THM.Producer, RDF.type, OWL.Class))\n", + "g.add((THM.Producer, RDFS.subClassOf, THM.LegoEntity))" + ] + }, { "cell_type": "markdown", "id": "d56199d5", @@ -59,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 5, "id": "d8a1fe84", "metadata": {}, "outputs": [], @@ -88,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 6, "id": "ae505704", "metadata": {}, "outputs": [], @@ -96,15 +151,14 @@ "for color in re_colors.itertuples(index=False):\n", " color_ref = thm[f\"color/{color.id}\"]\n", "\n", + " g.add((color_ref, RDF.type, THM.Color))\n", " g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n", " g.add((color_ref, THM.color, Literal(color.rgb)))\n", " g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n", " \n", " if not pd.isna(color.y1):\n", - " # First appearance\n", " g.add((color_ref, THM.first_year, Literal(datetime(year = int(color.y1), month=1, day=1))))\n", " if not pd.isna(color.y2):\n", - " # Last appearance\n", " g.add((color_ref, THM.last_year, Literal(datetime(year = int(color.y2), month=1, day=1))))\n" ] }, @@ -118,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 7, "id": "fb9e17d6", "metadata": {}, "outputs": [], @@ -126,7 +180,8 @@ "for part_category in re_part_categories.itertuples(index=False):\n", " part_category_ref = thm[f\"part_category/{part_category.id}\"]\n", "\n", - " g.add((part_category_ref, RDFS.label, Literal(part_category_ref, lang=\"en\")))" + " g.add((part_category_ref, RDF.type, THM.PartCategory))\n", + " g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang=\"en\")))" ] }, { @@ -139,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 8, "id": "8fdb080e", "metadata": {}, "outputs": [], @@ -147,6 +202,7 @@ "for part in re_parts.itertuples(index=False):\n", " part_ref = thm[f\"part/{part.part_num}\"]\n", "\n", + " g.add((part_ref, RDF.type, THM.Part))\n", " g.add((part_ref, RDFS.label, Literal(part.name, lang=\"en\")))\n", " g.add((part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]))\n", " g.add((part_ref, THM.part_material, Literal(part.part_material)))" @@ -162,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 9, "id": "579b1d67", "metadata": {}, "outputs": [], @@ -184,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 10, "id": "00db079a", "metadata": {}, "outputs": [], @@ -206,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 11, "id": "1a529aae", "metadata": {}, "outputs": [], @@ -214,6 +270,7 @@ "for theme in re_themes.itertuples(index=False):\n", " theme_ref = thm[f\"theme/{int(theme.id)}\"]\n", "\n", + " g.add((theme_ref, RDF.type, THM.Theme))\n", " g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n", "\n", " if not pd.isna(theme.parent_id):\n", @@ -230,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 12, "id": "29b357ef", "metadata": {}, "outputs": [], @@ -238,6 +295,7 @@ "for lego_set in re_sets.itertuples(index=False):\n", " set_ref = thm[f\"set/lego/{lego_set.set_num}\"]\n", "\n", + " g.add((set_ref, RDF.type, THM.Set))\n", " g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n", " g.add((set_ref, THM.year, Literal(datetime(int(lego_set.year), 1, 1))))\n", " g.add((set_ref, THM.theme, thm[f\"theme/{int(lego_set.theme_id)}\"]))\n", @@ -255,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 13, "id": "a67b3e70", "metadata": {}, "outputs": [], @@ -263,8 +321,9 @@ "for minifig in re_minifigs.itertuples(index=False):\n", " minifig_ref = thm[f\"minifig/{minifig.fig_num}\"]\n", "\n", - " g.add((set_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n", - " g.add((set_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))" + " g.add((minifig_ref, RDF.type, THM.Minifigure))\n", + " g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n", + " g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))" ] }, { @@ -277,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 14, "id": "0c97dc4d", "metadata": {}, "outputs": [], @@ -285,6 +344,7 @@ "for inventory in re_inventories.itertuples(index=False):\n", " inventory_ref = thm[f\"inventory/{inventory.id}\"]\n", "\n", + " g.add((inventory_ref, RDF.type, THM.Inventory))\n", " g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))" ] }, @@ -299,21 +359,17 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 15, "id": "dc2ba03e", "metadata": {}, "outputs": [], "source": [ "for inventory_part in re_inventory_parts.itertuples(index=False):\n", " inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\n", - " \n", - " inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\n", - " part_ref = thm[f\"part/{inventory_part.part_num}\"]\n", "\n", - " g.add((inventory_part_ref, RDFS.domain, inventory_ref))\n", - " g.add((inventory_part_ref, RDFS.range, part_ref))\n", - " g.add((inventory_part_ref, RDF.type, RDF.Property))\n", - " \n", + " g.add((inventory_part_ref, RDF.type, THM.InventoryPart))\n", + " g.add((inventory_part_ref, THM.inventory, thm[f\"inventory/{inventory_part.inventory_id}\"]))\n", + " g.add((inventory_part_ref, THM.part, thm[f\"part/{inventory_part.part_num}\"]))\n", " g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\n", " g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\n", " g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))" @@ -321,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 16, "id": "8715a1cf", "metadata": {}, "outputs": [], @@ -329,19 +385,15 @@ "for inventory_set in re_inventory_sets.itertuples(index=False):\n", " inventory_set_ref = thm[f\"inventory_set/{inventory_set.inventory_id}/{inventory_set.set_num}\"]\n", "\n", - " inventory_ref = thm[f\"inventory/{inventory_set.inventory_id}\"]\n", - " set_ref = thm[f\"set/lego/{inventory_set.set_num}\"]\n", - "\n", - " g.add((inventory_set_ref, RDFS.domain, inventory_ref))\n", - " g.add((inventory_set_ref, RDFS.range, set_ref))\n", - " g.add((inventory_set_ref, RDF.type, RDF.Property))\n", - "\n", + " g.add((inventory_set_ref, RDF.type, THM.InventorySet))\n", + " g.add((inventory_set_ref, THM.inventory, thm[f\"inventory/{inventory_set.inventory_id}\"]))\n", + " g.add((inventory_set_ref, THM.set, thm[f\"set/lego/{inventory_set.set_num}\"]))\n", " g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 17, "id": "08c2c580", "metadata": {}, "outputs": [], @@ -349,13 +401,9 @@ "for inventory_minifig in re_inventory_minifigs.itertuples(index=False):\n", " inventory_minifig_ref = thm[f\"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}\"]\n", "\n", - " inventory_ref = thm[f\"inventory/{inventory_minifig.inventory_id}\"]\n", - " minifig_ref = thm[f\"minifig/lego/{inventory_minifig.fig_num}\"]\n", - "\n", - " g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))\n", - " g.add((inventory_minifig_ref, RDFS.range, minifig_ref))\n", - " g.add((inventory_minifig_ref, RDF.type, RDF.Property))\n", - "\n", + " g.add((inventory_minifig_ref, RDF.type, THM.InventoryMinifig))\n", + " g.add((inventory_minifig_ref, THM.inventory, thm[f\"inventory/{inventory_minifig.inventory_id}\"]))\n", + " g.add((inventory_minifig_ref, THM.minifig, thm[f\"minifig/{inventory_minifig.fig_num}\"]))\n", " g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))" ] }, @@ -377,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 18, "id": "1e0ac437", "metadata": {}, "outputs": [], @@ -387,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 19, "id": "fd944ccb", "metadata": {}, "outputs": [], @@ -419,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 20, "id": "a8beb593", "metadata": {}, "outputs": [], @@ -431,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 21, "id": "b14e6e89", "metadata": {}, "outputs": [], @@ -441,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 22, "id": "bbf5462b", "metadata": {}, "outputs": [], @@ -451,7 +499,8 @@ "\n", " if not (set_ref, None, None) in g:\n", " additional_entries += 1\n", - " g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n", + " g.add((set_ref, RDF.type, THM.Set))\n", + " g.add((set_ref, RDFS.label, Literal(bl_set.set_name, lang=\"en\")))\n", " if not pd.isna(bl_set.year) and str(bl_set.year).isnumeric():\n", " g.add((set_ref, THM.first_year, Literal(datetime(int(bl_set.year), 1, 1))))\n", " if not pd.isna(bl_set.parts) and str(bl_set.parts).isnumeric():\n", @@ -461,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 23, "id": "ef52582e", "metadata": {}, "outputs": [], @@ -471,27 +520,29 @@ "\n", " if not (part_ref, None, None) in g:\n", " additional_entries += 1\n", + " g.add((part_ref, RDF.type, THM.Part))\n", " g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 24, "id": "8bf0ffeb", "metadata": {}, "outputs": [], "source": [ "for bl_minifig in bl_minifigs.itertuples(index=False):\n", - " minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\n", + " minifig_ref = thm[f\"minifig/{bl_minifig.minifig_id}\"]\n", "\n", " if not (minifig_ref, None, None) in g:\n", " additional_entries += 1\n", + " g.add((minifig_ref, RDF.type, THM.Minifigure))\n", " g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 25, "id": "3491b032", "metadata": {}, "outputs": [ @@ -517,7 +568,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 26, "id": "c1e9ff32", "metadata": {}, "outputs": [], @@ -527,24 +578,24 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 27, "id": "217dc4d2", "metadata": {}, "outputs": [], "source": [ - "additional_entries = 0 \n", + "additional_entries = 0\n", "\n", "for merlin_set in merlin_sets.itertuples(index=False):\n", " set_ref = thm[f\"set/lego/{merlin_set.id}\"]\n", " producer_ref = thm[f\"producer/{merlin_set.producer}\"]\n", "\n", - " #add producer\n", " if not (producer_ref, None, None) in g:\n", + " g.add((producer_ref, RDF.type, THM.Producer))\n", " g.add((producer_ref, RDFS.label, Literal(merlin_set.producer)))\n", "\n", - " #add the set\n", " if not (set_ref, None, None) in g:\n", " additional_entries += 1\n", + " g.add((set_ref, RDF.type, THM.Set))\n", " g.add((set_ref, RDFS.label, Literal(merlin_set.name)))\n", " g.add((set_ref, THM.brand, producer_ref))\n", " if not pd.isna(merlin_set.year):\n", @@ -556,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 28, "id": "54eaa79e", "metadata": {}, "outputs": [], @@ -618,17 +669,17 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 29, "id": "1a30bff8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - ")>" + ")>" ] }, - "execution_count": 71, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -641,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 30, "id": "7b9c3bcf", "metadata": {}, "outputs": [ @@ -649,9 +700,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "(rdflib.term.URIRef('https://thm.de/inventory/102116'), rdflib.term.URIRef('https://thm.de/inventory_part/102116/3626cpr3530'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n", - "(rdflib.term.URIRef('https://thm.de/inventory/102116'), rdflib.term.URIRef('https://thm.de/inventory_part/102116/970c12'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n", - "(rdflib.term.URIRef('https://thm.de/inventory/102116'), rdflib.term.URIRef('https://thm.de/inventory_part/102116/973c14h14pr5739'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n" + "(rdflib.term.URIRef('https://thm.de/inventory_part/102116/3626cpr3530'), rdflib.term.Literal('Minifig Head Chitauri, Gold Armor, Lime Eyes, Large Open Mouth with Teeth Print', lang='en'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n", + "(rdflib.term.URIRef('https://thm.de/inventory_part/102116/970c12'), rdflib.term.Literal('Hips and Dark Bluish Gray Legs', lang='en'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n", + "(rdflib.term.URIRef('https://thm.de/inventory_part/102116/973c14h14pr5739'), rdflib.term.Literal('Torso Armor, Light Bluish Gray and Olive Green Panels Print, Light Bluish Gray Arms and Hands', lang='en'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n" ] } ], @@ -659,18 +710,20 @@ "import rdflib\n", " \n", "query = \"\"\"\n", - "SELECT ?inventory ?inventory_part ?quantity\n", + "SELECT ?inventory_part ?part_name ?quantity\n", "WHERE {\n", " ?inventory thmont:set .\n", - " ?inventory_part rdfs:domain ?inventory .\n", - " ?inventory_part thmont:quantity ?quantity \n", + " ?inventory_part thmont:inventory ?inventory ;\n", + " thmont:part ?part ;\n", + " thmont:quantity ?quantity .\n", + " ?part rdfs:label ?part_name .\n", "}\n", "\"\"\"\n", "\n", "qres = g.query(query)\n", "\n", - "for row in qres :\n", - " print(row)" + "for row in qres:\n", + " print(row)" ] } ],