added pipeline and corrected typing
parent
6b2c881866
commit
605dc4f96b
|
|
@ -17,8 +17,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal\n",
|
"from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"from datetime import datetime\n",
|
"from datetime import datetime"
|
||||||
"import os"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -88,7 +87,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 4,
|
||||||
"id": "ae505704",
|
"id": "ae505704",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -96,6 +95,7 @@
|
||||||
"for color in re_colors.itertuples(index=False):\n",
|
"for color in re_colors.itertuples(index=False):\n",
|
||||||
" color_ref = thm[f\"color/{color.id}\"]\n",
|
" color_ref = thm[f\"color/{color.id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((color_ref, RDF.type, THM.Color ))\n",
|
||||||
" g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
|
" g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
|
||||||
" g.add((color_ref, THM.color, Literal(color.rgb)))\n",
|
" g.add((color_ref, THM.color, Literal(color.rgb)))\n",
|
||||||
" g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n",
|
" g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n",
|
||||||
|
|
@ -126,6 +126,7 @@
|
||||||
"for part_category in re_part_categories.itertuples(index=False):\n",
|
"for part_category in re_part_categories.itertuples(index=False):\n",
|
||||||
" part_category_ref = thm[f\"part_category/{part_category.id}\"]\n",
|
" part_category_ref = thm[f\"part_category/{part_category.id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((part_category_ref, RDF.type, THM.PartCategory ))\n",
|
||||||
" g.add((part_category_ref, RDFS.label, Literal(part_category_ref, lang=\"en\")))"
|
" g.add((part_category_ref, RDFS.label, Literal(part_category_ref, lang=\"en\")))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -147,6 +148,7 @@
|
||||||
"for part in re_parts.itertuples(index=False):\n",
|
"for part in re_parts.itertuples(index=False):\n",
|
||||||
" part_ref = thm[f\"part/{part.part_num}\"]\n",
|
" part_ref = thm[f\"part/{part.part_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((part_ref, RDF.type, THM.Part))\n",
|
||||||
" g.add((part_ref, RDFS.label, Literal(part.name, lang=\"en\")))\n",
|
" g.add((part_ref, RDFS.label, Literal(part.name, lang=\"en\")))\n",
|
||||||
" g.add((part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]))\n",
|
" g.add((part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]))\n",
|
||||||
" g.add((part_ref, THM.part_material, Literal(part.part_material)))"
|
" g.add((part_ref, THM.part_material, Literal(part.part_material)))"
|
||||||
|
|
@ -162,7 +164,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
"id": "579b1d67",
|
"id": "579b1d67",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -171,6 +173,7 @@
|
||||||
" part_ref = thm[f\"part/{element.part_num}\"]\n",
|
" part_ref = thm[f\"part/{element.part_num}\"]\n",
|
||||||
" color_ref = thm[f\"color/{element.color_id}\"]\n",
|
" color_ref = thm[f\"color/{element.color_id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((part_ref, RDF.type, THM.Element))\n",
|
||||||
" g.add((part_ref, THM.has_color, color_ref))"
|
" g.add((part_ref, THM.has_color, color_ref))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -193,7 +196,8 @@
|
||||||
" part_ref_parent = thm[f\"part/{part_relationship.parent_part_num}\"]\n",
|
" part_ref_parent = thm[f\"part/{part_relationship.parent_part_num}\"]\n",
|
||||||
" part_ref_child = thm[f\"part/{part_relationship.child_part_num}\"]\n",
|
" part_ref_child = thm[f\"part/{part_relationship.child_part_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" g.add((part_ref_parent, THM.has_child, part_ref_child))"
|
" g.add((part_ref_parent, THM.has_child, part_ref_child))\n",
|
||||||
|
" g.add((part_ref_child, THM.has_parent, part_ref_parent))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -214,6 +218,7 @@
|
||||||
"for theme in re_themes.itertuples(index=False):\n",
|
"for theme in re_themes.itertuples(index=False):\n",
|
||||||
" theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
|
" theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((theme_ref, RDF.type, THM.Theme))\n",
|
||||||
" g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n",
|
" g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if not pd.isna(theme.parent_id):\n",
|
" if not pd.isna(theme.parent_id):\n",
|
||||||
|
|
@ -238,8 +243,9 @@
|
||||||
"for lego_set in re_sets.itertuples(index=False):\n",
|
"for lego_set in re_sets.itertuples(index=False):\n",
|
||||||
" set_ref = thm[f\"set/lego/{lego_set.set_num}\"]\n",
|
" set_ref = thm[f\"set/lego/{lego_set.set_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((set_ref, RDF.type, THM.Set))\n",
|
||||||
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
|
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
|
||||||
" g.add((set_ref, THM.year, Literal(datetime(int(lego_set.year), 1, 1))))\n",
|
" g.add((set_ref, THM.year, Literal(int(lego_set.year), datatype=XSD.integer)))\n",
|
||||||
" g.add((set_ref, THM.theme, thm[f\"theme/{int(lego_set.theme_id)}\"]))\n",
|
" g.add((set_ref, THM.theme, thm[f\"theme/{int(lego_set.theme_id)}\"]))\n",
|
||||||
" g.add((set_ref, THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)))\n",
|
" g.add((set_ref, THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)))\n",
|
||||||
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
|
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
|
||||||
|
|
@ -263,8 +269,9 @@
|
||||||
"for minifig in re_minifigs.itertuples(index=False):\n",
|
"for minifig in re_minifigs.itertuples(index=False):\n",
|
||||||
" minifig_ref = thm[f\"minifig/{minifig.fig_num}\"]\n",
|
" minifig_ref = thm[f\"minifig/{minifig.fig_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" g.add((set_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n",
|
" g.add((minifig_ref, RDF.type, THM.Minifig))\n",
|
||||||
" g.add((set_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))"
|
" g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n",
|
||||||
|
" g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -277,7 +284,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 12,
|
||||||
"id": "0c97dc4d",
|
"id": "0c97dc4d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -285,6 +292,7 @@
|
||||||
"for inventory in re_inventories.itertuples(index=False):\n",
|
"for inventory in re_inventories.itertuples(index=False):\n",
|
||||||
" inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
|
" inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((inventory_ref, RDF.type, THM.Inventory))\n",
|
||||||
" g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
|
" g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -302,21 +310,39 @@
|
||||||
"execution_count": 13,
|
"execution_count": 13,
|
||||||
"id": "dc2ba03e",
|
"id": "dc2ba03e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'\\nfor inventory_part in re_inventory_parts.itertuples(index=False):\\n inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\\n\\n inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\\n part_ref = thm[f\"part/{inventory_part.part_num}\"]\\n\\n g.add((inventory_part_ref, RDF.type, THM.PartInv))\\n g.add((inventory_part_ref, RDF.type, RDF.Property))\\n\\n g.add((inventory_part_ref, RDFS.domain, THM.Inventory))\\n g.add((inventory_part_ref, RDFS.range, THM.Part))\\n\\n g.add((inventory_ref, THM.contains, inventory_part_ref))\\n g.add((part_ref, THM.belongs, inventory_part_ref))\\n\\n g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\\n g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\\n g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))\\n'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\"\"\"\n",
|
||||||
"for inventory_part in re_inventory_parts.itertuples(index=False):\n",
|
"for inventory_part in re_inventory_parts.itertuples(index=False):\n",
|
||||||
" inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\n",
|
" inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\n",
|
||||||
" \n",
|
" \n",
|
||||||
" inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\n",
|
" inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\n",
|
||||||
" part_ref = thm[f\"part/{inventory_part.part_num}\"]\n",
|
" part_ref = thm[f\"part/{inventory_part.part_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" g.add((inventory_part_ref, RDFS.domain, inventory_ref))\n",
|
" g.add((inventory_part_ref, RDF.type, THM.PartInv))\n",
|
||||||
" g.add((inventory_part_ref, RDFS.range, part_ref))\n",
|
|
||||||
" g.add((inventory_part_ref, RDF.type, RDF.Property))\n",
|
" g.add((inventory_part_ref, RDF.type, RDF.Property))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((inventory_part_ref, RDFS.domain, THM.Inventory))\n",
|
||||||
|
" g.add((inventory_part_ref, RDFS.range, THM.Part))\n",
|
||||||
|
" \n",
|
||||||
|
" g.add((inventory_ref, THM.contains, inventory_part_ref))\n",
|
||||||
|
" g.add((part_ref, THM.belongs, inventory_part_ref))\n",
|
||||||
|
"\n",
|
||||||
" g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\n",
|
" g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\n",
|
||||||
" g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\n",
|
" g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\n",
|
||||||
" g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))"
|
" g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))\n",
|
||||||
|
"\"\"\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -332,10 +358,15 @@
|
||||||
" inventory_ref = thm[f\"inventory/{inventory_set.inventory_id}\"]\n",
|
" inventory_ref = thm[f\"inventory/{inventory_set.inventory_id}\"]\n",
|
||||||
" set_ref = thm[f\"set/lego/{inventory_set.set_num}\"]\n",
|
" set_ref = thm[f\"set/lego/{inventory_set.set_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" g.add((inventory_set_ref, RDFS.domain, inventory_ref))\n",
|
" g.add((inventory_set_ref, RDF.type, THM.SetInv))\n",
|
||||||
" g.add((inventory_set_ref, RDFS.range, set_ref))\n",
|
|
||||||
" g.add((inventory_set_ref, RDF.type, RDF.Property))\n",
|
" g.add((inventory_set_ref, RDF.type, RDF.Property))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((inventory_set_ref, RDFS.domain, THM.Inventory))\n",
|
||||||
|
" g.add((inventory_set_ref, RDFS.range, THM.Set))\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((inventory_ref, THM.contains, inventory_set_ref))\n",
|
||||||
|
" g.add((set_ref, THM.belongs, inventory_set_ref))\n",
|
||||||
|
" \n",
|
||||||
" g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))"
|
" g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -350,12 +381,17 @@
|
||||||
" inventory_minifig_ref = thm[f\"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}\"]\n",
|
" inventory_minifig_ref = thm[f\"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" inventory_ref = thm[f\"inventory/{inventory_minifig.inventory_id}\"]\n",
|
" inventory_ref = thm[f\"inventory/{inventory_minifig.inventory_id}\"]\n",
|
||||||
" minifig_ref = thm[f\"minifig/lego/{inventory_minifig.fig_num}\"]\n",
|
" minifig_ref = thm[f\"minifig/{inventory_minifig.fig_num}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))\n",
|
" g.add((inventory_minifig_ref, RDF.type, THM.MinifigInv))\n",
|
||||||
" g.add((inventory_minifig_ref, RDFS.range, minifig_ref))\n",
|
|
||||||
" g.add((inventory_minifig_ref, RDF.type, RDF.Property))\n",
|
" g.add((inventory_minifig_ref, RDF.type, RDF.Property))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" g.add((inventory_minifig_ref, RDFS.domain, THM.Inventory))\n",
|
||||||
|
" g.add((inventory_minifig_ref, RDFS.range, THM.Minifig))\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((inventory_ref, THM.contains, inventory_minifig_ref))\n",
|
||||||
|
" g.add((minifig_ref, THM.belongs, inventory_minifig_ref))\n",
|
||||||
|
" \n",
|
||||||
" g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))"
|
" g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -464,14 +500,27 @@
|
||||||
"execution_count": 21,
|
"execution_count": 21,
|
||||||
"id": "ef52582e",
|
"id": "ef52582e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'\\nfor bl_part in bl_parts.itertuples(index=False):\\n part_ref = thm[f\"part/{bl_part.part_id}\"]\\n\\n if not (part_ref, None, None) in g:\\n additional_entries += 1\\n g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))\\n'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 21,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\"\"\"\n",
|
||||||
"for bl_part in bl_parts.itertuples(index=False):\n",
|
"for bl_part in bl_parts.itertuples(index=False):\n",
|
||||||
" part_ref = thm[f\"part/{bl_part.part_id}\"]\n",
|
" part_ref = thm[f\"part/{bl_part.part_id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if not (part_ref, None, None) in g:\n",
|
" if not (part_ref, None, None) in g:\n",
|
||||||
" additional_entries += 1\n",
|
" additional_entries += 1\n",
|
||||||
" g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))"
|
" g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))\n",
|
||||||
|
"\"\"\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -479,14 +528,27 @@
|
||||||
"execution_count": 22,
|
"execution_count": 22,
|
||||||
"id": "8bf0ffeb",
|
"id": "8bf0ffeb",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'\\nfor bl_minifig in bl_minifigs.itertuples(index=False):\\n minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\\n\\n if not (minifig_ref, None, None) in g:\\n additional_entries += 1\\n g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))\\n'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"\"\"\"\n",
|
||||||
"for bl_minifig in bl_minifigs.itertuples(index=False):\n",
|
"for bl_minifig in bl_minifigs.itertuples(index=False):\n",
|
||||||
" minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\n",
|
" minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if not (minifig_ref, None, None) in g:\n",
|
" if not (minifig_ref, None, None) in g:\n",
|
||||||
" additional_entries += 1\n",
|
" additional_entries += 1\n",
|
||||||
" g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))"
|
" g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))\n",
|
||||||
|
"\"\"\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -499,7 +561,7 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Added 107748 items\n"
|
"Added 4131 items\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
@ -548,7 +610,7 @@
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<Graph identifier=N30c6d515851c45f1af93153d75c76ea9 (<class 'rdflib.graph.Graph'>)>"
|
"<Graph identifier=N0b9369c5913a4399a349bbd3a82b1420 (<class 'rdflib.graph.Graph'>)>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 24,
|
"execution_count": 24,
|
||||||
|
|
@ -557,15 +619,15 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"g.bind(\"thmont\", THM)\n",
|
"g.bind(\"thm\", THM)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
|
"g.serialize(\"lego_graph.ttl\", format=\"turtle\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "venv (3.14.4)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
|
@ -30,7 +30,7 @@
|
||||||
a4paper,margin=25mm
|
a4paper,margin=25mm
|
||||||
}
|
}
|
||||||
|
|
||||||
\title{\huge{Knowledgegraphen - Lego}}
|
\title{\huge{Knowledge Graph - Lego}}
|
||||||
\date{\today}
|
\date{\today}
|
||||||
\author{
|
\author{
|
||||||
\begin{tabular}{ccc}
|
\begin{tabular}{ccc}
|
||||||
|
|
@ -127,7 +127,7 @@
|
||||||
\toprule
|
\toprule
|
||||||
& Brickset \\ \midrule
|
& Brickset \\ \midrule
|
||||||
URL & \url{https://brickset.com/}\\
|
URL & \url{https://brickset.com/}\\
|
||||||
Beschaffung & Webscraping/CSV-Download \\
|
Beschaffung & CSV-Download \\
|
||||||
Lizenz & nicht spezifiziert \\
|
Lizenz & nicht spezifiziert \\
|
||||||
Erhalt & 23.04.2026 \\ \bottomrule
|
Erhalt & 23.04.2026 \\ \bottomrule
|
||||||
\end{tabularx}
|
\end{tabularx}
|
||||||
|
|
@ -184,9 +184,11 @@
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
https://thm.de/set/{brand}/{id}
|
https://thm.de/set/{brand}/{id}
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
Um die Dateigrösse des Graphen zu reduzieren, wurde \texttt{thm} statt \texttt{th-mannheim} verwendet.
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\includegraphics[width=\columnwidth]{bilder/example_part_number.png}
|
\centering
|
||||||
|
\includegraphics[width=0.8\columnwidth]{bilder/example_part_number.png}
|
||||||
\caption{Lego Stein mit Teile-Nummer (Design-ID) 41769 \cite{cunninghamSellLEGOBricklink2018}}
|
\caption{Lego Stein mit Teile-Nummer (Design-ID) 41769 \cite{cunninghamSellLEGOBricklink2018}}
|
||||||
\label{fig:lego_example_part_number}
|
\label{fig:lego_example_part_number}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
@ -199,14 +201,41 @@
|
||||||
|
|
||||||
\subsection{Pipeline}
|
\subsection{Pipeline}
|
||||||
|
|
||||||
|
Die Datensätze von \textit{Bricklink} und \textit{Merlins Steine} wurden durch Webscraping erhoben. Durch Ausnahmefälle entstandene Fehler mussten manuell bereinigt werden; demnach ist dieser Teil nicht automatisierbar. Abbildung~\ref{fig:pipeline} zeigt die Pipeline zur Erstellung des Knowledge Graphen.
|
||||||
|
|
||||||
|
\begin{figure}[H]
|
||||||
|
\includegraphics[width=\columnwidth]{./bilder/kgr_pipeline1.drawio.png}
|
||||||
|
\caption{Pipeline Erstellung Knowledge Graph}
|
||||||
|
\label{fig:pipeline}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
\section{Evaluation}
|
\section{Evaluation}
|
||||||
|
|
||||||
\subsection{Ergebnis}
|
\subsection{Ergebnis}
|
||||||
|
|
||||||
Das Projekt kann unter der URL: \url{https://gitty.informatik.hs-mannheim.de/2211275/kgr} betrachtet werden.
|
Das Projekt kann unter der URL: \url{https://gitty.informatik.hs-mannheim.de/2211275/kgr} betrachtet werden.
|
||||||
|
Der resultierende Knowledge-Graph ist über 300 MB gross.
|
||||||
\subsection{Beispiel-Queries}
|
\subsection{Beispiel-Queries}
|
||||||
|
|
||||||
|
Abfrage aller Lego-Star-Wars-Minifiguren:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
SELECT DISTINCT ?name
|
||||||
|
WHERE {
|
||||||
|
?set thm:theme ?theme.
|
||||||
|
?theme rdf:type thm:Theme.
|
||||||
|
?set rdf:type thm:Set.
|
||||||
|
?theme rdfs:label "Star Wars"@en.
|
||||||
|
?inventory thm:set ?set.
|
||||||
|
?inventory rdf:type thm:Inventory.
|
||||||
|
?inventory thm:contains ?minifig_inv.
|
||||||
|
?minifig_inv rdf:type thm:MinifigInv.
|
||||||
|
?minifig thm:belongs ?minifig_inv.
|
||||||
|
?minifig rdfs:label ?name.
|
||||||
|
}
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
\subsection{Abdeckung}
|
\subsection{Abdeckung}
|
||||||
|
|
||||||
\subsection{Konsistenz}
|
\subsection{Konsistenz}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
<mxfile host="app.diagrams.net">
|
||||||
|
<diagram name="Seite-1" id="_iVW848-2TJ0zfREs3N2">
|
||||||
|
<mxGraphModel dx="808" dy="425" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
|
||||||
|
<root>
|
||||||
|
<mxCell id="0" />
|
||||||
|
<mxCell id="1" parent="0" />
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-11" edge="1" parent="1" source="Hu6JBMkSkJOX-7uFVOw6-2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="Hu6JBMkSkJOX-7uFVOw6-3">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-2" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="Bricklink,&nbsp;<span style="background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));">Brickset</span><div>Rebrickable,&nbsp;<span style="background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));">Merlin</span></div>" vertex="1">
|
||||||
|
<mxGeometry height="50" width="110" x="50" y="150" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-12" edge="1" parent="1" source="Hu6JBMkSkJOX-7uFVOw6-3" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="Hu6JBMkSkJOX-7uFVOw6-8">
|
||||||
|
<mxGeometry relative="1" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-3" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="Datensätze als .CSV" vertex="1">
|
||||||
|
<mxGeometry height="50" width="120" x="200" y="150" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;rounded=0;" value="Manuelle Abfrage &amp; Transformation" vertex="1">
|
||||||
|
<mxGeometry height="30" width="100" x="130" y="210" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-8" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="lego_graph.ttl" vertex="1">
|
||||||
|
<mxGeometry height="50" width="120" x="360" y="150" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-10" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;rounded=0;" value="Jupyter Notebook" vertex="1">
|
||||||
|
<mxGeometry height="30" width="100" x="290" y="210" as="geometry" />
|
||||||
|
</mxCell>
|
||||||
|
</root>
|
||||||
|
</mxGraphModel>
|
||||||
|
</diagram>
|
||||||
|
</mxfile>
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
Loading…
Reference in New Issue