finished rebrickable graph, fixed inventory_sets.csv
parent
383493245b
commit
3b4bfae39b
File diff suppressed because it is too large
Load Diff
|
|
@ -5,12 +5,12 @@
|
|||
"id": "747b245f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Build the Lego Knwoledge Graph using the sources in `/data`."
|
||||
"Build the Lego Knowledge Graph using the sources in `/data`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 28,
|
||||
"id": "90209948",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -30,14 +30,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 29,
|
||||
"id": "8e573135",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"g = Graph()\n",
|
||||
"thm = Namespace(\"https://th-mannheim.de/\")\n",
|
||||
"THM = Namespace(\"https://th-mannheim.de/ont/\")"
|
||||
"thm = Namespace(\"https://thm.de/\")\n",
|
||||
"THM = Namespace(\"https://thm.de/ont/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -58,7 +58,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 30,
|
||||
"id": "d8a1fe84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -87,7 +87,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 31,
|
||||
"id": "ae505704",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -117,7 +117,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 32,
|
||||
"id": "fb9e17d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -138,7 +138,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 33,
|
||||
"id": "8fdb080e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -161,7 +161,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 34,
|
||||
"id": "579b1d67",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -183,7 +183,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 35,
|
||||
"id": "00db079a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -205,29 +205,159 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 36,
|
||||
"id": "1a529aae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "SyntaxError",
|
||||
"evalue": "f-string: unmatched ')' (1024367582.py, line 2)",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
" \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[48]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[31m \u001b[39m\u001b[31mtheme_ref = thm[f\"theme/{int(theme.id))}\"]\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m f-string: unmatched ')'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for theme in re_themes.itertuples(index=False):\n",
|
||||
" theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
|
||||
"\n",
|
||||
" g.add((theme_ref, RDFS.label, Literal(theme.name)))\n",
|
||||
" g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n",
|
||||
"\n",
|
||||
" if not pd.isna(theme.parent_id):\n",
|
||||
" g.add((theme_ref, THM.parent_theme, thm[f\"theme/{int(theme.parent_id)}\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f72c2e9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Sets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "29b357ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for lego_set in re_sets.itertuples(index=False):\n",
|
||||
" set_ref = thm[f\"set/lego/{lego_set.set_num}\"]\n",
|
||||
"\n",
|
||||
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
|
||||
" g.add((set_ref, THM.year, Literal(datetime(int(lego_set.year), 1, 1))))\n",
|
||||
" g.add((set_ref, THM.theme, thm[f\"theme/{int(lego_set.theme_id)}\"]))\n",
|
||||
" g.add((set_ref, THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)))\n",
|
||||
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2616476",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Minifigures"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "a67b3e70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for minifig in re_minifigs.itertuples(index=False):\n",
|
||||
" minifig_ref = thm[f\"minifig/{minifig.fig_num}\"]\n",
|
||||
"\n",
|
||||
" # Attach label and part count to the minifigure's own node (not to a set node).\n",
|
||||
" g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n",
|
||||
" g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e9baff1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now the ugly part: Inventories"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "0c97dc4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for inventory in re_inventories.itertuples(index=False):\n",
|
||||
" inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
|
||||
"\n",
|
||||
" # Use the same `set/lego/<set_num>` IRI path under which the set nodes are created.\n",
|
||||
" g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c962cf0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Inventories relate sets, minifigures and parts to each other, creating a kind of \"top level set\" \n",
|
||||
"(this takes a lot of time)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "dc2ba03e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for inventory_part in re_inventory_parts.itertuples(index=False):\n",
|
||||
" inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\n",
|
||||
" \n",
|
||||
" inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\n",
|
||||
" part_ref = thm[f\"part/{inventory_part.part_num}\"]\n",
|
||||
"\n",
|
||||
" g.add((inventory_part_ref, RDFS.domain, inventory_ref))\n",
|
||||
" g.add((inventory_part_ref, RDFS.range, part_ref))\n",
|
||||
" g.add((inventory_part_ref, RDF.type, RDF.Property))\n",
|
||||
" \n",
|
||||
" g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\n",
|
||||
" g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\n",
|
||||
" g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"id": "8715a1cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for inventory_set in re_inventory_sets.itertuples(index=False):\n",
|
||||
" inventory_set_ref = thm[f\"inventory_set/{inventory_set.inventory_id}/{inventory_set.set_num}\"]\n",
|
||||
"\n",
|
||||
" inventory_ref = thm[f\"inventory/{inventory_set.inventory_id}\"]\n",
|
||||
" set_ref = thm[f\"set/lego/{inventory_set.set_num}\"]\n",
|
||||
"\n",
|
||||
" g.add((inventory_set_ref, RDFS.domain, inventory_ref))\n",
|
||||
" g.add((inventory_set_ref, RDFS.range, set_ref))\n",
|
||||
" g.add((inventory_set_ref, RDF.type, RDF.Property))\n",
|
||||
"\n",
|
||||
" g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "08c2c580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for inventory_minifig in re_inventory_minifigs.itertuples(index=False):\n",
|
||||
" inventory_minifig_ref = thm[f\"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}\"]\n",
|
||||
"\n",
|
||||
" inventory_ref = thm[f\"inventory/{inventory_minifig.inventory_id}\"]\n",
|
||||
" # Same IRI scheme as the minifigure nodes (`minifig/<fig_num>`), so the link resolves to them.\n",
|
||||
" minifig_ref = thm[f\"minifig/{inventory_minifig.fig_num}\"]\n",
|
||||
"\n",
|
||||
" g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))\n",
|
||||
" g.add((inventory_minifig_ref, RDFS.range, minifig_ref))\n",
|
||||
" g.add((inventory_minifig_ref, RDF.type, RDF.Property))\n",
|
||||
"\n",
|
||||
" g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bfab0c73",
|
||||
|
|
@ -269,10 +399,10 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<Graph identifier=Nf661b2e682c043188ddd822a6bca246c (<class 'rdflib.graph.Graph'>)>"
|
||||
"<Graph identifier=Nd0322d7d995f458896746825ba0ca42f (<class 'rdflib.graph.Graph'>)>"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -280,7 +410,7 @@
|
|||
"source": [
|
||||
"g.bind(\"thmont\", THM)\n",
|
||||
"\n",
|
||||
"g.serialize(\"lego_graph.ttl\", format=\"turtle\")"
|
||||
"g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -109,6 +109,13 @@
|
|||
\end{tabularx}
|
||||
\end{table}
|
||||
|
||||
Der Aufbau des Datensatzes entspricht folgendem Schema:
|
||||
|
||||
\begin{figure}[H]
|
||||
\includegraphics[width=\columnwidth]{bilder/downloads_schema_v3.png}
|
||||
\caption{Datenbankschema \textit{Rebrickable} \cite{FreeLEGOCatalog}}
|
||||
\end{figure}
|
||||
|
||||
\subsection{Brickset}
|
||||
|
||||
\textit{Brickset} ist primär eine Datenbank von Lego-Sets. Zudem dient die Seite als News-Portal, Tracking-Möglichkeit und Review-Seite für Lego-Sets. \textit{Brickset} finanziert sich über Affiliate-Marketing \cite{BricksetHomePage2026}. Die Seite wurde ausgewählt, um den von \textit{Rebrickable} erhaltenen Datensatz über Sets anzureichern, bspw. um Verpackungsdimensionen, Modelldimensionen, die \ac{UVP} und die \ac{EAN}.
|
||||
|
|
|
|||
Loading…
Reference in New Issue