knowledge graph construction with rebrickable data
parent
ca8f1f55a6
commit
383493245b
|
|
@ -0,0 +1 @@
|
||||||
|
*.ttl
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,308 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "747b245f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Build the Lego Knowledge Graph using the sources in `/data`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 40,
|
||||||
|
"id": "90209948",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from datetime import datetime"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fe91fa67",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Set up the requirements for building a knowledge graph"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 41,
|
||||||
|
"id": "8e573135",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"g = Graph()\n",
|
||||||
|
"thm = Namespace(\"https://th-mannheim.de/\")\n",
|
||||||
|
"THM = Namespace(\"https://th-mannheim.de/ont/\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "d56199d5",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Rebrickable"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "d1e1abb0",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 42,
|
||||||
|
"id": "d8a1fe84",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"re_colors = pd.read_csv(\"data/rebrickable/colors.csv\")\n",
|
||||||
|
"re_elements = pd.read_csv(\"data/rebrickable/elements.csv\")\n",
|
||||||
|
"re_inventories = pd.read_csv(\"data/rebrickable/inventories.csv\")\n",
|
||||||
|
"re_inventory_minifigs = pd.read_csv(\"data/rebrickable/inventory_minifigs.csv\")\n",
|
||||||
|
"re_inventory_parts = pd.read_csv(\"data/rebrickable/inventory_parts.csv\")\n",
|
||||||
|
"re_inventory_sets = pd.read_csv(\"data/rebrickable/inventory_sets.csv\")\n",
|
||||||
|
"re_minifigs = pd.read_csv(\"data/rebrickable/minifigs.csv\")\n",
|
||||||
|
"re_part_categories = pd.read_csv(\"data/rebrickable/part_categories.csv\")\n",
|
||||||
|
"re_part_relationships = pd.read_csv(\"data/rebrickable/part_relationships.csv\")\n",
|
||||||
|
"re_parts = pd.read_csv(\"data/rebrickable/parts.csv\")\n",
|
||||||
|
"re_sets = pd.read_csv(\"data/rebrickable/sets.csv\")\n",
|
||||||
|
"re_themes = pd.read_csv(\"data/rebrickable/themes.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "f3677416",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Colors"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 43,
|
||||||
|
"id": "ae505704",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for color in re_colors.itertuples(index=False):\n",
|
||||||
|
" color_ref = thm[f\"colors/{color.id}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
|
||||||
|
" g.add((color_ref, THM.color, Literal(color.rgb)))\n",
|
||||||
|
" g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n",
|
||||||
|
" \n",
|
||||||
|
" if not pd.isna(color.y1):\n",
|
||||||
|
" # First appearance\n",
|
||||||
|
" g.add((color_ref, THM.first_year, Literal(datetime(year = int(color.y1), month=1, day=1))))\n",
|
||||||
|
" if not pd.isna(color.y2):\n",
|
||||||
|
" # Last appearance\n",
|
||||||
|
" g.add((color_ref, THM.last_year, Literal(datetime(year = int(color.y2), month=1, day=1))))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "e27b2bc4",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Part Categories"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"id": "fb9e17d6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for part_category in re_part_categories.itertuples(index=False):\n",
|
||||||
|
" part_category_ref = thm[f\"part_category/{part_category.id}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang=\"en\")))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ea32849b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Parts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 45,
|
||||||
|
"id": "8fdb080e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for part in re_parts.itertuples(index=False):\n",
|
||||||
|
" part_ref = thm[f\"part/{part.part_num}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((part_ref, RDFS.label, Literal(part.name, lang=\"en\")))\n",
|
||||||
|
" g.add((part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]))\n",
|
||||||
|
" g.add((part_ref, THM.part_material, Literal(part.part_material)))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fcaadd84",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Elements"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 46,
|
||||||
|
"id": "579b1d67",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for element in re_elements.itertuples(index=False):\n",
|
||||||
|
" part_ref = thm[f\"part/{element.part_num}\"]\n",
|
||||||
|
" color_ref = thm[f\"colors/{element.color_id}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((part_ref, THM.has_color, color_ref))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "44dae336",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Part Relationships"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 47,
|
||||||
|
"id": "00db079a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for part_relationship in re_part_relationships.itertuples(index=False):\n",
|
||||||
|
" part_ref_parent = thm[f\"part/{part_relationship.parent_part_num}\"]\n",
|
||||||
|
" part_ref_child = thm[f\"part/{part_relationship.child_part_num}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((part_ref_parent, THM.has_child, part_ref_child))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "19dc64b8",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Themes"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1a529aae",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "SyntaxError",
|
||||||
|
"evalue": "f-string: unmatched ')' (1024367582.py, line 2)",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
" \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[48]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[31m \u001b[39m\u001b[31mtheme_ref = thm[f\"theme/{int(theme.id))}\"]\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m f-string: unmatched ')'\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for theme in re_themes.itertuples(index=False):\n",
|
||||||
|
" theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
|
||||||
|
"\n",
|
||||||
|
" g.add((theme_ref, RDFS.label, Literal(theme.name)))\n",
|
||||||
|
"\n",
|
||||||
|
" if not pd.isna(theme.parent_id):\n",
|
||||||
|
" g.add((theme_ref, THM.parent_theme, thm[f\"theme/{int(theme.parent_id)}\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "bfab0c73",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Serialize the graph in turtle format"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2abd6894",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"```\n",
|
||||||
|
" ___-------___\n",
|
||||||
|
" _-~~ ~~-_\n",
|
||||||
|
" _-~ /~-_\n",
|
||||||
|
" /^\\__/^\\ /~ \\ / \\\n",
|
||||||
|
" /| O|| O| / \\_______________/ \\\n",
|
||||||
|
"| |___||__| / / \\ \\\n",
|
||||||
|
"| \\ / / \\ \\\n",
|
||||||
|
"| (_______) /______/ \\_________ \\\n",
|
||||||
|
"| / / \\ / \\\n",
|
||||||
|
" \\ \\^\\\\ \\ / \\ /\n",
|
||||||
|
" \\ || \\______________/ _-_ //\\__//\n",
|
||||||
|
" \\ ||------_-~~-_ ------------- \\ --/~ ~\\ || __/\n",
|
||||||
|
" ~-----||====/~ |==================| |/~~~~~\n",
|
||||||
|
" (_(__/ ./ / \\_\\ \\.\n",
|
||||||
|
" (_(___/ \\_____)_)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1a30bff8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<Graph identifier=Nf661b2e682c043188ddd822a6bca246c (<class 'rdflib.graph.Graph'>)>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 36,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"g.bind(\"thmont\", THM)\n",
|
||||||
|
"\n",
|
||||||
|
"g.serialize(\"lego_graph.ttl\", format=\"turtle\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "venv (3.14.4)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.14.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
|
|
@ -142,7 +142,7 @@
|
||||||
\subsection{Merlins Steine}
|
\subsection{Merlins Steine}
|
||||||
|
|
||||||
\textit{Merlins Steine} ist eine Website, die sich auf die Berichterstattung von Reviews zu Klemmbausteinen spezifiziert. Um auf entsprechende Sets zu referenzieren beinhaltet \textit{Merlins Steine} eine Datenbank an Klemmbausteinen, die über 30000 Sets unterschiedlicher Marken enthält. Merlins Steine finanziert sich über Affiliate Marketing. \cite{SetDatenbankSetDB}.\\
|
\textit{Merlins Steine} ist eine Website, die sich auf die Berichterstattung von Reviews zu Klemmbausteinen spezifiziert. Um auf entsprechende Sets zu referenzieren beinhaltet \textit{Merlins Steine} eine Datenbank an Klemmbausteinen, die über 30000 Sets unterschiedlicher Marken enthält. Merlins Steine finanziert sich über Affiliate Marketing. \cite{SetDatenbankSetDB}.\\
|
||||||
\textit{Merlins Steine} wurde ausgewählt, da Sets von anderen Marken im Vergleich zu vorherig genannten Quellen aufgelistet werden.
|
\textit{Merlins Steine} wurde ausgewählt, da Sets von anderen Marken im Vergleich zu vorherig genannten Quellen aufgelistet werden. Der Datensatz enthält Informationen zu \ac{UVP}, Teileanzahl, Release-Jahr, Thema, Modelldimensionen und Hersteller.
|
||||||
|
|
||||||
\begin{table}[H]
|
\begin{table}[H]
|
||||||
\begin{tabularx}{\columnwidth}{@{}ll@{}}
|
\begin{tabularx}{\columnwidth}{@{}ll@{}}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue