309 lines
8.5 KiB
Plaintext
309 lines
8.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "747b245f",
|
|
"metadata": {},
|
|
"source": [
|
"Build the Lego Knowledge Graph using the sources in `data/`."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"id": "90209948",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal\n",
|
|
"import pandas as pd\n",
|
|
"from datetime import datetime"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fe91fa67",
|
|
"metadata": {},
|
|
"source": [
|
"Set up the requirements for building a knowledge graph."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"id": "8e573135",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Two namespaces are used throughout the notebook:\n",
"#   thm -> IRIs of the resources themselves (colors, parts, themes, ...)\n",
"#   THM -> IRIs of the ontology terms used as predicates\n",
"thm = Namespace(\"https://th-mannheim.de/\")\n",
"THM = Namespace(\"https://th-mannheim.de/ont/\")\n",
"\n",
"# Single in-memory graph that every following cell adds triples to.\n",
"g = Graph()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d56199d5",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Rebrickable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d1e1abb0",
|
|
"metadata": {},
|
|
"source": [
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"id": "d8a1fe84",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Tables that the graph-building cells in this notebook read from.\n",
"re_colors = pd.read_csv(\"data/rebrickable/colors.csv\")\n",
"re_part_categories = pd.read_csv(\"data/rebrickable/part_categories.csv\")\n",
"re_parts = pd.read_csv(\"data/rebrickable/parts.csv\")\n",
"re_elements = pd.read_csv(\"data/rebrickable/elements.csv\")\n",
"re_part_relationships = pd.read_csv(\"data/rebrickable/part_relationships.csv\")\n",
"re_themes = pd.read_csv(\"data/rebrickable/themes.csv\")\n",
"\n",
"# Remaining Rebrickable tables: loaded here, but not consumed by any cell\n",
"# visible in this notebook yet.\n",
"re_inventories = pd.read_csv(\"data/rebrickable/inventories.csv\")\n",
"re_inventory_minifigs = pd.read_csv(\"data/rebrickable/inventory_minifigs.csv\")\n",
"re_inventory_parts = pd.read_csv(\"data/rebrickable/inventory_parts.csv\")\n",
"re_inventory_sets = pd.read_csv(\"data/rebrickable/inventory_sets.csv\")\n",
"re_minifigs = pd.read_csv(\"data/rebrickable/minifigs.csv\")\n",
"re_sets = pd.read_csv(\"data/rebrickable/sets.csv\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f3677416",
|
|
"metadata": {},
|
|
"source": [
|
|
"Colors"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"id": "ae505704",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# One resource per color row: label, rgb value and transparency flag,\n",
"# plus the optional first/last year the color appeared.\n",
"for color in re_colors.itertuples(index=False):\n",
"    color_ref = thm[f\"colors/{color.id}\"]\n",
"\n",
"    g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
"    g.add((color_ref, THM.color, Literal(color.rgb)))\n",
"    g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n",
"\n",
"    # y1 -> first appearance, y2 -> last appearance. Either may be missing;\n",
"    # a present year is stored as a datetime on January 1st of that year.\n",
"    year_bounds = (\n",
"        (THM.first_year, color.y1),\n",
"        (THM.last_year, color.y2),\n",
"    )\n",
"    for year_predicate, year_value in year_bounds:\n",
"        if pd.isna(year_value):\n",
"            continue\n",
"        g.add((color_ref, year_predicate, Literal(datetime(year=int(year_value), month=1, day=1))))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e27b2bc4",
|
|
"metadata": {},
|
|
"source": [
|
|
"Part Categories"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"id": "fb9e17d6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# One resource per part category, labelled with the category's name.\n",
"for part_category in re_part_categories.itertuples(index=False):\n",
"    part_category_ref = thm[f\"part_category/{part_category.id}\"]\n",
"\n",
"    # The label is the human-readable name of the row, not the resource IRI\n",
"    # (labelling with part_category_ref would just repeat the URL as text).\n",
"    g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang=\"en\")))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ea32849b",
|
|
"metadata": {},
|
|
"source": [
|
|
"Parts"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 45,
|
|
"id": "8fdb080e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# One resource per part: label, link to its part category, and material.\n",
"for part in re_parts.itertuples(index=False):\n",
"    part_ref = thm[f\"part/{part.part_num}\"]\n",
"\n",
"    # The category IRI uses the same `part_category/<id>` scheme as the\n",
"    # part-categories cell, so the link resolves to those resources.\n",
"    part_triples = [\n",
"        (part_ref, RDFS.label, Literal(part.name, lang=\"en\")),\n",
"        (part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]),\n",
"        (part_ref, THM.part_material, Literal(part.part_material)),\n",
"    ]\n",
"    for part_triple in part_triples:\n",
"        g.add(part_triple)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fcaadd84",
|
|
"metadata": {},
|
|
"source": [
|
|
"Elements"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 46,
|
|
"id": "579b1d67",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Each element row pairs a part number with a color id; record that pairing\n",
"# as a has_color link between the existing part and color resources.\n",
"for element in re_elements.itertuples(index=False):\n",
"    g.add((\n",
"        thm[f\"part/{element.part_num}\"],\n",
"        THM.has_color,\n",
"        thm[f\"colors/{element.color_id}\"],\n",
"    ))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "44dae336",
|
|
"metadata": {},
|
|
"source": [
|
|
"Part Relationships"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 47,
|
|
"id": "00db079a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Link parent parts to their child parts.\n",
"# NOTE(review): only parent_part_num / child_part_num are read here; if the CSV\n",
"# also carries a relationship-type column it is currently ignored - confirm that\n",
"# has_child is the right predicate for every row.\n",
"for part_relationship in re_part_relationships.itertuples(index=False):\n",
"    part_ref_child = thm[f\"part/{part_relationship.child_part_num}\"]\n",
"    part_ref_parent = thm[f\"part/{part_relationship.parent_part_num}\"]\n",
"\n",
"    relationship_triple = (part_ref_parent, THM.has_child, part_ref_child)\n",
"    g.add(relationship_triple)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "19dc64b8",
|
|
"metadata": {},
|
|
"source": [
|
|
"Themes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a529aae",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "SyntaxError",
|
|
"evalue": "f-string: unmatched ')' (1024367582.py, line 2)",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
" \u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[48]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[31m \u001b[39m\u001b[31mtheme_ref = thm[f\"theme/{int(theme.id))}\"]\u001b[39m\n ^\n\u001b[31mSyntaxError\u001b[39m\u001b[31m:\u001b[39m f-string: unmatched ')'\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# One resource per theme, linked to its parent theme when it has one.\n",
"for theme in re_themes.itertuples(index=False):\n",
"    # int() keeps the IRI in the form `theme/<integer>` even if pandas read the\n",
"    # id as a float (presumably the case for parent_id, which can be missing).\n",
"    theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
"\n",
"    # Language-tag the label like every other rdfs:label in this notebook, so\n",
"    # queries filtering on lang(?label) = \"en\" also find themes.\n",
"    g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n",
"\n",
"    # Rows without a parent_id (presumably top-level themes) get no parent link.\n",
"    if pd.notna(theme.parent_id):\n",
"        g.add((theme_ref, THM.parent_theme, thm[f\"theme/{int(theme.parent_id)}\"]))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bfab0c73",
|
|
"metadata": {},
|
|
"source": [
|
"Serialize the graph in Turtle format"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "2abd6894",
|
|
"metadata": {},
|
|
"source": [
|
|
"```\n",
|
|
" ___-------___\n",
|
|
" _-~~ ~~-_\n",
|
|
" _-~ /~-_\n",
|
|
" /^\\__/^\\ /~ \\ / \\\n",
|
|
" /| O|| O| / \\_______________/ \\\n",
|
|
"| |___||__| / / \\ \\\n",
|
|
"| \\ / / \\ \\\n",
|
|
"| (_______) /______/ \\_________ \\\n",
|
|
"| / / \\ / \\\n",
|
|
" \\ \\^\\\\ \\ / \\ /\n",
|
|
" \\ || \\______________/ _-_ //\\__//\n",
|
|
" \\ ||------_-~~-_ ------------- \\ --/~ ~\\ || __/\n",
|
|
" ~-----||====/~ |==================| |/~~~~~\n",
|
|
" (_(__/ ./ / \\_\\ \\.\n",
|
|
" (_(___/ \\_____)_)\n",
|
|
"```"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a30bff8",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<Graph identifier=Nf661b2e682c043188ddd822a6bca246c (<class 'rdflib.graph.Graph'>)>"
|
|
]
|
|
},
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Register the `thmont` prefix for the ontology namespace so predicates are\n",
"# written in prefixed form instead of as full IRIs.\n",
"g.bind(prefix=\"thmont\", namespace=THM)\n",
"\n",
"# Write the whole graph to lego_graph.ttl; the call's return value is the\n",
"# cell's displayed output.\n",
"g.serialize(destination=\"lego_graph.ttl\", format=\"turtle\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv (3.14.4)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|