Compare commits

...

5 Commits
falko ... main

Author SHA1 Message Date
Roman Schöne 140cf2e78b paper diagrams 2026-05-03 11:50:56 +02:00
Roman Schöne 8f9314cddb coverage 2026-05-02 21:28:41 +02:00
Roman Schöne 4cee889722 adjusted merlin 2026-05-02 12:38:16 +02:00
Roman Schöne 8720ab9c57 clean up merlin data 2026-05-02 10:20:44 +02:00
Roman Schöne 605dc4f96b added pipeline and corrected typing 2026-05-01 21:32:40 +02:00
22 changed files with 18695 additions and 4925 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,212 +0,0 @@
# %% [markdown]
# Build the Lego Knowledge Graph using the sources in `/data`.
# %%
from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal
import pandas as pd
from datetime import datetime
# %% [markdown]
# Setup the requirements for building a knowledge graph
# %%
# Namespaces: `thm` mints entity IRIs, `THM` holds the ontology vocabulary.
thm = Namespace("https://thm.de/")
THM = Namespace("https://thm.de/ont/")

# The graph all triples below are collected into.
g = Graph()
# %% [markdown]
# # Rebrickable
# %% [markdown]
# ![Rebrickable](\data\rebrickable\downloads_schema_v3.png)
# %%
# Load every Rebrickable CSV export from a single base directory.
_RB = "data/rebrickable"

re_colors = pd.read_csv(f"{_RB}/colors.csv")
re_elements = pd.read_csv(f"{_RB}/elements.csv")
re_inventories = pd.read_csv(f"{_RB}/inventories.csv")
re_inventory_minifigs = pd.read_csv(f"{_RB}/inventory_minifigs.csv")
re_inventory_parts = pd.read_csv(f"{_RB}/inventory_parts.csv")
re_inventory_sets = pd.read_csv(f"{_RB}/inventory_sets.csv")
re_minifigs = pd.read_csv(f"{_RB}/minifigs.csv")
re_part_categories = pd.read_csv(f"{_RB}/part_categories.csv")
re_part_relationships = pd.read_csv(f"{_RB}/part_relationships.csv")
re_parts = pd.read_csv(f"{_RB}/parts.csv")
re_sets = pd.read_csv(f"{_RB}/sets.csv")
re_themes = pd.read_csv(f"{_RB}/themes.csv")
# %% [markdown]
# Colors
# %%
# Colors: one node per Rebrickable color with label, RGB value, transparency
# flag and — when known — the first/last year the color appeared.
for row in re_colors.itertuples(index=False):
    ref = thm[f"colors/{row.id}"]

    g.add((ref, RDFS.label, Literal(row.name, lang="en")))
    g.add((ref, THM.color, Literal(row.rgb)))
    g.add((ref, THM.is_transparent, Literal(row.is_trans, datatype=XSD.boolean)))

    # y1/y2 are the first/last year of appearance; either may be NaN.
    for year, predicate in ((row.y1, THM.first_year), (row.y2, THM.last_year)):
        if not pd.isna(year):
            g.add((ref, predicate, Literal(datetime(year=int(year), month=1, day=1))))
# %% [markdown]
# Part Categories
# %%
# Part categories: one labelled node per category.
for part_category in re_part_categories.itertuples(index=False):
    part_category_ref = thm[f"part_category/{part_category.id}"]
    # Bug fix: use the category's human-readable name as the label; the
    # original passed `part_category_ref` (the URIRef itself), so every
    # category label was just its own IRI string.
    g.add((part_category_ref, RDFS.label, Literal(part_category.name, lang="en")))
# %% [markdown]
# Parts
# %%
# Parts: one node per design (part number), labelled and linked to its
# category node and material literal.
for row in re_parts.itertuples(index=False):
    ref = thm[f"part/{row.part_num}"]
    for predicate, obj in (
        (RDFS.label, Literal(row.name, lang="en")),
        (THM.part_category, thm[f"part_category/{row.part_cat_id}"]),
        (THM.part_material, Literal(row.part_material)),
    ):
        g.add((ref, predicate, obj))
# %% [markdown]
# Elements
# %%
# Elements pair a part (shape) with a color; record the pairing directly
# on the part node.
for row in re_elements.itertuples(index=False):
    g.add((
        thm[f"part/{row.part_num}"],
        THM.has_color,
        thm[f"colors/{row.color_id}"],
    ))
# %% [markdown]
# Part Relationships
# %%
# Part relationships: a directed parent → child edge between part nodes.
for rel in re_part_relationships.itertuples(index=False):
    parent = thm[f"part/{rel.parent_part_num}"]
    child = thm[f"part/{rel.child_part_num}"]
    g.add((parent, THM.has_child, child))
# %% [markdown]
# Themes
# %%
# Themes form a hierarchy; parent_id is NaN for top-level themes.
for row in re_themes.itertuples(index=False):
    ref = thm[f"theme/{int(row.id)}"]
    g.add((ref, RDFS.label, Literal(row.name, lang="en")))
    if pd.isna(row.parent_id):
        continue
    g.add((ref, THM.parent_theme, thm[f"theme/{int(row.parent_id)}"]))
# %% [markdown]
# Sets
# %%
# Sets: labelled by name, dated by release year, linked to their theme,
# with part count and the fixed brand "Lego".
# NOTE: the name `set_ref` is intentionally kept — later code reads it.
for row in re_sets.itertuples(index=False):
    set_ref = thm[f"set/lego/{row.set_num}"]
    for predicate, obj in (
        (RDFS.label, Literal(row.name, lang="en")),
        (THM.year, Literal(datetime(int(row.year), 1, 1))),
        (THM.theme, thm[f"theme/{int(row.theme_id)}"]),
        (THM.num_parts, Literal(int(row.num_parts), datatype=XSD.integer)),
        (THM.brand, Literal("Lego")),
    ):
        g.add((set_ref, predicate, obj))
# %% [markdown]
# Minifigures
# %%
# Minifigures: one node per Rebrickable figure number.
for minifig in re_minifigs.itertuples(index=False):
    minifig_ref = thm[f"minifig/{minifig.fig_num}"]
    # Bug fix: attach label and part count to the minifig node itself; the
    # original wrote to `set_ref`, a variable leaked from the sets loop, so
    # every minifig's data ended up on the last Lego set processed.
    g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang="en")))
    g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))
# %% [markdown]
# Now the ugly part: Inventories
# %%
# Inventories: each inventory node points at the set it belongs to.
for inventory in re_inventories.itertuples(index=False):
    inventory_ref = thm[f"inventory/{inventory.id}"]
    # Bug fix: set nodes are minted under "set/lego/{set_num}" in the sets
    # loop above; the original "sets/lego/" prefix referenced IRIs that are
    # never created, leaving every inventory unlinked from its set.
    g.add((inventory_ref, THM.set, thm[f"set/lego/{inventory.set_num}"]))
# %% [markdown]
# Inventories relate sets, minifigures and parts to each other, creating a kind of "top level set"
# (this takes a lot of time)
# %%
# Inventory→part links, reified as property nodes so that quantity, the
# spare flag and the color of each entry can be attached.
for inventory_part in re_inventory_parts.itertuples(index=False):
    inventory_part_ref = thm[f"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}"]
    inventory_ref = thm[f"inventory/{inventory_part.inventory_id}"]
    part_ref = thm[f"part/{inventory_part.part_num}"]

    g.add((inventory_part_ref, RDFS.domain, inventory_ref))
    g.add((inventory_part_ref, RDFS.range, part_ref))
    g.add((inventory_part_ref, RDF.type, RDF.Property))
    g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))
    g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))
    # Bug fix: color nodes are minted under "colors/{id}" in the colors loop;
    # the original singular "color/" prefix referenced IRIs that do not exist.
    g.add((inventory_part_ref, THM.color, thm[f"colors/{inventory_part.color_id}"]))
# %%
# Inventory→sub-set links, reified so the quantity of each contained set
# can be stored on the link node.
for inv_set in re_inventory_sets.itertuples(index=False):
    link = thm[f"inventory_set/{inv_set.inventory_id}/{inv_set.set_num}"]

    g.add((link, RDFS.domain, thm[f"inventory/{inv_set.inventory_id}"]))
    g.add((link, RDFS.range, thm[f"set/lego/{inv_set.set_num}"]))
    g.add((link, RDF.type, RDF.Property))
    g.add((link, THM.quantity, Literal(int(inv_set.quantity), datatype=XSD.integer)))
# %%
# Inventory→minifig links, reified so the quantity of each figure can be
# stored on the link node.
for inventory_minifig in re_inventory_minifigs.itertuples(index=False):
    inventory_minifig_ref = thm[f"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}"]
    inventory_ref = thm[f"inventory/{inventory_minifig.inventory_id}"]
    # Bug fix: minifig nodes are minted under "minifig/{fig_num}" in the
    # minifigures loop; the original "minifig/lego/" prefix referenced IRIs
    # that are never created.
    minifig_ref = thm[f"minifig/{inventory_minifig.fig_num}"]

    g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))
    g.add((inventory_minifig_ref, RDFS.range, minifig_ref))
    g.add((inventory_minifig_ref, RDF.type, RDF.Property))
    g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))
# %% [markdown]
# Serialize the graph in turtle format
# %% [markdown]
# ```
# ___-------___
# _-~~ ~~-_
# _-~ /~-_
# /^\__/^\ /~ \ / \
# /| O|| O| / \_______________/ \
# | |___||__| / / \ \
# | \ / / \ \
# | (_______) /______/ \_________ \
# | / / \ / \
# \ \^\\ \ / \ /
# \ || \______________/ _-_ //\__//
# \ ||------_-~~-_ ------------- \ --/~ ~\ || __/
# ~-----||====/~ |==================| |/~~~~~
# (_(__/ ./ / \_\ \.
# (_(___/ \_____)_)
# ```
# %%
# Bind the "thmont" prefix before serializing so the Turtle output uses
# compact prefixed names for ontology terms instead of full IRIs.
g.bind("thmont", THM)
g.serialize("lego_graph_rebrickable.ttl", format="turtle")

View File

@ -10,15 +10,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 257,
"id": "90209948",
"metadata": {},
"outputs": [],
"source": [
"from rdflib import Graph, Namespace, XSD, OWL, RDF, RDFS, SKOS, URIRef, Literal\n",
"import pandas as pd\n",
"from datetime import datetime\n",
"import os"
"import numpy as np"
]
},
{
@ -31,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 258,
"id": "8e573135",
"metadata": {},
"outputs": [],
@ -59,7 +58,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 259,
"id": "d8a1fe84",
"metadata": {},
"outputs": [],
@ -88,7 +87,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 260,
"id": "ae505704",
"metadata": {},
"outputs": [],
@ -96,16 +95,17 @@
"for color in re_colors.itertuples(index=False):\n",
" color_ref = thm[f\"color/{color.id}\"]\n",
"\n",
" g.add((color_ref, RDF.type, THM.Color ))\n",
" g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
" g.add((color_ref, THM.color, Literal(color.rgb)))\n",
" g.add((color_ref, THM.rgbcolor, Literal(color.rgb)))\n",
" g.add((color_ref, THM.is_transparent, Literal(color.is_trans, datatype=XSD.boolean)))\n",
" \n",
" if not pd.isna(color.y1):\n",
" # First appearance\n",
" g.add((color_ref, THM.first_year, Literal(datetime(year = int(color.y1), month=1, day=1))))\n",
" g.add((color_ref, THM.first_year, Literal(int(color.y1), datatype=XSD.integer)))\n",
" if not pd.isna(color.y2):\n",
" # Last appearance\n",
" g.add((color_ref, THM.last_year, Literal(datetime(year = int(color.y2), month=1, day=1))))\n"
" g.add((color_ref, THM.last_year, Literal(int(color.y2), datatype=XSD.integer)))\n"
]
},
{
@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 261,
"id": "fb9e17d6",
"metadata": {},
"outputs": [],
@ -126,6 +126,7 @@
"for part_category in re_part_categories.itertuples(index=False):\n",
" part_category_ref = thm[f\"part_category/{part_category.id}\"]\n",
"\n",
" g.add((part_category_ref, RDF.type, THM.PartCategory ))\n",
" g.add((part_category_ref, RDFS.label, Literal(part_category_ref, lang=\"en\")))"
]
},
@ -139,7 +140,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 262,
"id": "8fdb080e",
"metadata": {},
"outputs": [],
@ -147,6 +148,7 @@
"for part in re_parts.itertuples(index=False):\n",
" part_ref = thm[f\"part/{part.part_num}\"]\n",
"\n",
" g.add((part_ref, RDF.type, THM.Part))\n",
" g.add((part_ref, RDFS.label, Literal(part.name, lang=\"en\")))\n",
" g.add((part_ref, THM.part_category, thm[f\"part_category/{part.part_cat_id}\"]))\n",
" g.add((part_ref, THM.part_material, Literal(part.part_material)))"
@ -162,16 +164,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 263,
"id": "579b1d67",
"metadata": {},
"outputs": [],
"source": [
"for element in re_elements.itertuples(index=False):\n",
" element_ref = thm[f\"element/{element.element_id}\"]\n",
" part_ref = thm[f\"part/{element.part_num}\"]\n",
" color_ref = thm[f\"color/{element.color_id}\"]\n",
"\n",
" g.add((part_ref, THM.has_color, color_ref))"
" g.add((element_ref, RDF.type, RDF.Property))\n",
" g.add((element_ref, RDF.type, THM.Element))\n",
"\n",
" g.add((element_ref, RDFS.domain, THM.Part))\n",
" g.add((element_ref, RDFS.range, THM.Color))\n",
"\n",
" g.add((element_ref, THM.color, color_ref))\n",
" g.add((part_ref, THM.part, part_ref))"
]
},
{
@ -184,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 264,
"id": "00db079a",
"metadata": {},
"outputs": [],
@ -193,7 +203,8 @@
" part_ref_parent = thm[f\"part/{part_relationship.parent_part_num}\"]\n",
" part_ref_child = thm[f\"part/{part_relationship.child_part_num}\"]\n",
"\n",
" g.add((part_ref_parent, THM.has_child, part_ref_child))"
" g.add((part_ref_parent, THM.has_child, part_ref_child))\n",
" g.add((part_ref_child, THM.has_parent, part_ref_parent))"
]
},
{
@ -206,7 +217,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 265,
"id": "1a529aae",
"metadata": {},
"outputs": [],
@ -214,6 +225,7 @@
"for theme in re_themes.itertuples(index=False):\n",
" theme_ref = thm[f\"theme/{int(theme.id)}\"]\n",
"\n",
" g.add((theme_ref, RDF.type, THM.Theme))\n",
" g.add((theme_ref, RDFS.label, Literal(theme.name, lang=\"en\")))\n",
"\n",
" if not pd.isna(theme.parent_id):\n",
@ -230,7 +242,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 266,
"id": "29b357ef",
"metadata": {},
"outputs": [],
@ -238,8 +250,9 @@
"for lego_set in re_sets.itertuples(index=False):\n",
" set_ref = thm[f\"set/lego/{lego_set.set_num}\"]\n",
"\n",
" g.add((set_ref, RDF.type, THM.Set))\n",
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
" g.add((set_ref, THM.year, Literal(datetime(int(lego_set.year), 1, 1))))\n",
" g.add((set_ref, THM.year, Literal(int(lego_set.year), datatype=XSD.integer)))\n",
" g.add((set_ref, THM.theme, thm[f\"theme/{int(lego_set.theme_id)}\"]))\n",
" g.add((set_ref, THM.num_parts, Literal(int(lego_set.num_parts), datatype=XSD.integer)))\n",
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
@ -255,7 +268,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 267,
"id": "a67b3e70",
"metadata": {},
"outputs": [],
@ -263,8 +276,9 @@
"for minifig in re_minifigs.itertuples(index=False):\n",
" minifig_ref = thm[f\"minifig/{minifig.fig_num}\"]\n",
"\n",
" g.add((set_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n",
" g.add((set_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))"
" g.add((minifig_ref, RDF.type, THM.Minifig))\n",
" g.add((minifig_ref, RDFS.label, Literal(minifig.name, lang=\"en\")))\n",
" g.add((minifig_ref, THM.num_parts, Literal(int(minifig.num_parts), datatype=XSD.integer)))"
]
},
{
@ -277,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 268,
"id": "0c97dc4d",
"metadata": {},
"outputs": [],
@ -285,6 +299,7 @@
"for inventory in re_inventories.itertuples(index=False):\n",
" inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
"\n",
" g.add((inventory_ref, RDF.type, THM.Inventory))\n",
" g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
]
},
@ -299,29 +314,47 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 269,
"id": "dc2ba03e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nfor inventory_part in re_inventory_parts.itertuples(index=False):\\n inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\\n\\n inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\\n part_ref = thm[f\"part/{inventory_part.part_num}\"]\\n\\n g.add((inventory_part_ref, RDF.type, THM.PartInv))\\n g.add((inventory_part_ref, RDF.type, RDF.Property))\\n\\n g.add((inventory_part_ref, RDFS.domain, THM.Inventory))\\n g.add((inventory_part_ref, RDFS.range, THM.Part))\\n\\n g.add((inventory_ref, THM.contains, inventory_part_ref))\\n g.add((part_ref, THM.belongs, inventory_part_ref))\\n\\n g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\\n g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\\n g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))\\n'"
]
},
"execution_count": 269,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"for inventory_part in re_inventory_parts.itertuples(index=False):\n",
" inventory_part_ref = thm[f\"inventory_part/{inventory_part.inventory_id}/{inventory_part.part_num}\"]\n",
" \n",
" inventory_ref = thm[f\"inventory/{inventory_part.inventory_id}\"]\n",
" part_ref = thm[f\"part/{inventory_part.part_num}\"]\n",
"\n",
" g.add((inventory_part_ref, RDFS.domain, inventory_ref))\n",
" g.add((inventory_part_ref, RDFS.range, part_ref))\n",
" g.add((inventory_part_ref, RDF.type, THM.PartInv))\n",
" g.add((inventory_part_ref, RDF.type, RDF.Property))\n",
"\n",
" g.add((inventory_part_ref, RDFS.domain, THM.Inventory))\n",
" g.add((inventory_part_ref, RDFS.range, THM.Part))\n",
" \n",
" g.add((inventory_ref, THM.contains, inventory_part_ref))\n",
" g.add((part_ref, THM.belongs, inventory_part_ref))\n",
"\n",
" g.add((inventory_part_ref, THM.quantity, Literal(int(inventory_part.quantity), datatype=XSD.integer)))\n",
" g.add((inventory_part_ref, THM.is_spare, Literal(inventory_part.is_spare, datatype=XSD.boolean)))\n",
" g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))"
" g.add((inventory_part_ref, THM.color, thm[f\"color/{inventory_part.color_id}\"]))\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 270,
"id": "8715a1cf",
"metadata": {},
"outputs": [],
@ -332,16 +365,21 @@
" inventory_ref = thm[f\"inventory/{inventory_set.inventory_id}\"]\n",
" set_ref = thm[f\"set/lego/{inventory_set.set_num}\"]\n",
"\n",
" g.add((inventory_set_ref, RDFS.domain, inventory_ref))\n",
" g.add((inventory_set_ref, RDFS.range, set_ref))\n",
" g.add((inventory_set_ref, RDF.type, THM.SetInv))\n",
" g.add((inventory_set_ref, RDF.type, RDF.Property))\n",
"\n",
" g.add((inventory_set_ref, RDFS.domain, THM.Inventory))\n",
" g.add((inventory_set_ref, RDFS.range, THM.Set))\n",
"\n",
" g.add((inventory_ref, THM.contains, inventory_set_ref))\n",
" g.add((set_ref, THM.belongs, inventory_set_ref))\n",
" \n",
" g.add((inventory_set_ref, THM.quantity, Literal(int(inventory_set.quantity), datatype=XSD.integer)))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 271,
"id": "08c2c580",
"metadata": {},
"outputs": [],
@ -350,12 +388,17 @@
" inventory_minifig_ref = thm[f\"inventory_minifig/{inventory_minifig.inventory_id}/{inventory_minifig.fig_num}\"]\n",
"\n",
" inventory_ref = thm[f\"inventory/{inventory_minifig.inventory_id}\"]\n",
" minifig_ref = thm[f\"minifig/lego/{inventory_minifig.fig_num}\"]\n",
" minifig_ref = thm[f\"minifig/{inventory_minifig.fig_num}\"]\n",
"\n",
" g.add((inventory_minifig_ref, RDFS.domain, inventory_ref))\n",
" g.add((inventory_minifig_ref, RDFS.range, minifig_ref))\n",
" g.add((inventory_minifig_ref, RDF.type, THM.MinifigInv))\n",
" g.add((inventory_minifig_ref, RDF.type, RDF.Property))\n",
"\n",
" g.add((inventory_minifig_ref, RDFS.domain, THM.Inventory))\n",
" g.add((inventory_minifig_ref, RDFS.range, THM.Minifig))\n",
"\n",
" g.add((inventory_ref, THM.contains, inventory_minifig_ref))\n",
" g.add((minifig_ref, THM.belongs, inventory_minifig_ref))\n",
" \n",
" g.add((inventory_minifig_ref, THM.quantity, Literal(int(inventory_minifig.quantity), datatype=XSD.integer)))"
]
},
@ -377,17 +420,26 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 272,
"id": "1e0ac437",
"metadata": {},
"outputs": [],
"source": [
"bs_sets = pd.read_csv(\"./data/brickset/sets.csv\")"
"bs_sets = pd.read_csv(\"./data/brickset/sets.csv\")\n",
"bs_parts = pd.read_csv(\"./data/brickset/parts.csv\")"
]
},
{
"cell_type": "markdown",
"id": "d120c079",
"metadata": {},
"source": [
"Add Set prices"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 273,
"id": "fd944ccb",
"metadata": {},
"outputs": [],
@ -397,14 +449,40 @@
" set_ref = thm[f\"set/lego/{num}\"]\n",
"\n",
" if (set_ref, None, None) in g:\n",
" if not pd.isna(bs_set.USRetailPrice):\n",
" g.add((set_ref, THM.us_retail_price, Literal(bs_set.USRetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.DERetailPrice):\n",
" g.add((set_ref, THM.de_retail_price, Literal(bs_set.DERetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.UKRetailPrice):\n",
" g.add((set_ref, THM.us_retail_price, Literal(bs_set.UKRetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.CARetailPrice):\n",
" g.add((set_ref, THM.ca_retail_price, Literal(bs_set.CARetailPrice, datatype=XSD.float)))"
" #brickset prices already in euro\n",
" #choose the cheapest price since the usual customer wont choose the highest price\n",
" options = [bs_set.USRetailPrice, bs_set.DERetailPrice, bs_set.UKRetailPrice, bs_set.CARetailPrice]\n",
" options = [int(opt) for opt in options if not pd.isna(opt)]\n",
"\n",
" if len(options) >= 1:\n",
" cheapest = min(options)\n",
" g.add((set_ref, THM.price_new, Literal(cheapest, datatype=XSD.float)))"
]
},
{
"cell_type": "markdown",
"id": "cbd69fa6",
"metadata": {},
"source": [
"Only concrete elements (parts considering their shape, color and print) can have prices"
]
},
{
"cell_type": "code",
"execution_count": 274,
"id": "307a3210",
"metadata": {},
"outputs": [],
"source": [
"\n",
"for bs_element in bs_parts.itertuples(index=False):\n",
" element_ref = thm[f\"element/{bs_element.ElementID}\"]\n",
" \n",
" if (element_ref, None, None) in g:\n",
" if not pd.isna(bs_element.BrickLinkSoldPriceNew):\n",
" g.add((element_ref, THM.price_new, Literal(bs_element.BrickLinkSoldPriceNew, datatype=XSD.float)))\n",
" if not pd.isna(bs_element.BrickLinkSoldPriceUsed):\n",
" g.add((element_ref, THM.price_used, Literal(bs_element.BrickLinkSoldPriceUsed, datatype=XSD.float)))"
]
},
{
@ -419,7 +497,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 275,
"id": "a8beb593",
"metadata": {},
"outputs": [],
@ -431,17 +509,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b14e6e89",
"metadata": {},
"outputs": [],
"source": [
"additional_entries = 0"
]
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 276,
"id": "bbf5462b",
"metadata": {},
"outputs": [],
@ -450,10 +518,9 @@
" set_ref = thm[f\"set/lego/{bl_set.set_id}\"]\n",
"\n",
" if not (set_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
" if not pd.isna(bl_set.year) and str(bl_set.year).isnumeric():\n",
" g.add((set_ref, THM.first_year, Literal(datetime(int(bl_set.year), 1, 1))))\n",
" g.add((set_ref, THM.year, Literal(int(bl_set.year))))\n",
" if not pd.isna(bl_set.parts) and str(bl_set.parts).isnumeric():\n",
" g.add((set_ref, THM.num_parts, Literal(int(bl_set.parts), datatype=XSD.integer)))\n",
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
@ -461,50 +528,139 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 277,
"id": "ef52582e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nfor bl_part in bl_parts.itertuples(index=False):\\n part_ref = thm[f\"part/{bl_part.part_id}\"]\\n\\n if not (part_ref, None, None) in g:\\n additional_entries += 1\\n g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))\\n'"
]
},
"execution_count": 277,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"for bl_part in bl_parts.itertuples(index=False):\n",
" part_ref = thm[f\"part/{bl_part.part_id}\"]\n",
"\n",
" if not (part_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))"
" g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 278,
"id": "8bf0ffeb",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'\\nfor bl_minifig in bl_minifigs.itertuples(index=False):\\n minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\\n\\n if not (minifig_ref, None, None) in g:\\n g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))\\n'"
]
},
"execution_count": 278,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"for bl_minifig in bl_minifigs.itertuples(index=False):\n",
" minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\n",
"\n",
" if not (minifig_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))"
" g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))\n",
"\"\"\""
]
},
{
"cell_type": "markdown",
"id": "e73471b9",
"metadata": {},
"source": [
"# Merlins Steine"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "3491b032",
"execution_count": 279,
"id": "ab1ec488",
"metadata": {},
"outputs": [],
"source": [
"me_sets = pd.read_csv(\"./data/merlin/others.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 293,
"id": "9bcd2956",
"metadata": {},
"outputs": [],
"source": [
"t = me_sets[me_sets[\"brand\"] == \"Pantasy\"]"
]
},
{
"cell_type": "code",
"execution_count": 294,
"id": "9ab21460",
"metadata": {},
"outputs": [],
"source": [
"t[\"ratio\"] = t[\"price_eur\"] / t[\"num_parts\"]"
]
},
{
"cell_type": "code",
"execution_count": 295,
"id": "459c3a4c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Added 107748 items\n"
"data": {
"text/plain": [
"np.float64(0.43016261640379705)"
]
},
"execution_count": 295,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(f\"Added {additional_entries} items\")"
"t[\"ratio\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 282,
"id": "bfcf2840",
"metadata": {},
"outputs": [],
"source": [
"for me_set in me_sets.itertuples(index=False):\n",
" if not pd.isna(me_set.brand) and not pd.isna(me_set.id):\n",
" set_ref = thm[f\"set/{me_set.brand}/{me_set.id}\"]\n",
"\n",
" g.add((set_ref, RDF.type, THM.Set))\n",
" g.add((set_ref, RDFS.label, Literal(me_set.name, lang=\"en\")))\n",
" if not pd.isna(me_set.release):\n",
" g.add((set_ref, THM.year, Literal(int(me_set.release), datatype=XSD.integer)))\n",
"\n",
" if not pd.isna(me_set.num_parts):\n",
" g.add((set_ref, THM.num_parts, Literal(int(me_set.num_parts), datatype=XSD.integer)))\n",
" g.add((set_ref, THM.brand, Literal(me_set.brand)))\n",
" if not pd.isna(me_set.price_eur):\n",
" g.add((set_ref, THM.price_new, Literal(me_set.price_eur, datatype=XSD.float)))"
]
},
{
@ -541,25 +697,25 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 283,
"id": "1a30bff8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Graph identifier=N30c6d515851c45f1af93153d75c76ea9 (<class 'rdflib.graph.Graph'>)>"
"<Graph identifier=Naee4bab906a6444290a3659ffe0fbd45 (<class 'rdflib.graph.Graph'>)>"
]
},
"execution_count": 24,
"execution_count": 283,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.bind(\"thmont\", THM)\n",
"g.bind(\"thm\", THM)\n",
"\n",
"g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
"g.serialize(\"lego_graph.ttl\", format=\"turtle\")"
]
}
],

View File

@ -30,7 +30,7 @@
a4paper,margin=25mm
}
\title{\huge{Knowledgegraphen - Lego}}
\title{\huge{Knowledge Graph - Lego}}
\date{\today}
\author{
\begin{tabular}{ccc}
@ -49,7 +49,7 @@
%... then configure it.
\fancyhead{} % clear all header fields
\fancyhead[L]{Lego}
\fancyhead[R]{KGR - Knowledgegraphen}
\fancyhead[R]{KGR - Knowledge Graphen}
\fancyfoot{} % clear all footer fields
\fancyfoot[LE,RO]{\thepage}
@ -74,16 +74,16 @@
\begin{enumerate}
\item \label{item:min_set_count} Was ist die minimale Anzahl an Sets, die benötigt wird um ein anderes Set zusammenzubauen?
\item \label{item:min_set_price} Was ist der geringste Preis einer Auswahl an Sets um ein anderes Set zusammenzubauen?
\item \label{item:set_span} Welche anderen Sets, können mit Sets, die sich schon im eigenen Besitz befinden zusammengebaut werden?
\item Sind Sets von anderen Herstellern im Vergleich zu Lego Sets, auf den durchschnittlichen Teilepreis betrachtet billiger?
\item Haben neuere Sets im Vergleich zu älteren Sets eine geringere Teileanzahl, da auf eine grössere Anzahl an speziell angefertigten Teilen zugegriffen werden kann?
\item Haben Sets mit höherer Teileanzahl eine höhere Anzahl an Minifiguren?
\item \label{item:set_span} Welche anderen Sets, können mit Sets, die sich schon im eigenen Besitz befinden zusammengebaut werden?
\item \label{item:equivalent_part} Welche Lego-Teile besitzen äquivalente Teile von anderen Marken?
\end{enumerate}
\subsection{Nutzen}
Wird ein Set an Klemmbausteinen nicht mehr vertrieben und man möchte das Set dennoch haben, so ergeben sich mehrere Möglichkeiten:
Wird ein Set an Klemmbausteinen nicht mehr vertrieben, an welchem dennoch Nachfrage besteht, existieren folgende Möglichkeiten:
\begin{itemize}
\item Man kauft das Set von einem Zweitanbieter
\item Man stellt sich die benötigten Teile des Sets selbst zusammen. Dies geschieht entweder indem die Teile einzeln von Zweitanbietern gekauft werden oder durch den Erwerb von anderen Sets, welche die benötigten Teile enthalten. Siehe Fragen: \ref{item:min_set_count} und \ref{item:min_set_price}.
@ -114,6 +114,7 @@
\begin{figure}[H]
\includegraphics[width=\columnwidth]{bilder/downloads_schema_v3.png}
\caption{Datenbankschema \textit{Rebrickable} \cite{FreeLEGOCatalog}}
\label{fig:rebrickable_scheme}
\end{figure}
Der Datensatz konnte über die URL \url{https://rebrickable.com/downloads/} erhalten werden.
@ -127,7 +128,7 @@
\toprule
& Brickset \\ \midrule
URL & \url{https://brickset.com/}\\
Beschaffung & Webscraping/CSV-Download \\
Beschaffung & CSV-Download \\
Lizenz & nicht spezifiziert \\
Erhalt & 23.04.2026 \\ \bottomrule
\end{tabularx}
@ -179,14 +180,17 @@
\subsection{Integrationsprozess}
Jedes von Lego veröffentlichte Teil besitzt der Form zugrunde eine eindeutige Teile-Nummer, auch Design-ID genannt. Die Teilenummer wird nur aufgrund der Form eines Legosteins vergeben und kann auf dem Lego-Stein abgelesen werden. Üblicherweise besitzt eine Design-ID 4-5 Stellen. Abhängig von der Form, Farbe und des Drucks besitzt jeder Lego-Stein eine 6-7 stellige Element-ID. Diese Element-ID lässt sich Lego-Sets besitzen ebenfalls eine Set-Nummer. Allerdings gibt es spezielle Lego-Sets, welche in Teil-Sets aufgegliedert werden oder mehrere Bauvarianten besitzen \cite{FreeLEGOCatalog}. Diese sind in der Modellierung dieser Arbeit nicht weiter berücksichtigt. Stattdessen wird das zugrundeliegende Hauptset betrachtet. Lego-Minifiguren erhalten durch Lego keine eindeutige Identifikationsnummer. Zur eindeutigen Identifikation von Minifiguren wird die von \textit{Rebrickable} vergebene ID verwendet.\\
Da die einzige Quelle für andere Hersteller nur \textit{Merlins Steine} ist und diese nur Sets enthält, wird der Hersteller in der IRI miteinbezogen.
Jedes von Lego veröffentlichte Teil besitzt der Form zugrunde eine eindeutige Teile-Nummer, auch Design-ID genannt. Die Teilenummer wird nur aufgrund der Form eines Legosteins vergeben und kann auf dem Lego-Stein abgelesen werden. Üblicherweise besitzt eine Design-ID 4-5 Stellen. Abhängig von der Form, Farbe und des Drucks besitzt jeder Lego-Stein eine 6-7 stellige Element-ID. Die Element-IDs von Teilen eines Lego-Sets befindet sich als Auflistung aller Teile in der Bauanleitung eines Lego-Sets.\\
Sets besitzen ebenfalls eine Set-Nummer. Einige Set-Nummern werden mit einem Suffix bspw. \textit{-1} angegeben. Dieser Suffix gibt Aufschluss über die Version des Sets. Allerdings gibt es spezielle Lego-Sets, welche in Teil-Sets aufgegliedert werden oder mehrere Bauvarianten besitzen \cite{FreeLEGOCatalog}. Diese Art von Sets wird mithilfe von Inventaren modelliert (siehe \ref{fig:rebrickable_scheme}). Ein Inventar kann als übergeordnetes Set verstanden werden. Ein Inventar kann somit Set-, Minifiguren- und Teile-Inventare besitzen, die angeben, in welcher Stückzahl ein Teil-Set, eine Minifigur oder ein Teil vorhanden ist.\\
Lego-Minifiguren erhalten durch Lego keine eindeutige Identifikationsnummer. Zur eindeutigen Identifikation von Minifiguren wird die von \textit{Rebrickable} vergebene ID verwendet. Auf Minifiguren-Seiten von \textit{Rebrickable} sind Referenzen zu IDs derselben Minifigur auf anderen Seiten enthalten. Diese Referenzen sind innerhalb des bereitgestellten Datensatzes nicht abgebildet.\\
Da die einzige Quelle für andere Hersteller nur \textit{Merlins Steine} ist und diese nur Sets enthält, wird der Hersteller in der IRI nur für Sets miteinbezogen.
\begin{verbatim}
https://thm.de/set/{brand}/{id}
https://thm.de/set/{brand}/{id}
\end{verbatim}
Um die Dateigrösse des Graph zu reduzieren wurde \texttt{thm}, statt \texttt{th-mannheim} verwendet.
\begin{figure}[H]
\includegraphics[width=\columnwidth]{bilder/example_part_number.png}
\centering
\includegraphics[width=0.8\columnwidth]{bilder/example_part_number.png}
\caption{Lego Stein mit Teile-Nummer (Design-ID) 41769 \cite{cunninghamSellLEGOBricklink2018}}
\label{fig:lego_example_part_number}
\end{figure}
@ -199,22 +203,170 @@
\subsection{Pipeline}
Die Datensätze von \textit{Bricklink} und \textit{Merlins Steine} wurden durch Webscraping erhoben. Entstandene Fehler durch Ausnahmefälle mussten manuell bereinigt werden. Demnach ist dieser Teil nicht automatisierbar. Abbildung \ref{fig:pipeline} zeigt die Pipeline zur Erstellung des Knowledge Graph.
\begin{figure}[H]
\includegraphics[width=\columnwidth]{./bilder/kgr_pipeline1.drawio.png}
\caption{Pipeline Erstellung Knowledge Graph}
\label{fig:pipeline}
\end{figure}
\section{Evaluation}
\subsection{Ergebnis}
Das Projekt kann unter der URL: \url{https://gitty.informatik.hs-mannheim.de/2211275/kgr} betrachtet werden.
Der resultierende Knowledge-Graph ist über 300 MB gross. Die Dateigrösse lässt sich auf die Zuordnungen von Teilen zu Inventaren zurückführen.
\subsection{Beispiel-Queries}
Erhalten der Gesamtheit aller Lego Star Wars Minifiguren:
\begin{verbatim}
SELECT DISTINCT ?name
WHERE {
?set thmont:theme ?theme.
?theme rdf:type thmont:Theme.
?set rdf:type thmont:Set.
?theme rdfs:label "Star Wars"@en.
?inventory thmont:set ?set.
?inventory rdf:type thmont:Inventory.
?inventory thmont:contains ?minifig_inv.
?minifig_inv rdf:type thmont:MinifigInv.
?minifig thmont:belongs ?minifig_inv.
?minifig rdfs:label ?name.
}
\end{verbatim}
Anzahl aller in Lego-Sets enthaltenen Minifiguren, gruppiert nach Figur.
\begin{verbatim}
SELECT
(SUM(?quantity) as ?sum) ?minifig ?name
WHERE {
?minifig rdf:type thm:Minifig.
?minifig_inv rdf:type thm:MinifigInv.
?minifig thm:belongs ?minifig_inv.
?minifig_inv thm:quantity ?quantity.
?minifig rdfs:label ?name.
}
GROUP BY ?minifig ?name
ORDER BY DESC(?sum)
\end{verbatim}
Durchschnittliche Anzahl an Teilen je Set gruppiert nach Jahren.
\begin{verbatim}
SELECT ?year (AVG(?part_count) as ?avgp)
WHERE {
?set thm:year ?year.
?set thm:num_parts ?part_count.
}
GROUP BY ?year
ORDER BY DESC(?avgp)
\end{verbatim}
Durchschnittlicher Teilepreis gruppiert nach Marken. \label{verb:ppp_query}
\begin{verbatim}
SELECT ?brand (AVG(?price)/AVG(?num) as ?t)
WHERE {
?set thm:num_parts ?num.
?set rdfs:label ?name.
?set rdf:type thm:Set.
?set thm:brand ?brand.
?set thm:price_new ?price.
FILTER (?num > 0)
}
GROUP BY ?brand
ORDER BY DESC(?t)
\end{verbatim}
\subsection{Konsistenz}
\subsection{Abdeckung}
Tabelle \ref{tab:coverage} gibt einen Überblick darüber, welche der Prädikate (Graph, Name, Kategorie, Preise, Jahr) der Knowledge Graph für Minifiguren (Figs), Teile und Sets abdeckt (X = enthalten, - = fehlt). Das Prädikat \textit{Graph} spiegelt wider, ob die jeweilige Entität überhaupt im Graph vorhanden ist.
\begin{table}[H]
\centering
\begin{tabular}{@{}lllllll@{}}
\toprule
& \multicolumn{3}{l}{Lego} & \multicolumn{3}{l}{Andere Marken} \\ \midrule
& Figs & Teile & Sets & Figs & Teile & Sets \\ \midrule
Graph & X & X & X & - & - & X \\
Name & X & X & X & - & - & X\\
Kateg. & - & X & X & - & - & - \\
Preise & - & X & X & - & - & X \\
Jahr & - & - & X & - & - & X \\ \bottomrule
\end{tabular}
\caption{Abdeckung des Graphen für Lego und weitere Klemmbausteinmarken}
\label{tab:coverage}
\end{table}
Da Lego keine IDs für Minifiguren vergibt, ist das Erkennen zweier gleichartiger Figuren schwieriger. Die Preise von \textit{Brickset} konnten nicht den Minifiguren aus \textit{Rebrickable} zugeordnet werden. Da \textit{Rebrickable} die Zuordnung von Minifiguren zu Sets liefert, wurde die Entscheidung getroffen, auf die Preiszuordnung zu verzichten. Für Figuren und Teile weiterer Marken war eine Zuordnung nur schwer bis gar nicht möglich. Diese Zuordnung wäre beispielsweise durch Bilderkennung anhand vom Hersteller bereitgestellter Bauanleitungen in Form von PDF-Dateien möglich.
\subsection{Qualität}
\subsection{Beantwortung der Fragestellungen}
Es wird betrachtet, ob die ursprünglichen Fragestellungen mithilfe des Knowledge Graphen beantwortet werden können.
\begin{enumerate}
\item Was ist die minimale Anzahl an Sets, die benötigt wird, um ein anderes Set zusammenzubauen?
\item Was ist der geringste Preis einer Auswahl an Sets, um ein anderes Set zusammenzubauen?
\item Welche anderen Sets können mit Sets, die sich schon im eigenen Besitz befinden, zusammengebaut werden?\\
\textit{Der Knowledge Graph beinhaltet die Daten, sodass die Fragestellungen 1., 2., 3. beantwortet werden können. Eine effiziente Softwarelösung besitzt eine höhere geschätzte Komplexität und liegt ausserhalb des Rahmens dieser Arbeit.}
\item Sind Sets von anderen Herstellern im Vergleich zu Lego-Sets, auf den durchschnittlichen Teilepreis betrachtet, billiger?\\
\textit{Diese Frage kann mithilfe der letzten Beispiel-Query \ref{verb:ppp_query} beantwortet werden. Abbildung \ref{fig:ppp} zeigt die Teile-Preise je Hersteller. Lego hat den höchsten Teilepreis mit 0.096 \texteuro und BlueBrixx den niedrigsten mit 0.0437 \texteuro.
\begin{figure}[H]
\centering
\includegraphics[width=\columnwidth]{./bilder/diagram_avg_part_price_brand.png}
\caption{Hersteller sortiert nach durchschnittlichem Teile-Preis in \texteuro}
\label{fig:ppp}
\end{figure}
}
\item Haben neuere Sets im Vergleich zu älteren Sets eine geringere Teileanzahl, da auf eine grössere Anzahl an speziell angefertigten Teilen zugegriffen werden kann?\\
\textit{Es ist das Gegenteil zu erkennen. Eine Erklärung dafür könnte sein, dass die Klemmbaustein-Marken mehrheitlich Sets für \ac{AFOL} herausbringen im Vergleich zu vorherigen Jahren. Diese Sets sind meistens komplexer und besitzen demnach eine höhere Teileanzahl.}
\begin{figure}[H]
\centering
\includegraphics[width=\columnwidth]{./bilder/diagram_avg_parts_per_year.png}
\caption{Durchschnittliche Teileanzahl nach Jahren aller Klemmbausteinmarken}
\label{fig:avg_parts_per_year}
\end{figure}
\begin{verbatim}
SELECT (AVG(?parts) as ?total) ?year
WHERE {
?set rdf:type thm:Set.
?set thm:brand ?brand.
?set thm:year ?year.
?set rdfs:label ?name.
?set thm:num_parts ?parts.
}
GROUP BY ?year
ORDER BY ASC(?total)
\end{verbatim}
\item Haben Sets mit höherer Teileanzahl eine höhere Anzahl an Minifiguren?\\
\textit{Zwischen der Anzahl Minifiguren $M$ und der Anzahl an Teilen $T$ eines Sets kann eine mittelstarke Korrelation $\rho_{M,T} \approx 0.5926$ festgestellt werden. Hier gab es mehrere interessante Ausreisser: \href{https://rebrickable.com/sets/BIGBOX-1/the-ultimate-battle-for-chima}{BIGBOX-1,The Ultimate Battle for Chima}, \href{https://rebrickable.com/sets/2000409-2/window-exploration-bag/}{2000409-2, Window Exploration Bag 2} und \href{https://rebrickable.com/sets/2000409-1/window-exploration-bag/}{2000409-1, Window Exploration Bag 1}.}
\textit{
\begin{figure}[H]
\centering
\includegraphics[width=\columnwidth]{./bilder/diagram_correlation_parts_figs.png}
\caption{Anzahl Minifiguren und Teile}
\label{fig:scatter_parts_figs}
\end{figure}
}
Der Datensatz konnte mithilfe folgender SPARQL-Query erhalten werden. Hier werden Sets mit 0 Teilen herausgefiltert.
\begin{verbatim}
SELECT ?part_num (SUM(?qty) as ?total)
WHERE {
?set rdf:type thm:Set.
?set thm:brand ?brand.
?set rdfs:label ?name.
?set thm:num_parts ?part_num.
?inv thm:set ?set.
?inv rdf:type thm:Inventory.
?inv thm:contains ?fig_inv.
?fig_inv thm:quantity ?qty.
FILTER(?part_num > 0).
}
GROUP BY ?set ?part_num ?name ?inv
\end{verbatim}
\item Welche Lego-Teile besitzen äquivalente Teile von anderen Marken?\\
\textit{Der Knowledge Graph bietet keine Möglichkeit, dies zu beantworten, da keine Datensätze über Teile, die nicht von Lego produziert worden sind, in die Erstellung des Graphen eingeflossen sind.}
\end{enumerate}
\section{Ausblick}
\begin{itemize}
\item Es fehlen tiefer greifende Daten zu anderen Klemmbausteinmarken neben Lego. Ein Erhalt der Daten wäre nur durch direkte Anfrage beim Hersteller oder durch aufwändige Methodik, wie Bilderkennung, möglich.
\item Identische Minifiguren könnten mit höherem Aufwand gegenseitig zugeordnet werden. So würde eine Verbindung zwischen der Inventarisierung durch \textit{Rebrickable} und den Preisen von \textit{Brickset} entstehen.
\item Zur Beantwortung der Fragestellungen 1, 2 und 3 ist eine eigene Softwarelösung und eine Erweiterung des Graphen vonnöten. \textit{Rebrickable} bietet selbst Werkzeuge \url{https://rebrickable.com/help/build-engine/} an, um diese Fragestellungen zu beantworten.
\end{itemize}
\section*{Abkürzungsverzeichnis}
\begin{acronym}[Abkürzungsverzeichnis]
\acro{MOC}{My Own Creation}

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -0,0 +1,31 @@
<mxfile host="app.diagrams.net">
<diagram name="Seite-1" id="_iVW848-2TJ0zfREs3N2">
<mxGraphModel dx="808" dy="425" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-11" edge="1" parent="1" source="Hu6JBMkSkJOX-7uFVOw6-2" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="Hu6JBMkSkJOX-7uFVOw6-3">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-2" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="Bricklink,&amp;nbsp;&lt;span style=&quot;background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));&quot;&gt;Brickset&lt;/span&gt;&lt;div&gt;Rebrickable,&amp;nbsp;&lt;span style=&quot;background-color: transparent; color: light-dark(rgb(0, 0, 0), rgb(255, 255, 255));&quot;&gt;Merlin&lt;/span&gt;&lt;/div&gt;" vertex="1">
<mxGeometry height="50" width="110" x="50" y="150" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-12" edge="1" parent="1" source="Hu6JBMkSkJOX-7uFVOw6-3" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="Hu6JBMkSkJOX-7uFVOw6-8">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-3" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="Datensätze als .CSV" vertex="1">
<mxGeometry height="50" width="120" x="200" y="150" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;rounded=0;" value="Manuelle Abfrage &amp;amp; Transformation" vertex="1">
<mxGeometry height="30" width="100" x="130" y="210" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-8" parent="1" style="rounded=1;whiteSpace=wrap;html=1;" value="lego_graph.ttl" vertex="1">
<mxGeometry height="50" width="120" x="360" y="150" as="geometry" />
</mxCell>
<mxCell id="Hu6JBMkSkJOX-7uFVOw6-10" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;rounded=0;" value="Jupyter Notebook" vertex="1">
<mxGeometry height="30" width="100" x="290" y="210" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -0,0 +1 @@
,Lambda/Roman,Lambda,03.05.2026 11:29,file:///C:/Users/Roman/AppData/Roaming/LibreOffice/4;

View File

@ -0,0 +1,6 @@
"brand","t"
"Lego","0.09687904"
"Cobi","0.07586302"
"Pantasy","0.05728256"
"MouldKing","0.05323224"
"BlueBrixx","0.043733075"
1 brand t
2 Lego 0.09687904
3 Cobi 0.07586302
4 Pantasy 0.05728256
5 MouldKing 0.05323224
6 BlueBrixx 0.043733075

Binary file not shown.

View File

@ -0,0 +1,78 @@
"total","year"
"0.0","2027"
"1.0","1950"
"100.263888888888888888888889","1973"
"101.025641025641025641025641","1972"
"103.288135593220338983050847","1985"
"103.794871794871794871794872","1974"
"104.323741007194244604316547","1992"
"104.347058823529411764705882","1994"
"105.636363636363636363636364","1980"
"106.891608391608391608391608","1997"
"113.566666666666666666666667","1967"
"12.435897435897435897435897","1955"
"12.5","1953"
"122.129411764705882352941176","1988"
"123.9125295508274231678487","1998"
"124.003865979381443298969072","2012"
"124.326375711574952561669829","2014"
"125.840579710144927536231884","1993"
"126.522070015220700152207002","2011"
"127.65641025641025641025641","1995"
"129.071428571428571428571429","1968"
"131.587837837837837837837838","1991"
"132.5","2013"
"134.275735294117647058823529","2003"
"134.780487804878048780487805","1975"
"139.038461538461538461538462","2004"
"14.25","1959"
"140.74025974025974025974026","1978"
"142.662337662337662337662338","1976"
"147.16195121951219512195122","2015"
"148.229656419529837251356239","2009"
"159.889","2016"
"16.0","1956"
"161.840396753832281334535618","2017"
"163.051724137931034482758621","1990"
"165.224489795918367346938776","1996"
"165.62577962577962577962578","2007"
"166.35746606334841628959276","2006"
"174.373493975903614457831325","2005"
"175.333333333333333333333333","1960"
"179.64218455743879472693032","2010"
"18.015151515151515151515152","1958"
"183.630669546436285097192225","2008"
"204.192013593882752761257434","2018"
"277.083064516129032258064516","2019"
"34.291666666666666666666667","1957"
"341.934300993124522536287242","2021"
"391.574585635359116022099448","2020"
"40.868421052631578947368421","1964"
"403.468056489576328177538668","2022"
"43.819672131147540983606557","1966"
"475.79951690821256038647343","2023"
"48.964285714285714285714286","1965"
"500.052801724137931034482759","2026"
"593.916924664602683178534572","2024"
"612.001367365542388331814038","2025"
"64.955056179775280898876404","1982"
"65.820833333333333333333333","1987"
"67.295081967213114754098361","1963"
"71.92","1969"
"75.561224489795918367346939","1984"
"77.122807017543859649122807","1977"
"8.357142857142857142857143","1954"
"80.690476190476190476190476","1970"
"85.846153846153846153846154","1962"
"87.706521739130434782608696","2001"
"88.775362318840579710144928","1989"
"91.280701754385964912280702","1971"
"92.6796875","1999"
"93.075980392156862745098039","2000"
"93.593939393939393939393939","1986"
"93.785714285714285714285714","1981"
"93.853333333333333333333333","1983"
"94.418524871355060034305317","2002"
"94.88","1961"
"96.4","1949"
"96.913978494623655913978495","1979"
1 total year
2 0.0 2027
3 1.0 1950
4 100.263888888888888888888889 1973
5 101.025641025641025641025641 1972
6 103.288135593220338983050847 1985
7 103.794871794871794871794872 1974
8 104.323741007194244604316547 1992
9 104.347058823529411764705882 1994
10 105.636363636363636363636364 1980
11 106.891608391608391608391608 1997
12 113.566666666666666666666667 1967
13 12.435897435897435897435897 1955
14 12.5 1953
15 122.129411764705882352941176 1988
16 123.9125295508274231678487 1998
17 124.003865979381443298969072 2012
18 124.326375711574952561669829 2014
19 125.840579710144927536231884 1993
20 126.522070015220700152207002 2011
21 127.65641025641025641025641 1995
22 129.071428571428571428571429 1968
23 131.587837837837837837837838 1991
24 132.5 2013
25 134.275735294117647058823529 2003
26 134.780487804878048780487805 1975
27 139.038461538461538461538462 2004
28 14.25 1959
29 140.74025974025974025974026 1978
30 142.662337662337662337662338 1976
31 147.16195121951219512195122 2015
32 148.229656419529837251356239 2009
33 159.889 2016
34 16.0 1956
35 161.840396753832281334535618 2017
36 163.051724137931034482758621 1990
37 165.224489795918367346938776 1996
38 165.62577962577962577962578 2007
39 166.35746606334841628959276 2006
40 174.373493975903614457831325 2005
41 175.333333333333333333333333 1960
42 179.64218455743879472693032 2010
43 18.015151515151515151515152 1958
44 183.630669546436285097192225 2008
45 204.192013593882752761257434 2018
46 277.083064516129032258064516 2019
47 34.291666666666666666666667 1957
48 341.934300993124522536287242 2021
49 391.574585635359116022099448 2020
50 40.868421052631578947368421 1964
51 403.468056489576328177538668 2022
52 43.819672131147540983606557 1966
53 475.79951690821256038647343 2023
54 48.964285714285714285714286 1965
55 500.052801724137931034482759 2026
56 593.916924664602683178534572 2024
57 612.001367365542388331814038 2025
58 64.955056179775280898876404 1982
59 65.820833333333333333333333 1987
60 67.295081967213114754098361 1963
61 71.92 1969
62 75.561224489795918367346939 1984
63 77.122807017543859649122807 1977
64 8.357142857142857142857143 1954
65 80.690476190476190476190476 1970
66 85.846153846153846153846154 1962
67 87.706521739130434782608696 2001
68 88.775362318840579710144928 1989
69 91.280701754385964912280702 1971
70 92.6796875 1999
71 93.075980392156862745098039 2000
72 93.593939393939393939393939 1986
73 93.785714285714285714285714 1981
74 93.853333333333333333333333 1983
75 94.418524871355060034305317 2002
76 94.88 1961
77 96.4 1949
78 96.913978494623655913978495 1979

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 14,
"id": "ad994162",
"metadata": {},
"outputs": [],
@ -14,13 +14,12 @@
"import pandas as pd\n",
"import time\n",
"import random\n",
"import re\n",
"import pprint"
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "b5536e8c",
"metadata": {},
"outputs": [],
@ -30,44 +29,55 @@
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5daea73",
"execution_count": 16,
"id": "6d109e8a",
"metadata": {},
"outputs": [],
"source": [
"with open(\"./data/merlin/others.csv\", mode=\"w+\", encoding=\"utf8\", newline=\"\") as producerfile:\n",
" writer = csv.writer(producerfile)\n",
" writer.writerow([\"id\", \"producer\", \"name\", \"size\", \"parts\", \"year\"])\n",
" for producer in producers:\n",
" with open(f\"data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as sourcefile:\n",
" data = json.loads(sourcefile.read())\n",
" for row in data[\"data\"]:\n",
" _, id, _, name, rating, _, _, size, parts, year, _ = row\n",
"id_to_name = dict()\n",
"for producer in producers:\n",
" with open(f\"../data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as prodfile:\n",
" listings = json.load(prodfile)[\"data\"]\n",
"\n",
" writer.writerow([id, producer, name, size, parts, year])"
" for listing in listings:\n",
" name = listing[3] \n",
" id = listing[1]\n",
"\n",
" id_to_name[id] = name"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 17,
"id": "ab997198",
"metadata": {},
"outputs": [],
"source": [
"# uvp preise bestimmen :(\n",
"def get_all_ids() -> list[str]:\n",
" df = pd.read_csv(\"./data/merlin/others.csv\")\n",
" df = pd.read_csv(\"../data/merlin/others.csv\")\n",
" return df[\"id\"].to_list()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 18,
"id": "32b1fa46",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"' with open(\"../data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\\n for idx, id in enumerate(get_all_ids()[3663:]):\\n try:\\n small_id = id.lower()\\n\\n response = rq.get(f\"https://www.merlinssteine.de/sets/{small_id}\")\\n soup = bs4.BeautifulSoup(response.text)\\n\\n # Prices\\n price_eur = soup.find(id=\"listprice_eur\")\\n price_usd = soup.find(id=\"listprice_usd\")\\n price_cn = soup.find(id=\"listprice_cn\")\\n bestprice_eur = soup.find(id=\"bestprice_eur\")\\n bestprice_usd = soup.find(id=\"bestprice_usd\")\\n bestprice_cn = soup.find(id=\"bestprice_cn\")\\n\\n all_prices = [price_eur, price_cn, price_usd, bestprice_eur, bestprice_cn, bestprice_usd]\\n\\n #categories\\n other_dump = [description.text.replace(\"\\n\", \"\") for description in soup.find_all(class_=\"setpage_ct\")]\\n writer = csv.writer(pricefile)\\n\\n all_prices = [p.text if p != None else \"_\" for p in all_prices]\\n writer.writerow([id, *all_prices, *other_dump])\\n time.sleep(random.randint(2, 3))\\n except Exception as e:\\n print(e) '"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open(\"./data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
"\"\"\" with open(\"../data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
" for idx, id in enumerate(get_all_ids()[3663:]):\n",
" try:\n",
" small_id = id.lower()\n",
@ -93,12 +103,12 @@
" writer.writerow([id, *all_prices, *other_dump])\n",
" time.sleep(random.randint(2, 3))\n",
" except Exception as e:\n",
" print(e)"
" print(e) \"\"\""
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 19,
"id": "4a10a1e3",
"metadata": {},
"outputs": [],
@ -118,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "9c00f188",
"metadata": {},
"outputs": [],
@ -167,79 +177,82 @@
},
{
"cell_type": "code",
"execution_count": 176,
"id": "ae53869e",
"execution_count": 21,
"id": "9b44a0e5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Listenpreis:': '14.95 EUR (7.3 ct/Teil) (7.9 ct/g)',\n",
" 'DetailsVon:': 'BlueBrixx',\n",
" 'EAN:': '4060904003671',\n",
" 'Steine von:': 'Qunlong',\n",
" 'Kategorie:': 'EisenbahnHersteller-',\n",
" 'Kategorien:': 'BBSpecial, BRIX',\n",
" 'Anleitung': 'Ohne Bauabschnitte',\n",
" 'Bewertungen': 'Bewerten',\n",
" 'Hersteller-Videos': 'video-1',\n",
" 'Inhalt': '205 Teile',\n",
" 'Gewicht': ': 190 g',\n",
" 'Keine Aufkleber': '',\n",
" 'Keine Drucke': '',\n",
" 'Farbverteilung': '',\n",
" 'TeilelistenBrickLink': 'XMLRebrickable CSVLEGO PaB CSVSetDB CSV',\n",
" 'PreiseListenpreis:': '14.95 EUR (7.3 ct/Teil) (7.9 ct/g)'}\n"
]
}
],
"outputs": [],
"source": [
"details = {\n",
" \"id\" : [],\n",
" \"listprice_eur\" : [],\n",
" \"listprice_cn\" : [],\n",
" \"listprice_usd\" : [],\n",
" \"bestprice_eur\" : [],\n",
" \"bestprice_cn\" : [],\n",
" \"bestprice_usd\" : [],\n",
" \"brand\" : [],\n",
" \"ean\" : [],\n",
" \"producer\" : [],\n",
" \"release\" : [],\n",
" \"scale\" : [],\n",
" \"category\" : [],\n",
" \"producer_category\" : [],\n",
" \"num_parts\" : [],\n",
" \"width\" : [],\n",
" \"height\" : [],\n",
" \"depth\" : [],\n",
" \"designer\" : [],\n",
" \"weight\" : [],\n",
" \"age\" : []\n",
"}\n",
"import random\n",
"\n",
"me_details = pd.DataFrame(details)\n",
"\n",
"with open(\"../data/merlin/prices.csv\", mode=\"r\", encoding=\"utf8\") as price_file:\n",
" reader = csv.reader(price_file)\n",
"\n",
" # for row in reader:\n",
" # id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = row\n",
" \n",
" # me_details.loc[-1] = [id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd] + list(range(0, 12))\n",
" # me_details.index = me_details.index + 1\n",
" id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = [row for row in reader][random.randint(0, 4500)]\n",
" other = filter(lambda s: not \"Wikipedia\" in s, other)\n",
"\n",
" pprint.pp(split_by_keywords(\"\".join(other), keywords))\n"
"def rm_epsilon(l : list[str]) ->list[str]:\n",
" return list(filter(lambda s : len(s) > 0, l))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "b83aa413",
"execution_count": 56,
"id": "ae53869e",
"metadata": {},
"outputs": [],
"source": [
"me_details = pd.DataFrame({\n",
" \"id\" : [],\n",
" \"name\" : [],\n",
" \"price_eur\" : [],\n",
" \"price_cn\" : [],\n",
" \"price_us\" : [],\n",
" \"brand\" : [],\n",
" \"ean\" : [],\n",
" \"producer\" : [],\n",
" \"release\" : [],\n",
" \"category\" : [],\n",
" \"producer_category\" : [],\n",
" \"num_parts\" : [],\n",
" })\n",
"\n",
"with open(\"../data/merlin/prices.csv\", mode=\"r\", encoding=\"utf8\") as price_file:\n",
" reader = csv.reader(price_file)\n",
"\n",
" for row in reader:\n",
" id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = row\n",
" other = filter(lambda s: not \"Wikipedia\" in s, other)\n",
"\n",
" retrieved = split_by_keywords(\"\".join(other), keywords)\n",
"\n",
" brand = retrieved.get(\"DetailsVon:\", \"\").replace(\" \", \"\")\n",
" ean = retrieved.get(\"EAN:\", \"\")\n",
" producer = retrieved.get(\"Steine von:\", \"\")\n",
" age = retrieved.get(\"Altersempfehlung:\", \"\")\n",
" release = retrieved.get(\"Release:\", \"\").split(\" \")[-1]\n",
" num_parts = retrieved.get(\"Inhalt\", \"\").split(\"Teile\")[0].replace(\"Ein Teil\", \"1\").replace(\"Preise\", \"\").replace(\"Mit Fernsteuerung / Elektrik\", \"1\").replace(\"Eine Minifigur\", \"1\").replace(\"Minifiguren\", \"\").strip()\n",
"\n",
" category = retrieved.get(\"Kategorie:\", \"\").strip().split(\",\")\n",
" categories = \",\".join(rm_epsilon(retrieved.get(\"Kategorien:\", \"\") .split(\",\") + category)).replace(\"Hersteller\", \"\")\n",
" producer_category = retrieved.get(\"Hersteller-Kategorie:\", \"\").split(\",\")\n",
" producer_categories = \",\".join(rm_epsilon(retrieved.get(\"Hersteller-Kategorien:\", \"\").split(\",\") + producer_category))\n",
"\n",
" if brand == \"\":\n",
" continue\n",
" me_extra = pd.DataFrame({\n",
" \"id\" : [id],\n",
" \"name\" : [id_to_name.get(id, \"\")],\n",
" \"price_eur\" : [lp_eur.replace(\"_\", \"\")],\n",
" \"price_us\" : [lp_usd.replace(\"_\", \"\")],\n",
" \"price_cn\" : [lp_cn.replace(\"_\", \"\")],\n",
" \"brand\" : [brand],\n",
" \"ean\" : [ean],\n",
" \"producer\" : [producer],\n",
" \"release\" : [release],\n",
" \"category\" : [categories],\n",
" \"producer_category\" : [producer_categories],\n",
" \"num_parts\" : [num_parts],\n",
" })\n",
"\n",
" me_details = pd.concat([me_details, me_extra])"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "1b5bcea6",
"metadata": {},
"outputs": [
{
@ -264,49 +277,47 @@
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>listprice_eur</th>\n",
" <th>listprice_cn</th>\n",
" <th>listprice_usd</th>\n",
" <th>bestprice_eur</th>\n",
" <th>bestprice_cn</th>\n",
" <th>bestprice_usd</th>\n",
" <th>name</th>\n",
" <th>price_eur</th>\n",
" <th>price_cn</th>\n",
" <th>price_us</th>\n",
" <th>brand</th>\n",
" <th>ean</th>\n",
" <th>producer</th>\n",
" <th>...</th>\n",
" <th>scale</th>\n",
" <th>release</th>\n",
" <th>category</th>\n",
" <th>producer_category</th>\n",
" <th>num_parts</th>\n",
" <th>width</th>\n",
" <th>height</th>\n",
" <th>depth</th>\n",
" <th>designer</th>\n",
" <th>weight</th>\n",
" <th>age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [id, listprice_eur, listprice_cn, listprice_usd, bestprice_eur, bestprice_cn, bestprice_usd, brand, ean, producer, release, scale, category, producer_category, num_parts, width, height, depth, designer, weight, age]\n",
"Index: []\n",
"\n",
"[0 rows x 21 columns]"
"Columns: [id, name, price_eur, price_cn, price_us, brand, ean, producer, release, category, producer_category, num_parts]\n",
"Index: []"
]
},
"execution_count": 40,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"me_details"
"me_details[\"Mit Fernsteuerung / Elektrik\" == me_details[\"num_parts\"]]"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "0fb65dec",
"metadata": {},
"outputs": [],
"source": [
"me_details.to_csv(\"../data/merlin/others.csv\", index=False)"
]
}
],