added bricklink and brickset data to graph

pull/1/head
Roman Schöne 2026-04-28 10:44:06 +02:00
parent 524c2e6d8b
commit f2a8ca0391
1 changed files with 143 additions and 25 deletions

View File

@ -10,7 +10,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 1,
"id": "90209948", "id": "90209948",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -31,7 +31,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 2,
"id": "8e573135", "id": "8e573135",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -59,7 +59,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 3,
"id": "d8a1fe84", "id": "d8a1fe84",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -88,13 +88,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"id": "ae505704", "id": "ae505704",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"for color in re_colors.itertuples(index=False):\n", "for color in re_colors.itertuples(index=False):\n",
" color_ref = thm[f\"colors/{color.id}\"]\n", " color_ref = thm[f\"color/{color.id}\"]\n",
"\n", "\n",
" g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n", " g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
" g.add((color_ref, THM.color, Literal(color.rgb)))\n", " g.add((color_ref, THM.color, Literal(color.rgb)))\n",
@ -118,7 +118,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 5,
"id": "fb9e17d6", "id": "fb9e17d6",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -139,7 +139,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 6,
"id": "8fdb080e", "id": "8fdb080e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -162,14 +162,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": null,
"id": "579b1d67", "id": "579b1d67",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"for element in re_elements.itertuples(index=False):\n", "for element in re_elements.itertuples(index=False):\n",
" part_ref = thm[f\"part/{element.part_num}\"]\n", " part_ref = thm[f\"part/{element.part_num}\"]\n",
" color_ref = thm[f\"colors/{element.color_id}\"]\n", " color_ref = thm[f\"color/{element.color_id}\"]\n",
"\n", "\n",
" g.add((part_ref, THM.has_color, color_ref))" " g.add((part_ref, THM.has_color, color_ref))"
] ]
@ -184,7 +184,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 8,
"id": "00db079a", "id": "00db079a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -206,7 +206,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 9,
"id": "1a529aae", "id": "1a529aae",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -230,7 +230,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 10,
"id": "29b357ef", "id": "29b357ef",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -255,7 +255,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 11,
"id": "a67b3e70", "id": "a67b3e70",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -277,7 +277,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": null,
"id": "0c97dc4d", "id": "0c97dc4d",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -285,7 +285,7 @@
"for inventory in re_inventories.itertuples(index=False):\n", "for inventory in re_inventories.itertuples(index=False):\n",
" inventory_ref = thm[f\"inventory/{inventory.id}\"]\n", " inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
"\n", "\n",
" g.add((inventory_ref, THM.set, thm[f\"sets/lego/{inventory.set_num}\"]))" " g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
] ]
}, },
{ {
@ -299,7 +299,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 13,
"id": "dc2ba03e", "id": "dc2ba03e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -321,7 +321,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 14,
"id": "8715a1cf", "id": "8715a1cf",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -341,7 +341,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 15,
"id": "08c2c580", "id": "08c2c580",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -377,7 +377,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 45, "execution_count": 16,
"id": "1e0ac437", "id": "1e0ac437",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -387,17 +387,124 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 17,
"id": "fd944ccb", "id": "fd944ccb",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"\n",
"for bs_set in bs_sets.itertuples(index=False):\n", "for bs_set in bs_sets.itertuples(index=False):\n",
" num = f\"{str(bs_set.Number).strip()}-{str(bs_set.Variant)}\" #Error for Set 853357\n", " num = f\"{str(bs_set.Number).strip()}-{str(bs_set.Variant)}\" #Error for Set 853357\n",
" set_ref = thm[f\"set/lego/{num}\"]\n", " set_ref = thm[f\"set/lego/{num}\"]\n",
"\n", "\n",
" " " if (set_ref, None, None) in g:\n",
" if not pd.isna(bs_set.USRetailPrice):\n",
" g.add((set_ref, THM.us_retail_price, Literal(bs_set.USRetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.DERetailPrice):\n",
" g.add((set_ref, THM.de_retail_price, Literal(bs_set.DERetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.UKRetailPrice):\n",
" g.add((set_ref, THM.us_retail_price, Literal(bs_set.UKRetailPrice, datatype=XSD.float)))\n",
" if not pd.isna(bs_set.CARetailPrice):\n",
" g.add((set_ref, THM.ca_retail_price, Literal(bs_set.CARetailPrice, datatype=XSD.float)))"
]
},
{
"cell_type": "markdown",
"id": "a16fd51a",
"metadata": {},
"source": [
"# Bricklink\n",
"\n",
"Add missing minifigs, parts and sets using bricklink data"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "a8beb593",
"metadata": {},
"outputs": [],
"source": [
"bl_minifigs = pd.read_csv(\"./data/bricklink/minifigs.csv\")\n",
"bl_sets = pd.read_csv(\"./data/bricklink/sets.csv\")\n",
"bl_parts = pd.read_csv(\"./data/bricklink/parts.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b14e6e89",
"metadata": {},
"outputs": [],
"source": [
"additional_entries = 0"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "bbf5462b",
"metadata": {},
"outputs": [],
"source": [
"for bl_set in bl_sets.itertuples(index=False):\n",
" set_ref = thm[f\"set/lego/{bl_set.set_id}\"]\n",
"\n",
" if not (set_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((set_ref, RDFS.label, Literal(lego_set.name, lang=\"en\")))\n",
" if not pd.isna(bl_set.year) and str(bl_set.year).isnumeric():\n",
" g.add((set_ref, THM.first_year, Literal(datetime(int(bl_set.year), 1, 1))))\n",
" if not pd.isna(bl_set.parts) and str(bl_set.parts).isnumeric():\n",
" g.add((set_ref, THM.num_parts, Literal(int(bl_set.parts), datatype=XSD.integer)))\n",
" g.add((set_ref, THM.brand, Literal(\"Lego\")))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "ef52582e",
"metadata": {},
"outputs": [],
"source": [
"for bl_part in bl_parts.itertuples(index=False):\n",
" part_ref = thm[f\"part/{bl_part.part_id}\"]\n",
"\n",
" if not (part_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "8bf0ffeb",
"metadata": {},
"outputs": [],
"source": [
"for bl_minifig in bl_minifigs.itertuples(index=False):\n",
" minifig_ref = thm[f\"minfig/{bl_minifig.minifig_id}\"]\n",
"\n",
" if not (minifig_ref, None, None) in g:\n",
" additional_entries += 1\n",
" g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "3491b032",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Added 107748 items\n"
]
}
],
"source": [
"print(f\"Added {additional_entries} items\")"
] ]
}, },
{ {
@ -434,14 +541,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 24,
"id": "1a30bff8", "id": "1a30bff8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"<Graph identifier=N30c6d515851c45f1af93153d75c76ea9 (<class 'rdflib.graph.Graph'>)>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"g.bind(\"thmont\", THM)\n", "g.bind(\"thmont\", THM)\n",
"\n", "\n",
"#g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")" "g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
] ]
} }
], ],