diff --git a/lego/lego_graph_rebrickable.ipynb b/lego/lego_graph_rebrickable.ipynb
index 1bf34b7..60c5ce4 100644
--- a/lego/lego_graph_rebrickable.ipynb
+++ b/lego/lego_graph_rebrickable.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 1,
    "id": "90209948",
    "metadata": {},
    "outputs": [],
@@ -31,7 +31,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
    "id": "8e573135",
    "metadata": {},
    "outputs": [],
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
    "id": "d8a1fe84",
    "metadata": {},
    "outputs": [],
@@ -88,13 +88,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "ae505704",
    "metadata": {},
    "outputs": [],
    "source": [
     "for color in re_colors.itertuples(index=False):\n",
-    "    color_ref = thm[f\"colors/{color.id}\"]\n",
+    "    color_ref = thm[f\"color/{color.id}\"]\n",
     "\n",
     "    g.add((color_ref, RDFS.label, Literal(color.name, lang=\"en\")))\n",
     "    g.add((color_ref, THM.color, Literal(color.rgb)))\n",
@@ -118,7 +118,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 5,
    "id": "fb9e17d6",
    "metadata": {},
    "outputs": [],
@@ -139,7 +139,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
    "id": "8fdb080e",
    "metadata": {},
    "outputs": [],
@@ -162,14 +162,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "579b1d67",
    "metadata": {},
    "outputs": [],
    "source": [
     "for element in re_elements.itertuples(index=False):\n",
     "    part_ref = thm[f\"part/{element.part_num}\"]\n",
-    "    color_ref = thm[f\"colors/{element.color_id}\"]\n",
+    "    color_ref = thm[f\"color/{element.color_id}\"]\n",
     "\n",
     "    g.add((part_ref, THM.has_color, color_ref))"
    ]
@@ -184,7 +184,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 8,
    "id": "00db079a",
    "metadata": {},
    "outputs": [],
@@ -206,7 +206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 9,
    "id": "1a529aae",
    "metadata": {},
    "outputs": [],
@@ -230,7 +230,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
    "id": "29b357ef",
    "metadata": {},
    "outputs": [],
@@ -255,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "id": "a67b3e70",
    "metadata": {},
    "outputs": [],
@@ -277,7 +277,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "id": "0c97dc4d",
    "metadata": {},
    "outputs": [],
@@ -285,7 +285,7 @@
     "for inventory in re_inventories.itertuples(index=False):\n",
     "    inventory_ref = thm[f\"inventory/{inventory.id}\"]\n",
     "\n",
-    "    g.add((inventory_ref, THM.set, thm[f\"sets/lego/{inventory.set_num}\"]))"
+    "    g.add((inventory_ref, THM.set, thm[f\"set/lego/{inventory.set_num}\"]))"
    ]
   },
   {
@@ -299,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 13,
    "id": "dc2ba03e",
    "metadata": {},
    "outputs": [],
@@ -321,7 +321,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 14,
    "id": "8715a1cf",
    "metadata": {},
    "outputs": [],
@@ -341,7 +341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 15,
    "id": "08c2c580",
    "metadata": {},
    "outputs": [],
@@ -377,7 +377,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 16,
    "id": "1e0ac437",
    "metadata": {},
    "outputs": [],
@@ -387,17 +387,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "fd944ccb",
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
     "for bs_set in bs_sets.itertuples(index=False):\n",
     "    num = f\"{str(bs_set.Number).strip()}-{str(bs_set.Variant)}\" #Error for Set 853357\n",
     "    set_ref = thm[f\"set/lego/{num}\"]\n",
     "\n",
-    "    "
+    "    if (set_ref, None, None) in g:\n",
+    "        if not pd.isna(bs_set.USRetailPrice):\n",
+    "            g.add((set_ref, THM.us_retail_price, Literal(bs_set.USRetailPrice, datatype=XSD.float)))\n",
+    "        if not pd.isna(bs_set.DERetailPrice):\n",
+    "            g.add((set_ref, THM.de_retail_price, Literal(bs_set.DERetailPrice, datatype=XSD.float)))\n",
+    "        if not pd.isna(bs_set.UKRetailPrice):\n",
+    "            g.add((set_ref, THM.uk_retail_price, Literal(bs_set.UKRetailPrice, datatype=XSD.float)))\n",
+    "        if not pd.isna(bs_set.CARetailPrice):\n",
+    "            g.add((set_ref, THM.ca_retail_price, Literal(bs_set.CARetailPrice, datatype=XSD.float)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a16fd51a",
+   "metadata": {},
+   "source": [
+    "# Bricklink\n",
+    "\n",
+    "Add missing minifigs, parts and sets using bricklink data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "a8beb593",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bl_minifigs = pd.read_csv(\"./data/bricklink/minifigs.csv\")\n",
+    "bl_sets = pd.read_csv(\"./data/bricklink/sets.csv\")\n",
+    "bl_parts = pd.read_csv(\"./data/bricklink/parts.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "b14e6e89",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "additional_entries = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "bbf5462b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for bl_set in bl_sets.itertuples(index=False):\n",
+    "    set_ref = thm[f\"set/lego/{bl_set.set_id}\"]\n",
+    "\n",
+    "    if (set_ref, None, None) not in g:\n",
+    "        additional_entries += 1\n",
+    "        g.add((set_ref, RDFS.label, Literal(bl_set.set_name, lang=\"en\")))  # NOTE(review): was lego_set.name (stale variable from another loop); column name set_name is assumed - confirm against sets.csv\n",
+    "        if not pd.isna(bl_set.year) and str(bl_set.year).isnumeric():\n",
+    "            g.add((set_ref, THM.first_year, Literal(datetime(int(bl_set.year), 1, 1))))\n",
+    "        if not pd.isna(bl_set.parts) and str(bl_set.parts).isnumeric():\n",
+    "            g.add((set_ref, THM.num_parts, Literal(int(bl_set.parts), datatype=XSD.integer)))\n",
+    "        g.add((set_ref, THM.brand, Literal(\"Lego\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "ef52582e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for bl_part in bl_parts.itertuples(index=False):\n",
+    "    part_ref = thm[f\"part/{bl_part.part_id}\"]\n",
+    "\n",
+    "    if (part_ref, None, None) not in g:\n",
+    "        additional_entries += 1\n",
+    "        g.add((part_ref, RDFS.label, Literal(bl_part.part_name, lang=\"en\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "8bf0ffeb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for bl_minifig in bl_minifigs.itertuples(index=False):\n",
+    "    minifig_ref = thm[f\"minifig/{bl_minifig.minifig_id}\"]  # NOTE(review): path was misspelled minfig/ - confirm it matches the existing minifig URIs\n",
+    "\n",
+    "    if (minifig_ref, None, None) not in g:\n",
+    "        additional_entries += 1\n",
+    "        g.add((minifig_ref, RDFS.label, Literal(bl_minifig.minifig_name, lang=\"en\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3491b032",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Added 107748 items\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"Added {additional_entries} items\")"
    ]
   },
   {
@@ -434,14 +541,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 24,
    "id": "1a30bff8",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       ")>"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "g.bind(\"thmont\", THM)\n",
     "\n",
-    "#g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
+    "g.serialize(\"lego_graph_rebrickable.ttl\", format=\"turtle\")"
    ]
   }
  ],