clean up merlin data
parent
605dc4f96b
commit
8720ab9c57
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
13853
lego/lego_graph.ipynb
13853
lego/lego_graph.ipynb
File diff suppressed because it is too large
Load Diff
|
|
@ -114,6 +114,7 @@
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\includegraphics[width=\columnwidth]{bilder/downloads_schema_v3.png}
|
\includegraphics[width=\columnwidth]{bilder/downloads_schema_v3.png}
|
||||||
\caption{Datenbankschema \textit{Rebrickable} \cite{FreeLEGOCatalog}}
|
\caption{Datenbankschema \textit{Rebrickable} \cite{FreeLEGOCatalog}}
|
||||||
|
\label{fig:rebrickable_scheme}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
Der Datensatz konnte über die URL \url{https://rebrickable.com/downloads/} erhalten werden.
|
Der Datensatz konnte über die URL \url{https://rebrickable.com/downloads/} erhalten werden.
|
||||||
|
|
@ -179,13 +180,14 @@
|
||||||
|
|
||||||
\subsection{Integrationsprozess}
|
\subsection{Integrationsprozess}
|
||||||
|
|
||||||
Jedes von Lego veröffentlichte Teil besitzt der Form zugrunde eine eindeutige Teile-Nummer, auch Design-ID genannt. Die Teilenummer wird nur aufgrund der Form eines Legosteins vergeben und kann auf dem Lego-Stein abgelesen werden. Üblicherweise besitzt eine Design-ID 4-5 Stellen. Abhängig von der Form, Farbe und des Drucks besitzt jeder Lego-Stein eine 6-7 stellige Element-ID. Diese Element-ID lässt sich Lego-Sets besitzen ebenfalls eine Set-Nummer. Allerdings gibt es spezielle Lego-Sets, welche in Teil-Sets aufgegliedert werden oder mehrere Bauvarianten besitzen \cite{FreeLEGOCatalog}. Diese sind in der Modellierung dieser Arbeit nicht weiter berücksichtigt. Stattdessen wird das zugrundeliegende Hauptset betrachtet. Lego-Minifiguren erhalten durch Lego keine eindeutige Identifikationsnummer. Zur eindeutigen Identifikation von Minifiguren wird die von \textit{Rebrickable} vergebene ID verwendet.\\
|
Jedes von Lego veröffentlichte Teil besitzt, abhängig von seiner Form, eine eindeutige Teile-Nummer, auch Design-ID genannt. Die Teilenummer wird nur aufgrund der Form eines Legosteins vergeben und kann auf dem Lego-Stein abgelesen werden. Üblicherweise besitzt eine Design-ID 4-5 Stellen. Abhängig von Form, Farbe und Druck besitzt jeder Lego-Stein eine 6-7-stellige Element-ID. Die Element-IDs von Teilen eines Lego-Sets befinden sich als Auflistung aller Teile in der Bauanleitung eines Lego-Sets.\\
|
||||||
|
Sets besitzen ebenfalls eine Set-Nummer. Allerdings gibt es spezielle Lego-Sets, welche in Teil-Sets aufgegliedert werden oder mehrere Bauvarianten besitzen \cite{FreeLEGOCatalog}. Diese Art von Sets wird mithilfe von Inventaren modelliert (siehe Abbildung~\ref{fig:rebrickable_scheme}). Ein Inventar kann als übergeordnetes Set verstanden werden. Ein Inventar kann somit Set-, Minifiguren- und Teile-Inventare besitzen, die angeben, in welcher Stückzahl ein Teil-Set, eine Minifigur oder ein Teil vorhanden ist.\\
|
||||||
|
Lego-Minifiguren erhalten durch Lego keine eindeutige Identifikationsnummer. Zur eindeutigen Identifikation von Minifiguren wird die von \textit{Rebrickable} vergebene ID verwendet.\\
|
||||||
Da \textit{Merlins Steine} die einzige Quelle für andere Hersteller ist und diese nur Sets enthält, wird der Hersteller in die IRI miteinbezogen.
|
Da \textit{Merlins Steine} die einzige Quelle für andere Hersteller ist und diese nur Sets enthält, wird der Hersteller in die IRI miteinbezogen.
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
https://thm.de/set/{brand}/{id}
|
https://thm.de/set/{brand}/{id}
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
Um die Dateigröße des Graphen zu reduzieren, wurde \texttt{thm} statt \texttt{th-mannheim} verwendet.
|
Um die Dateigröße des Graphen zu reduzieren, wurde \texttt{thm} statt \texttt{th-mannheim} verwendet.
|
||||||
|
|
||||||
\begin{figure}[H]
|
\begin{figure}[H]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=0.8\columnwidth]{bilder/example_part_number.png}
|
\includegraphics[width=0.8\columnwidth]{bilder/example_part_number.png}
|
||||||
|
|
@ -235,9 +237,36 @@ WHERE {
|
||||||
?minifig rdfs:label ?name.
|
?minifig rdfs:label ?name.
|
||||||
}
|
}
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
Welche Minifiguren kommen am häufigsten vor?
|
||||||
|
\begin{verbatim}
|
||||||
|
SELECT
|
||||||
|
(SUM(?quantity) as ?sum) ?minifig ?name
|
||||||
|
WHERE {
|
||||||
|
?minifig rdf:type thm:Minifig.
|
||||||
|
?minifig_inv rdf:type thm:MinifigInv.
|
||||||
|
?minifig thm:belongs ?minifig_inv.
|
||||||
|
?minifig_inv thm:quantity ?quantity.
|
||||||
|
?minifig rdfs:label ?name.
|
||||||
|
}
|
||||||
|
GROUP BY ?minifig ?name
|
||||||
|
ORDER BY DESC(?sum)
|
||||||
|
\end{verbatim}
|
||||||
|
Durchschnittliche Anzahl an Teilen je Set gruppiert nach Jahren
|
||||||
|
\begin{verbatim}
|
||||||
|
SELECT ?year (AVG(?part_count) as ?avgp)
|
||||||
|
WHERE {
|
||||||
|
?set thm:year ?year.
|
||||||
|
?set thm:num_parts ?part_count.
|
||||||
|
}
|
||||||
|
GROUP BY ?year
|
||||||
|
ORDER BY DESC(?avgp
|
||||||
|
)
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
\subsection{Abdeckung}
|
\subsection{Abdeckung}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Konsistenz}
|
\subsection{Konsistenz}
|
||||||
|
|
||||||
\subsection{Qualität}
|
\subsection{Qualität}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 44,
|
"execution_count": null,
|
||||||
"id": "ad994162",
|
"id": "ad994162",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -14,8 +14,7 @@
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import random\n",
|
"import random\n",
|
||||||
"import re\n",
|
"import re"
|
||||||
"import pprint"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -30,44 +29,44 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 103,
|
||||||
"id": "a5daea73",
|
"id": "6d109e8a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open(\"./data/merlin/others.csv\", mode=\"w+\", encoding=\"utf8\", newline=\"\") as producerfile:\n",
|
"id_to_name = dict()\n",
|
||||||
" writer = csv.writer(producerfile)\n",
|
"for producer in producers:\n",
|
||||||
" writer.writerow([\"id\", \"producer\", \"name\", \"size\", \"parts\", \"year\"])\n",
|
" with open(f\"../data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as prodfile:\n",
|
||||||
" for producer in producers:\n",
|
" listings = json.load(prodfile)[\"data\"]\n",
|
||||||
" with open(f\"data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as sourcefile:\n",
|
|
||||||
" data = json.loads(sourcefile.read())\n",
|
|
||||||
" for row in data[\"data\"]:\n",
|
|
||||||
" _, id, _, name, rating, _, _, size, parts, year, _ = row\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" writer.writerow([id, producer, name, size, parts, year])"
|
" for listing in listings:\n",
|
||||||
|
" name = listing[3] \n",
|
||||||
|
" id = listing[1]\n",
|
||||||
|
"\n",
|
||||||
|
" id_to_name[id] = name"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 11,
|
||||||
"id": "ab997198",
|
"id": "ab997198",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# uvp preise bestimmen :(\n",
|
"# uvp preise bestimmen :(\n",
|
||||||
"def get_all_ids() -> list[str]:\n",
|
"def get_all_ids() -> list[str]:\n",
|
||||||
" df = pd.read_csv(\"./data/merlin/others.csv\")\n",
|
" df = pd.read_csv(\"../data/merlin/others.csv\")\n",
|
||||||
" return df[\"id\"].to_list()"
|
" return df[\"id\"].to_list()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 12,
|
||||||
"id": "32b1fa46",
|
"id": "32b1fa46",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open(\"./data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
|
"with open(\"../data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
|
||||||
" for idx, id in enumerate(get_all_ids()[3663:]):\n",
|
" for idx, id in enumerate(get_all_ids()[3663:]):\n",
|
||||||
" try:\n",
|
" try:\n",
|
||||||
" small_id = id.lower()\n",
|
" small_id = id.lower()\n",
|
||||||
|
|
@ -98,7 +97,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 13,
|
||||||
"id": "4a10a1e3",
|
"id": "4a10a1e3",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -118,7 +117,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 14,
|
||||||
"id": "9c00f188",
|
"id": "9c00f188",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
|
|
@ -167,79 +166,79 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 176,
|
"execution_count": 85,
|
||||||
"id": "ae53869e",
|
"id": "9b44a0e5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"{'Listenpreis:': '14.95 EUR (7.3 ct/Teil) (7.9 ct/g)',\n",
|
|
||||||
" 'DetailsVon:': 'BlueBrixx',\n",
|
|
||||||
" 'EAN:': '4060904003671',\n",
|
|
||||||
" 'Steine von:': 'Qunlong',\n",
|
|
||||||
" 'Kategorie:': 'EisenbahnHersteller-',\n",
|
|
||||||
" 'Kategorien:': 'BBSpecial, BRIX',\n",
|
|
||||||
" 'Anleitung': 'Ohne Bauabschnitte',\n",
|
|
||||||
" 'Bewertungen': 'Bewerten',\n",
|
|
||||||
" 'Hersteller-Videos': 'video-1',\n",
|
|
||||||
" 'Inhalt': '205 Teile',\n",
|
|
||||||
" 'Gewicht': ': 190 g',\n",
|
|
||||||
" 'Keine Aufkleber': '',\n",
|
|
||||||
" 'Keine Drucke': '',\n",
|
|
||||||
" 'Farbverteilung': '',\n",
|
|
||||||
" 'TeilelistenBrickLink': 'XMLRebrickable CSVLEGO PaB CSVSetDB CSV',\n",
|
|
||||||
" 'PreiseListenpreis:': '14.95 EUR (7.3 ct/Teil) (7.9 ct/g)'}\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"details = {\n",
|
"def rm_epsilon(l : list[str]) ->list[str]:\n",
|
||||||
" \"id\" : [],\n",
|
" return list(filter(lambda s : len(s) > 0, l))"
|
||||||
" \"listprice_eur\" : [],\n",
|
|
||||||
" \"listprice_cn\" : [],\n",
|
|
||||||
" \"listprice_usd\" : [],\n",
|
|
||||||
" \"bestprice_eur\" : [],\n",
|
|
||||||
" \"bestprice_cn\" : [],\n",
|
|
||||||
" \"bestprice_usd\" : [],\n",
|
|
||||||
" \"brand\" : [],\n",
|
|
||||||
" \"ean\" : [],\n",
|
|
||||||
" \"producer\" : [],\n",
|
|
||||||
" \"release\" : [],\n",
|
|
||||||
" \"scale\" : [],\n",
|
|
||||||
" \"category\" : [],\n",
|
|
||||||
" \"producer_category\" : [],\n",
|
|
||||||
" \"num_parts\" : [],\n",
|
|
||||||
" \"width\" : [],\n",
|
|
||||||
" \"height\" : [],\n",
|
|
||||||
" \"depth\" : [],\n",
|
|
||||||
" \"designer\" : [],\n",
|
|
||||||
" \"weight\" : [],\n",
|
|
||||||
" \"age\" : []\n",
|
|
||||||
"}\n",
|
|
||||||
"import random\n",
|
|
||||||
"\n",
|
|
||||||
"me_details = pd.DataFrame(details)\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"../data/merlin/prices.csv\", mode=\"r\", encoding=\"utf8\") as price_file:\n",
|
|
||||||
" reader = csv.reader(price_file)\n",
|
|
||||||
"\n",
|
|
||||||
" # for row in reader:\n",
|
|
||||||
" # id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = row\n",
|
|
||||||
" \n",
|
|
||||||
" # me_details.loc[-1] = [id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd] + list(range(0, 12))\n",
|
|
||||||
" # me_details.index = me_details.index + 1\n",
|
|
||||||
" id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = [row for row in reader][random.randint(0, 4500)]\n",
|
|
||||||
" other = filter(lambda s: not \"Wikipedia\" in s, other)\n",
|
|
||||||
"\n",
|
|
||||||
" pprint.pp(split_by_keywords(\"\".join(other), keywords))\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 110,
|
||||||
"id": "b83aa413",
|
"id": "ae53869e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"me_details = pd.DataFrame({\n",
|
||||||
|
" \"id\" : [],\n",
|
||||||
|
" \"name\" : [],\n",
|
||||||
|
" \"price_eur\" : [],\n",
|
||||||
|
" \"price_cn\" : [],\n",
|
||||||
|
" \"price_us\" : [],\n",
|
||||||
|
" \"brand\" : [],\n",
|
||||||
|
" \"ean\" : [],\n",
|
||||||
|
" \"producer\" : [],\n",
|
||||||
|
" \"release\" : [],\n",
|
||||||
|
" \"category\" : [],\n",
|
||||||
|
" \"producer_category\" : [],\n",
|
||||||
|
" \"num_parts\" : [],\n",
|
||||||
|
" })\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"../data/merlin/prices.csv\", mode=\"r\", encoding=\"utf8\") as price_file:\n",
|
||||||
|
" reader = csv.reader(price_file)\n",
|
||||||
|
"\n",
|
||||||
|
" for row in reader:\n",
|
||||||
|
" id, lp_eur, lp_cn, lp_usd, bp_eur, bp_cn, bp_usd, *other = row\n",
|
||||||
|
" other = filter(lambda s: not \"Wikipedia\" in s, other)\n",
|
||||||
|
"\n",
|
||||||
|
" retrieved = split_by_keywords(\"\".join(other), keywords)\n",
|
||||||
|
"\n",
|
||||||
|
" brand = retrieved.get(\"DetailsVon:\", \"\")\n",
|
||||||
|
" ean = retrieved.get(\"EAN:\", \"\")\n",
|
||||||
|
" producer = retrieved.get(\"Steine von:\", \"\")\n",
|
||||||
|
" age = retrieved.get(\"Altersempfehlung:\", \"\")\n",
|
||||||
|
" release = retrieved.get(\"Release:\", \"\").split(\" \")[-1]\n",
|
||||||
|
" num_parts = retrieved.get(\"Inhalt\", \"\").split(\"Teile\")[0].strip()\n",
|
||||||
|
" category = retrieved.get(\"Kategorie:\", \"\").strip().split(\",\")\n",
|
||||||
|
" categories = \",\".join(rm_epsilon(retrieved.get(\"Kategorien:\", \"\") .split(\",\") + category)).replace(\"Hersteller\", \"\")\n",
|
||||||
|
" producer_category = retrieved.get(\"Hersteller-Kategorie:\", \"\").split(\",\")\n",
|
||||||
|
" producer_categories = \",\".join(rm_epsilon(retrieved.get(\"Hersteller-Kategorien:\", \"\").split(\",\") + producer_category))\n",
|
||||||
|
"\n",
|
||||||
|
" me_extra = pd.DataFrame({\n",
|
||||||
|
" \"id\" : [id],\n",
|
||||||
|
" \"name\" : [id_to_name.get(id, \"\")],\n",
|
||||||
|
" \"price_eur\" : [lp_eur],\n",
|
||||||
|
" \"price_us\" : [lp_usd],\n",
|
||||||
|
" \"price_cn\" : [lp_cn],\n",
|
||||||
|
" \"brand\" : [brand],\n",
|
||||||
|
" \"ean\" : [ean],\n",
|
||||||
|
" \"producer\" : [producer],\n",
|
||||||
|
" \"release\" : [release],\n",
|
||||||
|
" \"category\" : [categories],\n",
|
||||||
|
" \"producer_category\" : [producer_categories],\n",
|
||||||
|
" \"num_parts\" : [num_parts],\n",
|
||||||
|
" })\n",
|
||||||
|
"\n",
|
||||||
|
" me_details = pd.concat([me_details, me_extra])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 111,
|
||||||
|
"id": "1b5bcea6",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
|
@ -264,43 +263,234 @@
|
||||||
" <tr style=\"text-align: right;\">\n",
|
" <tr style=\"text-align: right;\">\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th>id</th>\n",
|
" <th>id</th>\n",
|
||||||
" <th>listprice_eur</th>\n",
|
" <th>name</th>\n",
|
||||||
" <th>listprice_cn</th>\n",
|
" <th>price_eur</th>\n",
|
||||||
" <th>listprice_usd</th>\n",
|
" <th>price_cn</th>\n",
|
||||||
" <th>bestprice_eur</th>\n",
|
" <th>price_us</th>\n",
|
||||||
" <th>bestprice_cn</th>\n",
|
|
||||||
" <th>bestprice_usd</th>\n",
|
|
||||||
" <th>brand</th>\n",
|
" <th>brand</th>\n",
|
||||||
" <th>ean</th>\n",
|
" <th>ean</th>\n",
|
||||||
" <th>producer</th>\n",
|
" <th>producer</th>\n",
|
||||||
" <th>...</th>\n",
|
" <th>release</th>\n",
|
||||||
" <th>scale</th>\n",
|
|
||||||
" <th>category</th>\n",
|
" <th>category</th>\n",
|
||||||
" <th>producer_category</th>\n",
|
" <th>producer_category</th>\n",
|
||||||
" <th>num_parts</th>\n",
|
" <th>num_parts</th>\n",
|
||||||
" <th>width</th>\n",
|
|
||||||
" <th>height</th>\n",
|
|
||||||
" <th>depth</th>\n",
|
|
||||||
" <th>designer</th>\n",
|
|
||||||
" <th>weight</th>\n",
|
|
||||||
" <th>age</th>\n",
|
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </thead>\n",
|
" </thead>\n",
|
||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>BB-108899</td>\n",
|
||||||
|
" <td>Die drei ??? - Kids - Einbruch im Leuchtturm</td>\n",
|
||||||
|
" <td>99.95</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>BlueBrixx</td>\n",
|
||||||
|
" <td>4060904014783</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2026</td>\n",
|
||||||
|
" <td>BBPlay, The Three Investigators</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>1393</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>BB-108899</td>\n",
|
||||||
|
" <td>Die drei ??? - Kids - Einbruch im Leuchtturm</td>\n",
|
||||||
|
" <td>99.95</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>BlueBrixx</td>\n",
|
||||||
|
" <td>4060904014783</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2026</td>\n",
|
||||||
|
" <td>BBPlay, The Three Investigators</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>1393</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>BB-108569</td>\n",
|
||||||
|
" <td>Fledermaus</td>\n",
|
||||||
|
" <td>29.95</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>BlueBrixx</td>\n",
|
||||||
|
" <td>4060904023020</td>\n",
|
||||||
|
" <td>Xingbao</td>\n",
|
||||||
|
" <td>2026</td>\n",
|
||||||
|
" <td>Tiere</td>\n",
|
||||||
|
" <td>BBPro</td>\n",
|
||||||
|
" <td>579</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>BB-109262</td>\n",
|
||||||
|
" <td>1970er Sport Cabriolet schwarz</td>\n",
|
||||||
|
" <td>49.95</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>BlueBrixx</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>Qunlong</td>\n",
|
||||||
|
" <td>2026</td>\n",
|
||||||
|
" <td>Autos, Fahrzeuge</td>\n",
|
||||||
|
" <td>BBSpecial</td>\n",
|
||||||
|
" <td>1291</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>BB-109021</td>\n",
|
||||||
|
" <td>Mittelalterliche Steinbrücke</td>\n",
|
||||||
|
" <td>59.95</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>BlueBrixx</td>\n",
|
||||||
|
" <td>4060904022184</td>\n",
|
||||||
|
" <td>Qunlong</td>\n",
|
||||||
|
" <td>2026</td>\n",
|
||||||
|
" <td>Geschichte, Mittelalter</td>\n",
|
||||||
|
" <td>BBSpecial</td>\n",
|
||||||
|
" <td>1654</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>PANT-86219</td>\n",
|
||||||
|
" <td>My Own Swordsman™ Tavern Gate 武林外传</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>Pantasy</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2023</td>\n",
|
||||||
|
" <td>China, Gebäude, Popkultur</td>\n",
|
||||||
|
" <td>My Own Swordsman</td>\n",
|
||||||
|
" <td>422</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>PANT-86220</td>\n",
|
||||||
|
" <td>My Own Swordsman™ Tong Fu Inn 武林外传</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>Pantasy</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2023</td>\n",
|
||||||
|
" <td>China, Gebäude, Popkultur</td>\n",
|
||||||
|
" <td>My Own Swordsman</td>\n",
|
||||||
|
" <td>2000</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>PANT-61008</td>\n",
|
||||||
|
" <td>Retro 1960s Television</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>Pantasy</td>\n",
|
||||||
|
" <td>6973817320354</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2022</td>\n",
|
||||||
|
" <td>Gegenstände</td>\n",
|
||||||
|
" <td>Retro Collection</td>\n",
|
||||||
|
" <td>1173</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>PANT-15007</td>\n",
|
||||||
|
" <td>Pink Rose</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>Pantasy</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>GoBricks</td>\n",
|
||||||
|
" <td>2024</td>\n",
|
||||||
|
" <td>Blumen, Pflanzen</td>\n",
|
||||||
|
" <td>Botanical World</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>PANT-86218</td>\n",
|
||||||
|
" <td>Sherlock Holmes™ 221B Baker Street</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>_</td>\n",
|
||||||
|
" <td>Pantasy</td>\n",
|
||||||
|
" <td>6973817320156</td>\n",
|
||||||
|
" <td></td>\n",
|
||||||
|
" <td>2022</td>\n",
|
||||||
|
" <td>Popkultur</td>\n",
|
||||||
|
" <td>Sherlock Holmes</td>\n",
|
||||||
|
" <td>1088</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"<p>0 rows × 21 columns</p>\n",
|
"<p>4509 rows × 12 columns</p>\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Empty DataFrame\n",
|
" id name price_eur \\\n",
|
||||||
"Columns: [id, listprice_eur, listprice_cn, listprice_usd, bestprice_eur, bestprice_cn, bestprice_usd, brand, ean, producer, release, scale, category, producer_category, num_parts, width, height, depth, designer, weight, age]\n",
|
"0 BB-108899 Die drei ??? - Kids - Einbruch im Leuchtturm 99.95 \n",
|
||||||
"Index: []\n",
|
"0 BB-108899 Die drei ??? - Kids - Einbruch im Leuchtturm 99.95 \n",
|
||||||
|
"0 BB-108569 Fledermaus 29.95 \n",
|
||||||
|
"0 BB-109262 1970er Sport Cabriolet schwarz 49.95 \n",
|
||||||
|
"0 BB-109021 Mittelalterliche Steinbrücke 59.95 \n",
|
||||||
|
".. ... ... ... \n",
|
||||||
|
"0 PANT-86219 My Own Swordsman™ Tavern Gate 武林外传 _ \n",
|
||||||
|
"0 PANT-86220 My Own Swordsman™ Tong Fu Inn 武林外传 _ \n",
|
||||||
|
"0 PANT-61008 Retro 1960s Television _ \n",
|
||||||
|
"0 PANT-15007 Pink Rose _ \n",
|
||||||
|
"0 PANT-86218 Sherlock Holmes™ 221B Baker Street _ \n",
|
||||||
"\n",
|
"\n",
|
||||||
"[0 rows x 21 columns]"
|
" price_cn price_us brand ean producer release \\\n",
|
||||||
|
"0 _ _ BlueBrixx 4060904014783 2026 \n",
|
||||||
|
"0 _ _ BlueBrixx 4060904014783 2026 \n",
|
||||||
|
"0 _ _ BlueBrixx 4060904023020 Xingbao 2026 \n",
|
||||||
|
"0 _ _ BlueBrixx Qunlong 2026 \n",
|
||||||
|
"0 _ _ BlueBrixx 4060904022184 Qunlong 2026 \n",
|
||||||
|
".. ... ... ... ... ... ... \n",
|
||||||
|
"0 _ _ Pantasy 2023 \n",
|
||||||
|
"0 _ _ Pantasy 2023 \n",
|
||||||
|
"0 _ _ Pantasy 6973817320354 2022 \n",
|
||||||
|
"0 _ _ Pantasy GoBricks 2024 \n",
|
||||||
|
"0 _ _ Pantasy 6973817320156 2022 \n",
|
||||||
|
"\n",
|
||||||
|
" category producer_category num_parts \n",
|
||||||
|
"0 BBPlay, The Three Investigators 1393 \n",
|
||||||
|
"0 BBPlay, The Three Investigators 1393 \n",
|
||||||
|
"0 Tiere BBPro 579 \n",
|
||||||
|
"0 Autos, Fahrzeuge BBSpecial 1291 \n",
|
||||||
|
"0 Geschichte, Mittelalter BBSpecial 1654 \n",
|
||||||
|
".. ... ... ... \n",
|
||||||
|
"0 China, Gebäude, Popkultur My Own Swordsman 422 \n",
|
||||||
|
"0 China, Gebäude, Popkultur My Own Swordsman 2000 \n",
|
||||||
|
"0 Gegenstände Retro Collection 1173 \n",
|
||||||
|
"0 Blumen, Pflanzen Botanical World \n",
|
||||||
|
"0 Popkultur Sherlock Holmes 1088 \n",
|
||||||
|
"\n",
|
||||||
|
"[4509 rows x 12 columns]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 40,
|
"execution_count": 111,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
|
@ -308,11 +498,21 @@
|
||||||
"source": [
|
"source": [
|
||||||
"me_details"
|
"me_details"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 112,
|
||||||
|
"id": "0fb65dec",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"me_details.to_csv(\"../data/merlin/others.csv\", index=False)"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "venv (3.14.4)",
|
"display_name": "venv (3.12.3)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
|
@ -326,7 +526,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.14.4"
|
"version": "3.12.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue