120 lines
3.6 KiB
Plaintext
120 lines
3.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "ad994162",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import csv\n",
|
|
"import json\n",
|
|
"import requests as rq\n",
|
|
"import bs4\n",
|
|
"import pandas as pd\n",
|
|
"import time\n",
|
|
"import random"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "b5536e8c",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Producer keys; each one names a data/merlin/<producer>.json export.\n",
  "producers = [\n",
  "    \"bluebrixx\",\n",
  "    \"cada\",\n",
  "    \"cobi\",\n",
  "    \"mouldking\",\n",
  "    \"pantasy\",\n",
  "]"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "a5daea73",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Merge the per-producer JSON exports into one CSV with a row per set.\n",
  "with open(\"./data/merlin/others.csv\", mode=\"w+\", encoding=\"utf8\", newline=\"\") as producerfile:\n",
  "    writer = csv.writer(producerfile)\n",
  "    writer.writerow([\"id\", \"producer\", \"name\", \"size\", \"parts\", \"year\"])\n",
  "    for producer in producers:\n",
  "        with open(f\"data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as sourcefile:\n",
  "            data = json.load(sourcefile)\n",
  "        # The export is fully parsed above, so the file is closed before the rows are written.\n",
  "        for row in data[\"data\"]:\n",
  "            # Rows must have exactly 11 fields; only five of them are exported.\n",
  "            # `set_id` avoids rebinding the builtin `id` for the rest of the kernel session.\n",
  "            _, set_id, _, name, _, _, _, size, parts, year, _ = row\n",
  "            writer.writerow([set_id, producer, name, size, parts, year])"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 4,
 "id": "ab997198",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Determine the list prices (RRP) :(\n",
  "def get_all_ids() -> list[str]:\n",
  "    \"\"\"Return the values of the `id` column of ./data/merlin/others.csv, in file order.\n",
  "\n",
  "    The column is read with an explicit string dtype so that ids which look\n",
  "    numeric are not converted to integers by pandas' dtype inference.\n",
  "    \"\"\"\n",
  "    df = pd.read_csv(\"./data/merlin/others.csv\", dtype={\"id\": str})\n",
  "    return df[\"id\"].to_list()"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 7,
 "id": "32b1fa46",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Position in get_all_ids() at which scraping starts.\n",
  "# NOTE(review): presumably the ids before it were handled by earlier runs - confirm.\n",
  "RESUME_FROM = 3663\n",
  "# Seconds to wait for the server before a single request is abandoned.\n",
  "REQUEST_TIMEOUT = 30\n",
  "# Element ids of the price fields, in the column order written to prices.csv.\n",
  "PRICE_ELEMENT_IDS = [\n",
  "    \"listprice_eur\",\n",
  "    \"listprice_cn\",\n",
  "    \"listprice_usd\",\n",
  "    \"bestprice_eur\",\n",
  "    \"bestprice_cn\",\n",
  "    \"bestprice_usd\",\n",
  "]\n",
  "\n",
  "# Append one row per set: id, six price fields (\"_\" when absent), then the\n",
  "# text of every element with class \"setpage_ct\".\n",
  "with open(\"./data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
  "    writer = csv.writer(pricefile)\n",
  "    # `idx` is the absolute position in get_all_ids(), so a logged failure\n",
  "    # tells you where to resume or which id to retry.\n",
  "    for idx, set_id in enumerate(get_all_ids()[RESUME_FROM:], start=RESUME_FROM):\n",
  "        try:\n",
  "            small_id = set_id.lower()\n",
  "\n",
  "            # Without a timeout a stalled connection would block the loop forever.\n",
  "            response = rq.get(f\"https://www.merlinssteine.de/sets/{small_id}\", timeout=REQUEST_TIMEOUT)\n",
  "            # Name the parser explicitly so the result does not depend on which\n",
  "            # optional parsers happen to be installed.\n",
  "            soup = bs4.BeautifulSoup(response.text, \"html.parser\")\n",
  "\n",
  "            # Prices\n",
  "            price_tags = [soup.find(id=element_id) for element_id in PRICE_ELEMENT_IDS]\n",
  "            all_prices = [\"_\" if tag is None else tag.text for tag in price_tags]\n",
  "\n",
  "            # Categories\n",
  "            other_dump = [description.text.replace(\"\\n\", \"\") for description in soup.find_all(class_=\"setpage_ct\")]\n",
  "\n",
  "            writer.writerow([set_id, *all_prices, *other_dump])\n",
  "        except Exception as e:\n",
  "            # Best effort: report the failing entry and carry on with the next one.\n",
  "            print(f\"[{idx}] {set_id}: {e!r}\")\n",
  "        # Pause after failures as well, so a run of errors does not hit the server back-to-back.\n",
  "        time.sleep(random.randint(2, 3))"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv (3.14.4)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|