{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ad994162",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import requests as rq\n",
    "import bs4\n",
    "import pandas as pd\n",
    "import time\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5536e8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Producers whose exports (./data/merlin/<producer>.json) are merged into others.csv.\n",
    "producers = [\"bluebrixx\", \"cada\", \"cobi\", \"mouldking\", \"pantasy\"]\n",
    "\n",
    "# Scraper settings.\n",
    "SET_URL = \"https://www.merlinssteine.de/sets/{set_id}\"\n",
    "REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given\n",
    "RESUME_FROM = 3663  # position in others.csv of the first id to scrape; 0 scrapes everything\n",
    "# Element ids of the price fields, in the column order used by prices.csv.\n",
    "PRICE_ELEMENT_IDS = [\n",
    "    \"listprice_eur\", \"listprice_cn\", \"listprice_usd\",\n",
    "    \"bestprice_eur\", \"bestprice_cn\", \"bestprice_usd\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5daea73",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the per-producer JSON exports into a single CSV.\n",
    "with open(\"./data/merlin/others.csv\", mode=\"w\", encoding=\"utf8\", newline=\"\") as producerfile:\n",
    "    writer = csv.writer(producerfile)\n",
    "    writer.writerow([\"id\", \"producer\", \"name\", \"size\", \"parts\", \"year\"])\n",
    "    for producer in producers:\n",
    "        with open(f\"./data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as sourcefile:\n",
    "            data = json.load(sourcefile)\n",
    "\n",
    "        for row in data[\"data\"]:\n",
    "            # Rows must have exactly 11 fields; the unused ones are discarded.\n",
    "            _, set_id, _, name, _, _, _, size, parts, year, _ = row\n",
    "            writer.writerow([set_id, producer, name, size, parts, year])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab997198",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Determine list (RRP) prices :(\n",
    "def get_all_ids() -> list[str]:\n",
    "    \"\"\"Return the values of the id column of ./data/merlin/others.csv, in file order.\n",
    "\n",
    "    The column is read as text: pandas would otherwise infer an integer column when\n",
    "    every id is numeric, which drops leading zeros and breaks ``str.lower`` later on.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(\"./data/merlin/others.csv\", dtype={\"id\": str})\n",
    "    return df[\"id\"].to_list()\n",
    "\n",
    "\n",
    "def scrape_set(set_id: str) -> list[str]:\n",
    "    \"\"\"Fetch the page of one set and return its row for prices.csv.\n",
    "\n",
    "    The row consists of the id as given, the text of the six price elements named in\n",
    "    PRICE_ELEMENT_IDS (\"_\" where the page has no such element) and the text of every\n",
    "    element with class ``setpage_ct``, with newlines removed.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: on connection errors, timeouts and HTTP error statuses.\n",
    "    \"\"\"\n",
    "    response = rq.get(SET_URL.format(set_id=set_id.lower()), timeout=REQUEST_TIMEOUT)\n",
    "    # An error page has no price elements; fail instead of recording a row of \"_\".\n",
    "    response.raise_for_status()\n",
    "    # Name the parser explicitly so the result does not depend on which parsers are installed.\n",
    "    soup = bs4.BeautifulSoup(response.text, \"html.parser\")\n",
    "\n",
    "    price_tags = [soup.find(id=element_id) for element_id in PRICE_ELEMENT_IDS]\n",
    "    prices = [tag.text if tag is not None else \"_\" for tag in price_tags]\n",
    "    categories = [tag.text.replace(\"\\n\", \"\") for tag in soup.find_all(class_=\"setpage_ct\")]\n",
    "    return [set_id, *prices, *categories]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32b1fa46",
   "metadata": {},
   "outputs": [],
   "source": [
    "failed_ids = []  # ids whose page could not be scraped, kept for a later retry\n",
    "\n",
    "with open(\"./data/merlin/prices.csv\", mode=\"a\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
    "    writer = csv.writer(pricefile)\n",
    "    for idx, set_id in enumerate(get_all_ids()[RESUME_FROM:], start=RESUME_FROM):\n",
    "        try:\n",
    "            writer.writerow(scrape_set(set_id))\n",
    "            pricefile.flush()  # keep finished rows on disk if the run is interrupted\n",
    "        except Exception as e:\n",
    "            # Best effort: report which set failed and carry on with the next one.\n",
    "            failed_ids.append(set_id)\n",
    "            print(f\"[{idx}] {set_id}: {e!r}\")\n",
    "        # Throttle after failures as well, so an error streak does not hammer the server.\n",
    "        time.sleep(random.randint(2, 3))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv (3.14.4)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}