kgr/lego/lego_graph_merlin.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ad994162",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import requests as rq\n",
    "import bs4\n",
    "import pandas as pd\n",
    "import time\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5536e8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "producers = [\"bluebrixx\", \"cada\", \"cobi\", \"mouldking\", \"pantasy\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5daea73",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"./data/merlin/others.csv\", mode=\"w+\", encoding=\"utf8\", newline=\"\") as producerfile:\n",
    "    writer = csv.writer(producerfile)\n",
    "    writer.writerow([\"id\", \"producer\", \"name\", \"size\", \"parts\", \"year\"])\n",
    "    for producer in producers:\n",
    "        with open(f\"data/merlin/{producer}.json\", mode=\"r\", encoding=\"utf8\") as sourcefile:\n",
    "            data = json.loads(sourcefile.read())\n",
    "            for row in data[\"data\"]:\n",
    "                _, id, _, name, rating, _, _, size, parts, year, _ = row\n",
    "\n",
    "                writer.writerow([id, producer, name, size, parts, year])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ab997198",
   "metadata": {},
   "outputs": [],
   "source": [
    "# uvp preise bestimmen :(\n",
    "def get_all_ids() -> list[str]:\n",
    "    df = pd.read_csv(\"./data/merlin/others.csv\")\n",
    "    return df[\"id\"].to_list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "32b1fa46",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"./data/merlin/prices.csv\", mode=\"a+\", encoding=\"utf8\", newline=\"\") as pricefile:\n",
    "    for idx, id in enumerate(get_all_ids()[3663:]):\n",
    "        try:\n",
    "            small_id = id.lower()\n",
    "\n",
    "            response = rq.get(f\"https://www.merlinssteine.de/sets/{small_id}\")\n",
    "            soup = bs4.BeautifulSoup(response.text)\n",
    "\n",
    "            # Prices\n",
    "            price_eur = soup.find(id=\"listprice_eur\")\n",
    "            price_usd = soup.find(id=\"listprice_usd\")\n",
    "            price_cn = soup.find(id=\"listprice_cn\")\n",
    "            bestprice_eur = soup.find(id=\"bestprice_eur\")\n",
    "            bestprice_usd = soup.find(id=\"bestprice_usd\")\n",
    "            bestprice_cn = soup.find(id=\"bestprice_cn\")\n",
    "\n",
    "            all_prices = [price_eur, price_cn, price_usd, bestprice_eur, bestprice_cn, bestprice_usd]\n",
    "            \n",
    "            #categories\n",
    "            other_dump = [description.text.replace(\"\\n\", \"\") for description in soup.find_all(class_=\"setpage_ct\")]\n",
    "            writer = csv.writer(pricefile)\n",
    "            \n",
    "            all_prices = [p.text if p != None else \"_\" for p in all_prices]\n",
    "            writer.writerow([id, *all_prices, *other_dump])\n",
    "            time.sleep(random.randint(2, 3))\n",
    "        except Exception as e:\n",
    "            print(e)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv (3.14.4)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}