116 lines
2.6 KiB
Plaintext
116 lines
2.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e27b62ab",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Brickset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "2adf9f0d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import requests as rq\n",
|
|
"import csv\n",
|
|
"import time\n",
|
|
"\n",
|
|
"session = rq.Session()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "9b76d900",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def brickset_page_url(theme):\n",
|
|
" return f\"https://brickset.com/sets/theme-{theme}\"\n",
|
|
"\n",
|
|
"download_url = \"https://brickset.com/exportscripts/sets/list\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "a7045435",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read one theme name per line. Surrounding whitespace is trimmed and blank or\n",
"# whitespace-only lines are skipped, so they never turn into bogus theme names.\n",
"with open(\"./data/brickset/themes.txt\", mode=\"r\", encoding=\"utf8\") as themesfile:\n",
"    themes = [name.strip() for name in themesfile.read().splitlines() if name.strip()]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "18a3fe83",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def download_theme(theme: str, delay: float = 5, timeout: float = 30):\n",
"    \"\"\"Download the Brickset CSV export for one theme into ./data/brickset/.\n",
"\n",
"    The theme page is requested first and the export endpoint second, both\n",
"    through the shared session, with a pause of `delay` seconds in between.\n",
"\n",
"    Args:\n",
"        theme: Theme name used in the page URL and in the output file name.\n",
"        delay: Seconds to sleep between the two requests.\n",
"        timeout: Seconds to wait for each HTTP response before giving up.\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: If either request returns a 4xx/5xx status.\n",
"        requests.RequestException: On connection errors or timeouts.\n",
"    \"\"\"\n",
"    page = session.get(url=brickset_page_url(theme), timeout=timeout)\n",
"    # Stop on a failed page visit, before any export is requested or written.\n",
"    page.raise_for_status()\n",
"    time.sleep(delay)\n",
"    response = session.get(url=download_url, timeout=timeout)\n",
"    # Never save an HTTP error page as if it were CSV data.\n",
"    response.raise_for_status()\n",
"    # newline=\"\" writes the payload's own line endings untouched; default text mode\n",
"    # would translate line feeds to the platform separator (doubling CRs on Windows).\n",
"    with open(f\"./data/brickset/{theme}.csv\", mode=\"w\", encoding=\"utf8\", newline=\"\") as themefile:\n",
"        themefile.write(response.text)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "a9f4554d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Best-effort bulk download: a failure for one theme is reported and recorded,\n",
"# then the loop moves on to the next theme.\n",
"failed_themes = []\n",
"for theme in themes:\n",
"    try:\n",
"        download_theme(theme, delay=5)\n",
"    except Exception as e:\n",
"        # Name the theme in the message so a failure can be traced and retried.\n",
"        failed_themes.append(theme)\n",
"        print(f\"{theme}: {e}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"id": "1b9d9cac",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Remove blank lines from every downloaded theme CSV.\n",
"for theme in themes:\n",
"    csv_path = f\"./data/brickset/{theme}.csv\"\n",
"    try:\n",
"        # Same encoding the file was written with; the locale default may differ.\n",
"        with open(csv_path, mode=\"r\", encoding=\"utf8\") as reader:\n",
"            kept_lines = [line for line in reader if line.strip()]\n",
"    except FileNotFoundError:\n",
"        # No CSV for this theme (e.g. its download failed), so nothing to clean.\n",
"        print(f\"{csv_path}: not found, skipped\")\n",
"        continue\n",
"    # Rewrite only after the whole file has been read, so reads and writes never overlap.\n",
"    with open(csv_path, mode=\"w\", encoding=\"utf8\") as writer:\n",
"        writer.writelines(kept_lines)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv (3.14.4)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.14.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|