commit 42121be2c5b9f7ad86a420f51837619a0399efd3 Author: Christoph Giess Date: Mon Oct 10 11:13:41 2022 +0200 Schlüsselqulifikation Data Science mit Python diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73003a7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# --> Temporary files +privat +temp +*.swp +*~ + +# ---> Latex +*.aux +*.log +*.nav +*.out +*.snm +*.toc +*.vrb +*.synctex.gz + +# ---> Mercurial +.hg/ +.hgignore +.hgsigs +.hgsub +.hgsubstate +.hgtags + +# ---> Dart +# Don’t commit the following directories created by pub. +.buildlog +.pub/ +.dart_tool/ +build/ +packages +.packages + +# Or the files created by dart2js. +*.dart.js +*.js_ +*.js.deps +*.js.map + +# Include when developing application packages. +pubspec.lock + +# Mac file +.DS_Store + +# IntelliJ related +*.iml +*.ipr +*.iws +.idea/ diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..7fbc9b8 --- /dev/null +++ b/README.adoc @@ -0,0 +1,37 @@ += Digital Basics: Data Science mit Python + +== Ziele +Wir werden gemeinsam, +* lernen, wie man Daten automatisiert auswerten kann +* statistischen Verfahren dafür kennen lernen +* Visualisierungen zum besseren Verständnis von Daten nutzen +* die Programmiersprache Python und deren Bibliotheken besser kennen lernen + +== Zielgruppe + +Das Seminar richtet sich an +* alle, die an „Digital Basics: Einführung in die Programmierung mit Python“ teilgenommen haben +* sowie Personen, die ein bisschen Python programmieren können +und die lernen möchten, wie man mit Hilfe von Python Daten auswerten kann. 
+ +== Inhalte + +* Daten aus Dateien einlesen +* Grundlegende statistische Funktionen +* Verwenden von externen Bibliotheken zur Datenanalyse +* Visualisierung von Daten + +== Methoden + +* Vortrag +* Übungen +* individuelles Feedback / Support +* Vorstellung der Ergebnisse in der Gruppe +* Diskussion + + +*Seminardauer:* 5 UE + +*Teilnehmerzahl:* 6-20 + +*Referent:* Christoph Giess, MARS diff --git a/jupyter_book/01_wiederholung.ipynb b/jupyter_book/01_wiederholung.ipynb new file mode 100644 index 0000000..3ba8ff3 --- /dev/null +++ b/jupyter_book/01_wiederholung.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a44722c-9586-4418-ad7f-8a3c964a9db8", + "metadata": {}, + "source": [ + "# Wiederholung\n", + "\n", + "Um alle Teilnehmer auf den gleichen Stand zu bringen fangen wir dort an, wo der erste Kurs geendet hat." + ] + }, + { + "cell_type": "markdown", + "id": "5246ab8c-2b98-48f0-9dc5-a36d99ecff60", + "metadata": {}, + "source": [ + "## Packages, Funktionen, Arrays und Rechnen" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c04b1fca-a1f5-4ba2-90a8-216c3cc41556", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "random.seed(42)\n", + "\n", + "def rand_numbers(n, start, end):\n", + " \"\"\" Erzeugt einen Array mit Zufallszahlen mit 2 Nachkommastellen\n", + " n\n", + " Anzahl der erzeugten Zufallszahlen\n", + " \n", + " start\n", + " Kleinste mögliche Zahl (inklusiv)\n", + " \n", + " end\n", + " Größte mögliche Zahl (exklusiv)\n", + " \"\"\"\n", + " digits = 2;\n", + " factor = 10 ** digits;\n", + " result = []\n", + " for i in range(n):\n", + " result.append(random.randint(start * factor, end * factor) / factor)\n", + " \n", + " return result " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ff484198-3f23-439c-a17d-790e24d74c7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[-0.13, -0.29, -0.61, -0.45, 0.95, -0.14, -0.74, -0.77, -0.03, -0.76]" + 
] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rand_numbers(10, -1, 1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter_book/02_mehr_zu_funktionen.ipynb b/jupyter_book/02_mehr_zu_funktionen.ipynb new file mode 100644 index 0000000..4d5ca9c --- /dev/null +++ b/jupyter_book/02_mehr_zu_funktionen.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a44722c-9586-4418-ad7f-8a3c964a9db8", + "metadata": {}, + "source": [ + "# Mehr zu Funktionen" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c04b1fca-a1f5-4ba2-90a8-216c3cc41556", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "markdown", + "id": "4679037e-31c3-407f-8b52-7fa086b0f238", + "metadata": {}, + "source": [ + "## Default-Werte\n", + "\n", + "An der Funktion ist unschön, dass sie die Zufallszahlen immer mit 2 Nachkommastellen zurück gibt.\n", + "\n", + "In den meisten Fällen ist das OK aber manchmal möchte ich weniger oder auch mehr Nachkommastellen.\n", + "Dies lässt sich problemlos mit einem weiteren Parameter realisieren. Dem kann man sogar einen Default-Wert geben." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d29042ce-7023-45f2-a389-e73d4ddfd7e8", + "metadata": {}, + "outputs": [], + "source": [ + "def rand_numbers(n, start, end, digits=2):\n", + " \"\"\" Erzeugt einen Array mit Zufallszahlen mit 2 Nachkommastellen\n", + " n\n", + " Anzahl der erzeugten Zufallszahlen\n", + " \n", + " start\n", + " Kleinste mögliche Zahl (inklusiv)\n", + " \n", + " end\n", + " Größte mögliche Zahl (exklusiv)\n", + " \n", + " digits\n", + " Anzahl Nachkommastellen, Default: 2\n", + " \"\"\"\n", + " factor = 10 ** digits;\n", + " result = []\n", + " for i in range(n):\n", + " result.append(random.randint(start * factor, end * factor) / factor)\n", + " \n", + " return result " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3f7b55f4-d79b-4df0-adb2-2a1664abc65b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[13.2, 11.47, 2.19, 1.47, 2.6]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rand_numbers(5, 1, 20)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "89b767d1-4305-492b-984f-960b322e89a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[6.8, 12.5, 14.6, 10.0, 2.1]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rand_numbers(5, 1, 20, 1)" + ] + }, + { + "cell_type": "markdown", + "id": "3c19d60f-948b-46f7-a910-5573ac1131d8", + "metadata": {}, + "source": [ + "## Benannte Parameter\n", + "\n", + "Bei `math.sin(math.radians(45))` kann man verstehen, was die Funktionen tun und was `45` bedeutet.\n", + "\n", + "Bei `rand_numbers(5, 1, 20, 1)`, ist das ohne Dokumentation nicht mehr möglich.\n", + "Um Code verständlicher zu machen können die Parameter von Funktionen beim Aufruf benannt werden." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e831c3a7-d8ef-4d0a-8a1d-9a41db33d9c4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[2.5, 6.0, 15.9, 16.3, 3.9]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rand_numbers(n=5, start=1, end=20, digits=1)" + ] + }, + { + "cell_type": "markdown", + "id": "e4f2a14d-4698-47a3-8440-29bf12e7150d", + "metadata": {}, + "source": [ + "Die Reihenfolge ist beliebig." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7a3b4b92-19bc-4986-b87b-c9530fa8b60a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[15.5, 8.7, 14.4, 10.3, 11.7]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rand_numbers(end=20, start=1, digits=1, n=5)" + ] + }, + { + "cell_type": "markdown", + "id": "d798fca4-38b1-4e7c-86c9-88209728f611", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Was passiert, wenn man einzelne Parameter weglässt?" + ] + }, + { + "cell_type": "markdown", + "id": "dc71edeb-7f06-4609-938f-18b6d7619dba", + "metadata": {}, + "source": [ + "## Lösung\n", + "Man kann nur `digits` weglassen, weil dies einen Default-Wert hat. Alle anderen Parameter müssen angegeben werden." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter_book/03_matplotlib.ipynb b/jupyter_book/03_matplotlib.ipynb new file mode 100644 index 0000000..9bf29eb --- /dev/null +++ b/jupyter_book/03_matplotlib.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a44722c-9586-4418-ad7f-8a3c964a9db8", + "metadata": {}, + "source": [ + "# Grafiken" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d29042ce-7023-45f2-a389-e73d4ddfd7e8", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "import random\n", + "def rand_numbers(n, start, end, digits=2):\n", + " \"\"\" Erzeugt einen Array mit Zufallszahlen mit 2 Nachkommastellen\n", + " n\n", + " Anzahl der erzeugten Zufallszahlen\n", + " \n", + " start\n", + " Kleinste mögliche Zahl (inklusiv)\n", + " \n", + " end\n", + " Größte mögliche Zahl (exklusiv)\n", + " \n", + " digits\n", + " Anzahl Nachkommastellen, Default: 2\n", + " \"\"\"\n", + " factor = 10 ** digits\n", + " result = []\n", + " for i in range(n):\n", + " result.append(random.randint(start * factor, end * factor) / factor)\n", + " \n", + " return result " + ] + }, + { + "cell_type": "markdown", + "id": "ab90ad44-8536-4445-8674-a451a9a4971f", + "metadata": {}, + "source": [ + "## Matplotlib\n", + "\n", + "Bisher haben wir nur mit Zahlen ein Texten gearbeitet.\n", + "Dafür haben wir die Packages `math` und `random` verwendet.\n", + "\n", + "Jetzt möchten wir Grafiken erzeugen. 
Dazu benötigen wir ein weiteres Package: `matplotlib`,\n", + "genauer gesagt, davon erst einmal nur den Teil `pyplot`.\n", + "\n", + "Um uns Tipparbeit zu sparen, sagen wir beim `import`, dass wir im Folgenden dieses Package `plt` nennen möchten." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "89b767d1-4305-492b-984f-960b322e89a6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "numbers = rand_numbers(50, -100, 100)\n", + "plt.plot(numbers);\n", + "plt.show() # bei manchen Jupyter-Versionen nicht nötig, einfach mal ohne testen" + ] + }, + { + "cell_type": "markdown", + "id": "fa5231c5-7b2a-4d02-a0e3-69a8ee78d930", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Erklärt,\n", + "1. was das Programm tut\n", + "2. was auf der Grafik zu sehen ist\n", + "3. wozu das Semikolon in der vorletzten Zeile dient" + ] + }, + { + "cell_type": "markdown", + "id": "486f4398-0d21-411d-8f91-e57fc925d2b8", + "metadata": {}, + "source": [ + "## Lösung\n", + "1. Was tut das Programm?\n", + " - es importiert die Bibliothek `matplotlib.pyplot`. Diese enthält Funktionen zum Zeichnen von Grafiken\n", + " - diese Bibliothek nennen wir `plt` weil das schneller zu schreiben ist als `matplotlib.pyplot`\n", + " - 50 Zufallszahlen zwischen -100 und 100 erzeugen und diese im Array mit dem Namen `numbers` speichern\n", + " - die Zufallszahlen in `numbers` zeichnen\n", + "1. Was ist auf der Grafik zu sehen?\n", + " - Der Wert der Zufallszahlen ist auf der y-Achse\n", + " - Die 0te Zufallszahl ist auf der x-Achse bei x=0, die 1te Zufallszahl bei x=1 usw.\n", + "1. Die Funktion plot() zeichnet die Grafik und liefert zusätzlich noch ein Ergebnis zurück. Das Semikolon sorgt dafür, dass das Ergebnis nicht angezeigt wird." + ] + }, + { + "cell_type": "markdown", + "id": "5feb449e-76c4-4ea9-b0e2-4912f100cc39", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Wie kann man erkennen, wie gleichmäßig die Zufallszahlen verteilt sind?" + ] + }, + { + "cell_type": "markdown", + "id": "7d02f67c-6395-42be-988d-e29d182d0bd5", + "metadata": {}, + "source": [ + "## Lösung\n", + "Die Zufallszahlen der Größe nach sortieren.\n", + "Je gleichmäßiger sie verteilt sind desto gerader ist die Linie im Graphen." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c3cd108d-ec9f-4570-b604-b18c07887f95", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "numbers.sort()\n", + "plt.plot(numbers);\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4866c521-8484-413b-80af-7bb129a5c2c5", + "metadata": {}, + "source": [ + "Das sieht schon nicht schlecht aus. Abweichungen von der Gerade sind aber deutlich zu sehen." + ] + }, + { + "cell_type": "markdown", + "id": "2f2ddb2c-d63e-4ee7-b5c1-1c148eea8787", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Welche Stelle im Programm muss geändert werden damit der erzeugte Graph viel näher an einer Gerade ist?" + ] + }, + { + "cell_type": "markdown", + "id": "7cc38c4b-8ff6-4549-9a26-df42bad9e42f", + "metadata": {}, + "source": [ + "## Lösung\n", + "Nach dem Gesetz der großen Zahlen müsste die Abweichung von der Gerade kleiner werden wenn man mehr Zufallszahlen zieht." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "aee05f91-0bbd-4c9e-a972-ff305167c974", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "numbers = rand_numbers(10000, -100, 100)\n", + "numbers.sort()\n", + "plt.plot(numbers);\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "fcdd2a03-14b2-4424-a0a0-5bc1e73017ae", + "metadata": {}, + "source": [ + "## Matplotlib Dokumentation\n", + "\n", + "Dokumentation zu Matplotlib und anderen Python Packages findet man im unteren Teil des Hilfe-Menüs.\n", + "\n", + "![jupyter_help.png](jupyter_help.png)\n", + "\n", + "Falls diese fehlen:\n", + "* matplotlib: https://matplotlib.org/stable/api/index.html\n", + "* pandas: https://pandas.pydata.org/docs/reference/index.html" + ] + }, + { + "cell_type": "markdown", + "id": "95389da8-8526-4904-bc04-01a2104f1d1d", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Erzeuge einen Graph mit Sinus und Cosinus-Funktion." + ] + }, + { + "cell_type": "markdown", + "id": "34fcc9ed-61b0-483a-ae79-fb5faa25ac9b", + "metadata": {}, + "source": [ + "## Lösung" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0c429b71-11b0-4a30-8710-61298f161185", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import math\n", + "\n", + "sins = []\n", + "coss = []\n", + "\n", + "for i in range(360):\n", + " sins.append(math.sin(math.radians(i)))\n", + " coss.append(math.cos(math.radians(i))) \n", + " \n", + "plt.plot(sins)\n", + "plt.plot(coss);\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6dda6efa-5540-4659-8f66-afd18b697f5c", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "* Lest die Dokumentation zu `plt.plot` und verändert\n", + " * Farbe der Linie\n", + " * Dicke der Linie\n", + " * Beschriftung der x-Achse (Werte in Bogen- statt Gradmaß)\n", + "* Lest die Dokumentation zu `plot` und versucht den Graphen zu beschriften" + ] + }, + { + "cell_type": "markdown", + "id": "ba58416c-8104-468b-8a0b-88cf86e85879", + "metadata": {}, + "source": [ + "## Lösung" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9e65731d-e81f-491a-ace5-46eaa6e208e6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import math\n", + "\n", + "sins = []\n", + "coss = []\n", + "rads = []\n", + "\n", + "for i in range(360):\n", + " r = math.radians(i)\n", + " rads.append(r)\n", + " sins.append(math.sin(r))\n", + " coss.append(math.cos(r))\n", + "\n", + "plt.plot(rads, sins, color='violet', linewidth=2)\n", + "plt.plot(rads, coss, color='darkblue', linewidth=2)\n", + "\n", + "plt.title('sin(x) und cos(x)')\n", + "plt.xlabel('x von 0 bis 2π')\n", + "plt.ylabel('y')\n", + "plt.legend(['sin(x)', 'cos(x)']);\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter_book/04_pandas.ipynb b/jupyter_book/04_pandas.ipynb new file mode 100644 index 0000000..523c4a9 --- /dev/null +++ b/jupyter_book/04_pandas.ipynb @@ -0,0 +1,1262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a44722c-9586-4418-ad7f-8a3c964a9db8", + "metadata": {}, + "source": [ + "# Daten analysieren\n", + "\n", + "Zufallszahlen und Sinuswerte zu visualisieren ist ganz nett, in der Realität müssen aber Ergebnisse aus Umfragen oder Messwerte von Experimenten ausgewertet werden.\n", + "\n", + "Standard für die Datenanalyse mit Python ist das Package `pandas`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a5292e13-c487-4e3a-8085-21d80469a734", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas" + ] + }, + { + "cell_type": "markdown", + "id": "8a02c3e1-8ea1-475b-90f0-b3ea6141e4ba", + "metadata": {}, + "source": [ + "Um zu demonstrieren, wie `pandas` funktioniert, brauchen wir ein paar Daten. Die finden wir in der Datei `car.csv`, in der die Kosten eines Autos über mehrere Jahre hinweg erfasst wurden.\n", + "\n", + "CSV (Comma Separated Values) gibt es in verschiedenen Ausprägungen. In unserer Datei sind die Daten nicht mit Komma, sondern einem Tabulator voneinander getrennt. Das muss man beim Einlesen mit `sep=\"\\t\"` angeben." + ] + }, + { + "cell_type": "markdown", + "id": "dc89267a-2993-4fb6-b225-99a233476f83", + "metadata": {}, + "source": [ + "## Hinweis\n", + "\n", + "Bei den folgenden Beispielen wird die Datei `car.csv` benötigt.\n", + "Diese findet sich [hier](https://gist.githubusercontent.com/cgiess/58f995bf88cd0e2269b634c7970eb479/raw/2de29b036ead1eabc402301e934b82e8f674aaf4/car.csv). Diese Datei muss auf dem Jupyter-Rechner liegen. In der Regel kann man dies mittels `Upload Files` erreichen.\n", + "\n", + "![upload_file.png](upload_file.png)\n", + "\n", + "\n", + "Das `Upload Files` funktioniert aber bei https://jupyter.org/try-jupyter/lab/ nicht. 
Hier muss die Datei wie folgt bereitgestellt werden (Stand Mai 2022):\n", + "\n", + "\n", + "```\n", + "from js import fetch\n", + "res = await fetch('https://gist.githubusercontent.com/cgiess/58f995bf88cd0e2269b634c7970eb479/raw/2de29b036ead1eabc402301e934b82e8f674aaf4/car.csv')\n", + "text = await res.text()\n", + "with open('car.csv', 'w') as f:\n", + " f.write(text)\n", + "\n", + "import os\n", + "os.listdir()\n", + "```\n", + "\n", + "Als Ergebnis sollte eine Liste mit Dateinamen angezeigt werden, in der `car.csv` enthalten ist, z.B.:\n", + "\n", + "`['.matplotlib', '.ipython', 'car.csv']`" + ] + }, + { + "cell_type": "markdown", + "id": "f9c2b389-370c-4c43-b0f4-f05ed3910d88", + "metadata": {}, + "source": [ + "## CSV Dateien mit Pandas lesen" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bd02ed34-11f1-40ed-b3f0-7217a0acccef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiter
02012-07-07KaufAutohaus13800.0030NaN
12012-07-10BenzinESSO57.0119934.89
22012-07-11VersichHaftpfl.104.30400NaN
32012-07-23BenzinKaufland55.0382834.20
42012-08-10BenzinKaufland56.72144435.47
.....................
2272021-08-28BenzinAVIA47.1010455229.27
2282021-10-09BenzinKaufland55.1010514733.97
2292021-10-19BenzinJET40.1010562324.32
2302021-12-04BenzinJET53.3010618634.19
2312021-12-25BenzinAVIA51.4210672731.96
\n", + "

232 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter\n", + "0 2012-07-07 Kauf Autohaus 13800.00 30 NaN\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89\n", + "2 2012-07-11 Versich Haftpfl. 104.30 400 NaN\n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20\n", + "4 2012-08-10 Benzin Kaufland 56.72 1444 35.47\n", + ".. ... ... ... ... ... ...\n", + "227 2021-08-28 Benzin AVIA 47.10 104552 29.27\n", + "228 2021-10-09 Benzin Kaufland 55.10 105147 33.97\n", + "229 2021-10-19 Benzin JET 40.10 105623 24.32\n", + "230 2021-12-04 Benzin JET 53.30 106186 34.19\n", + "231 2021-12-25 Benzin AVIA 51.42 106727 31.96\n", + "\n", + "[232 rows x 6 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = pandas.read_csv(\"file:car.csv\", sep=\"\\t\")\n", + "d" + ] + }, + { + "cell_type": "markdown", + "id": "0f10f423-f74d-4de6-b4cc-bfe45664d40c", + "metadata": {}, + "source": [ + "Das scheint funktioniert zu haben. Eine kleine Änderung werden wir noch vornehmen. Die erste Spalte enthält ein Datum. Da dies verschieden geschrieben werden kann (31.12.2020, 12/31/2020, ...) müssen wir bei der Erkennung ein bisschen nachhelfen. In unserem Fall ist es ausreichend zu sagen, welche Spalten ein Datum enthalten." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "49844097-ddff-4331-831a-433cad03cda6", + "metadata": {}, + "outputs": [], + "source": [ + "d = pandas.read_csv('file:car.csv', sep='\\t', parse_dates=['Datum'])" + ] + }, + { + "cell_type": "markdown", + "id": "de418fbd-024f-4205-be0b-5357c6667ccb", + "metadata": {}, + "source": [ + "## Auf einzelne Daten zugreifen\n", + "\n", + "Der Rückgabewert von `read_csv` ist ein **Data Frame**.\n", + "Das sieht aus wie eine Tabelle. Was kann man damit nun machen?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0440feaf-1eff-4206-a349-ac2b3186ecce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 13800.00\n", + "1 57.01\n", + "2 104.30\n", + "3 55.03\n", + "4 56.72\n", + " ... \n", + "227 47.10\n", + "228 55.10\n", + "229 40.10\n", + "230 53.30\n", + "231 51.42\n", + "Name: Preis, Length: 232, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[\"Preis\"]" + ] + }, + { + "cell_type": "markdown", + "id": "60b9d3f1-5665-42c3-9bba-f9e2293b2de3", + "metadata": {}, + "source": [ + "Einzelne Spalten kann man über den Spaltentitel adressieren. So eine Spalte nennt Pandas **Series**.\n", + "Möchte man einen Wert aus so einer Series haben, so muss man zusätzlich dessen Zeilennummer angeben." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8be6a487-c75a-4ff2-b39d-7a4b2451391c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13800.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[\"Preis\"][0]" + ] + }, + { + "cell_type": "markdown", + "id": "c0d070ae-beaa-474b-ad12-3878169f638b", + "metadata": {}, + "source": [ + "Das Ganze geht auch umgekehrt.\n", + "Eine Zeile bekommt man über den Array `iloc`:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "745dcce7-0d8b-4e7b-bb30-43bb7e78b493", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Datum 2012-07-07 00:00:00\n", + "Typ Kauf\n", + "Beschreibung Autohaus\n", + "Preis 13800.0\n", + "km 30\n", + "Liter NaN\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.iloc[0]" + ] + }, + { + "cell_type": "markdown", + "id": "4cb8b853-3151-44cc-9d11-d6656a19bc22", + "metadata": {}, + "source": [ + "... 
und einen einzelnen Wert daraus über den Spaltennamen." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d779293b-4e75-4814-972d-138df30cf8a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.iloc[0][\"km\"]" + ] + }, + { + "cell_type": "markdown", + "id": "b710be5b-7be3-454b-8089-402719845f75", + "metadata": {}, + "source": [ + "Mit `head()` und `tail()` kann man zudem die ersten bzw. letzten N Zeilen auswählen." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9f9d880a-e119-45ef-8136-70866faf3236", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiter
12012-07-10BenzinESSO57.0119934.89
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.head(2).tail(1)" + ] + }, + { + "cell_type": "markdown", + "id": "4cd9e9a2-a2da-4a75-8dbd-18206d99744b", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "Wie bekommt man die 4. bis 6. Zeile?" + ] + }, + { + "cell_type": "markdown", + "id": "ccd76426-4859-4a79-8882-c3cd72dcfb5a", + "metadata": {}, + "source": [ + "## Lösung\n", + "Achtung: die 4. Zeile hat die Nummer 3 weil die Nummerierung mit 0 beginnt!" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "15e79e26-3e9f-4674-9fb6-fbf3fe3a1303", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiter
32012-07-23BenzinKaufland55.0382834.20
42012-08-10BenzinKaufland56.72144435.47
52012-08-23SteuernKfz-Steuer50.001500NaN
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter\n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20\n", + "4 2012-08-10 Benzin Kaufland 56.72 1444 35.47\n", + "5 2012-08-23 Steuern Kfz-Steuer 50.00 1500 NaN" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.head(6).tail(3)" + ] + }, + { + "cell_type": "markdown", + "id": "bc38357a-65bd-4f59-969a-ca3bd8c2fc92", + "metadata": {}, + "source": [ + "## Was wissen die Daten über sich selbst?\n", + "Der Data Frame kann über sich selbst etwas sagen:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "100cc290-195c-4b74-a231-1c4bcc8c5d24", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Datum datetime64[ns]\n", + "Typ object\n", + "Beschreibung object\n", + "Preis float64\n", + "km int64\n", + "Liter float64\n", + "dtype: object" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "4c75bf50-b650-462a-b48d-e00950065e4c", + "metadata": {}, + "source": [ + "Was bedeutet dies?\n", + "Für jede Spalte wird angegeben, welchen Typ die darin enthaltenen Daten besitzen.\n", + "* datetime64\\[ns] - ist ein Zeitstempel bestehend aus Datum und Uhrzeit wobei letztere eine Genauigkeit von Nanosekunden hat\n", + "* object - das sind Texte\n", + "* float64 - Zahl mit Nachkommastellen.\n", + "* int64 - Zahl ohne Nachkommastellen\n", + "\n", + "Über die Verteilung der Zahlen gibt die Funktion `describe()` Auskunft." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "676f6f12-173c-44ac-a5bc-34063a0c2ae0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PreiskmLiter
count232.000000232.000000201.000000
mean117.58357853910.50862131.144726
std904.53867131374.1364204.261039
min0.00000030.00000010.140000
25%39.84000027386.75000029.890000
50%45.16000053140.00000032.640000
75%50.00000081382.00000033.650000
max13800.000000106727.00000037.930000
\n", + "
" + ], + "text/plain": [ + " Preis km Liter\n", + "count 232.000000 232.000000 201.000000\n", + "mean 117.583578 53910.508621 31.144726\n", + "std 904.538671 31374.136420 4.261039\n", + "min 0.000000 30.000000 10.140000\n", + "25% 39.840000 27386.750000 29.890000\n", + "50% 45.160000 53140.000000 32.640000\n", + "75% 50.000000 81382.000000 33.650000\n", + "max 13800.000000 106727.000000 37.930000" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "02ec9241-c8e2-4ccb-b7ba-ef2062e6227e", + "metadata": {}, + "source": [ + "## Aufgabe" + ] + }, + { + "cell_type": "markdown", + "id": "1e083c1e-4efe-4f34-ac5a-027c5e64a977", + "metadata": {}, + "source": [ + "1. Welche Bedeutung haben diese Zahlen?\n", + "1. Welche davon helfen beim Verständnis der Daten?\n", + "1. Gibt es Datensätze, wo diese Funktion noch viel hilfreicher ist?" + ] + }, + { + "cell_type": "markdown", + "id": "e2d48322-849d-4a4b-aeba-51d588504ad3", + "metadata": {}, + "source": [ + "## Lösung\n", + "Die besprechen wir im Kurs" + ] + }, + { + "cell_type": "markdown", + "id": "878dec12-c334-4d2c-9d10-4500f38af752", + "metadata": {}, + "source": [ + "## Zeilen zählen" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cb3dc7a5-a4f2-4c75-a251-2862fde3f324", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Datum 232\n", + "Typ 232\n", + "Beschreibung 232\n", + "Preis 232\n", + "km 232\n", + "Liter 201\n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.count()" + ] + }, + { + "cell_type": "markdown", + "id": "5e773974-53d8-4a16-849b-708e429ef38d", + "metadata": {}, + "source": [ + "In der Spalte **Liter** fehlen einige Einträge. Darum liefert `count()` für diese einen kleineren Wert.\n", + "Nicht vorhandene Werte werden als **NaN** (Not a Number) angezeigt." 
+ ] + }, + { + "cell_type": "markdown", + "id": "5af33628-90d0-490d-a413-f2083fe40a5a", + "metadata": {}, + "source": [ + "## Werte zählen" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b7930759-6e4f-4a82-aa63-830a15dd8d8e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Benzin 201\n", + "Versich 10\n", + "Steuern 10\n", + "Werkst 9\n", + "Kauf 2\n", + "Name: Typ, dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[\"Typ\"].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "657dc390-cefc-4345-a2c8-86cd4da01b03", + "metadata": {}, + "source": [ + "## Rechnen\n", + "\n", + "Welche Kosten sind insgesamt angefallen? Dazu muss man alle Einträge der Spalte `Preis` aufsummieren." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3e68e583-7427-40fa-86bf-f87571d83a63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "27279.390000000003" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[\"Preis\"].sum()" + ] + }, + { + "cell_type": "markdown", + "id": "9b631276-87e4-449b-9d07-52896b7fbc16", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "\n", + "Was ist der Durchschnittsverbrauch des Autos über die gesamte erfasste Zeit?\n", + "\n", + "**Hinweise**\n", + "* Gesamtverbrauch, d.h. wieviele Liter wurden insgesamt verbraucht\n", + "* Fahrstrecke, d.h. km-Stand am Ende - km-Stand am Anfang\n", + "* Verbrauch wird i.d.R. 
in l/100km angegeben" + ] + }, + { + "cell_type": "markdown", + "id": "e2cfce0c-9eb6-4ac3-a2bb-43d085d20466", + "metadata": {}, + "source": [ + "## Lösung" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "081d24f8-cdc7-4273-80fb-c9e20a42f09b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5.867165899697274" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l_total = d[\"Liter\"].sum()\n", + "\n", + "km_start = d[\"km\"].min() # oder d[\"km\"][0] -> km-Stand aus der 0ten Zeile\n", + "km_end = d[\"km\"].max() # oder d[\"km\"].iloc[-1] -> km-Stand aus der letzten Zeile,\n", + " # geht man von 0 eins zurück fängt man am Ende wieder an\n", + "\n", + "km_total = km_end - km_start\n", + "\n", + "fuel_avg = 100 * l_total / km_total\n", + "fuel_avg" + ] + }, + { + "cell_type": "markdown", + "id": "9b125258-956b-4a6e-8074-6f4956563bd0", + "metadata": {}, + "source": [ + "## Daten filtern\n", + "Wenn man nur an einem Teil der Daten interessiert ist kann man sich diesen selektieren." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e15a92cf-d913-436e-8a51-440dfcbde914", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiter
12012-07-10BenzinESSO57.0119934.89
32012-07-23BenzinKaufland55.0382834.20
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89\n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_only = d[d[\"Typ\"] == \"Benzin\"]\n", + "fuel_only.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "561575e8-6193-44e1-956e-59a6e618d4d7", + "metadata": {}, + "source": [ + "## Aufgabe\n", + "1. Selektiere die Zeilen, bei denen der Preis kleiner als 100 ist.\n", + "1. Selektiere die Zeilen, bei denen der Preis gleich 50 ist.\n", + "1. Selektiere die Zeilen, bei denen Liter größer 36 und kleiner 38 sind." + ] + }, + { + "cell_type": "markdown", + "id": "c01cb1b4-dd7a-49f2-ae25-a13bd99de5fb", + "metadata": {}, + "source": [ + "## Lösung" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bbe1a6eb-d467-4656-8c17-ab6f9e4ceeb2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiter
62012-09-10BenzinAVIA60.80206136.87
72012-09-14BenzinOMV61.10271036.83
1062016-09-15BenzinKaufland48.514887237.93
1242017-05-30BenzinReal46.785786836.01
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter\n", + "6 2012-09-10 Benzin AVIA 60.80 2061 36.87\n", + "7 2012-09-14 Benzin OMV 61.10 2710 36.83\n", + "106 2016-09-15 Benzin Kaufland 48.51 48872 37.93\n", + "124 2017-05-30 Benzin Real 46.78 57868 36.01" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[d[\"Preis\"] < 100]\n", + "d[d[\"Preis\"] == 50]\n", + "# d.described() sagte, dass der größte Wert 37.93 ist\n", + "d[d[\"Liter\"] > 36]\n", + "d[d[\"Liter\"].between(36,38)]" + ] + }, + { + "cell_type": "markdown", + "id": "8f1056bd-f6be-48a8-98ca-4f332989e45a", + "metadata": {}, + "source": [ + "## Daten Gruppieren\n", + "\n", + "Die Kosten sollen nach Spalte `Typ` bzw `Beschreibung` aufsummiert werden." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3ecb95b7-de9d-4383-9162-ca1e5b5bf669", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Typ\n", + "Benzin 8704.15\n", + "Kauf 14296.68\n", + "Steuern 500.00\n", + "Versich 2596.02\n", + "Werkst 1182.54\n", + "Name: Preis, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.groupby(\"Typ\")[\"Preis\"].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "9acedd39-f003-4a98-82ae-8082a2685d1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Beschreibung\n", + "AGIP 32.80\n", + "ARAL 795.33\n", + "AVIA 671.59\n", + "Autohof 45.30\n", + "Avanti 64.10\n", + "BFT 88.66\n", + "BP 46.05\n", + "Bavaria 44.00\n", + "ESSO 1320.52\n", + "Elf 45.00\n", + "Globus 798.94\n", + "HEM 27.70\n", + "JET 1074.80\n", + "KK 183.34\n", + "Kaufland 1080.48\n", + "OMV 95.38\n", + "Oil 261.71\n", + "Real 222.15\n", + "SHELL 539.30\n", + "Star 99.80\n", + "Tango 56.85\n", + "Tankcenter 413.43\n", + "Total 481.25\n", + "UNO-X 43.36\n", + "Unbekannt 172.31\n", + "Name: Preis, dtype: float64" + ] + }, + 
"execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_only.groupby(\"Beschreibung\")[\"Preis\"].sum()" + ] + }, + { + "cell_type": "markdown", + "id": "d38f01a2-ffb8-48a5-a646-517989735b3b", + "metadata": {}, + "source": [ + "Das ist doch nett. Noch schöner wäre aber, wenn die Daten nach der zweiten Spalte absteigend sortiert wären.\n", + "Zudem ist man oft nur an den größten Ergebnissen interessiert." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "3152d46a-d38e-4e1c-8465-b13082c72638", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Beschreibung\n", + "ESSO 1320.52\n", + "Kaufland 1080.48\n", + "JET 1074.80\n", + "Globus 798.94\n", + "ARAL 795.33\n", + "AVIA 671.59\n", + "SHELL 539.30\n", + "Total 481.25\n", + "Tankcenter 413.43\n", + "Oil 261.71\n", + "Name: Preis, dtype: float64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_only.groupby(\"Beschreibung\")[\"Preis\"].sum().sort_values(ascending=False).head(10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter_book/05_pandas_visu.ipynb b/jupyter_book/05_pandas_visu.ipynb new file mode 100644 index 0000000..aa610aa --- /dev/null +++ b/jupyter_book/05_pandas_visu.ipynb @@ -0,0 +1,847 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a44722c-9586-4418-ad7f-8a3c964a9db8", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "# Daten visualisieren" + ] + }, + { + "cell_type": "code", + 
"execution_count": 10, + "id": "a5292e13-c487-4e3a-8085-21d80469a734", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas\n", + "d = pandas.read_csv('file:car.csv', sep='\\t', parse_dates=['Datum'])\n", + "fuel_only = d[d[\"Typ\"] == \"Benzin\"]" + ] + }, + { + "cell_type": "markdown", + "id": "ac401f86-f53e-470f-b80a-02a86e3e5632", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Alles auf einmal\n", + "Mittels `plot()` werden alle Zahlenspalten in einer Grafik dargestellt.\n", + "\n", + "**Hinweis:** auf https://jupyter.org/try-jupyter/lab/ wird nur dann eine Grafik angezeigt, wenn man explizit `show()` aus `matplotlib.pyplot` aufruft." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f6af7955-e6ba-4f02-bfe0-e51f677778da", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "d.plot();\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "531b9a96-967c-4a20-89c4-8ddc2dd6e4d8", + "metadata": {}, + "source": [ + "Das ist in diesem Fall nicht besonders hilfreich weil jede Spalte eine andere Bedeutung hat.\n", + "Bei Messwerten, bei dem jede Spalte ein anderes Experiment ist könnte dies aber nützlich sein." + ] + }, + { + "cell_type": "markdown", + "id": "73365e97-2317-496c-8747-9514c3f385da", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Zwei Spalten als X-Y-Koordinaten darstellen" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9279c8d3-eada-4cc1-a62f-00738cd22dde", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fuel_only.plot(kind=\"scatter\", x=\"km\", y=\"Preis\");\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "c50a29cb-39c3-4b3e-8e16-0c56adc09278", + "metadata": {}, + "source": [ + "Das sieht schon besser aus. Allerdings sind die Zahlen nicht wirklich vergleichbar. Besser wäre, den Preis / l darzustellen. Den müssen wir aber erst berechnen." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f996d38c-c942-4f19-89ce-65ee49405419", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "ppl = fuel_only[\"Preis\"] / fuel_only[\"Liter\"]" + ] + }, + { + "cell_type": "markdown", + "id": "be50aaa8-1486-47a8-9f0c-bbdae50ef717", + "metadata": {}, + "source": [ + "Jetzt haben wir eine Spalte `ppl` (Preis pro l) und die Tabelle `fuel_only`.\n", + "Die Spalte `ppl` soll aber in die Tabelle eingefügt werden. Dazu benötigt sie zuerst einmal einen Namen." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3092995f-b852-4db0-99aa-5bcf73cd0234", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ppl_with_name = ppl.rename(\"Preis_pro_l\")" + ] + }, + { + "cell_type": "markdown", + "id": "db096e70-02ec-4f11-bfea-f229f72e6b89", + "metadata": {}, + "source": [ + "`rename()` benennt aber `ppl` nicht einfach um, sondern erzeugt eine neue Spalte, die den Namen enthält.\n", + "Diese kann mit der Tabelle vereinigt werden, wobei wiederum eine neue Tabelle entsteht." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "70eb9694-9866-42e8-b8b0-b22c6ec46377", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fuel_with_ppl = fuel_only.join(ppl_with_name)" + ] + }, + { + "cell_type": "markdown", + "id": "bd233836-e76e-4aed-b1e9-3b6f7c5439ef", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "Diese drei Befehle kann man auch in einem Zusammenfassen. Damit spart man sich neue Variablennamen." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3df419ef-aae2-43fd-871c-d09985fad63f", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiterPreis_pro_l
12012-07-10BenzinESSO57.0119934.891.633993
32012-07-23BenzinKaufland55.0382834.201.609064
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter Preis_pro_l\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89 1.633993\n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20 1.609064" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_with_ppl = fuel_only.join((fuel_only[\"Preis\"] / fuel_only[\"Liter\"]).rename(\"Preis_pro_l\"))\n", + "fuel_with_ppl.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "b728a171-9484-4731-b933-628ed01f2f9c", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "Damit können wir jetzt die Treibstoffkosten in Abhängigkeit vom km-Stand darstellen." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4732e828-26e7-4563-826a-c1e2589c6f7b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fuel_with_ppl.plot(kind=\"scatter\", x=\"km\", y=\"Preis_pro_l\");\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "38c63e68-eedd-485b-a247-66191d549f17", + "metadata": {}, + "source": [ + "Interessant ist jetzt noch der Verbrauch des Fahrzeugs, d.h. wie viele Liter pro 100 km es verbraucht hat.\n", + "Dazu muss man die Differenz zwischen jeweils zwei km-Ständen berechnen.\n", + "Dies geschieht mit der Funktion `diff()`, die man auf eine Spalte der Daten anwendet." + ] + }, + { + "cell_type": "markdown", + "id": "21350aab-508b-4305-b817-6854bc80df37", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Aufgabe\n", + "1. Was macht die Funktion `diff()` genau?\n", + "1. Bei welchen Daten kann man die noch verwenden?" + ] + }, + { + "cell_type": "markdown", + "id": "fd0919aa-bf37-4437-af2f-00b9a8f121b2", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Lösung\n", + "Schauen wir uns das mal genauer an. 
Zuerst die Daten" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8bae2c0a-cab5-4c78-b9aa-8c065b69d152", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 199\n", + "3 828\n", + "4 1444\n", + "6 2061\n", + "7 2710\n", + "Name: km, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_with_ppl[\"km\"].head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bd5eec5b-d958-428a-a164-eab324e9930d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 NaN\n", + "3 629.0\n", + "4 616.0\n", + "6 617.0\n", + "7 649.0\n", + "Name: km, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_with_ppl[\"km\"].diff().head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "31f246d4-bead-4393-a478-dd53e2719e86", + "metadata": {}, + "source": [ + "`diff()` berechnet die Differenz von zwei aufeinanderfolgenden Zeilen.\n", + "Den Abstand kann man aber auch selbst festlegen. Möchte man die Differenz von jeweils den übernächsten Zeilen, muss man den Parameter `periods` verwenden." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "785605be-c477-4397-a7ac-bba042e94984", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 NaN\n", + "3 NaN\n", + "4 1245.0\n", + "6 1233.0\n", + "7 1266.0\n", + "Name: km, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_with_ppl[\"km\"].diff(periods=2).head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "b322a18f-8d5b-44a1-b7a3-02daae8caff7", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Weiter mit der Verbrauchsberechnung\n", + "Wir erzeugen jetzt eine neue **Series** in unserem **Data Frame**, die die gefahrenen km zwischen zwei Tankstopps enthält.\n", + "Mit der Funktion `diff()` geht das ohne Umwege:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b2173099-01c9-47d4-ba6d-177654600e51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiterPreis_pro_lkm_driven
12012-07-10BenzinESSO57.0119934.891.633993NaN
32012-07-23BenzinKaufland55.0382834.201.609064629.0
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter Preis_pro_l km_driven\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89 1.633993 NaN\n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20 1.609064 629.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_with_ppl[\"km_driven\"] = fuel_with_ppl[\"km\"].diff()\n", + "fuel_with_ppl.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "15ad8aed-e928-46dc-aa6e-25ced6032236", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "Nun können wir den Verbrauch berechnen ..." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "346095e8-ebeb-434c-81cd-ffa968176a74", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatumTypBeschreibungPreiskmLiterPreis_pro_lkm_drivenl/100km
12012-07-10BenzinESSO57.0119934.891.633993NaNNaN
32012-07-23BenzinKaufland55.0382834.201.609064629.05.437202
\n", + "
" + ], + "text/plain": [ + " Datum Typ Beschreibung Preis km Liter Preis_pro_l km_driven \\\n", + "1 2012-07-10 Benzin ESSO 57.01 199 34.89 1.633993 NaN \n", + "3 2012-07-23 Benzin Kaufland 55.03 828 34.20 1.609064 629.0 \n", + "\n", + " l/100km \n", + "1 NaN \n", + "3 5.437202 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuel_all = fuel_with_ppl.join((fuel_with_ppl[\"Liter\"] * 100 / fuel_with_ppl[\"km_driven\"]).rename(\"l/100km\"))\n", + "fuel_all.head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "17d09559-c563-43ba-8e0b-52c993bb36b1", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "... und den Verbrauch in der Grafik darstellen.\n", + "Dabei ändern wir gleich noch ein paar Parameter um zu zeigen, was es alles so für Möglichkeiten gibt." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a9da9245-86f4-4cdb-9dce-3129bb204dc1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fuel_all.plot(x=\"Datum\", y=\"l/100km\", figsize=(15,4), title=\"Verbrauch meines Autos\");\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "49454f16-873e-4802-9535-ad331f698742", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Box-and-Whisker-Plots\n", + "Auf einen Blick sehen, wie die Daten verteilt sind.\n", + "Wer erinnert sich noch an `describe()`?" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "95c4dddf-de31-425b-9bee-7221a0c9b7d5", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD5CAYAAADSiMnIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAYG0lEQVR4nO3df5TV9X3n8edrBgWEKqic0YL0kg1JI2NjZKKk2dMa7RlBe4rbGhubIyTLhsaoSdvdTSelZ2mT2mK7u0kkiV1XSSCrqLU5lQYSQgCTtieoQ/wBijlOEMOwilMRjKgo8u4f9zP0y3g/A3PvzL3D8Hqccw/f7/v7+X6/n+/cO9/X99dlFBGYmZlV0tToDpiZ2fDlkDAzsyyHhJmZZTkkzMwsyyFhZmZZoxrdgcF25plnRqlUanQ3zMyOK5s3b/7XiJjUtz7iQqJUKtHZ2dnobpiZHVckPVup7stNZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLJG3JfpbOBKHaurnnfHkisGsSdmNtw4JKzfHX2pY7WDwOwE5stNZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLKOGhKSlkl6QdLWQu10SeskPZ3+nZjqknSLpC5Jj0u6oDDP/NT+aUnzC/WZkrakeW6RpP7WYWZm9XMsZxLfAGb3qXUA6yNiOrA+jQPMAaan10LgVijv8IHFwEXAhcDiwk7/VuAThflmH2UdZmZWJ0cNiYj4IbCnT3kusDwNLweuLNRXRNkmYIKks4HLgHURsSciXgLWAbPTtFMjYlNEBLCiz7IqrcPMzOqk2nsSLRHxXBp+HmhJw5OBnYV23anWX727Qr2/dbyNpIWSOiV19vT0VLE5ZmZWSc03rtMZQAxCX6peR0TcFhFtEdE2adKkoeyKmdkJpdqQ2J0uFZH+fSHVdwHnFNpNSbX+6lMq1Ptbh5mZ1Um1IbEK6H1CaT5wf6E+Lz3lNAvYly4ZrQXaJU1MN6zbgbVp2suSZqWnmub1WValdZiZWZ0c9e9JSFoJXAycKamb8lNKS4B7JS0AngWuTs3XAJcDXcCrwMcBImKPpC8AD6d2n4+I3pvhn6L8BNVY4DvpRT/rMDOzOjlqSETENZlJl1ZoG8D1meUsA5ZVqHcCrRXqL1Zah5mZ1Y+/cW1mZlkOCTMzy3JImJlZlkPCzMyyHBJmZpblkDAzsyyHhJmZZTkkzMwsyyFhZmZZDgkzM8tySJiZW
ZZDwszMshwSZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzLIeEmZllOSTMzCzLIWFmZlkOCTMzy3JImJlZlkPCzMyyagoJSX8o6QlJWyWtlDRG0jRJD0rqknSPpJNT29FpvCtNLxWW87lU/4mkywr12anWJamjlr6amdnAVR0SkiYDnwbaIqIVaAY+AtwMfDEi3gm8BCxIsywAXkr1L6Z2SDo3zTcDmA18TVKzpGbgq8Ac4FzgmtTWzMzqpNbLTaOAsZJGAacAzwGXAPel6cuBK9Pw3DROmn6pJKX63RFxICKeAbqAC9OrKyK2R8QbwN2prZmZ1cmoameMiF2S/ifwM+A14HvAZmBvRBxMzbqByWl4MrAzzXtQ0j7gjFTfVFh0cZ6dfeoXVeqLpIXAQoCpU6dWu0kj2nv//Hvse+3NquYtdaweUPvTxp7EY4vbq1qXmQ0vVYeEpImUj+ynAXuBv6N8uajuIuI24DaAtra2aEQfhrt9r73JjiVX1GVdAw0VMxu+arnc9BvAMxHRExFvAt8CPghMSJefAKYAu9LwLuAcgDT9NODFYr3PPLm6mZnVSS0h8TNglqRT0r2FS4EngY3AVanNfOD+NLwqjZOmb4iISPWPpKefpgHTgYeAh4Hp6Wmpkynf3F5VQ3/NzGyAarkn8aCk+4AfAweBRyhf8lkN3C3pL1LtjjTLHcA3JXUBeyjv9ImIJyTdSzlgDgLXR8RbAJJuANZSfnJqWUQ8UW1/zcxs4KoOCYCIWAws7lPeTvnJpL5tXwc+nFnOTcBNFeprgDW19NHMzKrnb1ybmVmWQ8LMzLIcEmZmluWQMDOzLIeEmZllOSTMzCzLIWFmZlkOCTMzy3JImJlZlkPCzMyyHBJmZpblkDAzsyyHhJmZZTkkzMwsyyFhZmZZDgkzM8tySJiZWZZDwszMshwSZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzrJpCQtIESfdJekrSNkkfkHS6pHWSnk7/TkxtJekWSV2SHpd0QWE581P7pyXNL9RnStqS5rlFkmrpr5mZDUytZxJfBr4bEb8MvBfYBnQA6yNiOrA+jQPMAaan10LgVgBJpwOLgYuAC4HFvcGS2nyiMN/sGvtrZmYDUHVISDoN+DXgDoCIeCMi9gJzgeWp2XLgyjQ8F1gRZZuACZLOBi4D1kXEnoh4CVgHzE7TTo2ITRERwIrCsszMrA5qOZOYBvQAX5f0iKTbJY0DWiLiudTmeaAlDU8Gdhbm7061/urdFepvI2mhpE5JnT09PTVskpmZFdUSEqOAC4BbI+J9wH7+/dISAOkMIGpYxzGJiNsioi0i2iZNmjTUqzMzO2HUEhLdQHdEPJjG76McGrvTpSLSvy+k6buAcwrzT0m1/upTKtTNzKxOqg6JiHge2Cnp3al0KfAksArofUJpPnB/Gl4FzEtPOc0C9qXLUmuBdkkT0w3rdmBtmvaypFnpqaZ5hWWZmVkdjKpx/huBOyWdDGwHPk45eO6VtAB4Frg6tV0DXA50Aa+mtkTEHklfAB5O7T4fEXvS8KeAbwBjge+kl5mZ1UlNIRERjwJtFSZdWqFtANdnlrMMWFah3gm01tJHMzOrnr9xbWZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzrFq/J2HHiV94TwfnLe84esNBWRfAFXVZl5kNLYfECeLn25awY0l9dtyljtV1WY+ZDT1fbjIzsyyHhJmZZTkkzMwsyyFhZmZZDgkzM8tySJiZWZZDwszMshwSZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzLIeEmZllOSTMzCzLIWFmZlk1h4SkZkmPSPp2Gp8m6UFJXZLukXRyqo9O411peqmwjM+l+k8kXVaoz061LkkdtfbVz
MwGZjDOJD4DbCuM3wx8MSLeCbwELEj1BcBLqf7F1A5J5wIfAWYAs4GvpeBpBr4KzAHOBa5Jbc3MrE5qCglJU4ArgNvTuIBLgPtSk+XAlWl4bhonTb80tZ8L3B0RByLiGaALuDC9uiJie0S8Adyd2pqZWZ3UeibxJeCzwKE0fgawNyIOpvFuYHIangzsBEjT96X2h+t95snV30bSQkmdkjp7enpq3CQzM+tVdUhI+k3ghYjYPIj9qUpE3BYRbRHRNmnSpEZ3x8xsxBhVw7wfBH5L0uXAGOBU4MvABEmj0tnCFGBXar8LOAfoljQKOA14sVDvVZwnVzczszqo+kwiIj4XEVMiokT5xvOGiPgosBG4KjWbD9yfhlelcdL0DRERqf6R9PTTNGA68BDwMDA9PS11clrHqmr7a2ZmA1fLmUTOHwN3S/oL4BHgjlS/A/impC5gD+WdPhHxhKR7gSeBg8D1EfEWgKQbgLVAM7AsIp4Ygv6amVnGoIRERDwAPJCGt1N+Mqlvm9eBD2fmvwm4qUJ9DbBmMPpoZmYD529cm5lZlkPCzMyyHBJmZpblkDAzsyyHhJmZZTkkzMwsyyFhZmZZDgkzM8tySJiZWZZDwszMshwSZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVnWUPz5UhumSh2r67Ke08aeVJf1mNnQc0icIHYsuaKq+Uodq6ue18yOf77cZGZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzLIeEmZllOSTMzCzLIWFmZllVh4SkcyRtlPSkpCckfSbVT5e0TtLT6d+JqS5Jt0jqkvS4pAsKy5qf2j8taX6hPlPSljTPLZJUy8aamdnA1HImcRD4rxFxLjALuF7SuUAHsD4ipgPr0zjAHGB6ei0EboVyqACLgYuAC4HFvcGS2nyiMN/sGvprZmYDVHVIRMRzEfHjNPxzYBswGZgLLE/NlgNXpuG5wIoo2wRMkHQ2cBmwLiL2RMRLwDpgdpp2akRsiogAVhSWZWZmdTAo9yQklYD3AQ8CLRHxXJr0PNCShicDOwuzdadaf/XuCvVK618oqVNSZ09PT20bY2Zmh9UcEpLGA38P/EFEvFycls4AotZ1HE1E3BYRbRHRNmnSpKFenZnZCaOmkJB0EuWAuDMivpXKu9OlItK/L6T6LuCcwuxTUq2/+pQKdTMzq5Nanm4ScAewLSL+d2HSKqD3CaX5wP2F+rz0lNMsYF+6LLUWaJc0Md2wbgfWpmkvS5qV1jWvsCwzM6uDWv7o0AeBa4Etkh5NtT8BlgD3SloAPAtcnaatAS4HuoBXgY8DRMQeSV8AHk7tPh8Re9Lwp4BvAGOB76SXmZnVSdUhERH/DOS+t3BphfYBXJ9Z1jJgWYV6J9BabR/NzKw2/sa1mZllOSTMzCzLIWFmZlkOCTMzy3JImJlZlkPCzMyyHBJmZpblkDAzsyyHhJmZZTkkzMwsyyFhZmZZDgkzM8tySJiZWZZDwszMshwSZmaW5ZAwM7Msh4SZmWU5JMzMLMshYWZmWQ4JMzPLckiYmVmWQ8LMzLIcEmZmluWQMDOzLIeEmZllOSTMzCzLIWFmZlkOCTM7rjQ3NyPp8Ku5ubnRXRrRhn1ISJot6SeSuiR1NLo/vcaPH3/EB3X8+PGN7pIdo+L71vsaSVauXElrayvNzc20traycuXKRndp0DQ3N3Po0KEjaocOHRpRQTFmzJgjPptjxoxpaH+GdUhIaga+CswBzgWukXRuY3tVDoj9+/dTKpXo6uqiVCqxf/9+B8VxoDcQmpqa+P73v09TU9MR9ePdypUrWbRoEUuXLuX1119n6dKlLFq0aMQERW9AXHfddezdu5frrrvuiPrxbsyYMRw4cICWlha2bdtGS0sLBw4caGhQKCIatvKjkfQB4M8i4rI0/jmAiPir3DxtbW3R2dk51P2iVCrxzDPPHK5NmzaNHTt2MJx/ntUodaxmx5IrGt2NQ
SOJpqYm3nrrrcO13qPTkfDetba2snTpUj70oQ8drm3cuJEbb7yRrVu3NrBneectP6/u69wyf0vd13ksJNHS0sLzzz9/uHbWWWexe/fuIf98StocEW1vqw/nXwxJVwGzI+K/pPFrgYsi4oY+7RYCCwGmTp0689lnnx3wuk7kD2qpY3XV8w6HABnp791I376BkMQpp5zC/v37D9fGjRvHq6++OmxD/nh5/3IhQUQM2xdwFXB7Yfxa4Cv9zTNz5swYakCUSqUjaqVSKco/ThvOgGhqajqi1tTUNGLeuxkzZsSGDRuOqG3YsCFmzJjRoB4NLiCAGD9+fGzevDnGjx9/uDYSANHS0nJEraWlpS7bB3RGhX3qsL4nAewCzimMT0m1hho3bhw7duxg2rRp/PSnPz18qWncuHGN7podg94bnevXr694I/R4tmjRIhYsWMDGjRt588032bhxIwsWLGDRokWN7tqgaG9vB+CVV15h5syZvPLKK0fUj3ejR49m9+7dnHXWWTz11FOHLzWNHj26cZ2qlBzD5QWMArYD04CTgceAGf3NU48ziYiIcePGHT6CAWLcuHF1Wa/Vrvi+MYKOQnvdddddMWPGjGhqaooZM2bEXXfd1eguDar29vaQFEBIivb29kZ3aVCNHj36iM/m6NGj67JeMmcSw/qeBICky4EvAc3Asoi4qb/29bhxbWY20uTuSYxqRGcGIiLWAGsa3Q8zsxPRcL8nYWZmDeSQMDOzLIeEmZllOSTMzCxr2D/dNFCSeoCBf+W6emcC/1rH9dXbSN6+kbxt4O073tV7+34pIib1LY64kKg3SZ2VHhsbKUby9o3kbQNv3/FuuGyfLzeZmVmWQ8LMzLIcErW7rdEdGGIjeftG8raBt+94Nyy2z/ckzMwsy2cSZmaW5ZAwM7Msh0QFkt6S9KikrZL+TtIpA5j3FyXdN5T9G2ySSpKG59+2rIKkVyrUPilpXhr+mKRfrH/PbCTr+7mT9B1JUyTdIKlLUkg6szBdkm5J0x6XdEFh2nxJT6fX/Nw66sEhUdlrEXF+RLQCbwCfLE6UlP3fcyPi/0fEVUPdQRuYiPjbiFiRRj8GDCgk+nvPqzFSDkQk7Sju+Ar1ITvw6G/Zw+VnI2kscEZEdAP/AvwGb/+S7xxgenotBG5N854OLAYuAi4EFkuaWKeuv41D4uj+CXinpIsl/ZOkVcCTkpol/Y2kh9NRwO/DkR9gSTMkPZR2Bo9Lmt7IDTkWkt4h6RFJ/13SP0hal3YEN0j6ozRtU/ogHzck/Zmk/5b+bnobcGd6X8ZKminpB5I2S1or6ew0zwOSviSpE/jMIHdpWB6ISGoeiuXWg6RRw+gg7WLgAYCIeCQidlRoMxdYkf7mzyZgQvrsXQasi4g9EfESsA6YXZxR0pmSfiTpirRv+oGk+yVtl7RE0kfTvmeLpP9Qy4Y4JPqRflHnAL1/VfwC4DMR8S5gAbAvIt4PvB/4hKRpfRbxSeDLEXE+5R1Td106XiVJ7wb+nvKRdg/QCvw25e27CXg1It4H/AiY16Bu1iQi7gM6gY+m9+UgsBS4KiJmAssob2uvkyOiLSL+1xB2a8gPRNI8T0m6U9I2Sff1nr2kg4CbJf0Y+LCka9LOZaukm2vduME48EhB/pikx4DrC/WPSVolaQOwvs/PZpOkGYW2D0hqkzRO0rL0c3tE0tzCsr4l6bsqX+b56xo2ew7w3aO0mQzsLIx3p1qu3rsdLcBq4H9ExOpUfi/l/c17gGuBd0XEhcDtwI3Vb4ZDImespEcp70x+BtyR6g9FxDNpuB2Yl9o9CJxB+bSx6EfAn0j6Y8r/L8prQ93xGkwC7qe883ws1TZGxM8jogfYB/xjqm8BSvXv4pB4N+UwXJfeyz+l/LfUe90zlCuv84HIu4GvRcR7gJeBTxWmvRgRFwA/BG4GLgHOB94v6coatm+wDjy+DtwYEe+tMO0CyiH/633q9wBXp36cDZwdEZ3AImBD2ol+C
PgbSb1/oP584HeB84DflXTOgDb4330Q+Ocq5+3PScB64LMRsa5QfzginouIA8BPge+les2/qw6JynovBZwfETdGxBupvr/QRpQ/tL3tpkXE94oLiYi7gN8CXgPWSLqkPt2vyj7KgfgfC7UDheFDhfFDHAd/1fAYCXii8D6eFxHthen7czPWqBEHIjsj4l/S8P/jyPe6NwzfDzwQET0RcRC4E/i1gW5cMigHHpImABMi4oep9M0+TdZFxJ4Ks94L9F56uhrovVfRDnSkn+sDwBhgapq2PiL2RcTrwJPALx19M9/W33dQ/lm/cZSmu4BiCE1JtVwdyme+mylfkioast9Vh0T11gLXSToJQNK7CkcjpNo7gO0RcQvlX5ZfqX83j9kbwH+ivFP6vUZ3Zoj9HPiFNPwTYJKkDwBIOql4iWIINeJApO83Z4vjQxGG9TrwqNj3iNgFvCjpVyifHfQGoYDfKfxcp0bEtgr9e6vKPh3LpSaAVZR/3yRpFuWzxuco71vaJU1U+YZ1e6pB+T37z8AvpwODIeeQqN7tlI80fpyugf4f3v6BuhrYmo5YWoEVDGMRsR/4TeAPgVMb3J1anCKpu/D6oz7TvwH8bXpfmikfbd6crnc/CvxqPTvbj8E+EJnaG4bA71H5cshDwK+rfGO0GbgG+EGV/R+UA4+I2AvsldQbNh8dwOz3AJ8FTouIx1NtLXCjJAFIel+1fcuYTSEkJH1aUjflM4LHJd2eJq0BtgNdwP8lXf5LZ0VfAB5Or88Xz5Qi4i3K78slkoqXDIdGRPjll191fgGvVKhdDHy7MN4E/CXlSzFbgY3AaZQvy2xNbTqAJyiH23eB0zPrKwFPUb7MtI3yfYJT0rQdwJmFttcU1nnzUbbjiHn7rK+3jxMo7+w+DXyl0ryU71l8pZ/1zAR6Q/yvC8v+WJ9lHl5vGm+hfIlmcaE2lvJB3Zb0s/t2ZlnfBi4e4Ps6Guhs9OdrMF/+v5vMTgCSSpR3hq2N7osdX3y5yczMsnwmYTaCSDqD8iOSfV0aES/WsNwHKV9KKbo2IrZUal/Der5K+fHRoi9HxNcHcz127BwSZmaW5ctNZmaW5ZAwM7Msh4SZmWU5JMzMLOvfAMbCgqE6r8ciAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fuel_all.plot(kind=\"box\");\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "117cde42-698c-4d2d-841d-816772f6a422", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "Wenn sich die Werte in den Spalten voneinander unterscheiden, ist es besser, wenn man die Daten einzeln visualisiert:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "395dd8c4-5c88-4bcf-b77f-06db4f8f92ab", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fuel_all[[\"Liter\", \"Preis_pro_l\", \"km_driven\", \"l/100km\"]].plot(kind=\"box\", subplots=True, figsize=(15,4));\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "215661fd-30c1-4fa3-98e4-fbbd991f0402", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Bar-Charts" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "7f8176ab-6239-49e3-bf03-831448411301", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEjCAYAAADQeG38AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAa/ElEQVR4nO3df5xddX3n8debBARESDAjYpI6EbNoYKlgHiGI67akGwJYgy4o2C2RpuaxC4pouxSs3fgA6QN3W6mo4EYIBkUgpXSTll+mCMUqv4bfAaRMA0hSkNGEgKBA8L1/nO+QyzCTzJ07mXPhvp+Px33cc77nnDufezOZ9z3f8z3nyDYREdHZtqu7gIiIqF/CICIiEgYREZEwiIgIEgYREQGMr7uAkZo0aZK7u7vrLiMi4jXl9ttv/7ntroHtr9kw6O7upqenp+4yIiJeUyQ9Olh7uokiIiJhEBERCYOIiCBhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJ4DZ+BHKOn+9Qr6y4BgEfOOqLuEiI61lb3DCQtlfSkpNWDLPsTSZY0qcxL0jmSeiXdI+mAhnUXSHqoPBY0tL9X0r1lm3MkabTeXEREDM9wuom+Dcwb2ChpKjAX+GlD82HA9PJYBJxX1t0dWAwcCMwCFkuaWLY5D/hkw3av+lkREbFtbTUMbN8IrB9k0dnAKUDjTZTnAxe5cjMwQdKewKHAKtvrbW8AVgHzyrJdbd/s6mbMFwFHtvSOIiKiaSM6gCxpPrDO9t0DFk0GHmuYX1vattS+dpD2oX7uIkk9knr6+vpGUnpERAyi6TCQtDPweeB/jX45W2Z7ie2Ztmd2db3qctwRETFCI9kz2AuYBtwt6RFgCnCHpLcC64CpDetOKW1bap8ySHtERIyhpsPA9r2232K723Y3VdfOAbafAFYCx5VRRbOBjbYfB64F5kqaWA4czwWuLcueljS7jCI6DlgxSu8tIiKGaThDSy8BbgL2lrRW0sItrH4VsAboBb4FnABgez1wBnBbeZxe2ijrnF+2+Tfg6pG9lYiIGKmtnnRm+9itLO9umDZw4hDrLQWWDtLeA+y7tToiImLbyeUoIiIiYRAREQmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIigoRBRESQMIiICBIGERFBwiAiIkgYREQECYOIiCBhEBERDCMMJC2V9KSk1Q1t/0fSTyTdI+nvJU1oWHaapF5JD0o6tKF9XmnrlXRqQ/s0SbeU9ssk7TCK7y8iIoZhOHsG3wbmDWhbBexrez/gX4HTACTNAI4B9inbnCtpnKRxwDeAw4AZwLFlXYAvA2fbfiewA
VjY0juKiIimbTUMbN8IrB/Q9n3bm8rszcCUMj0fuNT287YfBnqBWeXRa3uN7ReAS4H5kgQcAlxetl8GHNnaW4qIiGaNxjGDPwKuLtOTgccalq0tbUO1vxl4qiFY+tsHJWmRpB5JPX19faNQekREQIthIOnPgU3AxaNTzpbZXmJ7pu2ZXV1dY/EjIyI6wviRbijpE8AHgTm2XZrXAVMbVptS2hii/RfABEnjy95B4/oRETFGRrRnIGkecArwIdvPNSxaCRwj6Q2SpgHTgVuB24DpZeTQDlQHmVeWELkeOKpsvwBYMbK3EhERIzWcoaWXADcBe0taK2kh8HXgTcAqSXdJ+iaA7fuA5cD9wDXAibZfKt/6PwVcCzwALC/rAvwZ8DlJvVTHEC4Y1XcYERFbtdVuItvHDtI85B9s22cCZw7SfhVw1SDta6hGG0VERE1yBnJERCQMIiIiYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIigoRBRESQMIiICBIGERFBwiAiIkgYREQECYOIiCBhEBERJAwiIoJhhIGkpZKelLS6oW13SaskPVSeJ5Z2STpHUq+keyQd0LDNgrL+Q5IWNLS/V9K9ZZtzJGm032RERGzZcPYMvg3MG9B2KnCd7enAdWUe4DBgenksAs6DKjyAxcCBwCxgcX+AlHU+2bDdwJ8VERHb2FbDwPaNwPoBzfOBZWV6GXBkQ/tFrtwMTJC0J3AosMr2etsbgFXAvLJsV9s32zZwUcNrRUTEGBnpMYM9bD9epp8A9ijTk4HHGtZbW9q21L52kPZBSVokqUdST19f3whLj4iIgVo+gFy+0XsUahnOz1pie6btmV1dXWPxIyMiOsJIw+BnpYuH8vxkaV8HTG1Yb0pp21L7lEHaIyJiDI00DFYC/SOCFgArGtqPK6OKZgMbS3fStcBcSRPLgeO5wLVl2dOSZpdRRMc1vFZERIyR8VtbQdIlwO8AkyStpRoVdBawXNJC4FHgo2X1q4DDgV7gOeB4ANvrJZ0B3FbWO912/0HpE6hGLO0EXF0eERExhrYaBraPHWLRnEHWNXDiEK+zFFg6SHsPsO/W6oiIiG0nZyBHRETCICIiEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIigoRBRESQMIiICBIGERFBwiAiIkgYREQEw7hq6etV96lX1l0CAI+cdUTdJUREZM8gIiISBhERQcIgIiJIGEREBAmDiIggYRAREbQYBpI+K+k+SaslXSJpR0nTJN0iqVfSZZJ2KOu+ocz3luXdDa9zWml/UNKhLb6niIho0ojDQNJk4CRgpu19gXHAMcCXgbNtvxPYACwsmywENpT2s8t6SJpRttsHmAecK2ncSOuKiIjmtdpNNB7YSdJ4YGfgceAQ4PKyfBlwZJmeX+Ypy+dIUmm/1Pbzth8GeoFZLdYVERFNGHEY2F4H/BXwU6oQ2AjcDjxle1NZbS0wuUxPBh4r224q67+5sX2QbV5B0iJJPZJ6+vr6Rlp6REQM0Eo30USqb/XTgLcBb6Tq5tlmbC+xPdP2zK6urm35oyIiOkor3US/Bzxsu8/2i8AVwMHAhNJtBDAFWFem1wFTAcry3YBfNLYPsk1ERIyBVsLgp8BsSTuXvv85wP3A9cBRZZ0FwIoyvbLMU5b/wLZL+zFltNE0YDpwawt1RUREk0Z81VLbt0i6HLgD2ATcCSwBrgQulfSl0nZB2eQC4DuSeoH1VCOIsH2fpOVUQbIJONH2SyOtKyIimtfSJaxtLwYWD2hewyCjgWz/Gjh6iNc5EzizlVoiImLkcgZyREQkDCIiImEQEREkDCIigoRBRESQMIiICBIGERFBwiAiIkgYREQECYOIiCBhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKCFsNA0gRJl0v6iaQHJB0kaXdJqyQ9VJ4nlnUl6RxJvZLuk
XRAw+ssKOs/JGlBq28qIiKa0+qewVeBa2y/C/ht4AHgVOA629OB68o8wGHA9PJYBJwHIGl3YDFwIDALWNwfIBERMTZGHAaSdgM+AFwAYPsF208B84FlZbVlwJFlej5wkSs3AxMk7QkcCqyyvd72BmAVMG+kdUVERPNa2TOYBvQBF0q6U9L5kt4I7GH78bLOE8AeZXoy8FjD9mtL21DtryJpkaQeST19fX0tlB4REY1aCYPxwAHAebb3B55lc5cQALYNuIWf8Qq2l9ieaXtmV1fXaL1sRETHayUM1gJrbd9S5i+nCoefle4fyvOTZfk6YGrD9lNK21DtERExRkYcBrafAB6TtHdpmgPcD6wE+kcELQBWlOmVwHFlVNFsYGPpTroWmCtpYjlwPLe0RUTEGBnf4vafBi6WtAOwBjieKmCWS1oIPAp8tKx7FXA40As8V9bF9npJZwC3lfVOt72+xboiIqIJLYWB7buAmYMsmjPIugZOHOJ1lgJLW6klIiJGLmcgR0REwiAiIhIGERFBwiAiIkgYREQECYOIiCBhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiGIUwkDRO0p2S/rHMT5N0i6ReSZdJ2qG0v6HM95bl3Q2vcVppf1DSoa3WFBERzRmNPYPPAA80zH8ZONv2O4ENwMLSvhDYUNrPLushaQZwDLAPMA84V9K4UagrIiKGqaUwkDQFOAI4v8wLOAS4vKyyDDiyTM8v85Tlc8r684FLbT9v+2GgF5jVSl0REdGcVvcM/gY4BfhNmX8z8JTtTWV+LTC5TE8GHgMoyzeW9V9uH2SbV5C0SFKPpJ6+vr4WS4+IiH4jDgNJHwSetH37KNazRbaX2J5pe2ZXV9dY/diIiNe98S1sezDwIUmHAzsCuwJfBSZIGl++/U8B1pX11wFTgbWSxgO7Ab9oaO/XuE1ERIyBEe8Z2D7N9hTb3VQHgH9g+w+A64GjymoLgBVlemWZpyz/gW2X9mPKaKNpwHTg1pHWFRERzWtlz2AofwZcKulLwJ3ABaX9AuA7knqB9VQBgu37JC0H7gc2ASfafmkb1BUREUMYlTCwfQNwQ5lewyCjgWz/Gjh6iO3PBM4cjVoiIqJ5OQM5IiISBhERkTCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERATb5n4GEfE60H3qlXWXAMAjZx1RdwkdIXsGERGRMIiIiIRBRESQMIiICBIGERFBC2Egaaqk6yXdL+k+SZ8p7btLWiXpofI8sbRL0jmSeiXdI+mAhtdaUNZ/SNKC1t9WREQ0o5U9g03An9ieAcwGTpQ0AzgVuM72dOC6Mg9wGDC9PBYB50EVHsBi4EBgFrC4P0AiImJsjDgMbD9u+44y/QzwADAZmA8sK6stA44s0/OBi1y5GZggaU/gUGCV7fW2NwCrgHkjrSsiIpo3KscMJHUD+wO3AHvYfrwsegLYo0xPBh5r2GxtaRuqfbCfs0hSj6Sevr6+0Sg9IiIYhTCQtAvwd8DJtp9uXGbbgFv9GQ2vt8T2TNszu7q6RutlIyI6XkthIGl7qiC42PYVpflnpfuH8vxkaV8HTG3YfEppG6o9IiLGSCujiQRcADxg+ysNi1YC/SOCFgArGtqPK6OKZgMbS3fStcBcSRPLgeO5pS0iIsZIKxeqOxj4Q+BeSXeVts8DZwHLJS0EHgU+WpZdBRwO9ALPAccD2F4v6QzgtrLe6bbXt1BXREQ0acRhYPtfAA2xeM4g6xs4cYjXWgosHWktERHRmpyBHBERCYOIiEgYREQECYOIiCBhEBERJAwiIoLWzjOIiOgI3adeWXcJADxy1hHb7LWzZxAREQmDiIhIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIctJZxCt0wslFEYPJnkFERCQMI
iIiYRARESQMIiKCNgoDSfMkPSipV9KpddcTEdFJ2iIMJI0DvgEcBswAjpU0o96qIiI6R1uEATAL6LW9xvYLwKXA/JpriojoGLJddw1IOgqYZ/uPy/wfAgfa/tSA9RYBi8rs3sCDY1roq00Cfl5zDe0in8Vm+Sw2y2exWbt8Fm+33TWw8TV10pntJcCSuuvoJ6nH9sy662gH+Sw2y2exWT6Lzdr9s2iXbqJ1wNSG+SmlLSIixkC7hMFtwHRJ0yTtABwDrKy5poiIjtEW3US2N0n6FHAtMA5Yavu+mssajrbpsmoD+Sw2y2exWT6Lzdr6s2iLA8gREVGvdukmioiIGiUMIiIiYRAREQmDGAFJXy7PR9ddS0S7kjRtOG3tIgeQmyTpYOCLwNupRmMJsO131FnXWJJ0L7AfcLvtA+qup11Imszm3wsAbN9YX0VjT9IbgP8KdPPKz+H0umqqi6Q7Bv7/kHS77ffWVdOWtMXQ0teYC4DPArcDL9VcS12uATYAu0h6uqG9Pxh3raes+pS9pY8B97P598JAR4UBsALYSPX/4/maa6mFpHcB+wC7SfpIw6JdgR3rqWrrsmfQJEm32D6w7jragaQVtnNBQUDSg8B+tjvyD2A/Satt71t3HXWSNB84EvgQrzx59hngUts/rqOurUkYNEnSWVQnxl1Bwzcf23fUVlTUTtLVwNG2f1l3LXWStAT4mu17666lbpIOsn1T3XUMV7qJmte/V9B4wSkDh9RQS60kPUP13gVsXx7PdmI3EfAccJek63jll4ST6itp7JTjSKb6m3K8pDVUn0N/1+F+ddZXkw9Lug/4FVXX6n7AZ21/t96yBpcwaJLt3627hnZh+03905JEdQ+K2fVVVKuVdPb1tD5YdwFtaK7tUyR9GHgE+AjVMaSEwWuZpP9m+7uSPjfYcttfGeua2omr/sb/J2kx0FG3LS136vtEJ39RsP0ogKTZwH22nynzuwLvBh6tsby6bF+ejwD+1vbG6jtTe0oYDN8by/ObtrhWBxkwUmI7qq6zX9dUTm1svyTpN5J2s72x7npqdh7QOJzyl4O0dYp/kPQTqm6i/yGpizb+/5EDyE2SNNX2YwPa3mr7ibpqqoukCxtmN1HtCn/L9pP1VFQfSSuA/YFVwLP97Z1yzKCfpLtsv2dA2z2deMygdJ1OBDaWLwxvBHax/bOaSxtU9gyat0bS5cBC28+VtqvowG8+to+vu4Y2ckV5dLo1kk6i2hsAOAFYU2M9dbrA9h81zAv4HjCnpnq2KGHQvNXAD4F/kXS07X+j+kfuOJJ2BBZSnWDz8sk0A/4DdATbyyTtBPyW7brvzV2n/w6cA3yBanTRdWy+b3mnWSfpXNsnSJoIXAl8q+6ihpJrEzXPts8FPk3VJ/j7VL/0neg7wFuBQ4F/prpd6TO1VlST8ntwF9UQQiS9R1LHjS6y/aTtY2y/xfYetj/eid2GALb/AvilpG8C3wf+2vaFW9msNjlm0CRJd9rev0zvCSwH3mt753orG3v9n0V/n7Ck7YEf2u644aWSbqc61+SGht+PjjkbV9Iptv+3pK8xyJejTjp2MmBghYC/AG6lfFGw3Zbdiekmat7h/RO2H5f0u8D7aqynTi+W56ck7Qs8Abylxnrq9OIgQwd/U1cxNXigPPfUWkV7+P0B83dSDTPt70VIGLxOrJf0cQZclZHOuyAZwJLSF/oFqhOudqH6FtSJ7iu/F+MkTQdOAtryGjTbgu1/KM/L+tskbUc1eubpITd8HbJ9fDn35CTbZ9ddz3DlmEHzVlCdabuJaghh/6NjSJoKYPt82xts32j7HbbfAqyruby6fJrqQPrzwCXA08DJdRZUB0nfk7RrGUa5Grhf0v+su66xZvsl4Ni662hGjhk0qZP6gYdSTqSZZ/uRAe3HA1+wvVcthUXt+s8zkPQHVMOtT6W670UnnmdwNlX30GW88tyTtryoZbqJmvdjSf+xw6/K+Dng+5KOsP0QgKTTg
I8D/7nWymoi6XoGP3DaaRcw3L4MJDgS+LrtFyV16jfO95Tnxhv7tO1FLRMGzXs/8AlJD9OhV2W0fZWk54GrJR0J/DEwC/iA7Q21FlefP22Y3pHqbl+baqqlTt+kOhP9buBGSW+n6jLrOK+1a1Wlm6hJ5Zf7Vfov1NVJJP0n4O+pDpR+1HbbXnelDpJutT2r7jrGSjlgfJTt5Q1tAsbZ7rhglLQH8JfA22wfJmkGcJDtC2oubVA5gNyk8kd/KnBImX6ODvscJT1Tbnd5FdWt/OYATza0dxxJuzc8Jkk6FNit7rrGku3fAKcMaHMnBkHxbeBa4G1l/l9p40EF6SZqUrlE80xgb+BCqgNE3wUOrrOusdR4H4N42e1svtHPJuBhqkt1dJp/kvSnvPqg6fr6SqrNJNvLy/E0bG+S1Lb3TU8YNO/DVFenvAPA9r9Lyh/HDmd7Wt01tImPlecTG9oMvKOGWur2rKQ3UwYWlHs9tO0lzhMGzXvBtvtHSJTx1NHhJO1MNcrqt2wvKiee7W37H2subUwlFEHSyVTH0U6hOi/pHZJ+BHQBR9dY2hZ1VF/3KFku6f8CEyR9Evgn2vhKhDFmLgReYPOlSdYBX6qvnHpI2lnSFyQtKfPTJXXaLTGnAH9DdS2i7ajucXEp8D7bd9dY1xZlNNEISPovwFyq/uFrba+quaSomaQe2zMHXMjwbtu/XXdtY0nSZVTHT46zvW/ZY/rxwBvedAJJO1AdX3wfcFB5PGV7Rq2FDSHdRCNQ/vivkjQJ+EXd9URbeKHcz6C/+3AvqvNQOs1etj8m6VgA28+pnW/8u23tRDXabrfy+HegbU9WTRgMUzn4cxawHjiD6lr+k4DtJB1n+5o664vafZGqW2CqpIupRpd14p3gOj4USxfZPlT39riF6vjBV9r9hMx0Ew2TpB7g81QJvwQ4zPbNkt4FXNLfNRCdq4wcmU3VfXiz7Z/XXNKYkfQNqgv07Qz8OTCD6oYuBwOfsH1DfdWNLUnXUH1RXE0VBDcBq93mf2wTBsPUeKNvSQ/YfnfDsjsTBp1N0nW252yt7fVK0meAY4A9qQ6Y/pRq+PUtnRSK/UrX2D5UxwveB+xL1atwk+3FddY2lIwmGr7GG5X8asCyJGqHkrSjpN2BSZImNpyF3A1Mrrm8MWP7q7YPorpQYS/wEeCvgRMk/Ydai6tBOfN6NdVZ+lcDPwL2Aj5Ta2FbkD2DYSpnDj5L1QWwE9VlKCjzO9revq7aoj7lG/HJVJccaLyXwzPAt2x/vY662oGk/YGlwH62x9Vdz1iRdBKb9whepOoq6n/cWy7b0XZyAHmYOumXOZryY6r7YB9l+2uSFlBdsfQR4Ht1FlYHSeOBw6i6jOYAN1AdXO8k3cDfAp+1/XjNtQxb9gwiWiDpDuD3bK+X9AGqk4s+TXUt+3fbPqrO+sZKOffmWKp7hN9K9TmssN1RdwF8LUsYRLSg8cSyMqKmz/YXy/zLgw5e7yT9gGpP6O/afQhlDC7dRBGtGSdpfLlM8xxgUcOyjvn/1YF3dHvd6Zhf1oht5BLgnyX9nGqU2Q8BJL2TNr5CZcRA6SaKaFE5O31P4Pv9feRlOOUu7Xrz84iBEgYREZGTziIiImEQERHkAHJE08oF6a4rs28FXgL6yvws2y/UUlhEC3LMIKIFkr4I/NL2X9VdS0Qr0k0U0bqdJD0saXsASbv2z0u6QdJXJd0labWkWXUXGzGYhEFE635FdQ2eI8r8McAVtl8s8zuXM5FPoLpwW0TbSRhEjI7z2Xxns+OBCxuWXQJg+0ZgV0kTxra0iK1LGESMAts/Arol/Q4wrlzL/uXFA1cfq7oihithEDF6LqK6WNuFA9o/BiDp/cBG27lMRbSdhEHE6LkYmEjpFmrwa0l3At8EFo55VRHDkPMMIlrQf7nq4v3A5bafGrDad22fPFY1RYxEwiBiFEj6GtUdvg6vu5aIkchJZxERkWMGERGRMIiICBIGERFBwiAiI
kgYREQE8P8B7DKfFdqTgKsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "d.groupby(\"Typ\")[\"Preis\"].sum().plot(kind=\"bar\");\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "7d3bdb98-87cb-4990-9038-1f26a9bc49f9", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Pie-Charts" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "c30096be-6783-4f26-aa70-1bcf6e4c9d2c", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d.groupby(\"Typ\")[\"Preis\"].sum().plot(kind=\"pie\");\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyter_book/Makefile b/jupyter_book/Makefile new file mode 100644 index 0000000..2674dfc --- /dev/null +++ b/jupyter_book/Makefile @@ -0,0 +1,14 @@ +all: html pdf + +# Mit Copy&Paste eingefügte Bilder werden im Notebook BASE64 kodiert gespeichert. +# Für LaTeX müssen diese als Bild-Datei vorliegen was jupyter-book nicht kann +# -> selber die Bilder extrahieren +preprocess: + ./extract_attachments.sh + +html: preprocess + jupyter-book build . +pdf: preprocess + jupyter-book build --builder pdflatex . 
+clean: + rm -rf _build diff --git a/jupyter_book/_config.yml b/jupyter_book/_config.yml new file mode 100644 index 0000000..8b94ade --- /dev/null +++ b/jupyter_book/_config.yml @@ -0,0 +1,22 @@ +# See https://jupyterbook.org/customize/config.html + +title: Data Science mit Python +author: MARS – Center for Entrepreneurship +email: c.giess@hs-mannheim.de +copyright: "2022" +logo: logo_mars.png + +only_build_toc_files: true + +execute: + execute_notebooks: force + +latex: + latex_documents: + targetname: python_data_science.tex + +sphinx: + extra_extensions: + - sphinx_jupyterbook_latex + config: + language: German diff --git a/jupyter_book/_toc.yml b/jupyter_book/_toc.yml new file mode 100644 index 0000000..4347562 --- /dev/null +++ b/jupyter_book/_toc.yml @@ -0,0 +1,8 @@ +format: jb-book +root: intro +chapters: +- file: 01_wiederholung +- file: 02_mehr_zu_funktionen +- file: 03_matplotlib +- file: 04_pandas +- file: 05_pandas_visu.ipynb diff --git a/jupyter_book/car.csv b/jupyter_book/car.csv new file mode 100644 index 0000000..be44d94 --- /dev/null +++ b/jupyter_book/car.csv @@ -0,0 +1,233 @@ +Datum Typ Beschreibung Preis km Liter +2012-07-07 Kauf Autohaus 13800.00 30 +2012-07-10 Benzin ESSO 57.01 199 34.89 +2012-07-11 Versich Haftpfl. 
104.30 400 +2012-07-23 Benzin Kaufland 55.03 828 34.20 +2012-08-10 Benzin Kaufland 56.72 1444 35.47 +2012-08-23 Steuern Kfz-Steuer 50.00 1500 +2012-09-10 Benzin AVIA 60.80 2061 36.87 +2012-09-14 Benzin OMV 61.10 2710 36.83 +2012-09-19 Benzin Tango 56.85 3328 32.03 +2012-09-21 Benzin ARAL 52.30 3837 30.78 +2012-10-06 Benzin Kaufland 52.01 4396 31.73 +2012-10-23 Benzin ARAL 53.20 4996 33.27 +2012-11-05 Benzin Kaufland 52.12 5571 33.65 +2012-11-12 Benzin Unbekannt 41.72 6070 29.44 +2012-11-17 Benzin AVIA 54.39 6629 33.49 +2012-11-27 Benzin Kaufland 51.65 7219 33.78 +2012-12-14 Kauf Winterreifen 496.68 7300 +2012-12-21 Benzin JET 44.06 7712 28.08 +2012-12-21 Benzin BP 46.05 8182 29.73 +2012-12-29 Benzin SHELL 47.35 8656 30.18 +2012-12-29 Benzin ESSO 47.13 9187 31.65 +2013-01-01 Versich Haftpfl. 374.26 9600 +2013-01-22 Benzin Kaufland 52.00 9752 34.70 +2013-02-12 Benzin ARAL 52.01 10317 32.73 +2013-02-28 Benzin ESSO 52.05 10894 34.04 +2013-03-21 Benzin ESSO 52.10 11436 33.85 +2013-04-05 Benzin ARAL 51.10 11888 30.99 +2013-04-06 Benzin Total 47.80 12398 30.86 +2013-03-20 Benzin ESSO 38.90 12837 25.95 +2013-06-02 Benzin AVIA 49.16 13394 31.33 +2013-06-09 Benzin ESSO 52.70 14005 32.96 +2013-07-08 Steuern Kfz-Steuer 50.00 15300 +2013-07-22 Benzin ESSO 35.20 14730 22.58 +2013-07-23 Benzin ESSO 57.06 15262 35.03 +2013-09-08 Benzin ESSO 51.61 15833 31.30 +2013-09-17 Benzin Kaufland 50.02 16387 32.93 +2013-10-21 Benzin JET 49.71 16949 34.07 +2013-11-08 Benzin ESSO 47.50 17501 33.01 +2013-11-13 Benzin Star 47.80 18056 32.76 +2013-11-17 Benzin ARAL 49.30 18644 31.62 +2013-11-25 Benzin ESSO 47.89 19158 33.28 +2013-12-21 Benzin ARAL 29.95 19502 19.21 +2013-12-23 Benzin OMV 34.28 19838 21.99 +2013-12-26 Benzin ARAL 36.81 20204 23.46 +2013-12-28 Benzin Kaufland 52.00 20706 34.92 +2014-01-02 Versich Haftpfl. 
366.76 21000 +2014-01-28 Benzin ESSO 48.20 21264 33.97 +2014-03-05 Benzin ESSO 51.33 21828 34.24 +2014-03-22 Benzin ESSO 51.79 22407 33.65 +2014-05-02 Benzin Globus 47.31 22956 31.35 +2014-05-11 Benzin ESSO 48.14 23530 31.69 +2014-05-15 Benzin ESSO 50.08 24150 33.63 +2014-06-23 Benzin KK 50.00 24777 33.58 +2014-07-08 Steuern Kfz-Steuer 50.00 25000 +2014-07-11 Benzin Globus 45.46 25297 30.53 +2014-08-09 Benzin Globus 48.25 25911 33.53 +2014-08-24 Benzin Tankcenter 47.01 26435 31.36 +2014-09-13 Benzin AVIA 54.25 27041 35.48 +2014-10-06 Benzin ESSO 40.02 27502 27.43 +2014-10-08 Benzin Avanti 40.40 27998 28.88 +2014-10-22 Benzin ESSO 47.01 28594 33.60 +2014-11-11 Benzin BFT 47.00 29169 33.36 +2014-11-13 Benzin Total 49.15 29689 33.12 +2014-11-18 Benzin UNO-X 43.36 30171 27.70 +2014-11-22 Benzin ARAL 45.77 30716 31.16 +2014-11-23 Benzin ARAL 38.76 31135 26.03 +2014-11-29 Benzin SHELL 45.39 31651 33.65 +2014-12-17 Benzin ESSO 39.81 32146 31.87 +2014-12-25 Benzin ARAL 40.80 32637 31.41 +2014-12-27 Benzin ESSO 41.00 33212 32.83 +2015-01-02 Versich Haftpfl. 392.05 33500 +2015-01-26 Benzin Kaufland 40.20 33735 31.43 +2015-02-22 Benzin Globus 41.66 34242 33.09 +2015-03-15 Benzin ESSO 45.02 34846 31.95 +2015-04-19 Benzin Globus 49.37 35439 35.54 +2015-06-30 Werkst Autohaus 450.44 35900 +2015-07-06 Steuern Kfz-Steuer 50.00 35910 +2015-07-13 Benzin Kaufland 48.35 35983 32.04 +2015-08-15 Benzin Real 39.99 36497 29.00 +2015-09-08 Benzin Kaufland 43.72 37110 33.66 +2015-09-22 Werkst Autohaus 5.59 37200 +2015-11-02 Werkst ATU 17.60 37400 +2015-11-26 Benzin JET 39.26 37650 31.43 +2015-12-21 Benzin JET 34.56 38111 29.07 +2016-01-02 Versich Haftpfl. 
240.37 38500 +2016-01-20 Benzin ARAL 40.28 38650 33.32 +2016-01-25 Benzin Kaufland 37.22 39200 31.04 +2016-02-01 Benzin JET 38.73 39700 33.42 +2016-03-07 Benzin AVIA 39.20 40233 33.25 +2016-03-19 Benzin JET 34.40 40800 30.20 +2016-03-20 Benzin Total 41.50 41331 33.77 +2016-03-23 Benzin ESSO 40.80 41915 35.51 +2016-04-21 Benzin ESSO 39.00 42490 33.08 +2016-05-02 Benzin Total 45.50 43109 33.73 +2016-05-02 Benzin ESSO 41.30 43651 32.29 +2016-05-16 Benzin Globus 40.60 44186 34.12 +2016-06-10 Benzin Unbekannt 0.00 44736 33.00 +2016-06-18 Benzin BFT 41.66 45293 33.32 +2016-07-06 Steuern Kfz-Steuer 50.00 45600 +2016-07-07 Benzin SHELL 36.54 45679 29.49 +2016-07-08 Benzin Kaufland 25.54 45911 20.13 +2016-07-09 Benzin SHELL 38.83 46302 29.89 +2016-07-09 Benzin ESSO 36.92 46629 27.37 +2016-08-02 Benzin SHELL 35.61 47033 29.21 +2016-08-10 Benzin Tankcenter 40.69 47633 33.16 +2016-08-23 Benzin Globus 38.06 48214 33.16 +2016-09-15 Benzin Kaufland 48.51 48872 37.93 +2016-09-17 Benzin Kaufland 34.00 49323 25.78 +2016-09-18 Benzin AGIP 32.80 49714 24.87 +2016-09-25 Benzin Avanti 23.70 50017 20.45 +2016-10-01 Benzin JET 42.00 50609 32.33 +2016-10-26 Benzin Oil 39.91 51173 31.70 +2016-11-14 Benzin JET 39.19 51713 32.15 +2016-12-09 Benzin KK 44.18 52308 35.09 +2017-01-02 Versich Haftpfl. 
227.29 52850 +2017-01-02 Benzin SHELL 46.40 52866 32.47 +2017-01-20 Benzin KK 43.46 53414 33.98 +2017-02-03 Benzin Unbekannt 42.40 53984 32.64 +2017-02-18 Benzin JET 46.80 54608 33.45 +2017-03-08 Benzin SHELL 41.85 55189 32.22 +2017-03-18 Benzin Kaufland 34.12 55635 26.47 +2017-04-11 Benzin Oil 43.45 56199 32.94 +2017-04-18 Benzin SHELL 38.00 56681 28.59 +2017-04-24 Benzin Globus 43.51 57228 34.29 +2017-05-30 Benzin Real 46.78 57868 36.01 +2017-06-13 Werkst Autohaus 272.26 58100 +2017-06-17 Benzin AVIA 41.90 58490 32.01 +2017-07-06 Steuern Kfz-Steuer 50.00 59000 +2017-07-08 Benzin ARAL 46.36 59098 33.38 +2017-07-24 Benzin SHELL 47.17 59700 31.47 +2017-07-28 Benzin ESSO 29.40 60072 20.29 +2017-08-08 Benzin Oil 44.11 60692 34.32 +2017-08-21 Benzin Kaufland 44.01 61280 34.14 +2017-08-31 Benzin Tankcenter 19.11 61564 14.62 +2017-09-01 Benzin SHELL 34.55 62009 25.42 +2017-09-05 Benzin ESSO 38.00 62452 28.17 +2017-09-23 Benzin Tankcenter 44.50 63010 34.31 +2017-10-21 Benzin Kaufland 45.40 63652 34.42 +2017-11-08 Benzin Real 43.94 64305 33.57 +2017-11-16 Benzin Real 46.12 64905 34.77 +2017-11-25 Benzin Autohof 45.30 65480 32.85 +2017-12-14 Benzin Real 45.32 66071 34.62 +2017-12-24 Benzin AVIA 27.50 66460 20.54 +2017-12-17 Benzin Star 33.00 66873 24.83 +2018-01-02 Versich Haftpfl. 
234.86 66350 +2018-01-08 Benzin JET 43.71 67478 33.39 +2018-01-22 Benzin JET 43.05 68022 32.89 +2018-02-13 Benzin ARAL 43.30 68571 32.34 +2018-03-08 Benzin ARAL 46.00 69131 33.60 +2018-03-20 Benzin Globus 45.00 69699 33.61 +2018-04-16 Benzin JET 47.22 70312 35.53 +2018-04-28 Benzin AVIA 41.21 70860 30.10 +2018-05-14 Benzin JET 44.30 71426 31.22 +2018-05-26 Benzin AVIA 39.85 71928 27.31 +2018-06-19 Benzin Oil 47.83 72577 33.71 +2018-06-27 Benzin SHELL 14.90 72745 10.14 +2018-06-29 Benzin KK 45.70 73281 32.90 +2018-07-06 Steuern Kfz-Steuer 50.00 73880 +2018-07-28 Benzin JET 46.60 73898 33.07 +2018-08-21 Benzin JET 46.67 74469 32.43 +2018-09-10 Benzin Globus 45.67 75018 30.67 +2018-09-27 Benzin Unbekannt 39.01 75486 26.92 +2018-10-10 Benzin AVIA 55.00 76069 35.74 +2018-11-03 Benzin JET 51.52 76682 33.26 +2018-11-21 Benzin Total 52.30 77227 34.43 +2018-11-30 Benzin Globus 48.82 77793 32.14 +2018-12-19 Benzin Globus 50.24 78378 34.67 +2019-01-02 Versich Haftpfl. 221.26 78700 +2019-01-10 Benzin HEM 27.70 78758 21.49 +2019-01-22 Benzin JET 44.80 79209 35.03 +2019-01-31 Benzin JET 32.80 79664 25.25 +2019-02-16 Benzin AVIA 36.90 80141 28.19 +2019-03-01 Benzin JET 44.61 80700 34.34 +2019-03-16 Benzin Tankcenter 39.65 81255 31.05 +2019-03-23 Benzin Tankcenter 40.87 81763 30.57 +2019-03-24 Benzin Bavaria 44.00 82273 33.61 +2019-03-28 Benzin Total 46.80 82926 34.44 +2019-04-14 Benzin Globus 47.37 83504 33.62 +2019-05-17 Benzin ESSO 49.30 84043 33.56 +2019-06-13 Benzin Unbekannt 49.18 84644 33.48 +2019-06-26 Benzin Globus 37.80 85139 27.61 +2019-07-01 Benzin ARAL 48.68 85773 33.54 +2019-07-06 Steuern Kfz-Steuer 50.00 86000 +2019-07-17 Benzin Oil 43.68 86317 31.91 +2019-08-15 Benzin JET 27.60 86644 20.16 +2019-08-17 Benzin ESSO 44.25 87131 31.63 +2019-08-18 Benzin SHELL 29.50 87441 21.24 +2019-09-11 Benzin Tankcenter 46.94 87997 33.84 +2019-09-29 Benzin JET 46.70 88606 34.36 +2019-10-14 Benzin ARAL 37.70 89138 27.54 +2019-10-23 Benzin ARAL 39.31 89682 29.58 +2019-10-31 Werkst 
Glühlampe 9.99 90000 +2019-11-11 Benzin ARAL 43.70 90305 32.88 +2019-11-16 Benzin SHELL 40.21 90789 28.74 +2019-11-17 Benzin Star 19.00 91018 13.68 +2019-11-19 Benzin JET 43.50 91553 32.98 +2019-12-05 Benzin Oil 42.73 92128 32.15 +2019-12-22 Benzin Elf 45.00 92653 32.87 +2020-01-02 Versich Haftpfl. 220.76 93000 +2020-01-17 Benzin Globus 42.90 93171 32.04 +2020-01-24 Benzin Kaufland 40.95 93726 30.58 +2020-02-12 Benzin Tankcenter 43.99 94283 33.66 +2020-03-05 Benzin SHELL 43.00 94838 33.62 +2020-04-25 Benzin Kaufland 38.80 95413 35.63 +2020-05-29 Benzin Kaufland 28.50 95907 25.93 +2020-06-26 Benzin Total 34.25 96456 27.87 +2020-07-06 Steuern Kfz-Steuer 50.00 96500 +2020-07-31 Benzin Total 33.20 96965 28.40 +2020-08-31 Benzin Globus 39.75 97514 33.15 +2020-09-16 Benzin Globus 36.21 98064 29.46 +2020-09-22 Werkst ATU 34.30 98230 +2020-10-14 Werkst ATU 90.38 98445 +2020-10-18 Benzin Tankcenter 42.66 98552 32.64 +2020-11-30 Benzin Total 42.10 99114 34.82 +2020-12-16 Benzin Total 37.65 99678 32.77 +2020-12-30 Benzin AVIA 32.90 100125 25.52 +2021-01-04 Versich Haftpfl. 
214.11 100300 +2021-02-27 Benzin Kaufland 47.71 100641 33.86 +2021-04-13 Benzin Tankcenter 48.01 101250 33.88 +2021-05-04 Benzin Total 51.00 101854 30.25 +2021-05-25 Benzin JET 49.61 102414 33.32 +2021-06-14 Werkst Scheibenwischer 31.98 102600 +2021-06-16 Werkst Sommerreifen 270.00 102600 +2021-06-19 Benzin AVIA 40.01 102876 27.05 +2021-07-06 Steuern Kfz-Steuer 50.00 103000 +2021-07-21 Benzin Kaufland 46.80 103443 30.00 +2021-08-12 Benzin Globus 50.96 104026 32.07 +2021-08-28 Benzin AVIA 47.10 104552 29.27 +2021-10-09 Benzin Kaufland 55.10 105147 33.97 +2021-10-19 Benzin JET 40.10 105623 24.32 +2021-12-04 Benzin JET 53.30 106186 34.19 +2021-12-25 Benzin AVIA 51.42 106727 31.96 diff --git a/jupyter_book/extract_attachments.sh b/jupyter_book/extract_attachments.sh new file mode 100755 index 0000000..13c6e1f --- /dev/null +++ b/jupyter_book/extract_attachments.sh @@ -0,0 +1,12 @@ +# Extrahiert aus Python-Notebooks alle mittels Drag&Drop inkludierten Bilder. +awk ' +/"attachments"/ { + getline; + gsub(/[":]/,"",$1) + file=$1; + getline; + gsub(/^[^:]*: "/, "") + sub(/"/, "") + system("echo " $0 " | base64 -d > _build/latex/attachment:"file) +}' *.ipynb + diff --git a/jupyter_book/intro.md b/jupyter_book/intro.md new file mode 100644 index 0000000..747b1a8 --- /dev/null +++ b/jupyter_book/intro.md @@ -0,0 +1,23 @@ +# Digital Basics: Data Science mit Python + +Dieser Kurs richtet sich an +* alle, die an „Digital Basics: Einführung in die Programmierung mit Python“ teilgenommen haben +* sowie Personen, die ein bisschen Python programmieren können +und jetzt lernen möchten, wie man Daten mit Hilfe von Python auswerten kann. + + +# Kurzvorstellung + +* Wer bin ich? +* Wer seid ihr und was wollt ihr am Ende des Kurses machen? 
+ + +# Los gehts + +* Jupyter + * https://jupyter.org/try-jupyter/lab/ + * Notebook Pyolite + * Python im Webbrowser + * Input-Zeile bedienen + * Python Programm (Code) und Dokumentation ([Markdown](https://de.wikipedia.org/wiki/Markdown)) + diff --git a/jupyter_book/jupyter_help.png b/jupyter_book/jupyter_help.png new file mode 100644 index 0000000..ea53977 Binary files /dev/null and b/jupyter_book/jupyter_help.png differ diff --git a/jupyter_book/logo_mars.png b/jupyter_book/logo_mars.png new file mode 100644 index 0000000..eeba023 Binary files /dev/null and b/jupyter_book/logo_mars.png differ diff --git a/jupyter_book/requirements.txt b/jupyter_book/requirements.txt new file mode 100644 index 0000000..7e821e4 --- /dev/null +++ b/jupyter_book/requirements.txt @@ -0,0 +1,3 @@ +jupyter-book +matplotlib +numpy diff --git a/jupyter_book/upload_file.png b/jupyter_book/upload_file.png new file mode 100644 index 0000000..2058464 Binary files /dev/null and b/jupyter_book/upload_file.png differ diff --git a/python_data_science.pdf b/python_data_science.pdf new file mode 100644 index 0000000..50a1591 Binary files /dev/null and b/python_data_science.pdf differ