From 327cf9209c903fb818f6518c1e45594c286fec2b Mon Sep 17 00:00:00 2001 From: Alejandro Restrepo Klinge <56825548+mrrestre@users.noreply.github.com> Date: Mon, 22 Apr 2024 21:45:37 +0200 Subject: [PATCH] lsat changes to notebook --- README.md | 4 - Titanic.ipynb | 3197 +++++++++++-------------------------------------- 2 files changed, 683 insertions(+), 2518 deletions(-) diff --git a/README.md b/README.md index 596ef15..27db12a 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,6 @@ Optional (!): Wenn sie möchten, können Sie gerne ein entsprechendes Video Tuto Bewertet werden vor allem Kreativität der Aufgabe, Sorgfalt und didaktischer Aufbau bzw. Nachvollziehbarkeit. -## Quellen - -https://www.kaggle.com/datasets/sakshisatre/titanic-dataset/data -https://chat.openai.com/ ## Commands init db in docker (Mit beigefügte container nicht notwendig) diff --git a/Titanic.ipynb b/Titanic.ipynb index 0f8463c..e5fb8fe 100644 --- a/Titanic.ipynb +++ b/Titanic.ipynb @@ -1,5 +1,31 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "f0ad356c-2f8d-4aea-968f-ebcdb9c8e857", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "# Exploring Titanic Dataset: Analyzing Survival Factors\n", + "This Jupyter notebook looks into a dataset about the Titanic. The goal is to uncover insights into the factors that influenced survival aboard the Titanic. The dataset comprises passenger information such as age, gender, ticket class, fare, and more, allowing us to investigate correlations and patterns related to survival outcomes.\n", + "\n", + "## Data Preprocessing\n", + "Before embarking on our analysis, it's crucial to understand the journey the dataset has undergone. Initially, the raw data was subjected to a series of Python scripts for preprocessing. 
These scripts took care of tasks such as filling in missing IDs, splitting combined name fields into separate variables (salutation, first name, last name), and ensuring data integrity.\n", + "\n", + "Following preprocessing, the dataset was loaded into a MySQL database for efficient storage and retrieval. \n", + "\n", + "## Objective\n", + "Our primary goal is to discern whether certain variables played a significant role in determining the survival of passengers aboard the Titanic. By analyzing features like age, gender, ticket class, and familial relationships, we aim to unravel potential correlations and uncover underlying trends that influenced survival rates.\n", + "\n", + "## Sources\n", + "- [Kaggle - Titanic Data Set](https://www.kaggle.com/datasets/sakshisatre/titanic-dataset/data)\n", + "- [ChatGPT](https://chat.openai.com/)\n", + "- [IBM - Logistic Regression](https://www.ibm.com/topics/logistic-regression)\n", + "\n", + "-----" + ] + }, { "cell_type": "markdown", "id": "98d38553-0bf7-4aaf-a052-8aa6b84a620a", @@ -187,7 +213,9 @@ { "cell_type": "markdown", "id": "f5812623-59a6-4bd3-ac39-09daed7cd2f0", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## Test Connection" ] @@ -618,7 +646,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9b2a7638b3de4ab5a7714620ac0e560f", + "model_id": "963786f49b904d95b40120ed1b3ecdf7", "version_major": 2, "version_minor": 0 }, @@ -632,7 +660,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ff14cd91ccaf406dbba38e9cd25f47d2", + "model_id": "576251ccc44c47fb8b114da477d2578d", "version_major": 2, "version_minor": 0 }, @@ -646,7 +674,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "664a9698faea40ae89dcc6293d53b4dc", + "model_id": "d025ad0b80144eb0b32dddf5096c0488", "version_major": 2, "version_minor": 0 }, @@ -660,7 +688,7 @@ { "data": { "text/html": [ - "