{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pandas\n",
" Using cached pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n",
"Requirement already satisfied: numpy>=1.26.0 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pandas) (2.2.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pandas) (2.9.0.post0)\n",
"Collecting pytz>=2020.1 (from pandas)\n",
" Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n",
"Collecting tzdata>=2022.7 (from pandas)\n",
" Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n",
"Requirement already satisfied: six>=1.5 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Using cached pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n",
"Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)\n",
"Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n",
"Installing collected packages: pytz, tzdata, pandas\n",
"Successfully installed pandas-2.2.3 pytz-2025.2 tzdata-2025.2\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install pandas"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"loans = pandas.read_csv('loan_historical_data.csv', sep=\";\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Client | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
" Safe | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" K1 | \n",
" High | \n",
" Excellent | \n",
" Female | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 1 | \n",
" K2 | \n",
" High | \n",
" Excellent | \n",
" Man | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 2 | \n",
" K3 | \n",
" Low | \n",
" Poor | \n",
" Man | \n",
" No | \n",
" No | \n",
"
\n",
" \n",
" | 3 | \n",
" K4 | \n",
" Low | \n",
" Excellent | \n",
" Female | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 4 | \n",
" K5 | \n",
" Low | \n",
" Excellent | \n",
" Man | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 5 | \n",
" K6 | \n",
" Low | \n",
" Poor | \n",
" Female | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 6 | \n",
" K7 | \n",
" High | \n",
" Poor | \n",
" Man | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 7 | \n",
" K8 | \n",
" High | \n",
" Poor | \n",
" Female | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 8 | \n",
" K9 | \n",
" Low | \n",
" Fair | \n",
" Man | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 9 | \n",
" K10 | \n",
" High | \n",
" Fair | \n",
" Female | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 10 | \n",
" K11 | \n",
" Low | \n",
" Fair | \n",
" Female | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 11 | \n",
" K12 | \n",
" Low | \n",
" Fair | \n",
" Man | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Client Income Credit Gender Unemployed Safe\n",
"0 K1 High Excellent Female No Yes\n",
"1 K2 High Excellent Man No Yes\n",
"2 K3 Low Poor Man No No\n",
"3 K4 Low Excellent Female Yes Yes\n",
"4 K5 Low Excellent Man Yes Yes\n",
"5 K6 Low Poor Female Yes No\n",
"6 K7 High Poor Man No Yes\n",
"7 K8 High Poor Female Yes Yes\n",
"8 K9 Low Fair Man Yes No\n",
"9 K10 High Fair Female No Yes\n",
"10 K11 Low Fair Female Yes No\n",
"11 K12 Low Fair Man No Yes"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting scikit-learn\n",
" Downloading scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n",
"Requirement already satisfied: numpy>=1.19.5 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from scikit-learn) (2.2.4)\n",
"Requirement already satisfied: scipy>=1.6.0 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from scikit-learn) (1.15.2)\n",
"Collecting joblib>=1.2.0 (from scikit-learn)\n",
" Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)\n",
"Collecting threadpoolctl>=3.1.0 (from scikit-learn)\n",
" Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n",
"Downloading scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.2/13.2 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 kB)\n",
"Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n",
"Installing collected packages: threadpoolctl, joblib, scikit-learn\n",
"Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.6.0\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier\n",
"from sklearn.model_selection import train_test_split # Import train_test_split function\n",
"from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"arr1 = [1,1,2,2,2,4,4,2,3]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{1, 2, 3, 4}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"set(arr1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1, 2, 3, 4]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(set(arr1))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"arr2 = [\"High\", \"Low\", \"Low\", \"Low\", \"High\"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['High', 'Low']"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(set(arr2))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def cat2int(col):\n",
" vals = list(set(col))\n",
" for i, string in enumerate(col):\n",
" col[i] = vals.index(string)\n",
" return col"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 1\n",
"2 1\n",
"3 0\n",
"4 1\n",
"5 0\n",
"6 1\n",
"7 0\n",
"8 1\n",
"9 0\n",
"10 0\n",
"11 1\n",
"Name: Gender, dtype: object"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat2int(loans[\"Gender\"])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Client | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
" Safe | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" K1 | \n",
" High | \n",
" Excellent | \n",
" 0 | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 1 | \n",
" K2 | \n",
" High | \n",
" Excellent | \n",
" 1 | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 2 | \n",
" K3 | \n",
" Low | \n",
" Poor | \n",
" 1 | \n",
" No | \n",
" No | \n",
"
\n",
" \n",
" | 3 | \n",
" K4 | \n",
" Low | \n",
" Excellent | \n",
" 0 | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 4 | \n",
" K5 | \n",
" Low | \n",
" Excellent | \n",
" 1 | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 5 | \n",
" K6 | \n",
" Low | \n",
" Poor | \n",
" 0 | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 6 | \n",
" K7 | \n",
" High | \n",
" Poor | \n",
" 1 | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 7 | \n",
" K8 | \n",
" High | \n",
" Poor | \n",
" 0 | \n",
" Yes | \n",
" Yes | \n",
"
\n",
" \n",
" | 8 | \n",
" K9 | \n",
" Low | \n",
" Fair | \n",
" 1 | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 9 | \n",
" K10 | \n",
" High | \n",
" Fair | \n",
" 0 | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
" | 10 | \n",
" K11 | \n",
" Low | \n",
" Fair | \n",
" 0 | \n",
" Yes | \n",
" No | \n",
"
\n",
" \n",
" | 11 | \n",
" K12 | \n",
" Low | \n",
" Fair | \n",
" 1 | \n",
" No | \n",
" Yes | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Client Income Credit Gender Unemployed Safe\n",
"0 K1 High Excellent 0 No Yes\n",
"1 K2 High Excellent 1 No Yes\n",
"2 K3 Low Poor 1 No No\n",
"3 K4 Low Excellent 0 Yes Yes\n",
"4 K5 Low Excellent 1 Yes Yes\n",
"5 K6 Low Poor 0 Yes No\n",
"6 K7 High Poor 1 No Yes\n",
"7 K8 High Poor 0 Yes Yes\n",
"8 K9 Low Fair 1 Yes No\n",
"9 K10 High Fair 0 No Yes\n",
"10 K11 Low Fair 0 Yes No\n",
"11 K12 Low Fair 1 No Yes"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 1\n",
"4 1\n",
"5 1\n",
"6 0\n",
"7 1\n",
"8 1\n",
"9 0\n",
"10 1\n",
"11 0\n",
"Name: Unemployed, dtype: object"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat2int(loans[\"Income\"])\n",
"cat2int(loans[\"Credit\"])\n",
"cat2int(loans[\"Unemployed\"])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Client | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
" Safe | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" K1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" Yes | \n",
"
\n",
" \n",
" | 1 | \n",
" K2 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" Yes | \n",
"
\n",
" \n",
" | 2 | \n",
" K3 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" | 3 | \n",
" K4 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" Yes | \n",
"
\n",
" \n",
" | 4 | \n",
" K5 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" Yes | \n",
"
\n",
" \n",
" | 5 | \n",
" K6 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" | 6 | \n",
" K7 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" Yes | \n",
"
\n",
" \n",
" | 7 | \n",
" K8 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" Yes | \n",
"
\n",
" \n",
" | 8 | \n",
" K9 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" | 9 | \n",
" K10 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" Yes | \n",
"
\n",
" \n",
" | 10 | \n",
" K11 | \n",
" 1 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" | 11 | \n",
" K12 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" Yes | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Client Income Credit Gender Unemployed Safe\n",
"0 K1 0 0 0 0 Yes\n",
"1 K2 0 0 1 0 Yes\n",
"2 K3 1 1 1 0 No\n",
"3 K4 1 0 0 1 Yes\n",
"4 K5 1 0 1 1 Yes\n",
"5 K6 1 1 0 1 No\n",
"6 K7 0 1 1 0 Yes\n",
"7 K8 0 1 0 1 Yes\n",
"8 K9 1 2 1 1 No\n",
"9 K10 0 2 0 0 Yes\n",
"10 K11 1 2 0 1 No\n",
"11 K12 1 2 1 0 Yes"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loans"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"#split dataset in features and target variable\n",
"\n",
"X = loans[['Income','Credit','Gender','Unemployed']] # Features\n",
"y = loans.Safe # Target variable"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 5 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 6 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 7 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 8 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 9 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 10 | \n",
" 1 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 11 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Income Credit Gender Unemployed\n",
"0 0 0 0 0\n",
"1 0 0 1 0\n",
"2 1 1 1 0\n",
"3 1 0 0 1\n",
"4 1 0 1 1\n",
"5 1 1 0 1\n",
"6 0 1 1 0\n",
"7 0 1 0 1\n",
"8 1 2 1 1\n",
"9 0 2 0 0\n",
"10 1 2 0 1\n",
"11 1 2 1 0"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Yes\n",
"1 Yes\n",
"2 No\n",
"3 Yes\n",
"4 Yes\n",
"5 No\n",
"6 Yes\n",
"7 Yes\n",
"8 No\n",
"9 Yes\n",
"10 No\n",
"11 Yes\n",
"Name: Safe, dtype: object"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=15) # 70% training and 30% test"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
"
\n",
" \n",
" \n",
" \n",
" | 9 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 7 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 10 | \n",
" 1 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 5 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 8 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Income Credit Gender Unemployed\n",
"9 0 2 0 0\n",
"3 1 0 0 1\n",
"4 1 0 1 1\n",
"0 0 0 0 0\n",
"7 0 1 0 1\n",
"10 1 2 0 1\n",
"5 1 1 0 1\n",
"8 1 2 1 1"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9 Yes\n",
"3 Yes\n",
"4 Yes\n",
"0 Yes\n",
"7 Yes\n",
"10 No\n",
"5 No\n",
"8 No\n",
"Name: Safe, dtype: object"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
"
\n",
" \n",
" \n",
" \n",
" | 11 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Income Credit Gender Unemployed\n",
"11 1 2 1 0\n",
"6 0 1 1 0\n",
"2 1 1 1 0\n",
"1 0 0 1 0"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11 Yes\n",
"6 Yes\n",
"2 No\n",
"1 Yes\n",
"Name: Safe, dtype: object"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Create Decision Tree classifer object\n",
"clf = DecisionTreeClassifier()\n",
"\n",
"# Train Decision Tree Classifer\n",
"# Training the model on the data, storing the information learned from the data\n",
"# Model is learning the relationship between x (features: Income, Credit,\tGender,\tUnemployed) and y (Safe)\n",
"clf = clf.fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"y_pred = clf.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Income | \n",
" Credit | \n",
" Gender | \n",
" Unemployed | \n",
"
\n",
" \n",
" \n",
" \n",
" | 11 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Income Credit Gender Unemployed\n",
"11 1 2 1 0\n",
"6 0 1 1 0\n",
"2 1 1 1 0\n",
"1 0 0 1 0"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['No', 'Yes', 'No', 'Yes'], dtype=object)"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11 Yes\n",
"6 Yes\n",
"2 No\n",
"1 Yes\n",
"Name: Safe, dtype: object"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.75\n"
]
}
],
"source": [
"# Model Accuracy, how often is the classifier correct?\n",
"print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"clf = DecisionTreeClassifier()\n",
"clf = clf.fit(X_train,y_train)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"y_pred = clf.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.5\n"
]
}
],
"source": [
"print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pydotplus\n",
" Downloading pydotplus-2.0.2.tar.gz (278 kB)\n",
" Installing build dependencies ... \u001b[?25ldone\n",
"\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
"\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25hRequirement already satisfied: pyparsing>=2.0.1 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pydotplus) (3.2.3)\n",
"Building wheels for collected packages: pydotplus\n",
" Building wheel for pydotplus (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for pydotplus: filename=pydotplus-2.0.2-py3-none-any.whl size=24687 sha256=c477e8981a47f023f93b6f4aa926f3899a8d5302c81197821877eb34780c3280\n",
" Stored in directory: /home/br0kenpixel/.cache/pip/wheels/4a/c0/ed/a9eeeb08c3c53bb90d3822cf76557c8fdcbc349ee11a011169\n",
"Successfully built pydotplus\n",
"Installing collected packages: pydotplus\n",
"Successfully installed pydotplus-2.0.2\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install pydotplus"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import export_graphviz\n",
"from six import StringIO\n",
"from IPython.display import Image"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"import pydotplus"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dot_data = StringIO()\n",
"export_graphviz(clf, out_file=dot_data,\n",
" filled=True, rounded=False,\n",
" special_characters=True,feature_names = ['Income','Credit','Gender','Unemployed'],class_names=['no','yes'])\n",
"graph = pydotplus.graph_from_dot_data(dot_data.getvalue())\n",
"graph.write_png('graf.png')\n",
"Image(graph.create_png())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Titanic"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"titanic = pandas.read_csv('titanic_full.csv')"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Name | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 0 | \n",
" 3 | \n",
" Braund, Mr. Owen Harris | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" A/5 21171 | \n",
" 7.2500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" PC 17599 | \n",
" 71.2833 | \n",
" C85 | \n",
" C | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 1 | \n",
" 3 | \n",
" Heikkinen, Miss. Laina | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" STON/O2. 3101282 | \n",
" 7.9250 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 113803 | \n",
" 53.1000 | \n",
" C123 | \n",
" S | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 0 | \n",
" 3 | \n",
" Allen, Mr. William Henry | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 373450 | \n",
" 8.0500 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Name | \n",
" Sex | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Ticket | \n",
" Fare | \n",
" Cabin | \n",
" Embarked | \n",
"
\n",
" \n",
" \n",
" \n",
" | 886 | \n",
" 887 | \n",
" 0 | \n",
" 2 | \n",
" Montvila, Rev. Juozas | \n",
" male | \n",
" 27.0 | \n",
" 0 | \n",
" 0 | \n",
" 211536 | \n",
" 13.00 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 887 | \n",
" 888 | \n",
" 1 | \n",
" 1 | \n",
" Graham, Miss. Margaret Edith | \n",
" female | \n",
" 19.0 | \n",
" 0 | \n",
" 0 | \n",
" 112053 | \n",
" 30.00 | \n",
" B42 | \n",
" S | \n",
"
\n",
" \n",
" | 888 | \n",
" 889 | \n",
" 0 | \n",
" 3 | \n",
" Johnston, Miss. Catherine Helen \"Carrie\" | \n",
" female | \n",
" NaN | \n",
" 1 | \n",
" 2 | \n",
" W./C. 6607 | \n",
" 23.45 | \n",
" NaN | \n",
" S | \n",
"
\n",
" \n",
" | 889 | \n",
" 890 | \n",
" 1 | \n",
" 1 | \n",
" Behr, Mr. Karl Howell | \n",
" male | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" 111369 | \n",
" 30.00 | \n",
" C148 | \n",
" C | \n",
"
\n",
" \n",
" | 890 | \n",
" 891 | \n",
" 0 | \n",
" 3 | \n",
" Dooley, Mr. Patrick | \n",
" male | \n",
" 32.0 | \n",
" 0 | \n",
" 0 | \n",
" 370376 | \n",
" 7.75 | \n",
" NaN | \n",
" Q | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass Name \\\n",
"886 887 0 2 Montvila, Rev. Juozas \n",
"887 888 1 1 Graham, Miss. Margaret Edith \n",
"888 889 0 3 Johnston, Miss. Catherine Helen \"Carrie\" \n",
"889 890 1 1 Behr, Mr. Karl Howell \n",
"890 891 0 3 Dooley, Mr. Patrick \n",
"\n",
" Sex Age SibSp Parch Ticket Fare Cabin Embarked \n",
"886 male 27.0 0 0 211536 13.00 NaN S \n",
"887 female 19.0 0 0 112053 30.00 B42 S \n",
"888 female NaN 1 2 W./C. 6607 23.45 NaN S \n",
"889 male 26.0 0 0 111369 30.00 C148 C \n",
"890 male 32.0 0 0 370376 7.75 NaN Q "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.tail()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PassengerId | \n",
" Survived | \n",
" Pclass | \n",
" Age | \n",
" SibSp | \n",
" Parch | \n",
" Fare | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 891.000000 | \n",
" 891.000000 | \n",
" 891.000000 | \n",
" 714.000000 | \n",
" 891.000000 | \n",
" 891.000000 | \n",
" 891.000000 | \n",
"
\n",
" \n",
" | mean | \n",
" 446.000000 | \n",
" 0.383838 | \n",
" 2.308642 | \n",
" 29.699118 | \n",
" 0.523008 | \n",
" 0.381594 | \n",
" 32.204208 | \n",
"
\n",
" \n",
" | std | \n",
" 257.353842 | \n",
" 0.486592 | \n",
" 0.836071 | \n",
" 14.526497 | \n",
" 1.102743 | \n",
" 0.806057 | \n",
" 49.693429 | \n",
"
\n",
" \n",
" | min | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 1.000000 | \n",
" 0.420000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 25% | \n",
" 223.500000 | \n",
" 0.000000 | \n",
" 2.000000 | \n",
" 20.125000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 7.910400 | \n",
"
\n",
" \n",
" | 50% | \n",
" 446.000000 | \n",
" 0.000000 | \n",
" 3.000000 | \n",
" 28.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 14.454200 | \n",
"
\n",
" \n",
" | 75% | \n",
" 668.500000 | \n",
" 1.000000 | \n",
" 3.000000 | \n",
" 38.000000 | \n",
" 1.000000 | \n",
" 0.000000 | \n",
" 31.000000 | \n",
"
\n",
" \n",
" | max | \n",
" 891.000000 | \n",
" 1.000000 | \n",
" 3.000000 | \n",
" 80.000000 | \n",
" 8.000000 | \n",
" 6.000000 | \n",
" 512.329200 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PassengerId Survived Pclass Age SibSp \\\n",
"count 891.000000 891.000000 891.000000 714.000000 891.000000 \n",
"mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n",
"std 257.353842 0.486592 0.836071 14.526497 1.102743 \n",
"min 1.000000 0.000000 1.000000 0.420000 0.000000 \n",
"25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n",
"50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n",
"75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n",
"max 891.000000 1.000000 3.000000 80.000000 8.000000 \n",
"\n",
" Parch Fare \n",
"count 891.000000 891.000000 \n",
"mean 0.381594 32.204208 \n",
"std 0.806057 49.693429 \n",
"min 0.000000 0.000000 \n",
"25% 0.000000 7.910400 \n",
"50% 0.000000 14.454200 \n",
"75% 0.000000 31.000000 \n",
"max 6.000000 512.329200 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.describe()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Survived\n",
"0 549\n",
"1 342\n",
"Name: count, dtype: int64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.Survived.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Sex\n",
"male 577\n",
"female 314\n",
"Name: count, dtype: int64"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.Sex.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Cabin\n",
"G6 4\n",
"C23 C25 C27 4\n",
"B96 B98 4\n",
"F2 3\n",
"D 3\n",
" ..\n",
"E17 1\n",
"A24 1\n",
"C50 1\n",
"B42 1\n",
"C148 1\n",
"Name: count, Length: 147, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.Cabin.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Embarked\n",
"S 644\n",
"C 168\n",
"Q 77\n",
"Name: count, dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic.Embarked.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_4882/3535274200.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
" titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})\n"
]
}
],
"source": [
"titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"X = titanic[titanic.columns.difference(['Survived','PassengerId','','Name','Ticket','Cabin','Embarked'])] # Všetko okrem\n",
"y = titanic.Survived"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Fare | \n",
" Parch | \n",
" Pclass | \n",
" Sex | \n",
" SibSp | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 22.0 | \n",
" 7.2500 | \n",
" 0 | \n",
" 3 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 38.0 | \n",
" 71.2833 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 26.0 | \n",
" 7.9250 | \n",
" 0 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 35.0 | \n",
" 53.1000 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 35.0 | \n",
" 8.0500 | \n",
" 0 | \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 886 | \n",
" 27.0 | \n",
" 13.0000 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 887 | \n",
" 19.0 | \n",
" 30.0000 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 888 | \n",
" NaN | \n",
" 23.4500 | \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 889 | \n",
" 26.0 | \n",
" 30.0000 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" | 890 | \n",
" 32.0 | \n",
" 7.7500 | \n",
" 0 | \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
891 rows × 6 columns
\n",
"
"
],
"text/plain": [
" Age Fare Parch Pclass Sex SibSp\n",
"0 22.0 7.2500 0 3 0 1\n",
"1 38.0 71.2833 0 1 1 1\n",
"2 26.0 7.9250 0 3 1 0\n",
"3 35.0 53.1000 0 1 1 1\n",
"4 35.0 8.0500 0 3 0 0\n",
".. ... ... ... ... ... ...\n",
"886 27.0 13.0000 0 2 0 0\n",
"887 19.0 30.0000 0 1 1 0\n",
"888 NaN 23.4500 2 3 1 1\n",
"889 26.0 30.0000 0 1 0 0\n",
"890 32.0 7.7500 0 3 0 0\n",
"\n",
"[891 rows x 6 columns]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 0\n",
" ..\n",
"886 0\n",
"887 1\n",
"888 0\n",
"889 1\n",
"890 0\n",
"Name: Survived, Length: 891, dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"clf = DecisionTreeClassifier(max_depth=4)\n",
"\n",
"# Train Decision Tree Classifer\n",
"clf = clf.fit(X_train,y_train)\n",
"\n",
"#Predict the response for test dataset\n",
"y_pred = clf.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.7821229050279329\n"
]
}
],
"source": [
"# Model Accuracy, how often is the classifier correct?\n",
"print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"person = np.array([ # 23 ročný muž bez detí\n",
" 23,\n",
" 100,\n",
" 0,\n",
" 1,\n",
" 1,\n",
" 0\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 23, 100, 0, 1, 1, 0])"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"person"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 23, 100, 0, 1, 1, 0]])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"person.reshape(1, -1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prediction: [1]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"print('Prediction: ', clf.predict(person.reshape(1,-1))) # Prežije?"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ui-cviko1",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}