In [1]:

%pip install pandas

Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Requirement already satisfied: numpy>=1.26.0 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pandas) (2.2.4)
Requirement already satisfied: python-dateutil>=2.8.2 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pandas) (2.9.0.post0)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Requirement already satisfied: six>=1.5 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Using cached pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2025.2 tzdata-2025.2

[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

In [2]:

import pandas

In [17]:

loans = pandas.read_csv('loan_historical_data.csv', sep=";")

In [18]:

loans

Out[18]:

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	High	Excellent	Female	No	Yes
1	K2	High	Excellent	Man	No	Yes
2	K3	Low	Poor	Man	No	No
3	K4	Low	Excellent	Female	Yes	Yes
4	K5	Low	Excellent	Man	Yes	Yes
5	K6	Low	Poor	Female	Yes	No
6	K7	High	Poor	Man	No	Yes
7	K8	High	Poor	Female	Yes	Yes
8	K9	Low	Fair	Man	Yes	No
9	K10	High	Fair	Female	No	Yes
10	K11	Low	Fair	Female	Yes	No
11	K12	Low	Fair	Man	No	Yes

In [5]:

%pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Requirement already satisfied: numpy>=1.19.5 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from scikit-learn) (2.2.4)
Requirement already satisfied: scipy>=1.6.0 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from scikit-learn) (1.15.2)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.2/13.2 MB 8.3 MB/s eta 0:00:00a 0:00:01
Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.6.0

[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

In [6]:

from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

In [19]:

arr1 = [1,1,2,2,2,4,4,2,3]

In [20]:

set(arr1)

Out[20]:

{1, 2, 3, 4}

In [21]:

list(set(arr1))

Out[21]:

[1, 2, 3, 4]

In [22]:

arr2 = ["High", "Low", "Low", "Low", "High"]

In [23]:

list(set(arr2))

Out[23]:

['High', 'Low']

In [24]:

def cat2int(col):
    vals = list(set(col))
    for i, string in enumerate(col):
        col[i] = vals.index(string)
    return col

In [25]:

cat2int(loans["Gender"])

Out[25]:

0     0
1     1
2     1
3     0
4     1
5     0
6     1
7     0
8     1
9     0
10    0
11    1
Name: Gender, dtype: object

In [26]:

loans

Out[26]:

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	High	Excellent	0	No	Yes
1	K2	High	Excellent	1	No	Yes
2	K3	Low	Poor	1	No	No
3	K4	Low	Excellent	0	Yes	Yes
4	K5	Low	Excellent	1	Yes	Yes
5	K6	Low	Poor	0	Yes	No
6	K7	High	Poor	1	No	Yes
7	K8	High	Poor	0	Yes	Yes
8	K9	Low	Fair	1	Yes	No
9	K10	High	Fair	0	No	Yes
10	K11	Low	Fair	0	Yes	No
11	K12	Low	Fair	1	No	Yes

In [27]:

cat2int(loans["Income"])
cat2int(loans["Credit"])
cat2int(loans["Unemployed"])

Out[27]:

0     0
1     0
2     0
3     1
4     1
5     1
6     0
7     1
8     1
9     0
10    1
11    0
Name: Unemployed, dtype: object

In [28]:

loans

Out[28]:

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	0	0	0	0	Yes
1	K2	0	0	1	0	Yes
2	K3	1	1	1	0	No
3	K4	1	0	0	1	Yes
4	K5	1	0	1	1	Yes
5	K6	1	1	0	1	No
6	K7	0	1	1	0	Yes
7	K8	0	1	0	1	Yes
8	K9	1	2	1	1	No
9	K10	0	2	0	0	Yes
10	K11	1	2	0	1	No
11	K12	1	2	1	0	Yes

In [29]:

#split dataset in features and target variable

X = loans[['Income','Credit','Gender','Unemployed']] # Features
y = loans.Safe # Target variable

In [31]:

Out[31]:

	Income	Credit	Gender	Unemployed
0	0	0	0	0
1	0	0	1	0
2	1	1	1	0
3	1	0	0	1
4	1	0	1	1
5	1	1	0	1
6	0	1	1	0
7	0	1	0	1
8	1	2	1	1
9	0	2	0	0
10	1	2	0	1
11	1	2	1	0

In [32]:

Out[32]:

0     Yes
1     Yes
2      No
3     Yes
4     Yes
5      No
6     Yes
7     Yes
8      No
9     Yes
10     No
11    Yes
Name: Safe, dtype: object

In [33]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=15) # 70% training and 30% test

In [34]:

X_train

Out[34]:

	Income	Credit	Gender	Unemployed
9	0	2	0	0
3	1	0	0	1
4	1	0	1	1
0	0	0	0	0
7	0	1	0	1
10	1	2	0	1
5	1	1	0	1
8	1	2	1	1

In [35]:

y_train

Out[35]:

9     Yes
3     Yes
4     Yes
0     Yes
7     Yes
10     No
5      No
8      No
Name: Safe, dtype: object

In [37]:

X_test

Out[37]:

	Income	Credit	Gender
11	1	2	1
6	0	1	1
2	1	1	1
1	0	0	1

In [36]:

y_test

Out[36]:

11    Yes
6     Yes
2      No
1     Yes
Name: Safe, dtype: object

In [38]:

# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
# Training the model on the data, storing the information learned from the data
# Model is learning the relationship between x (features: Income, Credit,	Gender,	Unemployed) and y (Safe)
clf = clf.fit(X_train,y_train)

In [39]:

y_pred = clf.predict(X_test)

In [40]:

X_test

Out[40]:

	Income	Credit	Gender
11	1	2	1
6	0	1	1
2	1	1	1
1	0	0	1

In [41]:

y_pred

Out[41]:

array(['No', 'Yes', 'No', 'Yes'], dtype=object)

In [42]:

y_test

Out[42]:

11    Yes
6     Yes
2      No
1     Yes
Name: Safe, dtype: object

In [43]:

# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.75

In [44]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [45]:

clf = DecisionTreeClassifier()
clf = clf.fit(X_train,y_train)

In [46]:

y_pred = clf.predict(X_test)

In [47]:

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.5

In [50]:

%pip install pydotplus

Collecting pydotplus
  Downloading pydotplus-2.0.2.tar.gz (278 kB)
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Requirement already satisfied: pyparsing>=2.0.1 in /home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages (from pydotplus) (3.2.3)
Building wheels for collected packages: pydotplus
  Building wheel for pydotplus (pyproject.toml) ... done
  Created wheel for pydotplus: filename=pydotplus-2.0.2-py3-none-any.whl size=24687 sha256=c477e8981a47f023f93b6f4aa926f3899a8d5302c81197821877eb34780c3280
  Stored in directory: /home/br0kenpixel/.cache/pip/wheels/4a/c0/ed/a9eeeb08c3c53bb90d3822cf76557c8fdcbc349ee11a011169
Successfully built pydotplus
Installing collected packages: pydotplus
Successfully installed pydotplus-2.0.2

[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

In [51]:

from sklearn.tree import export_graphviz
from six import StringIO
from IPython.display import Image

In [52]:

import pydotplus

In [53]:

dot_data = StringIO()
export_graphviz(clf, out_file=dot_data,
                filled=True, rounded=False,
                special_characters=True,feature_names = ['Income','Credit','Gender','Unemployed'],class_names=['no','yes'])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('graf.png')
Image(graph.create_png())

Out[53]:

No description has been provided for this image

Titanic¶

In [54]:

titanic = pandas.read_csv('titanic_full.csv')

In [55]:

titanic.head()

Out[55]:

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

In [56]:

titanic.tail()

Out[56]:

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
886	887	0	2	Montvila, Rev. Juozas	male	27.0	0	0	211536	13.00	NaN	S
887	888	1	1	Graham, Miss. Margaret Edith	female	19.0	0	0	112053	30.00	B42	S
888	889	0	3	Johnston, Miss. Catherine Helen "Carrie"	female	NaN	1	2	W./C. 6607	23.45	NaN	S
889	890	1	1	Behr, Mr. Karl Howell	male	26.0	0	0	111369	30.00	C148	C
890	891	0	3	Dooley, Mr. Patrick	male	32.0	0	0	370376	7.75	NaN	Q

In [57]:

titanic.describe()

Out[57]:

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
count	891.000000	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	446.000000	0.383838	2.308642	29.699118	0.523008	0.381594	32.204208
std	257.353842	0.486592	0.836071	14.526497	1.102743	0.806057	49.693429
min	1.000000	0.000000	1.000000	0.420000	0.000000	0.000000	0.000000
25%	223.500000	0.000000	2.000000	20.125000	0.000000	0.000000	7.910400
50%	446.000000	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	668.500000	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	891.000000	1.000000	3.000000	80.000000	8.000000	6.000000	512.329200

In [58]:

titanic.Survived.value_counts()

Out[58]:

Survived
0    549
1    342
Name: count, dtype: int64

In [59]:

titanic.Sex.value_counts()

Out[59]:

Sex
male      577
female    314
Name: count, dtype: int64

In [60]:

titanic.Cabin.value_counts()

Out[60]:

Cabin
G6             4
C23 C25 C27    4
B96 B98        4
F2             3
D              3
              ..
E17            1
A24            1
C50            1
B42            1
C148           1
Name: count, Length: 147, dtype: int64

In [61]:

titanic.Embarked.value_counts()

Out[61]:

Embarked
S    644
C    168
Q     77
Name: count, dtype: int64

In [62]:

titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})

/tmp/ipykernel_4882/3535274200.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  titanic['Sex'] = titanic['Sex'].replace({'male': 0, 'female': 1})

In [63]:

X = titanic[titanic.columns.difference(['Survived','PassengerId','','Name','Ticket','Cabin','Embarked'])] # Všetko okrem
y = titanic.Survived

In [64]:

Out[64]:

	Age	Fare	Parch	Pclass	Sex	SibSp
0	22.0	7.2500	0	3	0	1
1	38.0	71.2833	0	1	1	1
2	26.0	7.9250	0	3	1	0
3	35.0	53.1000	0	1	1	1
4	35.0	8.0500	0	3	0	0
...	...	...	...	...	...	...
886	27.0	13.0000	0	2	0	0
887	19.0	30.0000	0	1	1	0
888	NaN	23.4500	2	3	1	1
889	26.0	30.0000	0	1	0	0
890	32.0	7.7500	0	3	0	0

891 rows × 6 columns

In [65]:

Out[65]:

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [66]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [68]:

clf = DecisionTreeClassifier(max_depth=4)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [69]:

# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7821229050279329

In [70]:

import numpy as np

In [ ]:

person = np.array([ # 23 ročný muž bez detí
    23,
    100,
    0,
    1,
    1,
    0
])

In [72]:

person

Out[72]:

array([ 23, 100,   0,   1,   1,   0])

In [73]:

person.reshape(1, -1)

Out[73]:

array([[ 23, 100,   0,   1,   1,   0]])

In [ ]:

print('Prediction: ', clf.predict(person.reshape(1,-1))) # Prežije?

Prediction:  [1]

/home/br0kenpixel/Documents/ui-cviko1/lib64/python3.13/site-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names
  warnings.warn(

In [75]:

titanic_columns = X_train.columns.to_list()

In [76]:

import matplotlib.pyplot as plt
from sklearn import tree
fig = plt.figure(figsize=(20,6))
_ = tree.plot_tree(clf,
                   feature_names = titanic_columns,
                   class_names=['0','1'],
                   filled=True)

In [ ]:

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	0	0	0	0	Yes
1	K2	0	0	1	0	Yes
2	K3	1	1	1	0	No
3	K4	1	0	0	1	Yes
4	K5	1	0	1	1	Yes
5	K6	1	1	0	1	No
6	K7	0	1	1	0	Yes
7	K8	0	1	0	1	Yes
8	K9	1	2	1	1	No
9	K10	0	2	0	0	Yes
10	K11	1	2	0	1	No
11	K12	1	2	1	0	Yes

	Income	Credit	Gender	Unemployed
0	0	0	0	0
1	0	0	1	0
2	1	1	1	0
3	1	0	0	1
4	1	0	1	1
5	1	1	0	1
6	0	1	1	0
7	0	1	0	1
8	1	2	1	1
9	0	2	0	0
10	1	2	0	1
11	1	2	1	0

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	0	0	0	0	Yes
1	K2	0	0	1	0	Yes
2	K3	1	1	1	0	No
3	K4	1	0	0	1	Yes
4	K5	1	0	1	1	Yes
5	K6	1	1	0	1	No
6	K7	0	1	1	0	Yes
7	K8	0	1	0	1	Yes
8	K9	1	2	1	1	No
9	K10	0	2	0	0	Yes
10	K11	1	2	0	1	No
11	K12	1	2	1	0	Yes

	Income	Credit	Gender	Unemployed
0	0	0	0	0
1	0	0	1	0
2	1	1	1	0
3	1	0	0	1
4	1	0	1	1
5	1	1	0	1
6	0	1	1	0
7	0	1	0	1
8	1	2	1	1
9	0	2	0	0
10	1	2	0	1
11	1	2	1	0

329 KiB Raw Blame History Unescape Escape

Titanic¶

329 KiB

Raw Blame History

	Client	Income	Credit	Gender	Unemployed	Safe
0	K1	0	0	0	0	Yes
1	K2	0	0	1	0	Yes
2	K3	1	1	1	0	No
3	K4	1	0	0	1	Yes
4	K5	1	0	1	1	Yes
5	K6	1	1	0	1	No
6	K7	0	1	1	0	Yes
7	K8	0	1	0	1	Yes
8	K9	1	2	1	1	No
9	K10	0	2	0	0	Yes
10	K11	1	2	0	1	No
11	K12	1	2	1	0	Yes

	Income	Credit	Gender	Unemployed
0	0	0	0	0
1	0	0	1	0
2	1	1	1	0
3	1	0	0	1
4	1	0	1	1
5	1	1	0	1
6	0	1	1	0
7	0	1	0	1
8	1	2	1	1
9	0	2	0	0
10	1	2	0	1
11	1	2	1	0