import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
!pip install seaborn
import seaborn as sns
import re

Requirement already satisfied: seaborn in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from seaborn) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from seaborn) (1.5.3)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from seaborn) (3.9.2)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from pandas>=1.2->seaborn) (2024.1)
Requirement already satisfied: six>=1.5 in c:\users\gjawo\miniconda3\envs\od_zera_do_ai\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)

# Keep previews compact: display at most 10 rows of any DataFrame.
pd.options.display.max_rows = 10

# Load the Titanic passenger data; a comma is already read_csv's default separator.
df = pd.read_csv('26__titanic.csv')
df

# Random 10-row sample as a quick look at the raw values.
df.sample(n=10)

# Split each raw ticket into an optional textual prefix and its trailing number.
# Missing tickets are kept as NaN instead of being turned into the literal
# string 'nan' (which would later show up as a bogus ticket prefix).
ticket = df['ticket'].where(df['ticket'].isna(), df['ticket'].astype(str)).str.strip()

# The ticket number is the digit run at the very end of the ticket
# ("PC 17531" -> 17531). Matching at the end keeps the numbers of prefixed
# tickets; a pattern matched from the start of the string finds no digits
# there and silently yields NaN for every prefixed ticket.
df['ticket_number'] = pd.to_numeric(
    ticket.str.extract(r'(\d+)$', expand=False), errors='coerce'
)

# Whatever precedes the trailing number is the prefix ("C.A. 37671" -> "C.A.").
# Purely numeric tickets have no prefix and are recorded as NaN.
df['ticket_letters'] = (
    ticket.str.replace(r'\s*\d+$', '', regex=True).replace('', np.nan)
)

# The raw column is now fully represented by the two derived columns.
df = df.drop(columns=['ticket'])

df.sample(10)

df.describe()

# Summary statistics computed separately for each passenger class.
groups = df.groupby('pclass')

for pclass_value, class_rows in groups:
    # A single print emits the header and the table on consecutive lines.
    print(f"\npclass: {pclass_value}\n", class_rows.describe(), sep="\n")

pclass: 1.0

       pclass    survived         age       sibsp       parch        fare  \
count   323.0  323.000000  284.000000  323.000000  323.000000  323.000000   
mean      1.0    0.619195   39.159918    0.436533    0.365325   87.508992   
std       0.0    0.486338   14.548059    0.609064    0.715602   80.447178   
min       1.0    0.000000    0.916700    0.000000    0.000000    0.000000   
25%       1.0    0.000000   28.000000    0.000000    0.000000   30.695800   
50%       1.0    1.000000   39.000000    0.000000    0.000000   60.000000   
75%       1.0    1.000000   50.000000    1.000000    0.500000  107.662500   
max       1.0    1.000000   80.000000    3.000000    4.000000  512.329200   

             body  ticket_number  
count   35.000000     224.000000  
mean   162.828571   60923.937500  
std     82.652172   47572.161207  
min     16.000000     680.000000  
25%    109.500000   16966.000000  
50%    166.000000   35281.000000  
75%    233.000000  113059.000000  
max    307.000000  113807.000000  

pclass: 2.0

       pclass    survived         age       sibsp       parch        fare  \
count   277.0  277.000000  261.000000  277.000000  277.000000  277.000000   
mean      2.0    0.429603   29.506705    0.393502    0.368231   21.179196   
std       0.0    0.495915   13.638628    0.590100    0.692717   13.607122   
min       2.0    0.000000    0.666700    0.000000    0.000000    0.000000   
25%       2.0    0.000000   22.000000    0.000000    0.000000   13.000000   
50%       2.0    0.000000   29.000000    0.000000    0.000000   15.045800   
75%       2.0    1.000000   36.000000    1.000000    1.000000   26.000000   
max       2.0    1.000000   70.000000    3.000000    3.000000   73.500000   

             body  ticket_number  
count   31.000000     184.000000  
mean   167.387097  169141.195652  
std    107.077753  100855.731848  
min     15.000000    2003.000000  
25%     78.000000   29105.750000  
50%    155.000000  233558.500000  
75%    284.500000  244256.500000  
max    322.000000  250655.000000  

pclass: 3.0

       pclass    survived         age       sibsp       parch        fare  \
count   709.0  709.000000  501.000000  709.000000  709.000000  708.000000   
mean      3.0    0.255289   24.816367    0.568406    0.400564   13.302889   
std       0.0    0.436331   11.958202    1.299681    0.981639   11.494358   
min       3.0    0.000000    0.166700    0.000000    0.000000    0.000000   
25%       3.0    0.000000   18.000000    0.000000    0.000000    7.750000   
50%       3.0    0.000000   24.000000    0.000000    0.000000    8.050000   
75%       3.0    1.000000   32.000000    1.000000    0.000000   15.245800   
max       3.0    1.000000   74.000000    8.000000    9.000000   69.550000   

             body  ticket_number  
count   55.000000   5.490000e+02  
mean   155.818182   3.525711e+05  
std    102.403720   5.569656e+05  
min      1.000000   1.222000e+03  
25%     67.500000   1.431100e+04  
50%    153.000000   3.470770e+05  
75%    257.000000   3.504050e+05  
max    328.000000   3.101298e+06

# Summary statistics computed separately for each sex.
groups = df.groupby('sex')

for sex_value, sex_rows in groups:
    # to_string() renders every column on one line instead of wrapping the table.
    stats_text = sex_rows.describe().to_string()
    print(f"\nsex: {sex_value}\n")
    print(stats_text)

sex: female

           pclass    survived         age       sibsp       parch        fare        body  ticket_number
count  466.000000  466.000000  388.000000  466.000000  466.000000  466.000000    8.000000   3.420000e+02
mean     2.154506    0.727468   28.687071    0.652361    0.633047   46.198097  166.625000   2.415890e+05
std      0.866181    0.445741   14.576995    1.101009    1.049579   63.292599  138.110657   4.673953e+05
min      1.000000    0.000000    0.166700    0.000000    0.000000    6.750000    7.000000   2.003000e+03
25%      1.000000    0.000000   19.000000    0.000000    0.000000   10.504175   52.750000   1.746525e+04
50%      2.000000    1.000000   27.000000    0.000000    0.000000   23.000000  133.500000   1.138045e+05
75%      3.000000    1.000000   38.000000    1.000000    1.000000   55.331275  306.000000   3.470728e+05
max      3.000000    1.000000   76.000000    8.000000    9.000000  512.329200  328.000000   3.101298e+06

sex: male

           pclass    survived         age       sibsp       parch        fare        body  ticket_number
count  843.000000  843.000000  658.000000  843.000000  843.000000  842.000000  113.000000   6.150000e+02
mean     2.372479    0.190985   30.585233    0.413998    0.247924   26.154601  160.398230   2.531822e+05
std      0.811908    0.393310   14.280571    0.997928    0.708938   42.486877   95.035289   4.286631e+05
min      1.000000    0.000000    0.333300    0.000000    0.000000    0.000000    1.000000   6.800000e+02
25%      2.000000    0.000000   21.000000    0.000000    0.000000    7.876050   79.000000   2.687450e+04
50%      3.000000    0.000000   28.000000    0.000000    0.000000   11.887500  155.000000   2.398530e+05
75%      3.000000    0.000000   39.000000    1.000000    0.000000   26.550000  255.000000   3.492105e+05
max      3.000000    1.000000   80.000000    8.000000    9.000000  512.329200  322.000000   3.101296e+06

# Lift the row limit so the per-column tables below are shown in full.
pd.options.display.max_rows = None

df.nunique()

# Number of distinct values per column, as a two-column table.
unikatowe = df.nunique()
unikatowe_df = unikatowe.rename_axis('Kolumna').reset_index(name='Liczba unikatowych wartości')

unikatowe_df

# Missing values per column: absolute count plus share of all rows.
brakujące_dane = df.isna().sum()
df_brakujące_dane = brakujące_dane.to_frame(name='Brakujące dane')

# Share of rows with a missing value, formatted as a percentage string.
missing_share = brakujące_dane / len(df) * 100
df_brakujące_dane['% brakujących danych'] = missing_share.map('{:.2f}%'.format)

df_brakujące_dane

# How many passengers died (0) and survived (1).
df.loc[:, 'survived'].value_counts()

0.0    809
1.0    500
Name: survived, dtype: int64

# Passenger counts per (class, sex, outcome).
grouped_data = df.groupby(['pclass', 'sex', 'survived']).size().reset_index(name='Count')

plt.figure(figsize=(10, 6))
# Every (class, sex) bar is backed by two rows: victims and survivors.
# barplot aggregates such rows with the mean by default, which would draw the
# average of the two counts plus an error bar. Sum them so that each bar shows
# the number of passengers, as the title states.
sns.barplot(
    x="pclass",
    y="Count",
    hue="sex",
    data=grouped_data,
    estimator="sum",
    errorbar=None,
)

plt.title("Liczba pasażerów w podziale na klasy i płeć")
plt.xlabel('Klasa')
plt.ylabel('Liczba osób')

plt.legend(title='Płeć')

plt.show()

# Passenger counts per (class, sex, outcome).
grouped_data = df.groupby(['pclass', 'sex', 'survived']).size().reset_index(name='Ilość')

# Use the number of passengers actually counted as the denominator.
# len(df) also counts rows without passenger data (the CSV ends with an
# all-empty row), which made the shares add up to slightly less than 100 %.
wszyscy = grouped_data['Ilość'].sum()

grouped_data['Procent całości'] = grouped_data['Ilość'] / wszyscy * 100

# Human-readable (Polish) column headers for display.
grouped_data.columns = ['Klasa biletu', 'Płeć', 'Ocaleni / zmarli', 'Ilość', 'Procent całości']

grouped_data

# Passengers, survivors and victims per (sex, class) group.
grouped_total = df.groupby(['sex', 'pclass']).size().reset_index(name='Wszystkie kobiety lub mężczyźni w tej klasie')

grouped_survived = df[df['survived'] == 1].groupby(['sex', 'pclass']).size().reset_index(name='Ocaleni')

grouped_died = df[df['survived'] == 0].groupby(['sex', 'pclass']).size().reset_index(name='Zmarli')

grouped_data = pd.merge(grouped_total, grouped_survived, on=['sex', 'pclass'])
grouped_data = pd.merge(grouped_data, grouped_died, on=['sex', 'pclass'])

# Denominator for the overall share: passengers that were actually counted.
# df.shape[0] would also include rows without passenger data (the CSV ends
# with an all-empty row), so the overall shares would not add up to 100 %.
total_people = grouped_total['Wszystkie kobiety lub mężczyźni w tej klasie'].sum()

# Survival / death rates within each group, and the group's share of everyone.
grouped_data['Procent_ocalonych'] = (grouped_data['Ocaleni'] / grouped_data['Wszystkie kobiety lub mężczyźni w tej klasie']) * 100
grouped_data['Procent_zmarłych'] = (grouped_data['Zmarli'] / grouped_data['Wszystkie kobiety lub mężczyźni w tej klasie']) * 100
grouped_data['Procent_ogółem'] = (grouped_data['Wszystkie kobiety lub mężczyźni w tej klasie'] / total_people) * 100


grouped_data

# Passengers, survivors and victims per (sex, class) group.
grouped_total = df.groupby(['sex', 'pclass']).size().reset_index(name='Total')
grouped_survived = df[df['survived'] == 1].groupby(['sex', 'pclass']).size().reset_index(name='Survived')
grouped_died = df[df['survived'] == 0].groupby(['sex', 'pclass']).size().reset_index(name='Died')
grouped_data = pd.merge(grouped_total, grouped_survived, on=['sex', 'pclass'])
grouped_data = pd.merge(grouped_data, grouped_died, on=['sex', 'pclass'])

# Denominator for the overall share: passengers that were actually counted.
# df.shape[0] would also include rows without passenger data (the CSV ends
# with an all-empty row), so the overall shares would not add up to 100 %.
total_people = grouped_total['Total'].sum()

grouped_data['Procent_ocalonych'] = (grouped_data['Survived'] / grouped_data['Total']) * 100
grouped_data['Procent_zmarłych'] = (grouped_data['Died'] / grouped_data['Total']) * 100
grouped_data['Procent_ogółem'] = (grouped_data['Total'] / total_people) * 100

# Tick labels such as 'female_01'. pclass is stored as a float, so a plain
# str() would give 'female_1.0', which does not match the '01, 02, 03' wording
# of the x-axis description below.
grouped_data['sex_pclass'] = (
    grouped_data['sex'] + '_' + grouped_data['pclass'].astype(int).map('{:02d}'.format)
)

grouped_data.plot(kind='bar', x='sex_pclass', y=['Procent_ocalonych', 'Procent_zmarłych', 'Procent_ogółem'])
plt.xlabel('Płeć i klasa, gdzie female to kobiety, male mężczyźni, a numery 01, 02 i 03 to odpowiedniki klasy biletu')
plt.ylabel('Ujęcie procentowe')
plt.title('Ujęcie procentowe ocalałych, zmarłych oraz ogólnej liczby pasażerów podzielonych na klasy i płeć')
# A single show() is enough; a second call right after it has no figure left to draw.
plt.show()

# Survivors counted per (class, sex); sexes become columns for stacking.
survivor_rows = df.loc[df['survived'].eq(1)]
ocaleni = survivor_rows.groupby(['pclass', 'sex']).size()

ocaleni_unstacked = ocaleni.unstack()

# One bar per class, with the sexes stacked on top of each other.
survivors_ax = ocaleni_unstacked.plot.bar(stacked=True)
survivors_ax.set_title('Pasażerowie ocaleni, podzieleni według klasy biletu i płci')
survivors_ax.set_xlabel('Klasa biletu')
survivors_ax.set_ylabel('Liczba osób ocalałych')
plt.show()

# Count passengers per (class, sex, outcome) directly from df. The shared
# `grouped_data` variable is reassigned by earlier cells and at this point no
# longer has the 'survived' / 'Count' columns, so relying on it raises KeyError
# when the notebook is run from top to bottom.
outcome_counts = df.groupby(['pclass', 'sex', 'survived']).size().reset_index(name='Count')

survived_men = outcome_counts[(outcome_counts['survived'] == 1) & (outcome_counts['sex'] == 'male')]
survived_women = outcome_counts[(outcome_counts['survived'] == 1) & (outcome_counts['sex'] == 'female')]
not_survived_men = outcome_counts[(outcome_counts['survived'] == 0) & (outcome_counts['sex'] == 'male')]
not_survived_women = outcome_counts[(outcome_counts['survived'] == 0) & (outcome_counts['sex'] == 'female')]

# Total each subgroup across the three classes.
survived_men_sum = survived_men['Count'].sum()
survived_women_sum = survived_women['Count'].sum()
not_survived_men_sum = not_survived_men['Count'].sum()
not_survived_women_sum = not_survived_women['Count'].sum()

# Slice sizes, labels and colours are listed in the same order.
sizes = [survived_men_sum, survived_women_sum, not_survived_men_sum, not_survived_women_sum]
labels = ['Mężczyźni, którzy przeżyli', 'Kobiety, które przeżyły', 'Mężczyźni ofiary', 'Kobiety ofiary']
colors = ['green', 'blue', 'red', 'orange']

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)

# Equal aspect ratio so the pie is drawn as a circle.
ax.axis('equal')

ax.set_title('Wykres kołowy pokazujący podział na pasażerów którzy zginęli i przeżyli , z podziałem na obie płcie')

plt.show()

# 'survived' is 0/1, so its sum per (sex, class) is the number of survivors.
grouped = df.groupby(['sex', 'pclass'])['survived'].agg('sum')

# Flatten the group keys back into ordinary columns for plotting.
table = grouped.to_frame().reset_index()

print(table)

      sex  pclass  survived
0  female     1.0     139.0
1  female     2.0      94.0
2  female     3.0     106.0
3    male     1.0      61.0
4    male     2.0      25.0
5    male     3.0      75.0

# Survivors per sex, one bar per passenger class.
survivors_fig, survivors_axes = plt.subplots(figsize=(10, 6))

sns.barplot(data=table, x='sex', y='survived', hue='pclass', ax=survivors_axes)

<Axes: xlabel='sex', ylabel='survived'>

# NOTE: despite their names, `survived` and `survived_df` hold the victims
# (rows where survived == 0), counted per (sex, class).
victim_rows = df.loc[df['survived'].eq(0)]
survived = victim_rows.groupby(['sex', 'pclass']).size().unstack()

# Back to long format: one row per (sex, class) with its victim count.
survived_df = survived.stack().reset_index().rename(columns={0: 'Liczba ofiar'})

# Each group's share of all victims, in percent.
victims_total = survived_df['Liczba ofiar'].sum()
survived_df['Procent'] = survived_df['Liczba ofiar'] / victims_total * 100

survived_df

victims_fig, victims_axes = plt.subplots(figsize=(10, 6))

sns.barplot(data=survived_df, x='sex', y='Procent', hue='pclass', ax=victims_axes)

<Axes: xlabel='sex', ylabel='Procent'>

# Encode sex numerically (male -> 0, female -> 1) so it can be correlated.
# NOTE: this overwrites df['sex']; values other than 'male'/'female' become NaN.
sex_codes = {'male': 0, 'female': 1}
df['sex'] = df['sex'].map(sex_codes)

# 2x2 correlation matrix between sex and survival.
corr_matrix = df.loc[:, ['sex', 'survived']].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, fmt=".2f", annot=True)
plt.show()

# How many passengers are recorded for each lifeboat label.
df.loc[:, 'boat'].value_counts()

13         39
C          38
15         37
14         33
4          31
           ..
13 15       2
5 9         1
8 10        1
13 15 B     1
15 16       1
Name: boat, Length: 27, dtype: int64

# How many passengers boarded at each port code.
df.loc[:, 'embarked'].value_counts()

S    914
C    270
Q    123
Name: embarked, dtype: int64

# Frequency of each value in the 'sibsp' column.
df.loc[:, 'sibsp'].value_counts()

0.0    891
1.0    319
2.0     42
4.0     22
3.0     20
8.0      9
5.0      6
Name: sibsp, dtype: int64

# Frequency of each value in the 'parch' column.
df.loc[:, 'parch'].value_counts()

0.0    1002
1.0     170
2.0     113
3.0       8
4.0       6
5.0       6
6.0       2
9.0       2
Name: parch, dtype: int64

#### 3.5 Histogramy

#### Poniżej - wizualizacja niektórych danych w wykresach.
#### Pasażerowie podzieleni na klasy biletowe

def create_histogram(df, column_name, label, title):
    """Plot how often each distinct value of a numeric column occurs.

    One bar is drawn per distinct value, centred on that value, and the
    figure is displayed immediately with ``plt.show()``. Missing values are
    ignored.

    Args:
        df: DataFrame that contains ``column_name``.
        column_name: Name of the numeric column to summarise; it is also used
            as the x-axis label.
        label: Text for the y-axis label.
        title: Plot title.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``.
        ValueError: If the column has no non-missing values.
    """
    values = df[column_name].dropna()
    if values.empty:
        # Fail with a message that names the column instead of the opaque
        # "max() arg is an empty sequence".
        raise ValueError(
            f"column {column_name!r} has no non-missing values to plot"
        )

    # Tally every distinct value once, ordered along the x-axis.
    counts = values.value_counts().sort_index()
    unique_values = counts.index.to_numpy()

    # Bars are as wide as the smallest gap between neighbouring values, so
    # they never overlap even when the values are unevenly spaced (e.g.
    # 0, 1, 2, 5, 8). For consecutive integers this is a width of 1.
    if len(unique_values) > 1:
        bar_width = np.diff(unique_values).min()
    else:
        bar_width = 1

    plt.bar(unique_values, counts.to_numpy(), width=bar_width, edgecolor='black')

    # One tick per distinct value, directly under its bar.
    plt.xticks(unique_values)
    plt.xlabel(column_name)
    plt.ylabel(label)
    plt.title(title)

    plt.show()

# Passenger counts for each of the three ticket classes.
create_histogram(
    df,
    column_name='pclass',
    label='Ilość',
    title='Liczba pasażerów w 3 różnych klasach',
)

# Distribution of passenger age with pandas' default binning.
df.loc[:, 'age'].hist()

<Axes: >

# Distribution of the 0/1 survival flag.
df.loc[:, 'survived'].hist()

<Axes: >

# Distribution of ticket fares.
df.loc[:, 'fare'].hist()

<Axes: >

# Distribution of the 'sibsp' column.
df.loc[:, 'sibsp'].hist()

<Axes: >

# Distribution of the 'parch' column.
df.loc[:, 'parch'].hist()

<Axes: >

# Correlate the numeric columns only, and say so explicitly. Relying on the
# implicit default of numeric_only is deprecated (pandas emits a FutureWarning),
# and the text columns cannot be correlated anyway.
corr_mat = df.corr(numeric_only=True)

plt.figure(figsize=(10,10))
# Annotated heatmap; the diverging palette separates positive from negative correlation.
sns.heatmap(corr_mat, annot=True, cmap='coolwarm')

plt.title('Correlation Matrix for Titanic Passengers Data')
plt.show()

C:\Users\gjawo\AppData\Local\Temp\ipykernel_18624\3312680243.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr_mat = df.corr()

# Correlation between ticket class and fare.
df.loc[:, ["pclass", "fare"]].corr()

# Fare against lifeboat, coloured by ticket class.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='fare', y='boat', hue='pclass', palette='viridis', data=df)

plt.title('Wykres zależności pomiędzy ceną biletu, klasą biletu a łodzią, w której znaleźli się pasażerowie:')

plt.show()

# Ticket number against fare, on a wider-than-default canvas.
ticket_fig, ticket_axes = plt.subplots(figsize=(12, 6))
ticket_axes.scatter(df['ticket_number'], df['fare'])

ticket_axes.set_xlabel('Ticket Number')
ticket_axes.set_ylabel('Fare')
ticket_axes.set_title('Scatter plot of Ticket Number vs Fare for Titanic Passengers')

plt.show()

# One box plot per measure, each split by ticket class, to expose outliers.
for measure in ('fare', 'age', 'ticket_number'):
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df, x='pclass', y=measure)
    plt.show()

	Brakujące dane	% brakujących danych
pclass	1	0.08%
survived	1	0.08%
name	1	0.08%
sex	1	0.08%
age	264	20.15%
sibsp	1	0.08%
parch	1	0.08%
fare	2	0.15%
cabin	1015	77.48%
embarked	3	0.23%
boat	824	62.90%
body	1189	90.76%
home.dest	565	43.13%
ticket_letters	957	73.05%
ticket_number	353	26.95%

Zadanie domowe - EDA Danych dotyczących Titanica¶

O Danych¶

1. Ogólny przegląd danych¶

1.1 10 przykładowych wierszy¶

1.2 Modyfikacja zapisu danych o biletach¶

1.3 Krótka analiza najważniejszych statystyk opisujących dane z poszczególnych kolumn:¶

1.4 Analiza danych w podziale na klasy pasażerów.¶

1.5 Analiza w podziale na płeć¶

1.6 Wartości unikatowe¶

2 Analiza brakujących wartości¶

3 Eksploracja poszczególnych kolumn¶

3.1 Analiza informacji o ilości ofiar i osób które przeżyły¶

3.2 Łodzie¶

3.3 Port zaokrętowania¶

3.4 Rodziny¶

4. Relacje między danymi¶

4.1 Wykres zależności pomiędzy kolumnami danych:¶

4.2 Wykres zależności pomiędzy ceną biletu, klasą biletu a łodzią, w której znaleźli się pasażerowie:¶

4.3 Numer biletu a wysokość stawki:¶

5. Analiza wartości odstających¶

5.1 Klasa biletu a cena¶

5.2 Klasa biletu a wiek pasażerów¶

5.3 Klasa biletu i numer biletu¶

	pclass	survived	name	sex	age	sibsp	parch	ticket	fare	cabin	embarked	boat	body	home.dest
0	1.0	1.0	Allen, Miss. Elisabeth Walton	female	29.0000	0.0	0.0	24160	211.3375	B5	S	2	NaN	St Louis, MO
1	1.0	1.0	Allison, Master. Hudson Trevor	male	0.9167	1.0	2.0	113781	151.5500	C22 C26	S	11	NaN	Montreal, PQ / Chesterville, ON
2	1.0	0.0	Allison, Miss. Helen Loraine	female	2.0000	1.0	2.0	113781	151.5500	C22 C26	S	NaN	NaN	Montreal, PQ / Chesterville, ON
3	1.0	0.0	Allison, Mr. Hudson Joshua Creighton	male	30.0000	1.0	2.0	113781	151.5500	C22 C26	S	NaN	135.0	Montreal, PQ / Chesterville, ON
4	1.0	0.0	Allison, Mrs. Hudson J C (Bessie Waldo Daniels)	female	25.0000	1.0	2.0	113781	151.5500	C22 C26	S	NaN	NaN	Montreal, PQ / Chesterville, ON
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1305	3.0	0.0	Zabour, Miss. Thamine	female	NaN	1.0	0.0	2665	14.4542	NaN	C	NaN	NaN	NaN
1306	3.0	0.0	Zakarian, Mr. Mapriededer	male	26.5000	0.0	0.0	2656	7.2250	NaN	C	NaN	304.0	NaN
1307	3.0	0.0	Zakarian, Mr. Ortin	male	27.0000	0.0	0.0	2670	7.2250	NaN	C	NaN	NaN	NaN
1308	3.0	0.0	Zimmerman, Mr. Leo	male	29.0000	0.0	0.0	315082	7.8750	NaN	S	NaN	NaN	NaN
1309	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	pclass	survived	name	sex	age	sibsp	parch	ticket	fare	cabin	embarked	boat	body	home.dest
56	1.0	1.0	Carter, Mr. William Ernest	male	36.0	1.0	2.0	113760	120.0000	B96 B98	S	C	NaN	Bryn Mawr, PA
231	1.0	1.0	Peuchen, Major. Arthur Godfrey	male	52.0	0.0	0.0	113786	30.5000	C104	S	6	NaN	Toronto, ON
734	3.0	1.0	Coutts, Master. William Loch "William"	male	3.0	1.0	1.0	C.A. 37671	15.9000	NaN	S	2	NaN	England Brooklyn, NY
105	1.0	0.0	Evans, Miss. Edith Corse	female	36.0	0.0	0.0	PC 17531	31.6792	A29	C	NaN	NaN	New York, NY
43	1.0	1.0	Bucknell, Mrs. William Robert (Emma Eliza Ward)	female	60.0	0.0	0.0	11813	76.2917	D15	C	8	NaN	Philadelphia, PA
581	2.0	0.0	Watson, Mr. Ennis Hastings	male	NaN	0.0	0.0	239856	0.0000	NaN	S	NaN	NaN	Belfast
903	3.0	0.0	Johnston, Mrs. Andrew G (Elizabeth "Lily" Watson)	female	NaN	1.0	2.0	W./C. 6607	23.4500	NaN	S	NaN	NaN	NaN
1168	3.0	0.0	Sadowitz, Mr. Harry	male	NaN	0.0	0.0	LP 1588	7.5750	NaN	S	NaN	NaN	NaN
178	1.0	1.0	Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)	female	45.0	1.0	0.0	11753	52.5542	D19	S	5	NaN	Boston, MA
107	1.0	1.0	Flegenheim, Mrs. Alfred (Antoinette)	female	NaN	0.0	0.0	PC 17598	31.6833	NaN	S	7	NaN	New York, NY

	pclass	survived	age	sibsp	parch	fare	body	ticket_number
count	1309.000000	1309.000000	1046.000000	1309.000000	1309.000000	1308.000000	121.000000	9.570000e+02
mean	2.294882	0.381971	29.881135	0.498854	0.385027	33.295479	160.809917	2.490391e+05
std	0.837836	0.486055	14.413500	1.041658	0.865560	51.758668	97.696922	4.426853e+05
min	1.000000	0.000000	0.166700	0.000000	0.000000	0.000000	1.000000	6.800000e+02
25%	2.000000	0.000000	21.000000	0.000000	0.000000	7.895800	72.000000	1.995000e+04
50%	3.000000	0.000000	28.000000	0.000000	0.000000	14.454200	155.000000	2.346040e+05
75%	3.000000	1.000000	39.000000	1.000000	0.000000	31.275000	256.000000	3.474680e+05
max	3.000000	1.000000	80.000000	8.000000	9.000000	512.329200	328.000000	3.101298e+06

	Klasa biletu	Płeć	Ocaleni / zmarli	Ilość	Procent całości
0	1.0	female	0.0	5	0.381679
1	1.0	female	1.0	139	10.610687
2	1.0	male	0.0	118	9.007634
3	1.0	male	1.0	61	4.656489
4	2.0	female	0.0	12	0.916031
5	2.0	female	1.0	94	7.175573
6	2.0	male	0.0	146	11.145038
7	2.0	male	1.0	25	1.908397
8	3.0	female	0.0	110	8.396947
9	3.0	female	1.0	106	8.091603
10	3.0	male	0.0	418	31.908397
11	3.0	male	1.0	75	5.725191

	sex	pclass	Wszystkie kobiety lub mężczyźni w tej klasie	Ocaleni	Zmarli	Procent_ocalonych	Procent_zmarłych	Procent_ogółem
0	female	1.0	144	139	5	96.527778	3.472222	10.992366
1	female	2.0	106	94	12	88.679245	11.320755	8.091603
2	female	3.0	216	106	110	49.074074	50.925926	16.488550
3	male	1.0	179	61	118	34.078212	65.921788	13.664122
4	male	2.0	171	25	146	14.619883	85.380117	13.053435
5	male	3.0	493	75	418	15.212982	84.787018	37.633588

	sex	pclass	Liczba ofiar	Procent
0	female	1.0	5	0.618047
1	female	2.0	12	1.483313
2	female	3.0	110	13.597033
3	male	1.0	118	14.585909
4	male	2.0	146	18.046972
5	male	3.0	418	51.668727