Learn with Anu Arora: Case Study: Working with the Titanic Dataset

Wednesday, November 17, 2021

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Load the Titanic passenger data into a DataFrame.
# The dataset can be downloaded from www.kaggle.com
df = pd.read_csv('titanic_dataset.csv')

df.head()  # show the top 5 rows

df.tail(2)  # show the bottom 2 rows

df.nunique()  # number of distinct values in each column

df['Survived'].unique()  # distinct values present in the 'Survived' column

# Number of people who survived vs. did not survive.
# value_counts() orders its result by frequency, so sort by index to get the
# counts in ascending label order; the hard-coded bar labels further down
# rely on that order rather than on which group happens to be larger.
s1 = df['Survived'].value_counts().sort_index()

# using matplotlib
plt.bar(s1.index, s1.values)
plt.show()

# Same chart with readable category labels.
# NOTE(review): assumes 'Survived' is coded 0 = not survived, 1 = survived —
# confirm against the dataset description.
plt.bar(['Not Survived', 'Survived'], s1.values)
plt.show()

# seaborn method
sns.countplot(x='Survived', data=df)
plt.show()

df['Sex'].value_counts()  # number of passengers for each value of 'Sex'

# seaborn method
sns.countplot(x='Sex', data=df)
plt.show()

df['Survived'] == 1  # boolean mask: True for rows where 'Survived' equals 1

df[df['Survived'] == 1]  # only the rows selected by that mask

# Breakdown by 'Sex' of the rows where 'Survived' equals 1.
df[df['Survived'] == 1]['Sex'].value_counts()

# Column totals per 'Survived' group. Restrict to numeric columns: without
# numeric_only=True, recent pandas versions also try to "sum" text columns.
df.groupby(['Survived']).sum(numeric_only=True)

# Number of rows for every ('Survived', 'Sex') combination.
df.groupby(['Survived', 'Sex']).size()

# The same counts as a grouped bar chart.
sns.catplot(x='Survived', hue='Sex', kind='count', data=df)
plt.show()

# dealing with missing values

df.isnull()  # boolean frame: True wherever a value is missing

# Series.mean() skips missing values by default, so this is the mean of the
# ages that are actually present.
mean_age = df['Age'].mean()

mean_age  # output recorded in the original write-up: 29.69911764705882

# Replace every missing age with the mean age.
df['Age'] = df['Age'].fillna(mean_age)

# Density plot of 'Age' after filling; plt.show() is needed to render it when
# this runs as a script, matching the other plots in this file.
sns.kdeplot(df['Age'])
plt.show()

Learn with Anu Arora