import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Titanic passenger data (the CSV can be downloaded from www.kaggle.com).
df = pd.read_csv('titanic_dataset.csv')

# Quick look at the table: the whole frame, then its first and last rows.
df
df.head(5)  # first 5 rows
df.tail(2)  # last 2 rows

# Number of distinct values in each column.
df.nunique()

# Distinct values that occur in the 'Survived' column.
df['Survived'].unique()
# Number of passengers per 'Survived' value.
# value_counts() returns a Series (index = outcome code, values = counts),
# which is what the .index / .values accesses below need.
# sort_index() fixes the order to 0, 1 so the hand-written labels in the
# second bar chart always line up with the right bars, regardless of which
# outcome is more frequent.
# NOTE(review): assumes 0 = not survived, 1 = survived — confirm against the dataset.
s1 = df['Survived'].value_counts().sort_index()

# Matplotlib: bars keyed by the raw outcome codes.
plt.bar(s1.index, s1.values)
plt.show()

# Matplotlib: same bars with readable labels.
plt.bar(['Not Survived', 'Survived'], s1.values)
plt.show()

# Seaborn: countplot does the counting itself.
sns.countplot(x='Survived', data=df)
plt.show()

# Number of passengers per sex.
df['Sex'].value_counts()

# Show the survival counts computed above.
s1
# Seaborn: number of passengers per sex.
sns.countplot(x='Sex', data=df)
plt.show()

# Boolean mask that is True for rows where 'Survived' equals 1.
df['Survived'] == 1

# Only the rows where 'Survived' equals 1.
df[df['Survived'] == 1]

# Sex breakdown among those rows.
df[df['Survived'] == 1]['Sex'].value_counts()

# Per-outcome column totals. numeric_only=True restricts the sum to numeric
# columns; without it, pandas >= 2.0 also "sums" text columns by
# concatenating their strings.
df.groupby(['Survived']).sum(numeric_only=True)

# Number of passengers for every (Survived, Sex) combination.
df.groupby(['Survived', 'Sex']).size()

# Seaborn: the same breakdown as a grouped count plot.
sns.catplot(x='Survived', hue='Sex', kind='count', data=df)
plt.show()
# Dealing with missing values

# Boolean frame marking every missing cell.
df.isnull()

# Mean of the 'Age' column (pandas skips missing values by default).
mean_age = df['Age'].mean()
mean_age  # the source page shows 29.69911764705882 as the output here

# Replace the missing ages with the mean.
df['Age'] = df['Age'].fillna(mean_age)

# Density plot of the filled 'Age' column; plt.show() renders it, matching
# the other plots in this script.
sns.kdeplot(df['Age'])
plt.show()
# (Blog page footer, not part of the script: "No comments: / Post a Comment")