Wednesday, November 17, 2021

Case Study: Working with the Titanic Dataset

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Load the passenger data into a DataFrame.
# The dataset can be downloaded from www.kaggle.com
df = pd.read_csv(filepath_or_buffer='titanic_dataset.csv')

# NOTE: the bare expressions below are notebook-style displays; in a plain
# script they are evaluated and the result is discarded.

# Whole frame.
df

# Top 5 rows.
df.head(n=5)

# Bottom 2 rows.
df.tail(n=2)

# Count of distinct values in each column.
df.nunique()

# Distinct values present in the 'Survived' column.
df['Survived'].unique()

# Number of passengers per 'Survived' code (survived vs. not survived).
# value_counts() orders by frequency, so sort_index() is applied to put the
# codes in ascending order; the hand-written labels in the second chart then
# line up with the right bars regardless of which group is larger.
s1 = df['Survived'].value_counts().sort_index()

# using matplotlib: one bar per code, with the raw code on the x axis
plt.bar(s1.index, s1.values)
plt.show()

# Same bars with readable labels.
# Assumes the codes are 0 (not survived) and 1 (survived) -- TODO confirm
# against the dataset's data dictionary.
plt.bar(['Not Survived', 'Survived'], s1.values)
plt.show()




# seaborn method: countplot tallies the rows per 'Survived' value itself,
# so no pre-computed counts are passed in.
sns.countplot(data=df, x='Survived')
plt.show()





# Number of passengers of each sex.
sex_counts = df['Sex'].value_counts()

# Show the per-sex tally (s1 still holds the 'Survived' counts).
sex_counts

# seaborn method
sns.countplot(x='Sex', data=df)
plt.show()



# Boolean Series: True where the 'Survived' value equals 1.
df['Survived'].eq(1)

# Only the rows where that condition holds.
df.loc[df['Survived'].eq(1)]

# Tally of those rows by 'Sex'.
df.loc[df['Survived'].eq(1), 'Sex'].value_counts()


# Column totals per 'Survived' group.
# numeric_only=True restricts the sum to numeric columns; without it, pandas
# 2.x also tries to "sum" non-numeric columns (string concatenation or a
# TypeError). Presumably this frame has text columns such as 'Sex'.
df.groupby(['Survived']).sum(numeric_only=True)

# Number of rows for every (Survived, Sex) combination.
df.groupby(['Survived', 'Sex']).size()

# The same breakdown as a chart: one group of bars per 'Survived' value,
# split by 'Sex'.
sns.catplot(x='Survived', hue='Sex', kind='count', data=df)
plt.show()



# dealing with missing values

# Boolean frame marking which cells are missing.
df.isnull()

# Mean of the 'Age' column (Series.mean() skips missing values by default).
mean_age = df['Age'].mean()
mean_age
# Output recorded with this listing: 29.69911764705882

# Replace the missing ages with that mean.
df['Age'] = df['Age'].fillna(mean_age)

# Kernel density estimate of 'Age' after filling.
sns.kdeplot(df['Age'])
# Render the figure explicitly, as every other plot in this script does.
plt.show()


No comments:

Post a Comment

Clustering in Machine Learning

Clustering is a type of unsupervised learning in machine learning where the goal is to group a set of objects in such a way that objects in...