import scipy.stats as stats
import pandas as pd
import numpy as np
# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv(filepath_or_buffer="Placement.csv")
# Preview the first five rows (only rendered in an interactive session).
df.head(n=5)
# Null Hypothesis (H0): There is no relationship between the two categorical variables.
# Alternative Hypothesis (H1): There is a relationship between the two categorical variables.
# Cross-tabulate the two categorical columns: rows are the distinct values of
# df['specialisation'], columns are the distinct values of df['status'], and
# each cell is the count of records with that combination.
df_crosstab = pd.crosstab(index=df['specialisation'], columns=df['status'])
# Bare expressions below only render output in an interactive session.
df_crosstab
df_crosstab.values

# Observed cell counts as a NumPy array.
observed_values = df_crosstab.values
print("Observed values: ", observed_values)

# SciPy's chi-squared test of independence on the observed counts.
test_dependence = stats.chi2_contingency(observed=observed_values)
test_dependence

# The result is a 4-item sequence (statistic, p-value, dof, expected
# frequencies); the final item is the table of expected counts.
Expected_value = test_dependence[-1]
print("Expected value: ", Expected_value)
# Significance level used by the decision rules.
alpha = 0.05

# Degrees of freedom for an r x c contingency table are (r - 1) * (c - 1).
# Read the dimensions from the table itself rather than slicing the first two
# rows/columns, so tables larger than 2x2 are handled correctly.
rows, columns = df_crosstab.shape
degree_of_freedom = (rows - 1) * (columns - 1)
print("Degree of Freedom: ", degree_of_freedom)
from scipy.stats import chi2

# Pearson chi-squared statistic: sum over every cell of (O - E)^2 / E.
# `test_statistic` keeps the per-column partial sums; the final statistic adds
# up ALL columns instead of only the first two, so it is correct for tables
# with more than two columns as well.
test_statistic = ((observed_values - Expected_value) ** 2 / Expected_value).sum(axis=0)
chi_Square_test_statistic = test_statistic.sum()
print("The value of chi-squared test statistic: ", chi_Square_test_statistic)

# Critical value: the (1 - alpha) quantile of the chi-squared distribution
# with the table's degrees of freedom.
critical_value = chi2.ppf(q=1 - alpha, df=degree_of_freedom)
print("critical value: ", critical_value)

# Upper-tail probability of the statistic. Uses the computed degrees of
# freedom (not a hard-coded 1), and the survival function, which is more
# accurate than 1 - cdf for very small p-values.
p_value = chi2.sf(chi_Square_test_statistic, degree_of_freedom)
print("P Value: ", p_value)
print("significance level: ", alpha)

# Decision rule 1: compare the test statistic against the critical value.
if chi_Square_test_statistic >= critical_value:
    print("Reject the Null hypothesis H0, as there is relationship between the 2 categorical variables")
else:
    print("Retain the Null hypothesis H0, as there is no relationship between the 2 categorical variables")

# Decision rule 2: compare the p-value against the significance level.
if p_value <= alpha:
    print("Reject the Null hypothesis H0, as there is relationship between the 2 categorical variables")
else:
    print("Retain the Null hypothesis H0, as there is no relationship between the 2 categorical variables")