# Load the states / union-territories table from the CSV file in the user's
# home directory.
states_and_ut <- read.csv("~/states_and_ut.csv")
# View() opens a spreadsheet-style data viewer and therefore needs a GUI.
# Only call it in an interactive session so the script can also be run
# unattended (Rscript, knitr) without failing or popping up a window.
if (interactive()) {
  View(states_and_ut)
}
# Display the first few rows of the dataset
head(states_and_ut)
## Name Type ISO Vehicle.Code Zone Capital
## 1 Andhra Pradesh State IN-AP AP Southern Amaravati
## 2 Arunachal Pradesh State IN-AR AR North-Eastern North-Eastern
## 3 Assam State IN-AS AS North-Eastern Dispur
## 4 Bihar State IN-BR BR Eastern Eastern
## 5 Chhattisgarh State IN-CG CG Central Central
## 6 Goa State IN-GA GA Western Panaji
## Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956 49506799 162975 Telugu
## 2 Itanagar 2/20/1987 1383727 83743 English
## 3 Guwahati 1/26/1950 31205576 78438 Assamese,Boro
## 4 Patna 1/26/1950 104099452 94163 Hindi
## 5 Raipur 11/1/2000 25545198 135194 Hindi
## 6 Vasco da Gama 5/30/1987 1458545 3702 Konkani
## Additional.Official.Languages
## 1 Urdu
## 2 —
## 3 Bengali,Meitei
## 4 Urdu
## 5 Chhattisgarhi
## 6 Marathi
# List the distinct labels found in the Zone column. The recoding step further
# down compares against an exact label, so check the spelling here first.
unique_zones <- with(states_and_ut, unique(Zone))
print(unique_zones)
## [1] "Southern" "North-Eastern" "Eastern" "Central"
## [5] "Western" "Northern"
print(table(states_and_ut[["Zone"]])) # Number of rows per zone
##
## Central Eastern North-Eastern Northern Southern
## 4 5 8 8 7
## Western
## 4
# Derive a binary indicator from Zone: 1 when the zone is "Southern", 0 for
# every other zone (a missing Zone stays NA).
states_and_ut[["Zone_Southern"]] <-
  as.numeric(states_and_ut[["Zone"]] == "Southern")
# Show the first rows to confirm the indicator column was appended
print(head(states_and_ut, n = 6L))
## Name Type ISO Vehicle.Code Zone Capital
## 1 Andhra Pradesh State IN-AP AP Southern Amaravati
## 2 Arunachal Pradesh State IN-AR AR North-Eastern North-Eastern
## 3 Assam State IN-AS AS North-Eastern Dispur
## 4 Bihar State IN-BR BR Eastern Eastern
## 5 Chhattisgarh State IN-CG CG Central Central
## 6 Goa State IN-GA GA Western Panaji
## Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956 49506799 162975 Telugu
## 2 Itanagar 2/20/1987 1383727 83743 English
## 3 Guwahati 1/26/1950 31205576 78438 Assamese,Boro
## 4 Patna 1/26/1950 104099452 94163 Hindi
## 5 Raipur 11/1/2000 25545198 135194 Hindi
## 6 Vasco da Gama 5/30/1987 1458545 3702 Konkani
## Additional.Official.Languages Zone_Southern
## 1 Urdu 1
## 2 — 0
## 3 Bengali,Meitei 0
## 4 Urdu 0
## 5 Chhattisgarhi 0
## 6 Marathi 0
print(table(states_and_ut[["Zone_Southern"]])) # Tally of the 0 and 1 classes
##
## 0 1
## 29 7
# Convert the outcome to a factor. With family = binomial(), glm() treats the
# first factor level ("0") as failure and every other level ("1") as success.
states_and_ut$Zone_Southern <- as.factor(states_and_ut$Zone_Southern)
# These label columns are still converted to factors so that the data frame
# keeps the same column types as before, but they are deliberately NOT used
# as predictors (see the note on the model below).
states_and_ut$Capital <- as.factor(states_and_ut$Capital)
states_and_ut$Largest.City <- as.factor(states_and_ut$Largest.City)
states_and_ut$Statehood <- as.factor(states_and_ut$Statehood)
# Fit the logistic regression on the numeric predictors only.
#
# Capital, Largest.City and Statehood are identifier-like columns with
# (nearly) one distinct value per row. Used as factor predictors they expand
# into more dummy columns than there are observations, which saturates the
# model: 0 residual degrees of freedom, dozens of coefficients reported as NA
# "because of singularities", huge standard errors, and fitted probabilities
# of exactly 0 or 1. In the recorded run some Capital values even repeat the
# zone label itself (e.g. "North-Eastern", "Southern"), so that column also
# leaks the response. Such a fit memorises the rows and explains nothing.
#
# NOTE: console output recorded below this call (model summary, predictions,
# confusion matrix, accuracy) came from the earlier saturated fit and must be
# regenerated by re-running the script.
logistic_model <- glm(
  Zone_Southern ~ Population + Area..sq..km.,
  data = states_and_ut,
  family = binomial()
)
# Display the summary of the model
summary(logistic_model)
##
## Call:
## glm(formula = Zone_Southern ~ Capital + Largest.City + Statehood +
## Population + Area..sq..km., family = binomial(), data = states_and_ut)
##
## Deviance Residuals:
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [26] 0 0 0 0 0 0 0 0 0 0 0
##
## Coefficients: (37 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) 2.557e+01 2.160e+05 0
## CapitalBhararisain(Summer)Dehradun(Winter) -5.113e+01 3.055e+05 0
## CapitalBhopal -5.113e+01 3.055e+05 0
## CapitalCentral -5.113e+01 3.055e+05 0
## CapitalChandigarh -5.113e+01 3.055e+05 0
## CapitalDaman -5.113e+01 3.055e+05 0
## CapitalDispur -5.113e+01 3.055e+05 0
## CapitalEastern -5.113e+01 3.055e+05 0
## CapitalGandhinagar -5.113e+01 3.055e+05 0
## CapitalKohima -5.113e+01 3.055e+05 0
## CapitalLeh(Summer)Kargil(Winter) -5.113e+01 3.055e+05 0
## CapitalMumbai(Summer)Nagpur(Winter) -5.113e+01 3.055e+05 0
## CapitalNew Delhi -5.113e+01 3.055e+05 0
## CapitalNorth-Eastern -5.113e+01 3.055e+05 0
## CapitalNorthern -5.113e+01 3.055e+05 0
## CapitalPanaji -5.113e+01 3.055e+05 0
## CapitalRanchi -5.113e+01 3.055e+05 0
## CapitalShimla(Summer)Dharamshala(Winter) -5.113e+01 3.055e+05 0
## CapitalSouthern -4.726e-09 3.055e+05 0
## CapitalSrinagar(Summer)Jammu(Winter) -5.113e+01 3.055e+05 0
## Largest.CityAhmedabad NA NA NA
## Largest.CityAizawl -1.297e-13 3.055e+05 0
## Largest.CityBengaluru 1.013e-09 3.055e+05 0
## Largest.CityBhubaneswar 7.711e-15 3.055e+05 0
## Largest.CityChandigarh 5.773e-22 3.055e+05 0
## Largest.CityChennai -2.207e-06 3.055e+05 0
## Largest.CityDehradun NA NA NA
## Largest.CityDelhi NA NA NA
## Largest.CityDimapur NA NA NA
## Largest.CityFaridabad -1.083e-14 3.055e+05 0
## Largest.CityGangtok -1.311e-21 3.055e+05 0
## Largest.CityGuwahati NA NA NA
## Largest.CityHyderabad -2.207e-06 3.055e+05 0
## Largest.CityImphal -3.824e-14 3.055e+05 0
## Largest.CityIndore NA NA NA
## Largest.CityItanagar 3.737e-14 3.055e+05 0
## Largest.CityJaipur NA NA NA
## Largest.CityJamshedpur NA NA NA
## Largest.CityKavaratti -2.207e-06 3.055e+05 0
## Largest.CityKolkata -9.021e-24 3.055e+05 0
## Largest.CityLeh NA NA NA
## Largest.CityLucknow 1.985e-13 3.055e+05 0
## Largest.CityLudhiana NA NA NA
## Largest.CityMumbai NA NA NA
## Largest.CityPatna -1.498e-13 3.055e+05 0
## Largest.CityPondicherry -2.207e-06 3.055e+05 0
## Largest.CityRaipur NA NA NA
## Largest.CityShillong -1.497e-15 3.055e+05 0
## Largest.CityShimla NA NA NA
## Largest.CitySilvassa NA NA NA
## Largest.CitySri Vijaya Puram NA NA NA
## Largest.CitySrinagar NA NA NA
## Largest.CityThiruvananthapuram NA NA NA
## Largest.CityVasco da Gama NA NA NA
## Largest.CityVisakhapatnam NA NA NA
## Statehood1/25/1971 NA NA NA
## Statehood1/26/1950 NA NA NA
## Statehood1/26/2020 NA NA NA
## Statehood10/31/2019 NA NA NA
## Statehood11/1/1956 NA NA NA
## Statehood11/1/1966 NA NA NA
## Statehood11/1/2000 NA NA NA
## Statehood11/15/2000 NA NA NA
## Statehood11/9/2000 NA NA NA
## Statehood12/1/1963 NA NA NA
## Statehood2/20/1987 NA NA NA
## Statehood5/1/1960 NA NA NA
## Statehood5/16/1975 NA NA NA
## Statehood5/30/1987 NA NA NA
## Statehood6/2/2014 NA NA NA
## Statehood8/16/1962 NA NA NA
## Population NA NA NA
## Area..sq..km. NA NA NA
## Pr(>|z|)
## (Intercept) 1
## CapitalBhararisain(Summer)Dehradun(Winter) 1
## CapitalBhopal 1
## CapitalCentral 1
## CapitalChandigarh 1
## CapitalDaman 1
## CapitalDispur 1
## CapitalEastern 1
## CapitalGandhinagar 1
## CapitalKohima 1
## CapitalLeh(Summer)Kargil(Winter) 1
## CapitalMumbai(Summer)Nagpur(Winter) 1
## CapitalNew Delhi 1
## CapitalNorth-Eastern 1
## CapitalNorthern 1
## CapitalPanaji 1
## CapitalRanchi 1
## CapitalShimla(Summer)Dharamshala(Winter) 1
## CapitalSouthern 1
## CapitalSrinagar(Summer)Jammu(Winter) 1
## Largest.CityAhmedabad NA
## Largest.CityAizawl 1
## Largest.CityBengaluru 1
## Largest.CityBhubaneswar 1
## Largest.CityChandigarh 1
## Largest.CityChennai 1
## Largest.CityDehradun NA
## Largest.CityDelhi NA
## Largest.CityDimapur NA
## Largest.CityFaridabad 1
## Largest.CityGangtok 1
## Largest.CityGuwahati NA
## Largest.CityHyderabad 1
## Largest.CityImphal 1
## Largest.CityIndore NA
## Largest.CityItanagar 1
## Largest.CityJaipur NA
## Largest.CityJamshedpur NA
## Largest.CityKavaratti 1
## Largest.CityKolkata 1
## Largest.CityLeh NA
## Largest.CityLucknow 1
## Largest.CityLudhiana NA
## Largest.CityMumbai NA
## Largest.CityPatna 1
## Largest.CityPondicherry 1
## Largest.CityRaipur NA
## Largest.CityShillong 1
## Largest.CityShimla NA
## Largest.CitySilvassa NA
## Largest.CitySri Vijaya Puram NA
## Largest.CitySrinagar NA
## Largest.CityThiruvananthapuram NA
## Largest.CityVasco da Gama NA
## Largest.CityVisakhapatnam NA
## Statehood1/25/1971 NA
## Statehood1/26/1950 NA
## Statehood1/26/2020 NA
## Statehood10/31/2019 NA
## Statehood11/1/1956 NA
## Statehood11/1/1966 NA
## Statehood11/1/2000 NA
## Statehood11/15/2000 NA
## Statehood11/9/2000 NA
## Statehood12/1/1963 NA
## Statehood2/20/1987 NA
## Statehood5/1/1960 NA
## Statehood5/16/1975 NA
## Statehood5/30/1987 NA
## Statehood6/2/2014 NA
## Statehood8/16/1962 NA
## Population NA
## Area..sq..km. NA
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3.5467e+01 on 35 degrees of freedom
## Residual deviance: 5.6772e-10 on 0 degrees of freedom
## AIC: 72
##
## Number of Fisher Scoring iterations: 24
# Make predictions on the same data the model was fitted on. This is an
# in-sample illustration only; it says nothing about out-of-sample accuracy.
probabilities <- predict(logistic_model, type = "response") # Predicted P(class 1)
# Convert probabilities to classes (0 or 1) using a threshold of 0.5
predictions <- ifelse(probabilities > 0.5, 1, 0)
# Add predictions to the data frame
states_and_ut$predicted_probability <- probabilities
# Pin the factor levels to both classes. as.factor() would silently drop a
# class that is never predicted, which later yields a confusion table that is
# not 2 x 2 and breaks any diagonal-based accuracy calculation.
states_and_ut$predicted_zone <- factor(predictions, levels = c(0, 1))
# View the data with predictions
print(head(states_and_ut))
## Name Type ISO Vehicle.Code Zone Capital
## 1 Andhra Pradesh State IN-AP AP Southern Amaravati
## 2 Arunachal Pradesh State IN-AR AR North-Eastern North-Eastern
## 3 Assam State IN-AS AS North-Eastern Dispur
## 4 Bihar State IN-BR BR Eastern Eastern
## 5 Chhattisgarh State IN-CG CG Central Central
## 6 Goa State IN-GA GA Western Panaji
## Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956 49506799 162975 Telugu
## 2 Itanagar 2/20/1987 1383727 83743 English
## 3 Guwahati 1/26/1950 31205576 78438 Assamese,Boro
## 4 Patna 1/26/1950 104099452 94163 Hindi
## 5 Raipur 11/1/2000 25545198 135194 Hindi
## 6 Vasco da Gama 5/30/1987 1458545 3702 Konkani
## Additional.Official.Languages Zone_Southern predicted_probability
## 1 Urdu 1 1.000000e+00
## 2 — 0 7.884924e-12
## 3 Bengali,Meitei 0 7.884924e-12
## 4 Urdu 0 7.884924e-12
## 5 Chhattisgarhi 0 7.884924e-12
## 6 Marathi 0 7.884924e-12
## predicted_zone
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
# Evaluate model performance on the fitted data (optional)
# Confusion matrix: rows are the actual class, columns the predicted class.
# Both factors are pinned to the levels "0" and "1" so the table is always
# 2 x 2 with matching row/column order, even when every row is predicted as
# (or actually belongs to) a single class.
confusion_matrix <- table(
  factor(states_and_ut$Zone_Southern, levels = c("0", "1")),
  factor(states_and_ut$predicted_zone, levels = c("0", "1"))
)
print(confusion_matrix)
##
## 0 1
## 0 29 0
## 1 0 7
# Accuracy: share of rows whose predicted class equals the actual class.
# The labels are compared row by row instead of summing the diagonal of the
# confusion table: diag() is only meaningful when that table is square with
# identically ordered levels, which is not guaranteed if one class is never
# predicted. Rows with a missing label are excluded, as table() would do.
accuracy <- mean(
  as.character(states_and_ut$Zone_Southern) ==
    as.character(states_and_ut$predicted_zone),
  na.rm = TRUE
)
print(paste("Accuracy:", accuracy))
## [1] "Accuracy: 1"