#Load the data from the CSV file
states_and_ut <- read.csv("~/states_and_ut.csv")
View(states_and_ut)
# Display the first few rows of the dataset
head(states_and_ut)
##                Name  Type   ISO Vehicle.Code          Zone       Capital
## 1    Andhra Pradesh State IN-AP           AP      Southern     Amaravati
## 2 Arunachal Pradesh State IN-AR           AR North-Eastern North-Eastern
## 3             Assam State IN-AS           AS North-Eastern        Dispur
## 4             Bihar State IN-BR           BR       Eastern       Eastern
## 5      Chhattisgarh State IN-CG           CG       Central       Central
## 6               Goa State IN-GA           GA       Western        Panaji
##    Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956   49506799        162975             Telugu
## 2      Itanagar 2/20/1987    1383727         83743            English
## 3      Guwahati 1/26/1950   31205576         78438      Assamese,Boro
## 4         Patna 1/26/1950  104099452         94163              Hindi
## 5        Raipur 11/1/2000   25545198        135194              Hindi
## 6 Vasco da Gama 5/30/1987    1458545          3702            Konkani
##   Additional.Official.Languages
## 1                          Urdu
## 2                             —
## 3                Bengali,Meitei
## 4                          Urdu
## 5                 Chhattisgarhi
## 6                       Marathi
# Inspect unique values in the Zone column (ESSENTIAL!)
unique_zones <- unique(states_and_ut$Zone)
print(unique_zones)
## [1] "Southern"      "North-Eastern" "Eastern"       "Central"      
## [5] "Western"       "Northern"
print(table(states_and_ut$Zone)) # Show counts of each zone
## 
##       Central       Eastern North-Eastern      Northern      Southern 
##             4             5             8             8             7 
##       Western 
##             4
# Correctly recode Zone to a binary variable (example: "Southern" vs. "Other")
states_and_ut$Zone_Southern <- ifelse(states_and_ut$Zone == "Southern", 1, 0)
# Verify the new column
print(head(states_and_ut))
##                Name  Type   ISO Vehicle.Code          Zone       Capital
## 1    Andhra Pradesh State IN-AP           AP      Southern     Amaravati
## 2 Arunachal Pradesh State IN-AR           AR North-Eastern North-Eastern
## 3             Assam State IN-AS           AS North-Eastern        Dispur
## 4             Bihar State IN-BR           BR       Eastern       Eastern
## 5      Chhattisgarh State IN-CG           CG       Central       Central
## 6               Goa State IN-GA           GA       Western        Panaji
##    Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956   49506799        162975             Telugu
## 2      Itanagar 2/20/1987    1383727         83743            English
## 3      Guwahati 1/26/1950   31205576         78438      Assamese,Boro
## 4         Patna 1/26/1950  104099452         94163              Hindi
## 5        Raipur 11/1/2000   25545198        135194              Hindi
## 6 Vasco da Gama 5/30/1987    1458545          3702            Konkani
##   Additional.Official.Languages Zone_Southern
## 1                          Urdu             1
## 2                             —             0
## 3                Bengali,Meitei             0
## 4                          Urdu             0
## 5                 Chhattisgarhi             0
## 6                       Marathi             0
print(table(states_and_ut$Zone_Southern)) # Show counts of 0 and 1
## 
##  0  1 
## 29  7
# Convert relevant columns to factors (important for logistic regression)
states_and_ut$Zone_Southern <- as.factor(states_and_ut$Zone_Southern)
states_and_ut$Capital <- as.factor(states_and_ut$Capital)
states_and_ut$Largest.City <- as.factor(states_and_ut$Largest.City)
states_and_ut$Statehood <- as.factor(states_and_ut$Statehood)
# Fit the logistic regression model (Corrected)
logistic_model <- glm(Zone_Southern ~ Capital + Largest.City + Statehood + Population + Area..sq..km. , 
                      data = states_and_ut, 
                      family = binomial())
# Display the summary of the model
summary(logistic_model)
## 
## Call:
## glm(formula = Zone_Southern ~ Capital + Largest.City + Statehood + 
##     Population + Area..sq..km., family = binomial(), data = states_and_ut)
## 
## Deviance Residuals: 
##  [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [26]  0  0  0  0  0  0  0  0  0  0  0
## 
## Coefficients: (37 not defined because of singularities)
##                                              Estimate Std. Error z value
## (Intercept)                                 2.557e+01  2.160e+05       0
## CapitalBhararisain(Summer)Dehradun(Winter) -5.113e+01  3.055e+05       0
## CapitalBhopal                              -5.113e+01  3.055e+05       0
## CapitalCentral                             -5.113e+01  3.055e+05       0
## CapitalChandigarh                          -5.113e+01  3.055e+05       0
## CapitalDaman                               -5.113e+01  3.055e+05       0
## CapitalDispur                              -5.113e+01  3.055e+05       0
## CapitalEastern                             -5.113e+01  3.055e+05       0
## CapitalGandhinagar                         -5.113e+01  3.055e+05       0
## CapitalKohima                              -5.113e+01  3.055e+05       0
## CapitalLeh(Summer)Kargil(Winter)           -5.113e+01  3.055e+05       0
## CapitalMumbai(Summer)Nagpur(Winter)        -5.113e+01  3.055e+05       0
## CapitalNew Delhi                           -5.113e+01  3.055e+05       0
## CapitalNorth-Eastern                       -5.113e+01  3.055e+05       0
## CapitalNorthern                            -5.113e+01  3.055e+05       0
## CapitalPanaji                              -5.113e+01  3.055e+05       0
## CapitalRanchi                              -5.113e+01  3.055e+05       0
## CapitalShimla(Summer)Dharamshala(Winter)   -5.113e+01  3.055e+05       0
## CapitalSouthern                            -4.726e-09  3.055e+05       0
## CapitalSrinagar(Summer)Jammu(Winter)       -5.113e+01  3.055e+05       0
## Largest.CityAhmedabad                              NA         NA      NA
## Largest.CityAizawl                         -1.297e-13  3.055e+05       0
## Largest.CityBengaluru                       1.013e-09  3.055e+05       0
## Largest.CityBhubaneswar                     7.711e-15  3.055e+05       0
## Largest.CityChandigarh                      5.773e-22  3.055e+05       0
## Largest.CityChennai                        -2.207e-06  3.055e+05       0
## Largest.CityDehradun                               NA         NA      NA
## Largest.CityDelhi                                  NA         NA      NA
## Largest.CityDimapur                                NA         NA      NA
## Largest.CityFaridabad                      -1.083e-14  3.055e+05       0
## Largest.CityGangtok                        -1.311e-21  3.055e+05       0
## Largest.CityGuwahati                               NA         NA      NA
## Largest.CityHyderabad                      -2.207e-06  3.055e+05       0
## Largest.CityImphal                         -3.824e-14  3.055e+05       0
## Largest.CityIndore                                 NA         NA      NA
## Largest.CityItanagar                        3.737e-14  3.055e+05       0
## Largest.CityJaipur                                 NA         NA      NA
## Largest.CityJamshedpur                             NA         NA      NA
## Largest.CityKavaratti                      -2.207e-06  3.055e+05       0
## Largest.CityKolkata                        -9.021e-24  3.055e+05       0
## Largest.CityLeh                                    NA         NA      NA
## Largest.CityLucknow                         1.985e-13  3.055e+05       0
## Largest.CityLudhiana                               NA         NA      NA
## Largest.CityMumbai                                 NA         NA      NA
## Largest.CityPatna                          -1.498e-13  3.055e+05       0
## Largest.CityPondicherry                    -2.207e-06  3.055e+05       0
## Largest.CityRaipur                                 NA         NA      NA
## Largest.CityShillong                       -1.497e-15  3.055e+05       0
## Largest.CityShimla                                 NA         NA      NA
## Largest.CitySilvassa                               NA         NA      NA
## Largest.CitySri Vijaya Puram                       NA         NA      NA
## Largest.CitySrinagar                               NA         NA      NA
## Largest.CityThiruvananthapuram                     NA         NA      NA
## Largest.CityVasco da Gama                          NA         NA      NA
## Largest.CityVisakhapatnam                          NA         NA      NA
## Statehood1/25/1971                                 NA         NA      NA
## Statehood1/26/1950                                 NA         NA      NA
## Statehood1/26/2020                                 NA         NA      NA
## Statehood10/31/2019                                NA         NA      NA
## Statehood11/1/1956                                 NA         NA      NA
## Statehood11/1/1966                                 NA         NA      NA
## Statehood11/1/2000                                 NA         NA      NA
## Statehood11/15/2000                                NA         NA      NA
## Statehood11/9/2000                                 NA         NA      NA
## Statehood12/1/1963                                 NA         NA      NA
## Statehood2/20/1987                                 NA         NA      NA
## Statehood5/1/1960                                  NA         NA      NA
## Statehood5/16/1975                                 NA         NA      NA
## Statehood5/30/1987                                 NA         NA      NA
## Statehood6/2/2014                                  NA         NA      NA
## Statehood8/16/1962                                 NA         NA      NA
## Population                                         NA         NA      NA
## Area..sq..km.                                      NA         NA      NA
##                                            Pr(>|z|)
## (Intercept)                                       1
## CapitalBhararisain(Summer)Dehradun(Winter)        1
## CapitalBhopal                                     1
## CapitalCentral                                    1
## CapitalChandigarh                                 1
## CapitalDaman                                      1
## CapitalDispur                                     1
## CapitalEastern                                    1
## CapitalGandhinagar                                1
## CapitalKohima                                     1
## CapitalLeh(Summer)Kargil(Winter)                  1
## CapitalMumbai(Summer)Nagpur(Winter)               1
## CapitalNew Delhi                                  1
## CapitalNorth-Eastern                              1
## CapitalNorthern                                   1
## CapitalPanaji                                     1
## CapitalRanchi                                     1
## CapitalShimla(Summer)Dharamshala(Winter)          1
## CapitalSouthern                                   1
## CapitalSrinagar(Summer)Jammu(Winter)              1
## Largest.CityAhmedabad                            NA
## Largest.CityAizawl                                1
## Largest.CityBengaluru                             1
## Largest.CityBhubaneswar                           1
## Largest.CityChandigarh                            1
## Largest.CityChennai                               1
## Largest.CityDehradun                             NA
## Largest.CityDelhi                                NA
## Largest.CityDimapur                              NA
## Largest.CityFaridabad                             1
## Largest.CityGangtok                               1
## Largest.CityGuwahati                             NA
## Largest.CityHyderabad                             1
## Largest.CityImphal                                1
## Largest.CityIndore                               NA
## Largest.CityItanagar                              1
## Largest.CityJaipur                               NA
## Largest.CityJamshedpur                           NA
## Largest.CityKavaratti                             1
## Largest.CityKolkata                               1
## Largest.CityLeh                                  NA
## Largest.CityLucknow                               1
## Largest.CityLudhiana                             NA
## Largest.CityMumbai                               NA
## Largest.CityPatna                                 1
## Largest.CityPondicherry                           1
## Largest.CityRaipur                               NA
## Largest.CityShillong                              1
## Largest.CityShimla                               NA
## Largest.CitySilvassa                             NA
## Largest.CitySri Vijaya Puram                     NA
## Largest.CitySrinagar                             NA
## Largest.CityThiruvananthapuram                   NA
## Largest.CityVasco da Gama                        NA
## Largest.CityVisakhapatnam                        NA
## Statehood1/25/1971                               NA
## Statehood1/26/1950                               NA
## Statehood1/26/2020                               NA
## Statehood10/31/2019                              NA
## Statehood11/1/1956                               NA
## Statehood11/1/1966                               NA
## Statehood11/1/2000                               NA
## Statehood11/15/2000                              NA
## Statehood11/9/2000                               NA
## Statehood12/1/1963                               NA
## Statehood2/20/1987                               NA
## Statehood5/1/1960                                NA
## Statehood5/16/1975                               NA
## Statehood5/30/1987                               NA
## Statehood6/2/2014                                NA
## Statehood8/16/1962                               NA
## Population                                       NA
## Area..sq..km.                                    NA
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3.5467e+01  on 35  degrees of freedom
## Residual deviance: 5.6772e-10  on  0  degrees of freedom
## AIC: 72
## 
## Number of Fisher Scoring iterations: 24
# Make predictions on the same data (for illustration)
probabilities <- predict(logistic_model, type = "response")  # Get predicted probabilities

# Convert probabilities to classes (0 or 1) using a threshold (e.g., 0.5)
predictions <- ifelse(probabilities > 0.5, 1, 0)

# Add predictions to the data frame
states_and_ut$predicted_probability <- probabilities
states_and_ut$predicted_zone <- as.factor(predictions) # Convert to factor

# View the data with predictions
print(head(states_and_ut))
##                Name  Type   ISO Vehicle.Code          Zone       Capital
## 1    Andhra Pradesh State IN-AP           AP      Southern     Amaravati
## 2 Arunachal Pradesh State IN-AR           AR North-Eastern North-Eastern
## 3             Assam State IN-AS           AS North-Eastern        Dispur
## 4             Bihar State IN-BR           BR       Eastern       Eastern
## 5      Chhattisgarh State IN-CG           CG       Central       Central
## 6               Goa State IN-GA           GA       Western        Panaji
##    Largest.City Statehood Population Area..sq..km. Official.Languages
## 1 Visakhapatnam 11/1/1956   49506799        162975             Telugu
## 2      Itanagar 2/20/1987    1383727         83743            English
## 3      Guwahati 1/26/1950   31205576         78438      Assamese,Boro
## 4         Patna 1/26/1950  104099452         94163              Hindi
## 5        Raipur 11/1/2000   25545198        135194              Hindi
## 6 Vasco da Gama 5/30/1987    1458545          3702            Konkani
##   Additional.Official.Languages Zone_Southern predicted_probability
## 1                          Urdu             1          1.000000e+00
## 2                             —             0          7.884924e-12
## 3                Bengali,Meitei             0          7.884924e-12
## 4                          Urdu             0          7.884924e-12
## 5                 Chhattisgarhi             0          7.884924e-12
## 6                       Marathi             0          7.884924e-12
##   predicted_zone
## 1              1
## 2              0
## 3              0
## 4              0
## 5              0
## 6              0
# Evaluate model performance (optional)
# Confusion matrix
confusion_matrix <- table(states_and_ut$Zone_Southern, states_and_ut$predicted_zone)
print(confusion_matrix)
##    
##      0  1
##   0 29  0
##   1  0  7
# Accuracy
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(paste("Accuracy:", accuracy))
## [1] "Accuracy: 1"