Steps:

Important — before we move on, please create a separate folder for the mid-term and keep all your shapefiles and CSV files in it. This makes it easier for R to locate the files; otherwise you will get an error saying that the file you are looking for does not exist.

  1. Loading the packages (install the “sf” package first if it is not already installed)
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

2. Finding, organizing, and downloading the data that you want your choropleth map to show as a CSV file, then reading the CSV file with the read.csv function

# Read the census literacy table from the working folder and preview its first six rows.
# Literacy.Rate is stored with a trailing "%" (see the preview), so it is converted to a number in a later step.
Indian_Literacy_Rate <- read.csv(file = "Data_Indian_Literacy.csv")
head(Indian_Literacy_Rate, n = 6L)
##   S.No                State.Name  Population  Literates Literacy.Rate
## 1    1 Andaman & Nicobar Islands     380,581    294,281        86.63%
## 2    2            Andhra Pradesh  84,580,777 50,556,760        67.02%
## 3    3         Arunachal Pradesh   1,383,727    766,005        65.38%
## 4    4                     Assam  31,205,576 19,177,977        72.19%
## 5    5                     Bihar 104,099,452 52,504,553        61.80%
## 6    6                Chandigarh   1,055,450    805,438        86.05%

3. Downloading the shapefile of the country/state/county that you are using and reading it with the read_sf function. After that, you can plot the shapefile to see how it looks and take a quick glance at the first 6 rows of the shapefile data

# Read two layers from the GADM folder: gadm41_IND_0 (country level) and gadm41_IND_1 (states / union territories)
Shapefile1 <- read_sf(dsn = "./gadm41_IND_shp", layer = "gadm41_IND_0")
Shapefile2 <- read_sf(dsn = "./gadm41_IND_shp", layer = "gadm41_IND_1")

# Quick glance at the first six rows of the country-level layer
head(Shapefile1, n = 6L)
## Simple feature collection with 6 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 68.18625 ymin: 6.754256 xmax: 97.41516 ymax: 35.50133
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 3
##   GID_0 COUNTRY                                                         geometry
##   <chr> <chr>                                                 <MULTIPOLYGON [°]>
## 1 IND   India   (((76.97542 8.38514, 76.97486 8.38514, 76.97486 8.38625, 76.975…
## 2 Z01   India   (((75.07161 32.48296, 75.06268 32.48213, 75.0614 32.48201, 75.0…
## 3 Z04   India   (((78.65135 32.09228, 78.65241 32.08826, 78.65672 32.08383, 78.…
## 4 Z05   India   (((80.08794 30.79071, 80.08796 30.79026, 80.10108 30.78151, 80.…
## 5 Z07   India   (((94.19125 27.49632, 94.1869 27.49081, 94.17471 27.48407, 94.1…
## 6 Z09   India   (((78.90891 31.25886, 78.91309 31.26072, 78.91409 31.26163, 78.…

Step 3 (continued): repeating the same method for Shapefile 2

# Same preview of Shapefile1 as in the previous step, repeated here so it can be compared with Shapefile2 below
head(Shapefile1)
## Simple feature collection with 6 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 68.18625 ymin: 6.754256 xmax: 97.41516 ymax: 35.50133
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 3
##   GID_0 COUNTRY                                                         geometry
##   <chr> <chr>                                                 <MULTIPOLYGON [°]>
## 1 IND   India   (((76.97542 8.38514, 76.97486 8.38514, 76.97486 8.38625, 76.975…
## 2 Z01   India   (((75.07161 32.48296, 75.06268 32.48213, 75.0614 32.48201, 75.0…
## 3 Z04   India   (((78.65135 32.09228, 78.65241 32.08826, 78.65672 32.08383, 78.…
## 4 Z05   India   (((80.08794 30.79071, 80.08796 30.79026, 80.10108 30.78151, 80.…
## 5 Z07   India   (((94.19125 27.49632, 94.1869 27.49081, 94.17471 27.48407, 94.1…
## 6 Z09   India   (((78.90891 31.25886, 78.91309 31.26072, 78.91409 31.26163, 78.…
# First six rows of the second layer; its NAME_1 column is the key used later to join the literacy data
head(Shapefile2)
## Simple feature collection with 6 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 76.75699 ymin: 6.754256 xmax: 97.41516 ymax: 29.46334
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 12
##   GID_1   GID_0 COUNTRY NAME_1 VARNAME_1 NL_NAME_1 TYPE_1 ENGTYPE_1 CC_1  HASC_1
##   <chr>   <chr> <chr>   <chr>  <chr>     <chr>     <chr>  <chr>     <chr> <chr> 
## 1 IND.1_1 IND   India   Andam… Andaman … NA        Union… Union Te… NA    IN.AN 
## 2 IND.2_1 IND   India   Andhr… NA        NA        State  State     NA    IN.AP 
## 3 IND.3_1 IND   India   Aruna… Agence d… NA        State  State     NA    IN.AR 
## 4 Z07.3_1 Z07   India   Aruna… Agence d… NA        State  State     NA    IN.AR 
## 5 IND.4_1 IND   India   Assam  NA        NA        State  State     NA    IN.AS 
## 6 IND.5_1 IND   India   Bihar  NA        NA        State  State     NA    IN.BR 
## # ℹ 2 more variables: ISO_1 <chr>, geometry <MULTIPOLYGON [°]>
library(ggiraph)

# 1st - Preparing the literacy data for the join: converting Literacy.Rate from text such as "86.63%" to a number (Numeric_L.R), and recoding the state names that are spelled differently in the shapefile so that both datasets use the same names

New_numeric <- Indian_Literacy_Rate %>%
  mutate(
    # Strip the percent sign, then convert the remaining text to a number
    Numeric_L.R = as.numeric(str_remove(Literacy.Rate, "%")),
    # Left side: spelling in the literacy CSV; right side: spelling in Shapefile2's NAME_1
    State.Name = recode(
      State.Name,
      "Andaman & Nicobar Islands" = "Andaman and Nicobar",
      "Delhi" = "NCT of Delhi",
      "Orissa" = "Odisha"
    )
  )



# 2nd - Creating breaks ("ranges") for the literacy-rate scale across the Indian states.
# With cut()'s defaults each range excludes its lower bound and includes its upper bound,
# so a rate of exactly 60, or anything below 60, would be assigned NA.
# Note that the 70-80 range is wider than the others.

Shapefile2_LiteracyRate <- mutate(
  New_numeric,
  Category = cut(
    x = Numeric_L.R,
    breaks = c(60, 65, 70, 80, 85, 90, 95, 100),
    labels = c("60-65", "65-70", "70-80", "80-85", "85-90", "90-95", "95-100")
  )
)

# 3rd - Checking the recoded literacy table, then creating a whole new dataset called "joining" which matches NAME_1 (shapefile) with State.Name (literacy data)

head(New_numeric)
##   S.No          State.Name  Population  Literates Literacy.Rate Numeric_L.R
## 1    1 Andaman and Nicobar     380,581    294,281        86.63%       86.63
## 2    2      Andhra Pradesh  84,580,777 50,556,760        67.02%       67.02
## 3    3   Arunachal Pradesh   1,383,727    766,005        65.38%       65.38
## 4    4               Assam  31,205,576 19,177,977        72.19%       72.19
## 5    5               Bihar 104,099,452 52,504,553        61.80%       61.80
## 6    6          Chandigarh   1,055,450    805,438        86.05%       86.05
# Keep every row of Shapefile2 and attach the literacy columns where NAME_1 equals State.Name;
# shapefile rows without a matching state name get NA in the literacy columns
joining <- left_join(
  Shapefile2,
  Shapefile2_LiteracyRate,
  by = c("NAME_1" = "State.Name")
)



# 4th - Customizing and adding interactivity to the choropleth map. Install "ggiraph" if needed; it adds interactivity to the map. We follow 3 steps: (1) use geom_sf_interactive instead of geom_sf and set the tooltip aesthetic to whatever we want to be shown in the tooltip; (2) save the ggplot object in a variable, here My_India; (3) call girafe(ggobj = <object name>)

My_India <- ggplot(data = joining) +
  # Filled polygons: colour by literacy range, show the state name in the tooltip
  geom_sf_interactive(
    aes(fill = Category, tooltip = NAME_1),
    color = "transparent"
  ) +
  # Second layer with no fill, drawn on top with white outlines
  geom_sf_interactive(fill = "transparent", color = "white") +
  scale_fill_viridis_d(name = "Literacy Rate") +
  labs(
    title = "India's Literacy Rate",
    subtitle = "Literacy rate of Indian states as per 2011 census",
    caption = "Source: GADM.org and IndiaCensus.net"
  ) +
  theme_void() +
  theme(title = element_text(face = "bold"))

girafe(ggobj = My_India)