A brief analysis to compare the literacy rates in India across various age groups and states.
Data Source: http://www.censusindia.gov.in/2011census/C-series/C08.html
library(ggplot2)
library(knitr)
# Load the literacy table; "india.csv" is resolved against the current
# working directory.
india <- read.csv("india.csv")

# Row position(s) where the age group is "20-24", the area is "INDIA" and
# the type is "Total". which() also discards rows where the test is NA.
index <- which(
  india$X1 == "20-24" &
    india$area == "INDIA" &
    india$type == "Total"
)
index
## [1] 17
# Ratios X7/X4 and X6/X3 for the row(s) selected by `index`, rounded to two
# decimals.
# NOTE(review): presumably X7/X6 are illiterate female/male counts and X4/X3
# the female/male population totals - confirm against the C-08 column layout.
illit_females <- round(india[["X7"]][index] / india[["X4"]][index], digits = 2)
illit_males <- round(india[["X6"]][index] / india[["X3"]][index], digits = 2)

# Named vector so the printed output labels each ratio.
ill <- c(illit_females = illit_females, illit_males = illit_males)
ill
## illit_females illit_males
## 0.23 0.11
# knitr chunk defaults: figure output width of 900px, rendered at 200 dpi.
knitr::opts_chunk$set(dpi = 200, out.width = "900px")
# Keep only the rows whose area is "INDIA" and whose type is "Total".
national_rows <- which(india$type == "Total" & india$area == "INDIA")
india_total <- india[national_rows, ]

# Literacy rate = X8 / X2, rounded to two decimals.
# NOTE(review): presumably X8 is the literate count and X2 the total
# population - confirm against the C-08 column layout.
india_total[["lit_rate"]] <- round(with(india_total, X8 / X2), digits = 2)

# Two-column view (age group, literacy rate) used for the age-group chart.
lit <- india_total[c("X1", "lit_rate")]
str(lit)
## 'data.frame': 29 obs. of 2 variables:
## $ X1 : Factor w/ 30 levels "","0-6","10",..: 30 2 23 26 28 3 4 5 6 7 ...
## $ lit_rate: num 0.63 0 0.73 0.85 0.89 0.89 0.92 0.91 0.93 0.92 ...
# Drop the "0-6" and "All ages" rows before plotting.
lit <- lit[lit$X1 != "0-6" & lit$X1 != "All ages", ]

# Bar chart of literacy rate per age group, each bar labelled with its value.
# geom_text() inherits the data and the x/y mappings from ggplot().
ggplot(lit, aes(x = X1, y = lit_rate)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = lit_rate), size = 2, vjust = -0.5) +
  labs(
    title = "Literacy Rates Across Age Groups",
    x = "Age Group",
    y = "Literacy Rate"
  ) +
  theme(axis.text.x = element_text(angle = 70, hjust = 1))
# Female literacy rate = X10 / X4 for every national row, rounded to two
# decimals.
# NOTE(review): presumably X10 is the literate-female count and X4 the total
# female population - confirm against the C-08 column layout.
india_total$f_lit_rate <- round(india_total$X10 / india_total$X4, 2)

# Plot only the individual age groups: the "All ages" aggregate and the
# "0-6" row are left out, exactly as the overall literacy-rate chart does,
# so the two age-group charts are directly comparable.
f_lit <- india_total[!(india_total$X1 %in% c("0-6", "All ages")), ]

# Bar chart of female literacy rate per age group, each bar labelled with its
# value. geom_text() inherits the data and the x/y mappings from ggplot().
ggplot(f_lit, aes(x = X1, y = f_lit_rate)) +
  geom_bar(stat = "identity", width = 0.6) +
  theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
  geom_text(aes(label = f_lit_rate), size = 2, vjust = -0.5) +
  ggtitle("Female Literacy Rates Across Age Groups") +
  xlab("Age Group") +
  ylab("Female Literacy Rate")
## 3. Comparing literacy rates across states
### 3.1 Compare the literacy rates (i.e. number of literates / total number of people) across all the states
# One row per area: the "All ages" figures with type "Total".
# NOTE(review): the area == "INDIA" aggregate is not excluded by this filter,
# so it is drawn as a bar alongside the states - confirm that is intended.
all_ages_rows <- which(india$X1 == "All ages" & india$type == "Total")
states_total <- india[all_ages_rows, ]

# Literacy rate = X8 / X2, rounded to two decimals.
states_total[["lit_rate"]] <- round(
  states_total[["X8"]] / states_total[["X2"]],
  digits = 2
)

# Bars ordered from highest to lowest literacy rate, each labelled with its
# value. geom_text() inherits the data and the x/y mappings from ggplot().
ggplot(states_total, aes(x = reorder(area, -lit_rate), y = lit_rate)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = lit_rate), size = 2, vjust = -0.5) +
  labs(title = "Literacy Rates Across States", x = "State", y = "Literacy Rate") +
  theme(axis.text.x = element_text(angle = 70, hjust = 1))
### 3.2. Comparing female literacy rates across states
# Female literacy rate = X10 / X4 per area, rounded to two decimals.
states_total[["f_lit_rate"]] <- with(states_total, round(X10 / X4, 2))

# Bars ordered from highest to lowest female literacy rate, each labelled
# with its value. geom_text() inherits the data and the x/y mappings from
# ggplot().
ggplot(states_total, aes(x = reorder(area, -f_lit_rate), y = f_lit_rate)) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = f_lit_rate), size = 2, vjust = -0.5) +
  labs(title = "Female Literacy Rates", x = "State", y = "Female Literacy Rate") +
  theme(axis.text.x = element_text(angle = 70, hjust = 1))