knitr::opts_chunk$set(echo = TRUE)
library(haven)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'readr' was built under R version 3.4.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(janitor)

# Q 1.1 ----
# Read the turnout data.
# NOTE(review): the absolute setwd() path ties the script to one machine;
# every read.csv() call in this script resolves against this directory.
setwd("C:/Users/kuffu/Desktop/Research Methods/week 1")
turnout <- read.csv(file = "turnout.csv")


# Q 1.2 ----
# Two turnout rates built from the `total` column:
#   TOA = total / (VAP + overseas)
#   TOE = total / VEP
# mutate() evaluates its arguments in order, so TOA can use VAP2 directly.
turnout2 <- turnout %>%
  mutate(
    VAP2 = VAP + overseas,
    TOA = total / VAP2,
    TOE = total / VEP
  )

# Q 1.3 ----
# Put both turnout rates on a 0-100 scale, then take their difference (DIFT)
# and the gap between the ANES column and each rate (ANTOA, ANTOE).
turnout2 <- turnout2 %>%
  mutate(
    TOAP = TOA * 100,
    TOEP = TOE * 100,
    DIFT = TOEP - TOAP,
    ANTOA = ANES - TOAP,
    ANTOE = ANES - TOEP
  )

# Mean and range of ANES minus the VAP2-based rate.
summarise(turnout2, meanANT = mean(ANTOA))
##    meanANT
## 1 20.32914
range(turnout2[["ANTOA"]])
## [1] 11.06116 26.17150

# Mean and range of ANES minus the VEP-based rate.
summarise(turnout2, meanANTE = mean(ANTOE))
##   meanANTE
## 1 16.83634
range(turnout2[["ANTOE"]])
## [1]  8.581054 22.489359
# Q 1.4 ----
# Show turnout and ANES figures separately for the two four-year election
# cycles that start in 1980 and in 1982.
# The printed tables contain no 2006 row even though 2006 is requested, so
# that year appears to be missing from the data.
# NOTE(review): the 1980 cycle prints TOAP while the 1982 cycle prints TOEP;
# confirm both are meant to be compared with ANES as shown.
years_from_1980 <- seq(1980, 2008, by = 4)
years_from_1982 <- seq(1982, 2006, by = 4)

turnout2 %>%
  select(year, TOAP) %>%
  filter(year %in% years_from_1980)
##   year     TOAP
## 1 1980 52.03972
## 2 1984 52.53748
## 3 1988 49.72260
## 4 1992 54.04097
## 5 1996 47.53376
## 6 2000 49.34211
## 7 2004 54.54777
## 8 2008 55.67409
turnout2 %>%
  select(year, TOEP) %>%
  filter(year %in% years_from_1982)
##   year     TOEP
## 1 1982 42.13701
## 2 1986 38.14115
## 3 1990 38.41895
## 4 1994 41.12625
## 5 1998 38.09316
## 6 2002 39.51064
turnout2 %>%
  select(year, ANES) %>%
  filter(year %in% years_from_1982)
##   year ANES
## 1 1982   60
## 2 1986   53
## 3 1990   47
## 4 1994   56
## 5 1998   52
## 6 2002   62
turnout2 %>%
  select(year, ANES) %>%
  filter(year %in% years_from_1980)
##   year ANES
## 1 1980   71
## 2 1984   74
## 3 1988   70
## 4 1992   75
## 5 1996   73
## 6 2000   73
## 7 2004   77
## 8 2008   78
# Q 1.5 ----
# Split the elections into two non-overlapping halves and tabulate the
# ANES-minus-turnout gaps (ANTOA, ANTOE) within each half.
# 1992 closes the first half, so the second half must start strictly after
# it. With `year >= 1992` the 1992 election was counted in both halves
# (7 rows versus 8); with `year > 1992` each half holds 7 elections.
period1 <- turnout2 %>% filter(year <= 1992)
period1 %>% tabyl(ANTOA)
##      ANTOA n   percent
## 1 11.06116 1 0.1428571
## 2 16.92155 1 0.1428571
## 3 18.96028 1 0.1428571
## 4 19.75478 1 0.1428571
## 5 20.27740 1 0.1428571
## 6 20.95903 1 0.1428571
## 7 21.46252 1 0.1428571
period1 %>% tabyl(ANTOE)
##       ANTOE n   percent
## 1  8.581054 1 0.1428571
## 2 14.858846 1 0.1428571
## 3 16.804491 1 0.1428571
## 4 16.886160 1 0.1428571
## 5 17.231520 1 0.1428571
## 6 17.862987 1 0.1428571
## 7 18.751404 1 0.1428571
period2 <- turnout2 %>% filter(year > 1992)
period2 %>% tabyl(ANTOE)
##      ANTOE n   percent
## 1 13.90684 1 0.1428571
## 2 14.87375 1 0.1428571
## 3 16.44567 1 0.1428571
## 4 16.89916 1 0.1428571
## 5 18.77551 1 0.1428571
## 6 21.34207 1 0.1428571
## 7 22.48936 1 0.1428571
period2 %>% tabyl(ANTOA)
##      ANTOA n   percent
## 1 17.16831 1 0.1428571
## 2 17.96914 1 0.1428571
## 3 22.32591 1 0.1428571
## 4 22.45223 1 0.1428571
## 5 23.65789 1 0.1428571
## 6 25.46624 1 0.1428571
## 7 26.17150 1 0.1428571
# Q 1.6 ----
# Adjusted turnout: remove felons and non-citizens from VAP, remove
# `overseas` from the vote total, then take the ratio. Printed for 2008.
# NOTE(review): ADJVAP2 is a 0-1 proportion while ANES, TOAP and TOEP are on
# a 0-100 scale; confirm the comparison accounts for that.
# NOTE(review): Q 1.2 adds `overseas` to the VAP denominator, whereas here it
# is subtracted from the vote total; confirm this is the intended column.
turnout2 <- turnout2 %>%
  mutate(
    ADJVAP = VAP - (felons + noncit),
    ADJTOTAL = total - overseas,
    ADJVAP2 = ADJTOTAL / ADJVAP
  )
turnout2 %>%
  filter(year == 2008) %>%
  select(VEP, VAP, ADJVAP2, ANES)
##      VEP    VAP   ADJVAP2 ANES
## 1 213314 230872 0.6063887   78
# Question 2 ----
# Q 2.1: crude birth rate (CBR) = total births / total person-years,
# computed per region and period.
kenya <- read.csv("kenya.csv")
sweden <- read.csv("sweden.csv")
world <- read.csv("World.csv")
# View() needs an interactive session; skip it when the script is knitted
# or run in batch mode.
if (interactive()) {
  View(kenya)
}

# Person-years lived by both sexes combined.
world <- world %>% mutate(totpy = py.men + py.women)
sweden <- sweden %>% mutate(totpy = py.men + py.women)
kenya <- kenya %>% mutate(totpy = py.men + py.women)

# CBR is a population-level rate: births and person-years are summed over
# all age groups of a period before dividing. A row-wise births / totpy
# would instead give one value per age group.
kenya <- kenya %>%
  group_by(period) %>%
  mutate(CBR = sum(births) / sum(totpy)) %>%
  ungroup() %>%
  as.data.frame()

# One frame per region and period; each carries its period-level CBR.
kenyaCBR1 <- kenya %>%
  filter(period == "1950-1955") %>%
  mutate(CBR = sum(births) / sum(totpy))
kenyaCBR2 <- kenya %>%
  filter(period == "2005-2010") %>%
  mutate(CBR = sum(births) / sum(totpy))
swedenCBR1 <- sweden %>%
  filter(period == "1950-1955") %>%
  mutate(CBR = sum(births) / sum(totpy))
swedenCBR2 <- sweden %>%
  filter(period == "2005-2010") %>%
  mutate(CBR = sum(births) / sum(totpy))
worldCBR1 <- world %>%
  filter(period == "1950-1955") %>%
  mutate(CBR = sum(births) / sum(totpy))
worldCBR2 <- world %>%
  filter(period == "2005-2010") %>%
  mutate(CBR = sum(births) / sum(totpy))

# Q 2.2 ----
# Age-specific fertility rate (ASFR) = births / person-years of women.
# The original filter chained `age != ...` tests with `|`, which is TRUE for
# every row (no age can equal all eight labels at once), so it never dropped
# anything. The frames deliberately keep every age group, because the
# death-rate steps in Q 2.4 to Q 2.6 need them. Only the printed tables are
# restricted to the reproductive ages, using `%in%`.
non_reproductive <- c(
  "0-4", "5-9", "10-14", "50-54", "55-59", "60-69", "70-79", "80+"
)

kenyaCBR1 <- kenyaCBR1 %>% mutate(ASFR = births / py.women)
kenyaCBR2 <- kenyaCBR2 %>% mutate(ASFR = births / py.women)
swedenCBR1 <- swedenCBR1 %>% mutate(ASFR = births / py.women)
swedenCBR2 <- swedenCBR2 %>% mutate(ASFR = births / py.women)
worldCBR1 <- worldCBR1 %>% mutate(ASFR = births / py.women)
worldCBR2 <- worldCBR2 %>% mutate(ASFR = births / py.women)

kenyaCBR1 %>%
  filter(!age %in% non_reproductive) %>%
  tabyl(ASFR)
##         ASFR n   percent
## 1 0.03905205 1 0.1428571
## 2 0.11193267 1 0.1428571
## 3 0.16884585 1 0.1428571
## 4 0.20644016 1 0.1428571
## 5 0.28946367 1 0.1428571
## 6 0.34657814 1 0.1428571
## 7 0.35596942 1 0.1428571
swedenCBR1 %>%
  filter(!age %in% non_reproductive) %>%
  tabyl(ASFR)
##          ASFR n   percent
## 1 0.001341829 1 0.1428571
## 2 0.016210186 1 0.1428571
## 3 0.038908952 1 0.1428571
## 4 0.048603771 1 0.1428571
## 5 0.087364159 1 0.1428571
## 6 0.125243665 1 0.1428571
## 7 0.127710883 1 0.1428571
kenyaCBR2 %>%
  filter(!age %in% non_reproductive) %>%
  tabyl(ASFR)
##         ASFR n   percent
## 1 0.03815044 1 0.1428571
## 2 0.05626214 1 0.1428571
## 3 0.10057087 1 0.1428571
## 4 0.13126805 1 0.1428571
## 5 0.18087964 1 0.1428571
## 6 0.23294721 1 0.1428571
## 7 0.23583536 1 0.1428571
swedenCBR2 %>%
  filter(!age %in% non_reproductive) %>%
  tabyl(ASFR)
##           ASFR n   percent
## 1 0.0006143942 1 0.1428571
## 2 0.0059709097 1 0.1428571
## 3 0.0121600765 1 0.1428571
## 4 0.0507320271 1 0.1428571
## 5 0.0625923991 1 0.1428571
## 6 0.1162085625 1 0.1428571
## 7 0.1322744621 1 0.1428571
#women in Kenya have a higher ASFR than women in Sweden

# Q 2.3 ----
# The TFR column holds each age group's ASFR scaled by 5 (presumably the
# width in years of the reproductive age bands). The printed sum over all
# age groups is the total fertility rate for that region and period.
kenyaCBR1 <- kenyaCBR1 %>%
  mutate(TFR = 5 * ASFR)
summarise(kenyaCBR1, sum1 = sum(TFR))
##      sum1
## 1 7.59141
kenyaCBR2 <- kenyaCBR2 %>%
  mutate(TFR = 5 * ASFR)
summarise(kenyaCBR2, sum1 = sum(TFR))
##       sum1
## 1 4.879568
swedenCBR1 <- swedenCBR1 %>%
  mutate(TFR = 5 * ASFR)
summarise(swedenCBR1, sum1 = sum(TFR))
##       sum1
## 1 2.226917
swedenCBR2 <- swedenCBR2 %>%
  mutate(TFR = 5 * ASFR)
summarise(swedenCBR2, sum1 = sum(TFR))
##       sum1
## 1 1.902764
worldCBR1 <- worldCBR1 %>%
  mutate(TFR = 5 * ASFR)
summarise(worldCBR1, sum1 = sum(TFR))
##       sum1
## 1 5.007248
worldCBR2 <- worldCBR2 %>%
  mutate(TFR = 5 * ASFR)
summarise(worldCBR2, sum1 = sum(TFR))
##       sum1
## 1 2.543623
# the fertility rate of women in the world is lower in 2005 to 2010 (about 2.54) than in 1950 to 1955 (about 5.01)

# Q 2.4 ----
# Crude death rate (CDR) = total deaths / total person-years of a period.
# It is one number per region and period, so deaths and person-years are
# summed over all age groups before dividing. A row-wise deaths / totpy is
# the age-specific death rate, which Q 2.5 stores separately as ASDR.
kenyaCBR1 <- kenyaCBR1 %>% mutate(CDR = sum(deaths) / sum(totpy))
kenyaCBR2 <- kenyaCBR2 %>% mutate(CDR = sum(deaths) / sum(totpy))
swedenCBR1 <- swedenCBR1 %>% mutate(CDR = sum(deaths) / sum(totpy))
swedenCBR2 <- swedenCBR2 %>% mutate(CDR = sum(deaths) / sum(totpy))
worldCBR1 <- worldCBR1 %>% mutate(CDR = sum(deaths) / sum(totpy))
worldCBR2 <- worldCBR2 %>% mutate(CDR = sum(deaths) / sum(totpy))

# One-row summaries of the world CDR in each period.
# (The previously pasted per-age tables no longer apply; re-run the script
# to regenerate the printed values.)
worldCBR1 %>% summarise(CDR = sum(deaths) / sum(totpy))
worldCBR2 %>% summarise(CDR = sum(deaths) / sum(totpy))
# death rates in the world are lower in 2005 to 2010 than in 1950 to 1955

# Q 2.5 ----
# Age-specific death rate (ASDR) = deaths / person-years within each age
# group.
# The former `age != ... | age != ...` filter was TRUE for every row, so it
# removed nothing. It is dropped here because death rates are needed for all
# age groups (Q 2.6 sums over them).
kenyaCBR2 <- kenyaCBR2 %>% mutate(ASDR = deaths / totpy)
swedenCBR2 <- swedenCBR2 %>% mutate(ASDR = deaths / totpy)
sweden <- sweden %>% mutate(ASDR = deaths / totpy)
kenya <- kenya %>% mutate(ASDR = deaths / totpy)
world <- world %>% mutate(ASDR = deaths / totpy)


# Q 2.6 ----
# Counterfactual CDR for Kenya in 2005-2010: Kenya's age-specific death
# rates weighted by Sweden's share of person-years in each age group.
swedenCBR2 %>% summarise(zigmapop = sum(totpy))
##   zigmapop
## 1 45823.85
# Divide by the computed total, not the rounded printed figure, so the
# shares are exact and stay correct if the data change.
swedenCBR2 <- swedenCBR2 %>% mutate(proppy = totpy / sum(totpy))
# The weights are matched to Kenya's rows by position, so both frames must
# list the same age groups in the same order.
stopifnot(
  identical(as.character(kenyaCBR2$age), as.character(swedenCBR2$age))
)
kenyaCBR2 <- kenyaCBR2 %>% mutate(CDR2 = ASDR * swedenCBR2$proppy)
kenyaCBR2 %>% summarise(sum1 = sum(CDR2))
##         sum1
## 1 0.02321646