population<-read.csv("https://raw.githubusercontent.com/chirag-vithlani/607/master/week3/population.csv", sep=",")
head(population)
## country year population
## 1 Afghanistan 1995 17586073
## 2 Algeria 1995 29315463
## 3 Angola 1995 12104952
## 4 Argentina 1995 34833168
## 5 Azerbaijan 1995 7770806
## 6 Bangladesh 1995 119869585
So we have first two columns. to calculate rate, we need cases.
Use below query to get data to R, once data is uploaded to database using SQL.
SELECT YEAR,COUNTRY,SUM(MALECASES) FROM (
SELECT TB1.*,TB1.CHILD+TB1.ADULT+TB1.ELDERLY AS maleCases
FROM TB TB1 WHERE TB1.SEX=‘MALE’
UNION
SELECT TB2.*,TB2.CHILD+TB2.ADULT+TB2.ELDERLY AS femaleCases FROM TB TB2 WHERE TB2.SEX=‘FEMALE’
) MASTER GROUP BY YEAR,COUNTRY
## Warning: package 'RMySQL' was built under R version 3.2.3
## Loading required package: DBI
rs = dbSendQuery(mydb, "select year,country,sum(malecases) as total from (select tb1.*,tb1.child+tb1.adult+tb1.elderly as malecases from tb tb1 where tb1.sex='male' union select tb2.*,tb2.child+tb2.adult+tb2.elderly as femalecases from tb tb2 where tb2.sex='female' ) master group by year,country")
## Warning in .local(conn, statement, ...): Decimal MySQL column 2 imported as
## numeric
data = fetch(rs, n=-1)
subset(data,data$country=='Afghanistan')
## year country total
## 1 1995 Afghanistan NA
## 101 1996 Afghanistan NA
## 201 1997 Afghanistan 128
## 301 1998 Afghanistan 1778
## 401 1999 Afghanistan 745
## 501 2000 Afghanistan 2666
## 601 2001 Afghanistan 4639
## 701 2002 Afghanistan 6509
## 801 2003 Afghanistan 6528
## 901 2004 Afghanistan 8245
## 1001 2005 Afghanistan 9949
## 1101 2006 Afghanistan 12469
## 1201 2007 Afghanistan 13213
## 1301 2008 Afghanistan 13136
## 1401 2009 Afghanistan 12503
## 1501 2010 Afghanistan 12947
## 1601 2011 Afghanistan 15542
## 1701 2012 Afghanistan 15774
## 1801 2013 Afghanistan NA
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
fullData=inner_join(data,population)
## Joining by: c("year", "country")
## Warning in inner_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
fullData$rate=fullData$total/fullData$population
finalFrame=subset(fullData,select=c(1,2,5))
head(finalFrame)
## year country rate
## 1 1995 Afghanistan NA
## 2 1995 Algeria NA
## 3 1995 Angola 3.793489e-04
## 4 1995 Argentina NA
## 5 1995 Azerbaijan 1.158181e-05
## 6 1995 Bangladesh 5.024627e-05