Install the RPostgreSQL library and load the SQL Server.
library(RPostgreSQL);
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "tb", host="localhost", user="user", password="password");
Load the population dataframe from github.
population <- read.csv('https://raw.githubusercontent.com/mkivenson/Data-Acquisition-and-Management/master/Lab%202.3/population.csv', header=TRUE, sep=',')
head(population)
## country year population
## 1 Afghanistan 1995 17586073
## 2 Algeria 1995 29315463
## 3 Angola 1995 12104952
## 4 Argentina 1995 34833168
## 5 Azerbaijan 1995 7770806
## 6 Bangladesh 1995 119869585
Load and query the tb database from SQL.
tb <- dbGetQuery(con, statement = paste(
'SELECT country, year, SUM(child + adult + elderly) AS cases',
'FROM tb',
'WHERE child + adult + elderly != 0',
'GROUP BY country, year',
'ORDER BY country, year;'));
head(tb, n=3)
## country year cases
## 1 Afghanistan 1997 128
## 2 Afghanistan 1998 1778
## 3 Afghanistan 1999 745
Left join the tb and population databases by country and year.
join_tb<- merge(x = tb, y = population, by = c("country", "year"), all.x = TRUE)
head(join_tb, n=3)
## country year cases population
## 1 Afghanistan 1997 128 19021226
## 2 Afghanistan 1998 1778 19496836
## 3 Afghanistan 1999 745 19987071
Calculate the rate of tb occurance by dividing the amount of tb cases by total population.
join_tb$ratetb <- join_tb[,3]/join_tb[,4]
Summaries by country of tb incidence occurance and rate.
Countries with highest mean tb incidence rates
tb_by_country_top <- sort(tapply(join_tb$cases, join_tb$country, mean),decreasing=TRUE)
head(tb_by_country_top)
## China India South Africa Indonesia Bangladesh
## 441570.47 394364.00 200684.80 161662.50 84476.00
## Philippines
## 63521.87
Countries with highest mean tb occurance per population
tbrate_by_country_top <- sort(tapply(join_tb$ratetb, join_tb$country, mean),decreasing=TRUE)
head(tbrate_by_country_top)
## South Africa Swaziland Lesotho Namibia Botswana
## 0.003984937 0.003312376 0.002619363 0.002357512 0.002350148
## Zimbabwe
## 0.002299511
Annual mean tb incidence rates
barplot(tapply(join_tb$cases, join_tb$year, mean))
Annual mean tb occurance per population
barplot(tapply(join_tb$ratetb, join_tb$year, mean))