Load SQL Library

Install the RPostgreSQL library and load the SQL Server.

library(RPostgreSQL);
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname = "tb", host="localhost", user="user", password="password");

Load Population Dataframe

Load the population dataframe from github.

population <- read.csv('https://raw.githubusercontent.com/mkivenson/Data-Acquisition-and-Management/master/Lab%202.3/population.csv', header=TRUE, sep=',')
head(population)
##       country year population
## 1 Afghanistan 1995   17586073
## 2     Algeria 1995   29315463
## 3      Angola 1995   12104952
## 4   Argentina 1995   34833168
## 5  Azerbaijan 1995    7770806
## 6  Bangladesh 1995  119869585

Load tb Dataframe from SQL Database

Load and query the tb database from SQL.

tb <- dbGetQuery(con, statement = paste(
  'SELECT country, year, SUM(child + adult + elderly) AS cases',
  'FROM tb',
  'WHERE child + adult + elderly != 0',
  'GROUP BY country, year',
  'ORDER BY country, year;'));
head(tb, n=3)
##       country year cases
## 1 Afghanistan 1997   128
## 2 Afghanistan 1998  1778
## 3 Afghanistan 1999   745

Left join tb and population databases

Left join the tb and population databases by country and year.

join_tb<- merge(x = tb, y = population, by = c("country", "year"), all.x = TRUE)
head(join_tb, n=3)
##       country year cases population
## 1 Afghanistan 1997   128   19021226
## 2 Afghanistan 1998  1778   19496836
## 3 Afghanistan 1999   745   19987071

Add column calculating rate of tb occurance

Calculate the rate of tb occurance by dividing the amount of tb cases by total population.

join_tb$ratetb <- join_tb[,3]/join_tb[,4]

tb occurance summary

Summaries by country of tb incidence occurance and rate.

Countries with highest mean tb incidence rates

tb_by_country_top <- sort(tapply(join_tb$cases, join_tb$country, mean),decreasing=TRUE)
head(tb_by_country_top)
##        China        India South Africa    Indonesia   Bangladesh 
##    441570.47    394364.00    200684.80    161662.50     84476.00 
##  Philippines 
##     63521.87

Countries with highest mean tb occurance per population

tbrate_by_country_top <- sort(tapply(join_tb$ratetb, join_tb$country, mean),decreasing=TRUE)
head(tbrate_by_country_top)
## South Africa    Swaziland      Lesotho      Namibia     Botswana 
##  0.003984937  0.003312376  0.002619363  0.002357512  0.002350148 
##     Zimbabwe 
##  0.002299511

Annual mean tb incidence rates

barplot(tapply(join_tb$cases, join_tb$year, mean))

Annual mean tb occurance per population

barplot(tapply(join_tb$ratetb, join_tb$year, mean))