Author: Arindam Barman Date: February 12, 2016
select * from tb; desc tb;
Select count(distinct year ) from tb;
UPDATE tb SET child=0 WHERE child IS NULL ;
UPDATE tb SET adult=0 WHERE adult IS NULL ;
UPDATE tb SET elderly=0 WHERE elderly IS NULL ;
Creating a new column total_cases-
ALTER TABLE tb ADD total_cases integer(15);
update tb set total_cases = (child+adult+ elderly);
select country,year,sum(child),sum(adult),sum(elderly),sum(total_cases) from tb group by year,country;
country_population<-read.csv("C:/Users/Arindam/Documents/Data Science/Cuny/Data 607/Assignments/population.csv",header=TRUE, stringsAsFactors = FALSE)
country_tb_cases<-read.csv("C:/Users/Arindam/Documents/Data Science/Cuny/Data 607/Assignments/Country tb cases.csv",header=TRUE, stringsAsFactors = FALSE)
country_tb_yr<-merge(country_population,country_tb_cases,by.country_population="country",by.country_tb_cases="country",by.country_population="year",by.country_tb_cases="year")
country_tb_yr$tb_cases<-(country_tb_yr$sum.total_cases)/(country_tb_yr$population)*100
country_yr_tbcases<-country_tb_yr[ ,c("country","year","tb_cases")]
str(country_yr_tbcases)
## 'data.frame': 1900 obs. of 3 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : int 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 ...
## $ tb_cases: num 0 0 0.000673 0.009119 0.003727 ...
summary(country_yr_tbcases)
## country year tb_cases
## Length:1900 Min. :1995 Min. :0.00000
## Class :character 1st Qu.:1999 1st Qu.:0.01279
## Mode :character Median :2004 Median :0.03210
## Mean :2004 Mean :0.05416
## 3rd Qu.:2009 3rd Qu.:0.06780
## Max. :2013 Max. :0.81348
head(country_yr_tbcases)
## country year tb_cases
## 1 Afghanistan 1995 0.0000000000
## 2 Afghanistan 1996 0.0000000000
## 3 Afghanistan 1997 0.0006729324
## 4 Afghanistan 1998 0.0091194284
## 5 Afghanistan 1999 0.0037274096
## 6 Afghanistan 2000 0.0129446633
unique(country_yr_tbcases$year)
## [1] 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
## [15] 2009 2010 2011 2012 2013
# Find out country with maximun and minimum cases above sero
Country_tb_2013<-subset(country_yr_tbcases,country_yr_tbcases$year==2013 & country_yr_tbcases$tb_cases >0)
Country_tb_2013[which.max(Country_tb_2013$tb_cases),]
## country year tb_cases
## 1520 South Africa 2013 0.5918964
Country_tb_2013[which.min(Country_tb_2013$tb_cases),]
## country year tb_cases
## 304 Cameroon 2013 0.001307632
plot(country_yr_tbcases$year[country_yr_tbcases$country=="South Africa"],country_yr_tbcases$tb_cases[country_yr_tbcases$country=="South Africa"], xlab = 'South Africa',ylab='percent tb cases')
plot(country_yr_tbcases$year[country_yr_tbcases$country=="Cameroon"],country_yr_tbcases$tb_cases[country_yr_tbcases$country=="Cameroon"], xlab = 'Cameroon',ylab='percent tb cases')