library(RCurl)
## Loading required package: bitops
tb <- getURL("https://raw.githubusercontent.com/gpsingh12/IS-607-Assignment-3/master/tb.csv")
tb <- read.csv(text = tb, header = FALSE)
Assigning the names to the columns of dataset tb
names(tb)<-c("country", "year", "sex", "child", "adult", "elderly")
head(tb)
## country year sex child adult elderly
## 1 Afghanistan 1995 female -1 -1 -1
## 2 Afghanistan 1995 male -1 -1 -1
## 3 Afghanistan 1996 female -1 -1 -1
## 4 Afghanistan 1996 male -1 -1 -1
## 5 Afghanistan 1997 female 5 96 1
## 6 Afghanistan 1997 male 0 26 0
As we looked at first few rows of tb, there were values that were NA giving us -1 in the columns. We need to replace these values for mathematical operations.
tb <- replace(tb, tb == -1, 0)
head(tb)
## country year sex child adult elderly
## 1 Afghanistan 1995 female 0 0 0
## 2 Afghanistan 1995 male 0 0 0
## 3 Afghanistan 1996 female 0 0 0
## 4 Afghanistan 1996 male 0 0 0
## 5 Afghanistan 1997 female 5 96 1
## 6 Afghanistan 1997 male 0 26 0
creating new column by adding child, adult and elderly as cases
tb <- transform(tb, case = child + adult + elderly)
# remove the columns child, adult and elderly
tb$child<-NULL
tb$adult<-NULL
tb$elderly<-NULL
head(tb)
## country year sex case
## 1 Afghanistan 1995 female 0
## 2 Afghanistan 1995 male 0
## 3 Afghanistan 1996 female 0
## 4 Afghanistan 1996 male 0
## 5 Afghanistan 1997 female 102
## 6 Afghanistan 1997 male 26
The the dataset tb contains both cases for male and female. We create two datasets with male and female.
male_data <- subset(tb, tb$sex == "male")
female_data<-subset(tb, tb$sex == "female")
head(male_data)
## country year sex case
## 2 Afghanistan 1995 male 0
## 4 Afghanistan 1996 male 0
## 6 Afghanistan 1997 male 26
## 8 Afghanistan 1998 male 571
## 10 Afghanistan 1999 male 228
## 12 Afghanistan 2000 male 915
head(female_data)
## country year sex case
## 1 Afghanistan 1995 female 0
## 3 Afghanistan 1996 female 0
## 5 Afghanistan 1997 female 102
## 7 Afghanistan 1998 female 1207
## 9 Afghanistan 1999 female 517
## 11 Afghanistan 2000 female 1751
Add another column from female_data to male_date to add both the cases
t1 <- transform(male_data, case1 = female_data$case)
head(t1)
## country year sex case case1
## 2 Afghanistan 1995 male 0 0
## 4 Afghanistan 1996 male 0 0
## 6 Afghanistan 1997 male 26 102
## 8 Afghanistan 1998 male 571 1207
## 10 Afghanistan 1999 male 228 517
## 12 Afghanistan 2000 male 915 1751
here case represent male cases and case1 represent female cases
# i will use matrix operations to add the columns
t2<-transform(t1, cases = case+case1)
#remove columns sex, case and case1
t2$sex<-NULL
t2$case<-NULL
t2$case1<-NULL
# t2 contains the combined cases of male and female datasets
head(t2)
## country year cases
## 2 Afghanistan 1995 0
## 4 Afghanistan 1996 0
## 6 Afghanistan 1997 128
## 8 Afghanistan 1998 1778
## 10 Afghanistan 1999 745
## 12 Afghanistan 2000 2666
Reading data for population dataset
p<- getURL("https://raw.githubusercontent.com/gpsingh12/IS-607-Assignment-3/master/population.csv")
population<- read.csv(text=p)
head(population)
## country year population
## 1 Afghanistan 1995 17586073
## 2 Algeria 1995 29315463
## 3 Angola 1995 12104952
## 4 Argentina 1995 34833168
## 5 Azerbaijan 1995 7770806
## 6 Bangladesh 1995 119869585
Merge t2 and population datasets
tb <- merge(t2, population, by=c("country", "year"))
#create a new column in tb by dividing cases by population
tb<-transform(tb,rate = (cases/population)*100000)
# remove population and cases columns
tb$population<-NULL
tb$cases<-NULL
#final result
head(tb)
## country year rate
## 1 Afghanistan 1995 0.0000000
## 2 Afghanistan 1996 0.0000000
## 3 Afghanistan 1997 0.6729324
## 4 Afghanistan 1998 9.1194284
## 5 Afghanistan 1999 3.7274096
## 6 Afghanistan 2000 12.9446633