library(RCurl)
## Loading required package: bitops
tb <- getURL("https://raw.githubusercontent.com/gpsingh12/IS-607-Assignment-3/master/tb.csv")
tb <- read.csv(text = tb, header = FALSE)

Assigning the names to the columns of dataset tb

names(tb)<-c("country", "year", "sex", "child", "adult", "elderly")

head(tb)
##       country year    sex child adult elderly
## 1 Afghanistan 1995 female    -1    -1      -1
## 2 Afghanistan 1995   male    -1    -1      -1
## 3 Afghanistan 1996 female    -1    -1      -1
## 4 Afghanistan 1996   male    -1    -1      -1
## 5 Afghanistan 1997 female     5    96       1
## 6 Afghanistan 1997   male     0    26       0

As we looked at first few rows of tb, there were values that were NA giving us -1 in the columns. We need to replace these values for mathematical operations.

tb <- replace(tb, tb == -1, 0)
head(tb)
##       country year    sex child adult elderly
## 1 Afghanistan 1995 female     0     0       0
## 2 Afghanistan 1995   male     0     0       0
## 3 Afghanistan 1996 female     0     0       0
## 4 Afghanistan 1996   male     0     0       0
## 5 Afghanistan 1997 female     5    96       1
## 6 Afghanistan 1997   male     0    26       0

creating new column by adding child, adult and elderly as cases

tb <- transform(tb, case = child + adult + elderly)


# remove the columns  child, adult and elderly

tb$child<-NULL
tb$adult<-NULL
tb$elderly<-NULL

head(tb)
##       country year    sex case
## 1 Afghanistan 1995 female    0
## 2 Afghanistan 1995   male    0
## 3 Afghanistan 1996 female    0
## 4 Afghanistan 1996   male    0
## 5 Afghanistan 1997 female  102
## 6 Afghanistan 1997   male   26

The the dataset tb contains both cases for male and female. We create two datasets with male and female.

male_data <- subset(tb, tb$sex == "male")
female_data<-subset(tb, tb$sex == "female")

head(male_data)
##        country year  sex case
## 2  Afghanistan 1995 male    0
## 4  Afghanistan 1996 male    0
## 6  Afghanistan 1997 male   26
## 8  Afghanistan 1998 male  571
## 10 Afghanistan 1999 male  228
## 12 Afghanistan 2000 male  915
head(female_data)
##        country year    sex case
## 1  Afghanistan 1995 female    0
## 3  Afghanistan 1996 female    0
## 5  Afghanistan 1997 female  102
## 7  Afghanistan 1998 female 1207
## 9  Afghanistan 1999 female  517
## 11 Afghanistan 2000 female 1751

Add another column from female_data to male_date to add both the cases

t1 <- transform(male_data, case1 = female_data$case)
head(t1)
##        country year  sex case case1
## 2  Afghanistan 1995 male    0     0
## 4  Afghanistan 1996 male    0     0
## 6  Afghanistan 1997 male   26   102
## 8  Afghanistan 1998 male  571  1207
## 10 Afghanistan 1999 male  228   517
## 12 Afghanistan 2000 male  915  1751

here case represent male cases and case1 represent female cases

# i will use matrix operations to add the columns  
t2<-transform(t1, cases = case+case1)


#remove columns sex, case and case1
t2$sex<-NULL
t2$case<-NULL
t2$case1<-NULL
# t2 contains the combined cases of male and female datasets
head(t2)
##        country year cases
## 2  Afghanistan 1995     0
## 4  Afghanistan 1996     0
## 6  Afghanistan 1997   128
## 8  Afghanistan 1998  1778
## 10 Afghanistan 1999   745
## 12 Afghanistan 2000  2666

Reading data for population dataset

p<- getURL("https://raw.githubusercontent.com/gpsingh12/IS-607-Assignment-3/master/population.csv")
population<- read.csv(text=p)
head(population)
##       country year population
## 1 Afghanistan 1995   17586073
## 2     Algeria 1995   29315463
## 3      Angola 1995   12104952
## 4   Argentina 1995   34833168
## 5  Azerbaijan 1995    7770806
## 6  Bangladesh 1995  119869585

Merge t2 and population datasets

tb <- merge(t2, population, by=c("country", "year"))


#create a new column in tb by dividing cases by population

tb<-transform(tb,rate = (cases/population)*100000)


#  remove population and cases columns 

tb$population<-NULL
tb$cases<-NULL


#final result

head(tb)
##       country year       rate
## 1 Afghanistan 1995  0.0000000
## 2 Afghanistan 1996  0.0000000
## 3 Afghanistan 1997  0.6729324
## 4 Afghanistan 1998  9.1194284
## 5 Afghanistan 1999  3.7274096
## 6 Afghanistan 2000 12.9446633