# Unit 1: An Introduction to Analytics > Working with Data: An Introduction to R
getwd()
## [1] "D:/Dropbox/MOOC/MIT_AnalyticsEdge_edx/Ch1_IntroToAnalytics/lec1.2WHO"

# Load the WHO data set ----
# Source: Global Health Observatory data repository
# http://apps.who.int/gho/data/node.main
#
# The file is located with an explicit path instead of setwd(): changing the
# working directory to a hard-coded absolute path stops the script with an
# error on any machine that lacks that directory, and it alters global
# session state. The original directory is tried first; if WHO.csv is not
# there, the current working directory is used.
data_dir <- "D:/Dropbox/MOOC/MIT_AnalyticsEdge_edx/Ch1_IntroToAnalytics/lec1.2WHO"
who_csv <- file.path(data_dir, "WHO.csv")
if (!file.exists(who_csv)) {
  who_csv <- "WHO.csv"
}
if (!file.exists(who_csv)) {
  stop(
    "WHO.csv was not found in '", data_dir,
    "' or in the working directory '", getwd(), "'.",
    call. = FALSE
  )
}

# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0.0,
# where the read.csv() default became FALSE. The output recorded later in
# this script ("194 Levels: ...") was produced with factor columns.
WHO <- read.csv(who_csv, stringsAsFactors = TRUE)
# Inspect the structure of the data frame ----

# Column names (13 columns).
colnames(WHO)
##  [1] "Country"                       "Region"
##  [3] "Population"                    "Under15"
##  [5] "Over60"                        "FertilityRate"
##  [7] "LifeExpectancy"                "ChildMortality"
##  [9] "CellularSubscribers"           "LiteracyRate"
## [11] "GNI"                           "PrimarySchoolEnrollmentMale"
## [13] "PrimarySchoolEnrollmentFemale"

# First six rows; several columns contain missing values (NA).
head(WHO, n = 6L)
##               Country                Region Population Under15 Over60
## 1         Afghanistan Eastern Mediterranean      29825   47.42   3.82
## 2             Albania                Europe       3162   21.33  14.93
## 3             Algeria                Africa      38482   27.42   7.17
## 4             Andorra                Europe         78   15.20  22.86
## 5              Angola                Africa      20821   47.58   3.84
## 6 Antigua and Barbuda              Americas         89   25.96  12.35
##   FertilityRate LifeExpectancy ChildMortality CellularSubscribers
## 1          5.40             60           98.5               54.26
## 2          1.75             74           16.7               96.39
## 3          2.83             73           20.0               98.99
## 4            NA             82            3.2               75.49
## 5          6.10             51          163.5               48.38
## 6          2.12             75            9.9              196.41
##   LiteracyRate   GNI PrimarySchoolEnrollmentMale
## 1           NA  1140                          NA
## 2           NA  8820                          NA
## 3           NA  8310                        98.2
## 4           NA    NA                        78.4
## 5         70.1  5230                        93.1
## 6         99.0 17900                        91.1
##   PrimarySchoolEnrollmentFemale
## 1                            NA
## 2                            NA
## 3                          96.4
## 4                          79.4
## 5                          78.2
## 6                          84.5
# Summary of the Under15 column ----
under15 <- WHO$Under15

# Mean and standard deviation across all countries.
mean(under15)
## [1] 28.73242
sd(under15)
## [1] 10.53457

# Country with the smallest Under15 value, and that value.
min_under15_row <- which.min(under15)
WHO$Country[min_under15_row]
## [1] Japan
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
under15[min_under15_row]
## [1] 13.12

# Country with the largest Under15 value, and that value.
max_under15_row <- which.max(under15)
WHO$Country[max_under15_row]
## [1] Niger
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
under15[max_under15_row]
## [1] 49.99
# GNI versus fertility rate ----

# Scatter plot with GNI on the x axis and FertilityRate on the y axis.
plot(x = WHO$GNI, y = WHO$FertilityRate)

# Countries with GNI above 10000 and a fertility rate above 2.5.
# which() drops rows where either column is NA, as subset() does.
is_outlier <- WHO$GNI > 10000 & WHO$FertilityRate > 2.5
outliers <- WHO[which(is_outlier), ]
nrow(outliers)
## [1] 7

# Show only the columns relevant to the comparison.
outliers[, c("Country", "GNI", "FertilityRate")]
##               Country   GNI FertilityRate
## 23           Botswana 14550          2.71
## 56  Equatorial Guinea 25620          5.04
## 63              Gabon 13740          4.18
## 83             Israel 27110          2.92
## 88         Kazakhstan 11250          2.52
## 131            Panama 14510          2.52
## 150      Saudi Arabia 24700          2.76
# Distribution plots ----

# Histogram of the CellularSubscribers column. The call is kept in this exact
# form because the default title and x label are built from the argument
# expression.
hist(WHO$CellularSubscribers)

# Box plot of life expectancy, one box per region.
boxplot(
  LifeExpectancy ~ Region,
  data = WHO,
  xlab = "Region",
  ylab = "Life Expectancy",
  main = "Life expectancy of countries by region"
)

# Summaries by region ----

# Number of countries in each region.
table(WHO$Region)
##
##                Africa              Americas Eastern Mediterranean
##                    46                    35                    22
##                Europe       South-East Asia       Western Pacific
##                    53                    11                    27

# Mean of the Over60 column within each region.
with(WHO, tapply(Over60, Region, mean))
##                Africa              Americas Eastern Mediterranean
##              5.220652             10.943714              5.620000
##                Europe       South-East Asia       Western Pacific
##             19.774906              8.769091             10.162963

# Minimum literacy rate within each region; na.rm = TRUE skips the countries
# whose LiteracyRate is missing.
with(WHO, tapply(LiteracyRate, Region, min, na.rm = TRUE))
##                Africa              Americas Eastern Mediterranean
##                  31.1                  75.2                  63.9
##                Europe       South-East Asia       Western Pacific
##                  95.2                  56.8                  60.6

# Mean of the Over60 column across all countries.
with(WHO, mean(Over60))
## [1] 11.16366

# Country with the highest literacy rate. which.max() ignores NA values and
# returns the first row that holds the maximum.
top_literacy_row <- which.max(WHO$LiteracyRate)
WHO$Country[top_literacy_row]
## [1] Cuba
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe