# Unit 1: An Introduction to Analytics > Working with Data: An Introduction to R 

# Report the directory R is currently working in.
getwd()
## [1] "D:/Dropbox/MOOC/MIT_AnalyticsEdge_edx/Ch1_IntroToAnalytics/lec1.2WHO"
# Move into the lecture folder only when that path exists on this machine.
# An unconditional setwd() on a hard-coded absolute path stops the script
# with "cannot change working directory" everywhere else; when the folder is
# absent the script simply keeps the current working directory.
lecture_dir <- "D:/Dropbox/MOOC/MIT_AnalyticsEdge_edx/Ch1_IntroToAnalytics/lec1.2WHO"
if (dir.exists(lecture_dir)) {
  setwd(lecture_dir)
}

# Global Health Observatory data repository
# http://apps.who.int/gho/data/node.main
# Load the WHO indicator table from WHO.csv in the current working directory.
# The recorded output later in this script prints Country with "194 Levels",
# i.e. the text columns were read as factors. read.csv() only does that by
# default before R 4.0.0, so ask for it explicitly to get the same result on
# any R version.
WHO <- read.csv("WHO.csv", stringsAsFactors = TRUE)
# Column names of the loaded data frame.
names(WHO)
##  [1] "Country"                       "Region"                       
##  [3] "Population"                    "Under15"                      
##  [5] "Over60"                        "FertilityRate"                
##  [7] "LifeExpectancy"                "ChildMortality"               
##  [9] "CellularSubscribers"           "LiteracyRate"                 
## [11] "GNI"                           "PrimarySchoolEnrollmentMale"  
## [13] "PrimarySchoolEnrollmentFemale"
# First six rows, to eyeball the values and spot missing entries (NA).
head(WHO)
##               Country                Region Population Under15 Over60
## 1         Afghanistan Eastern Mediterranean      29825   47.42   3.82
## 2             Albania                Europe       3162   21.33  14.93
## 3             Algeria                Africa      38482   27.42   7.17
## 4             Andorra                Europe         78   15.20  22.86
## 5              Angola                Africa      20821   47.58   3.84
## 6 Antigua and Barbuda              Americas         89   25.96  12.35
##   FertilityRate LifeExpectancy ChildMortality CellularSubscribers
## 1          5.40             60           98.5               54.26
## 2          1.75             74           16.7               96.39
## 3          2.83             73           20.0               98.99
## 4            NA             82            3.2               75.49
## 5          6.10             51          163.5               48.38
## 6          2.12             75            9.9              196.41
##   LiteracyRate   GNI PrimarySchoolEnrollmentMale
## 1           NA  1140                          NA
## 2           NA  8820                          NA
## 3           NA  8310                        98.2
## 4           NA    NA                        78.4
## 5         70.1  5230                        93.1
## 6         99.0 17900                        91.1
##   PrimarySchoolEnrollmentFemale
## 1                            NA
## 2                            NA
## 3                          96.4
## 4                          79.4
## 5                          78.2
## 6                          84.5
# Mean and standard deviation of the Under15 column.
with(WHO, mean(Under15))
## [1] 28.73242
with(WHO, sd(Under15))
## [1] 10.53457
# Country on the row with the smallest Under15 value, then that value.
with(WHO, Country[which.min(Under15)])
## [1] Japan
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
with(WHO, Under15[which.min(Under15)])
## [1] 13.12
# Country on the row with the largest Under15 value, then that value.
with(WHO, Country[which.max(Under15)])
## [1] Niger
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
with(WHO, Under15[which.max(Under15)])
## [1] 49.99
# Scatter plot with GNI on the x axis and FertilityRate on the y axis.
plot(x = WHO$GNI, y = WHO$FertilityRate)

# Keep the rows where GNI exceeds 10000 and FertilityRate exceeds 2.5.
# which() skips rows whose comparison is NA, so rows with a missing value in
# either column are left out.
outliers <- WHO[which(WHO$GNI > 10000 & WHO$FertilityRate > 2.5), ]
nrow(outliers)
## [1] 7
# Show only the three columns relevant to the filter.
outliers[, c("Country", "GNI", "FertilityRate")]
##               Country   GNI FertilityRate
## 23           Botswana 14550          2.71
## 56  Equatorial Guinea 25620          5.04
## 63              Gabon 13740          4.18
## 83             Israel 27110          2.92
## 88         Kazakhstan 11250          2.52
## 131            Panama 14510          2.52
## 150      Saudi Arabia 24700          2.76
# Histogram of the CellularSubscribers column.
hist(x = WHO$CellularSubscribers)

# One box of LifeExpectancy values per Region, with explicit axis labels and
# title.
boxplot(
  LifeExpectancy ~ Region,
  data = WHO,
  main = "Life expectancy of countries by region",
  xlab = "Region",
  ylab = "Life Expectancy"
)

# Number of rows in each Region.
table(WHO[["Region"]])
## 
##                Africa              Americas Eastern Mediterranean 
##                    46                    35                    22 
##                Europe       South-East Asia       Western Pacific 
##                    53                    11                    27
# Mean of Over60 within each Region.
with(WHO, tapply(Over60, Region, mean))
##                Africa              Americas Eastern Mediterranean 
##              5.220652             10.943714              5.620000 
##                Europe       South-East Asia       Western Pacific 
##             19.774906              8.769091             10.162963
# Smallest LiteracyRate within each Region; na.rm = TRUE is forwarded to
# min() so missing values are ignored instead of turning the result into NA.
with(WHO, tapply(LiteracyRate, Region, min, na.rm = TRUE))
##                Africa              Americas Eastern Mediterranean 
##                  31.1                  75.2                  63.9 
##                Europe       South-East Asia       Western Pacific 
##                  95.2                  56.8                  60.6
# Mean of Over60 across all rows.
with(WHO, mean(Over60))
## [1] 11.16366
# Country on the row with the largest LiteracyRate (which.max skips NA).
with(WHO, Country[which.max(LiteracyRate)])
## [1] Cuba
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe