Build up CountryData

# Two parallel vectors: country names and their life expectancy values.
Country <- c("Brazil", "China", "India", "Switzerland", "USA")
LifeExpectancy <- c(74, 76, 65, 83, 79)
Country
## [1] "Brazil"      "China"       "India"       "Switzerland" "USA"
Country[[1]]
## [1] "Brazil"
LifeExpectancy
## [1] 74 76 65 83 79
LifeExpectancy[[3]]
## [1] 65
# Combine the two vectors column-wise into a data frame.
CountryData <- data.frame(Country = Country, LifeExpectancy = LifeExpectancy)
CountryData
##       Country LifeExpectancy
## 1      Brazil             74
## 2       China             76
## 3       India             65
## 4 Switzerland             83
## 5         USA             79

Add a new column “Population”

# Append a Population column; values are listed in the same row order as
# the existing Country column.
CountryData[["Population"]] <- c(199000, 1390000, 1240000, 7997, 318000)
CountryData
##       Country LifeExpectancy Population
## 1      Brazil             74     199000
## 2       China             76    1390000
## 3       India             65    1240000
## 4 Switzerland             83       7997
## 5         USA             79     318000

Build a second country data frame

# Re-use the vector names for two additional countries, then assemble them
# into a data frame with the same three columns as CountryData.
Country <- c("Australia", "Greece")
LifeExpectancy <- c(82, 81)
Population <- c(23050, 11125)
NewCountryData <- data.frame(
  Country = Country,
  LifeExpectancy = LifeExpectancy,
  Population = Population
)
NewCountryData
##     Country LifeExpectancy Population
## 1 Australia             82      23050
## 2    Greece             81      11125

Use rbind to combine CountryData and NewCountryData

# Stack the rows of both data frames; they share the same column names.
AllCountryData <- rbind(CountryData, NewCountryData)
AllCountryData
##       Country LifeExpectancy Population
## 1      Brazil             74     199000
## 2       China             76    1390000
## 3       India             65    1240000
## 4 Switzerland             83       7997
## 5         USA             79     318000
## 6   Australia             82      23050
## 7      Greece             81      11125

Read WHO document

# Load the WHO data set from the working directory.
# stringsAsFactors = TRUE is spelled out because the outputs shown in this
# document (Factor columns in str(), per-level counts in summary(), the
# "194 Levels" printouts) rely on string columns being factors, which is no
# longer the read.csv() default from R 4.0.0 onwards.
WHO <- read.csv("WHO.csv", stringsAsFactors = TRUE)

Subsetting

# Keep only the rows whose Region is "Europe", then inspect the result.
WHO_Europe <- subset(x = WHO, subset = Region == "Europe")
str(object = WHO_Europe)
## 'data.frame':    53 obs. of  13 variables:
##  $ Country                      : Factor w/ 194 levels "Afghanistan",..: 2 4 8 10 11 16 17 22 26 42 ...
##  $ Region                       : Factor w/ 6 levels "Africa","Americas",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Population                   : int  3162 78 2969 8464 9309 9405 11060 3834 7278 4307 ...
##  $ Under15                      : num  21.3 15.2 20.3 14.5 22.2 ...
##  $ Over60                       : num  14.93 22.86 14.06 23.52 8.24 ...
##  $ FertilityRate                : num  1.75 NA 1.74 1.44 1.96 1.47 1.85 1.26 1.51 1.48 ...
##  $ LifeExpectancy               : int  74 82 71 81 71 71 80 76 74 77 ...
##  $ ChildMortality               : num  16.7 3.2 16.4 4 35.2 5.2 4.2 6.7 12.1 4.7 ...
##  $ CellularSubscribers          : num  96.4 75.5 103.6 154.8 108.8 ...
##  $ LiteracyRate                 : num  NA NA 99.6 NA NA NA NA 97.9 NA 98.8 ...
##  $ GNI                          : num  8820 NA 6100 42050 8960 ...
##  $ PrimarySchoolEnrollmentMale  : num  NA 78.4 NA NA 85.3 NA 98.9 86.5 99.3 94.8 ...
##  $ PrimarySchoolEnrollmentFemale: num  NA 79.4 NA NA 84.1 NA 99.2 88.4 99.7 97 ...
summary(object = WHO_Europe)
##        Country                     Region     Population    
##  Albania   : 1   Africa               : 0   Min.   :    31  
##  Andorra   : 1   Americas             : 0   1st Qu.:  3028  
##  Armenia   : 1   Eastern Mediterranean: 0   Median :  7278  
##  Austria   : 1   Europe               :53   Mean   : 17063  
##  Azerbaijan: 1   South-East Asia      : 0   3rd Qu.: 11125  
##  Belarus   : 1   Western Pacific      : 0   Max.   :143000  
##  (Other)   :47                                              
##     Under15          Over60      FertilityRate   LifeExpectancy 
##  Min.   :13.17   Min.   : 4.80   Min.   :1.260   Min.   :63.00  
##  1st Qu.:14.92   1st Qu.:17.56   1st Qu.:1.455   1st Qu.:74.00  
##  Median :16.45   Median :20.76   Median :1.550   Median :77.00  
##  Mean   :18.04   Mean   :19.77   Mean   :1.761   Mean   :76.74  
##  3rd Qu.:18.64   3rd Qu.:23.82   3rd Qu.:1.923   3rd Qu.:81.00  
##  Max.   :35.75   Max.   :26.97   Max.   :3.810   Max.   :83.00  
##                                  NA's   :3                      
##  ChildMortality  CellularSubscribers  LiteracyRate        GNI       
##  Min.   : 2.20   Min.   : 68.77      Min.   :95.20   Min.   : 2180  
##  1st Qu.: 3.80   1st Qu.:104.19      1st Qu.:98.00   1st Qu.:11210  
##  Median : 4.80   Median :115.39      Median :99.20   Median :20495  
##  Mean   :10.05   Mean   :117.11      Mean   :98.78   Mean   :23489  
##  3rd Qu.:10.70   3rd Qu.:129.61      3rd Qu.:99.70   3rd Qu.:34613  
##  Max.   :58.30   Max.   :179.31      Max.   :99.80   Max.   :64260  
##                  NA's   :2           NA's   :27      NA's   :5      
##  PrimarySchoolEnrollmentMale PrimarySchoolEnrollmentFemale
##  Min.   :78.40               Min.   : 79.40               
##  1st Qu.:94.72               1st Qu.: 95.25               
##  Median :97.70               Median : 97.55               
##  Mean   :95.79               Mean   : 95.99               
##  3rd Qu.:99.10               3rd Qu.: 99.20               
##  Max.   :99.80               Max.   :100.00               
##  NA's   :15                  NA's   :15

Basic data analysis

# Summary statistics for the Under15 column.
mean(WHO$Under15)
## [1] 28.73242
sd(WHO$Under15)
## [1] 10.53457
summary(WHO$Under15)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.12   18.72   28.65   28.73   37.75   49.99
# Row index of the smallest Under15 value.
which.min(WHO$Under15)
## [1] 86
# Look the country up with the computed index instead of a hard-coded row
# number, so the lookup stays correct if the rows of WHO.csv change.
WHO$Country[which.min(WHO$Under15)]
## [1] Japan
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
# Row index of the largest Under15 value, and the matching country.
which.max(WHO$Under15)
## [1] 124
WHO$Country[which.max(WHO$Under15)]
## [1] Niger
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe

Plots: scatterplot, histogram and boxplots

# Scatterplot of FertilityRate (y axis) against GNI (x axis).
plot(WHO$GNI, WHO$FertilityRate)

# Histogram of the CellularSubscribers column.
hist(WHO$CellularSubscribers)

# Boxplots of LifeExpectancy, one box per Region, with default labels.
boxplot(WHO$LifeExpectancy ~ WHO$Region)

# Same boxplots with an empty x-axis label, a y-axis label and a main title.
boxplot(WHO$LifeExpectancy ~ WHO$Region, xlab = "", ylab = "Life Expectancy", main = "Life Expectancy of Countries by Region")

Summary Tables

# Count how many rows (countries) fall into each Region.
table(WHO[["Region"]])
## 
##                Africa              Americas Eastern Mediterranean 
##                    46                    35                    22 
##                Europe       South-East Asia       Western Pacific 
##                    53                    11                    27

tapply

  • Example: to compute the average amount of iron, grouped by high and low protein:
  • tapply(USDA$Iron, USDA$HighProtein, mean, na.rm=TRUE)
# Mean of Over60 within each Region.
tapply(X = WHO$Over60, INDEX = WHO$Region, FUN = mean)
##                Africa              Americas Eastern Mediterranean 
##              5.220652             10.943714              5.620000 
##                Europe       South-East Asia       Western Pacific 
##             19.774906              8.769091             10.162963
# Minimum LiteracyRate per Region; missing values make every result NA.
tapply(X = WHO$LiteracyRate, INDEX = WHO$Region, FUN = min)
##                Africa              Americas Eastern Mediterranean 
##                    NA                    NA                    NA 
##                Europe       South-East Asia       Western Pacific 
##                    NA                    NA                    NA
# Same computation with missing values dropped before taking the minimum.
tapply(X = WHO$LiteracyRate, INDEX = WHO$Region, FUN = min, na.rm = TRUE)
##                Africa              Americas Eastern Mediterranean 
##                  31.1                  75.2                  63.9 
##                Europe       South-East Asia       Western Pacific 
##                  95.2                  56.8                  60.6