Build up CountryData
# Parallel vectors: one entry per country, in matching order.
Country <- c("Brazil", "China", "India", "Switzerland", "USA")
LifeExpectancy <- c(74, 76, 65, 83, 79)
Country
## [1] "Brazil" "China" "India" "Switzerland" "USA"
Country[1]
## [1] "Brazil"
LifeExpectancy
## [1] 74 76 65 83 79
LifeExpectancy[3]
## [1] 65
# Combine the two vectors into a data frame, one column per vector.
CountryData <- data.frame(
  Country = Country,
  LifeExpectancy = LifeExpectancy
)
CountryData
## Country LifeExpectancy
## 1 Brazil 74
## 2 China 76
## 3 India 65
## 4 Switzerland 83
## 5 USA 79
Add a new column “Population”
# Append a Population column; values are listed in the same row order as
# the existing CountryData rows.
CountryData$Population <- c(199000, 1390000, 1240000, 7997, 318000)
CountryData
## Country LifeExpectancy Population
## 1 Brazil 74 199000
## 2 China 76 1390000
## 3 India 65 1240000
## 4 Switzerland 83 7997
## 5 USA 79 318000
Build a new country data
# Two additional countries. Note that these assignments overwrite the
# earlier Country and LifeExpectancy vectors of the same name.
Country <- c("Australia", "Greece")
LifeExpectancy <- c(82, 81)
Population <- c(23050, 11125)
NewCountryData <- data.frame(
  Country = Country,
  LifeExpectancy = LifeExpectancy,
  Population = Population
)
NewCountryData
## Country LifeExpectancy Population
## 1 Australia 82 23050
## 2 Greece 81 11125
Use rbind to combine CountryData and NewCountryData
# Stack the rows of NewCountryData underneath those of CountryData.
AllCountryData <- rbind(CountryData, NewCountryData)
AllCountryData
## Country LifeExpectancy Population
## 1 Brazil 74 199000
## 2 China 76 1390000
## 3 India 65 1240000
## 4 Switzerland 83 7997
## 5 USA 79 318000
## 6 Australia 82 23050
## 7 Greece 81 11125
Read WHO document
# Load the WHO data set from the working directory.
# stringsAsFactors = TRUE is passed explicitly because the str()/summary()
# output recorded in this document shows Country and Region as factors;
# since R 4.0.0 read.csv() would otherwise read them as character columns.
WHO <- read.csv("WHO.csv", stringsAsFactors = TRUE)
Subsetting
# Keep only the rows of WHO whose Region equals "Europe".
WHO_Europe <- subset(WHO, subset = Region == "Europe")
# Show the structure of the subset.
str(WHO_Europe)
## 'data.frame': 53 obs. of 13 variables:
## $ Country : Factor w/ 194 levels "Afghanistan",..: 2 4 8 10 11 16 17 22 26 42 ...
## $ Region : Factor w/ 6 levels "Africa","Americas",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Population : int 3162 78 2969 8464 9309 9405 11060 3834 7278 4307 ...
## $ Under15 : num 21.3 15.2 20.3 14.5 22.2 ...
## $ Over60 : num 14.93 22.86 14.06 23.52 8.24 ...
## $ FertilityRate : num 1.75 NA 1.74 1.44 1.96 1.47 1.85 1.26 1.51 1.48 ...
## $ LifeExpectancy : int 74 82 71 81 71 71 80 76 74 77 ...
## $ ChildMortality : num 16.7 3.2 16.4 4 35.2 5.2 4.2 6.7 12.1 4.7 ...
## $ CellularSubscribers : num 96.4 75.5 103.6 154.8 108.8 ...
## $ LiteracyRate : num NA NA 99.6 NA NA NA NA 97.9 NA 98.8 ...
## $ GNI : num 8820 NA 6100 42050 8960 ...
## $ PrimarySchoolEnrollmentMale : num NA 78.4 NA NA 85.3 NA 98.9 86.5 99.3 94.8 ...
## $ PrimarySchoolEnrollmentFemale: num NA 79.4 NA NA 84.1 NA 99.2 88.4 99.7 97 ...
# Per-column summary statistics of the subset.
summary(WHO_Europe)
## Country Region Population
## Albania : 1 Africa : 0 Min. : 31
## Andorra : 1 Americas : 0 1st Qu.: 3028
## Armenia : 1 Eastern Mediterranean: 0 Median : 7278
## Austria : 1 Europe :53 Mean : 17063
## Azerbaijan: 1 South-East Asia : 0 3rd Qu.: 11125
## Belarus : 1 Western Pacific : 0 Max. :143000
## (Other) :47
## Under15 Over60 FertilityRate LifeExpectancy
## Min. :13.17 Min. : 4.80 Min. :1.260 Min. :63.00
## 1st Qu.:14.92 1st Qu.:17.56 1st Qu.:1.455 1st Qu.:74.00
## Median :16.45 Median :20.76 Median :1.550 Median :77.00
## Mean :18.04 Mean :19.77 Mean :1.761 Mean :76.74
## 3rd Qu.:18.64 3rd Qu.:23.82 3rd Qu.:1.923 3rd Qu.:81.00
## Max. :35.75 Max. :26.97 Max. :3.810 Max. :83.00
## NA's :3
## ChildMortality CellularSubscribers LiteracyRate GNI
## Min. : 2.20 Min. : 68.77 Min. :95.20 Min. : 2180
## 1st Qu.: 3.80 1st Qu.:104.19 1st Qu.:98.00 1st Qu.:11210
## Median : 4.80 Median :115.39 Median :99.20 Median :20495
## Mean :10.05 Mean :117.11 Mean :98.78 Mean :23489
## 3rd Qu.:10.70 3rd Qu.:129.61 3rd Qu.:99.70 3rd Qu.:34613
## Max. :58.30 Max. :179.31 Max. :99.80 Max. :64260
## NA's :2 NA's :27 NA's :5
## PrimarySchoolEnrollmentMale PrimarySchoolEnrollmentFemale
## Min. :78.40 Min. : 79.40
## 1st Qu.:94.72 1st Qu.: 95.25
## Median :97.70 Median : 97.55
## Mean :95.79 Mean : 95.99
## 3rd Qu.:99.10 3rd Qu.: 99.20
## Max. :99.80 Max. :100.00
## NA's :15 NA's :15
Basic data analysis
# Descriptive statistics for the Under15 column of WHO.
mean(WHO$Under15)
## [1] 28.73242
sd(WHO$Under15)
## [1] 10.53457
summary(WHO$Under15)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.12 18.72 28.65 28.73 37.75 49.99
# Row index of the smallest Under15 value, then the country in that row.
# The index is computed rather than typed in by hand so the lookup stays
# correct if the rows of WHO.csv change.
which.min(WHO$Under15)
## [1] 86
WHO$Country[which.min(WHO$Under15)]
## [1] Japan
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
# Same lookup for the largest Under15 value.
which.max(WHO$Under15)
## [1] 124
WHO$Country[which.max(WHO$Under15)]
## [1] Niger
## 194 Levels: Afghanistan Albania Algeria Andorra ... Zimbabwe
Scatterplot, histogram, and boxplots
# Scatterplot of FertilityRate against GNI.
plot(WHO$GNI, WHO$FertilityRate)

# Histogram of CellularSubscribers.
hist(WHO$CellularSubscribers)

# Boxplot of LifeExpectancy grouped by Region, with default labels.
boxplot(WHO$LifeExpectancy ~ WHO$Region)

# Same boxplot with an empty x label, a y label, and a title.
boxplot(
  WHO$LifeExpectancy ~ WHO$Region,
  xlab = "",
  ylab = "Life Expectancy",
  main = "Life Expectancy of Countries by Region"
)

Summary Tables
# Count how many rows of WHO fall into each value of Region.
table(WHO$Region)
##
## Africa Americas Eastern Mediterranean
## 46 35 22
## Europe South-East Asia Western Pacific
## 53 11 27
tapply
- Example: to compute the average amount of iron, grouped by high and low protein:
- tapply(USDA$Iron, USDA$HighProtein, mean, na.rm=TRUE)
# Mean of Over60 within each Region.
tapply(X = WHO$Over60, INDEX = WHO$Region, FUN = mean)
## Africa Americas Eastern Mediterranean
## 5.220652 10.943714 5.620000
## Europe South-East Asia Western Pacific
## 19.774906 8.769091 10.162963
# Minimum LiteracyRate per Region; without na.rm the result shown is NA
# for every Region.
tapply(X = WHO$LiteracyRate, INDEX = WHO$Region, FUN = min)
## Africa Americas Eastern Mediterranean
## NA NA NA
## Europe South-East Asia Western Pacific
## NA NA NA
# Same computation with na.rm = TRUE forwarded to min(), so missing values
# are dropped before taking the minimum.
tapply(X = WHO$LiteracyRate, INDEX = WHO$Region, FUN = min, na.rm = TRUE)
## Africa Americas Eastern Mediterranean
## 31.1 75.2 63.9
## Europe South-East Asia Western Pacific
## 95.2 56.8 60.6