Simple Exploratory Data Analysis on the Gapminder Data Set (I)

Load packages and data

library(plyr)
## Warning: package 'plyr' was built under R version 3.0.2
library(xtable)

# Remote location of the Gapminder five-year extract.
gdURL <- "http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt"
# Read it as a tab-separated file with a header row (the read.delim defaults,
# written out here so the expected file layout is visible).
gDat <- read.delim(gdURL, header = TRUE, sep = "\t")

Get the maximum and minimum of GDP per capita for all continents in a “wide” format.

# Per-continent extremes of GDP per capita: one row per continent with the
# maximum and the minimum side by side ("wide" layout), wrapped as an xtable.
d1 <- xtable(
  ddply(
    .data = gDat,
    .variables = .(continent),
    .fun = summarize,
    maxGDPpercap = max(gdpPercap),
    minGDPpercap = min(gdpPercap)
  )
)
# Emit the table as HTML, leaving out the row-name column.
print(d1, type = "html", include.rownames = FALSE)
continent maxGDPpercap minGDPpercap
Africa 21951.21 241.17
Americas 42951.65 1201.64
Asia 113523.13 331.00
Europe 49357.19 973.53
Oceania 34435.37 10039.60

Look at the spread of GDP per capita within the continents.

# Four measures of spread of GDP per capita within each continent:
# standard deviation, variance, median absolute deviation and
# interquartile range. Rows are sorted by ascending standard deviation
# before the result is wrapped as an xtable.
d2_sd <- xtable(
  arrange(
    ddply(
      .data = gDat,
      .variables = .(continent),
      .fun = summarize,
      sd = sd(gdpPercap),
      var = var(gdpPercap),
      mad = mad(gdpPercap),
      IQR = IQR(gdpPercap)
    ),
    sd
  )
)
# Emit the table as HTML, leaving out the row-name column.
print(d2_sd, type = "html", include.rownames = FALSE)
continent sd var mad IQR
Africa 2827.93 7997187.31 775.32 1616.17
Oceania 6358.98 40436668.87 6459.10 8072.26
Americas 6396.76 40918591.10 3269.33 4402.43
Europe 9355.21 87520019.60 8846.05 13248.30
Asia 14045.37 197272505.85 2820.83 7492.26

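Do the continents rank similarly with respect to heterogeneity of GDP per capita under the other measures? No. The variance necessarily gives the same ordering as the standard deviation, but the robust measures do not: Asia has the largest sd yet the second-smallest mad, and by IQR it ranks in the middle, while Europe is the most heterogeneous by both mad and IQR.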

Compute a trimmed mean of life expectancy for each year (trim level = 0.1).

# Trimmed mean of life expectancy for each survey year; trim = 0.1 drops
# that fraction of observations from each end before averaging.
d3 <- xtable(
  ddply(
    .data = gDat,
    .variables = .(year),
    .fun = summarize,
    trimmed_mean = mean(lifeExp, trim = 0.1)
  )
)
# Emit the table as HTML, leaving out the row-name column.
print(d3, type = "html", include.rownames = FALSE)
year trimmed_mean
1952 48.58
1957 51.27
1962 53.58
1967 55.87
1972 58.01
1977 60.10
1982 62.12
1987 63.92
1992 65.19
1997 66.02
2002 66.72
2007 68.11

How is life expectancy changing over time on different continents? “Tall” format.

# Mean life expectancy for every continent/year combination, one row per
# combination ("tall" layout), wrapped as an xtable.
d4 <- xtable(
  ddply(
    .data = gDat,
    .variables = .(continent, year),
    .fun = summarize,
    mean = mean(lifeExp)
  )
)
# Emit the table as HTML, leaving out the row-name column.
print(d4, type = "html", include.rownames = FALSE)
continent year mean
Africa 1952 39.14
Africa 1957 41.27
Africa 1962 43.32
Africa 1967 45.33
Africa 1972 47.45
Africa 1977 49.58
Africa 1982 51.59
Africa 1987 53.34
Africa 1992 53.63
Africa 1997 53.60
Africa 2002 53.33
Africa 2007 54.81
Americas 1952 53.28
Americas 1957 55.96
Americas 1962 58.40
Americas 1967 60.41
Americas 1972 62.39
Americas 1977 64.39
Americas 1982 66.23
Americas 1987 68.09
Americas 1992 69.57
Americas 1997 71.15
Americas 2002 72.42
Americas 2007 73.61
Asia 1952 46.31
Asia 1957 49.32
Asia 1962 51.56
Asia 1967 54.66
Asia 1972 57.32
Asia 1977 59.61
Asia 1982 62.62
Asia 1987 64.85
Asia 1992 66.54
Asia 1997 68.02
Asia 2002 69.23
Asia 2007 70.73
Europe 1952 64.41
Europe 1957 66.70
Europe 1962 68.54
Europe 1967 69.74
Europe 1972 70.78
Europe 1977 71.94
Europe 1982 72.81
Europe 1987 73.64
Europe 1992 74.44
Europe 1997 75.51
Europe 2002 76.70
Europe 2007 77.65
Oceania 1952 69.25
Oceania 1957 70.30
Oceania 1962 71.09
Oceania 1967 71.31
Oceania 1972 71.91
Oceania 1977 72.85
Oceania 1982 74.29
Oceania 1987 75.32
Oceania 1992 76.94
Oceania 1997 78.19
Oceania 2002 79.74
Oceania 2007 80.72