For your reference, here are commands to read in a set of Gapminder files and compute, for each country, the mean value of each variable over the years 2000 and later.
# Assemble `res`: for each Gapminder variable, fetch its file, keep the rows
# with Year >= 2000, compute the mean of that variable grouped by Country, and
# outer-join the result onto what has been accumulated so far.
# fetchGapminder, groupBy, rename and join are defined outside this chunk.
varNames <- c("Alcohol", "Fertility", "TrafficDeathRate")
res <- NULL # nothing accumulated yet
for (XXX in varNames) {
  # Fetch this variable's file and restrict it to the years of interest.
  gapdat <- subset(
    fetchGapminder(paste0("Gapminder/", XXX, ".csv")),
    Year >= 2000
  )
  # The column to average is named by the string in XXX, so the call
  # mean(<XXX>) is built as text and parsed into an expression.
  command <- parse(text = paste("mean(", XXX, ")"))
  # Summarise by Country, then rename the generic `value` column to the
  # variable's own name so the joined columns stay distinguishable.
  oneVar <- rename(
    groupBy(gapdat, by = Country, value = eval(command)),
    c(value = XXX)
  )
  # Outer join with existing data (the first variable just seeds `res`).
  res <- if (is.null(res)) {
    oneVar
  } else {
    join(res, oneVar, by = "Country", type = "full")
  }
}
Make a parallel coordinates plot involving Fertility, Alcohol, and TrafficDeathRate. Show the plot and interpret it with regard to the connection between per capita alcohol consumption and traffic death rate.
# Assemble `res` for the alcohol / fertility / traffic-death analysis: for each
# variable, fetch its Gapminder file, keep the rows with Year >= 2000, take the
# mean grouped by Country, and outer-join the result onto the running table.
# fetchGapminder, groupBy, rename and join are defined outside this chunk.
varNames <- c("Alcohol", "Fertility", "TrafficDeathRate")
res <- NULL # nothing accumulated yet
for (XXX in varNames) {
  # Fetch this variable's file and restrict it to the years of interest.
  gapdat <- subset(
    fetchGapminder(paste0("Gapminder/", XXX, ".csv")),
    Year >= 2000
  )
  # The column to average is named by the string in XXX, so the call
  # mean(<XXX>) is built as text and parsed into an expression.
  command <- parse(text = paste("mean(", XXX, ")"))
  # Summarise by Country, then rename the generic `value` column to the
  # variable's own name so the joined columns stay distinguishable.
  oneVar <- rename(
    groupBy(gapdat, by = Country, value = eval(command)),
    c(value = XXX)
  )
  # Outer join with existing data (the first variable just seeds `res`).
  res <- if (is.null(res)) {
    oneVar
  } else {
    join(res, oneVar, by = "Country", type = "full")
  }
}
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/Alcohol.csv
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/Fertility.csv
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/TrafficDeathRate.csv
# Plot the traffic death rate on a log scale: TD holds log(TrafficDeathRate).
res <- transform(res, TD = log(TrafficDeathRate))
# NOTE(review): parallelPlot is defined outside this file. The `-` in front of
# Alcohol presumably reverses that axis rather than dropping the variable --
# confirm against parallelPlot's documentation.
parallelPlot(
  ~ TD - Alcohol + Fertility,
  data = res,
  axes = TRUE
)
This parallel plot suggests that countries with the highest traffic death rates also have the lowest per capita alcohol consumption, as well as the highest fertility rates. However, this trend does not appear to hold for countries with a TD (the log of the traffic death rate) of less than 4, for which there seems to be little correlation between per capita alcohol consumption and traffic death rate.
This pattern is counterintuitive, but the data describe whole countries rather than individuals, so an association seen across countries need not hold for individual people.
Pull out a set of Gapminder variables that you think are important
in showing health risks.
# Assemble `res` for the health-risk analysis: for each variable, fetch its
# Gapminder file, keep the rows with Year >= 2000, take the mean grouped by
# Country, and outer-join the result onto the running table.
# fetchGapminder, groupBy, rename and join are defined outside this chunk.
varNames <- c("IncomePerCapitaPPP", "ImprovedSanitation", "Under5mortality")
res <- NULL # nothing accumulated yet
for (XXX in varNames) {
  # Fetch this variable's file and restrict it to the years of interest.
  gapdat <- subset(
    fetchGapminder(paste0("Gapminder/", XXX, ".csv")),
    Year >= 2000
  )
  # The column to average is named by the string in XXX, so the call
  # mean(<XXX>) is built as text and parsed into an expression.
  command <- parse(text = paste("mean(", XXX, ")"))
  # Summarise by Country, then rename the generic `value` column to the
  # variable's own name so the joined columns stay distinguishable.
  oneVar <- rename(
    groupBy(gapdat, by = Country, value = eval(command)),
    c(value = XXX)
  )
  # Outer join with existing data (the first variable just seeds `res`).
  res <- if (is.null(res)) {
    oneVar
  } else {
    join(res, oneVar, by = "Country", type = "full")
  }
}
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/IncomePerCapitaPPP.csv
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/ImprovedSanitation.csv
## Retrieving from
## http://www.mosaic-web.org/go/datasets/Gapminder/Under5mortality.csv
# Plot income on a log scale: capita holds log(IncomePerCapitaPPP).
res <- transform(res, capita = log(IncomePerCapitaPPP))
# NOTE(review): parallelPlot is defined outside this file. Here the data frame
# is passed first and the formula second, whereas the earlier call in this file
# passes the formula first with `data =` named -- confirm both orders match
# parallelPlot's signature. The `-` before Under5mortality presumably reverses
# that axis rather than dropping the variable -- confirm.
parallelPlot(
  res,
  ~ capita + ImprovedSanitation - Under5mortality,
  axes = TRUE
)
Do the same for variables that indicate