## Settings for RMarkdown; chunk options are documented at
## https://yihui.org/knitr/options/
## opts_chunk is namespace-qualified so this works even when knitr is loaded
## but not attached (no library(knitr) call is visible in this file).
knitr::opts_chunk$set(
  comment = "", warning = FALSE, message = FALSE, echo = TRUE,
  tidy = FALSE, fig.width = 7, fig.height = 7
)
## Wider console output; scipen = 10 biases printing towards fixed notation
options(width = 116, scipen = 10)
library(datamart)
## Establish connection: gapminder() comes from the datamart package loaded
## above; the handle `gm` is what every query() call below is issued against
gm <- gapminder()
## Query for data available: lists the names of the series that can be
## requested through `gm` (auto-printed at top level; output follows)
queries(gm)
[1] "Population" "MainReligion" "TotalFertilityRate"
[4] "PerCapitaCO2Emissions" "IncomePerCapita" "InfantMortalityRate"
[7] "LifeExpectancyAtBirth" "AdolescentFertilityRate" "BirthsAttendedBySkilledHealthStaff"
[10] "ContraceptiveUse" "CrudeBirthRate" "MaternalMortalityRate"
[13] "Under5MortalityRate" "CrudeDeathRate" "PopulationGrowth"
[16] "SugarConsumption" "GDP" "ConsumerPricesIndex"
[19] "GDPImplicitDeflator" "CoalConsumption" "HydroelectricityConsumption"
[22] "NaturalGasConsumption" "NuclearConsumption" "OilConsumption"
[25] "CoalProduction" "ElectricityGeneration" "NaturalGasProduction"
[28] "OilProduction" "PrimaryEnergyConsumption" "CO2Emissions"
[31] "SulfurEmissions" "TotalForestArea" "PrimaryForestArea"
[34] "PlantedForestArea" "WoodRemoval" "BiomassStockInForest"
[37] "TotalWaterWithdrawal" "SurfaceArea" "BadTeethPerChild"
[40] "PeopleLivingWithHIV" "MalariaReportedCases" "MalariaReportedDeaths"
[43] "WorkingHoursPerWeek" "UrbanPopulation" "WomensAgeAtFirstMarriage"
[46] "NumberOfBillionaires" "GiniIndex" "BroadbandSubscribers"
[49] "CellPhones" "PersonalComputers" "PatentApplications"
[52] "PatentsGranted" "PatentsInForce" "ArmsExports"
[55] "ArmsImports" "HumanDevelopmentIndex"
## Helper: pull a single period out of a queried series as a named vector.
##   series - object returned by query(gm, ...); presumably a time-indexed
##            table with one column per country (TODO confirm against datamart)
##   year   - label passed to `[` to select the period (default "2008")
## Returns the selected values, named with names(series).
extract_year <- function(series, year = "2008") {
  values <- as.vector(series[year])
  names(values) <- names(series)
  values
}

## Helper: wrap a named vector in a data frame with one value column (named
## `value.name`) plus a `country` column holding the vector's names.
country_frame <- function(values, value.name) {
  df <- data.frame(values)
  names(df) <- value.name
  df$country <- names(values)
  df
}

## babies per woman
TotalFertilityRate <- query(gm, "TotalFertilityRate")
babies <- extract_year(TotalFertilityRate)
df.babies <- country_frame(babies, "babies")

## income per capita, PPP adjusted
IncomePerCapita <- query(gm, "IncomePerCapita")
income <- extract_year(IncomePerCapita)
df.income <- country_frame(income, "income")

## Population
Population <- query(gm, "Population")
population <- extract_year(Population)
df.population <- country_frame(population, "population")
## religion: one group label per entity (entities are treated as countries)
MainReligion <- query(gm, "MainReligion")
## Take the Group column and key it by the matching Entity column
religion <- setNames(MainReligion[, "Group"], MainReligion[, "Entity"])
## Blank group labels are given the explicit label "unknown"
religion <- replace(religion, religion == "", "unknown")
df.religion <- data.frame(religion)
df.religion[["country"]] <- names(religion)
## Named colour lookup: one plotting colour per religion label
colcodes <- setNames(
  c("blue", "red", "green", "grey"),
  c("Christian", "Eastern religions", "Muslim", "unknown")
)
## Merge the four per-country tables into one (inner joins, so only countries
## present in every table survive). The key is spelled out because merge()
## otherwise joins on *all* shared column names, which would silently change
## the result if the tables ever gained another column in common.
df.merged <- merge(
  df.religion,
  merge(
    df.population,
    merge(df.babies, df.income, by = "country"),
    by = "country"
  ),
  by = "country"
)
## ggplot2: fertility against income, one point per country, coloured by
## religion. Point size uses log(population + 10000000); mapping the raw
## population (size = population) is the alternative that was tried.
library(ggplot2)
ggplot(data = df.merged) +
  geom_point(
    mapping = aes(
      x = income, y = babies, color = religion,
      size = log(population + 10000000)
    )
  ) +
  scale_color_manual(values = colcodes)
Population was not used in these graphs because its contribution to the overall information was not important.
## Configure axis-label spacing and margins. par() returns the previous values
## of the settings it changes; keep them so they can be restored at the end
## (previously only the layout was reverted and mgp/mar leaked into later plots).
old.par <- par(mgp = c(1.6, 0.6, 0), mar = c(3, 3, 0.5, 0.8))
## Get 4 figures in one device (2 x 2 grid, filled row by row)
layout(matrix(1:4, 2, byrow = TRUE))
## Load plyr for d_ply()
library(plyr)
## One scatter panel per religion group; d_ply() is called for the plotting
## side effect only and its result is discarded
d_ply(.data = df.merged,
      .variables = "religion",
      .fun = function(data.split) {
        ## Fixed axis limits keep the panels directly comparable
        plot(formula = babies ~ income,
             data = data.split,
             xlim = c(0, 80000), ylim = c(0, 7),
             pch = 19, col = "#00330060", cex = 0.7)
        ## Label the panel with its group name; as.character() makes the
        ## label explicit whether the column is character or factor
        text(35000, 6, as.character(data.split[1, "religion"]))
      })
## Revert to 1 figure 1 device and restore the saved graphical parameters
layout(1)
par(old.par)