Getting data from GapMinder.org

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
## opts_chunk is referenced through the knitr namespace so that this chunk
## also works when the rendering front end has not attached knitr
## (otherwise: "object 'opts_chunk' not found").
##   comment = ""        : no prefix in front of printed output
##   warning / message   : hide warnings and messages in the rendered page
##   echo = TRUE         : show the source code of each chunk
##   tidy = FALSE        : keep the code formatting exactly as written
##   fig.width / height  : figure size in inches
knitr::opts_chunk$set(comment = "", warning = FALSE, message = FALSE, echo = TRUE,
    tidy = FALSE, fig.width = 7, fig.height = 7)
## Wide console output; scipen = 10 biases printing towards fixed notation
options(width = 116, scipen = 10)

See also the revised version at: http://rpubs.com/kaz_yos/1285

References

Load the datamart package

library(datamart)

Use gapminder() function

## Establish connection
## gapminder() comes from the datamart package loaded above; the object it
## returns (gm) is what queries() and query() are called on below.
gm <- gapminder()
## Query for data available
## Prints the resource names that can be requested with query(gm, "<name>"),
## e.g. "TotalFertilityRate", "IncomePerCapita", "Population", "MainReligion".
queries(gm)
 [1] "Population"                         "MainReligion"                       "TotalFertilityRate"                
 [4] "PerCapitaCO2Emissions"              "IncomePerCapita"                    "InfantMortalityRate"               
 [7] "LifeExpectancyAtBirth"              "AdolescentFertilityRate"            "BirthsAttendedBySkilledHealthStaff"
[10] "ContraceptiveUse"                   "CrudeBirthRate"                     "MaternalMortalityRate"             
[13] "Under5MortalityRate"                "CrudeDeathRate"                     "PopulationGrowth"                  
[16] "SugarConsumption"                   "GDP"                                "ConsumerPricesIndex"               
[19] "GDPImplicitDeflator"                "CoalConsumption"                    "HydroelectricityConsumption"       
[22] "NaturalGasConsumption"              "NuclearConsumption"                 "OilConsumption"                    
[25] "CoalProduction"                     "ElectricityGeneration"              "NaturalGasProduction"              
[28] "OilProduction"                      "PrimaryEnergyConsumption"           "CO2Emissions"                      
[31] "SulfurEmissions"                    "TotalForestArea"                    "PrimaryForestArea"                 
[34] "PlantedForestArea"                  "WoodRemoval"                        "BiomassStockInForest"              
[37] "TotalWaterWithdrawal"               "SurfaceArea"                        "BadTeethPerChild"                  
[40] "PeopleLivingWithHIV"                "MalariaReportedCases"               "MalariaReportedDeaths"             
[43] "WorkingHoursPerWeek"                "UrbanPopulation"                    "WomensAgeAtFirstMarriage"          
[46] "NumberOfBillionaires"               "GiniIndex"                          "BroadbandSubscribers"              
[49] "CellPhones"                         "PersonalComputers"                  "PatentApplications"                
[52] "PatentsGranted"                     "PatentsInForce"                     "ArmsExports"                       
[55] "ArmsImports"                        "HumanDevelopmentIndex"             

Construct data

## Build one data frame per indicator, each with the indicator value and a
## `country` key column, then join them all into df.merged.

## Year extracted from every time series (previously hard-coded three times)
target.year <- "2008"

## Extract the entries of `series` for `year` as a plain vector and label
## them with names(series).
## NOTE(review): presumably query() returns one column per country, so that
## names(series) are country names -- they are used as the merge key below.
slice_year <- function(series, year) {
    values <- as.vector(series[year])
    names(values) <- names(series)
    values
}

## Turn a named vector into a data frame with one column called `value.name`
## plus a `country` column holding the vector's names.
country_frame <- function(values, value.name) {
    out <- data.frame(values)
    names(out) <- value.name
    out$country <- names(values)
    out
}

## babies per woman
TotalFertilityRate <- query(gm, "TotalFertilityRate")
babies <- slice_year(TotalFertilityRate, target.year)
df.babies <- country_frame(babies, "babies")

## income per capita, PPP adjusted
IncomePerCapita <- query(gm, "IncomePerCapita")
income <- slice_year(IncomePerCapita, target.year)
df.income <- country_frame(income, "income")

## Population
Population <- query(gm, "Population")
population <- slice_year(Population, target.year)
df.population <- country_frame(population, "population")

## religion
MainReligion <- query(gm, "MainReligion")
## as.character() guards against Group being a factor: assigning the new
## value "unknown" to a factor without that level would yield NA instead.
religion <- as.character(MainReligion[, "Group"])
names(religion) <- MainReligion[, "Entity"]
## Countries without a recorded group get an explicit label
religion[religion == ""] <- "unknown"
df.religion <- country_frame(religion, "religion")

## Colour used for each religion group in the plots
colcodes <- c("Christian"         = "blue",
              "Eastern religions" = "red",
              "Muslim"            = "green",
              "unknown"           = "grey")

## Merge
## Inner-join all four frames on the explicit key "country" rather than on
## whatever column names the frames happen to share. Resulting column order:
## country, religion, population, babies, income.
df.merged <- Reduce(function(left, right) merge(left, right, by = "country"),
                    list(df.religion, df.population, df.babies, df.income))
## ggplot2
library(ggplot2)

## Scatter plot of babies per woman against income per capita: one point per
## country, coloured by religion group (colours from colcodes) and sized by
## log(population + 10 million). The plain-population size mapping is kept
## as a commented-out alternative.
ggplot(data = df.merged,
       mapping = aes(x = income,
                     y = babies,
                     colour = religion,
                     ## size = population
                     size = log(population + 10000000))) +
    geom_point() +
    scale_colour_manual(values = colcodes)

plot of chunk unnamed-chunk-5

Traditional (base graphics) plots, as suggested on the Ura-R-jp Wiki (http://blog.goo.ne.jp/r-de-r/e/a8ad52c2299c4975fad3dfd4ffae5f90)

Population was not used in these graphs because it contributed little to the overall information.

## Configure
## Tighter axis-title/label spacing (mgp) and margins (mar). par() returns the
## previous values of the parameters it changes; keep them so they can be
## restored once the panels are drawn.
old.par <- par(mgp = c(1.6, 0.6, 0), mar = c(3, 3, 0.5, 0.8))

## Get 4 figures in one device
## 2 x 2 grid filled row by row -- one panel per religion group
layout(matrix(1:4, 2, byrow = TRUE))

## Load plyr for d_ply()
library(plyr)
## d_ply() splits df.merged by religion and calls the function once per
## subset, discarding any return value (used here only for plotting).
d_ply(.data      = df.merged,
      .variables = "religion",
      .fun       = function(data.split) {

          ## Same fixed axis ranges in every panel so they are comparable;
          ## the colour is semi-transparent (alpha suffix "60")
          plot(formula = babies ~ income,
               data = data.split,
               xlim = c(0, 80000), ylim = c(0, 7),
               pch = 19, col = "#00330060", cex = 0.7)

          ## Label the panel with the religion group of this subset
          text(35000, 6, data.split[1, "religion"])
      })

## Restore the mgp/mar values that were in effect before this block, so the
## settings do not leak into later plots
par(old.par)

plot of chunk unnamed-chunk-6


## Revert to 1 figure 1 device
## (undoes the 2 x 2 grid set with layout(matrix(1:4, 2, byrow = TRUE)))
layout(1)