library(readr)
library(tidyr)
library(dplyr)
wdi <- read_csv("http://ucl.ac.uk/~uctqiax/data/wdi.csv", na = "..")
wdi_long <- wdi %>%
filter(Country.Code != "") %>%
gather(Year, Value, starts_with("X")) %>%
select(Country.Code, Country.Name, Year, Series.Code, Value) %>%
spread(Series.Code, Value)
wdi_long
## Source: local data frame [6 x 5]
##
## Country.Code Country.Name Year SH.STA.MMRT SH.XPD.TOTL.ZS
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ESP Spain X1995.YR1995 6 7.444592
## 2 ESP Spain X2000.YR2000 5 7.214756
## 3 ESP Spain X2005.YR2005 5 8.288271
## 4 FRA France X1995.YR1995 15 10.355906
## 5 FRA France X2000.YR2000 12 10.084833
## 6 FRA France X2005.YR2005 10 10.932626
The code below can be used to verify that we didn’t make a mistake while reshaping the WDI dataset. Make sure to run this code before you rename the columns and before stripping the numeric year column.
columns_to_verify <- unique(wdi$Series.Code)
for (i in seq(1, nrow(wdi_long))) {
country_code <- wdi_long[i,]$Country.Code
year <- wdi_long[i,]$Year
long_row <- subset(wdi_long, Country.Code == country_code & Year == year)
for (column in columns_to_verify) {
wide_row <- subset(wdi, Country.Code == country_code & Series.Code == column)
if (long_row[1,column] == wide_row[1,year]) {
# uncommment the next line if you want to see a message for every value compared
# print(paste("Everything OK at row ", i, "country =", country_code, "year =", year, "column =", column))
} else {
print(paste("Mismatch at row ", i, "country =", country_code, "year =", year, "column =", column))
}
}
}