Verify Reshape

library(readr)
library(tidyr)
library(dplyr)

wdi <- read_csv("http://ucl.ac.uk/~uctqiax/data/wdi.csv", na = "..")

wdi_long <- wdi %>%
  filter(Country.Code != "") %>% 
  gather(Year, Value, starts_with("X")) %>% 
  select(Country.Code, Country.Name, Year, Series.Code, Value) %>%
  spread(Series.Code, Value)  

wdi_long

## Source: local data frame [6 x 5]
## 
##   Country.Code Country.Name         Year SH.STA.MMRT SH.XPD.TOTL.ZS
##          <chr>        <chr>        <chr>       <dbl>          <dbl>
## 1          ESP        Spain X1995.YR1995           6       7.444592
## 2          ESP        Spain X2000.YR2000           5       7.214756
## 3          ESP        Spain X2005.YR2005           5       8.288271
## 4          FRA       France X1995.YR1995          15      10.355906
## 5          FRA       France X2000.YR2000          12      10.084833
## 6          FRA       France X2005.YR2005          10      10.932626

The code below can be used to verify that we didn’t make a mistake while reshaping the WDI dataset. Make sure to run this code before you rename the columns and before stripping the numeric year column.

columns_to_verify <- unique(wdi$Series.Code)

for (i in seq(1, nrow(wdi_long))) {
  country_code <- wdi_long[i,]$Country.Code
  year <- wdi_long[i,]$Year
  long_row <- subset(wdi_long, Country.Code == country_code & Year == year)
  for (column in columns_to_verify) {
    wide_row <- subset(wdi,  Country.Code == country_code & Series.Code == column)
    if (long_row[1,column] == wide_row[1,year]) {
      # uncommment the next line if you want to see a message for every value compared
      # print(paste("Everything OK at row ", i, "country =", country_code, "year =", year, "column =", column))
    } else {
      print(paste("Mismatch at row ", i, "country =", country_code, "year =", year, "column =", column))
    }
  }
}