Cleaning the gdp metro data
library(reshape2)
library(stringr)
allgmp <- read.csv("data/gdp-metro/allgmp.csv")
head(allgmp)
## component_id component_name
## 1 200 GDP by Metropolitan Area (millions of current dollars)
## 2 200 GDP by Metropolitan Area (millions of current dollars)
## 3 200 GDP by Metropolitan Area (millions of current dollars)
## 4 200 GDP by Metropolitan Area (millions of current dollars)
## 5 200 GDP by Metropolitan Area (millions of current dollars)
## 6 200 GDP by Metropolitan Area (millions of current dollars)
## FIPS area_name industry_id
## 1 00998 U.S. Metropolitan Portion 1
## 2 00998 U.S. Metropolitan Portion 2
## 3 00998 U.S. Metropolitan Portion 3
## 4 00998 U.S. Metropolitan Portion 4
## 5 00998 U.S. Metropolitan Portion 5
## 6 00998 U.S. Metropolitan Portion 6
## industry_name X2001 X2002
## 1 All industry total 9046139.000 9347825.000
## 2 Private industries 8026223.000 8258864.000
## 3 Agriculture, forestry, fishing, and hunting 51168.000 51264.000
## 4 Crop and animal production (Farms) 35984.000 35939.000
## 5 Forestry, fishing, and related activities 15184.000 15325.000
## 6 Mining 88968.000 77420.000
## X2003 X2004 X2005 X2006 X2007
## 1 9771963.000 10413832.000 11082353.000 11772193.000 12324166.000
## 2 8617065.000 9197683.000 9803105.000 10426966.000 10899969.000
## 3 58056.000 69766.000 66533.000 63517.000 82931.000
## 4 41755.000 52718.000 48412.000 44444.000 63630.000
## 5 16301.000 17049.000 18120.000 19073.000 19301.000
## 6 107198.000 129218.000 170753.000 198165.000 207769.000
## X2008
## 1 12724270.000
## 2 11221566.000
## 3 76455.000
## 4 n/a
## 5 n/a
## 6 244226.000
tail(allgmp)
## component_id
## 96900 900
## 96901 Note: NAICS Industry detail is based on the 1997 North American Industry Classification System (NAICS)
## 96902 Note: (D) Not shown in order to avoid the disclosure of confidential information; estimates are included in higher level totals.
## 96903 Note: (L) Less than $500,000 in nominal or real GDP by metropolitan area.
## 96904 Note: n/a Not available.
## 96905 Source: U.S. Department of Commerce / Bureau of Economic Analysis / Regional Product Division -- September 2009
## component_name
## 96900 Real GDP by Metropolitan Area (millions of chained 2001 dollars)
## 96901
## 96902
## 96903
## 96904
## 96905
## FIPS area_name industry_id
## 96900 49740 Yuma, AZ (MSA) 109
## 96901
## 96902
## 96903
## 96904
## 96905
## industry_name X2001 X2002 X2003
## 96900 Private services-providing industries 1228.000 1342.000 1485.000
## 96901
## 96902
## 96903
## 96904
## 96905
## X2004 X2005 X2006 X2007 X2008
## 96900 1598.000 1745.000 1872.000 1994.000 2084.000
## 96901
## 96902
## 96903
## 96904
## 96905
allgmp <- subset(allgmp, component_name == "GDP by Metropolitan Area (millions of current dollars)")
allgmp$component_name <- NULL
all_m <- melt(allgmp, m = paste("X", 2001:2008, sep = ""))
all_m$year <- as.numeric(str_replace(all_m$variable, "X", ""))
all_m$variable <- NULL
gdp <- all_m[c("FIPS", "industry_id", "year", "value")]
names(gdp) <- c("fips", "indust", "year", "gdp")
gdp <- gdp[!gdp$fips %in% c("FIPS", ""), ]
gdp$code <- ifelse(gdp$gdp %in% c("n/a", "(D)", "(L)"), as.character(gdp$gdp),
NA)
gdp$gdp[!is.na(gdp$code)] <- NA
gdp$gdp <- as.numeric(as.character(gdp$gdp))
gdp$fips <- as.numeric(as.character(gdp$fips))
gdp$indust <- as.numeric(as.character(gdp$indust))
write.table(gdp, "gdp-metro.csv", sep = ",", row = F)
head(gdp)
## fips indust year gdp code
## 1 998 1 2001 9046139 <NA>
## 2 998 2 2001 8026223 <NA>
## 3 998 3 2001 51168 <NA>
## 4 998 4 2001 35984 <NA>
## 5 998 5 2001 15184 <NA>
## 6 998 6 2001 88968 <NA>
tail(gdp)
## fips indust year gdp code
## 258363 49740 103 2008 283 <NA>
## 258364 49740 104 2008 394 <NA>
## 258365 49740 105 2008 158 <NA>
## 258366 49740 106 2008 NA n/a
## 258367 49740 108 2008 1198 <NA>
## 258368 49740 109 2008 2348 <NA>