Cleaning the HPI for states and for msa
- clean the data such that only the data from 2000 to 2009 is available
- edit the column names for convenience
# add 'city, state, fips, year, quarter, hpi, error' as column titles
a <- read.csv("data/fhfa-house-price-index/Original/1q09hpi-cbsa-Ver2.csv")
head(a)
## Abilene TX X10180 X1975 X1 NA. NA..1
## 1 Abilene TX 10180 1975 2 NA NA
## 2 Abilene TX 10180 1975 3 NA NA
## 3 Abilene TX 10180 1975 4 NA NA
## 4 Abilene TX 10180 1976 1 NA NA
## 5 Abilene TX 10180 1976 2 NA NA
## 6 Abilene TX 10180 1976 3 NA NA
tail(a)
## Abilene TX X10180 X1975 X1 NA. NA..1
## 52739 Yuma AZ 49740 2007 4 230.1 3.49
## 52740 Yuma AZ 49740 2008 1 224.8 3.35
## 52741 Yuma AZ 49740 2008 2 220.7 3.45
## 52742 Yuma AZ 49740 2008 3 214.6 3.89
## 52743 Yuma AZ 49740 2008 4 214.5 4.11
## 52744 Yuma AZ 49740 2009 1 208.1 3.16
names(a) <- c("city", "state", "fips_msa", "year", "quarter", "hpi", "error")
new <- a[a[, "year"] %in% 2000:2009, ]
write.table(new, "fhfa-house-price-index-msa.csv", sep = ",", row = FALSE)
head(new)
## city state fips_msa year quarter hpi error
## 100 Abilene TX 10180 2000 1 112.1 2.63
## 101 Abilene TX 10180 2000 2 112.5 2.44
## 102 Abilene TX 10180 2000 3 114.1 2.47
## 103 Abilene TX 10180 2000 4 116.7 2.70
## 104 Abilene TX 10180 2001 1 116.8 2.64
## 105 Abilene TX 10180 2001 2 117.7 2.55
tail(new)
## city state fips_msa year quarter hpi error
## 52739 Yuma AZ 49740 2007 4 230.1 3.49
## 52740 Yuma AZ 49740 2008 1 224.8 3.35
## 52741 Yuma AZ 49740 2008 2 220.7 3.45
## 52742 Yuma AZ 49740 2008 3 214.6 3.89
## 52743 Yuma AZ 49740 2008 4 214.5 4.11
## 52744 Yuma AZ 49740 2009 1 208.1 3.16