Tourism's share of GDP during the housing crisis (2000-2008)

# Load the three input tables used by the rest of the script.
# Keep strings as character vectors (R < 4.0 converts them to factors by
# default).
options(stringsAsFactors = FALSE)
gdp <- read.csv("data/gdp-metro/gdp-metro.csv", header = TRUE)
fips1 <- read.csv("data/gdp-metro/fips-dictionary.csv", header = TRUE)
# Only the first 364 rows are kept.
# NOTE(review): presumably the rows after 364 are not dictionary entries
# (e.g. footnotes) -- confirm against the CSV.
fips1 <- fips1[seq_len(364), ]
latitude <- read.csv("data/city-location/latlng.csv", header = TRUE)
# Clean the FIPS dictionary: separate the city from the state and
# remove the "(MSA)" suffix from the column.

head(fips1)
##    fips                 Metropolitan.Area
## 1   998         U.S. Metropolitan Portion
## 2 10180                 Abilene, TX (MSA)
## 3 10420                   Akron, OH (MSA)
## 4 10500                  Albany, GA (MSA)
## 5 10580 Albany-Schenectady-Troy, NY (MSA)
## 6 10740             Albuquerque, NM (MSA)
tail(fips1)
##      fips                       Metropolitan.Area
## 359 49340                     Worcester, MA (MSA)
## 360 49420                        Yakima, WA (MSA)
## 361 49620                  York-Hanover, PA (MSA)
## 362 49660 Youngstown-Warren-Boardman, OH-PA (MSA)
## 363 49700                     Yuba City, CA (MSA)
## 364 49740                          Yuma, AZ (MSA)
library(reshape2)
# Split "Area name, ST (MSA)" at the comma into the area name ("msa") and the
# trailing state part ("state").
split_fips <- colsplit(fips1$Metropolitan.Area, pattern = ", ",
    names = c("msa", "state"))
fips <- cbind(data.frame(fips = fips1$fips), split_fips)

# The state part looks like "TX (MSA)" or "OH-PA (MSA)"; keep only the first
# two-letter state code.
fips$state <- substr(fips$state, 1, 2)
# Multi-city areas are joined with "-" (e.g. "Albany-Schenectady-Troy"); keep
# only the first city. Vectorised, so each row gets exactly one value instead
# of assigning a multi-element split result into a single cell.
fips$city <- sub("-.*$", "", as.character(fips$msa))
# Drop row 1, the national aggregate ("U.S. Metropolitan Portion"), which is
# not a city, and keep the code, city and state columns.
fips <- fips[-1, c("fips", "city", "state")]
names(fips) <- c("msa_fips", "city", "state")
write.table(fips, "trimmed-fips-dictionary.csv", sep = ",", row.names = FALSE)
head(fips)
##   msa_fips        city state
## 2    10180     Abilene    TX
## 3    10420       Akron    OH
## 4    10500      Albany    GA
## 5    10580      Albany    NY
## 6    10740 Albuquerque    NM
## 7    10780  Alexandria    LA
tail(fips)
##     msa_fips       city state
## 359    49340  Worcester    MA
## 360    49420     Yakima    WA
## 361    49620       York    PA
## 362    49660 Youngstown    OH
## 363    49700  Yuba City    CA
## 364    49740       Yuma    AZ

# calculating role of tourism and growth in economy
library(plyr)
# Pick out the GDP figures of interest from one group of rows.
#
# Args:
#   df: data frame with an `indust` column (industry code) and a `gdp`
#       column. It is applied per (fips, year) group by ddply.
#
# Returns:
#   A vector with six elements named total, leisure, accomodations,
#   entertainment, real_estate and construction, always in that order.
#   An industry code with no row in `df` yields NA rather than being dropped,
#   so every group produces a result of the same shape. If a code occurs more
#   than once, the first matching row is used.
gdpanalysis <- function(df) {
    industry_codes <- c(
        total = 1,
        # tourism economy is a combination of amusement, accomodation and
        # leisure
        leisure = 105,
        accomodations = 74,
        entertainment = 71,
        # growth is a combination of construction and real estate
        real_estate = 55,
        construction = 11
    )

    # match() returns NA for a code that is absent, which indexes to NA;
    # logical subsetting would instead return a zero-length vector that c()
    # silently drops.
    values <- df$gdp[match(industry_codes, df$indust)]
    names(values) <- names(industry_codes)
    values
}

# Apply gdpanalysis to every (fips, year) group of the GDP table.
new_gdp <- ddply(gdp, .(fips, year), gdpanalysis)
# Add city and state information to new_gdp. The dictionary gets a numeric
# `fips` key to join on; all.x = TRUE keeps GDP rows that have no dictionary
# entry.
fips$fips <- as.numeric(fips$msa_fips)
gdp_w_cs <- merge(new_gdp, fips, by = "fips", all.x = TRUE)


# Add latitude and longitude information, joined on city and state. Rows
# without a match in the location table are kept.
gdp_final <- merge(gdp_w_cs, latitude, by = c("city", "state"), all.x = TRUE)
write.table(gdp_final, "tourism-and-growth.csv", sep = ",", row.names = FALSE)

The plots are:

library(ggplot2)
# can also sum these and find out which state has good contribution to
# tourism. tourism=sum(leisure,accomodation,entertainment)
states <- map_data("state")
# Base map: state outlines. `fill` is a constant, so it is set outside aes();
# inside aes() it would be treated as a mapped variable and add a legend.
p <- ggplot() + geom_polygon(data = states, aes(x = long, y = lat, group = group),
    fill = "grey", colour = "white")
# Leisure GDP in 2008: colour = share of total GDP, size = dollar amount.
# Columns are referenced by bare name so they are taken from the 2008 subset
# passed as `data`, not from the full gdp_final table.
p <- p + geom_point(data = gdp_final[gdp_final$year == 2008, ], aes(x = longitude,
    y = latitude, colour = leisure/total, size = leisure))
p + scale_colour_gradient("percent of GDP", trans = "sqrt") + scale_size("total dollars",
    trans = "log10")

plot of chunk unnamed-chunk-2


# Base map: state outlines. `fill` is a constant, so it is set outside aes().
q <- ggplot() + geom_polygon(data = states, aes(x = long, y = lat, group = group),
    fill = "grey", colour = "white")
# Accommodation GDP in 2008: colour = share of total GDP, size = dollar
# amount. Bare column names are evaluated in the 2008 subset passed as `data`.
q <- q + geom_point(data = gdp_final[gdp_final$year == 2008, ], aes(x = longitude,
    y = latitude, colour = accomodations/total, size = accomodations))
q + scale_colour_gradient("percent of GDP", trans = "log") + scale_size("total dollars",
    trans = "log10")

plot of chunk unnamed-chunk-2


# Base map: state outlines. `fill` is a constant, so it is set outside aes().
# ggplot() has no `main` argument, so the title is added with ggtitle(); it
# names 2008 to match the year the points are filtered to.
r <- ggplot() + geom_polygon(data = states, aes(x = long, y = lat, group = group),
    fill = "grey", colour = "white") +
    ggtitle("Cities by amount of arts, entertainment, and recreation dollars spent in 2008")
# Entertainment GDP in 2008: colour = share of total GDP, size = dollar
# amount. Bare column names are evaluated in the 2008 subset passed as `data`.
r <- r + geom_point(data = gdp_final[gdp_final$year == 2008, ], aes(x = longitude,
    y = latitude, colour = entertainment/total, size = entertainment))
r + scale_colour_gradient("percent of GDP", trans = "log") + scale_size("total dollars",
    trans = "log10")

plot of chunk unnamed-chunk-2



# growth
# Base map: state outlines. `fill` is a constant, so it is set outside aes().
s <- ggplot() + geom_polygon(data = states, aes(x = long, y = lat, group = group),
    fill = "grey", colour = "white")
# Real estate GDP in 2004: colour = share of total GDP, size = dollar amount.
# Bare column names are evaluated in the 2004 subset passed as `data`. The
# title is added with ggtitle(); `main` is not an aesthetic.
s <- s + geom_point(data = gdp_final[gdp_final$year == 2004, ], aes(x = longitude,
    y = latitude, colour = real_estate/total, size = real_estate)) +
    ggtitle("Cities by amount of Real Estate dollars spent 2004")
s + scale_colour_gradient("percent of GDP") + scale_size("total dollars", trans = "log10")

plot of chunk unnamed-chunk-2


# Base map: state outlines. `fill` is a constant, so it is set outside aes().
# NOTE: the name `t` masks base::t() for the rest of the session.
t <- ggplot() + geom_polygon(data = states, aes(x = long, y = lat, group = group),
    fill = "grey", colour = "white")
# Construction GDP in 2004: colour = share of total GDP, size = dollar amount.
# Bare column names are evaluated in the 2004 subset passed as `data`. The
# title is added with ggtitle(); `main` is not an aesthetic.
t <- t + geom_point(data = gdp_final[gdp_final$year == 2004, ], aes(x = longitude,
    y = latitude, colour = construction/total, size = construction)) +
    ggtitle("Cities by amount of construction dollars spent 2004")
t + scale_colour_gradient("percent of GDP") + scale_size("total dollars", trans = "log10")

plot of chunk unnamed-chunk-2