PROBLEM 1. Checking the raw data in the primary dataset

# Load the data
housingdata <- readRDS("testdata20250121.RDS")

# Extract the day, month, and year
housingdata$sale_date <- as.integer(housingdata$sale_date)
housingdata$day <- 100 * ((housingdata$sale_date / 100) - round(housingdata$sale_date / 100))
housingdata$month <- 100 * (((housingdata$sale_date - housingdata$day) / 10000) - round((housingdata$sale_date - housingdata$day) / 10000))
housingdata$year <- round(housingdata$sale_date / 10000)

# Summarize
housingdata$sale_date[1:10]
##  [1] 20150129 20090904 20130228 20080825 20140225 20180111 20150408 20150623
##  [9] 20090528 20120131
housingdata$year[1:10]
##  [1] 2015 2009 2013 2008 2014 2018 2015 2015 2009 2012
housingdata$month[1:10]
##  [1] 1 9 2 8 2 1 4 6 5 1
housingdata$day[1:10]
##  [1] 29  4 28 25 25 11  8 23 28 31
# Generate categorical variables by year built
housingdata$year_built_very_old <- ifelse(housingdata$year_built < 1960, 1, 0)
housingdata$year_built_quite_old <- ifelse(housingdata$year_built >= 1960 & housingdata$year_built < 1980, 1, 0)
housingdata$year_built_quite_new <- ifelse(housingdata$year_built >= 1980 & housingdata$year_built < 2000, 1, 0)
housingdata$year_built_very_new <- ifelse(housingdata$year_built >= 2000, 1, 0)
housingdata$year_built_grp <- 1 * housingdata$year_built_very_old + 2 * housingdata$year_built_quite_old + 3 * housingdata$year_built_quite_new + 4 * housingdata$year_built_very_new
housingdata$log_sale_amount <- log(housingdata$sale_amount)

# 3D plot (not working)
#scatter3D(housingdata$log_sale_amount, housingdata$year_built_group, housingdata$number_of_fireplaces)
#Error in plot.new() : figure margins too large

# Save the data
cleanhousingdata <- housingdata

PROBLEM 2. Adding secondary data to the primary dataset

# Load the HPI data
hpidata <- read_excel("hpi_po_us_and_census.xls")
summary(hpidata)
##    division              year           qtr       
##  Length:1350        Min.   :1991   Min.   :1.000  
##  Class :character   1st Qu.:1999   1st Qu.:1.000  
##  Mode  :character   Median :2007   Median :2.000  
##                     Mean   :2007   Mean   :2.489  
##                     3rd Qu.:2016   3rd Qu.:3.000  
##                     Max.   :2024   Max.   :4.000  
##  index_po_not_seasonally_adjusted index_po_seasonally_adjusted
##  Min.   : 94.17                   Min.   : 94.35              
##  1st Qu.:136.18                   1st Qu.:135.38              
##  Median :190.72                   Median :189.87              
##  Mean   :202.96                   Mean   :202.11              
##  3rd Qu.:233.36                   3rd Qu.:233.31              
##  Max.   :599.69                   Max.   :591.16
# Clean the data to corresponding range
cleanhpidata <- hpidata[hpidata$division != "USA", ]
cleanhpidata <- cleanhpidata[cleanhpidata$year >= 2008, ]

# Re-adjust HPI according to the new base
hpi_base <- cleanhpidata %>%
  group_by(division) %>%
  slice(1)
hpi_base$base <- hpi_base$index_po_seasonally_adjusted
hpi_base <- subset(hpi_base, select = c(division, base))
cleanhpidata <- merge(cleanhpidata, hpi_base, by = "division")
cleanhpidata$hpi <- round(cleanhpidata$index_po_seasonally_adjusted / cleanhpidata$base * 100, digits = 2)
cleanhpidata <- subset(cleanhpidata, select = c(division, year, qtr, hpi))

# Convert the data to division level
df_abbr_to_division <- read_csv("df_abbr_to_division.csv", col_names = TRUE)
## Rows: 51 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): abbr, division
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleanhousingdata <- merge(cleanhousingdata, df_abbr_to_division, by = "abbr")

cleanhousingdata$qtr1 <- ifelse(cleanhousingdata$month > 0 & cleanhousingdata$month <= 3, 1, 0)
cleanhousingdata$qtr2 <- ifelse(cleanhousingdata$month > 3 & cleanhousingdata$month <= 6, 1, 0)
cleanhousingdata$qtr3 <- ifelse(cleanhousingdata$month > 6 & cleanhousingdata$month <= 9, 1, 0)
cleanhousingdata$qtr4 <- ifelse(cleanhousingdata$month > 9 & cleanhousingdata$month <= 12, 1, 0)
cleanhousingdata$qtr <- 1 * cleanhousingdata$qtr1 + 2 * cleanhousingdata$qtr2 + 3 * cleanhousingdata$qtr3 + 4 * cleanhousingdata$qtr4

cleanhousingdata <- merge(cleanhousingdata, cleanhpidata, by = c("division", "year", "qtr"))

cleanhousingdata$dlog_sale_amount <- log(cleanhousingdata$sale_amount * 100 / cleanhousingdata$hpi)

mappingdata <- cleanhousingdata %>%
  group_by(abbr, year) %>%
  summarise(mean_dlog_sale_amount = mean(dlog_sale_amount))
## `summarise()` has grouped output by 'abbr'. You can override using the
## `.groups` argument.
mappingdata$state <- mappingdata$abbr

# Map of housing prices for the year of 2008
mappingdata2008 <- mappingdata[mappingdata$year == 2008, ]
plot_usmap(data = mappingdata2008, values = "mean_dlog_sale_amount", color = "red") +
    labs(title = "2008") +
  scale_fill_continuous(name = "Mean of log of deflated sale amounts over time") +
  theme(legend.position = "bottom")

# Map of housing prices for the year of 2019
mappingdata2019 <- mappingdata[mappingdata$year == 2019, ]
plot_usmap(data = mappingdata2019, values = "mean_dlog_sale_amount", color = "red") +
    labs(title = "2019") +
  scale_fill_continuous(name = "Mean of log of deflated sale amounts over time") +
  theme(legend.position = "bottom")

# Map of housing prices by county
mappingdata <- cleanhousingdata %>%
  group_by(fips_county_code) %>%
  summarise(mean_dlog_sale_amount = mean(dlog_sale_amount))
mappingdata$fips <- mappingdata$fips_county_code
plot_usmap(data = mappingdata, values = "mean_dlog_sale_amount", color = "red") +
    labs(title = "Mean of log of deflated sale amounts") +
  scale_fill_continuous(name = "Mean of log of deflated sale amounts") +
  theme(legend.position = "bottom")

# Map of year built by county
mappingdata <- cleanhousingdata %>%
  group_by(fips_county_code) %>%
  summarise(mean_year_built = mean(year_built))
mappingdata$fips <- mappingdata$fips_county_code
plot_usmap(data = mappingdata, values = "mean_year_built", color = "red") +
    labs(title = "Mean of year built") +
  scale_fill_continuous(name = "Mean of year built") +
  theme(legend.position = "bottom")

# DISCUSSION: The later the house was built, the higher the sale price is.

PROBLEM 3. Adding further secondary data

# Load the crime rate data
cleancrimedata <- read_excel("table-1.xls")
## New names:
## • `` -> `...11`
## • `` -> `...12`
cleancrimedata <- cleancrimedata[cleancrimedata$year >= 2008 & cleancrimedata$year <= 2019, ]
cleancrimedata$year <- as.numeric(cleancrimedata$year)

cleanhousingcrimedata <- merge(cleanhousingdata, cleancrimedata, by = "year")

# Plot crime rate against year
ggplot(cleancrimedata, aes(x = as.factor(year), y = violent)) +
  geom_bar(stat = "identity") +
  labs(title = "Crime rate over time",
       x = "Year",
       y = "Total violent crime rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plottingdata <- cleanhousingdata %>%
  group_by(year, year_built_grp) %>%
  summarise(mean_dlog_sale_amount = mean(dlog_sale_amount))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
plottingdata_grp1 <- plottingdata[plottingdata$year_built_grp == 1, ]
plottingdata_grp2 <- plottingdata[plottingdata$year_built_grp == 2, ]
plottingdata_grp3 <- plottingdata[plottingdata$year_built_grp == 3, ]
plottingdata_grp4 <- plottingdata[plottingdata$year_built_grp == 4, ]

ggplot(plottingdata_grp1, aes(x = as.factor(year), y = mean_dlog_sale_amount)) +
  geom_bar(stat = "identity") +
  labs(title = "Mean of log of deflated sale amounts over time: very old housing",
       x = "Year",
       y = "Mean of log of deflated sale amounts") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(plottingdata_grp2, aes(x = as.factor(year), y = mean_dlog_sale_amount)) +
  geom_bar(stat = "identity") +
  labs(title = "Mean of log of deflated sale amounts over time: quite old housing",
       x = "Year",
       y = "Mean of log of deflated sale amounts") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(plottingdata_grp3, aes(x = as.factor(year), y = mean_dlog_sale_amount)) +
  geom_bar(stat = "identity") +
  labs(title = "Mean of log of deflated sale amounts over time: quite new housing",
       x = "Year",
       y = "Mean of log of deflated sale amounts") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(plottingdata_grp4, aes(x = as.factor(year), y = mean_dlog_sale_amount)) +
  geom_bar(stat = "identity") +
  labs(title = "Mean of log of deflated sale amounts over time: very new housing",
       x = "Year",
       y = "Mean of log of deflated sale amounts") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# DISCUSSION: The total violent crime rates generally decreased over time, but the general sale amounts did not change much over time in each group. The sales amount of the quite new houses are somewhat higher than the other houses.

PROBLEM 4. Summarizing the data

empiricsdata <- subset(cleanhousingcrimedata, select = c(sale_amount, dlog_sale_amount, 
                                                         violent, murder, rape, robbery, assault, property, burglary, larceny, 
                                                         bedrooms_all_buildings, number_of_bathrooms, number_of_fireplaces, stories_number, land_square_footage, AC_presence, 
                                                         division, year_built_grp))

summary(empiricsdata)
##   sale_amount        dlog_sale_amount    violent          murder     
##  Min.   :1.000e+02   Min.   : 4.39    Min.   :361.6   Min.   :4.400  
##  1st Qu.:1.800e+05   1st Qu.:12.07    1st Qu.:369.1   1st Qu.:4.700  
##  Median :2.870e+05   Median :12.53    Median :383.8   Median :5.000  
##  Mean   :4.417e+05   Mean   :12.57    Mean   :384.7   Mean   :4.951  
##  3rd Qu.:4.740e+05   3rd Qu.:13.03    3rd Qu.:387.1   3rd Qu.:5.300  
##  Max.   :2.730e+09   Max.   :21.64    Max.   :458.6   Max.   :5.400  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.10   1st Qu.: 98.6   1st Qu.:238.1   1st Qu.:2363   1st Qu.:429.7  
##  Median :29.80   Median :102.2   Median :248.3   Median :2500   Median :494.7  
##  Mean   :28.89   Mean   :104.2   Mean   :246.6   Mean   :2561   Mean   :524.2  
##  3rd Qu.:30.00   3rd Qu.:113.1   3rd Qu.:250.2   3rd Qu.:2868   3rd Qu.:672.2  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   :  0.010     Min.   :  1.00      
##  1st Qu.:1696   1st Qu.:  3.000        1st Qu.:  2.000     1st Qu.:  1.00      
##  Median :1784   Median :  3.000        Median :  2.000     Median :  1.00      
##  Mean   :1802   Mean   :  3.431        Mean   :  2.472     Mean   :  1.19      
##  3rd Qu.:1965   3rd Qu.:  4.000        3rd Qu.:  3.000     3rd Qu.:  1.00      
##  Max.   :2166   Max.   :999.000        Max.   :450.000     Max.   :999.00      
##  stories_number    land_square_footage  AC_presence       division        
##  Min.   :  0.500   Min.   :        1   Min.   :0.0000   Length:2350548    
##  1st Qu.:  1.000   1st Qu.:     6874   1st Qu.:0.0000   Class :character  
##  Median :  1.000   Median :     9583   Median :1.0000   Mode  :character  
##  Mean   :  1.371   Mean   :    39671   Mean   :0.6091                     
##  3rd Qu.:  2.000   3rd Qu.:    17031   3rd Qu.:1.0000                     
##  Max.   :175.000   Max.   :999999999   Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :2.679  
##  3rd Qu.:4.000  
##  Max.   :4.000
tapply(empiricsdata, empiricsdata$division, summary)
## $DV_ENC
##   sale_amount       dlog_sale_amount    violent          murder     
##  Min.   :     500   Min.   : 6.211   Min.   :361.6   Min.   :4.400  
##  1st Qu.:  151000   1st Qu.:11.885   1st Qu.:369.1   1st Qu.:4.800  
##  Median :  205000   Median :12.203   Median :383.8   Median :5.000  
##  Mean   :  270018   Mean   :12.225   Mean   :386.9   Mean   :4.984  
##  3rd Qu.:  281750   3rd Qu.:12.539   3rd Qu.:387.8   3rd Qu.:5.300  
##  Max.   :31000000   Max.   :17.201   Max.   :458.6   Max.   :5.400  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.70   1st Qu.: 86.1   1st Qu.:241.5   1st Qu.:2210   1st Qu.:378.0  
##  Median :29.80   Median :102.2   Median :248.3   Median :2500   Median :494.7  
##  Mean   :29.05   Mean   :104.8   Mean   :248.1   Mean   :2564   Mean   :522.7  
##  3rd Qu.:30.00   3rd Qu.:113.1   3rd Qu.:250.2   3rd Qu.:2868   3rd Qu.:672.2  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   : 1.000         Min.   : 1.000      Min.   : 1.000      
##  1st Qu.:1602   1st Qu.: 3.000         1st Qu.: 2.000      1st Qu.: 1.000      
##  Median :1784   Median : 3.000         Median : 2.500      Median : 1.000      
##  Mean   :1804   Mean   : 3.294         Mean   : 2.338      Mean   : 1.084      
##  3rd Qu.:1965   3rd Qu.: 4.000         3rd Qu.: 3.000      3rd Qu.: 1.000      
##  Max.   :2166   Max.   :30.000         Max.   :22.500      Max.   :11.000      
##  stories_number  land_square_footage  AC_presence       division        
##  Min.   :1.000   Min.   :     44     Min.   :0.0000   Length:6924       
##  1st Qu.:1.000   1st Qu.:   7777     1st Qu.:1.0000   Class :character  
##  Median :1.500   Median :  10200     Median :1.0000   Mode  :character  
##  Mean   :1.535   Mean   :  33879     Mean   :0.7595                     
##  3rd Qu.:2.000   3rd Qu.:  18901     3rd Qu.:1.0000                     
##  Max.   :5.000   Max.   :1711908     Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :2.625  
##  3rd Qu.:4.000  
##  Max.   :4.000  
## 
## $DV_ESC
##   sale_amount       dlog_sale_amount    violent          murder     
##  Min.   :     500   Min.   : 6.001   Min.   :361.6   Min.   :4.400  
##  1st Qu.:  148465   1st Qu.:11.824   1st Qu.:366.7   1st Qu.:4.900  
##  Median :  212900   Median :12.155   Median :370.4   Median :5.000  
##  Mean   :  259805   Mean   :12.110   Mean   :380.1   Mean   :4.983  
##  3rd Qu.:  289200   3rd Qu.:12.482   3rd Qu.:386.6   3rd Qu.:5.000  
##  Max.   :80240000   Max.   :18.096   Max.   :458.6   Max.   :5.400  
##       rape          robbery          assault         property   
##  Min.   :25.90   Min.   : 81.60   Min.   :229.2   Min.   :2110  
##  1st Qu.:28.40   1st Qu.: 86.10   1st Qu.:242.8   1st Qu.:2210  
##  Median :29.90   Median : 98.60   Median :248.3   Median :2363  
##  Mean   :29.34   Mean   : 98.46   Mean   :247.3   Mean   :2440  
##  3rd Qu.:30.70   3rd Qu.:102.90   3rd Qu.:250.2   3rd Qu.:2574  
##  Max.   :31.00   Max.   :145.90   Max.   :277.5   Max.   :3215  
##     burglary        larceny     bedrooms_all_buildings number_of_bathrooms
##  Min.   :340.5   Min.   :1550   Min.   : 1.000         Min.   : 0.500     
##  1st Qu.:378.0   1st Qu.:1602   1st Qu.: 3.000         1st Qu.: 2.000     
##  Median :429.7   Median :1696   Median : 3.000         Median : 2.100     
##  Mean   :472.9   Mean   :1735   Mean   : 3.434         Mean   : 2.528     
##  3rd Qu.:537.2   3rd Qu.:1822   3rd Qu.: 4.000         3rd Qu.: 3.000     
##  Max.   :733.0   Max.   :2166   Max.   :13.000         Max.   :11.500     
##  number_of_fireplaces stories_number  land_square_footage  AC_presence    
##  Min.   : 1.000       Min.   :1.000   Min.   :     436    Min.   :0.0000  
##  1st Qu.: 1.000       1st Qu.:1.000   1st Qu.:    9888    1st Qu.:0.0000  
##  Median : 1.000       Median :1.000   Median :   15216    Median :1.0000  
##  Mean   : 1.069       Mean   :1.393   Mean   :   85106    Mean   :0.5625  
##  3rd Qu.: 1.000       3rd Qu.:2.000   3rd Qu.:   26180    3rd Qu.:1.0000  
##  Max.   :11.000       Max.   :4.000   Max.   :16984697    Max.   :1.0000  
##    division         year_built_grp 
##  Length:17793       Min.   :1.000  
##  Class :character   1st Qu.:3.000  
##  Mode  :character   Median :4.000  
##                     Mean   :3.214  
##                     3rd Qu.:4.000  
##                     Max.   :4.000  
## 
## $DV_MA
##   sale_amount       dlog_sale_amount    violent          murder     
##  Min.   :    1000   Min.   : 6.812   Min.   :361.6   Min.   :4.400  
##  1st Qu.:  100000   1st Qu.:11.528   1st Qu.:369.1   1st Qu.:4.700  
##  Median :  160000   Median :12.004   Median :383.8   Median :5.000  
##  Mean   :  209782   Mean   :11.958   Mean   :384.3   Mean   :4.962  
##  3rd Qu.:  253000   3rd Qu.:12.454   3rd Qu.:387.1   3rd Qu.:5.300  
##  Max.   :50000000   Max.   :17.663   Max.   :458.6   Max.   :5.400  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.10   1st Qu.: 98.6   1st Qu.:238.1   1st Qu.:2363   1st Qu.:429.7  
##  Median :29.80   Median :102.2   Median :248.3   Median :2500   Median :494.7  
##  Mean   :28.95   Mean   :103.7   Mean   :246.7   Mean   :2547   Mean   :518.0  
##  3rd Qu.:30.00   3rd Qu.:113.1   3rd Qu.:250.2   3rd Qu.:2868   3rd Qu.:672.2  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   : 1.000         Min.   : 0.500      Min.   : 1.000      
##  1st Qu.:1696   1st Qu.: 3.000         1st Qu.: 1.000      1st Qu.: 1.000      
##  Median :1784   Median : 3.000         Median : 2.000      Median : 1.000      
##  Mean   :1795   Mean   : 3.268         Mean   : 1.973      Mean   : 1.108      
##  3rd Qu.:1965   3rd Qu.: 4.000         3rd Qu.: 2.500      3rd Qu.: 1.000      
##  Max.   :2166   Max.   :30.000         Max.   :11.000      Max.   :20.000      
##  stories_number  land_square_footage  AC_presence       division        
##  Min.   :0.910   Min.   :     44     Min.   :0.0000   Length:25401      
##  1st Qu.:1.000   1st Qu.:  11761     1st Qu.:0.0000   Class :character  
##  Median :1.500   Median :  20473     Median :0.0000   Mode  :character  
##  Mean   :1.566   Mean   :  42832     Mean   :0.1528                     
##  3rd Qu.:2.000   3rd Qu.:  43560     3rd Qu.:0.0000                     
##  Max.   :5.000   Max.   :4856940     Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :2.374  
##  3rd Qu.:3.000  
##  Max.   :4.000  
## 
## $DV_MT
##   sale_amount       dlog_sale_amount    violent          murder    
##  Min.   :     500   Min.   : 6.098   Min.   :361.6   Min.   :4.40  
##  1st Qu.:  255100   1st Qu.:12.440   1st Qu.:369.1   1st Qu.:4.70  
##  Median :  360000   Median :12.745   Median :373.7   Median :5.00  
##  Mean   :  483744   Mean   :12.761   Mean   :382.6   Mean   :4.96  
##  3rd Qu.:  492600   3rd Qu.:13.045   3rd Qu.:386.6   3rd Qu.:5.30  
##  Max.   :82000000   Max.   :18.003   Max.   :458.6   Max.   :5.40  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.10   1st Qu.: 86.1   1st Qu.:238.1   1st Qu.:2210   1st Qu.:378.0  
##  Median :29.90   Median :102.2   Median :248.3   Median :2452   Median :468.9  
##  Mean   :28.98   Mean   :102.5   Mean   :246.1   Mean   :2525   Mean   :508.8  
##  3rd Qu.:30.70   3rd Qu.:109.0   3rd Qu.:250.2   3rd Qu.:2734   3rd Qu.:610.5  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   : 0.300      Min.   :  1.000     
##  1st Qu.:1602   1st Qu.:  3.000        1st Qu.: 2.000      1st Qu.:  1.000     
##  Median :1745   Median :  3.000        Median : 3.000      Median :  1.000     
##  Mean   :1783   Mean   :  3.502        Mean   : 2.799      Mean   :  1.218     
##  3rd Qu.:1902   3rd Qu.:  4.000        3rd Qu.: 3.250      3rd Qu.:  1.000     
##  Max.   :2166   Max.   :372.000        Max.   :77.500      Max.   :999.000     
##  stories_number   land_square_footage  AC_presence       division        
##  Min.   : 1.000   Min.   :        1   Min.   :0.0000   Length:258690     
##  1st Qu.: 1.000   1st Qu.:     6534   1st Qu.:0.0000   Class :character  
##  Median : 2.000   Median :     8625   Median :0.0000   Mode  :character  
##  Mean   : 1.554   Mean   :    44919   Mean   :0.3339                     
##  3rd Qu.: 2.000   3rd Qu.:    14269   3rd Qu.:1.0000                     
##  Max.   :10.000   Max.   :104633298   Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:3.000  
##  Median :3.000  
##  Mean   :3.112  
##  3rd Qu.:4.000  
##  Max.   :4.000  
## 
## $DV_PAC
##   sale_amount        dlog_sale_amount    violent          murder     
##  Min.   :      280   Min.   : 5.369   Min.   :361.6   Min.   :4.400  
##  1st Qu.:   244000   1st Qu.:12.386   1st Qu.:369.1   1st Qu.:4.700  
##  Median :   400000   Median :12.900   Median :383.8   Median :5.000  
##  Mean   :   598281   Mean   :12.928   Mean   :386.4   Mean   :4.942  
##  3rd Qu.:   670000   3rd Qu.:13.401   3rd Qu.:387.8   3rd Qu.:5.300  
##  Max.   :288164568   Max.   :19.209   Max.   :458.6   Max.   :5.400  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.10   1st Qu.: 98.6   1st Qu.:241.5   1st Qu.:2363   1st Qu.:429.7  
##  Median :29.10   Median :102.9   Median :248.3   Median :2500   Median :494.7  
##  Mean   :28.79   Mean   :105.7   Mean   :246.9   Mean   :2593   Mean   :538.0  
##  3rd Qu.:30.00   3rd Qu.:113.9   3rd Qu.:250.2   3rd Qu.:2905   3rd Qu.:701.0  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   :  0.010     Min.   :  1.000     
##  1st Qu.:1696   1st Qu.:  3.000        1st Qu.:  2.000     1st Qu.:  1.000     
##  Median :1784   Median :  3.000        Median :  2.000     Median :  1.000     
##  Mean   :1820   Mean   :  3.351        Mean   :  2.284     Mean   :  1.111     
##  3rd Qu.:1974   3rd Qu.:  4.000        3rd Qu.:  2.500     3rd Qu.:  1.000     
##  Max.   :2166   Max.   :999.000        Max.   :175.000     Max.   :198.000     
##  stories_number    land_square_footage  AC_presence      division        
##  Min.   :  1.000   Min.   :        1   Min.   :0.000   Length:950325     
##  1st Qu.:  1.000   1st Qu.:     6098   1st Qu.:0.000   Class :character  
##  Median :  1.000   Median :     7700   Median :1.000   Mode  :character  
##  Mean   :  1.313   Mean   :    31139   Mean   :0.504                     
##  3rd Qu.:  2.000   3rd Qu.:    11480   3rd Qu.:1.000                     
##  Max.   :175.000   Max.   :304920000   Max.   :1.000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :2.000  
##  Mean   :2.211  
##  3rd Qu.:3.000  
##  Max.   :4.000  
## 
## $DV_SA
##   sale_amount        dlog_sale_amount    violent          murder     
##  Min.   :      100   Min.   : 4.39    Min.   :361.6   Min.   :4.400  
##  1st Qu.:   144000   1st Qu.:11.89    1st Qu.:369.1   1st Qu.:4.700  
##  Median :   220000   Median :12.28    Median :373.7   Median :5.000  
##  Mean   :   332789   Mean   :12.30    Mean   :382.7   Mean   :4.962  
##  3rd Qu.:   340000   3rd Qu.:12.73    3rd Qu.:386.6   3rd Qu.:5.300  
##  Max.   :600000000   Max.   :20.00    Max.   :458.6   Max.   :5.400  
##       rape         robbery         assault         property       burglary    
##  Min.   :25.9   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.1   1st Qu.: 86.1   1st Qu.:238.1   1st Qu.:2210   1st Qu.:378.0  
##  Median :29.9   Median :102.2   Median :248.3   Median :2452   Median :468.9  
##  Mean   :29.0   Mean   :102.4   Mean   :246.3   Mean   :2523   Mean   :508.1  
##  3rd Qu.:30.7   3rd Qu.:109.0   3rd Qu.:250.2   3rd Qu.:2734   3rd Qu.:610.5  
##  Max.   :31.0   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   :  0.100     Min.   :  1.000     
##  1st Qu.:1602   1st Qu.:  3.000        1st Qu.:  2.000     1st Qu.:  1.000     
##  Median :1745   Median :  3.000        Median :  2.500     Median :  1.000     
##  Mean   :1782   Mean   :  3.483        Mean   :  2.615     Mean   :  1.339     
##  3rd Qu.:1902   3rd Qu.:  4.000        3rd Qu.:  3.000     3rd Qu.:  1.000     
##  Max.   :2166   Max.   :164.000        Max.   :450.000     Max.   :998.000     
##  stories_number   land_square_footage  AC_presence       division        
##  Min.   : 0.500   Min.   :        2   Min.   :0.0000   Length:624384     
##  1st Qu.: 1.000   1st Qu.:     9360   1st Qu.:1.0000   Class :character  
##  Median : 1.000   Median :    14375   Median :1.0000   Mode  :character  
##  Mean   : 1.379   Mean   :    32888   Mean   :0.7522                     
##  3rd Qu.: 2.000   3rd Qu.:    24394   3rd Qu.:1.0000                     
##  Max.   :21.000   Max.   :130592880   Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :2.907  
##  3rd Qu.:4.000  
##  Max.   :4.000  
## 
## $DV_WNC
##   sale_amount        dlog_sale_amount    violent          murder     
##  Min.   :4.850e+02   Min.   : 5.946   Min.   :361.6   Min.   :4.400  
##  1st Qu.:1.500e+05   1st Qu.:11.857   1st Qu.:369.1   1st Qu.:4.700  
##  Median :2.250e+05   Median :12.248   Median :373.7   Median :5.000  
##  Mean   :3.114e+05   Mean   :12.213   Mean   :384.5   Mean   :4.954  
##  3rd Qu.:3.300e+05   3rd Qu.:12.630   3rd Qu.:387.1   3rd Qu.:5.300  
##  Max.   :2.730e+09   Max.   :21.640   Max.   :458.6   Max.   :5.400  
##       rape         robbery         assault         property       burglary    
##  Min.   :25.9   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.1   1st Qu.: 98.6   1st Qu.:238.1   1st Qu.:2363   1st Qu.:429.7  
##  Median :29.8   Median :102.2   Median :248.3   Median :2500   Median :494.7  
##  Mean   :28.9   Mean   :104.0   Mean   :246.5   Mean   :2556   Mean   :522.0  
##  3rd Qu.:30.0   3rd Qu.:113.1   3rd Qu.:250.2   3rd Qu.:2868   3rd Qu.:672.2  
##  Max.   :31.0   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   : 0.250      Min.   : 1.000      
##  1st Qu.:1696   1st Qu.:  3.000        1st Qu.: 2.000      1st Qu.: 1.000      
##  Median :1784   Median :  3.000        Median : 2.500      Median : 1.000      
##  Mean   :1800   Mean   :  3.301        Mean   : 2.595      Mean   : 1.193      
##  3rd Qu.:1965   3rd Qu.:  4.000        3rd Qu.: 3.000      3rd Qu.: 1.000      
##  Max.   :2166   Max.   :440.000        Max.   :32.000      Max.   :12.000      
##  stories_number   land_square_footage  AC_presence       division        
##  Min.   : 1.000   Min.   :        4   Min.   :0.0000   Length:106834     
##  1st Qu.: 1.000   1st Qu.:     8836   1st Qu.:0.0000   Class :character  
##  Median : 1.000   Median :    12413   Median :0.0000   Mode  :character  
##  Mean   : 1.367   Mean   :    86432   Mean   :0.2803                     
##  3rd Qu.: 2.000   3rd Qu.:    24829   3rd Qu.:1.0000                     
##  Max.   :12.000   Max.   :710899200   Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :3.000  
##  Mean   :2.656  
##  3rd Qu.:4.000  
##  Max.   :4.000  
## 
## $DV_WSC
##   sale_amount        dlog_sale_amount    violent          murder     
##  Min.   :      101   Min.   : 4.518   Min.   :361.6   Min.   :4.400  
##  1st Qu.:   149000   1st Qu.:11.767   1st Qu.:369.1   1st Qu.:4.700  
##  Median :   215000   Median :12.119   Median :383.8   Median :5.000  
##  Mean   :   254375   Mean   :12.102   Mean   :385.7   Mean   :4.945  
##  3rd Qu.:   314366   3rd Qu.:12.484   3rd Qu.:387.1   3rd Qu.:5.300  
##  Max.   :166250000   Max.   :18.929   Max.   :458.6   Max.   :5.400  
##       rape          robbery         assault         property       burglary    
##  Min.   :25.90   Min.   : 81.6   Min.   :229.2   Min.   :2110   Min.   :340.5  
##  1st Qu.:27.10   1st Qu.: 98.6   1st Qu.:238.1   1st Qu.:2363   1st Qu.:429.7  
##  Median :29.80   Median :102.2   Median :248.2   Median :2500   Median :494.7  
##  Mean   :28.84   Mean   :105.0   Mean   :246.9   Mean   :2577   Mean   :530.5  
##  3rd Qu.:30.00   3rd Qu.:113.1   3rd Qu.:250.2   3rd Qu.:2868   3rd Qu.:672.2  
##  Max.   :31.00   Max.   :145.9   Max.   :277.5   Max.   :3215   Max.   :733.0  
##     larceny     bedrooms_all_buildings number_of_bathrooms number_of_fireplaces
##  Min.   :1550   Min.   :  1.000        Min.   :  0.500     Min.   : 1.000      
##  1st Qu.:1696   1st Qu.:  3.000        1st Qu.:  2.000     1st Qu.: 1.000      
##  Median :1784   Median :  3.000        Median :  2.000     Median : 1.000      
##  Mean   :1811   Mean   :  3.555        Mean   :  2.486     Mean   : 1.129      
##  3rd Qu.:1965   3rd Qu.:  4.000        3rd Qu.:  3.000     3rd Qu.: 1.000      
##  Max.   :2166   Max.   :999.000        Max.   :205.000     Max.   :13.000      
##  stories_number  land_square_footage  AC_presence       division        
##  Min.   :1.000   Min.   :        1   Min.   :0.0000   Length:360197     
##  1st Qu.:1.000   1st Qu.:     7220   1st Qu.:1.0000   Class :character  
##  Median :1.000   Median :     9091   Median :1.0000   Mode  :character  
##  Mean   :1.365   Mean   :    53944   Mean   :0.9651                     
##  3rd Qu.:2.000   3rd Qu.:    13445   3rd Qu.:1.0000                     
##  Max.   :4.000   Max.   :999999999   Max.   :1.0000                     
##  year_built_grp 
##  Min.   :1.000  
##  1st Qu.:3.000  
##  Median :3.000  
##  Mean   :3.213  
##  3rd Qu.:4.000  
##  Max.   :4.000

PROBLEM 5. Running regressions

# Generate the dependent variable
cleanhousingcrimedata$dep_var <- log(cleanhousingcrimedata$sale_amount / cleanhousingcrimedata$hpi)

# Estimate three different models
model1 <- lm(dep_var ~ violent + 
             bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
             data = cleanhousingcrimedata)
summary(model1) # Preferred model
## 
## Call:
## lm(formula = dep_var ~ violent + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = cleanhousingcrimedata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -133.968   -0.419   -0.045    0.403    9.374 
## 
## Coefficients:
##                          Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)             7.176e+00  8.101e-03  885.851   <2e-16 ***
## violent                 1.702e-04  2.047e-05    8.317   <2e-16 ***
## bedrooms_all_buildings  2.305e-02  3.374e-04   68.328   <2e-16 ***
## number_of_bathrooms     3.020e-01  5.627e-04  536.706   <2e-16 ***
## number_of_fireplaces    6.984e-05  6.994e-05    0.999    0.318    
## stories_number          6.241e-02  1.048e-03   59.573   <2e-16 ***
## land_square_footage    -6.413e-12  2.242e-10   -0.029    0.977    
## AC_presence            -3.165e-01  1.009e-03 -313.676   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7511 on 2350540 degrees of freedom
## Multiple R-squared:  0.1746, Adjusted R-squared:  0.1746 
## F-statistic: 7.103e+04 on 7 and 2350540 DF,  p-value: < 2.2e-16
model2 <- lm(dep_var ~ murder + rape + robbery + assault + property + burglary + larceny + motor, 
             data = cleanhousingcrimedata)
summary(model2)
## 
## Call:
## lm(formula = dep_var ~ murder + rape + robbery + assault + property + 
##     burglary + larceny + motor, data = cleanhousingcrimedata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1060 -0.4950 -0.0360  0.4662  9.0704 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.1526350  0.2069650  34.560  < 2e-16 ***
## murder       0.0826872  0.0069346  11.924  < 2e-16 ***
## rape         0.0152382  0.0013705  11.118  < 2e-16 ***
## robbery     -0.0070928  0.0010279  -6.900 5.19e-12 ***
## assault     -0.0038842  0.0001839 -21.124  < 2e-16 ***
## property    -0.0140170  0.0295511  -0.474    0.635    
## burglary     0.0140722  0.0294747   0.477    0.633    
## larceny      0.0148963  0.0296853   0.502    0.616    
## motor        0.0141916  0.0295732   0.480    0.631    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8259 on 2350539 degrees of freedom
## Multiple R-squared:  0.002052,   Adjusted R-squared:  0.002048 
## F-statistic: 604.1 on 8 and 2350539 DF,  p-value: < 2.2e-16
model3 <- lm(dep_var ~ murder + rape + robbery + assault + property + burglary + larceny + motor + 
             bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
             data = cleanhousingcrimedata)
summary(model3)
## 
## Call:
## lm(formula = dep_var ~ murder + rape + robbery + assault + property + 
##     burglary + larceny + motor + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = cleanhousingcrimedata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -133.750   -0.417   -0.043    0.403    9.374 
## 
## Coefficients:
##                          Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)             6.799e+00  1.880e-01   36.155  < 2e-16 ***
## murder                  4.930e-02  6.301e-03    7.825 5.08e-15 ***
## rape                    1.599e-02  1.245e-03   12.844  < 2e-16 ***
## robbery                -4.077e-03  9.339e-04   -4.366 1.27e-05 ***
## assault                -3.180e-03  1.671e-04  -19.036  < 2e-16 ***
## property                4.811e-02  2.685e-02    1.792   0.0732 .  
## burglary               -4.785e-02  2.678e-02   -1.787   0.0740 .  
## larceny                -4.764e-02  2.697e-02   -1.766   0.0773 .  
## motor                  -4.821e-02  2.687e-02   -1.794   0.0728 .  
## bedrooms_all_buildings  2.299e-02  3.371e-04   68.202  < 2e-16 ***
## number_of_bathrooms     3.015e-01  5.622e-04  536.340  < 2e-16 ***
## number_of_fireplaces    7.321e-05  6.987e-05    1.048   0.2947    
## stories_number          6.301e-02  1.047e-03   60.208  < 2e-16 ***
## land_square_footage     4.710e-12  2.240e-10    0.021   0.9832    
## AC_presence            -3.161e-01  1.008e-03 -313.513  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7504 on 2350533 degrees of freedom
## Multiple R-squared:  0.1762, Adjusted R-squared:  0.1762 
## F-statistic: 3.591e+04 on 14 and 2350533 DF,  p-value: < 2.2e-16
# Estimate the preferred model with regional data
df_NE <- cleanhousingcrimedata[cleanhousingcrimedata$division == "DV_NE" | cleanhousingcrimedata$division == "DV_MA", ]
model3_NE <- lm(dep_var ~ violent + 
               bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
             data = df_NE)
summary(model3_NE)
## 
## Call:
## lm(formula = dep_var ~ violent + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = df_NE)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3308 -0.3071  0.1174  0.4276  5.2193 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            5.588e+00  7.120e-02  78.482  < 2e-16 ***
## violent                8.561e-04  1.783e-04   4.801 1.59e-06 ***
## bedrooms_all_buildings 1.251e-01  5.676e-03  22.035  < 2e-16 ***
## number_of_bathrooms    3.049e-01  6.256e-03  48.735  < 2e-16 ***
## number_of_fireplaces   1.366e-01  1.039e-02  13.155  < 2e-16 ***
## stories_number         1.076e-01  9.600e-03  11.212  < 2e-16 ***
## land_square_footage    5.634e-07  3.532e-08  15.952  < 2e-16 ***
## AC_presence            5.302e-01  1.196e-02  44.327  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6722 on 25393 degrees of freedom
## Multiple R-squared:  0.3032, Adjusted R-squared:  0.303 
## F-statistic:  1579 on 7 and 25393 DF,  p-value: < 2.2e-16
df_MW <- cleanhousingcrimedata[cleanhousingcrimedata$division == "DV_ENC" | cleanhousingcrimedata$division == "DV_WNC", ]
model3_MW <- lm(dep_var ~ violent + 
               bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
             data = df_MW)
summary(model3_MW)
## 
## Call:
## lm(formula = dep_var ~ violent + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = df_MW)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.5318 -0.2277  0.0399  0.3126  9.6270 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             6.329e+00  3.173e-02 199.451  < 2e-16 ***
## violent                 3.173e-04  7.985e-05   3.974 7.09e-05 ***
## bedrooms_all_buildings  6.369e-03  1.195e-03   5.329 9.89e-08 ***
## number_of_bathrooms     2.817e-01  2.085e-03 135.152  < 2e-16 ***
## number_of_fireplaces    2.754e-01  4.471e-03  61.600  < 2e-16 ***
## stories_number          4.909e-02  4.218e-03  11.639  < 2e-16 ***
## land_square_footage    -2.290e-10  5.264e-10  -0.435    0.664    
## AC_presence             4.842e-02  4.239e-03  11.423  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6467 on 113750 degrees of freedom
## Multiple R-squared:  0.2422, Adjusted R-squared:  0.2421 
## F-statistic:  5193 on 7 and 113750 DF,  p-value: < 2.2e-16
df_S <- cleanhousingcrimedata[cleanhousingcrimedata$division == "DV_SA" | cleanhousingcrimedata$division == "DV_ESC" | cleanhousingcrimedata$division == "DV_WSC", ]
model3_S <- lm(dep_var ~ violent + 
                      bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
                    data = df_S)
summary(model3_S)
## 
## Call:
## lm(formula = dep_var ~ violent + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = df_S)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -127.778   -0.288    0.019    0.322    8.014 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             6.653e+00  1.129e-02 589.461   <2e-16 ***
## violent                -4.903e-04  2.858e-05 -17.156   <2e-16 ***
## bedrooms_all_buildings  2.821e-02  4.159e-04  67.830   <2e-16 ***
## number_of_bathrooms     2.882e-01  7.206e-04 399.995   <2e-16 ***
## number_of_fireplaces    1.904e-05  6.338e-05   0.300    0.764    
## stories_number          2.106e-01  1.524e-03 138.199   <2e-16 ***
## land_square_footage    -5.184e-11  2.315e-10  -0.224    0.823    
## AC_presence             3.289e-02  1.774e-03  18.539   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6736 on 1002366 degrees of freedom
## Multiple R-squared:  0.2376, Adjusted R-squared:  0.2376 
## F-statistic: 4.462e+04 on 7 and 1002366 DF,  p-value: < 2.2e-16
df_W <- cleanhousingcrimedata[cleanhousingcrimedata$division == "DV_MT" | cleanhousingcrimedata$division == "DV_PAC", ]
model3_W <- lm(dep_var ~ violent + 
                      bedrooms_all_buildings + number_of_bathrooms + number_of_fireplaces + stories_number + land_square_footage + AC_presence, 
                    data = df_W)
summary(model3_W)
## 
## Call:
## lm(formula = dep_var ~ violent + bedrooms_all_buildings + number_of_bathrooms + 
##     number_of_fireplaces + stories_number + land_square_footage + 
##     AC_presence, data = df_W)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -62.771  -0.441  -0.055   0.399   9.232 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             7.664e+00  9.905e-03  773.78  < 2e-16 ***
## violent                -2.639e-04  2.490e-05  -10.60  < 2e-16 ***
## bedrooms_all_buildings  9.686e-03  4.698e-04   20.62  < 2e-16 ***
## number_of_bathrooms     3.569e-01  7.672e-04  465.20  < 2e-16 ***
## number_of_fireplaces    1.178e-02  4.318e-04   27.29  < 2e-16 ***
## stories_number         -5.513e-02  1.240e-03  -44.47  < 2e-16 ***
## land_square_footage     3.893e-09  5.926e-10    6.57 5.04e-11 ***
## AC_presence            -2.159e-01  1.216e-03 -177.60  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6628 on 1209007 degrees of freedom
## Multiple R-squared:  0.1981, Adjusted R-squared:  0.1981 
## F-statistic: 4.267e+04 on 7 and 1209007 DF,  p-value: < 2.2e-16
df_effect_by_division <- read_csv("df_effect_by_division.csv", col_names = TRUE) # Result collected and manually put into an excel spreadsheet
## Rows: 9 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): region, division
## dbl (1): effect
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_murder_effect <- merge(df_abbr_to_division, df_effect_by_division, by = c("division"))
df_murder_effect$state <- df_murder_effect$abbr

# Map
plot_usmap(data = df_murder_effect, values = "effect", color = "red") +
  labs(title = "Variation of the effect of 1 unit increase in the murder rate on log of housing price") +
  scale_fill_continuous(name = "Effect of murder (log of deflated sale amount)") +
  theme(legend.position = "bottom")