# Load necessary libraries
library(sp)
library(spdep)
library(sf)
library(tmap)
library(spgwr)
library(tidyverse)
library(here)

Introduction

This report delves into the analysis of spatial variations in car prices within Maryland state. Leveraging a provided dataset encompassing vehicle information such as price, year, manufacturer, model, along with geographical coordinates (latitude and longitude), we employ Geographically Weighted Regression (GWR) to dissect how location intricately influences car prices across Maryland.

Data Exploration

Before conducting the analysis, it’s essential to understand the basic characteristics of the dataset. Let’s start by loading the data and previewing its first few rows:

# Read the vehicle listings from r_data/vehicles_md.csv (path resolved by here())
vehicles_csv <- here("r_data", "vehicles_md.csv")
vehicles_md <- read.csv(file = vehicles_csv)

# Preview the first six rows of the data frame
head(vehicles_md, n = 6L)
##           id
## 1 7303577359
## 2 7311796139
## 3 7309980532
## 4 7306981944
## 5 7310314980
## 6 7309989721
##                                                                                          url
## 1            https://baltimore.craigslist.org/cto/d/baltimore-2021-temp-tags/7303577359.html
## 2 https://baltimore.craigslist.org/cto/d/white-hall-32-ford-all-steel-pickup/7311796139.html
## 3     https://smd.craigslist.org/cto/d/lexington-park-1934-ford-coupe-street/7309980532.html
## 4    https://annapolis.craigslist.org/cto/d/odenton-1935-chevy-coupe-hot-rod/7306981944.html
## 5  https://easternshore.craigslist.org/cto/d/pocomoke-city-1937-ford-hot-rod/7310314980.html
## 6      https://baltimore.craigslist.org/cto/d/parkville-1940-chevy-master-85/7309989721.html
##              region                          region_url price year manufacturer
## 1         baltimore    https://baltimore.craigslist.org   100 1901             
## 2         baltimore    https://baltimore.craigslist.org 21500 1932         ford
## 3 southern maryland          https://smd.craigslist.org 45000 1934         ford
## 4         annapolis    https://annapolis.craigslist.org 30000 1935    chevrolet
## 5     eastern shore https://easternshore.craigslist.org 37000 1937         ford
## 6         baltimore    https://baltimore.craigslist.org 10000 1940    chevrolet
##                 model condition   cylinders     fuel odometer title_status
## 1 rolls royce phantom                       electric     1000   parts only
## 2                     excellent 8 cylinders      gas     8000        clean
## 3               coupe excellent 8 cylinders      gas    43700        clean
## 4               coupe excellent 8 cylinders      gas     1560        clean
## 5               tudor           8 cylinders      gas    12345        clean
## 6           master 85      good 6 cylinders      gas    68000        clean
##   transmission        VIN drive      size   type paint_color
## 1       manual                                              
## 2    automatic              rwd           pickup       black
## 3    automatic 1812094476   rwd   compact  coupe       black
## 4    automatic              rwd                       orange
## 5    automatic                                              
## 6       manual              rwd full-size  sedan       brown
##                                                             image_url
## 1 https://images.craigslist.org/00I0I_hg8SJslUxpEz_0cU09Y_600x450.jpg
## 2 https://images.craigslist.org/00v0v_6CSlbqQHnfmz_0CI0pV_600x450.jpg
## 3 https://images.craigslist.org/00s0s_9Y6majth6mRz_0ak07K_600x450.jpg
## 4 https://images.craigslist.org/00h0h_9B7FuW71863z_0ak07K_600x450.jpg
## 5 https://images.craigslist.org/00p0p_lEsvgn2TSAlz_0CI0t2_600x450.jpg
## 6 https://images.craigslist.org/00808_cCPbSjpotO4z_0pO0nL_600x450.jpg
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             description
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             No hassle  Legal in all states Delivery upon request    show contact info
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Fresh 350-290 HP with Jeg's TH350 transmission 10 bolt rear with coil over ladder bars Build has less than 3,000 miles Built as period correct 60's rat rod Body channeled 8 inches over frame so not built for tall people Well sorted and reliable $21,500 or reasonable offer - no trades  Calls only - Rick at  show contact info
## 3 This ATTENTION-GETTING Classic Motor Coach was custom built in 2007 by Cars of the Past in Florida. “Wildfire” has been the star of many car shows and cruises; and was even on posters and calendars!  Garage kept her entire life.  The Ford Coupe has custom, hand-painted fire on beautiful black paint that is like a mirror, hence the name: “Wildfire”.  The interior is English black leather.    I traded a diesel pusher motorhome for this car; and although I like it a lot, it really isn’t my cup of tea.  I know someone out there really wants a custom street rod like this.  Here’s some highlights:  Suspension: Front Mustang 2" dropped spindle Rear Oldsmobile by GM 10" 411 gear posi-trac Strange Axle  Wheels: Weld Wheels; 14” up front; Rear 15" Hoosier tires 15"wide  Trans: 700 R4 Lokar Shifter w/lock-up torque converter  Engine: 355 CI SBC with performance cam and port and polish heads Pinkar Rods nickel plated crank P&J timing gears MSD ignition Edelbrock 650 CFM carb w/ e-choke Hedman shorty headers with 3” dual exhaust with flowmasters high volume oil pump 10mm Taylor plugs power windows, door openers and trunk Polished tilt steering column 15 gallon fuel cell  That’s the highlights so if you have any questions at all reach out and I will do my best to answer.  Asking $45,000. Serious inquiries only please. If this ad is still posted, then it is still available.
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   It’s a 1935 Chevy Coupe Hot Rod that had belonged to my father. It has a new ZZ4 crate engine, less than 1,600 miles as well as a new transmission and independent front end. I think the transmission is a turbo 400 but I don’t have any paperwork. Was completely rebuilt and I’ve put less than 150 miles on it over the past 14 years. There is no power steering, no power windows or A/C - this hot rod is all performance. I did replace the drivers door glass. The oil has been changed a few times and the brake fluid has been flushed and replaced. There is disc brakes on the new front end and drum brakes in the rear. The back is a trunk, it is not a jump seat.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      1937 Ford hot rod all steel body Lt1 Corvette motor car is definitely a must see
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       1940 Chevy Master 85, runs drives and stops as it should. This is a 6 cylinder 3 speed on the tree mostly original car. Great shape for 81 years old. $10000.00
##   county state      lat      long             posting_date
## 1     NA    md 39.28560 -76.68990 2021-04-08T09:13:39-0400
## 2     NA    md 39.66180 -76.56660 2021-04-24T08:54:37-0400
## 3     NA    md 38.26285 -76.44373 2021-04-20T15:28:31-0400
## 4     NA    md 39.06545 -76.70517 2021-04-14T20:13:44-0400
## 5     NA    md 38.07140 -75.55500 2021-04-21T10:42:30-0400
## 6     NA    md 39.38760 -76.54180 2021-04-20T15:44:05-0400

This section loads the dataset containing information about vehicles in Maryland and displays the first few rows to provide an initial glimpse of the data.

Exploratory Data Analysis

Summary Statistics

The summary statistics provide an overview of the numerical variables in our dataset. This section computes summary statistics (such as mean, median, minimum, maximum, etc.) for numerical variables like price, year, and odometer reading to understand their distributions and characteristics.

# Five-number summary plus mean for the numeric columns of interest
numeric_columns <- c("price", "year", "odometer")
summary(vehicles_md[, numeric_columns])
##      price             year         odometer      
##  Min.   :    25   Min.   :1901   Min.   :      0  
##  1st Qu.:  6500   1st Qu.:2008   1st Qu.:  29499  
##  Median : 16000   Median :2013   Median :  75000  
##  Mean   : 18942   Mean   :2011   Mean   :  89245  
##  3rd Qu.: 28990   3rd Qu.:2017   3rd Qu.: 133413  
##  Max.   :209995   Max.   :2022   Max.   :6481033

Distribution Plots

Here, histograms are plotted to visualize the distributions of price, year, and odometer reading. These plots provide insights into the spread and concentration of values within each variable.

# Histogram of Price
hist(vehicles_md$price, main = "Distribution of Price", xlab = "Price")

# Histogram of Year
hist(vehicles_md$year, main = "Distribution of Year", xlab = "Year")

# Bias number formatting towards fixed notation (a positive scipen penalises
# scientific notation), so large odometer values print as plain digits.
# Note: this changes a session-wide option.
options(scipen = 10)

# Keep odometer readings below 1,000,000. The is.na() guard stops missing
# readings from turning into NA entries, which would make range() return NA
# and seq() fail when the breaks are built.
odometer_limit <- 1000000
keep_odometer <- !is.na(vehicles_md$odometer) &
  vehicles_md$odometer < odometer_limit
filtered_odometer <- vehicles_md$odometer[keep_odometer]
stopifnot(length(filtered_odometer) > 0)

# Build breaks on a 10,000-wide grid that spans the filtered data
bin_width <- 10000
data_range <- range(filtered_odometer)
lower_break <- floor(data_range[1] / bin_width) * bin_width
# Guarantee at least one full bin even when every reading sits on the same
# multiple of bin_width (hist() needs at least two break points)
upper_break <- max(
  ceiling(data_range[2] / bin_width) * bin_width,
  lower_break + bin_width
)
breaks <- seq(lower_break, upper_break, by = bin_width)

# Histogram of filtered Odometer
hist(
  filtered_odometer,
  main = "Distribution of Odometer (Less than 1000000)",
  xlab = "Odometer Reading",
  breaks = breaks
)

In the following section, scatter plots are created to examine the relationship between price and other variables (year and odometer reading). These plots help visualize any potential patterns or correlations between car prices and these attributes.

# Price against model year, using every listing
plot(
  x = vehicles_md$year,
  y = vehicles_md$price,
  main = "Price vs. Year",
  xlab = "Year",
  ylab = "Price"
)

# Select listings whose odometer reading is below 1,000,000; the same mask is
# applied to both vectors so the points stay paired
below_limit <- vehicles_md$odometer < 1000000
filtered_odometer <- vehicles_md$odometer[below_limit]
filtered_price <- vehicles_md$price[below_limit]

# Price against odometer for the selected listings
plot(
  x = filtered_odometer,
  y = filtered_price,
  main = "Price vs. Odometer (Odometer < 1000000)",
  xlab = "Odometer",
  ylab = "Price"
)

Data Preprocessing

Before conducting the analysis, the data should be preprocessed by removing any missing values and outliers and by ensuring consistency in formatting. The dataset is already restricted to Maryland listings, so no further filtering by state is needed. In the code below, the data frame is converted to a spatial object using its longitude and latitude columns.

# spdep is already attached at the top of the file; the call is repeated here
library(spdep)

# Turn the data frame into a spatial object, using the long/lat columns as
# the point coordinates
coord_columns <- c("long", "lat")
coordinates(vehicles_md) <- coord_columns

# Declare the coordinates as unprojected longitude/latitude on the WGS84 datum
wgs84_crs <- CRS("+proj=longlat +datum=WGS84")
proj4string(vehicles_md) <- wgs84_crs

This code section selects an optimal bandwidth parameter for the GWR model. The bandwidth determines the spatial extent over which the influence of neighboring data points is considered when estimating regression coefficients.

# Search for the adaptive bandwidth (a proportion q of the observations) for
# the price model; gwr.sel() reports a CV score for every q it tries
GWRbandwidth <- gwr.sel(
  formula = price ~ year + fuel + odometer,
  data = vehicles_md,
  adapt = TRUE
)
## Adaptive q: 0.381966 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.618034 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.763932 CV score: 925086130305 
## Adaptive q: 0.854102 CV score: 931614020946 
## Adaptive q: 0.809017 CV score: 932641181299 
## Adaptive q: 0.7082039 CV score: 914554895648 
## Adaptive q: 0.6737621 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.7294902 CV score: 920152725868 
## Adaptive q: 0.6950483 CV score: 914154497982 
## Adaptive q: 0.6993722 CV score: 914190760891 
## Adaptive q: 0.6955306 CV score: 914167656789 
## Adaptive q: 0.6869177 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.6919427 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.6938621 CV score: 914115123813 
## Adaptive q: 0.6931289 CV score: 913693227526 
## Adaptive q: 0.6926758 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.693409 CV score: 913713462664 
## Adaptive q: 0.6932364 CV score: 913701610249 
## Adaptive q: 0.6929559 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.6930628 CV score: NA
## Warning in optimize(gwr.cv.adapt.f, lower = beta1, upper = beta2, maximum =
## FALSE, : NA/Inf replaced by maximum positive value
## Adaptive q: 0.69317 CV score: 913696679624 
## Adaptive q: 0.6931289 CV score: 913693227526
# Print the adaptive bandwidth chosen by gwr.sel()
GWRbandwidth
## [1] 0.6931289
# Perform Geographically Weighted Regression with the same formula that was
# used for bandwidth selection. The selected value is passed through `adapt`,
# so the kernel is adaptive rather than fixed-distance.
# NOTE(review): hatmatrix = TRUE presumably is what makes the fit carry the
# n x n `lhat` matrix, and se.fit = TRUE presumably requests standard errors
# of the fitted values -- confirm against the spgwr documentation.
gwr_model <- gwr(price ~ year + fuel + odometer,
                 data = vehicles_md,
                 adapt = GWRbandwidth,
                 hatmatrix = TRUE,
                 se.fit = TRUE)
## Warning in proj4string(data): CRS object has comment, which is lost in output; in tests, see
## https://cran.r-project.org/web/packages/sp/vignettes/CRS_warnings.html
# Report the fitted GWR model.
# summary() has no method for the object returned by gwr(), so it falls back
# to listing the object's components (length, class, mode), which says nothing
# about the fit. print() uses the gwr print method, which reports the call,
# the kernel and bandwidth, and a summary of the local coefficient estimates.
print(gwr_model)
##           Length   Class                  Mode     
## SDF           4322 SpatialPointsDataFrame S4       
## lhat      18679684 -none-                 numeric  
## lm              11 -none-                 list     
## results         14 -none-                 list     
## bandwidth     4322 -none-                 numeric  
## adapt            1 -none-                 numeric  
## hatmatrix        1 -none-                 logical  
## gweight          1 -none-                 character
## gTSS             1 -none-                 numeric  
## this.call        6 -none-                 call     
## fp.given         1 -none-                 logical  
## timings         12 -none-                 numeric

Here, the GWR model is fitted using the selected bandwidth and the specified independent variables (year, fuel type, and odometer reading). The model estimates local regression coefficients for each location, allowing for spatially varying relationships between predictors and the response variable (car price).

# Extract residuals from GWR model.
# The object returned by gwr() has no `residuals` component, so
# resid(gwr_model) returns NULL. The local residuals are stored in the
# `gwr.e` column of the fitted SpatialPointsDataFrame (gwr_model$SDF).
# The variable keeps its original name so later code that refers to
# `residuals` continues to work.
residuals <- gwr_model$SDF$gwr.e
stopifnot(is.numeric(residuals))

# Colour each point by the sign of its residual so the map actually shows
# where the model's errors are positive or negative
residual_colours <- ifelse(residuals >= 0, "red", "blue")

# Plot spatial distribution of residuals
plot(vehicles_md, col = "lightgrey")  # All listings as a grey backdrop
plot(gwr_model$SDF, add = TRUE, pch = 20, col = residual_colours)
legend(
  "topright",
  legend = c("Residual >= 0", "Residual < 0"),
  col = c("red", "blue"),
  pch = 20
)