library(AmesHousing)
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(125)

# Build the Ames housing data set with make_ames() and report its row count.
ames <- make_ames()
nrow(ames)
## [1] 2930

# Stratified split on Sale_Price: `prop = 0.7` is the share of rows assigned
# to the training set; the remainder becomes the test set.
ames_split <- initial_split(ames, prop = 0.7, strata = "Sale_Price")
ames_train <- training(ames_split)
ames_test <- testing(ames_split)
# Fit a MARS model with earth(): Sale_Price is regressed on every other
# column of the training set, leaving all tuning arguments at their defaults.
mars <- earth(Sale_Price ~ ., data = ames_train)

# Print the fitted model; the console output recorded from that run follows.
print(mars)
## Selected 39 of 45 terms, and 28 of 307 predictors
## Termination condition: RSq changed by less than 0.001 at 45 terms
## Importance: Gr_Liv_Area, Year_Built, Total_Bsmt_SF, ...
## Number of terms at each degree of interaction: 1 38 (additive model)
## GCV 541517521 RSS 1.030446e+12 GRSq 0.9114527 RSq 0.9178873
# Show the first 20 rows of the coefficient table stored in the model
# summary (the intercept followed by the selected hinge and indicator terms).
head(summary(mars)$coefficients, n = 20)
## Sale_Price
## (Intercept) 225296.076852
## h(Gr_Liv_Area-2898) -609.321128
## h(2898-Gr_Liv_Area) -51.405211
## h(Year_Built-2003) 2085.245159
## h(2003-Year_Built) -465.791634
## h(Total_Bsmt_SF-2171) -625.781911
## h(2171-Total_Bsmt_SF) -34.308725
## h(1497-Bsmt_Unf_SF) 22.864377
## Overall_QualExcellent 93652.898756
## Overall_QualVery_Excellent 138705.262437
## Overall_QualVery_Good 43195.659866
## h(Kitchen_AbvGr-1) -19263.659879
## h(Second_Flr_SF-1407) 234.558336
## h(21780-Lot_Area) -1.807107
## Overall_QualGood 12971.548731
## h(Year_Remod_Add-1970) 513.165148
## h(1970-Year_Remod_Add) 408.113953
## h(Total_Bsmt_SF-2076) 577.301384
## NeighborhoodCrawford 24679.290321
## FunctionalTyp 16804.178157
MARS provides a convenient way to capture the nonlinear relationships that polynomial regression targets by assessing cutpoints (knots), similar to step functions.
# Plot the fitted model, restricting the output to the first panel
# (`which = 1`); per the earth documentation for plot.earth() this is the
# model-selection plot.
plot(mars, which = 1)