library("tidyrules")
library("dplyr")
library("Cubist")
# Load the Ames housing data set via the AmesHousing package and preview
# its first six rows.
ames <- AmesHousing::make_ames()
head(ames, n = 6L)
## # A tibble: 6 × 81
## MS_SubClass MS_Zoning Lot_Frontage Lot_Area Street Alley Lot_Shape
## <fct> <fct> <dbl> <int> <fct> <fct> <fct>
## 1 One_Story_1946_and_New… Resident… 141 31770 Pave No_A… Slightly…
## 2 One_Story_1946_and_New… Resident… 80 11622 Pave No_A… Regular
## 3 One_Story_1946_and_New… Resident… 81 14267 Pave No_A… Slightly…
## 4 One_Story_1946_and_New… Resident… 93 11160 Pave No_A… Regular
## 5 Two_Story_1946_and_New… Resident… 74 13830 Pave No_A… Slightly…
## 6 Two_Story_1946_and_New… Resident… 78 9978 Pave No_A… Slightly…
## # ℹ 74 more variables: Land_Contour <fct>, Utilities <fct>, Lot_Config <fct>,
## # Land_Slope <fct>, Neighborhood <fct>, Condition_1 <fct>, Condition_2 <fct>,
## # Bldg_Type <fct>, House_Style <fct>, Overall_Qual <fct>, Overall_Cond <fct>,
## # Year_Built <int>, Year_Remod_Add <int>, Roof_Style <fct>, Roof_Matl <fct>,
## # Exterior_1st <fct>, Exterior_2nd <fct>, Mas_Vnr_Type <fct>,
## # Mas_Vnr_Area <dbl>, Exter_Qual <fct>, Exter_Cond <fct>, Foundation <fct>,
## # Bsmt_Qual <fct>, Bsmt_Cond <fct>, Bsmt_Exposure <fct>, …
# Fit a Cubist model on the Ames data.
#   x: every column of `ames` except Sale_Price (used as predictors)
#   y: log10 of Sale_Price, so the response is modelled on the log10 scale
#   committees: 3 (presumably the number of committee models to build;
#               see the Cubist documentation to confirm)
cubist_ames <- cubist(x = ames[, setdiff(colnames(ames), c("Sale_Price"))],
y = log10(ames[["Sale_Price"]]),
committees = 3
)
# Extract the rules from the fitted Cubist model with tidyRules(); the
# surrounding parentheses make the assignment print its value.
(rules_ames <- tidyRules(cubist_ames))
## # A tibble: 43 × 9
## id LHS RHS support mean min max error committee
## <int> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <int>
## 1 1 Overall_Qual %in% c('… (-20… 23 4.74 4.11 4.98 0.134 1
## 2 2 Overall_Qual %in% c('… (-19… 125 4.94 4.54 5.17 0.0679 1
## 3 3 Overall_Qual %in% c('… (12.… 99 5.04 4.75 5.29 0.0649 1
## 4 4 Overall_Qual %in% c('… (-47… 672 5.14 4.82 5.59 0.0338 1
## 5 5 MS_SubClass %in% c('O… (-53… 358 5.17 4.79 5.38 0.0309 1
## 6 6 MS_SubClass %in% c('D… (1.1… 85 5.17 4.80 5.54 0.0632 1
## 7 7 MS_SubClass %in% c('O… (10.… 287 5.18 4.78 5.49 0.0388 1
## 8 8 MS_SubClass %in% c('O… (5.6… 748 5.21 4.80 5.59 0.0315 1
## 9 9 MS_SubClass %in% c('O… (1.5… 26 5.23 5 5.48 0.0748 1
## 10 10 Overall_Qual %in% c('… (1.0… 71 5.26 4.96 5.58 0.0681 1
## # ℹ 33 more rows
# Show the extracted rules with DT's datatable().
library("DT")
datatable(data = rules_ames)