# Reshape every numeric column of `train` into long format: one row per
# (Variable, Value) pair, so that each variable can be drawn in its own facet.
data_long <- train %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Variable",
    values_to = "Value"
  )

# One horizontal boxplot per numeric variable, each panel on its own scale.
ggplot(data_long, aes(x = Variable, y = Value)) +
  geom_boxplot() +
  coord_flip() +
  facet_wrap(~ Variable, scales = "free", ncol = 3) +
  theme_minimal() +
  # Theme elements name the axes as drawn, i.e. after coord_flip(). The
  # vertical axis only repeats the variable name already shown in the facet
  # strip, so that is the one to blank; the horizontal value axis must stay
  # visible because every panel has its own (free) scale.
  theme(axis.text.y = element_blank()) +
  labs(title = "Horizontal Boxplot for Each Numeric Variable")
library (broom)
# Attach the per-observation model columns used below (.fitted, .resid) to
# the training rows.
train2 <- augment(model, data = train)

# Fitted values plotted against the `murders` column.
p <- ggplot(train2, aes(x = murders, y = .fitted))
p + geom_point()

# Residuals plotted against fitted values.
p <- ggplot(train2, aes(x = .fitted, y = .resid))
p + geom_point()
# Predict on the held-out rows and request prediction intervals. The interval
# type is spelled out in full ("prediction") instead of relying on partial
# matching of the abbreviated "predict".
pred <- predict(model, newdata = test, interval = "prediction")

# Bind the prediction columns (fit, lwr, upr) onto the test rows.
test_w_predict <- cbind(test, pred)

# Show the first ten rows: observed columns next to the point prediction and
# its interval bounds.
test_w_predict %>%
  head(10) %>%
  select(murders, arrests, fit, lwr, upr) %>%
  as_flextable(show_coltype = FALSE)
murders
arrests
fit
lwr
upr
1
1
0.7
-10.1
11.5
14
20
14.9
4.1
25.6
0
1
1.4
-9.3
12.2
3
1
0.2
-10.6
11.0
0
3
7.6
-3.2
18.4
0
1
0.9
-9.9
11.6
5
12
8.6
-2.1
19.4
5
3
4.7
-6.1
15.4
2
2
3.0
-7.8
13.7
0
1
1.1
-9.6
11.8
n: 10
# Predict on the held-out rows and request prediction intervals. The interval
# type is spelled out in full ("prediction") instead of relying on partial
# matching of the abbreviated "predict".
pred <- predict(model, newdata = test, interval = "prediction")

# Bind the prediction columns (fit, lwr, upr) onto the test rows.
test_w_predict <- cbind(test, pred)

# Same preview as before, with `arrests` listed ahead of `murders`.
test_w_predict %>%
  head(10) %>%
  select(arrests, murders, fit, lwr, upr) %>%
  as_flextable(show_coltype = FALSE)
arrests
murders
fit
lwr
upr
1
1
0.7
-10.1
11.5
20
14
14.9
4.1
25.6
1
0
1.4
-9.3
12.2
1
3
0.2
-10.6
11.0
3
0
7.6
-3.2
18.4
1
0
0.9
-9.9
11.6
12
5
8.6
-2.1
19.4
3
5
4.7
-6.1
15.4
2
2
3.0
-7.8
13.7
1
0
1.1
-9.6
11.8
n: 10
library (Metrics)
# Test-set error metrics: compare the observed response with the model's
# point prediction (`fit`). Comparing two observed columns with each other
# would not measure the model at all.
#
# The response column is looked up from the left-hand side of the model
# formula.
# NOTE(review): assumes the response is a plain, untransformed column that is
# present in `test` -- confirm against the definition of `model`.
response_name <- all.vars(formula(model)[[2]])[1]
actual <- test_w_predict[[response_name]]
predicted <- test_w_predict$fit

metric_label <- c("MAE", "RMSE", "MAPE")
# Metrics functions take (actual, predicted) in that order.
# NOTE(review): MAPE divides by the observed value, so it is not finite when
# the observed response contains zeros.
metrics <- round(
  c(
    mae(actual, predicted),
    rmse(actual, predicted),
    mape(actual, predicted)
  ),
  4
)

pmtable <- data.frame(Metric = metric_label, Value = metrics)
flextable(pmtable)
Metric
Value
MAE
3.5998
RMSE
26.4445
MAPE
# Simple linear regression of arrests on murders, fitted to the training data.
pip <- lm(
  formula = arrests ~ murders,
  data = train
)

# Print the coefficient table and fit statistics.
summary(pip)
Call:
lm(formula = arrests ~ murders, data = train)
Residuals:
Min 1Q Median 3Q Max
-70.863 -1.103 -1.103 -0.103 123.789
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.103157 0.182736 6.037 0.00000000204 ***
murders 0.815210 0.004064 200.598 < 0.0000000000000002 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.573 on 1318 degrees of freedom
Multiple R-squared: 0.9683, Adjusted R-squared: 0.9683
F-statistic: 4.024e+04 on 1 and 1318 DF, p-value: < 0.00000000000000022
library (ggplot2)
# Base plot: log-scaled arrests against murders on the training data.
# log1p() is used instead of log() because the training data contains rows
# with zero arrests; log(0) is -Inf and the smoothers below would drop those
# rows from their fits.
p <- ggplot(data = train, mapping = aes(x = murders, y = log1p(arrests)))

# Ordinary least squares fit; colour and fill are mapped to the label "OLS"
# so the line gets a legend entry.
p + geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", aes(color = "OLS", fill = "OLS"))

# Robust regression (MASS::rlm, tomato) overlaid on OLS (steelblue).
p + geom_point(alpha = 0.1) +
  geom_smooth(color = "tomato", fill = "tomato", method = MASS::rlm) +
  geom_smooth(color = "steelblue", fill = "steelblue", method = "lm")

# Linear model on a B-spline basis of x (splines::bs(x, 3)), drawn without a
# confidence band.
p + geom_point(alpha = 0.1) +
  geom_smooth(
    color = "tomato", method = "lm", linewidth = 1.2,
    formula = y ~ splines::bs(x, 3), se = FALSE
  )

# Smoothed quantile regression at the 20th, 50th and 85th percentiles.
# `linewidth` replaces the deprecated `size` for line thickness, matching the
# spline plot above.
p + geom_point(alpha = 0.1) +
  geom_quantile(
    color = "tomato", linewidth = 1.2, method = "rqss",
    lambda = 1, quantiles = c(0.20, 0.5, 0.85)
  )