mid term

#Question1 
library(pacman)
p_load(dplyr,psych, tidyverse, tidymodels, vip, ISLR2, kableExtra)
# Load the Motor Trend road-test data. It is intentionally not attach()ed:
# the analysis below reaches columns through `mtcars` itself (`mtcars$mpg`,
# `data = mtcars`), and attaching would mask ggplot2's `mpg` dataset.
data("mtcars")

## The following object is masked from package:ggplot2:
## 
##     mpg

# Render the whole mtcars data frame as a striped, centred table inside a
# scrollable box; row 0 (the header row) is shown in bold on a tinted
# background.
mtcars %>%
  kbl(caption = "Motor Trend Car Road Tests") %>%
  row_spec(row = 0, bold = TRUE, color = "black", background = "#F9EBEA") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE, # spelled out: `F` is an ordinary, reassignable variable
    position = "center"
  ) %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "400px")

Motor Trend Car Road Tests
	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
Mazda RX4	21.0	6	160.0	110	3.90	2.620	16.46	0	1	4	4
Mazda RX4 Wag	21.0	6	160.0	110	3.90	2.875	17.02	0	1	4	4
Datsun 710	22.8	4	108.0	93	3.85	2.320	18.61	1	1	4	1
Hornet 4 Drive	21.4	6	258.0	110	3.08	3.215	19.44	1	0	3	1
Hornet Sportabout	18.7	8	360.0	175	3.15	3.440	17.02	0	0	3	2
Valiant	18.1	6	225.0	105	2.76	3.460	20.22	1	0	3	1
Duster 360	14.3	8	360.0	245	3.21	3.570	15.84	0	0	3	4
Merc 240D	24.4	4	146.7	62	3.69	3.190	20.00	1	0	4	2
Merc 230	22.8	4	140.8	95	3.92	3.150	22.90	1	0	4	2
Merc 280	19.2	6	167.6	123	3.92	3.440	18.30	1	0	4	4
Merc 280C	17.8	6	167.6	123	3.92	3.440	18.90	1	0	4	4
Merc 450SE	16.4	8	275.8	180	3.07	4.070	17.40	0	0	3	3
Merc 450SL	17.3	8	275.8	180	3.07	3.730	17.60	0	0	3	3
Merc 450SLC	15.2	8	275.8	180	3.07	3.780	18.00	0	0	3	3
Cadillac Fleetwood	10.4	8	472.0	205	2.93	5.250	17.98	0	0	3	4
Lincoln Continental	10.4	8	460.0	215	3.00	5.424	17.82	0	0	3	4
Chrysler Imperial	14.7	8	440.0	230	3.23	5.345	17.42	0	0	3	4
Fiat 128	32.4	4	78.7	66	4.08	2.200	19.47	1	1	4	1
Honda Civic	30.4	4	75.7	52	4.93	1.615	18.52	1	1	4	2
Toyota Corolla	33.9	4	71.1	65	4.22	1.835	19.90	1	1	4	1
Toyota Corona	21.5	4	120.1	97	3.70	2.465	20.01	1	0	3	1
Dodge Challenger	15.5	8	318.0	150	2.76	3.520	16.87	0	0	3	2
AMC Javelin	15.2	8	304.0	150	3.15	3.435	17.30	0	0	3	2
Camaro Z28	13.3	8	350.0	245	3.73	3.840	15.41	0	0	3	4
Pontiac Firebird	19.2	8	400.0	175	3.08	3.845	17.05	0	0	3	2
Fiat X1-9	27.3	4	79.0	66	4.08	1.935	18.90	1	1	4	1
Porsche 914-2	26.0	4	120.3	91	4.43	2.140	16.70	0	1	5	2
Lotus Europa	30.4	4	95.1	113	3.77	1.513	16.90	1	1	5	2
Ford Pantera L	15.8	8	351.0	264	4.22	3.170	14.50	0	1	5	4
Ferrari Dino	19.7	6	145.0	175	3.62	2.770	15.50	0	1	5	6
Maserati Bora	15.0	8	301.0	335	3.54	3.570	14.60	0	1	5	8
Volvo 142E	21.4	4	121.0	109	4.11	2.780	18.60	1	1	4	2

# Scatterplot matrix of every pair of columns in mtcars.
pairs(mtcars)

# One panel per predictor on a 3 x 4 grid: mpg (the response) on the y-axis
# against each remaining column on the x-axis, so the "mpg vs. <predictor>"
# titles match what is drawn.
# Margins are reduced once, outside the loop; `mgp` pulls the axis titles in
# so they still fit inside the smaller margins.
old_par <- par(mfrow = c(3, 4), mar = c(3, 3, 2, 2), mgp = c(2, 0.7, 0))
for (predictor in setdiff(names(mtcars), "mpg")) {
  plot(
    mtcars[[predictor]], mtcars$mpg,
    xlab = predictor, ylab = "mpg",
    main = paste("mpg vs.", predictor)
  )
}
par(old_par) # restore the previous graphics settings for later plots
# Full model: ordinary least squares of mpg on every other column of mtcars.
model <- lm(formula = mpg ~ ., data = mtcars)

# Coefficient table, residual quantiles and overall fit statistics.
summary(model)

## 
## Call:
## lm(formula = mpg ~ ., data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07

# Variance inflation factor of each predictor in the full model.
vifs <- car::vif(model)

# Show every VIF, then report the predictors whose VIF exceeds 10.
print(vifs)

##       cyl      disp        hp      drat        wt      qsec        vs        am 
## 15.373833 21.620241  9.832037  3.374620 15.164887  7.527958  4.965873  4.648487 
##      gear      carb 
##  5.357452  7.908747

high_vif <- names(vifs)[vifs > 10]
message(
  if (length(high_vif) == 0) {
    "No predictors with VIF > 10"
  } else {
    paste0("Predictors with VIF > 10:", paste(high_vif, collapse = ", "))
  }
)

## Predictors with VIF > 10:cyl, disp, wt

# Refit the regression with `disp` removed from the predictors and show the
# resulting summary.
lm1 <- lm(formula = mpg ~ . - disp, data = mtcars)
summary(lm1)

## 
## Call:
## lm(formula = mpg ~ . - disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7863 -1.4055 -0.2635  1.2029  4.4753 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.55052   18.52585   0.677   0.5052  
## cyl          0.09627    0.99715   0.097   0.9240  
## hp          -0.01295    0.01834  -0.706   0.4876  
## drat         0.92864    1.60794   0.578   0.5694  
## wt          -2.62694    1.19800  -2.193   0.0392 *
## qsec         0.66523    0.69335   0.959   0.3478  
## vs           0.16035    2.07277   0.077   0.9390  
## am           2.47882    2.03513   1.218   0.2361  
## gear         0.74300    1.47360   0.504   0.6191  
## carb        -0.61686    0.60566  -1.018   0.3195  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.623 on 22 degrees of freedom
## Multiple R-squared:  0.8655, Adjusted R-squared:  0.8105 
## F-statistic: 15.73 on 9 and 22 DF,  p-value: 1.183e-07

# Third candidate model: drop both `disp` and `cyl` from the predictors.
lm2 <- lm(mpg ~ . -disp -cyl, data = mtcars)

# Residual standard error of each model, in order: full model, without
# `disp`, without `disp` and `cyl`.
summary(model)$sigma

## [1] 2.650197

summary(lm1)$sigma

## [1] 2.623418

summary(lm2)$sigma

## [1] 2.566297

# Degrees-of-freedom vector reported by summary() for each model; the middle
# entry is the residual degrees of freedom shown in the summaries above.
summary(model)$df

## [1] 11 21 11

summary(lm1)$df

## [1] 10 22 10

summary(lm2)$df

## [1]  9 23  9

#Question2
library(ISLR2)
# Load the Carseats data and keep a working copy in `dat`, which the models
# below are fitted on.
data("Carseats")
dat <- Carseats

# Render the whole data frame as a striped, centred table inside a scrollable
# box; row 0 (the header row) is shown in bold on a tinted background.
dat %>%
  kbl(caption = "Sales of Child Car Seats") %>%
  row_spec(row = 0, bold = TRUE, color = "black", background = "#F9EBEA") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE, # spelled out: `F` is an ordinary, reassignable variable
    position = "center"
  ) %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "400px")

Sales of Child Car Seats
Sales	CompPrice	Income	Advertising	Population	Price	ShelveLoc	Age	Education	Urban	US
9.50	138	73	11	276	120	Bad	42	17	Yes	Yes
11.22	111	48	16	260	83	Good	65	10	Yes	Yes
10.06	113	35	10	269	80	Medium	59	12	Yes	Yes
7.40	117	100	4	466	97	Medium	55	14	Yes	Yes
4.15	141	64	3	340	128	Bad	38	13	Yes	No
10.81	124	113	13	501	72	Bad	78	16	No	Yes
6.63	115	105	0	45	108	Medium	71	15	Yes	No
11.85	136	81	15	425	120	Good	67	10	Yes	Yes
6.54	132	110	0	108	124	Medium	76	10	No	No
4.69	132	113	0	131	124	Medium	76	17	No	Yes
9.01	121	78	9	150	100	Bad	26	10	No	Yes
11.96	117	94	4	503	94	Good	50	13	Yes	Yes
3.98	122	35	2	393	136	Medium	62	18	Yes	No
10.96	115	28	11	29	86	Good	53	18	Yes	Yes
11.17	107	117	11	148	118	Good	52	18	Yes	Yes
8.71	149	95	5	400	144	Medium	76	18	No	No
7.58	118	32	0	284	110	Good	63	13	Yes	No
12.29	147	74	13	251	131	Good	52	10	Yes	Yes
13.91	110	110	0	408	68	Good	46	17	No	Yes
8.73	129	76	16	58	121	Medium	69	12	Yes	Yes
6.41	125	90	2	367	131	Medium	35	18	Yes	Yes
12.13	134	29	12	239	109	Good	62	18	No	Yes
5.08	128	46	6	497	138	Medium	42	13	Yes	No
5.87	121	31	0	292	109	Medium	79	10	Yes	No
10.14	145	119	16	294	113	Bad	42	12	Yes	Yes
14.90	139	32	0	176	82	Good	54	11	No	No
8.33	107	115	11	496	131	Good	50	11	No	Yes
5.27	98	118	0	19	107	Medium	64	17	Yes	No
2.99	103	74	0	359	97	Bad	55	11	Yes	Yes
7.81	104	99	15	226	102	Bad	58	17	Yes	Yes
13.55	125	94	0	447	89	Good	30	12	Yes	No
8.25	136	58	16	241	131	Medium	44	18	Yes	Yes
6.20	107	32	12	236	137	Good	64	10	No	Yes
8.77	114	38	13	317	128	Good	50	16	Yes	Yes
2.67	115	54	0	406	128	Medium	42	17	Yes	Yes
11.07	131	84	11	29	96	Medium	44	17	No	Yes
8.89	122	76	0	270	100	Good	60	18	No	No
4.95	121	41	5	412	110	Medium	54	10	Yes	Yes
6.59	109	73	0	454	102	Medium	65	15	Yes	No
3.24	130	60	0	144	138	Bad	38	10	No	No
2.07	119	98	0	18	126	Bad	73	17	No	No
7.96	157	53	0	403	124	Bad	58	16	Yes	No
10.43	77	69	0	25	24	Medium	50	18	Yes	No
4.12	123	42	11	16	134	Medium	59	13	Yes	Yes
4.16	85	79	6	325	95	Medium	69	13	Yes	Yes
4.56	141	63	0	168	135	Bad	44	12	Yes	Yes
12.44	127	90	14	16	70	Medium	48	15	No	Yes
4.38	126	98	0	173	108	Bad	55	16	Yes	No
3.91	116	52	0	349	98	Bad	69	18	Yes	No
10.61	157	93	0	51	149	Good	32	17	Yes	No
1.42	99	32	18	341	108	Bad	80	16	Yes	Yes
4.42	121	90	0	150	108	Bad	75	16	Yes	No
7.91	153	40	3	112	129	Bad	39	18	Yes	Yes
6.92	109	64	13	39	119	Medium	61	17	Yes	Yes
4.90	134	103	13	25	144	Medium	76	17	No	Yes
6.85	143	81	5	60	154	Medium	61	18	Yes	Yes
11.91	133	82	0	54	84	Medium	50	17	Yes	No
0.91	93	91	0	22	117	Bad	75	11	Yes	No
5.42	103	93	15	188	103	Bad	74	16	Yes	Yes
5.21	118	71	4	148	114	Medium	80	13	Yes	No
8.32	122	102	19	469	123	Bad	29	13	Yes	Yes
7.32	105	32	0	358	107	Medium	26	13	No	No
1.82	139	45	0	146	133	Bad	77	17	Yes	Yes
8.47	119	88	10	170	101	Medium	61	13	Yes	Yes
7.80	100	67	12	184	104	Medium	32	16	No	Yes
4.90	122	26	0	197	128	Medium	55	13	No	No
8.85	127	92	0	508	91	Medium	56	18	Yes	No
9.01	126	61	14	152	115	Medium	47	16	Yes	Yes
13.39	149	69	20	366	134	Good	60	13	Yes	Yes
7.99	127	59	0	339	99	Medium	65	12	Yes	No
9.46	89	81	15	237	99	Good	74	12	Yes	Yes
6.50	148	51	16	148	150	Medium	58	17	No	Yes
5.52	115	45	0	432	116	Medium	25	15	Yes	No
12.61	118	90	10	54	104	Good	31	11	No	Yes
6.20	150	68	5	125	136	Medium	64	13	No	Yes
8.55	88	111	23	480	92	Bad	36	16	No	Yes
10.64	102	87	10	346	70	Medium	64	15	Yes	Yes
7.70	118	71	12	44	89	Medium	67	18	No	Yes
4.43	134	48	1	139	145	Medium	65	12	Yes	Yes
9.14	134	67	0	286	90	Bad	41	13	Yes	No
8.01	113	100	16	353	79	Bad	68	11	Yes	Yes
7.52	116	72	0	237	128	Good	70	13	Yes	No
11.62	151	83	4	325	139	Good	28	17	Yes	Yes
4.42	109	36	7	468	94	Bad	56	11	Yes	Yes
2.23	111	25	0	52	121	Bad	43	18	No	No
8.47	125	103	0	304	112	Medium	49	13	No	No
8.70	150	84	9	432	134	Medium	64	15	Yes	No
11.70	131	67	7	272	126	Good	54	16	No	Yes
6.56	117	42	7	144	111	Medium	62	10	Yes	Yes
7.95	128	66	3	493	119	Medium	45	16	No	No
5.33	115	22	0	491	103	Medium	64	11	No	No
4.81	97	46	11	267	107	Medium	80	15	Yes	Yes
4.53	114	113	0	97	125	Medium	29	12	Yes	No
8.86	145	30	0	67	104	Medium	55	17	Yes	No
8.39	115	97	5	134	84	Bad	55	11	Yes	Yes
5.58	134	25	10	237	148	Medium	59	13	Yes	Yes
9.48	147	42	10	407	132	Good	73	16	No	Yes
7.45	161	82	5	287	129	Bad	33	16	Yes	Yes
12.49	122	77	24	382	127	Good	36	16	No	Yes
4.88	121	47	3	220	107	Bad	56	16	No	Yes
4.11	113	69	11	94	106	Medium	76	12	No	Yes
6.20	128	93	0	89	118	Medium	34	18	Yes	No
5.30	113	22	0	57	97	Medium	65	16	No	No
5.07	123	91	0	334	96	Bad	78	17	Yes	Yes
4.62	121	96	0	472	138	Medium	51	12	Yes	No
5.55	104	100	8	398	97	Medium	61	11	Yes	Yes
0.16	102	33	0	217	139	Medium	70	18	No	No
8.55	134	107	0	104	108	Medium	60	12	Yes	No
3.47	107	79	2	488	103	Bad	65	16	Yes	No
8.98	115	65	0	217	90	Medium	60	17	No	No
9.00	128	62	7	125	116	Medium	43	14	Yes	Yes
6.62	132	118	12	272	151	Medium	43	14	Yes	Yes
6.67	116	99	5	298	125	Good	62	12	Yes	Yes
6.01	131	29	11	335	127	Bad	33	12	Yes	Yes
9.31	122	87	9	17	106	Medium	65	13	Yes	Yes
8.54	139	35	0	95	129	Medium	42	13	Yes	No
5.08	135	75	0	202	128	Medium	80	10	No	No
8.80	145	53	0	507	119	Medium	41	12	Yes	No
7.57	112	88	2	243	99	Medium	62	11	Yes	Yes
7.37	130	94	8	137	128	Medium	64	12	Yes	Yes
6.87	128	105	11	249	131	Medium	63	13	Yes	Yes
11.67	125	89	10	380	87	Bad	28	10	Yes	Yes
6.88	119	100	5	45	108	Medium	75	10	Yes	Yes
8.19	127	103	0	125	155	Good	29	15	No	Yes
8.87	131	113	0	181	120	Good	63	14	Yes	No
9.34	89	78	0	181	49	Medium	43	15	No	No
11.27	153	68	2	60	133	Good	59	16	Yes	Yes
6.52	125	48	3	192	116	Medium	51	14	Yes	Yes
4.96	133	100	3	350	126	Bad	55	13	Yes	Yes
4.47	143	120	7	279	147	Bad	40	10	No	Yes
8.41	94	84	13	497	77	Medium	51	12	Yes	Yes
6.50	108	69	3	208	94	Medium	77	16	Yes	No
9.54	125	87	9	232	136	Good	72	10	Yes	Yes
7.62	132	98	2	265	97	Bad	62	12	Yes	Yes
3.67	132	31	0	327	131	Medium	76	16	Yes	No
6.44	96	94	14	384	120	Medium	36	18	No	Yes
5.17	131	75	0	10	120	Bad	31	18	No	No
6.52	128	42	0	436	118	Medium	80	11	Yes	No
10.27	125	103	12	371	109	Medium	44	10	Yes	Yes
12.30	146	62	10	310	94	Medium	30	13	No	Yes
6.03	133	60	10	277	129	Medium	45	18	Yes	Yes
6.53	140	42	0	331	131	Bad	28	15	Yes	No
7.44	124	84	0	300	104	Medium	77	15	Yes	No
0.53	122	88	7	36	159	Bad	28	17	Yes	Yes
9.09	132	68	0	264	123	Good	34	11	No	No
8.77	144	63	11	27	117	Medium	47	17	Yes	Yes
3.90	114	83	0	412	131	Bad	39	14	Yes	No
10.51	140	54	9	402	119	Good	41	16	No	Yes
7.56	110	119	0	384	97	Medium	72	14	No	Yes
11.48	121	120	13	140	87	Medium	56	11	Yes	Yes
10.49	122	84	8	176	114	Good	57	10	No	Yes
10.77	111	58	17	407	103	Good	75	17	No	Yes
7.64	128	78	0	341	128	Good	45	13	No	No
5.93	150	36	7	488	150	Medium	25	17	No	Yes
6.89	129	69	10	289	110	Medium	50	16	No	Yes
7.71	98	72	0	59	69	Medium	65	16	Yes	No
7.49	146	34	0	220	157	Good	51	16	Yes	No
10.21	121	58	8	249	90	Medium	48	13	No	Yes
12.53	142	90	1	189	112	Good	39	10	No	Yes
9.32	119	60	0	372	70	Bad	30	18	No	No
4.67	111	28	0	486	111	Medium	29	12	No	No
2.93	143	21	5	81	160	Medium	67	12	No	Yes
3.63	122	74	0	424	149	Medium	51	13	Yes	No
5.68	130	64	0	40	106	Bad	39	17	No	No
8.22	148	64	0	58	141	Medium	27	13	No	Yes
0.37	147	58	7	100	191	Bad	27	15	Yes	Yes
6.71	119	67	17	151	137	Medium	55	11	Yes	Yes
6.71	106	73	0	216	93	Medium	60	13	Yes	No
7.30	129	89	0	425	117	Medium	45	10	Yes	No
11.48	104	41	15	492	77	Good	73	18	Yes	Yes
8.01	128	39	12	356	118	Medium	71	10	Yes	Yes
12.49	93	106	12	416	55	Medium	75	15	Yes	Yes
9.03	104	102	13	123	110	Good	35	16	Yes	Yes
6.38	135	91	5	207	128	Medium	66	18	Yes	Yes
0.00	139	24	0	358	185	Medium	79	15	No	No
7.54	115	89	0	38	122	Medium	25	12	Yes	No
5.61	138	107	9	480	154	Medium	47	11	No	Yes
10.48	138	72	0	148	94	Medium	27	17	Yes	Yes
10.66	104	71	14	89	81	Medium	25	14	No	Yes
7.78	144	25	3	70	116	Medium	77	18	Yes	Yes
4.94	137	112	15	434	149	Bad	66	13	Yes	Yes
7.43	121	83	0	79	91	Medium	68	11	Yes	No
4.74	137	60	4	230	140	Bad	25	13	Yes	No
5.32	118	74	6	426	102	Medium	80	18	Yes	Yes
9.95	132	33	7	35	97	Medium	60	11	No	Yes
10.07	130	100	11	449	107	Medium	64	10	Yes	Yes
8.68	120	51	0	93	86	Medium	46	17	No	No
6.03	117	32	0	142	96	Bad	62	17	Yes	No
8.07	116	37	0	426	90	Medium	76	15	Yes	No
12.11	118	117	18	509	104	Medium	26	15	No	Yes
8.79	130	37	13	297	101	Medium	37	13	No	Yes
6.67	156	42	13	170	173	Good	74	14	Yes	Yes
7.56	108	26	0	408	93	Medium	56	14	No	No
13.28	139	70	7	71	96	Good	61	10	Yes	Yes
7.23	112	98	18	481	128	Medium	45	11	Yes	Yes
4.19	117	93	4	420	112	Bad	66	11	Yes	Yes
4.10	130	28	6	410	133	Bad	72	16	Yes	Yes
2.52	124	61	0	333	138	Medium	76	16	Yes	No
3.62	112	80	5	500	128	Medium	69	10	Yes	Yes
6.42	122	88	5	335	126	Medium	64	14	Yes	Yes
5.56	144	92	0	349	146	Medium	62	12	No	No
5.94	138	83	0	139	134	Medium	54	18	Yes	No
4.10	121	78	4	413	130	Bad	46	10	No	Yes
2.05	131	82	0	132	157	Bad	25	14	Yes	No
8.74	155	80	0	237	124	Medium	37	14	Yes	No
5.68	113	22	1	317	132	Medium	28	12	Yes	No
4.97	162	67	0	27	160	Medium	77	17	Yes	Yes
8.19	111	105	0	466	97	Bad	61	10	No	No
7.78	86	54	0	497	64	Bad	33	12	Yes	No
3.02	98	21	11	326	90	Bad	76	11	No	Yes
4.36	125	41	2	357	123	Bad	47	14	No	Yes
9.39	117	118	14	445	120	Medium	32	15	Yes	Yes
12.04	145	69	19	501	105	Medium	45	11	Yes	Yes
8.23	149	84	5	220	139	Medium	33	10	Yes	Yes
4.83	115	115	3	48	107	Medium	73	18	Yes	Yes
2.34	116	83	15	170	144	Bad	71	11	Yes	Yes
5.73	141	33	0	243	144	Medium	34	17	Yes	No
4.34	106	44	0	481	111	Medium	70	14	No	No
9.70	138	61	12	156	120	Medium	25	14	Yes	Yes
10.62	116	79	19	359	116	Good	58	17	Yes	Yes
10.59	131	120	15	262	124	Medium	30	10	Yes	Yes
6.43	124	44	0	125	107	Medium	80	11	Yes	No
7.49	136	119	6	178	145	Medium	35	13	Yes	Yes
3.45	110	45	9	276	125	Medium	62	14	Yes	Yes
4.10	134	82	0	464	141	Medium	48	13	No	No
6.68	107	25	0	412	82	Bad	36	14	Yes	No
7.80	119	33	0	245	122	Good	56	14	Yes	No
8.69	113	64	10	68	101	Medium	57	16	Yes	Yes
5.40	149	73	13	381	163	Bad	26	11	No	Yes
11.19	98	104	0	404	72	Medium	27	18	No	No
5.16	115	60	0	119	114	Bad	38	14	No	No
8.09	132	69	0	123	122	Medium	27	11	No	No
13.14	137	80	10	24	105	Good	61	15	Yes	Yes
8.65	123	76	18	218	120	Medium	29	14	No	Yes
9.43	115	62	11	289	129	Good	56	16	No	Yes
5.53	126	32	8	95	132	Medium	50	17	Yes	Yes
9.32	141	34	16	361	108	Medium	69	10	Yes	Yes
9.62	151	28	8	499	135	Medium	48	10	Yes	Yes
7.36	121	24	0	200	133	Good	73	13	Yes	No
3.89	123	105	0	149	118	Bad	62	16	Yes	Yes
10.31	159	80	0	362	121	Medium	26	18	Yes	No
12.01	136	63	0	160	94	Medium	38	12	Yes	No
4.68	124	46	0	199	135	Medium	52	14	No	No
7.82	124	25	13	87	110	Medium	57	10	Yes	Yes
8.78	130	30	0	391	100	Medium	26	18	Yes	No
10.00	114	43	0	199	88	Good	57	10	No	Yes
6.90	120	56	20	266	90	Bad	78	18	Yes	Yes
5.04	123	114	0	298	151	Bad	34	16	Yes	No
5.36	111	52	0	12	101	Medium	61	11	Yes	Yes
5.05	125	67	0	86	117	Bad	65	11	Yes	No
9.16	137	105	10	435	156	Good	72	14	Yes	Yes
3.72	139	111	5	310	132	Bad	62	13	Yes	Yes
8.31	133	97	0	70	117	Medium	32	16	Yes	No
5.64	124	24	5	288	122	Medium	57	12	No	Yes
9.58	108	104	23	353	129	Good	37	17	Yes	Yes
7.71	123	81	8	198	81	Bad	80	15	Yes	Yes
4.20	147	40	0	277	144	Medium	73	10	Yes	No
8.67	125	62	14	477	112	Medium	80	13	Yes	Yes
3.47	108	38	0	251	81	Bad	72	14	No	No
5.12	123	36	10	467	100	Bad	74	11	No	Yes
7.67	129	117	8	400	101	Bad	36	10	Yes	Yes
5.71	121	42	4	188	118	Medium	54	15	Yes	Yes
6.37	120	77	15	86	132	Medium	48	18	Yes	Yes
7.77	116	26	6	434	115	Medium	25	17	Yes	Yes
6.95	128	29	5	324	159	Good	31	15	Yes	Yes
5.31	130	35	10	402	129	Bad	39	17	Yes	Yes
9.10	128	93	12	343	112	Good	73	17	No	Yes
5.83	134	82	7	473	112	Bad	51	12	No	Yes
6.53	123	57	0	66	105	Medium	39	11	Yes	No
5.01	159	69	0	438	166	Medium	46	17	Yes	No
11.99	119	26	0	284	89	Good	26	10	Yes	No
4.55	111	56	0	504	110	Medium	62	16	Yes	No
12.98	113	33	0	14	63	Good	38	12	Yes	No
10.04	116	106	8	244	86	Medium	58	12	Yes	Yes
7.22	135	93	2	67	119	Medium	34	11	Yes	Yes
6.67	107	119	11	210	132	Medium	53	11	Yes	Yes
6.93	135	69	14	296	130	Medium	73	15	Yes	Yes
7.80	136	48	12	326	125	Medium	36	16	Yes	Yes
7.22	114	113	2	129	151	Good	40	15	No	Yes
3.42	141	57	13	376	158	Medium	64	18	Yes	Yes
2.86	121	86	10	496	145	Bad	51	10	Yes	Yes
11.19	122	69	7	303	105	Good	45	16	No	Yes
7.74	150	96	0	80	154	Good	61	11	Yes	No
5.36	135	110	0	112	117	Medium	80	16	No	No
6.97	106	46	11	414	96	Bad	79	17	No	No
7.60	146	26	11	261	131	Medium	39	10	Yes	Yes
7.53	117	118	11	429	113	Medium	67	18	No	Yes
6.88	95	44	4	208	72	Bad	44	17	Yes	Yes
6.98	116	40	0	74	97	Medium	76	15	No	No
8.75	143	77	25	448	156	Medium	43	17	Yes	Yes
9.49	107	111	14	400	103	Medium	41	11	No	Yes
6.64	118	70	0	106	89	Bad	39	17	Yes	No
11.82	113	66	16	322	74	Good	76	15	Yes	Yes
11.28	123	84	0	74	89	Good	59	10	Yes	No
12.66	148	76	3	126	99	Good	60	11	Yes	Yes
4.21	118	35	14	502	137	Medium	79	10	No	Yes
8.21	127	44	13	160	123	Good	63	18	Yes	Yes
3.07	118	83	13	276	104	Bad	75	10	Yes	Yes
10.98	148	63	0	312	130	Good	63	15	Yes	No
9.40	135	40	17	497	96	Medium	54	17	No	Yes
8.57	116	78	1	158	99	Medium	45	11	Yes	Yes
7.41	99	93	0	198	87	Medium	57	16	Yes	Yes
5.28	108	77	13	388	110	Bad	74	14	Yes	Yes
10.01	133	52	16	290	99	Medium	43	11	Yes	Yes
11.93	123	98	12	408	134	Good	29	10	Yes	Yes
8.03	115	29	26	394	132	Medium	33	13	Yes	Yes
4.78	131	32	1	85	133	Medium	48	12	Yes	Yes
5.90	138	92	0	13	120	Bad	61	12	Yes	No
9.24	126	80	19	436	126	Medium	52	10	Yes	Yes
11.18	131	111	13	33	80	Bad	68	18	Yes	Yes
9.53	175	65	29	419	166	Medium	53	12	Yes	Yes
6.15	146	68	12	328	132	Bad	51	14	Yes	Yes
6.80	137	117	5	337	135	Bad	38	10	Yes	Yes
9.33	103	81	3	491	54	Medium	66	13	Yes	No
7.72	133	33	10	333	129	Good	71	14	Yes	Yes
6.39	131	21	8	220	171	Good	29	14	Yes	Yes
15.63	122	36	5	369	72	Good	35	10	Yes	Yes
6.41	142	30	0	472	136	Good	80	15	No	No
10.08	116	72	10	456	130	Good	41	14	No	Yes
6.97	127	45	19	459	129	Medium	57	11	No	Yes
5.86	136	70	12	171	152	Medium	44	18	Yes	Yes
7.52	123	39	5	499	98	Medium	34	15	Yes	No
9.16	140	50	10	300	139	Good	60	15	Yes	Yes
10.36	107	105	18	428	103	Medium	34	12	Yes	Yes
2.66	136	65	4	133	150	Bad	53	13	Yes	Yes
11.70	144	69	11	131	104	Medium	47	11	Yes	Yes
4.69	133	30	0	152	122	Medium	53	17	Yes	No
6.23	112	38	17	316	104	Medium	80	16	Yes	Yes
3.15	117	66	1	65	111	Bad	55	11	Yes	Yes
11.27	100	54	9	433	89	Good	45	12	Yes	Yes
4.99	122	59	0	501	112	Bad	32	14	No	No
10.10	135	63	15	213	134	Medium	32	10	Yes	Yes
5.74	106	33	20	354	104	Medium	61	12	Yes	Yes
5.87	136	60	7	303	147	Medium	41	10	Yes	Yes
7.63	93	117	9	489	83	Bad	42	13	Yes	Yes
6.18	120	70	15	464	110	Medium	72	15	Yes	Yes
5.17	138	35	6	60	143	Bad	28	18	Yes	No
8.61	130	38	0	283	102	Medium	80	15	Yes	No
5.97	112	24	0	164	101	Medium	45	11	Yes	No
11.54	134	44	4	219	126	Good	44	15	Yes	Yes
7.50	140	29	0	105	91	Bad	43	16	Yes	No
7.38	98	120	0	268	93	Medium	72	10	No	No
7.81	137	102	13	422	118	Medium	71	10	No	Yes
5.99	117	42	10	371	121	Bad	26	14	Yes	Yes
8.43	138	80	0	108	126	Good	70	13	No	Yes
4.81	121	68	0	279	149	Good	79	12	Yes	No
8.97	132	107	0	144	125	Medium	33	13	No	No
6.88	96	39	0	161	112	Good	27	14	No	No
12.57	132	102	20	459	107	Good	49	11	Yes	Yes
9.32	134	27	18	467	96	Medium	49	14	No	Yes
8.64	111	101	17	266	91	Medium	63	17	No	Yes
10.44	124	115	16	458	105	Medium	62	16	No	Yes
13.44	133	103	14	288	122	Good	61	17	Yes	Yes
9.45	107	67	12	430	92	Medium	35	12	No	Yes
5.30	133	31	1	80	145	Medium	42	18	Yes	Yes
7.02	130	100	0	306	146	Good	42	11	Yes	No
3.58	142	109	0	111	164	Good	72	12	Yes	No
13.36	103	73	3	276	72	Medium	34	15	Yes	Yes
4.17	123	96	10	71	118	Bad	69	11	Yes	Yes
3.13	130	62	11	396	130	Bad	66	14	Yes	Yes
8.77	118	86	7	265	114	Good	52	15	No	Yes
8.68	131	25	10	183	104	Medium	56	15	No	Yes
5.25	131	55	0	26	110	Bad	79	12	Yes	Yes
10.26	111	75	1	377	108	Good	25	12	Yes	No
10.50	122	21	16	488	131	Good	30	14	Yes	Yes
6.53	154	30	0	122	162	Medium	57	17	No	No
5.98	124	56	11	447	134	Medium	53	12	No	Yes
14.37	95	106	0	256	53	Good	52	17	Yes	No
10.71	109	22	10	348	79	Good	74	14	No	Yes
10.26	135	100	22	463	122	Medium	36	14	Yes	Yes
7.68	126	41	22	403	119	Bad	42	12	Yes	Yes
9.08	152	81	0	191	126	Medium	54	16	Yes	No
7.80	121	50	0	508	98	Medium	65	11	No	No
5.58	137	71	0	402	116	Medium	78	17	Yes	No
9.44	131	47	7	90	118	Medium	47	12	Yes	Yes
7.90	132	46	4	206	124	Medium	73	11	Yes	No
16.27	141	60	19	319	92	Good	44	11	Yes	Yes
6.81	132	61	0	263	125	Medium	41	12	No	No
6.11	133	88	3	105	119	Medium	79	12	Yes	Yes
5.81	125	111	0	404	107	Bad	54	15	Yes	No
9.64	106	64	10	17	89	Medium	68	17	Yes	Yes
3.90	124	65	21	496	151	Bad	77	13	Yes	Yes
4.95	121	28	19	315	121	Medium	66	14	Yes	Yes
9.35	98	117	0	76	68	Medium	63	10	Yes	No
12.85	123	37	15	348	112	Good	28	12	Yes	Yes
5.87	131	73	13	455	132	Medium	62	17	Yes	Yes
5.32	152	116	0	170	160	Medium	39	16	Yes	No
8.67	142	73	14	238	115	Medium	73	14	No	Yes
8.14	135	89	11	245	78	Bad	79	16	Yes	Yes
8.44	128	42	8	328	107	Medium	35	12	Yes	Yes
5.47	108	75	9	61	111	Medium	67	12	Yes	Yes
6.10	153	63	0	49	124	Bad	56	16	Yes	No
4.53	129	42	13	315	130	Bad	34	13	Yes	Yes
5.57	109	51	10	26	120	Medium	30	17	No	Yes
5.35	130	58	19	366	139	Bad	33	16	Yes	Yes
12.57	138	108	17	203	128	Good	33	14	Yes	Yes
6.14	139	23	3	37	120	Medium	55	11	No	Yes
7.41	162	26	12	368	159	Medium	40	18	Yes	Yes
5.94	100	79	7	284	95	Bad	50	12	Yes	Yes
9.71	134	37	0	27	120	Good	49	16	Yes	Yes

# Regress Sales on Price plus the Urban and US factors.
model_2 <- lm(formula = Sales ~ Price + Urban + US, data = dat)

# One-column data frame of the four coefficients, formatted together as
# strings with 3 significant digits, then pasted into the fitted equation.
x <- format(as.data.frame(model_2$coefficients), digits = 3)
print(paste(
  "Sales = ", x[[1]][1],
  "+", x[[1]][2], "x Price +",
  x[[1]][3], "x Urban +",
  x[[1]][4], "x US"
))

## [1] "Sales =  13.0435 + -0.0545 x Price + -0.0219 x Urban +  1.2006 x US"

summary(model_2)

## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16

# Smaller model: keep only Price and US, the two predictors that were
# significant in the three-predictor fit.
model_3 <- lm(formula = Sales ~ Price + US, data = dat)
summary(model_3)

## 
## Call:
## lm(formula = Sales ~ Price + US, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16

library(caret)

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following objects are masked from 'package:yardstick':
## 
##     precision, recall, sensitivity, specificity

## The following object is masked from 'package:purrr':
## 
##     lift

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:ISLR2':
## 
##     Boston

## The following object is masked from 'package:dplyr':
## 
##     select

# Load required library
library(caret)

# Root-mean-squared error between two numeric vectors `x` and `y`: the square
# root of the mean of their squared element-wise differences.
RMSE <- function(x, y) {
  squared_error <- (x - y)^2
  sqrt(mean(squared_error))
}

# Seed the RNG *before* the partition is drawn; seeding afterwards has no
# effect on which rows are sampled, so the split would change on every run.
set.seed(1)

# Sample 20% of the row indices (based on Sales) for the test set.
test_index <- createDataPartition(dat$Sales, times = 1, p = 0.2, list = FALSE)

# The sampled rows form the test data; the remaining 80% form the train data.
train <- dat[-test_index, ]
test <- dat[test_index, ]
dim(train)

## [1] 319  11

dim(test)

## [1] 81 11

# Model from question (a): Price, Urban and US, fitted on the training rows
# and scored by RMSE on the test rows.
model_a <- lm(Sales ~ Price + Urban + US, data = train)
# The predictions get their own name; storing them in a variable called
# `predict` would shadow stats::predict().
pred_a <- predict(model_a, newdata = test)
RMSE(pred_a, test$Sales)

## [1] 2.406599

# Model from question (e): Price and US only, scored the same way.
model_e <- lm(Sales ~ Price + US, data = train)
pred_e <- predict(model_e, newdata = test)
RMSE(pred_e, test$Sales)

## [1] 2.406406

# Display the summary of model_3 (Sales ~ Price + US, fitted on all of `dat`).
summary(model_3)

## 
## Call:
## lm(formula = Sales ~ Price + US, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16

# Split the plotting device into a 2 x 2 grid.
par(mfrow = c(2, 2))

# Draw the diagnostic plots for model_3 into that grid.
plot(model_3)

#Question3 
# Load required packages
library(ISLR2)
library(MASS)
library(e1071)

## 
## Attaching package: 'e1071'

## The following object is masked from 'package:tune':
## 
##     tune

## The following object is masked from 'package:rsample':
## 
##     permutations

## The following object is masked from 'package:parsnip':
## 
##     tune

library(class)

# Load the Boston data set.
data("Boston")

# Binary response: 1 where a row's crime rate (`crim`) is above the median
# crime rate, 0 otherwise.
crime_rate_above_median <- as.numeric(Boston$crim > median(Boston$crim))

# Store the response as an extra column of Boston.
Boston$crime_rate_above_median <- crime_rate_above_median

# Logistic Regression
# `crim` is excluded from the predictors: the response is computed directly
# from it (crim > median(crim)), so including it lets the model separate the
# two classes perfectly, which is what the non-convergence warnings recorded
# below (from the earlier `~ .` fit) indicate.
logistic_model <- glm(
  crime_rate_above_median ~ . - crim,
  data = Boston,
  family = "binomial"
)

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(logistic_model)

## 
## Call:
## glm(formula = crime_rate_above_median ~ ., family = "binomial", 
##     data = Boston)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.437e+01  1.202e+05   0.000    1.000
## crim         1.083e+03  1.773e+04   0.061    0.951
## zn           2.194e+00  5.856e+01   0.037    0.970
## indus       -2.510e+00  9.002e+02  -0.003    0.998
## chas         4.489e+00  1.014e+04   0.000    1.000
## nox         -2.585e+02  1.458e+05  -0.002    0.999
## rm          -3.953e+01  1.653e+03  -0.024    0.981
## age          3.437e-01  5.798e+01   0.006    0.995
## dis         -1.742e+01  2.146e+03  -0.008    0.994
## rad         -5.933e+00  2.642e+03  -0.002    0.998
## tax          1.639e-01  1.078e+02   0.002    0.999
## ptratio      5.525e+00  3.640e+03   0.002    0.999
## black        3.266e-02  1.208e+01   0.003    0.998
## lstat       -1.687e+00  3.560e+02  -0.005    0.996
## medv         2.358e+00  5.382e+02   0.004    0.997
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7.0146e+02  on 505  degrees of freedom
## Residual deviance: 2.8371e-05  on 491  degrees of freedom
## AIC: 30
## 
## Number of Fisher Scoring iterations: 25

# LDA
# `crim` is excluded from the predictors because the response is computed
# directly from it (crim > median(crim)); keeping it would leak the answer
# into the classifier.
lda_model <- lda(crime_rate_above_median ~ . - crim, data = Boston)
lda_model

## Call:
## lda(crime_rate_above_median ~ ., data = Boston)
## 
## Prior probabilities of groups:
##   0   1 
## 0.5 0.5 
## 
## Group means:
##        crim        zn     indus       chas       nox       rm      age      dis
## 0 0.0955715 21.525692  7.002292 0.05138340 0.4709711 6.394395 51.31028 5.091596
## 1 7.1314756  1.201581 15.271265 0.08695652 0.6384190 6.174874 85.83953 2.498489
##         rad      tax  ptratio    black     lstat     medv
## 0  4.158103 305.7431 17.90711 388.7061  9.419486 24.94941
## 1 14.940711 510.7312 19.00395 324.6420 15.886640 20.11621
## 
## Coefficients of linear discriminants:
##                   LD1
## crim     0.0046376592
## zn      -0.0056431194
## indus    0.0126159626
## chas    -0.0592836851
## nox      8.1826206579
## rm       0.0874007870
## age      0.0112829040
## dis      0.0453643651
## rad      0.0699133176
## tax     -0.0008444666
## ptratio  0.0513806507
## black   -0.0009892799
## lstat    0.0143945059
## medv     0.0386990631

# Load required library
library(e1071)

# Naive Bayes
# `crim` is excluded from the predictors because the response is computed
# directly from it (crim > median(crim)); keeping it would leak the answer
# into the classifier.
naive_bayes_model <- naiveBayes(
  as.factor(crime_rate_above_median) ~ . - crim,
  data = Boston
)
naive_bayes_model

## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##   0   1 
## 0.5 0.5 
## 
## Conditional probabilities:
##    crim
## Y        [,1]        [,2]
##   0 0.0955715  0.06281773
##   1 7.1314756 11.10912294
## 
##    zn
## Y        [,1]      [,2]
##   0 21.525692 29.319808
##   1  1.201581  4.798611
## 
##    indus
## Y        [,1]     [,2]
##   0  7.002292 5.514454
##   1 15.271265 5.439010
## 
##    chas
## Y         [,1]      [,2]
##   0 0.05138340 0.2212161
##   1 0.08695652 0.2823299
## 
##    nox
## Y        [,1]       [,2]
##   0 0.4709711 0.05559789
##   1 0.6384190 0.09870365
## 
##    rm
## Y       [,1]      [,2]
##   0 6.394395 0.5556856
##   1 6.174874 0.8101381
## 
##    age
## Y       [,1]     [,2]
##   0 51.31028 25.88190
##   1 85.83953 17.87423
## 
##    dis
## Y       [,1]     [,2]
##   0 5.091596 2.081304
##   1 2.498489 1.085521
## 
##    rad
## Y        [,1]     [,2]
##   0  4.158103 1.659121
##   1 14.940711 9.529843
## 
##    tax
## Y       [,1]     [,2]
##   0 305.7431  87.4837
##   1 510.7312 167.8553
## 
##    ptratio
## Y       [,1]     [,2]
##   0 17.90711 1.811216
##   1 19.00395 2.346947
## 
##    black
## Y       [,1]      [,2]
##   0 388.7061  22.83774
##   1 324.6420 118.83084
## 
##    lstat
## Y        [,1]     [,2]
##   0  9.419486 4.923497
##   1 15.886640 7.546922
## 
##    medv
## Y       [,1]      [,2]
##   0 24.94941  7.232047
##   1 20.11621 10.270362

# Load required library
library(class)

# KNN
set.seed(123) # For reproducibility
n_obs <- nrow(Boston)
train_indices <- sample(seq_len(n_obs), floor(0.7 * n_obs)) # 70% for training
train_data <- Boston[train_indices, ]
test_data <- Boston[-train_indices, ]

# Features must exclude the response column itself as well as `crim`, from
# which the response is computed; dropping only the first column (`crim`)
# would leave the answer among the features.
feature_names <- setdiff(names(Boston), c("crim", "crime_rate_above_median"))

# KNN is distance-based, so standardise every feature; the test rows are
# scaled with the training means and standard deviations.
train_x <- scale(train_data[, feature_names])
test_x <- scale(
  test_data[, feature_names],
  center = attr(train_x, "scaled:center"),
  scale = attr(train_x, "scaled:scale")
)

knn_model <- knn(
  train_x, test_x,
  cl = factor(train_data$crime_rate_above_median),
  k = 5
)

# Confusion matrix: predicted class (rows) against the actual class (columns).
table(knn_model, test_data$crime_rate_above_median)

##          
## knn_model  0  1
##         0 69  5
##         1  6 72

#Question3 
# Load libraries
library(quantmod)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## Loading required package: TTR

## 
## Attaching package: 'TTR'

## The following object is masked from 'package:dials':
## 
##     momentum

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(TTR)
library(xts)

# Fetch daily S&P 500 (^GSPC) quotes from Yahoo Finance for 2019-2023.
# getSymbols() stores the series in an object called `GSPC`.
getSymbols(
  Symbols = "^GSPC",
  src = "yahoo",
  from = "2019-01-01",
  to = "2023-12-31"
)

## [1] "GSPC"

# Keep only the closing prices and give the column a short name
d_sp500 <- Cl(GSPC)
colnames(d_sp500) <- "Price"

# Technical indicators used as predictors. Each one is shifted back by one
# trading day so that only information available before the current close is
# used. stats::lag() is called explicitly: dplyr is attached in this session
# and masks lag() with a version that is not meant for xts objects (see the
# xts startup warning), whereas stats::lag() dispatches to the xts method.

# SMA (5-day simple moving average)
sma5 <- stats::lag(SMA(d_sp500, n = 5), k = 1)
# EMA (5-day exponential moving average)
ema5 <- stats::lag(EMA(d_sp500, n = 5), k = 1)
# MACD (contributes two columns: MACD line and signal line)
macd1 <- stats::lag(MACD(d_sp500), k = 1)
# RSI (5-day)
rsi1 <- stats::lag(RSI(d_sp500, 5), k = 1)
# log returns
ret1 <- stats::lag(dailyReturn(d_sp500, type = "log"), k = 1)

# price direction indicator: 1 when today's close is at or above the close
# five trading days earlier, 0 otherwise
dir <- ifelse(d_sp500$Price >= stats::lag(d_sp500$Price, k = 5), 1, 0)

# Combine all the indicators and response variable in one object
d_ex1 <- cbind(dir, ret1, sma5, ema5, macd1, rsi1)

# Change column names (seven columns because MACD supplies two)
colnames(d_ex1) <- c("Direction", "Ret", "SMA", "EMA", "MACD", "Signal", "RSI")
# Using the quantmod package: draw the closing-price chart, then attach one
# series from d_ex1 to it with addTA(). A fresh chartSeries() call starts a new
# chart, so every indicator ends up on its own figure.
chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and Indicators")

addTA(d_ex1[, 1], col = 1, legend = "Direction")  # column 1 of d_ex1 is Direction

# Plot each indicator separately
# NOTE(review): `on = NA` presumably asks addTA() for a separate panel rather
# than an overlay on the price panel -- confirm against the quantmod docs.
chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and Ret")

addTA(d_ex1[, "Ret"], on = NA, col = 2, legend = "Ret")

chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and SMA")

addTA(d_ex1[, "SMA"], on = NA, col = 3, legend = "SMA")

chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and EMA")

addTA(d_ex1[, "EMA"], on = NA, col = 4, legend = "EMA")

chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and MACD")

addTA(d_ex1[, "MACD"], on = NA, col = 5, legend = "MACD")

chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and Signal")

addTA(d_ex1[, "Signal"], on = NA, col = 6, legend = "Signal")

chartSeries(d_sp500, theme = "white", name = "S&P500 Closing Prices and RSI")

addTA(d_ex1[, "RSI"], on = NA, col = 7, legend = "RSI")

library(tidyr)
library(ggplot2)

# Build a plotting dataset: price plus all indicators, one row per date.
# merge() dispatches to the xts method because d_sp500 is an xts object.
d_plot <- merge(d_sp500, d_ex1)

# Drop the leading rows where indicators are still NA (warm-up period)
d_plot <- na.omit(d_plot)

# Convert to a data frame with an explicit Date column
d_plot <- data.frame(Date = index(d_plot), coredata(d_plot))

# Long format: one row per (Date, Indicator) pair, keeping Direction alongside
d_plot_long <- pivot_longer(
  d_plot,
  cols = -c(Date, Direction),
  names_to = "Indicator",
  values_to = "value"
)

# Direction is a class label, not a number
d_plot_long$Direction <- as.factor(d_plot_long$Direction)

# Time-series view: one facet per indicator, each with its own axis scales.
# The argument is spelled `scales`; `scale` only worked through partial
# argument matching. The outer parentheses print the plot while assigning it.
(p2_ex <- ggplot(d_plot_long, aes(Date, value, color = Indicator)) +
    geom_path() +
    facet_grid(Indicator ~ ., scales = "free") +
    theme_minimal())

# Distribution view: indicator values split by price direction
p2_ex <- ggplot(d_plot_long, aes(value, Indicator, fill = Direction)) +
  geom_boxplot()

p2_ex + theme_minimal() + labs(title = "TA Indicators vs Price Direction") +
  scale_fill_manual(name = "Price Direction", values = c("orange", "lightblue"))

# Prepare the modelling data: drop incomplete rows, switch from xts to a plain
# data frame, and make the response a factor so caret treats this as
# classification.
d_ex1 <- na.omit(d_ex1)
d_ex1 <- as.data.frame(d_ex1)
d_ex1$Direction <- as.factor(d_ex1$Direction)

# Chronological split (no shuffling, since this is a time series): the first
# 70% of the rows form the TRAINING set and the remaining 30% the testing set.
# seq_len() and setdiff() keep both pieces well defined even when the training
# index is empty, where `1:0` and `-integer(0)` would misbehave.
idx1 <- seq_len(round(nrow(d_ex1) * 0.7))
d_train1 <- d_ex1[idx1, ]  # training set
d_test1 <- d_ex1[setdiff(seq_len(nrow(d_ex1)), idx1), ]  # testing set


library(caret)
# Fix the RNG state so the fit is reproducible
set.seed(999)

# Resampling scheme: rolling windows of 250 consecutive observations, each
# evaluated on the 30 observations that follow, with a fixed window length
cntrl1 <- trainControl(
  method = "timeslice",
  initialWindow = 250,
  horizon = 30,
  fixedWindow = TRUE
)

# Predictors are centred and scaled before fitting
prep1 <- c("center", "scale")

# Logistic regression (binomial glm) of Direction on all other columns
logit_ex1 <- train(
  Direction ~ .,
  data = d_train1,
  method = "glm",
  family = "binomial",
  trControl = cntrl1,
  preProcess = prep1
)
logit_ex1  # print the resampled accuracy of the fitted model

## Generalized Linear Model 
## 
## 857 samples
##   6 predictor
##   2 classes: '0', '1' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Rolling Forecasting Origin Resampling (30 held-out with a fixed window) 
## Summary of sample sizes: 250, 250, 250, 250, 250, 250, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8247982  0.6095506

# Coefficient table and deviance statistics of the glm held inside the caret fit
summary(logit_ex1$finalModel)  # summary of the final model

## 
## Call:
## NULL
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.6702     0.1042   6.434 1.24e-10 ***
## Ret          -0.1949     0.1315  -1.482   0.1384    
## SMA         -66.6941     9.2266  -7.228 4.88e-13 ***
## EMA          66.5705     9.2163   7.223 5.08e-13 ***
## MACD          1.3864     0.5452   2.543   0.0110 *  
## Signal       -1.4401     0.4976  -2.894   0.0038 ** 
## RSI           1.7669     0.2008   8.800  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1143.29  on 856  degrees of freedom
## Residual deviance:  612.22  on 850  degrees of freedom
## AIC: 626.22
## 
## Number of Fisher Scoring iterations: 6

library(vip)
# Variable-importance plot for the logistic model, drawn as points
vip(logit_ex1, geom = "point") +
  theme_minimal()

# Predict the hold-out period and compare with the observed direction
pred1 <- predict(logit_ex1, newdata = d_test1)
confusionMatrix(data = pred1, reference = d_test1[["Direction"]])

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 119  30
##          1  34 184
##                                          
##                Accuracy : 0.8256         
##                  95% CI : (0.7828, 0.863)
##     No Information Rate : 0.5831         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.64           
##                                          
##  Mcnemar's Test P-Value : 0.7077         
##                                          
##             Sensitivity : 0.7778         
##             Specificity : 0.8598         
##          Pos Pred Value : 0.7987         
##          Neg Pred Value : 0.8440         
##              Prevalence : 0.4169         
##          Detection Rate : 0.3243         
##    Detection Prevalence : 0.4060         
##       Balanced Accuracy : 0.8188         
##                                          
##        'Positive' Class : 0              
##

# Same seed as the logistic fit so both models are trained reproducibly
set.seed(999)

# Candidate neighbourhood sizes: the odd values k = 1, 3, 5, 7, 9
grid1 <- expand.grid(k = seq(from = 1, to = 10, by = 2))

# KNN classifier tuned over grid1, with the same resampling scheme and
# preprocessing as the logistic model
knn_ex1 <- train(
  Direction ~ .,
  data = d_train1,
  method = "knn",
  tuneGrid = grid1,
  trControl = cntrl1,
  preProcess = prep1
)

# Accuracy against k; a best value at the edge of the grid may suggest trying
# a wider grid
plot(knn_ex1)

knn_ex1

## k-Nearest Neighbors 
## 
## 857 samples
##   6 predictor
##   2 classes: '0', '1' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Rolling Forecasting Origin Resampling (30 held-out with a fixed window) 
## Summary of sample sizes: 250, 250, 250, 250, 250, 250, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   1  0.7140138  0.3908750
##   3  0.7618800  0.4603220
##   5  0.7806805  0.4988542
##   7  0.7709343  0.4781111
##   9  0.7817186  0.5083004
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.

# Predict the hold-out period with the tuned KNN model and compare with the
# observed direction
pred2 <- predict(knn_ex1, newdata = d_test1)
confusionMatrix(data = pred2, reference = d_test1[["Direction"]])

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 111  36
##          1  42 178
##                                          
##                Accuracy : 0.7875         
##                  95% CI : (0.742, 0.8282)
##     No Information Rate : 0.5831         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.5604         
##                                          
##  Mcnemar's Test P-Value : 0.5713         
##                                          
##             Sensitivity : 0.7255         
##             Specificity : 0.8318         
##          Pos Pred Value : 0.7551         
##          Neg Pred Value : 0.8091         
##              Prevalence : 0.4169         
##          Detection Rate : 0.3025         
##    Detection Prevalence : 0.4005         
##       Balanced Accuracy : 0.7786         
##                                          
##        'Positive' Class : 0              
##

# Collect the resampling results of both models for a side-by-side comparison
resamp1 <- resamples(
  list(
    logit = logit_ex1,
    knn = knn_ex1
  )
)

# Distribution of accuracy and kappa across the resamples, per model
summary(resamp1)

## 
## Call:
## summary.resamples(object = resamp1)
## 
## Models: logit, knn 
## Number of resamples: 578 
## 
## Accuracy 
##            Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## logit 0.5333333 0.7666667 0.8333333 0.8247982 0.9000000 0.9666667    0
## knn   0.5000000 0.7333333 0.8000000 0.7817186 0.8666667 0.9666667    0
## 
## Kappa 
##                Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## logit -3.700743e-16 0.5240789 0.6318880 0.6095506 0.7263301 0.9180328    0
## knn    5.797101e-02 0.4000000 0.5263158 0.5083004 0.6363636 0.8695652    0

# Box-and-whisker comparison of the resampled accuracy of the two models
bwplot(resamp1, metric = "Accuracy")

mid term

Maralgua

2024-04-25