Question 1: Working with the Boston Dataset

Using the Boston dataset, fit classification models in order to predict whether a given suburb has a crime rate above or below the median. Explore logistic regression, LDA, naive Bayes, and KNN models using various subsets of the predictors. Describe your findings.
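
The chunks below assume a setup chunk (not shown in the original) that loads roughly the following packages; the exact list is an assumption inferred from the functions used:

# Assumed setup chunk -- packages inferred from the functions called below
library(MASS)        # Boston data, lda(); masks dplyr::select, hence the
                     # explicit dplyr::select() calls throughout
library(tidyverse)   # dplyr, tidyr, stringr, ggplot2, tibble
library(kableExtra)  # kbl(), row_spec(), kable_styling()
library(corrplot)    # corrplot(), cor.mtest()
library(rsample)     # initial_split(), training(), testing()
library(caret)       # confusionMatrix()
library(naivebayes)  # naive_bayes()
library(class)       # knn()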

boston <- Boston

Exploratory Data Analysis

boston <- boston %>%
    mutate(chas = factor(chas),
           # 'High' = crime rate above the median, 'Low' = at or below it.
           # The level order c('High', 'Low') matters later: glm() with
           # family = 'binomial' models the probability of the second level, P(Low).
           crime_factor = factor(ifelse(crim > median(crim), 
                                              'High', 'Low'), 
                                       levels = c('High', 'Low')))
kbl(head(boston, 10), caption = "First 10 rows of the Boston data with classification by crime rate factor") %>%
  row_spec(row = 0, bold = TRUE, color = "black", background = "#F9EBEA") %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
First 10 rows of the Boston data with classification by crime rate factor
crim zn indus chas nox rm age dis rad tax ptratio black lstat medv crime_factor
0.00632 18.0 2.31 0 0.5380 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0 Low
0.02731 0.0 7.07 0 0.4690 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6 Low
0.02729 0.0 7.07 0 0.4690 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7 Low
0.03237 0.0 2.18 0 0.4580 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4 Low
0.06905 0.0 2.18 0 0.4580 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2 Low
0.02985 0.0 2.18 0 0.4580 6.430 58.7 6.0622 3 222 18.7 394.12 5.21 28.7 Low
0.08829 12.5 7.87 0 0.5240 6.012 66.6 5.5605 5 311 15.2 395.60 12.43 22.9 Low
0.14455 12.5 7.87 0 0.5240 6.172 96.1 5.9505 5 311 15.2 396.90 19.15 27.1 Low
0.21124 12.5 7.87 0 0.5240 5.631 100.0 6.0821 5 311 15.2 386.63 29.93 16.5 Low
0.17004 12.5 7.87 0 0.5240 6.004 85.9 6.5921 5 311 15.2 386.71 17.10 18.9 Low

The table shows the Boston data (only the first 10 of the 506 suburbs are printed) with the last column, crime_factor, indicating whether a given suburb has a crime rate above or below the median: 'High' means the suburb's crime rate is above the median, while 'Low' means it is at or below the median.
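
Because the classes are defined by a median split, they are balanced by construction; a quick sanity check using the objects created above:

# Sanity check: the median split yields (essentially) balanced classes
table(boston$crime_factor)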

# Correlation plot
cor_test <- boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor.mtest(conf.level = .95)

boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor %>%
    corrplot(method = 'color', 
         order = 'hclust', addrect = 2,
         tl.col = 'black', addCoef.col = 'black', number.cex = 0.65,
         p.mat = cor_test$p, sig.level = .05)

The correlation matrix summarizes the pairwise relationships among the variables: blue cells indicate positive correlations, red cells negative ones.

  • In terms of crime rate, five variables are strongly correlated with crim: rad (0.63), tax (0.58), lstat (0.46), nox (0.42), and indus (0.41); the sketch after this list shows how to extract these values.

  • In addition, several of the predictors are strongly correlated with one another, which can lead to multicollinearity.
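
For reference, a minimal sketch of how the crim correlations above can be pulled out and ranked:

# Correlations of each numeric variable with crim, strongest first
cors <- boston %>%
    dplyr::select(-chas, -crime_factor) %>%
    cor()
sort(cors[, 'crim'], decreasing = TRUE)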

#boxplot
boston %>%
    dplyr::select(zn:crime_factor) %>%
    gather(value_type, value, -crime_factor, -chas) %>%
    ggplot(aes(value_type, value, fill = crime_factor)) +
    geom_boxplot(alpha = 0.5) +
    facet_wrap(~value_type, scales = 'free') +
    scale_fill_discrete(name = 'Crime Rate') +
    theme(legend.position = 'top')

The figures above help us distinguish the high-crime group from the low-crime group. Based on the figures, we can clearly see patterns such as:

  • age in high-crime suburbs is generally higher than in low-crime suburbs.

  • dis in high-crime suburbs is generally lower than in low-crime suburbs.

  • indus in high-crime suburbs is generally higher than in low-crime suburbs.

  • nox in high-crime suburbs is generally higher than in low-crime suburbs.

  • rad in high-crime suburbs is generally higher than in low-crime suburbs.

  • tax in high-crime suburbs is generally higher than in low-crime suburbs.

# Scatterplots
boston %>%
    dplyr::select(crim, crime_factor, rad, nox, tax, age, dis, indus) %>%
    gather(Variable, value, -crim, -crime_factor) %>%
    mutate(Variable = str_to_title(Variable)) %>%
    ggplot(aes(value, crim)) +
    geom_point(aes(col = crime_factor)) +
    facet_wrap(~ Variable, scales = 'free') +
    geom_smooth(method = 'lm', formula = y ~ poly(x, 3), se = FALSE) +
    guides(col = 'none') +
    labs(title = 'Scatterplots for each strong predictor')

Based on the figures above, I selected the strongest predictors (age, dis, indus, nox, rad, and tax) and plotted each of them against the response crim.

The relationships look clearly nonlinear, roughly polynomial, so the models below use cubic polynomial terms for these predictors.
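
As a quick, illustrative check of this choice (a sketch on the full data, with nox picked arbitrarily), a likelihood-ratio test can compare a linear logistic fit against a cubic one:

# Does a cubic term in nox improve on a linear one? (illustrative check)
fit_lin <- glm(crime_factor ~ nox, data = boston, family = 'binomial')
fit_cub <- glm(crime_factor ~ poly(nox, 3), data = boston, family = 'binomial')
anova(fit_lin, fit_cub, test = 'Chisq')  # a small p-value favours the cubic fit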

Constructing the models

Splitting the data

set.seed(123)
boston_split <- initial_split(boston, prop=0.8,strata= crime_factor)
boston_training <- training(boston_split)
boston_testing <- testing(boston_split)
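
Stratifying on crime_factor preserves the 50/50 class balance in both partitions; a quick check:

# Both partitions should retain the ~50/50 class balance
boston_training %>% count(crime_factor)
boston_testing %>% count(crime_factor)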

logistic regression

glm_model <- glm(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3)+ poly(indus, 3), 
               data = boston_training, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(glm_model)
## 
## Call:
## glm(formula = crime_factor ~ poly(rad, 3) + poly(nox, 3) + poly(tax, 
##     3) + poly(age, 3) + poly(dis, 3) + poly(indus, 3), family = "binomial", 
##     data = boston_training)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9398   0.0000   0.0000   0.0004   2.1725  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -1448.667    637.382  -2.273  0.02304 * 
## poly(rad, 3)1   -55456.257  23688.738  -2.341  0.01923 * 
## poly(rad, 3)2    -8892.445   3783.981  -2.350  0.01877 * 
## poly(rad, 3)3    -1376.436    598.658  -2.299  0.02149 * 
## poly(nox, 3)1    -4332.733   1473.024  -2.941  0.00327 **
## poly(nox, 3)2    -3639.043   1268.403  -2.869  0.00412 **
## poly(nox, 3)3    -1201.098    436.083  -2.754  0.00588 **
## poly(tax, 3)1     8329.522   2969.219   2.805  0.00503 **
## poly(tax, 3)2     2935.135   1044.474   2.810  0.00495 **
## poly(tax, 3)3      536.127    228.136   2.350  0.01877 * 
## poly(age, 3)1       -1.168      9.836  -0.119  0.90545   
## poly(age, 3)2      -19.426      9.010  -2.156  0.03107 * 
## poly(age, 3)3       -8.199      8.119  -1.010  0.31256   
## poly(dis, 3)1       52.903     25.556   2.070  0.03845 * 
## poly(dis, 3)2      -23.914     20.294  -1.178  0.23865   
## poly(dis, 3)3      -39.021     13.779  -2.832  0.00463 **
## poly(indus, 3)1    -16.255     42.342  -0.384  0.70105   
## poly(indus, 3)2    -52.148     24.376  -2.139  0.03241 * 
## poly(indus, 3)3   -188.155     61.799  -3.045  0.00233 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 560.063  on 403  degrees of freedom
## Residual deviance:  77.071  on 385  degrees of freedom
## AIC: 115.07
## 
## Number of Fisher Scoring iterations: 19
glm_fit <- predict(glm_model, type = "response", newdata = boston_testing)

# glm() with family = "binomial" models P(crime_factor == "Low") because "Low"
# is the second factor level, so fitted probabilities above 0.5 map to "Low"
predict_binary_glm <- tibble(
    predicted_value = factor(ifelse(glm_fit > 0.5, "Low", "High"),
                             levels = c("High", "Low")),
    actual_value = boston_testing$crime_factor)
confusion_glm <- confusionMatrix(predict_binary_glm$predicted_value, predict_binary_glm$actual_value)
confusion_glm
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   50   5
##       Low     1  46
##                                           
##                Accuracy : 0.9412          
##                  95% CI : (0.8764, 0.9781)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8824          
##                                           
##  Mcnemar's Test P-Value : 0.2207          
##                                           
##             Sensitivity : 0.9804          
##             Specificity : 0.9020          
##          Pos Pred Value : 0.9091          
##          Neg Pred Value : 0.9787          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4902          
##    Detection Prevalence : 0.5392          
##       Balanced Accuracy : 0.9412          
##                                           
##        'Positive' Class : High            
## 

The confusion matrix indicates an accuracy of 94.12%, i.e. 94.12% of the test observations are classified correctly. Sensitivity is the ability of the model to correctly identify true High suburbs: TP/(TP+FN) = 50/(50+1) = 98.04%. Specificity is the ability to correctly identify true Low suburbs: TN/(TN+FP) = 46/(46+5) = 90.20%. The earlier warning that fitted probabilities were numerically 0 or 1 signals near-perfect separation on the training data, which also explains the enormous coefficient standard errors in the summary. Overall, the model performs very well.
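
These figures can also be read directly off the caret object rather than computed by hand:

# Accuracy, sensitivity and specificity from the confusionMatrix object
confusion_glm$overall['Accuracy']
confusion_glm$byClass[c('Sensitivity', 'Specificity')]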

LDA

lda_model <- lda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3)+ poly(indus, 3), 
               data = boston_training)
lda_model
## Call:
## lda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + poly(tax, 3) + 
##     poly(age, 3) + poly(dis, 3) + poly(indus, 3), data = boston_training)
## 
## Prior probabilities of groups:
## High  Low 
##  0.5  0.5 
## 
## Group means:
##      poly(rad, 3)1 poly(rad, 3)2 poly(rad, 3)3 poly(nox, 3)1 poly(nox, 3)2
## High    0.03053036  -0.007576335   0.001805575     0.0356466   -0.01258601
## Low    -0.03053036   0.007576335  -0.001805575    -0.0356466    0.01258601
##      poly(nox, 3)3 poly(tax, 3)1 poly(tax, 3)2 poly(tax, 3)3 poly(age, 3)1
## High  -0.002945454    0.02954138   -0.00362748 -0.0009944897    0.02948616
## Low    0.002945454   -0.02954138    0.00362748  0.0009944897   -0.02948616
##      poly(age, 3)2 poly(age, 3)3 poly(dis, 3)1 poly(dis, 3)2 poly(dis, 3)3
## High   0.008543963 -0.0006928698   -0.03135008    0.01015439    0.00142612
## Low   -0.008543963  0.0006928698    0.03135008   -0.01015439   -0.00142612
##      poly(indus, 3)1 poly(indus, 3)2 poly(indus, 3)3
## High      0.03004324     -0.01078947     -0.01153712
## Low      -0.03004324      0.01078947      0.01153712
## 
## Coefficients of linear discriminants:
##                         LD1
## poly(rad, 3)1   -52.7634533
## poly(rad, 3)2    -2.5849385
## poly(rad, 3)3     1.4250961
## poly(nox, 3)1   -24.7126359
## poly(nox, 3)2    10.1489586
## poly(nox, 3)3    -1.4780811
## poly(tax, 3)1    46.0327368
## poly(tax, 3)2    19.0327107
## poly(tax, 3)3    -9.7843183
## poly(age, 3)1    -0.9765566
## poly(age, 3)2    -4.9999505
## poly(age, 3)3    -1.7035940
## poly(dis, 3)1     0.6205349
## poly(dis, 3)2     4.2583329
## poly(dis, 3)3    -5.1157704
## poly(indus, 3)1  -8.4164621
## poly(indus, 3)2  -8.7011396
## poly(indus, 3)3   4.5357491
predict_lda <- predict(lda_model, type= "response", newdata=boston_testing)$class
predict_lda
##   [1] High High Low  High High High High High Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [31] High High High High High High High High High Low  Low  Low  Low  High High
##  [46] High High High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [61] High Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High Low  High High High Low 
## Levels: High Low
predict_result_lda <- tibble(
    predicted_value = predict_lda,
    actual_value = boston_testing$crime_factor)
confusion_lda <- confusionMatrix(predict_result_lda$predicted_value, predict_result_lda$actual_value)
confusion_lda
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   49   8
##       Low     2  43
##                                          
##                Accuracy : 0.902          
##                  95% CI : (0.8271, 0.952)
##     No Information Rate : 0.5            
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8039         
##                                          
##  Mcnemar's Test P-Value : 0.1138         
##                                          
##             Sensitivity : 0.9608         
##             Specificity : 0.8431         
##          Pos Pred Value : 0.8596         
##          Neg Pred Value : 0.9556         
##              Prevalence : 0.5000         
##          Detection Rate : 0.4804         
##    Detection Prevalence : 0.5588         
##       Balanced Accuracy : 0.9020         
##                                          
##        'Positive' Class : High           
## 

The LDA model achieves an accuracy of 90.20%, meaning 90.20% of the test observations are classified correctly. Sensitivity (true High): TP/(TP+FN) = 49/(49+2) = 96.08%. Specificity (true Low): TN/(TN+FP) = 43/(43+8) = 84.31%. Compared to the logistic model, LDA is slightly worse on both sensitivity and specificity, so its overall accuracy is somewhat lower (90.20% vs 94.12%), though the two models behave broadly similarly.

naive Bayes

naivebayes_model <- naive_bayes(crime_factor ~ rad + nox+ tax + age + indus, 
               data = boston_training)
naivebayes_model
## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = crime_factor ~ rad + nox + tax + 
##     age + indus, data = boston_training)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
## High  Low 
##  0.5  0.5 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: rad (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## rad         High       Low
##   mean 14.658416  4.079208
##   sd    9.510254  1.631061
## 
## --------------------------------------------------------------------------------- 
##  ::: nox (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## nox          High        Low
##   mean 0.63711386 0.46869703
##   sd   0.10224700 0.05528707
## 
## --------------------------------------------------------------------------------- 
##  ::: tax (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## tax         High       Low
##   mean 504.81683 307.27228
##   sd   168.51777  87.23754
## 
## --------------------------------------------------------------------------------- 
##  ::: age (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## age        High      Low
##   mean 85.48119 51.60594
##   sd   18.49810 26.88528
## 
## --------------------------------------------------------------------------------- 
##  ::: indus (Gaussian) 
## --------------------------------------------------------------------------------- 
##       
## indus       High       Low
##   mean 15.143663  6.930396
##   sd    5.512455  5.354737
## 
## ---------------------------------------------------------------------------------
# Pass only the predictors the model was trained on, to avoid the
# predict.naive_bayes() warning about extra features in newdata
predict_naivebayes <- predict(naivebayes_model, type = "class",
                              newdata = boston_testing[, c('rad', 'nox', 'tax', 'age', 'indus')])
predict_naivebayes
##   [1] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High High High
##  [31] High High High High High High High Low  Low  Low  Low  Low  Low  Low  Low 
##  [46] Low  High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low 
##  [61] Low  Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High High Low  Low  Low  Low 
## Levels: High Low
predict_result_naivebayes <- tibble(
    predicted_value = predict_naivebayes,
    actual_value = boston_testing$crime_factor)
confusion_naivebayes <- confusionMatrix(predict_result_naivebayes$predicted_value, predict_result_naivebayes$actual_value)
confusion_naivebayes
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   39   3
##       Low    12  48
##                                           
##                Accuracy : 0.8529          
##                  95% CI : (0.7691, 0.9153)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 8.267e-14       
##                                           
##                   Kappa : 0.7059          
##                                           
##  Mcnemar's Test P-Value : 0.03887         
##                                           
##             Sensitivity : 0.7647          
##             Specificity : 0.9412          
##          Pos Pred Value : 0.9286          
##          Neg Pred Value : 0.8000          
##              Prevalence : 0.5000          
##          Detection Rate : 0.3824          
##    Detection Prevalence : 0.4118          
##       Balanced Accuracy : 0.8529          
##                                           
##        'Positive' Class : High            
## 

The naive Bayes model is the worst performer so far, with an accuracy of only 85.29%, a sensitivity of 76.47%, and a specificity of 94.12%. The loss of accuracy comes from the model's failure to identify the High class, i.e. its low sensitivity.
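
As a quick sanity check, these rates can be recomputed by hand from the confusion-matrix counts above (a minimal sketch; the TP/FP/FN/TN names are ours, with “High” as the positive class):

TP <- 39; FP <- 3; FN <- 12; TN <- 48   # counts read off the confusion matrix
c(sensitivity = TP / (TP + FN),              # 39/51 = 0.7647
  specificity = TN / (TN + FP),              # 48/51 = 0.9412
  accuracy = (TP + TN) / (TP + FP + FN + TN))  # 87/102 = 0.8529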

KNN

variables <- c('rad', 'nox', 'tax', 'age', 'dis', 'zn', 'indus')

x_training <- boston_training[, variables]
y_training <- boston_training$crime_factor
x_testing <- boston_testing[, variables]
acc <- list()

for (i in 1:20) {
    knn_pred <- knn(train = x_training, test = x_testing, cl = y_training, k = i)
    acc[[as.character(i)]] <- mean(knn_pred == boston_testing$crime_factor)
}

acc <- unlist(acc)

tibble(acc = acc) %>%
    mutate(k = row_number()) %>%
    ggplot(aes(k, acc)) +
    geom_col(aes(fill = k == which.max(acc))) +
    labs(x = 'K', y = 'Accuracy', title = 'KNN Accuracy for different values of K') +
    scale_x_continuous(breaks = 1:20) +
    scale_y_continuous(breaks = round(c(seq(0.90, 0.94, 0.01), max(acc)),
                                      digits = 3)) +
    geom_hline(yintercept = max(acc), lty = 2) +
    coord_cartesian(ylim = c(min(acc), max(acc))) +
    guides(fill = "none")

#final model
knn_final <- knn(train = x_training, test = x_testing, cl = y_training, k = 3)
knn_final
##   [1] Low  High High High High High High Low  Low  Low  Low  Low  Low  Low  Low 
##  [16] Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [31] High High High High High High High High Low  Low  Low  Low  Low  Low  High
##  [46] High High High Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  Low  High
##  [61] High Low  Low  Low  Low  Low  Low  High High High High High High High High
##  [76] High High High High High High High High High High High High High High High
##  [91] High High High High High High High Low  Low  Low  Low  Low 
## Levels: High Low
predict_result_knn <- knn_final %>% bind_cols(boston_testing %>% dplyr::select(crime_factor))
## New names:
## • `` -> `...1`
colnames(predict_result_knn) <- c("predicted_value", "actual_value")
confusion_knn <- confusionMatrix(predict_result_knn$predicted_value, predict_result_knn$actual_value)
confusion_knn
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction High Low
##       High   49   2
##       Low     2  49
##                                           
##                Accuracy : 0.9608          
##                  95% CI : (0.9026, 0.9892)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9216          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9608          
##             Specificity : 0.9608          
##          Pos Pred Value : 0.9608          
##          Neg Pred Value : 0.9608          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4804          
##    Detection Prevalence : 0.5000          
##       Balanced Accuracy : 0.9608          
##                                           
##        'Positive' Class : High            
## 

By running k from 1 to 20, we find that the best model uses k=3, with an accuracy of 96.08%. Looking at the confusion matrix, the sensitivity and specificity are both calculated at 96.08%. These numbers make KNN with k=3 the best model overall.
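
One caveat: KNN is distance-based, so predictors on large scales (such as tax) can dominate the distance calculation. A variant worth trying, sketched below assuming the class package’s knn() used above, standardizes the predictors before fitting:

# standardize training features, then apply the same centering/scaling to the test set
x_training_std <- scale(x_training)
x_testing_std <- scale(x_testing,
                       center = attr(x_training_std, "scaled:center"),
                       scale = attr(x_training_std, "scaled:scale"))
knn_std <- knn(train = x_training_std, test = x_testing_std, cl = y_training, k = 3)
mean(knn_std == boston_testing$crime_factor)   # accuracy on standardized features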

Question 2: Model selection:

We perform best subset, forward stepwise, and backward stepwise selection on a single data set. For each approach, we obtain p+1 models containing 0, 1, 2, ⋯, p predictors. Explain your answers:

2a: Which of the three models with k predictors has the smallest training RSS?

When performing best subset selection, the model with k predictors is the one with the smallest RSS among all C(p,k) models containing exactly k predictors.

When performing forward stepwise selection, the model with k predictors is the one with the smallest RSS among the p−k models that augment the predictors in M(k−1) with one additional predictor.

When performing backward stepwise selection, the model with k predictors is the one with the smallest RSS among the k+1 models that each contain all but one of the predictors in M(k+1).

So, the model with k predictors that has the smallest training RSS is the one obtained from best subset selection, since it is selected from among all possible k-predictor models rather than from a restricted sequence.
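
This can be checked empirically with leaps::regsubsets, sketched below on the Boston data from Question 1 (object names are ours; nvmax = 13 assumes the 13 predictors besides crim): for every size k, the exhaustive search attains a training RSS no larger than either stepwise method.

library(leaps)
best <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "exhaustive")
fwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "forward")
bwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "backward")
# training RSS by model size; the 'best' column is never larger than the others
cbind(best = summary(best)$rss, fwd = summary(fwd)$rss, bwd = summary(bwd)$rss)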

2b: Which of the three models with k predictors has the smallest test RSS?

Best subset selection may have the smallest test RSS because it considers more models than the other methods.

However, the other methods might, by chance, select a model that fits the test data better, since searching fewer candidate models makes them less prone to overfitting.

The outcome will depend more heavily on the choice of test set / validation method than on the selection method.
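
A sketch of how this could be checked on a held-out split (the split and object names are ours; regsubsets has no predict method, so predictions are formed from the coefficients directly):

set.seed(1)
train_id <- sample(nrow(Boston), floor(0.8 * nrow(Boston)))
fit <- regsubsets(crim ~ ., data = Boston[train_id, ], nvmax = 13)
test_mat <- model.matrix(crim ~ ., data = Boston[-train_id, ])
beta <- coef(fit, id = 5)                 # the best-subset model with 5 predictors
pred <- test_mat[, names(beta)] %*% beta  # manual predictions on the test set
sum((Boston$crim[-train_id] - pred)^2)    # its test RSS; repeat per method and size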

2c: True or False:

  • The predictors in the k-variable model identified by forward stepwise are a subset of the predictors in the (k+1)-variable model identified by forward stepwise selection.

TRUE. The model with (k+1) predictors is obtained by augmenting the predictors in the model with k predictors with one additional predictor.

  • The predictors in the k-variable model identified by backward stepwise are a subset of the predictors in the (k+1)-variable model identified by backward stepwise selection.

TRUE. The model with k predictors is obtained by removing one predictor from the model with (k+1) predictors.

  • The predictors in the k-variable model identified by backward stepwise are a subset of the predictors in the (k+1)-variable model identified by forward stepwise selection.

FALSE. There is no direct link between the models obtained from forward and backward selection.

  • The predictors in the k-variable model identified by forward stepwise are a subset of the predictors in the (k+1)-variable model identified by backward stepwise selection.

FALSE. There is no direct link between the models obtained from forward and backward selection.

  • The predictors in the k-variable model identified by best subset are a subset of the predictors in the (k+1)-variable model identified by best subset selection.

FALSE. Best subset selection searches all models of each size independently, so the best k-variable model need not be nested within the best (k+1)-variable model; the two can contain entirely different predictors.
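
The nesting claims above can also be verified programmatically from the selection matrix that regsubsets reports (a sketch; the fwd object is ours):

fwd <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "forward")
sel <- summary(fwd)$which   # logical matrix: one row per model size
# TRUE for forward stepwise: every size-k model is contained in the size-(k+1) model
all(sapply(1:12, function(k) all(which(sel[k, ]) %in% which(sel[k + 1, ]))))
# the same check on a method = "exhaustive" fit can return FALSE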

Question 3: Working with College Dataset:

3a: Split the data set into a training set and a test set:

First, we load the data. Then we split it into training data (college_train) and test data (college_test), 80% and 20% respectively. The split is stratified on the number of applications (Apps) so that both sets are representative of the population.

#import data
college <- College

#split data
set.seed(123)
college_split <- initial_split(college, prop = 0.8, strata = "Apps")
college_train <- training(college_split)
college_test <- testing(college_split)

3b: Fit a linear model using least squares on the training set, and report the test error obtained:

Step 1: We build the model by setting the engine to linear model (lm) and the mode to regression.

Step 2: We fit the model to the training data, defining “Apps” as the dependent variable and all other variables as predictors of the number of applications.

Step 3: We inspect the results with a model summary. There are 9 significant variables: Private, Accept, Top10perc, Top25perc, F.Undergrad, Outstate, Room.Board, Expend, and Grad.Rate. The adjusted R-squared on the training data is 91.69%, meaning that the predictors explain 91.69% of the variation in “Apps”.

Step 4: We use the model to predict on the test data, then put the predictions and the actual values side by side for comparison.

Step 5: The test error metrics are reported:

  • RMSE is reported at 1449.199, meaning that our model’s predictions deviate from the actual number of applications by roughly 1449 applications on average.

  • R-squared is reported at 93.61%, meaning that the predictors explain 93.61% of the variation in the dependent variable.

#build model
lm_model <- linear_reg() %>% set_engine('lm') %>% set_mode('regression')
lm_fit <- lm_model %>% fit(Apps~.,data=college_train)

#model result
summary(lm_fit$fit)
## 
## Call:
## stats::lm(formula = Apps ~ ., data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3257.7  -431.1   -57.5   318.8  6581.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.475e+02  4.238e+02  -1.056  0.29141    
## PrivateYes  -5.964e+02  1.471e+02  -4.055 5.67e-05 ***
## Accept       1.262e+00  5.474e-02  23.060  < 2e-16 ***
## Enroll      -2.867e-01  1.960e-01  -1.463  0.14402    
## Top10perc    4.485e+01  5.787e+00   7.749 3.93e-14 ***
## Top25perc   -1.362e+01  4.713e+00  -2.889  0.00400 ** 
## F.Undergrad  9.257e-02  3.473e-02   2.665  0.00790 ** 
## P.Undergrad  4.950e-03  3.319e-02   0.149  0.88150    
## Outstate    -5.318e-02  1.962e-02  -2.710  0.00692 ** 
## Room.Board   1.615e-01  4.929e-02   3.277  0.00111 ** 
## Books        5.242e-02  2.402e-01   0.218  0.82734    
## Personal    -8.572e-03  6.533e-02  -0.131  0.89565    
## PhD         -5.727e+00  4.779e+00  -1.199  0.23118    
## Terminal    -5.017e+00  5.205e+00  -0.964  0.33546    
## S.F.Ratio    3.827e+00  1.342e+01   0.285  0.77560    
## perc.alumni -6.235e+00  4.325e+00  -1.442  0.14991    
## Expend       7.915e-02  1.270e-02   6.233 8.58e-10 ***
## Grad.Rate    1.064e+01  3.063e+00   3.474  0.00055 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 971.6 on 603 degrees of freedom
## Multiple R-squared:  0.9192, Adjusted R-squared:  0.9169 
## F-statistic: 403.6 on 17 and 603 DF,  p-value: < 2.2e-16
#fit test data
predict(lm_fit, new_data = college_test)
## # A tibble: 156 × 1
##    .pred
##    <dbl>
##  1 1438.
##  2 1159.
##  3 1221.
##  4 3852.
##  5 6127.
##  6 2055.
##  7 9687.
##  8 1529.
##  9 1243.
## 10  446.
## # … with 146 more rows
college_test_results <- predict(lm_fit, new_data = college_test) %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(college_test_results) <- c("Prediction","Actual data")
college_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1438.          1660
##  2      1159.          1428
##  3      1221.          1038
##  4      3852.          4302
##  5      6127.          7313
##  6      2055.          2135
##  7      9687.          7548
##  8      1529.           948
##  9      1243.           807
## 10       446.           632
## # … with 146 more rows
#test error RMSE:
rmse <- rmse(college_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1449.
#test error Rsquare:
rsq <- rsq(college_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.936
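
For transparency, the same two metrics can be reproduced by hand from the prediction table (a sketch; yardstick’s rsq is the squared correlation between truth and estimate):

with(college_test_results,
     c(rmse = sqrt(mean((`Actual data` - Prediction)^2)),  # root mean squared error
       rsq = cor(`Actual data`, Prediction)^2))            # squared correlation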

3c: Fit a ridge regression model on the training set, with λ chosen by cross-validation. Report the test error obtained.

Step 1: We build the model matrix, defining “Apps” as the dependent variable and all other variables as predictors of the number of applications.

Step 2: We use cross-validation to choose the lambda for the model. Lambda is the tuning hyperparameter that controls the amount of shrinkage applied to the coefficient magnitudes.

Then we plot the cross-validation results to guide the choice. On the chart, the x-axis shows log(lambda) and the y-axis shows the mean squared error, so our objective is to select the lambda with the minimum mean squared error; the best values of log(lambda) lie around 6.

After that, we extract the best lambda, which is 313.5603.

Step 3: We build the model by the train data

Step 4: We fit the test data into the model and use the best lamda found before

Step 5: The test error metrics are reported:

  • RMSE is reported at 1986.326, meaning that our model’s predictions deviate from the actual number of applications by roughly 1986 applications on average.

  • R-squared is reported at 88.41%, meaning that the predictors explain 88.41% of the variation in the dependent variable.

set.seed(123)
#Set up matrices needed for the glmnet functions
train_matrix <-  model.matrix(Apps~., data = college_train)
test_matrix <- model.matrix(Apps~., data = college_test)

#Choose lambda using cross-validation
lamda <- cv.glmnet(train_matrix,college_train$Apps,alpha=0)
plot(lamda)

bestlam <- lamda$lambda.min
bestlam
## [1] 313.5603
#Build model 
ridge_model <- glmnet(train_matrix,college_train$Apps,alpha = 0)

#Fit test data
ridge_fit <- predict(ridge_model,s=bestlam,newx = test_matrix)

ridge_test_results <- ridge_fit %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(ridge_test_results) <- c("Prediction","Actual data")
ridge_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1620.          1660
##  2      1047.          1428
##  3      1506.          1038
##  4      3667.          4302
##  5      6163.          7313
##  6      2202.          2135
##  7      9817.          7548
##  8      1554.           948
##  9      1090.           807
## 10       431.           632
## # … with 146 more rows
#test error RMSE:
rmse_ridge <- rmse(ridge_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse_ridge
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1986.
#test error Rsquare:
rsq_ridge <- rsq(ridge_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq_ridge
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.884
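
As an aside, cv.glmnet also reports lambda.1se, the largest lambda whose cross-validated error is within one standard error of the minimum; a sketch (not run above) of the more heavily shrunk ridge fit it implies:

lamda$lambda.1se   # a more conservative choice than lambda.min
ridge_fit_1se <- predict(ridge_model, s = lamda$lambda.1se, newx = test_matrix)
sqrt(mean((college_test$Apps - ridge_fit_1se)^2))   # test RMSE under lambda.1se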

3d: Fit a lasso model on the training set, with λ chosen by crossvalidation. Report the test error obtained, along with the number of non-zero coefficient estimates.

For the lasso model, we follow the same steps as for ridge regression. The only difference is that alpha is set to 1 instead of 0.

The coefficient estimates:

  • There are 4 variables with non-zero coefficient estimates: Accept, Top10perc, F.Undergrad, and Expend.

  • The coefficients of Accept, Top10perc, F.Undergrad, and Expend are 1.213, 18.392, 0.031, and 0.019, respectively, meaning that all four variables are positively related to “Apps”.

The test error metrics are reported:

  • RMSE is reported at 1636.073, meaning that our model’s predictions deviate from the actual number of applications by roughly 1636 applications on average.

  • R-squared is reported at 93.60%, meaning that the predictors explain 93.60% of the variation in the dependent variable.

#Choose lambda using cross-validation
lamda_2 <- cv.glmnet(train_matrix,college_train$Apps,alpha=1)
plot(lamda_2)

bestlam_2 <- lamda_2$lambda.min
bestlam_2
## [1] 10.75659
#Build model 
lasso_model <- glmnet(train_matrix,college_train$Apps,alpha = 1)

#model result
lasso_coef <- predict(lasso_model, s = bestlam_2, type = "coefficients")  # use the lasso CV lambda (bestlam_2), not the ridge one
lasso_coef
## 19 x 1 sparse Matrix of class "dgCMatrix"
##                        s1
## (Intercept) -269.29591655
## (Intercept)    .         
## PrivateYes     .         
## Accept         1.21301871
## Enroll         .         
## Top10perc     18.39197894
## Top25perc      .         
## F.Undergrad    0.03146116
## P.Undergrad    .         
## Outstate       .         
## Room.Board     .         
## Books          .         
## Personal       .         
## PhD            .         
## Terminal       .         
## S.F.Ratio      .         
## perc.alumni    .         
## Expend         0.01897122
## Grad.Rate      .
#Fit test data
lasso_fit <- predict(lasso_model, s = bestlam_2, newx = test_matrix)



lasso_test_results <- lasso_fit %>% 
  bind_cols(college_test$Apps)
## New names:
## • `` -> `...2`
colnames(lasso_test_results) <- c("Prediction","Actual data")
lasso_test_results
## # A tibble: 156 × 2
##    Prediction `Actual data`
##         <dbl>         <dbl>
##  1      1873.          1660
##  2      1664.          1428
##  3      1552.          1038
##  4      2917.          4302
##  5      6180.          7313
##  6      2404.          2135
##  7      9066.          7548
##  8      1722.           948
##  9      1470.           807
## 10       808.           632
## # … with 146 more rows
#test error RMSE:
rmse_lasso <- rmse(lasso_test_results, 
     truth = "Actual data", 
     estimate = "Prediction")
rmse_lasso
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard       1636.
#test error Rsquare:
rsq_lasso <- rsq(lasso_test_results, 
    truth = "Actual data", 
    estimate = "Prediction")
rsq_lasso
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rsq     standard       0.936
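
Finally, the test-set metrics reported in this question can be collected into one table for comparison (a sketch using the metric objects computed above):

bind_rows(
  lm = bind_rows(rmse, rsq),
  ridge = bind_rows(rmse_ridge, rsq_ridge),
  lasso = bind_rows(rmse_lasso, rsq_lasso),
  .id = "model"
)

On this split, least squares and the lasso reach essentially the same test R-squared (about 0.936), while ridge trails on both metrics.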