set.seed(1234)
# read flight data set in
library(readxl)
flightdata<-read_excel("G:/Other computers/My Laptop/Documents/Richard 622 last semester/Test 1/flight_data.xlsx")
# what does the data look like?
str(flightdata)
## tibble [400 × 2] (S3: tbl_df/tbl/data.frame)
## $ speed : num [1:400] 27.7 29.2 33.8 34.1 34.2 ...
## $ distance: num [1:400] 1324 1077 937 1198 956 ...
#What does the data look like plotted?
#scatter plot
plot(flightdata$distance ~ flightdata$speed, xlab = "Speed", ylab = "Distance", main = "Scatterplot of Distance against Speed")
#plot is not linear but dips down and curves up
#load spline library to run spline model
library(splines)
#Change variables to X and Y to make them easier to work with
Y <- flightdata$distance
X <- flightdata$speed
#spline model with 5 degrees of freedom
flightdata_sp = lm(Y~bs(X, df = 5), data = flightdata)
summary(flightdata_sp)
##
## Call:
## lm(formula = Y ~ bs(X, df = 5), data = flightdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -541.47 -122.79 -5.74 131.70 544.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1211.2 111.7 10.842 < 2e-16 ***
## bs(X, df = 5)1 -365.2 177.9 -2.052 0.0408 *
## bs(X, df = 5)2 -494.2 104.6 -4.722 3.25e-06 ***
## bs(X, df = 5)3 1050.5 140.3 7.486 4.70e-13 ***
## bs(X, df = 5)4 3203.4 139.1 23.024 < 2e-16 ***
## bs(X, df = 5)5 5442.5 179.1 30.388 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194.2 on 394 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9632
## F-statistic: 2090 on 5 and 394 DF, p-value: < 2.2e-16
#a) Calculate 70% confidence intervals of all data points
flightdata_conf_inter <- predict(flightdata_sp, interval = "confidence", level = 0.70)
flightdata_conf_inter
## fit lwr upr
## 1 1211.1619 1095.2248 1327.0990
## 2 1172.4360 1074.0206 1270.8513
## 3 1067.4686 1009.2992 1125.6380
## 4 1061.4940 1005.2173 1117.7706
## 5 1059.4027 1003.7762 1115.0293
## 6 1057.7753 1002.6502 1112.9005
## 7 987.6433 949.7622 1025.5245
## 8 966.1149 931.5559 1000.6740
## 9 965.5131 931.0329 999.9934
## 10 953.4851 920.4292 986.5410
## 11 951.7346 918.8628 984.6063
## 12 946.9374 914.5423 979.3325
## 13 943.9299 911.8137 976.0461
## 14 942.1182 910.1631 974.0734
## 15 937.4581 905.8949 969.0212
## 16 931.9344 900.7992 963.0696
## 17 931.0594 899.9888 962.1299
## 18 929.1353 898.2044 960.0663
## 19 925.8858 895.1835 956.5880
## 20 925.3933 894.7251 956.0616
## 21 923.7406 893.1856 954.2957
## 22 914.4049 884.4782 944.3316
## 23 912.4103 882.6210 942.1995
## 24 911.6770 881.9391 941.4148
## 25 909.1075 879.5549 938.6602
## 26 903.6303 874.5141 932.7464
## 27 902.4947 873.4799 931.5096
## 28 899.5013 870.7805 928.2221
## 29 898.3260 869.7344 926.9176
## 30 897.2415 868.7779 925.7050
## 31 896.3336 867.9851 924.6822
## 32 893.1243 865.2610 920.9875
## 33 891.5240 863.9736 919.0745
## 34 890.1536 862.9380 917.3692
## 35 889.1668 862.2586 916.0751
## 36 888.4741 861.8427 915.1056
## 37 887.2217 861.7741 912.6693
## 38 887.8116 863.2955 912.3276
## 39 888.4849 864.3995 912.5703
## 40 888.6514 864.6536 912.6492
## 41 888.9223 865.0572 912.7875
## 42 889.0497 865.2434 912.8560
## 43 889.1849 865.4389 912.9309
## 44 890.5044 867.2651 913.7436
## 45 891.8909 869.0836 914.6983
## 46 892.4449 869.7915 915.0983
## 47 892.7732 870.2071 915.3393
## 48 893.7960 871.4856 916.1064
## 49 894.7401 872.6473 916.8328
## 50 895.5826 873.6718 917.4935
## 51 896.3135 874.5522 918.0749
## 52 896.3711 874.6212 918.1209
## 53 897.0678 875.4536 918.6819
## 54 897.9052 876.4468 919.3636
## 55 899.0032 877.7381 920.2682
## 56 900.7223 879.7381 921.7064
## 57 901.0127 880.0737 921.9517
## 58 901.7558 880.9294 922.5822
## 59 903.3369 882.7372 923.9365
## 60 903.5747 883.0078 924.1416
## 61 904.8058 884.4028 925.2088
## 62 907.9404 887.9178 927.9630
## 63 909.5084 889.6585 929.3584
## 64 911.6681 892.0388 931.2973
## 65 912.7503 893.2248 932.2757
## 66 913.2253 893.7440 932.7065
## 67 916.0535 896.8192 935.2879
## 68 918.0916 899.0192 937.1640
## 69 920.4475 901.5472 939.3478
## 70 924.2746 905.6226 942.9266
## 71 930.6245 912.3100 948.9391
## 72 930.6734 912.3611 948.9857
## 73 932.8864 914.6723 951.1005
## 74 935.5143 917.4054 953.6232
## 75 936.8921 918.8337 954.9506
## 76 942.3407 924.4529 960.2285
## 77 944.9576 927.1368 962.7783
## 78 946.6150 928.8321 964.3979
## 79 948.4625 930.7178 966.2071
## 80 949.1487 931.4172 966.8802
## 81 951.8652 934.1808 969.5496
## 82 959.9494 942.3613 977.5375
## 83 960.2070 942.6210 977.7930
## 84 964.6725 947.1146 982.2304
## 85 965.8911 948.3383 983.4439
## 86 968.7431 951.1983 986.2879
## 87 969.5261 951.9825 987.0696
## 88 973.1692 955.6267 990.7117
## 89 975.2680 957.7228 992.8133
## 90 982.0737 964.5049 999.6425
## 91 982.8311 965.2584 1000.4038
## 92 983.1030 965.5289 1000.6771
## 93 983.3958 965.8202 1000.9715
## 94 986.4588 968.8647 1004.0529
## 95 991.4619 973.8311 1009.0927
## 96 992.2452 974.6080 1009.8824
## 97 992.6673 975.0266 1010.3080
## 98 992.9007 975.2580 1010.5434
## 99 996.0859 978.4151 1013.7567
## 100 997.5677 979.8830 1015.2523
## 101 998.0999 980.4102 1015.7897
## 102 998.5923 980.8978 1016.2868
## 103 999.9081 982.2007 1017.6156
## 104 1009.5295 991.7197 1027.3393
## 105 1013.8029 995.9445 1031.6613
## 106 1014.5582 996.6912 1032.4253
## 107 1023.0566 1005.0903 1041.0228
## 108 1024.1331 1006.1542 1042.1120
## 109 1024.2983 1006.3175 1042.2792
## 110 1024.6347 1006.6499 1042.6194
## 111 1024.6844 1006.6991 1042.6697
## 112 1025.1276 1007.1371 1043.1181
## 113 1025.2338 1007.2421 1043.2256
## 114 1027.3025 1009.2866 1045.3184
## 115 1030.7718 1012.7157 1048.8279
## 116 1031.9767 1013.9068 1050.0466
## 117 1036.2311 1018.1131 1054.3491
## 118 1036.8782 1018.7530 1055.0035
## 119 1038.3879 1020.2460 1056.5298
## 120 1040.8621 1022.6933 1059.0309
## 121 1041.9993 1023.8183 1060.1803
## 122 1043.5277 1025.3305 1061.7249
## 123 1050.1701 1031.9055 1068.4347
## 124 1055.1206 1036.8093 1073.4318
## 125 1055.4238 1037.1098 1073.7378
## 126 1056.0051 1037.6858 1074.3243
## 127 1069.5108 1051.0843 1087.9372
## 128 1074.8795 1056.4189 1093.3401
## 129 1079.9056 1061.4177 1098.3935
## 130 1082.7127 1064.2116 1101.2138
## 131 1091.1128 1072.5813 1109.6443
## 132 1091.4588 1072.9263 1109.9913
## 133 1103.7872 1085.2364 1122.3379
## 134 1111.7888 1093.2429 1130.3347
## 135 1120.2297 1101.7031 1138.7562
## 136 1127.7918 1109.2944 1146.2893
## 137 1130.8198 1112.3369 1149.3028
## 138 1136.6220 1118.1713 1155.0727
## 139 1137.5323 1119.0872 1155.9774
## 140 1137.7836 1119.3401 1156.2271
## 141 1141.3509 1122.9306 1159.7712
## 142 1141.8914 1123.4748 1160.3080
## 143 1145.0768 1126.6828 1163.4707
## 144 1146.3269 1127.9423 1164.7116
## 145 1149.3677 1131.0067 1167.7288
## 146 1149.6546 1131.2958 1168.0133
## 147 1150.1469 1131.7921 1168.5017
## 148 1151.8565 1133.5157 1170.1972
## 149 1158.1970 1139.9116 1176.4825
## 150 1167.1266 1148.9275 1185.3257
## 151 1174.7724 1156.6543 1192.8905
## 152 1178.6239 1160.5488 1196.6990
## 153 1179.5270 1161.4622 1197.5918
## 154 1183.4649 1165.4457 1201.4840
## 155 1187.1565 1169.1813 1205.1316
## 156 1193.6067 1175.7111 1211.5024
## 157 1195.8015 1177.9335 1213.6695
## 158 1196.8326 1178.9777 1214.6874
## 159 1201.4171 1183.6214 1219.2127
## 160 1202.3021 1184.5180 1220.0861
## 161 1203.3121 1185.5412 1221.0829
## 162 1211.4908 1193.8294 1229.1522
## 163 1213.5565 1195.9232 1231.1898
## 164 1220.4855 1202.9479 1238.0231
## 165 1221.2791 1203.7526 1238.8057
## 166 1222.3089 1204.7968 1239.8211
## 167 1222.4828 1204.9731 1239.9926
## 168 1227.7754 1210.3402 1245.2107
## 169 1233.5254 1216.1719 1250.8789
## 170 1245.2626 1228.0776 1262.4475
## 171 1250.3259 1233.2139 1267.4378
## 172 1252.5546 1235.4749 1269.6344
## 173 1262.7268 1245.7935 1279.6601
## 174 1263.2794 1246.3541 1280.2048
## 175 1263.9813 1247.0661 1280.8966
## 176 1267.4552 1250.5896 1284.3207
## 177 1268.8302 1251.9843 1285.6761
## 178 1275.5823 1258.8323 1292.3324
## 179 1280.9700 1264.2957 1297.6443
## 180 1296.9557 1280.5007 1313.4106
## 181 1297.0129 1280.5587 1313.4671
## 182 1303.4020 1287.0327 1319.7713
## 183 1307.0152 1290.6931 1323.3374
## 184 1312.4072 1296.1543 1328.6601
## 185 1324.1460 1308.0385 1340.2536
## 186 1329.2190 1313.1717 1345.2663
## 187 1339.3750 1323.4436 1355.3064
## 188 1343.9519 1328.0704 1359.8333
## 189 1358.3201 1342.5858 1374.0545
## 190 1358.8500 1343.1208 1374.5792
## 191 1358.9272 1343.1988 1374.6557
## 192 1375.6998 1360.1223 1391.2772
## 193 1376.7390 1361.1701 1392.3079
## 194 1392.9358 1377.4894 1408.3821
## 195 1397.5125 1382.0967 1412.9283
## 196 1410.2689 1394.9287 1425.6091
## 197 1415.1756 1399.8606 1430.4906
## 198 1423.4326 1408.1553 1438.7099
## 199 1426.7138 1411.4498 1441.9778
## 200 1427.2047 1411.9426 1442.4668
## 201 1441.1065 1425.8895 1456.3236
## 202 1442.6841 1427.4711 1457.8971
## 203 1446.9871 1431.7840 1462.1903
## 204 1450.1854 1434.9886 1465.3823
## 205 1454.5901 1439.4004 1469.7798
## 206 1467.1886 1452.0103 1482.3670
## 207 1470.4595 1455.2819 1485.6372
## 208 1473.7050 1458.5272 1488.8828
## 209 1473.7749 1458.5971 1488.9527
## 210 1482.4560 1467.2734 1497.6386
## 211 1487.1618 1471.9740 1502.3496
## 212 1487.6783 1472.4898 1502.8668
## 213 1495.4332 1480.2320 1510.6344
## 214 1498.3004 1483.0933 1513.5076
## 215 1504.0993 1488.8783 1519.3202
## 216 1512.7282 1497.4819 1527.9744
## 217 1514.5746 1499.3223 1529.8270
## 218 1515.3459 1500.0910 1530.6009
## 219 1515.4832 1500.2278 1530.7387
## 220 1516.5968 1501.3375 1531.8561
## 221 1517.1910 1501.9295 1532.4524
## 222 1529.9323 1514.6198 1545.2447
## 223 1538.5240 1523.1710 1553.8769
## 224 1541.6650 1526.2960 1557.0340
## 225 1565.7534 1550.2426 1581.2642
## 226 1566.6224 1551.1059 1582.1389
## 227 1567.8744 1552.3495 1583.3992
## 228 1597.2266 1581.4862 1612.9669
## 229 1600.5025 1584.7358 1616.2692
## 230 1610.2859 1594.4384 1626.1335
## 231 1612.6194 1596.7521 1628.4867
## 232 1616.1131 1600.2159 1632.0103
## 233 1644.7069 1628.5526 1660.8612
## 234 1645.8564 1629.6914 1662.0214
## 235 1658.9868 1642.6978 1675.2757
## 236 1659.5638 1643.2693 1675.8583
## 237 1659.7860 1643.4894 1676.0826
## 238 1663.3240 1646.9936 1679.6545
## 239 1664.2895 1647.9498 1680.6292
## 240 1685.5347 1668.9891 1702.0803
## 241 1689.5712 1672.9862 1706.1562
## 242 1689.9821 1673.3931 1706.5712
## 243 1697.9084 1681.2420 1714.5749
## 244 1707.8040 1691.0410 1724.5670
## 245 1709.6098 1692.8293 1726.3904
## 246 1722.0675 1705.1661 1738.9689
## 247 1724.4944 1707.5696 1741.4191
## 248 1727.1543 1710.2040 1744.1047
## 249 1734.4021 1717.3824 1751.4217
## 250 1745.2907 1728.1682 1762.4133
## 251 1752.1779 1734.9911 1769.3646
## 252 1753.3855 1736.1876 1770.5834
## 253 1757.2831 1740.0494 1774.5169
## 254 1759.1058 1741.8553 1776.3562
## 255 1761.4145 1744.1430 1778.6859
## 256 1770.8932 1753.5364 1788.2499
## 257 1775.2380 1757.8429 1792.6332
## 258 1777.1541 1759.7421 1794.5661
## 259 1796.5967 1779.0192 1814.1743
## 260 1799.7933 1782.1895 1817.3971
## 261 1801.8982 1784.2773 1819.5192
## 262 1805.4925 1787.8426 1823.1424
## 263 1816.5415 1798.8050 1834.2781
## 264 1822.1642 1804.3849 1839.9434
## 265 1823.7654 1805.9742 1841.5566
## 266 1824.5042 1806.7075 1842.3009
## 267 1872.7377 1854.6239 1890.8515
## 268 1879.2950 1861.1448 1897.4452
## 269 1890.9392 1872.7286 1909.1499
## 270 1912.3610 1894.0525 1930.6695
## 271 1916.0770 1897.7533 1934.4007
## 272 1922.6819 1904.3322 1941.0315
## 273 1923.6421 1905.2889 1941.9954
## 274 1937.4639 1919.0619 1955.8659
## 275 1939.0853 1920.6780 1957.4925
## 276 1969.4533 1950.9628 1987.9438
## 277 1969.4707 1950.9802 1987.9612
## 278 1984.5592 1966.0380 2003.0805
## 279 1992.8913 1974.3559 2011.4267
## 280 1996.1953 1977.6549 2014.7358
## 281 2007.4052 1988.8497 2025.9607
## 282 2010.0432 1991.4847 2028.6018
## 283 2014.4842 1995.9209 2033.0476
## 284 2015.7585 1997.1939 2034.3231
## 285 2024.0456 2005.4737 2042.6175
## 286 2042.6167 2024.0343 2061.1992
## 287 2049.5739 2030.9896 2068.1583
## 288 2052.8317 2034.2467 2071.4167
## 289 2065.6891 2047.1038 2084.2743
## 290 2065.8089 2047.2236 2084.3941
## 291 2076.0466 2057.4635 2094.6298
## 292 2082.3731 2063.7921 2100.9541
## 293 2082.5666 2063.9856 2101.1475
## 294 2091.0744 2072.4974 2109.6514
## 295 2094.4717 2075.8966 2113.0467
## 296 2098.8221 2080.2497 2117.3945
## 297 2117.1253 2098.5671 2135.6834
## 298 2119.0599 2100.5035 2137.6163
## 299 2135.0881 2116.5478 2153.6284
## 300 2140.4090 2121.8747 2158.9433
## 301 2149.7599 2131.2367 2168.2832
## 302 2157.2133 2138.6994 2175.7272
## 303 2164.2363 2145.7316 2182.7410
## 304 2175.1980 2156.7082 2193.6878
## 305 2179.8993 2161.4160 2198.3825
## 306 2186.5107 2168.0370 2204.9845
## 307 2187.7111 2169.2391 2206.1832
## 308 2194.9307 2176.4692 2213.3922
## 309 2204.5618 2186.1145 2223.0090
## 310 2208.5977 2190.1565 2227.0390
## 311 2216.6289 2198.1997 2235.0581
## 312 2226.0737 2207.6587 2244.4887
## 313 2231.3707 2212.9636 2249.7777
## 314 2234.1767 2215.7738 2252.5795
## 315 2234.5434 2216.1411 2252.9457
## 316 2249.1168 2230.7360 2267.4976
## 317 2255.6727 2237.3013 2274.0440
## 318 2271.5607 2253.2115 2289.9099
## 319 2281.0034 2262.6667 2299.3400
## 320 2299.1356 2280.8213 2317.4499
## 321 2309.2163 2290.9133 2327.5194
## 322 2315.2371 2296.9404 2333.5338
## 323 2337.8975 2319.6212 2356.1738
## 324 2341.4471 2323.1734 2359.7207
## 325 2347.1534 2328.8837 2365.4230
## 326 2382.8031 2364.5485 2401.0577
## 327 2435.3409 2417.0723 2453.6094
## 328 2457.0773 2438.7885 2475.3661
## 329 2502.4651 2484.1035 2520.8266
## 330 2504.2026 2485.8374 2522.5677
## 331 2505.1221 2486.7550 2523.4892
## 332 2559.6036 2541.0875 2578.1196
## 333 2569.7509 2551.1997 2588.3020
## 334 2584.2249 2565.6196 2602.8302
## 335 2590.8242 2572.1927 2609.4558
## 336 2617.1260 2598.3798 2635.8722
## 337 2639.4510 2620.5951 2658.3070
## 338 2646.5297 2627.6366 2665.4228
## 339 2668.8137 2649.7961 2687.8312
## 340 2701.2908 2682.0723 2720.5094
## 341 2729.4017 2709.9907 2748.8126
## 342 2735.3625 2715.9086 2754.8164
## 343 2736.8153 2717.3508 2756.2797
## 344 2744.8869 2725.3629 2764.4109
## 345 2773.7232 2753.9759 2793.4705
## 346 2809.3931 2789.3476 2829.4387
## 347 2832.8582 2812.6040 2853.1125
## 348 2920.0608 2898.9549 2941.1667
## 349 2939.8471 2918.5334 2961.1608
## 350 2967.8333 2946.2178 2989.4488
## 351 2978.8479 2957.1114 3000.5845
## 352 2998.3729 2976.4188 3020.3270
## 353 3001.1933 2979.2076 3023.1791
## 354 3046.3445 3023.8420 3068.8469
## 355 3048.9207 3026.3883 3071.4531
## 356 3080.1273 3057.2290 3103.0257
## 357 3123.7216 3100.3035 3147.1398
## 358 3129.2648 3105.7799 3152.7496
## 359 3144.1091 3120.4453 3167.7729
## 360 3154.0328 3130.2490 3177.8167
## 361 3159.7841 3135.9306 3183.6376
## 362 3173.3051 3149.2875 3197.3226
## 363 3198.3112 3173.9895 3222.6330
## 364 3222.4982 3197.8818 3247.1147
## 365 3228.1590 3203.4736 3252.8445
## 366 3240.0340 3215.2039 3264.8641
## 367 3252.8637 3227.8773 3277.8500
## 368 3310.2881 3284.6052 3335.9711
## 369 3354.9152 3328.6964 3381.1339
## 370 3405.3885 3378.5725 3432.2044
## 371 3476.3661 3448.7311 3504.0012
## 372 3488.0445 3460.2774 3515.8117
## 373 3494.3783 3466.5398 3522.2167
## 374 3552.9379 3524.4524 3581.4235
## 375 3571.4390 3542.7537 3600.1243
## 376 3651.7959 3622.2710 3681.3207
## 377 3727.4207 3697.1485 3757.6928
## 378 3734.0565 3703.7208 3764.3922
## 379 3736.9189 3706.5559 3767.2820
## 380 3763.2712 3732.6593 3793.8830
## 381 3780.0442 3749.2766 3810.8118
## 382 3971.7498 3939.3327 4004.1669
## 383 3982.9112 3950.4046 4015.4179
## 384 4069.1328 4035.9507 4102.3148
## 385 4173.0696 4139.0981 4207.0412
## 386 4190.2802 4156.1786 4224.3819
## 387 4234.0701 4199.6361 4268.5041
## 388 4260.3673 4225.7319 4295.0027
## 389 4262.6372 4227.9843 4297.2901
## 390 4473.9336 4437.5185 4510.3488
## 391 4565.8056 4528.4798 4603.1313
## 392 4687.4681 4648.7158 4726.2205
## 393 4750.3538 4710.7392 4789.9684
## 394 4969.5808 4926.0990 5013.0626
## 395 4987.6054 4943.7369 5031.4740
## 396 5248.9184 5198.0616 5299.7751
## 397 5253.5425 5202.5373 5304.5476
## 398 5642.3197 5575.5448 5709.0947
## 399 6094.8633 6001.5138 6188.2128
## 400 6653.6284 6515.9397 6791.3172
#b) Scatter plot comparing Speed and Distance to confidence intervals
plot(X, Y, xlab = "Speed", main = "Regression Spline with df = 5", ylab = "Distance", cex = .5)
lines(X, flightdata_sp$fitted.values, col = "blue", lwd = 1)
flightdata_conf_inter <- predict(flightdata_sp, interval = "confidence", level = 0.70)
lines(X, flightdata_conf_inter[,2], col = "red", lty = 2)
lines(X, flightdata_conf_inter[,3], col = "red", lty = 2)
#Natural Spline 3 degrees of freedom
ns_flightdata = lm(Y~ns(X, df = 3), data = flightdata)
summary(ns_flightdata)
##
## Call:
## lm(formula = Y ~ ns(X, df = 3), data = flightdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -564.04 -121.52 -3.54 130.88 541.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1069.81 59.34 18.03 <2e-16 ***
## ns(X, df = 3)1 1266.40 44.35 28.55 <2e-16 ***
## ns(X, df = 3)2 3236.23 137.77 23.49 <2e-16 ***
## ns(X, df = 3)3 5494.25 75.91 72.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 195.6 on 396 degrees of freedom
## Multiple R-squared: 0.9629, Adjusted R-squared: 0.9626
## F-statistic: 3429 on 3 and 396 DF, p-value: < 2.2e-16
# Compare this model to the model in question 1 using ANOVA
anova(flightdata_sp, ns_flightdata)
## Analysis of Variance Table
##
## Model 1: Y ~ bs(X, df = 5)
## Model 2: Y ~ ns(X, df = 3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 394 14851546
## 2 396 15155277 -2 -303730 4.0289 0.01853 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The bs model has the lowest sum of squared errors at 14851546 and is statistically significant in that the p value for the two models is below 0.05 so model 1 as captured in the ANOVA/bs model should be used to predict landing distance.
# set seed to always randomly sample same numbers
set.seed(1234)
# read health data set in
health <- read.csv("G:/Other computers/My Laptop/Documents/Richard 622 last semester/Test 1/health_dataset.csv")
# what does the data look like?
str(health)
## 'data.frame': 2000 obs. of 20 variables:
## $ Gender : chr "female" "female" "female" "female" ...
## $ Age : int 49 45 45 45 66 54 50 16 56 56 ...
## $ Weight : num 86.7 75.7 75.7 75.7 68 74.7 84.1 73.2 57.5 57.5 ...
## $ Height : num 168 167 167 167 170 ...
## $ BMI : num 30.6 27.2 27.2 27.2 23.7 ...
## $ Pulse : int 86 62 62 62 60 76 74 76 64 64 ...
## $ BPSysAve : int 112 118 118 118 111 134 142 126 95 95 ...
## $ BPDiaAve : int 75 64 64 64 63 85 68 72 69 69 ...
## $ BPSys1 : int 118 106 106 106 124 136 138 132 94 94 ...
## $ BPDia1 : int 82 62 62 62 64 86 66 74 74 74 ...
## $ BPSys2 : int 108 118 118 118 108 132 142 126 94 94 ...
## $ BPDia2 : int 74 68 68 68 62 88 74 68 70 70 ...
## $ BPSys3 : int 116 118 118 118 114 136 142 126 96 96 ...
## $ BPDia3 : int 76 60 60 60 64 82 62 76 68 68 ...
## $ DirectChol : num 1.16 2.12 2.12 2.12 0.67 1.16 1.06 1.14 2.22 2.22 ...
## $ TotChol : num 6.7 5.82 5.82 5.82 4.99 6.41 5.22 3 5.79 5.79 ...
## $ UrineVol1 : int 77 106 106 106 113 215 64 345 26 26 ...
## $ UrineFlow1 : num 0.094 1.116 1.116 1.116 0.489 ...
## $ Diabetes : chr "No" "No" "No" "No" ...
## $ SleepTrouble: chr "Yes" "No" "No" "No" ...
# 20 variables some numbers, some characters, and some integers
# separate into 50% training and testing data set
sample_health <- sample(nrow(health), nrow(health)*0.50)
health_train <- health[sample_health,]
health_test <- health[-sample_health,]
#decision tree (classification tree) for character variable Diabetes
#load library for decsion trees
library(rpart)
library(rpart.plot)
#descision tree model
health_rpart1 <- rpart(formula = Diabetes ~., data = health_train, method = "class")
pred1 <- predict(health_rpart1, health_test, type = "class")
pred1
## 1 2 3 5 7 8 9 12 14 15 16 17 18 20 21 22
## No No No No No No No No No No No No No No No No
## 24 26 28 29 31 33 36 37 38 44 46 47 50 52 56 62
## No No No No No No No No No No No No No No No No
## 64 68 69 70 72 73 74 75 76 77 78 80 81 83 84 85
## No No No No No No No No No No No No No Yes No No
## 87 89 90 92 93 94 99 101 104 105 106 108 110 111 112 113
## No No No No No No No No No No No No No No No No
## 114 115 117 119 120 121 122 129 130 133 134 139 140 143 144 146
## No No No No No No No No No No No No No No No Yes
## 147 148 149 150 151 152 154 156 157 158 159 167 172 173 175 179
## No No No No No No No No No No No No No No No No
## 180 185 186 187 188 190 192 193 195 196 197 198 200 209 210 211
## Yes No No No No No Yes No No No No Yes No No No No
## 212 213 217 218 219 220 221 227 229 230 231 232 233 234 236 238
## No No No No No No No No No No No No No No No No
## 239 240 242 243 244 246 247 249 252 253 255 256 257 260 261 264
## No No No No No No No No No No No No No No No No
## 265 266 267 268 271 272 273 274 275 277 282 285 287 288 289 290
## No No No No No No Yes No No Yes No No No No No No
## 291 295 297 299 300 301 302 304 306 310 313 314 315 316 317 320
## No No No No No No No No No No No No No No No No
## 322 323 324 325 327 329 330 332 336 337 339 342 343 345 351 353
## No No No No No No No No No No No No No No No No
## 355 356 358 359 360 362 363 364 370 372 374 375 376 377 378 381
## No No No No No No No No No No No No Yes Yes No No
## 384 385 387 391 392 393 394 397 398 401 403 404 407 408 409 410
## No No No No No No No No No No No No No No No No
## 411 412 414 415 416 418 419 423 424 425 426 428 430 433 435 437
## No No No No No No No No No No No No No No No No
## 438 439 441 443 444 445 446 447 450 451 452 455 458 466 468 470
## No No No No No No No No No No No No No No No No
## 472 481 483 490 492 494 496 497 498 500 502 503 505 506 507 509
## No No No No No No No No No No No No No No Yes No
## 512 513 516 517 521 527 528 531 536 538 539 544 550 551 553 555
## No No No No No No No No No No No No No No No No
## 556 557 558 559 560 564 565 566 567 568 571 575 579 581 582 584
## No No No No No No No No No No No No No No No No
## 586 591 592 593 594 596 599 610 611 612 616 619 620 621 624 628
## No No No No No No No No No No No No No No No No
## 631 632 636 640 641 643 644 649 650 651 654 656 657 658 660 661
## No No No No No No No No No No No No Yes No No No
## 662 663 666 671 672 673 675 676 678 682 684 685 686 690 691 692
## No No No Yes No No No No No No No No No No No No
## 695 701 705 706 707 710 712 713 714 717 718 719 720 721 722 723
## No No No No No No No No No No No No No No No No
## 728 732 734 735 736 738 739 743 744 745 747 748 749 750 751 753
## No No No No No Yes Yes No No No No No No No No No
## 756 758 759 761 762 763 764 766 767 769 774 777 781 783 785 787
## No No No No No No No No No No No No No No No No
## 794 795 796 797 798 799 800 805 809 812 814 817 820 821 825 826
## No No No No No No No No No No No No No No No No
## 827 829 832 837 839 842 844 845 846 847 850 852 855 856 857 858
## No No No No No No No No No No Yes Yes No No No No
## 859 862 863 867 871 872 873 876 878 879 880 881 882 887 888 890
## No No No No No No No No No No No No No No No No
## 892 899 904 907 910 914 915 923 925 927 932 933 935 941 942 943
## No No No No No No No No No No No No No No No No
## 944 945 948 949 954 955 958 960 961 964 965 966 968 969 974 975
## No No No No Yes No No No No No No No No No No No
## 977 978 981 983 985 987 990 992 994 997 999 1001 1002 1005 1006 1007
## No No No No No No No No No No No No No No No No
## 1013 1015 1016 1019 1020 1021 1022 1023 1026 1027 1029 1031 1032 1033 1034 1035
## No No No No No No No No No No No No No No Yes No
## 1037 1038 1040 1042 1044 1047 1048 1049 1050 1052 1053 1057 1059 1063 1064 1066
## No No No No No No No No Yes No No No No No No No
## 1067 1068 1069 1070 1071 1076 1078 1080 1082 1083 1085 1086 1089 1090 1091 1093
## No No No No No No No No No No No No No No No No
## 1096 1097 1102 1105 1112 1113 1114 1115 1116 1117 1120 1122 1123 1126 1130 1133
## No No No No No No No No No No No No No No No No
## 1134 1135 1137 1139 1141 1145 1149 1152 1153 1156 1157 1158 1161 1162 1163 1164
## No No No No No No No No No No No No No No No No
## 1168 1170 1172 1177 1178 1180 1181 1182 1186 1187 1189 1191 1193 1196 1197 1199
## No No No No No No No No No No No No No No No No
## 1200 1201 1204 1205 1207 1210 1214 1215 1217 1220 1221 1223 1224 1227 1229 1230
## No No No No No No No No No No No No No No No No
## 1232 1233 1237 1240 1243 1244 1246 1249 1250 1252 1253 1257 1263 1265 1266 1268
## Yes No No No No No No No Yes No No No No No No No
## 1269 1270 1272 1278 1279 1280 1282 1283 1285 1286 1288 1290 1291 1292 1293 1294
## No No No No No No No No No No No No No No No No
## 1300 1303 1304 1306 1307 1310 1312 1313 1314 1316 1317 1319 1320 1322 1323 1324
## No No No No No No No No No No No No No No No No
## 1325 1326 1327 1334 1338 1339 1340 1341 1342 1344 1346 1347 1351 1353 1354 1355
## No No No No No No No No No No No No No No No No
## 1356 1357 1360 1363 1370 1372 1375 1376 1377 1378 1379 1380 1383 1384 1388 1391
## No No No No No No No Yes Yes No No No No No No No
## 1393 1397 1398 1401 1404 1405 1408 1410 1411 1412 1413 1414 1415 1416 1417 1418
## No No Yes No No No No No No No No No No No No No
## 1420 1425 1426 1430 1431 1432 1434 1435 1437 1439 1440 1441 1442 1444 1447 1449
## No No No No No No No No No No No No No No No Yes
## 1450 1451 1453 1455 1457 1459 1461 1462 1464 1466 1467 1469 1471 1475 1477 1481
## No No No No No No No No No No No No No No No No
## 1484 1486 1488 1489 1496 1497 1500 1504 1507 1509 1511 1513 1514 1516 1517 1518
## No No No No No No No No No No No No No No No No
## 1520 1522 1523 1527 1528 1529 1530 1533 1537 1538 1540 1541 1543 1544 1547 1549
## No No No No No No No No No No No No No No No No
## 1550 1551 1554 1555 1556 1559 1564 1566 1567 1568 1569 1570 1571 1572 1573 1574
## No No No No No No No No No No No Yes No No No No
## 1576 1582 1583 1587 1589 1591 1593 1596 1598 1599 1600 1603 1606 1607 1608 1609
## No No No No No No No Yes No No No No No No No No
## 1610 1611 1612 1615 1625 1631 1637 1638 1639 1640 1641 1642 1644 1645 1646 1647
## No No No No No No No No No No No No No No No No
## 1648 1649 1652 1654 1656 1657 1658 1661 1663 1665 1668 1669 1670 1674 1675 1676
## No No No No No No No No No No No No No No No No
## 1677 1678 1681 1683 1686 1687 1688 1689 1690 1692 1693 1696 1697 1698 1700 1707
## No No No No No No No No No No No No No No Yes No
## 1708 1710 1712 1713 1714 1717 1718 1719 1720 1722 1724 1726 1727 1728 1729 1730
## No No No No No No No No No No No No No No No No
## 1733 1734 1735 1737 1738 1742 1743 1747 1748 1750 1755 1757 1758 1759 1763 1764
## No Yes No No No No No No No No No No No No No No
## 1765 1767 1769 1770 1771 1773 1775 1776 1777 1778 1780 1781 1783 1784 1786 1787
## No No No No No No No No No No No No No No No No
## 1790 1791 1793 1794 1795 1797 1799 1801 1805 1810 1812 1813 1815 1816 1817 1819
## No No No No No No No No No No No No No No No No
## 1825 1827 1831 1833 1836 1837 1838 1839 1844 1845 1846 1847 1852 1856 1857 1859
## No No No No No No Yes Yes No No No No No No No No
## 1860 1861 1865 1866 1868 1869 1871 1873 1874 1876 1878 1879 1881 1882 1885 1886
## No No No No No No No No No No No No No No No No
## 1887 1891 1892 1893 1896 1899 1903 1906 1907 1909 1911 1914 1916 1917 1919 1921
## No No No No No No No No No Yes No No No No No No
## 1923 1927 1929 1931 1932 1933 1938 1941 1944 1948 1949 1950 1951 1952 1954 1958
## No No No No No No No No No No No No No No No No
## 1961 1963 1967 1969 1970 1971 1973 1974 1975 1977 1978 1979 1981 1983 1985 1988
## No Yes No No Yes No No No No No No No No No No No
## 1989 1991 1993 1994 1995 1996 1998 1999
## No No No No No No No No
## Levels: No Yes
#confusion matrix
table(health_test$Diabetes, pred1, dnn = c("True", "Pred"))
## Pred
## True No Yes
## No 894 17
## Yes 72 17
sum(health_test$Diabete != pred1) #number misclassified in testing set
## [1] 89
# Pred
#True No Yes
# No 894 17
# Yes 72 17
#total misclassified 89
set.seed(1234)
# random forest
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#random forest model
health_train$Diabetes <- as.factor(health_train$Diabetes)
health_test$Diabetes <- as.factor(health_test$Diabetes)
rf_health <- randomForest(Diabetes ~., data = health_train, importance = TRUE, ntree = 100)
rf_pred1 <- predict(rf_health, health_test)
rf_pred1
## 1 2 3 5 7 8 9 12 14 15 16 17 18 20 21 22
## No No No No No No No No No No No No No No No No
## 24 26 28 29 31 33 36 37 38 44 46 47 50 52 56 62
## No No No No No No No No No No No No No No No No
## 64 68 69 70 72 73 74 75 76 77 78 80 81 83 84 85
## No No No No No No No No No No No No No No No No
## 87 89 90 92 93 94 99 101 104 105 106 108 110 111 112 113
## No No No No No No No No No No No No No No No No
## 114 115 117 119 120 121 122 129 130 133 134 139 140 143 144 146
## No No No No No No No No No No No No No No No Yes
## 147 148 149 150 151 152 154 156 157 158 159 167 172 173 175 179
## No No No No No No No No No No No No No No No No
## 180 185 186 187 188 190 192 193 195 196 197 198 200 209 210 211
## Yes No No No No No No No No No No No No No No No
## 212 213 217 218 219 220 221 227 229 230 231 232 233 234 236 238
## No No No No No No No No No No No No No No No No
## 239 240 242 243 244 246 247 249 252 253 255 256 257 260 261 264
## No No No No No No No No No No No No No No No No
## 265 266 267 268 271 272 273 274 275 277 282 285 287 288 289 290
## No No No No No No No No No No No No No No No No
## 291 295 297 299 300 301 302 304 306 310 313 314 315 316 317 320
## No No No No No No Yes Yes No No No No No No No No
## 322 323 324 325 327 329 330 332 336 337 339 342 343 345 351 353
## No No No No No No No No Yes No No No No No No No
## 355 356 358 359 360 362 363 364 370 372 374 375 376 377 378 381
## No No No No No No No No No No No No No No No No
## 384 385 387 391 392 393 394 397 398 401 403 404 407 408 409 410
## No No No No No No No No No No No No No No No No
## 411 412 414 415 416 418 419 423 424 425 426 428 430 433 435 437
## No No No No No No No No No No No No No No No No
## 438 439 441 443 444 445 446 447 450 451 452 455 458 466 468 470
## No No No No No No No No No No No No No No No No
## 472 481 483 490 492 494 496 497 498 500 502 503 505 506 507 509
## No No No No No No No No No No No No No No No No
## 512 513 516 517 521 527 528 531 536 538 539 544 550 551 553 555
## No No No No No No No No No No No No No No No No
## 556 557 558 559 560 564 565 566 567 568 571 575 579 581 582 584
## No No No No No No No No No No No No No No No No
## 586 591 592 593 594 596 599 610 611 612 616 619 620 621 624 628
## No No No No No No No No No No No No No No No No
## 631 632 636 640 641 643 644 649 650 651 654 656 657 658 660 661
## No No No No No No No No No No No No Yes No No No
## 662 663 666 671 672 673 675 676 678 682 684 685 686 690 691 692
## No No No No No No No No No No No No No No No No
## 695 701 705 706 707 710 712 713 714 717 718 719 720 721 722 723
## No No No No No No No No No No No No No No No No
## 728 732 734 735 736 738 739 743 744 745 747 748 749 750 751 753
## No No No No No No Yes No No No No No No No No No
## 756 758 759 761 762 763 764 766 767 769 774 777 781 783 785 787
## No No No No No No No No No No No No No No No No
## 794 795 796 797 798 799 800 805 809 812 814 817 820 821 825 826
## No No No No No No No No No No No No No No No No
## 827 829 832 837 839 842 844 845 846 847 850 852 855 856 857 858
## No No No No No No No No No No No No No No No No
## 859 862 863 867 871 872 873 876 878 879 880 881 882 887 888 890
## No No No No No No No No No No No No No No Yes No
## 892 899 904 907 910 914 915 923 925 927 932 933 935 941 942 943
## No No No No No No No No No No No No No No No No
## 944 945 948 949 954 955 958 960 961 964 965 966 968 969 974 975
## No No No No Yes No No No No No No No No No No No
## 977 978 981 983 985 987 990 992 994 997 999 1001 1002 1005 1006 1007
## No No No No No No No No No No No No No No No No
## 1013 1015 1016 1019 1020 1021 1022 1023 1026 1027 1029 1031 1032 1033 1034 1035
## No No No No No No No No No No No No No No No No
## 1037 1038 1040 1042 1044 1047 1048 1049 1050 1052 1053 1057 1059 1063 1064 1066
## No No No No No No No No No No No No No No No No
## 1067 1068 1069 1070 1071 1076 1078 1080 1082 1083 1085 1086 1089 1090 1091 1093
## No No No No No No No No No No No No No No No No
## 1096 1097 1102 1105 1112 1113 1114 1115 1116 1117 1120 1122 1123 1126 1130 1133
## No No No No No No No No No No No Yes Yes No No No
## 1134 1135 1137 1139 1141 1145 1149 1152 1153 1156 1157 1158 1161 1162 1163 1164
## No No No No No No Yes No No No No No No No No No
## 1168 1170 1172 1177 1178 1180 1181 1182 1186 1187 1189 1191 1193 1196 1197 1199
## No No No No No No No No No No No No No No No No
## 1200 1201 1204 1205 1207 1210 1214 1215 1217 1220 1221 1223 1224 1227 1229 1230
## No No No No No No No No No No No No No Yes No No
## 1232 1233 1237 1240 1243 1244 1246 1249 1250 1252 1253 1257 1263 1265 1266 1268
## No No No No No No No No No No No No No No No No
## 1269 1270 1272 1278 1279 1280 1282 1283 1285 1286 1288 1290 1291 1292 1293 1294
## No No No No No No No No No No No No No No No No
## 1300 1303 1304 1306 1307 1310 1312 1313 1314 1316 1317 1319 1320 1322 1323 1324
## No No No No No No No No No No No No No No No No
## 1325 1326 1327 1334 1338 1339 1340 1341 1342 1344 1346 1347 1351 1353 1354 1355
## No No No No No No No No No No No No No No No No
## 1356 1357 1360 1363 1370 1372 1375 1376 1377 1378 1379 1380 1383 1384 1388 1391
## No No No No No No No No No No No No No No No No
## 1393 1397 1398 1401 1404 1405 1408 1410 1411 1412 1413 1414 1415 1416 1417 1418
## No No No No No No No No No No No No No No No No
## 1420 1425 1426 1430 1431 1432 1434 1435 1437 1439 1440 1441 1442 1444 1447 1449
## No No No No No No No No No No No No No No No No
## 1450 1451 1453 1455 1457 1459 1461 1462 1464 1466 1467 1469 1471 1475 1477 1481
## No No No No No No No No No No No No No No No Yes
## 1484 1486 1488 1489 1496 1497 1500 1504 1507 1509 1511 1513 1514 1516 1517 1518
## No No No No No No No No No No No No No No No No
## 1520 1522 1523 1527 1528 1529 1530 1533 1537 1538 1540 1541 1543 1544 1547 1549
## No No No No No No No No No No No No No No No No
## 1550 1551 1554 1555 1556 1559 1564 1566 1567 1568 1569 1570 1571 1572 1573 1574
## No No No No No No No No No No No No No No No No
## 1576 1582 1583 1587 1589 1591 1593 1596 1598 1599 1600 1603 1606 1607 1608 1609
## No No No No No No No Yes No No No No No No No No
## 1610 1611 1612 1615 1625 1631 1637 1638 1639 1640 1641 1642 1644 1645 1646 1647
## No No No No No No No No No No No No No No No No
## 1648 1649 1652 1654 1656 1657 1658 1661 1663 1665 1668 1669 1670 1674 1675 1676
## No No No No No No No No No No No No No No No No
## 1677 1678 1681 1683 1686 1687 1688 1689 1690 1692 1693 1696 1697 1698 1700 1707
## No No No No No No No No No No No No No No No No
## 1708 1710 1712 1713 1714 1717 1718 1719 1720 1722 1724 1726 1727 1728 1729 1730
## No No No No No No No No No No No No No No No No
## 1733 1734 1735 1737 1738 1742 1743 1747 1748 1750 1755 1757 1758 1759 1763 1764
## No No No No No No No No No No No No No No No No
## 1765 1767 1769 1770 1771 1773 1775 1776 1777 1778 1780 1781 1783 1784 1786 1787
## No No No No No No No No No No No No No No No No
## 1790 1791 1793 1794 1795 1797 1799 1801 1805 1810 1812 1813 1815 1816 1817 1819
## No No No No No No No No No No No No No No No No
## 1825 1827 1831 1833 1836 1837 1838 1839 1844 1845 1846 1847 1852 1856 1857 1859
## No No No No No No Yes Yes No No No No No No No No
## 1860 1861 1865 1866 1868 1869 1871 1873 1874 1876 1878 1879 1881 1882 1885 1886
## No No No No No No No No No No No No No No No No
## 1887 1891 1892 1893 1896 1899 1903 1906 1907 1909 1911 1914 1916 1917 1919 1921
## No No No No No No No No No No No No No Yes Yes No
## 1923 1927 1929 1931 1932 1933 1938 1941 1944 1948 1949 1950 1951 1952 1954 1958
## No No No Yes Yes Yes No No No No No No No No No No
## 1961 1963 1967 1969 1970 1971 1973 1974 1975 1977 1978 1979 1981 1983 1985 1988
## No No No No No No No No No No No No No No No No
## 1989 1991 1993 1994 1995 1996 1998 1999
## No No No No No No No No
## Levels: No Yes
# confusion matrix
table(health_test$Diabetes, rf_pred1, dnn = c("True", "Pred"))
## Pred
## True No Yes
## No 911 0
## Yes 67 22
#Pred
#True No Yes
# No 907 4
# Yes 67 22
sum(health_test$Diabetes != rf_pred1)
## [1] 67
# total misclassified 67
set.seed(1234)
# boosting model
#install.packages("ggplot2")
#install.packages("caret")
#install.packages("adabag")
library(adabag)
## Loading required package: caret
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
## Loading required package: foreach
## Loading required package: doParallel
## Loading required package: iterators
## Loading required package: parallel
#boosting model
health_train$Diabetes <- as.factor(health_train$Diabetes)
health_test$Diabetes <- as.factor(health_test$Diabetes)
health_boost = boosting(Diabetes~., data = health_train, boos = T)
#variable of importance
health_boost$importance
## Age BMI BPDia1 BPDia2 BPDia3 BPDiaAve
## 9.7224818 7.3471566 4.2315529 2.2132480 2.6707419 4.0858637
## BPSys1 BPSys2 BPSys3 BPSysAve DirectChol Gender
## 2.7075574 3.8308598 4.1279362 4.8625001 6.7513092 0.8828666
## Height Pulse SleepTrouble TotChol UrineFlow1 UrineVol1
## 6.5782872 6.5247443 1.0706133 9.7997150 6.5167752 6.8937461
## Weight
## 9.1820448
boost_pred1 = predict(health_boost, newdata = health_test)
boost_pred1$class
## [1] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [13] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [25] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [37] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [49] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [61] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [73] "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No"
## [85] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [97] "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [109] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [121] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [133] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [145] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [157] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "Yes"
## [169] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [181] "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No"
## [193] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [205] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [217] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [229] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [241] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [253] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [265] "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No"
## [277] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [289] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [301] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [313] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [325] "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
## [337] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [349] "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [361] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [373] "No" "Yes" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [385] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [397] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [409] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [421] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [433] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [445] "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [457] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [469] "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
## [481] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [493] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [505] "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No"
## [517] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [529] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [541] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [553] "No" "No" "No" "Yes" "Yes" "No" "No" "No" "No" "No" "No" "No"
## [565] "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
## [577] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [589] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [601] "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No"
## [613] "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No"
## [625] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [637] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [649] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [661] "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No"
## [673] "No" "No" "No" "No" "No" "No" "No" "Yes" "Yes" "No" "No" "No"
## [685] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [697] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [709] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [721] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [733] "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No"
## [745] "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No"
## [757] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [769] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [781] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes"
## [793] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [805] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [817] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [829] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [841] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [853] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [865] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [877] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [889] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [901] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [913] "No" "No" "No" "No" "No" "No" "Yes" "Yes" "No" "No" "No" "No"
## [925] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [937] "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "Yes"
## [949] "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "Yes" "No"
## [961] "No" "No" "No" "Yes" "Yes" "Yes" "No" "No" "No" "No" "No" "No"
## [973] "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
## [985] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## [997] "No" "No" "No" "No"
boost_pred1$confusion
## Observed Class
## Predicted Class No Yes
## No 903 61
## Yes 8 28
boost_pred1$error
## [1] 0.069
boost_pred1$error*nrow(health_test)
## [1] 69
#confusion matrix
#Observed Class
#Predicted Class No Yes
# No 904 58
# Yes 7 31
# total misclassified 69
#the random forest model misclassifies the smallest number in the testing set with 67 misclassifications unlike decision trees which had 89 misclassifcations and the boosting model had 69 total misclassifications
set.seed(1234)
# KNN model
library(class)
knn_health <- knn(train = health_train[,2:18], test = health_test[,2:18], cl = health_train[,19], k = 5)
#Show the predictions made on the testing set
#output dataset with predictions
knn_health
## [1] No No No No Yes No No No No No No No No No No No No No
## [19] No No No No No No No No No No No No No No No No No No
## [37] No No No No No Yes No No No No No No No No No No No No
## [55] No No No No No No No No No No No No No No No No No No
## [73] No No No No No No No No No No No No No No No No No No
## [91] No No No No No No No No No No No No No No No No No Yes
## [109] No No No No No No No No No No No No No No No No No No
## [127] No No No No No No No No No No No No No No No No No No
## [145] No No No No No No No No No No No No No No No No No No
## [163] No No No No Yes Yes No No No No No No No No No No No No
## [181] No No No No No No No No No No No No No No No No No No
## [199] No No No No No No No No Yes No No No No No No No No No
## [217] No No No No No No No No No No No No No No No No No Yes
## [235] No No No No No No No No No No No No No No No No No No
## [253] No No No No No No No No No No No No No No No No No No
## [271] No No No No No No No No No No No No No No No No No No
## [289] No No No No No No No No No No No No No No No No No No
## [307] No No No No No No No No No No No Yes No No No No No No
## [325] No No No No No No No No No No No No No No No No No No
## [343] No No No No No No No No No No No No No No No No No No
## [361] No No No No No No No No No No No No No No Yes No No No
## [379] No No No No No No No No No No No No No No No No No No
## [397] No No No No No No No No No No No No No No No No No No
## [415] No No No No No No No No No No No No No No No No No No
## [433] No No No No No No No No No No No No No No No No No No
## [451] No No No No No No No No No No No No No No No No No No
## [469] No No No No No No No No Yes No No No No No No No No No
## [487] No No No No No No No No No No Yes No No No No No No No
## [505] No No No No No No No No No No No No No No No No No No
## [523] No No No No No No No No No No No No No No No No No No
## [541] No No No No No No No No No No No No No No No No No No
## [559] No No No No No No No No No No No No No No No No No No
## [577] No Yes No No No No No No No No No No No No No No No No
## [595] No No No No No No No No No No No No No No Yes No No No
## [613] No No No No No No No No No No Yes No No No No No No No
## [631] No No No No No No No No No No No No Yes No No No No No
## [649] No No No No No No No No No No No No No No No No No No
## [667] No No No No No No No No No No No No No No No No No No
## [685] No No No No No No No No No No No No No No No No No No
## [703] No No No No No No No No No No No No No No No No No Yes
## [721] No No No No No No No No No No No No No No No No No No
## [739] No No No No No No No No No No No No Yes No No No No No
## [757] No No No No No No No No No No No No No No No No No No
## [775] No No No No No No No No No No No No No No No No No Yes
## [793] No No No No No No No No No No No No No No No No No No
## [811] No No No No No No No No No No No No No No No No No No
## [829] No No No No No No No No No No No No No No No No No No
## [847] No No No No No No No No No No No No No No No No No No
## [865] No No No No No No No No No No No No No No No No No No
## [883] No No No No No No No No No No No No No No No No No No
## [901] No No No No No No No No No No No No No No No No No No
## [919] Yes Yes No No No No No No No No No No No No No No No No
## [937] No No No No No No No No No No No No No No No No No No
## [955] No No No No No No No No No No No No No No No No No No
## [973] No No No No Yes No No No No No No No No No No No No No
## [991] No No No No No No No No No No
## Levels: No Yes
# Confusion Matrix
table(health_test[,19], knn_health, dnn = c("True", "Predicted"))
## Predicted
## True No Yes
## No 902 9
## Yes 77 12
#True No Yes
# No 902 9
# Yes 77 12
sum(health_test[,19] != knn_health) #number misclassified #number misclassified
## [1] 86
#total misclassified is 86, this model is worse than all models except the decision tree
set.seed(1234)
#read CSV file in
default1<- read.csv("G:/Other computers/My Laptop/Documents/Richard 622 last semester/Test 1/default_payments.csv")
#column names
colnames(default1)
## [1] "LIMIT_BAL" "SEX" "MARRIAGE" "AGE" "PAY_0" "BILL_AMT1"
## [7] "PAY_AMT1" "DEFAULT"
#what does the data look like
str(default1)
## 'data.frame': 12000 obs. of 8 variables:
## $ LIMIT_BAL: int 290000 20000 280000 280000 20000 50000 20000 30000 200000 110000 ...
## $ SEX : chr "F" "M" "M" "F" ...
## $ MARRIAGE : chr "Currently Married" "Currently Married" "Currently Married" "Currently Married" ...
## $ AGE : int 26 51 29 47 24 26 23 25 38 29 ...
## $ PAY_0 : int 0 -1 -2 0 0 -2 1 1 -2 0 ...
## $ BILL_AMT1: int 18125 780 10660 269124 17924 2411 16332 29628 5625 107195 ...
## $ PAY_AMT1 : int 3000 0 5123 11268 1400 3068 2500 1600 12125 7845 ...
## $ DEFAULT : int 0 0 0 0 0 0 1 0 0 0 ...
# 80% sampling of the data
sample_default1 <- sample(nrow(default1), nrow(default1)*0.80)
default1_train <- default1[sample_default1, ]
default1_test <- default1[-sample_default1, ]
# library for GAM
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-40. For overview type 'help("mgcv-package")'.
# GAM model with smoothing splines on all numeric covariates
default1_gam <- gam (DEFAULT ~ s(LIMIT_BAL) + s(AGE) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) + (SEX) + (MARRIAGE), data = default1_train)
summary(default1_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## DEFAULT ~ s(LIMIT_BAL) + s(AGE) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) +
## (SEX) + (MARRIAGE)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.202645 0.006492 31.213 < 2e-16 ***
## SEXM 0.027993 0.007854 3.564 0.000367 ***
## MARRIAGESingle 0.010227 0.008709 1.174 0.240276
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(LIMIT_BAL) 8.141 8.694 17.029 <2e-16 ***
## s(AGE) 3.791 4.720 0.786 0.6576
## s(PAY_0) 8.649 8.929 223.243 <2e-16 ***
## s(BILL_AMT1) 1.000 1.000 29.590 <2e-16 ***
## s(PAY_AMT1) 2.706 3.412 2.978 0.0243 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.205 Deviance explained = 20.7%
## GCV = 0.13618 Scale est. = 0.1358 n = 9600
#a) Are there any variables which appear to not have a statistically significant relationship with the target variable? If so, remove these variables, and create a new GAM that does not include these variables.
set.seed(1234)
# both marriage and age did not have a statistically significant relationships and will be removed below
default1_gam_model2 <- gam (DEFAULT ~ s(LIMIT_BAL) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) + (SEX), data = default1_train)
summary(default1_gam_model2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## DEFAULT ~ s(LIMIT_BAL) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) +
## (SEX)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.207319 0.004877 42.511 < 2e-16 ***
## SEXM 0.028122 0.007778 3.616 0.000301 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(LIMIT_BAL) 8.173 8.713 17.544 <2e-16 ***
## s(PAY_0) 8.664 8.934 224.247 <2e-16 ***
## s(BILL_AMT1) 1.000 1.000 30.982 <2e-16 ***
## s(PAY_AMT1) 2.984 3.744 2.915 0.0225 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.205 Deviance explained = 20.6%
## GCV = 0.13616 Scale est. = 0.13584 n = 9600
#b) Which numeric covariate(s) are related linearly (rather than nonlinearly) to the target?
#bill_amt1 is related linearly due to it having an edf of 1.000
#s(BILL_AMT1) 1.000 1.000 30.982 <2e-16 ***
#c) Create a collection of plots that display the relationship between each covariate and its smoothing spline.
# plot of model2 with smoothing splines
#default1_gam_model2 <- gam (DEFAULT ~ s(LIMIT_BAL) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) + (SEX), data = default1_train)
#summary(default1_gam_model2)
plot(default1_gam_model2, pages = 1)
#d) Using a cutoff value of 0.5, create a confusion matrix for the data in the testing set.
set.seed(1234)
pcut_default_gam_model2 <- 1/2 #defining the cut-off
prob_defautl_gam_model2_out <- predict(default1_gam_model2, default1_test, type = "response")
pred_default_gam_model2_out <- (prob_defautl_gam_model2_out >= pcut_default_gam_model2)*1
table(default1_test$DEFAULT, pred_default_gam_model2_out, dnn = c("Observed", "Predicted"))
## Predicted
## Observed 0 1
## 0 1776 90
## 1 370 164
#confusion matrix
#Predicted
#Observed 0 1
#0 1776 90
#1 370 164