flight_data xls file into R. This
file contains information about the speed of a plane at landing, and the
distance along the runway required for the plane to come to a full stop.
Use the b spline algorithm to develop a spline model with 5 degrees of
freedom, with the column “speed” as the covariate, and the column
“distance” as the target variable. Perform the following for your spline
model:library("readxl")
library(rpart)
library(rpart.plot)
library(splines)
library(randomForest)
library(gbm)
library(adabag)
library(class)
library(mgcv)
set.seed(12345) #to ensure consistent results
flight <- read_excel("/Users/ryannlaky/Documents/University of Indianapolis/MSDA 622/Exams/Exam 1/flight_data.xls")
str(flight)
## tibble [400 × 2] (S3: tbl_df/tbl/data.frame)
## $ speed : num [1:400] 27.7 29.2 33.8 34.1 34.2 ...
## $ distance: num [1:400] 1324 1077 937 1198 956 ...
flight_bs <- lm(distance~bs(speed, df = 5), data = flight)
summary(flight_bs)
##
## Call:
## lm(formula = distance ~ bs(speed, df = 5), data = flight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -541.47 -122.79 -5.74 131.70 544.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1211.2 111.7 10.842 < 2e-16 ***
## bs(speed, df = 5)1 -365.2 177.9 -2.052 0.0408 *
## bs(speed, df = 5)2 -494.2 104.6 -4.722 3.25e-06 ***
## bs(speed, df = 5)3 1050.5 140.3 7.486 4.70e-13 ***
## bs(speed, df = 5)4 3203.4 139.1 23.024 < 2e-16 ***
## bs(speed, df = 5)5 5442.5 179.1 30.388 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194.2 on 394 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9632
## F-statistic: 2090 on 5 and 394 DF, p-value: < 2.2e-16
bs_int <- predict(flight_bs, interval = "confidence", level = 0.70)
bs_int
## fit lwr upr
## 1 1211.1619 1095.2248 1327.0990
## 2 1172.4360 1074.0206 1270.8513
## 3 1067.4686 1009.2992 1125.6380
## 4 1061.4940 1005.2173 1117.7706
## 5 1059.4027 1003.7762 1115.0293
## 6 1057.7753 1002.6502 1112.9005
## 7 987.6433 949.7622 1025.5245
## 8 966.1149 931.5559 1000.6740
## 9 965.5131 931.0329 999.9934
## 10 953.4851 920.4292 986.5410
## 11 951.7346 918.8628 984.6063
## 12 946.9374 914.5423 979.3325
## 13 943.9299 911.8137 976.0461
## 14 942.1182 910.1631 974.0734
## 15 937.4581 905.8949 969.0212
## 16 931.9344 900.7992 963.0696
## 17 931.0594 899.9888 962.1299
## 18 929.1353 898.2044 960.0663
## 19 925.8858 895.1835 956.5880
## 20 925.3933 894.7251 956.0616
## 21 923.7406 893.1856 954.2957
## 22 914.4049 884.4782 944.3316
## 23 912.4103 882.6210 942.1995
## 24 911.6770 881.9391 941.4148
## 25 909.1075 879.5549 938.6602
## 26 903.6303 874.5141 932.7464
## 27 902.4947 873.4799 931.5096
## 28 899.5013 870.7805 928.2221
## 29 898.3260 869.7344 926.9176
## 30 897.2415 868.7779 925.7050
## 31 896.3336 867.9851 924.6822
## 32 893.1243 865.2610 920.9875
## 33 891.5240 863.9736 919.0745
## 34 890.1536 862.9380 917.3692
## 35 889.1668 862.2586 916.0751
## 36 888.4741 861.8427 915.1056
## 37 887.2217 861.7741 912.6693
## 38 887.8116 863.2955 912.3276
## 39 888.4849 864.3995 912.5703
## 40 888.6514 864.6536 912.6492
## 41 888.9223 865.0572 912.7875
## 42 889.0497 865.2434 912.8560
## 43 889.1849 865.4389 912.9309
## 44 890.5044 867.2651 913.7436
## 45 891.8909 869.0836 914.6983
## 46 892.4449 869.7915 915.0983
## 47 892.7732 870.2071 915.3393
## 48 893.7960 871.4856 916.1064
## 49 894.7401 872.6473 916.8328
## 50 895.5826 873.6718 917.4935
## 51 896.3135 874.5522 918.0749
## 52 896.3711 874.6212 918.1209
## 53 897.0678 875.4536 918.6819
## 54 897.9052 876.4468 919.3636
## 55 899.0032 877.7381 920.2682
## 56 900.7223 879.7381 921.7064
## 57 901.0127 880.0737 921.9517
## 58 901.7558 880.9294 922.5822
## 59 903.3369 882.7372 923.9365
## 60 903.5747 883.0078 924.1416
## 61 904.8058 884.4028 925.2088
## 62 907.9404 887.9178 927.9630
## 63 909.5084 889.6585 929.3584
## 64 911.6681 892.0388 931.2973
## 65 912.7503 893.2248 932.2757
## 66 913.2253 893.7440 932.7065
## 67 916.0535 896.8192 935.2879
## 68 918.0916 899.0192 937.1640
## 69 920.4475 901.5472 939.3478
## 70 924.2746 905.6226 942.9266
## 71 930.6245 912.3100 948.9391
## 72 930.6734 912.3611 948.9857
## 73 932.8864 914.6723 951.1005
## 74 935.5143 917.4054 953.6232
## 75 936.8921 918.8337 954.9506
## 76 942.3407 924.4529 960.2285
## 77 944.9576 927.1368 962.7783
## 78 946.6150 928.8321 964.3979
## 79 948.4625 930.7178 966.2071
## 80 949.1487 931.4172 966.8802
## 81 951.8652 934.1808 969.5496
## 82 959.9494 942.3613 977.5375
## 83 960.2070 942.6210 977.7930
## 84 964.6725 947.1146 982.2304
## 85 965.8911 948.3383 983.4439
## 86 968.7431 951.1983 986.2879
## 87 969.5261 951.9825 987.0696
## 88 973.1692 955.6267 990.7117
## 89 975.2680 957.7228 992.8133
## 90 982.0737 964.5049 999.6425
## 91 982.8311 965.2584 1000.4038
## 92 983.1030 965.5289 1000.6771
## 93 983.3958 965.8202 1000.9715
## 94 986.4588 968.8647 1004.0529
## 95 991.4619 973.8311 1009.0927
## 96 992.2452 974.6080 1009.8824
## 97 992.6673 975.0266 1010.3080
## 98 992.9007 975.2580 1010.5434
## 99 996.0859 978.4151 1013.7567
## 100 997.5677 979.8830 1015.2523
## 101 998.0999 980.4102 1015.7897
## 102 998.5923 980.8978 1016.2868
## 103 999.9081 982.2007 1017.6156
## 104 1009.5295 991.7197 1027.3393
## 105 1013.8029 995.9445 1031.6613
## 106 1014.5582 996.6912 1032.4253
## 107 1023.0566 1005.0903 1041.0228
## 108 1024.1331 1006.1542 1042.1120
## 109 1024.2983 1006.3175 1042.2792
## 110 1024.6347 1006.6499 1042.6194
## 111 1024.6844 1006.6991 1042.6697
## 112 1025.1276 1007.1371 1043.1181
## 113 1025.2338 1007.2421 1043.2256
## 114 1027.3025 1009.2866 1045.3184
## 115 1030.7718 1012.7157 1048.8279
## 116 1031.9767 1013.9068 1050.0466
## 117 1036.2311 1018.1131 1054.3491
## 118 1036.8782 1018.7530 1055.0035
## 119 1038.3879 1020.2460 1056.5298
## 120 1040.8621 1022.6933 1059.0309
## 121 1041.9993 1023.8183 1060.1803
## 122 1043.5277 1025.3305 1061.7249
## 123 1050.1701 1031.9055 1068.4347
## 124 1055.1206 1036.8093 1073.4318
## 125 1055.4238 1037.1098 1073.7378
## 126 1056.0051 1037.6858 1074.3243
## 127 1069.5108 1051.0843 1087.9372
## 128 1074.8795 1056.4189 1093.3401
## 129 1079.9056 1061.4177 1098.3935
## 130 1082.7127 1064.2116 1101.2138
## 131 1091.1128 1072.5813 1109.6443
## 132 1091.4588 1072.9263 1109.9913
## 133 1103.7872 1085.2364 1122.3379
## 134 1111.7888 1093.2429 1130.3347
## 135 1120.2297 1101.7031 1138.7562
## 136 1127.7918 1109.2944 1146.2893
## 137 1130.8198 1112.3369 1149.3028
## 138 1136.6220 1118.1713 1155.0727
## 139 1137.5323 1119.0872 1155.9774
## 140 1137.7836 1119.3401 1156.2271
## 141 1141.3509 1122.9306 1159.7712
## 142 1141.8914 1123.4748 1160.3080
## 143 1145.0768 1126.6828 1163.4707
## 144 1146.3269 1127.9423 1164.7116
## 145 1149.3677 1131.0067 1167.7288
## 146 1149.6546 1131.2958 1168.0133
## 147 1150.1469 1131.7921 1168.5017
## 148 1151.8565 1133.5157 1170.1972
## 149 1158.1970 1139.9116 1176.4825
## 150 1167.1266 1148.9275 1185.3257
## 151 1174.7724 1156.6543 1192.8905
## 152 1178.6239 1160.5488 1196.6990
## 153 1179.5270 1161.4622 1197.5918
## 154 1183.4649 1165.4457 1201.4840
## 155 1187.1565 1169.1813 1205.1316
## 156 1193.6067 1175.7111 1211.5024
## 157 1195.8015 1177.9335 1213.6695
## 158 1196.8326 1178.9777 1214.6874
## 159 1201.4171 1183.6214 1219.2127
## 160 1202.3021 1184.5180 1220.0861
## 161 1203.3121 1185.5412 1221.0829
## 162 1211.4908 1193.8294 1229.1522
## 163 1213.5565 1195.9232 1231.1898
## 164 1220.4855 1202.9479 1238.0231
## 165 1221.2791 1203.7526 1238.8057
## 166 1222.3089 1204.7968 1239.8211
## 167 1222.4828 1204.9731 1239.9926
## 168 1227.7754 1210.3402 1245.2107
## 169 1233.5254 1216.1719 1250.8789
## 170 1245.2626 1228.0776 1262.4475
## 171 1250.3259 1233.2139 1267.4378
## 172 1252.5546 1235.4749 1269.6344
## 173 1262.7268 1245.7935 1279.6601
## 174 1263.2794 1246.3541 1280.2048
## 175 1263.9813 1247.0661 1280.8966
## 176 1267.4552 1250.5896 1284.3207
## 177 1268.8302 1251.9843 1285.6761
## 178 1275.5823 1258.8323 1292.3324
## 179 1280.9700 1264.2957 1297.6443
## 180 1296.9557 1280.5007 1313.4106
## 181 1297.0129 1280.5587 1313.4671
## 182 1303.4020 1287.0327 1319.7713
## 183 1307.0152 1290.6931 1323.3374
## 184 1312.4072 1296.1543 1328.6601
## 185 1324.1460 1308.0385 1340.2536
## 186 1329.2190 1313.1717 1345.2663
## 187 1339.3750 1323.4436 1355.3064
## 188 1343.9519 1328.0704 1359.8333
## 189 1358.3201 1342.5858 1374.0545
## 190 1358.8500 1343.1208 1374.5792
## 191 1358.9272 1343.1988 1374.6557
## 192 1375.6998 1360.1223 1391.2772
## 193 1376.7390 1361.1701 1392.3079
## 194 1392.9358 1377.4894 1408.3821
## 195 1397.5125 1382.0967 1412.9283
## 196 1410.2689 1394.9287 1425.6091
## 197 1415.1756 1399.8606 1430.4906
## 198 1423.4326 1408.1553 1438.7099
## 199 1426.7138 1411.4498 1441.9778
## 200 1427.2047 1411.9426 1442.4668
## 201 1441.1065 1425.8895 1456.3236
## 202 1442.6841 1427.4711 1457.8971
## 203 1446.9871 1431.7840 1462.1903
## 204 1450.1854 1434.9886 1465.3823
## 205 1454.5901 1439.4004 1469.7798
## 206 1467.1886 1452.0103 1482.3670
## 207 1470.4595 1455.2819 1485.6372
## 208 1473.7050 1458.5272 1488.8828
## 209 1473.7749 1458.5971 1488.9527
## 210 1482.4560 1467.2734 1497.6386
## 211 1487.1618 1471.9740 1502.3496
## 212 1487.6783 1472.4898 1502.8668
## 213 1495.4332 1480.2320 1510.6344
## 214 1498.3004 1483.0933 1513.5076
## 215 1504.0993 1488.8783 1519.3202
## 216 1512.7282 1497.4819 1527.9744
## 217 1514.5746 1499.3223 1529.8270
## 218 1515.3459 1500.0910 1530.6009
## 219 1515.4832 1500.2278 1530.7387
## 220 1516.5968 1501.3375 1531.8561
## 221 1517.1910 1501.9295 1532.4524
## 222 1529.9323 1514.6198 1545.2447
## 223 1538.5240 1523.1710 1553.8769
## 224 1541.6650 1526.2960 1557.0340
## 225 1565.7534 1550.2426 1581.2642
## 226 1566.6224 1551.1059 1582.1389
## 227 1567.8744 1552.3495 1583.3992
## 228 1597.2266 1581.4862 1612.9669
## 229 1600.5025 1584.7358 1616.2692
## 230 1610.2859 1594.4384 1626.1335
## 231 1612.6194 1596.7521 1628.4867
## 232 1616.1131 1600.2159 1632.0103
## 233 1644.7069 1628.5526 1660.8612
## 234 1645.8564 1629.6914 1662.0214
## 235 1658.9868 1642.6978 1675.2757
## 236 1659.5638 1643.2693 1675.8583
## 237 1659.7860 1643.4894 1676.0826
## 238 1663.3240 1646.9936 1679.6545
## 239 1664.2895 1647.9498 1680.6292
## 240 1685.5347 1668.9891 1702.0803
## 241 1689.5712 1672.9862 1706.1562
## 242 1689.9821 1673.3931 1706.5712
## 243 1697.9084 1681.2420 1714.5749
## 244 1707.8040 1691.0410 1724.5670
## 245 1709.6098 1692.8293 1726.3904
## 246 1722.0675 1705.1661 1738.9689
## 247 1724.4944 1707.5696 1741.4191
## 248 1727.1543 1710.2040 1744.1047
## 249 1734.4021 1717.3824 1751.4217
## 250 1745.2907 1728.1682 1762.4133
## 251 1752.1779 1734.9911 1769.3646
## 252 1753.3855 1736.1876 1770.5834
## 253 1757.2831 1740.0494 1774.5169
## 254 1759.1058 1741.8553 1776.3562
## 255 1761.4145 1744.1430 1778.6859
## 256 1770.8932 1753.5364 1788.2499
## 257 1775.2380 1757.8429 1792.6332
## 258 1777.1541 1759.7421 1794.5661
## 259 1796.5967 1779.0192 1814.1743
## 260 1799.7933 1782.1895 1817.3971
## 261 1801.8982 1784.2773 1819.5192
## 262 1805.4925 1787.8426 1823.1424
## 263 1816.5415 1798.8050 1834.2781
## 264 1822.1642 1804.3849 1839.9434
## 265 1823.7654 1805.9742 1841.5566
## 266 1824.5042 1806.7075 1842.3009
## 267 1872.7377 1854.6239 1890.8515
## 268 1879.2950 1861.1448 1897.4452
## 269 1890.9392 1872.7286 1909.1499
## 270 1912.3610 1894.0525 1930.6695
## 271 1916.0770 1897.7533 1934.4007
## 272 1922.6819 1904.3322 1941.0315
## 273 1923.6421 1905.2889 1941.9954
## 274 1937.4639 1919.0619 1955.8659
## 275 1939.0853 1920.6780 1957.4925
## 276 1969.4533 1950.9628 1987.9438
## 277 1969.4707 1950.9802 1987.9612
## 278 1984.5592 1966.0380 2003.0805
## 279 1992.8913 1974.3559 2011.4267
## 280 1996.1953 1977.6549 2014.7358
## 281 2007.4052 1988.8497 2025.9607
## 282 2010.0432 1991.4847 2028.6018
## 283 2014.4842 1995.9209 2033.0476
## 284 2015.7585 1997.1939 2034.3231
## 285 2024.0456 2005.4737 2042.6175
## 286 2042.6167 2024.0343 2061.1992
## 287 2049.5739 2030.9896 2068.1583
## 288 2052.8317 2034.2467 2071.4167
## 289 2065.6891 2047.1038 2084.2743
## 290 2065.8089 2047.2236 2084.3941
## 291 2076.0466 2057.4635 2094.6298
## 292 2082.3731 2063.7921 2100.9541
## 293 2082.5666 2063.9856 2101.1475
## 294 2091.0744 2072.4974 2109.6514
## 295 2094.4717 2075.8966 2113.0467
## 296 2098.8221 2080.2497 2117.3945
## 297 2117.1253 2098.5671 2135.6834
## 298 2119.0599 2100.5035 2137.6163
## 299 2135.0881 2116.5478 2153.6284
## 300 2140.4090 2121.8747 2158.9433
## 301 2149.7599 2131.2367 2168.2832
## 302 2157.2133 2138.6994 2175.7272
## 303 2164.2363 2145.7316 2182.7410
## 304 2175.1980 2156.7082 2193.6878
## 305 2179.8993 2161.4160 2198.3825
## 306 2186.5107 2168.0370 2204.9845
## 307 2187.7111 2169.2391 2206.1832
## 308 2194.9307 2176.4692 2213.3922
## 309 2204.5618 2186.1145 2223.0090
## 310 2208.5977 2190.1565 2227.0390
## 311 2216.6289 2198.1997 2235.0581
## 312 2226.0737 2207.6587 2244.4887
## 313 2231.3707 2212.9636 2249.7777
## 314 2234.1767 2215.7738 2252.5795
## 315 2234.5434 2216.1411 2252.9457
## 316 2249.1168 2230.7360 2267.4976
## 317 2255.6727 2237.3013 2274.0440
## 318 2271.5607 2253.2115 2289.9099
## 319 2281.0034 2262.6667 2299.3400
## 320 2299.1356 2280.8213 2317.4499
## 321 2309.2163 2290.9133 2327.5194
## 322 2315.2371 2296.9404 2333.5338
## 323 2337.8975 2319.6212 2356.1738
## 324 2341.4471 2323.1734 2359.7207
## 325 2347.1534 2328.8837 2365.4230
## 326 2382.8031 2364.5485 2401.0577
## 327 2435.3409 2417.0723 2453.6094
## 328 2457.0773 2438.7885 2475.3661
## 329 2502.4651 2484.1035 2520.8266
## 330 2504.2026 2485.8374 2522.5677
## 331 2505.1221 2486.7550 2523.4892
## 332 2559.6036 2541.0875 2578.1196
## 333 2569.7509 2551.1997 2588.3020
## 334 2584.2249 2565.6196 2602.8302
## 335 2590.8242 2572.1927 2609.4558
## 336 2617.1260 2598.3798 2635.8722
## 337 2639.4510 2620.5951 2658.3070
## 338 2646.5297 2627.6366 2665.4228
## 339 2668.8137 2649.7961 2687.8312
## 340 2701.2908 2682.0723 2720.5094
## 341 2729.4017 2709.9907 2748.8126
## 342 2735.3625 2715.9086 2754.8164
## 343 2736.8153 2717.3508 2756.2797
## 344 2744.8869 2725.3629 2764.4109
## 345 2773.7232 2753.9759 2793.4705
## 346 2809.3931 2789.3476 2829.4387
## 347 2832.8582 2812.6040 2853.1125
## 348 2920.0608 2898.9549 2941.1667
## 349 2939.8471 2918.5334 2961.1608
## 350 2967.8333 2946.2178 2989.4488
## 351 2978.8479 2957.1114 3000.5845
## 352 2998.3729 2976.4188 3020.3270
## 353 3001.1933 2979.2076 3023.1791
## 354 3046.3445 3023.8420 3068.8469
## 355 3048.9207 3026.3883 3071.4531
## 356 3080.1273 3057.2290 3103.0257
## 357 3123.7216 3100.3035 3147.1398
## 358 3129.2648 3105.7799 3152.7496
## 359 3144.1091 3120.4453 3167.7729
## 360 3154.0328 3130.2490 3177.8167
## 361 3159.7841 3135.9306 3183.6376
## 362 3173.3051 3149.2875 3197.3226
## 363 3198.3112 3173.9895 3222.6330
## 364 3222.4982 3197.8818 3247.1147
## 365 3228.1590 3203.4736 3252.8445
## 366 3240.0340 3215.2039 3264.8641
## 367 3252.8637 3227.8773 3277.8500
## 368 3310.2881 3284.6052 3335.9711
## 369 3354.9152 3328.6964 3381.1339
## 370 3405.3885 3378.5725 3432.2044
## 371 3476.3661 3448.7311 3504.0012
## 372 3488.0445 3460.2774 3515.8117
## 373 3494.3783 3466.5398 3522.2167
## 374 3552.9379 3524.4524 3581.4235
## 375 3571.4390 3542.7537 3600.1243
## 376 3651.7959 3622.2710 3681.3207
## 377 3727.4207 3697.1485 3757.6928
## 378 3734.0565 3703.7208 3764.3922
## 379 3736.9189 3706.5559 3767.2820
## 380 3763.2712 3732.6593 3793.8830
## 381 3780.0442 3749.2766 3810.8118
## 382 3971.7498 3939.3327 4004.1669
## 383 3982.9112 3950.4046 4015.4179
## 384 4069.1328 4035.9507 4102.3148
## 385 4173.0696 4139.0981 4207.0412
## 386 4190.2802 4156.1786 4224.3819
## 387 4234.0701 4199.6361 4268.5041
## 388 4260.3673 4225.7319 4295.0027
## 389 4262.6372 4227.9843 4297.2901
## 390 4473.9336 4437.5185 4510.3488
## 391 4565.8056 4528.4798 4603.1313
## 392 4687.4681 4648.7158 4726.2205
## 393 4750.3538 4710.7392 4789.9684
## 394 4969.5808 4926.0990 5013.0626
## 395 4987.6054 4943.7369 5031.4740
## 396 5248.9184 5198.0616 5299.7751
## 397 5253.5425 5202.5373 5304.5476
## 398 5642.3197 5575.5448 5709.0947
## 399 6094.8633 6001.5138 6188.2128
## 400 6653.6284 6515.9397 6791.3172
plot(flight$speed, flight$distance, xlab = "Speed", ylab = "Distance", main = "Regression Spline Scatter Plot, df = 5", cex = .5)
lines(flight$speed, flight_bs$fitted.values, col = "blue", lwd = 1)
lines(flight$speed, bs_int[,2], col = "red", lty = 2)
lines(flight$speed, bs_int[,3], col = "red", lty = 2)
flight_cs = lm(distance~ns(speed, df = 3), data = flight)
summary(flight_cs)
##
## Call:
## lm(formula = distance ~ ns(speed, df = 3), data = flight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -564.04 -121.52 -3.54 130.88 541.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1069.81 59.34 18.03 <2e-16 ***
## ns(speed, df = 3)1 1266.40 44.35 28.55 <2e-16 ***
## ns(speed, df = 3)2 3236.23 137.77 23.49 <2e-16 ***
## ns(speed, df = 3)3 5494.25 75.91 72.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 195.6 on 396 degrees of freedom
## Multiple R-squared: 0.9629, Adjusted R-squared: 0.9626
## F-statistic: 3429 on 3 and 396 DF, p-value: < 2.2e-16
anova(flight_bs, flight_cs)
## Analysis of Variance Table
##
## Model 1: distance ~ bs(speed, df = 5)
## Model 2: distance ~ ns(speed, df = 3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 394 14851546
## 2 396 15155277 -2 -303730 4.0289 0.01853 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
14851546. Based on these results, there is a statistically
significant difference between the predictive performance of the two
models because the p-value is less than 0.05 at 0.01853.
Because of this statistically significant difference in predictive
performance and the reduced Sum of Squared Errors for the b spline
model, it would be considered the better model for predicting distance
based on speed.
health_dataset csv file into R.
This data contains various pieces of health information about some
unidentified individuals. Randomly separate the rows of this data into
training and testing sets. Use 50% of the data as your training set, and
use the rest of the data as your testing set.health <- read.csv("/Users/ryannlaky/Documents/University of Indianapolis/MSDA 622/Exams/Exam 1/health_dataset.csv")
str(health)
## 'data.frame': 2000 obs. of 20 variables:
## $ Gender : chr "female" "female" "female" "female" ...
## $ Age : int 49 45 45 45 66 54 50 16 56 56 ...
## $ Weight : num 86.7 75.7 75.7 75.7 68 74.7 84.1 73.2 57.5 57.5 ...
## $ Height : num 168 167 167 167 170 ...
## $ BMI : num 30.6 27.2 27.2 27.2 23.7 ...
## $ Pulse : int 86 62 62 62 60 76 74 76 64 64 ...
## $ BPSysAve : int 112 118 118 118 111 134 142 126 95 95 ...
## $ BPDiaAve : int 75 64 64 64 63 85 68 72 69 69 ...
## $ BPSys1 : int 118 106 106 106 124 136 138 132 94 94 ...
## $ BPDia1 : int 82 62 62 62 64 86 66 74 74 74 ...
## $ BPSys2 : int 108 118 118 118 108 132 142 126 94 94 ...
## $ BPDia2 : int 74 68 68 68 62 88 74 68 70 70 ...
## $ BPSys3 : int 116 118 118 118 114 136 142 126 96 96 ...
## $ BPDia3 : int 76 60 60 60 64 82 62 76 68 68 ...
## $ DirectChol : num 1.16 2.12 2.12 2.12 0.67 1.16 1.06 1.14 2.22 2.22 ...
## $ TotChol : num 6.7 5.82 5.82 5.82 4.99 6.41 5.22 3 5.79 5.79 ...
## $ UrineVol1 : int 77 106 106 106 113 215 64 345 26 26 ...
## $ UrineFlow1 : num 0.094 1.116 1.116 1.116 0.489 ...
## $ Diabetes : chr "No" "No" "No" "No" ...
## $ SleepTrouble: chr "Yes" "No" "No" "No" ...
index <- sample(nrow(health), nrow(health)*0.50)
health_train <- health[index,]
health_test <- health[-index,]
reg_tree <- rpart(formula = Diabetes ~., data = health_train, method = "class")
prp(reg_tree, digits = 4, extra = 1)
pred1 <- predict(reg_tree, health_test, type = "class")
table(health_test$Diabetes, pred1, dnn = c("True", "Pred"))
## Pred
## True No Yes
## No 875 39
## Yes 59 27
98 were
misclassified in this model.
health_train$Diabetes <- as.factor(health_train$Diabetes)
health_test$Diabetes <- as.factor(health_test$Diabetes)
random_forest <- randomForest(Diabetes ~., data = health_train, importance = TRUE, ntree = 100)
random_forest
##
## Call:
## randomForest(formula = Diabetes ~ ., data = health_train, importance = TRUE, ntree = 100)
## Type of random forest: classification
## Number of trees: 100
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 7%
## Confusion matrix:
## No Yes class.error
## No 914 4 0.004357298
## Yes 66 16 0.804878049
pred2 <- predict(random_forest, health_test)
table(health_test$Diabetes, pred2, dnn = c("True", "Pred"))
## Pred
## True No Yes
## No 912 2
## Yes 59 27
61 were
misclassified in this model.
boost_model = boosting(Diabetes ~., data = health_train, boos = T)
boost_model$importance
## Age BMI BPDia1 BPDia2 BPDia3 BPDiaAve
## 10.7068221 6.6289676 3.9847315 2.5020648 2.3664295 4.7742607
## BPSys1 BPSys2 BPSys3 BPSysAve DirectChol Gender
## 4.9896555 1.9582537 2.8535258 4.4116752 6.4310455 0.5271525
## Height Pulse SleepTrouble TotChol UrineFlow1 UrineVol1
## 7.9398322 6.0675614 0.5585061 9.0733131 8.1156687 9.1795717
## Weight
## 6.9309625
pred3 <- predict(boost_model, health_test)
pred3$confusion
## Observed Class
## Predicted Class No Yes
## No 900 53
## Yes 14 33
67 were misclassified in this model.
str(health)
## 'data.frame': 2000 obs. of 20 variables:
## $ Gender : chr "female" "female" "female" "female" ...
## $ Age : int 49 45 45 45 66 54 50 16 56 56 ...
## $ Weight : num 86.7 75.7 75.7 75.7 68 74.7 84.1 73.2 57.5 57.5 ...
## $ Height : num 168 167 167 167 170 ...
## $ BMI : num 30.6 27.2 27.2 27.2 23.7 ...
## $ Pulse : int 86 62 62 62 60 76 74 76 64 64 ...
## $ BPSysAve : int 112 118 118 118 111 134 142 126 95 95 ...
## $ BPDiaAve : int 75 64 64 64 63 85 68 72 69 69 ...
## $ BPSys1 : int 118 106 106 106 124 136 138 132 94 94 ...
## $ BPDia1 : int 82 62 62 62 64 86 66 74 74 74 ...
## $ BPSys2 : int 108 118 118 118 108 132 142 126 94 94 ...
## $ BPDia2 : int 74 68 68 68 62 88 74 68 70 70 ...
## $ BPSys3 : int 116 118 118 118 114 136 142 126 96 96 ...
## $ BPDia3 : int 76 60 60 60 64 82 62 76 68 68 ...
## $ DirectChol : num 1.16 2.12 2.12 2.12 0.67 1.16 1.06 1.14 2.22 2.22 ...
## $ TotChol : num 6.7 5.82 5.82 5.82 4.99 6.41 5.22 3 5.79 5.79 ...
## $ UrineVol1 : int 77 106 106 106 113 215 64 345 26 26 ...
## $ UrineFlow1 : num 0.094 1.116 1.116 1.116 0.489 ...
## $ Diabetes : chr "No" "No" "No" "No" ...
## $ SleepTrouble: chr "Yes" "No" "No" "No" ...
knn_model <- knn(train = health_train[,2:18], test = health_test[,2:18], cl = health_train[,19], k = 5)
summary(knn_model)
## No Yes
## 975 25
table(health_test$Diabetes, knn_model, dnn = c("True", "Pred"))
## Pred
## True No Yes
## No 895 19
## Yes 80 6
99 were misclassified in this model. Based on this
mislassification rate, this model performed worse than the boosting
model and the random forest, then only slightly worse than the decision
tree.
default_payments csv file. Use the “DEFAULT” column as your
target, and all other variables as your covariates. Use smoothing
splines for all numeric covariates. Use 80% of your data as your
training set to develop your model, and use the rest of your data as
your testing set. (Note that the default_payments csv file contains
different data than that in the credit_default csv file that was
provided for practice in our Google Drive.)payment <- read.csv("/Users/ryannlaky/Documents/University of Indianapolis/MSDA 622/Exams/Exam 1/default_payments.csv")
str(payment)
## 'data.frame': 12000 obs. of 8 variables:
## $ LIMIT_BAL: int 290000 20000 280000 280000 20000 50000 20000 30000 200000 110000 ...
## $ SEX : chr "F" "M" "M" "F" ...
## $ MARRIAGE : chr "Currently Married" "Currently Married" "Currently Married" "Currently Married" ...
## $ AGE : int 26 51 29 47 24 26 23 25 38 29 ...
## $ PAY_0 : int 0 -1 -2 0 0 -2 1 1 -2 0 ...
## $ BILL_AMT1: int 18125 780 10660 269124 17924 2411 16332 29628 5625 107195 ...
## $ PAY_AMT1 : int 3000 0 5123 11268 1400 3068 2500 1600 12125 7845 ...
## $ DEFAULT : int 0 0 0 0 0 0 1 0 0 0 ...
ind <- sample(nrow(payment), nrow(payment)*0.80)
payment_train <- payment[ind,]
payment_test <- payment[-ind,]
gam1 <- gam(DEFAULT ~ s(LIMIT_BAL) + s(AGE) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) + (SEX) + (MARRIAGE), data = payment_train)
summary(gam1)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## DEFAULT ~ s(LIMIT_BAL) + s(AGE) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) +
## (SEX) + (MARRIAGE)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.195985 0.006437 30.444 < 2e-16 ***
## SEXM 0.024385 0.007875 3.096 0.00196 **
## MARRIAGESingle 0.023293 0.008558 2.722 0.00651 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(LIMIT_BAL) 5.931 6.906 17.430 < 2e-16 ***
## s(AGE) 1.000 1.000 0.362 0.5472
## s(PAY_0) 6.613 7.302 245.371 < 2e-16 ***
## s(BILL_AMT1) 1.347 1.614 16.868 3.48e-05 ***
## s(PAY_AMT1) 3.086 3.862 3.006 0.0178 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.189 Deviance explained = 19.1%
## GCV = 0.13776 Scale est. = 0.13746 n = 9600
age was not statistically significant in this model, and is
therefore removed for further model development.
gam2 <- gam(DEFAULT ~ s(LIMIT_BAL) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) + (SEX) + (MARRIAGE), data = payment_train)
summary(gam2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## DEFAULT ~ s(LIMIT_BAL) + s(PAY_0) + s(BILL_AMT1) + s(PAY_AMT1) +
## (SEX) + (MARRIAGE)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.194700 0.006073 32.062 < 2e-16 ***
## SEXM 0.024951 0.007819 3.191 0.001422 **
## MARRIAGESingle 0.025598 0.007651 3.346 0.000823 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(LIMIT_BAL) 5.926 6.901 17.377 < 2e-16 ***
## s(PAY_0) 6.610 7.300 245.451 < 2e-16 ***
## s(BILL_AMT1) 1.347 1.614 17.061 3.12e-05 ***
## s(PAY_AMT1) 3.109 3.890 3.001 0.0178 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.189 Deviance explained = 19.1%
## GCV = 0.13773 Scale est. = 0.13745 n = 9600
plot(gam2, pages = 1)
pred4 <- (predict(gam2, payment_test, type = "response") >= 0.5)
table(payment_test$DEFAULT, pred4, dnn = c("Observed", "Pred"))
## Pred
## Observed FALSE TRUE
## 0 1775 70
## 1 354 201