Part I

# Install DAAG 1.22 from the CRAN source archive when it is not already
# available in the library path.
#
# system.file() returns "" for a package that is not installed. It is the
# lookup recommended by ?installed.packages, which scans every installed
# package and is slow on large libraries.
if (!nzchar(system.file(package = "DAAG"))) {
  packageurl <- "https://cran.r-project.org/src/contrib/Archive/DAAG/DAAG_1.22.tar.gz"
  # Installed separately because dependencies are not resolved when
  # repos = NULL; presumably latticeExtra is a DAAG dependency (TODO confirm).
  install.packages("latticeExtra")
  install.packages(packageurl, repos = NULL, type = "source")
}

library(DAAG)
## Loading required package: lattice
  1. Continue with the PGA.csv data set. For this data set, fit a multiple linear regression to the data. Use the log of Average winnings as the response variable and use Age, Average Drive (Yards), Driving Accuracy (percent), Greens on Regulation (%), Average # of Putts, Save Percent, and # Events as covariates. Perform a 5-fold cross validation and obtain the PRESS statistic, MSPE, and prediction R-squared.
# Load the PGA data set. `header = TRUE` is spelled out instead of the
# partially matched argument `h` and the reassignable shorthand `T`.
# NOTE(review): the absolute, user-specific path only resolves on the original
# author's machine; consider a project-relative path.
PGA <- read.csv("C:/Users/bhuvi iyer/Downloads/PGA.csv", header = TRUE)
head(PGA)
##               Name Age AverageDrive DrivingAccuracy GreensonRegulation
## 1   Aaron Baddeley  23        288.0            53.1               58.2
## 2       Adam Scott  24        295.4            57.7               65.6
## 3       Alex Cejka  34        285.8            64.2               63.8
## 4      Andre Stolz  34        297.9            59.0               63.0
## 5      Arjun Atwal  31        289.4            60.5               62.5
## 6 Arron Oberholser  29        284.6            68.8               67.0
##   AverageNumofPutts SavePercent MoneyRank NumEvents TotalWinnings
## 1             1.767        50.9       123        27        632878
## 2             1.757        59.3         7        16       3724984
## 3             1.795        50.7        54        24       1313484
## 4             1.787        47.7       101        20        808373
## 5             1.766        43.5       146        30        486053
## 6             1.780        50.9        52        23       1355433
##   AverageWinnings
## 1           23440
## 2          232812
## 3           54729
## 4           40419
## 5           16202
## 6           58932
# Full model: AverageWinnings regressed on all seven covariates.
# The stray unary `+` in front of GreensonRegulation has been removed; it did
# not change the fitted model, only the echoed call.
# NOTE(review): the task statement asks for log(AverageWinnings) as the
# response. The raw response is kept here because the cross-validation code and
# the captured output that follow are computed on the raw scale.
model_pga <- lm(
  AverageWinnings ~ Age + AverageDrive + DrivingAccuracy +
    GreensonRegulation + AverageNumofPutts + SavePercent + NumEvents,
  data = PGA
)

summary(model_pga)
## 
## Call:
## lm(formula = AverageWinnings ~ Age + AverageDrive + DrivingAccuracy + 
##     +GreensonRegulation + AverageNumofPutts + SavePercent + NumEvents, 
##     data = PGA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -71690 -22176  -6735  17147 247928 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         945579.88  305886.59   3.091  0.00230 ** 
## Age                   -587.13     519.32  -1.131  0.25968    
## AverageDrive           -94.76     567.42  -0.167  0.86755    
## DrivingAccuracy      -2360.57     854.02  -2.764  0.00628 ** 
## GreensonRegulation    8466.04    1303.87   6.493 7.30e-10 ***
## AverageNumofPutts  -694226.49  138155.99  -5.025 1.17e-06 ***
## SavePercent           1395.67     587.54   2.375  0.01853 *  
## NumEvents            -3159.22     644.24  -4.904 2.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 41430 on 188 degrees of freedom
## Multiple R-squared:  0.4527, Adjusted R-squared:  0.4323 
## F-statistic: 22.21 on 7 and 188 DF,  p-value: < 2.2e-16
# 5-fold cross-validation of model_pga with a fixed seed (123). The result is
# stored so its `cvpred` column can be used for the metrics computed below.
KCV <- cv.lm(data = PGA, model_pga, m = 5, seed = 123)
## Analysis of Variance Table
## 
## Response: AverageWinnings
##                     Df   Sum Sq  Mean Sq F value  Pr(>F)    
## Age                  1 1.71e+09 1.71e+09    0.99 0.32011    
## AverageDrive         1 2.19e+10 2.19e+10   12.74 0.00045 ***
## DrivingAccuracy      1 5.19e+07 5.19e+07    0.03 0.86220    
## GreensonRegulation   1 1.13e+11 1.13e+11   66.08 5.7e-14 ***
## AverageNumofPutts    1 7.57e+10 7.57e+10   44.09 3.3e-10 ***
## SavePercent          1 1.29e+10 1.29e+10    7.51 0.00673 ** 
## NumEvents            1 4.13e+10 4.13e+10   24.05 2.0e-06 ***
## Residuals          188 3.23e+11 1.72e+09                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in cv.lm(data = PGA, model_pga, m = 5, seed = 123): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## 
## fold 1 
## Observations in test set: 39 
##                      4     5     11    13     25    36     38    39    43
## Predicted        54281 26199  41198  -690  71891 33982 -12370 40130 13636
## cvpred           56300 25301  42783 -2912  79496 29407 -15697 37234 12304
## AverageWinnings  40419 16202  20359  6026  38551 38345  16486 81787  4378
## CV residual     -15881 -9099 -22424  8938 -40945  8938  32183 44553 -7926
##                     53     72     73    75    76    78    81     92  111
## Predicted       108551 -23602  76158 37758 25550 34250 20946  66001 3186
## cvpred          115529 -29528  85012 42230 25240 36558 18906  74185 1293
## AverageWinnings  98230   5449  27230 34414 29452 27103  9660  36636 1031
## CV residual     -17299  34977 -57782 -7816  4212 -9455 -9246 -37549 -262
##                    114    116    119    120    125    127    128    136
## Predicted        74126  78632  46836  89611  80412  53702  34344  41938
## cvpred           75748  84344  55495  96514  85301  53404  37495  45154
## AverageWinnings  65268  33499  18582  78394  15806  32685  18188  19901
## CV residual     -10480 -50845 -36913 -18120 -69495 -20719 -19307 -25253
##                    141    145    152    159    160   162    164   169
## Predicted        78190  26320  91937  84791  49187 73553  51887 42194
## cvpred           77816  27160  98826  88195  52381 74598  56481 45302
## AverageWinnings  26149   8600  66836  72904  29286 98566  35182 48607
## CV residual     -51667 -18560 -31990 -15291 -23095 23968 -21299  3305
##                   183    185   189    195   196
## Predicted       17294  33462 41647  63191 60052
## cvpred          17593  31829 45797  68725 63387
## AverageWinnings 37292 113473 47589  51586 80590
## CV residual     19699  81644  1792 -17139 17203
## 
## Sum of squares = 3.64e+10    Mean square = 9.34e+08    n = 39 
## 
## fold 2 
## Observations in test set: 40 
##                     9     12    18     24    28    33    40     46     50
## Predicted       27814 -11945 39971  -8586 61377 57696 26539  64420 -55958
## cvpred          20584 -19961 42354 -22535 58787 55666 17991  63950 -75176
## AverageWinnings 25041  15727 36318   6664 72422 56206 15694  18862    850
## CV residual      4457  35688 -6036  29199 13635   540 -2297 -45088  76026
##                     51     52     54    55     57     58    63     80
## Predicted       -12356  37163  68168 25565  70638 -26310 39691  34220
## cvpred          -16754  37007  68885 24464  67524 -36524 41640  35715
## AverageWinnings   6090  18441 128129 17010  32240  10014 46638  25230
## CV residual      22844 -18566  59244 -7454 -35284  46538  4998 -10485
##                     84     87    88     95     97    99    103    106
## Predicted        33780 -13120 67484  49893  82544 15504  60796 108133
## cvpred           30563 -17601 67476  51978  80344 13377  62421 107109
## AverageWinnings   7315  13203 76394  11076  43714 25447  38960  56217
## CV residual     -23248  30804  8918 -40902 -36630 12070 -23461 -50892
##                   109   115    126    131   134    135    137   142    143
## Predicted       83254  5834  -2172  89954  8174  55652 -11751 13857 125603
## cvpred          85654 -2755 -10141  91911  1100  55902 -19460  5671 127026
## AverageWinnings 84871  7891  17657 125524 12065  25371  12350 29356 262947
## CV residual      -783 10646  27798  33613 10965 -30531  31810 23685 135921
##                   149   155   173    188   190    192
## Predicted       11851 53206 40323  98936  6623  22907
## cvpred           7385 47061 41563 104162  3024  18391
## AverageWinnings 33335 96169 42544  70699 30166   4123
## CV residual     25950 49108   981 -33463 27142 -14268
## 
## Sum of squares = 5.43e+10    Mean square = 1.36e+09    n = 40 
## 
## fold 3 
## Observations in test set: 39 
##                    14    15    17     19     21     27    30     47     62
## Predicted       58117 16655 33067  61430  59075  86277 33859  94327 -43969
## cvpred          55373 17573 36275  72911  64494  91941 30537  99574 -46034
## AverageWinnings 51794 19381 37175  15438  26980  48856 56783 125614   3200
## CV residual     -3579  1808   900 -57473 -37514 -43085 26246  26040  49234
##                     66     67     68    69     71     74     77    79
## Predicted        85201 -13466 104887 11756  80341  77758 -45212 33729
## cvpred           93434 -15529 107499 12439  88284  79059 -54860 27745
## AverageWinnings  52460   9572  47573 15756  18290  57871   6004 28294
## CV residual     -40974  25101 -59926  3317 -69994 -21188  60864   549
##                     89    90    93   101    104   105   108   113  117
## Predicted        55633 64575 32134 46527  97855 68785 15745 10685  210
## cvpred           56874 65146 34347 47941 110829 69733 19231  8745  743
## AverageWinnings  35987 86077 45105 41969  29121 61241 10044 34398 8548
## CV residual     -20887 20931 10758 -5972 -81708 -8492 -9187 25653 7805
##                    118   123    124    132   140   157   158    163   166
## Predicted        97075 52508  56942  41766 32866 46417 23867 132427 14363
## cvpred          105861 54265  62652  47891 33183 45816 23801 136100 13514
## AverageWinnings  45394 93751  31992  21250 37404 48253 18319 179956 41601
## CV residual     -60467 39486 -30660 -26641  4221  2437 -5482  43856 28087
##                    178   179    180    191
## Predicted       106316 31487 152278  41358
## cvpred          113125 25983 162687  49080
## AverageWinnings  34626 46768 282393   6074
## CV residual     -78499 20785 119706 -43006
## 
## Sum of squares = 6.63e+10    Mean square = 1.7e+09    n = 39 
## 
## fold 4 
## Observations in test set: 39 
##                      6     7     10    20    23     26    29     32     35
## Predicted        69056 52426  81848 31939 33284  50033 49954  73339 -29929
## cvpred           69087 51150  75895 32674 28310  50416 53022  70588 -27368
## AverageWinnings  58932 41833  62906 40638 20162  25973 80892 110068   7820
## CV residual     -10155 -9317 -12989  7964 -8148 -24443 27870  39480  35188
##                     37     45     49     61    65     86    94     96
## Predicted        66588  45903  27229 129503 20421  28022  2208 133109
## cvpred           68939  47958  31270 118322 22195  31876  5942 125029
## AverageWinnings  23833  14200  15484 361702 72542  20001 49577 107250
## CV residual     -45106 -33758 -15786 243380 50347 -11875 43635 -17779
##                     98    100   112    121    122    130   133    144
## Predicted        87354  78904 17301 -41084  54421  26604 53668 146852
## cvpred           80350  83140 18584 -40483  51318  19978 44318 139511
## AverageWinnings  24898  24936 28161   8519  29912   9776 53072 242848
## CV residual     -55452 -58204  9577  49002 -21406 -10202  8754 103337
##                    146    147    151   156   161    170    171   172
## Predicted        22072  34111  37346 17511 13387  42243  50701 45603
## cvpred           27850  33680  36960 16734 12644  46480  50939 44731
## AverageWinnings   8232  14412  13563  7404 10610  19651  12162 79413
## CV residual     -19618 -19268 -23397 -9330 -2034 -26829 -38777 34682
##                    174    175    177   182   186    193
## Predicted        35986  46171  70165 55993 18212  77473
## cvpred           36757  37024  67323 55258 18124  74583
## AverageWinnings   8315  16330 117969 66445 34926  43572
## CV residual     -28442 -20694  50646 11187 16802 -31011
## 
## Sum of squares = 1.02e+11    Mean square = 2.63e+09    n = 39 
## 
## fold 5 
## Observations in test set: 39 
##                      1      2     3      8     16     22     31     34
## Predicted        31204 135123 35922  75580  60388  73359   5902  36616
## cvpred           39008 128487 37935  75783  61664  73286  18699  31302
## AverageWinnings  23440 232812 54729  38406  45481  38302   4178  20993
## CV residual     -15568 104325 16794 -37377 -16183 -34984 -14521 -10309
##                     41    42     44     48     56    59    60    64   70
## Predicted        35194 15529  57668  19871  49849 44427 62817 92520  576
## cvpred           36125  8166  54499  21826  52344 46581 58970 84473 2221
## AverageWinnings  13220 11269  23129   9787  15756 37158 57227 87257 8252
## CV residual     -22905  3103 -31370 -12039 -36588 -9423 -1743  2784 6031
##                     82     83    85     91   102   107    110    129  138
## Predicted        42457  49626 86806  88840 36540 71375  33417  57590 5238
## cvpred           44938  48760 72938  79915 38432 68840  32318  49002 7772
## AverageWinnings  19584  11349 90071  64589 67813 86574  21091  31531 9356
## CV residual     -25354 -37411 17133 -15326 29381 17734 -11227 -17471 1584
##                    139   148    150    153    154   165   167    168
## Predicted        43042 74524  42229  29158  53809 69712 -8063  92850
## cvpred           42019 67246  40458  31539  55879 68299 -5190  80998
## AverageWinnings  22616 58213  23399   8363  10782 88527 11339 122341
## CV residual     -19403 -9033 -17059 -23176 -45097 20228 16529  41343
##                    176    181   184   187    194
## Predicted       117117  57459 26657  5150 128112
## cvpred          106245  60852 26251  4841 105828
## AverageWinnings 158938  42623 25697 11308 376040
## CV residual      52693 -18229  -554  6467 270212
## 
## Sum of squares = 1.04e+11    Mean square = 2.66e+09    n = 39 
## 
## Overall (Sum over all 39 folds) 
##       ms 
## 1.85e+09

MSPE can be alternatively calculated as

# MSPE = PRESS / n: average the squared cross-validation residuals over the
# observations, not over the number of folds. This agrees with the overall
# "ms" value reported by cv.lm.
mean((PGA$AverageWinnings - KCV$cvpred)^2)
## [1] 1.85e+09

PRESS can be calculated as

# PRESS: sum of squared differences between the observed response and the
# cross-validated predictions.
kcv_resid <- PGA$AverageWinnings - KCV$cvpred
sum(kcv_resid^2)
## [1] 3.63e+11

Prediction R squared can be calculated as

# Prediction R-squared = 1 - PRESS / SST. Without the leading `1 -` the
# expression is the unexplained fraction (0.616), not R-squared.
1 - sum((PGA$AverageWinnings - KCV$cvpred)^2) /
  sum((PGA$AverageWinnings - mean(PGA$AverageWinnings))^2)
## [1] 0.384
# In-sample R-squared of the same model, for comparison.
summary(model_pga)$r.squared
## [1] 0.453

# LOOCV: leave-one-out cross-validation uses one fold per observation, so m
# must equal the number of rows. With m = 5 this call was an exact repeat of
# the 5-fold run.
# NOTE(review): the captured output following this call was produced with
# m = 5 and needs to be regenerated.

LOOCV <- cv.lm(data = PGA, model_pga, m = nrow(PGA), seed = 123)
## Analysis of Variance Table
## 
## Response: AverageWinnings
##                     Df   Sum Sq  Mean Sq F value  Pr(>F)    
## Age                  1 1.71e+09 1.71e+09    0.99 0.32011    
## AverageDrive         1 2.19e+10 2.19e+10   12.74 0.00045 ***
## DrivingAccuracy      1 5.19e+07 5.19e+07    0.03 0.86220    
## GreensonRegulation   1 1.13e+11 1.13e+11   66.08 5.7e-14 ***
## AverageNumofPutts    1 7.57e+10 7.57e+10   44.09 3.3e-10 ***
## SavePercent          1 1.29e+10 1.29e+10    7.51 0.00673 ** 
## NumEvents            1 4.13e+10 4.13e+10   24.05 2.0e-06 ***
## Residuals          188 3.23e+11 1.72e+09                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in cv.lm(data = PGA, model_pga, m = 5, seed = 123): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## 
## fold 1 
## Observations in test set: 39 
##                      4     5     11    13     25    36     38    39    43
## Predicted        54281 26199  41198  -690  71891 33982 -12370 40130 13636
## cvpred           56300 25301  42783 -2912  79496 29407 -15697 37234 12304
## AverageWinnings  40419 16202  20359  6026  38551 38345  16486 81787  4378
## CV residual     -15881 -9099 -22424  8938 -40945  8938  32183 44553 -7926
##                     53     72     73    75    76    78    81     92  111
## Predicted       108551 -23602  76158 37758 25550 34250 20946  66001 3186
## cvpred          115529 -29528  85012 42230 25240 36558 18906  74185 1293
## AverageWinnings  98230   5449  27230 34414 29452 27103  9660  36636 1031
## CV residual     -17299  34977 -57782 -7816  4212 -9455 -9246 -37549 -262
##                    114    116    119    120    125    127    128    136
## Predicted        74126  78632  46836  89611  80412  53702  34344  41938
## cvpred           75748  84344  55495  96514  85301  53404  37495  45154
## AverageWinnings  65268  33499  18582  78394  15806  32685  18188  19901
## CV residual     -10480 -50845 -36913 -18120 -69495 -20719 -19307 -25253
##                    141    145    152    159    160   162    164   169
## Predicted        78190  26320  91937  84791  49187 73553  51887 42194
## cvpred           77816  27160  98826  88195  52381 74598  56481 45302
## AverageWinnings  26149   8600  66836  72904  29286 98566  35182 48607
## CV residual     -51667 -18560 -31990 -15291 -23095 23968 -21299  3305
##                   183    185   189    195   196
## Predicted       17294  33462 41647  63191 60052
## cvpred          17593  31829 45797  68725 63387
## AverageWinnings 37292 113473 47589  51586 80590
## CV residual     19699  81644  1792 -17139 17203
## 
## Sum of squares = 3.64e+10    Mean square = 9.34e+08    n = 39 
## 
## fold 2 
## Observations in test set: 40 
##                     9     12    18     24    28    33    40     46     50
## Predicted       27814 -11945 39971  -8586 61377 57696 26539  64420 -55958
## cvpred          20584 -19961 42354 -22535 58787 55666 17991  63950 -75176
## AverageWinnings 25041  15727 36318   6664 72422 56206 15694  18862    850
## CV residual      4457  35688 -6036  29199 13635   540 -2297 -45088  76026
##                     51     52     54    55     57     58    63     80
## Predicted       -12356  37163  68168 25565  70638 -26310 39691  34220
## cvpred          -16754  37007  68885 24464  67524 -36524 41640  35715
## AverageWinnings   6090  18441 128129 17010  32240  10014 46638  25230
## CV residual      22844 -18566  59244 -7454 -35284  46538  4998 -10485
##                     84     87    88     95     97    99    103    106
## Predicted        33780 -13120 67484  49893  82544 15504  60796 108133
## cvpred           30563 -17601 67476  51978  80344 13377  62421 107109
## AverageWinnings   7315  13203 76394  11076  43714 25447  38960  56217
## CV residual     -23248  30804  8918 -40902 -36630 12070 -23461 -50892
##                   109   115    126    131   134    135    137   142    143
## Predicted       83254  5834  -2172  89954  8174  55652 -11751 13857 125603
## cvpred          85654 -2755 -10141  91911  1100  55902 -19460  5671 127026
## AverageWinnings 84871  7891  17657 125524 12065  25371  12350 29356 262947
## CV residual      -783 10646  27798  33613 10965 -30531  31810 23685 135921
##                   149   155   173    188   190    192
## Predicted       11851 53206 40323  98936  6623  22907
## cvpred           7385 47061 41563 104162  3024  18391
## AverageWinnings 33335 96169 42544  70699 30166   4123
## CV residual     25950 49108   981 -33463 27142 -14268
## 
## Sum of squares = 5.43e+10    Mean square = 1.36e+09    n = 40 
## 
## fold 3 
## Observations in test set: 39 
##                    14    15    17     19     21     27    30     47     62
## Predicted       58117 16655 33067  61430  59075  86277 33859  94327 -43969
## cvpred          55373 17573 36275  72911  64494  91941 30537  99574 -46034
## AverageWinnings 51794 19381 37175  15438  26980  48856 56783 125614   3200
## CV residual     -3579  1808   900 -57473 -37514 -43085 26246  26040  49234
##                     66     67     68    69     71     74     77    79
## Predicted        85201 -13466 104887 11756  80341  77758 -45212 33729
## cvpred           93434 -15529 107499 12439  88284  79059 -54860 27745
## AverageWinnings  52460   9572  47573 15756  18290  57871   6004 28294
## CV residual     -40974  25101 -59926  3317 -69994 -21188  60864   549
##                     89    90    93   101    104   105   108   113  117
## Predicted        55633 64575 32134 46527  97855 68785 15745 10685  210
## cvpred           56874 65146 34347 47941 110829 69733 19231  8745  743
## AverageWinnings  35987 86077 45105 41969  29121 61241 10044 34398 8548
## CV residual     -20887 20931 10758 -5972 -81708 -8492 -9187 25653 7805
##                    118   123    124    132   140   157   158    163   166
## Predicted        97075 52508  56942  41766 32866 46417 23867 132427 14363
## cvpred          105861 54265  62652  47891 33183 45816 23801 136100 13514
## AverageWinnings  45394 93751  31992  21250 37404 48253 18319 179956 41601
## CV residual     -60467 39486 -30660 -26641  4221  2437 -5482  43856 28087
##                    178   179    180    191
## Predicted       106316 31487 152278  41358
## cvpred          113125 25983 162687  49080
## AverageWinnings  34626 46768 282393   6074
## CV residual     -78499 20785 119706 -43006
## 
## Sum of squares = 6.63e+10    Mean square = 1.7e+09    n = 39 
## 
## fold 4 
## Observations in test set: 39 
##                      6     7     10    20    23     26    29     32     35
## Predicted        69056 52426  81848 31939 33284  50033 49954  73339 -29929
## cvpred           69087 51150  75895 32674 28310  50416 53022  70588 -27368
## AverageWinnings  58932 41833  62906 40638 20162  25973 80892 110068   7820
## CV residual     -10155 -9317 -12989  7964 -8148 -24443 27870  39480  35188
##                     37     45     49     61    65     86    94     96
## Predicted        66588  45903  27229 129503 20421  28022  2208 133109
## cvpred           68939  47958  31270 118322 22195  31876  5942 125029
## AverageWinnings  23833  14200  15484 361702 72542  20001 49577 107250
## CV residual     -45106 -33758 -15786 243380 50347 -11875 43635 -17779
##                     98    100   112    121    122    130   133    144
## Predicted        87354  78904 17301 -41084  54421  26604 53668 146852
## cvpred           80350  83140 18584 -40483  51318  19978 44318 139511
## AverageWinnings  24898  24936 28161   8519  29912   9776 53072 242848
## CV residual     -55452 -58204  9577  49002 -21406 -10202  8754 103337
##                    146    147    151   156   161    170    171   172
## Predicted        22072  34111  37346 17511 13387  42243  50701 45603
## cvpred           27850  33680  36960 16734 12644  46480  50939 44731
## AverageWinnings   8232  14412  13563  7404 10610  19651  12162 79413
## CV residual     -19618 -19268 -23397 -9330 -2034 -26829 -38777 34682
##                    174    175    177   182   186    193
## Predicted        35986  46171  70165 55993 18212  77473
## cvpred           36757  37024  67323 55258 18124  74583
## AverageWinnings   8315  16330 117969 66445 34926  43572
## CV residual     -28442 -20694  50646 11187 16802 -31011
## 
## Sum of squares = 1.02e+11    Mean square = 2.63e+09    n = 39 
## 
## fold 5 
## Observations in test set: 39 
##                      1      2     3      8     16     22     31     34
## Predicted        31204 135123 35922  75580  60388  73359   5902  36616
## cvpred           39008 128487 37935  75783  61664  73286  18699  31302
## AverageWinnings  23440 232812 54729  38406  45481  38302   4178  20993
## CV residual     -15568 104325 16794 -37377 -16183 -34984 -14521 -10309
##                     41    42     44     48     56    59    60    64   70
## Predicted        35194 15529  57668  19871  49849 44427 62817 92520  576
## cvpred           36125  8166  54499  21826  52344 46581 58970 84473 2221
## AverageWinnings  13220 11269  23129   9787  15756 37158 57227 87257 8252
## CV residual     -22905  3103 -31370 -12039 -36588 -9423 -1743  2784 6031
##                     82     83    85     91   102   107    110    129  138
## Predicted        42457  49626 86806  88840 36540 71375  33417  57590 5238
## cvpred           44938  48760 72938  79915 38432 68840  32318  49002 7772
## AverageWinnings  19584  11349 90071  64589 67813 86574  21091  31531 9356
## CV residual     -25354 -37411 17133 -15326 29381 17734 -11227 -17471 1584
##                    139   148    150    153    154   165   167    168
## Predicted        43042 74524  42229  29158  53809 69712 -8063  92850
## cvpred           42019 67246  40458  31539  55879 68299 -5190  80998
## AverageWinnings  22616 58213  23399   8363  10782 88527 11339 122341
## CV residual     -19403 -9033 -17059 -23176 -45097 20228 16529  41343
##                    176    181   184   187    194
## Predicted       117117  57459 26657  5150 128112
## cvpred          106245  60852 26251  4841 105828
## AverageWinnings 158938  42623 25697 11308 376040
## CV residual      52693 -18229  -554  6467 270212
## 
## Sum of squares = 1.04e+11    Mean square = 2.66e+09    n = 39 
## 
## Overall (Sum over all 39 folds) 
##       ms 
## 1.85e+09

MSPE can be alternatively calculated as

# MSPE = PRESS / n. The observed response is AverageWinnings (DrivingAccuracy
# is a predictor), and the average is taken over observations, not folds.
mean((PGA$AverageWinnings - LOOCV$cvpred)^2)
## [1] 1.85e+09

PRESS can be calculated as

# PRESS: compare the cross-validated predictions with the response they
# predict (AverageWinnings), not with the DrivingAccuracy predictor.
sum((PGA$AverageWinnings - LOOCV$cvpred)^2)
## [1] 3.63e+11

Prediction R squared can be calculated as

# Prediction R-squared = 1 - PRESS / SST, with both terms computed on the
# response AverageWinnings. Using the DrivingAccuracy predictor here gave a
# meaningless value of about -1.28e+08.
1 - sum((PGA$AverageWinnings - LOOCV$cvpred)^2) /
  sum((PGA$AverageWinnings - mean(PGA$AverageWinnings))^2)
## [1] 0.384
# In-sample R-squared of the same model, for comparison.
summary(model_pga)$r.squared
## [1] 0.453

Computing using the functions

### Computes the PRESS (predictive residual sum of squares); lower is better
#' @title PRESS
#' @author Thomas Hopper
#' @description Returns the PRESS statistic (predictive residual sum of
#'              squares), computed from the ordinary residuals and the hat
#'              values of the fitted model, without refitting.
#'              Useful for evaluating predictive power of regression models.
#' @param linear.model A linear regression model (class 'lm'). Required.
#' @return A single numeric value: the sum of squared predictive residuals.
PRESS <- function(linear.model) {
  # Leverage of each observation (diagonal of the hat matrix)
  leverage <- lm.influence(linear.model)$hat
  # Predictive residual: ordinary residual inflated by 1 / (1 - leverage)
  predictive_resid <- residuals(linear.model) / (1 - leverage)
  sum(predictive_resid^2)
}
### Computes the MSPE (mean square prediction error); lower is better
#' @title MSPE
#' @author Yichen Qin
#' @description Returns the MSPE statistic (mean square prediction error),
#'              i.e. the PRESS statistic divided by the number of residuals
#'              of the fitted model. Requires the function PRESS().
#' @param linear.model A linear regression model (class 'lm'). Required.
#' @return A single numeric value: PRESS divided by the sample size.
MSPE <- function(linear.model) {
  press_value <- PRESS(linear.model)
  # Sample size taken as the number of residuals in the fit
  n_obs <- length(residuals(linear.model))
  press_value / n_obs
}
### Computes the prediction R-squared
#' @title Predictive R-squared
#' @author Thomas Hopper
#' @description Returns the prediction R-squared, 1 - PRESS / TSS, where TSS
#'              is the sum of the 'Sum Sq' column of the model's anova table.
#'              Requires the function PRESS(), which returns the PRESS
#'              statistic.
#' @param linear.model A linear regression model (class 'lm'). Required.
#' @return A single numeric value: the prediction R-squared.
pred_r_squared <- function(linear.model) {
  # The anova table lists one sum of squares per term plus the residuals;
  # adding them all up gives the total sum of squares.
  anova_table <- anova(linear.model)
  total_ss <- sum(anova_table[["Sum Sq"]])

  1 - PRESS(linear.model) / total_ss
}
# Hat-value based (no refitting) metrics for the full model model_pga.
# Mean square prediction error: PRESS divided by the number of residuals.
MSPE(model_pga)
## [1] 1.84e+09
# Predictive residual sum of squares.
PRESS(model_pga)
## [1] 3.62e+11
# Prediction R-squared: 1 - PRESS / total sum of squares.
pred_r_squared(model_pga)
## [1] 0.387
# In-sample R-squared of the same model, for comparison.
summary(model_pga)$r.squared
## [1] 0.453
  1. Fit another multiple linear regression to the data. Use the log of Average winnings as the response variable and use Driving Accuracy (percent), Greens on Regulation (%), Average # of Putts, Save Percent, and # Events as covariates. Perform a 5-fold cross validation and obtain the PRESS statistic, MSPE, and prediction R-squared.
# Reduced model: same response as model_pga, without Age and AverageDrive.
# The stray unary `+` in front of GreensonRegulation has been removed; it did
# not change the fitted model, only the echoed call.
# NOTE(review): the task statement asks for log(AverageWinnings) as the
# response; the raw response is kept so the captured output stays valid.
Model_Pga2 <- lm(
  AverageWinnings ~ DrivingAccuracy + GreensonRegulation +
    AverageNumofPutts + SavePercent + NumEvents,
  data = PGA
)
summary(Model_Pga2)
## 
## Call:
## lm(formula = AverageWinnings ~ DrivingAccuracy + +GreensonRegulation + 
##     AverageNumofPutts + SavePercent + NumEvents, data = PGA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -69302 -23747  -6472  17736 243585 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          916419     272720    3.36  0.00094 ***
## DrivingAccuracy       -2430        593   -4.10  6.1e-05 ***
## GreensonRegulation     8391       1115    7.52  2.1e-12 ***
## AverageNumofPutts   -701063     137013   -5.12  7.6e-07 ***
## SavePercent            1380        585    2.36  0.01932 *  
## NumEvents             -3042        632   -4.81  3.0e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 41400 on 190 degrees of freedom
## Multiple R-squared:  0.449,  Adjusted R-squared:  0.434 
## F-statistic: 30.9 on 5 and 190 DF,  p-value: <2e-16

5-fold cross-validation

# 5-fold cross-validation of the reduced model Model_Pga2, using the same
# seed (123) as the run for the full model.
KCV2 <- cv.lm(data = PGA, Model_Pga2, m = 5, seed = 123)
## Analysis of Variance Table
## 
## Response: AverageWinnings
##                     Df   Sum Sq  Mean Sq F value  Pr(>F)    
## DrivingAccuracy      1 9.71e+09 9.71e+09    5.68   0.018 *  
## GreensonRegulation   1 1.22e+11 1.22e+11   71.40 7.5e-15 ***
## AverageNumofPutts    1 8.00e+10 8.00e+10   46.74 1.1e-10 ***
## SavePercent          1 1.32e+10 1.32e+10    7.71   0.006 ** 
## NumEvents            1 3.96e+10 3.96e+10   23.16 3.0e-06 ***
## Residuals          190 3.25e+11 1.71e+09                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in cv.lm(data = PGA, Model_Pga2, m = 5, seed = 123): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## 
## fold 1 
## Observations in test set: 39 
##                      4     5     11   13     25    36     38    39     43
## Predicted        53873 24541  44140 4747  69825 36760 -14291 39282  16957
## cvpred           54605 23485  47674 4275  77581 33399 -18125 36898  16787
## AverageWinnings  40419 16202  20359 6026  38551 38345  16486 81787   4378
## CV residual     -14186 -7283 -27315 1751 -39030  4946  34611 44889 -12409
##                     53     72     73    75    76    78     81     92   111
## Predicted       108536 -21992  76166 33334 22759 27344  22114  66931 -1051
## cvpred          116400 -28035  83257 37119 21505 27896  20319  74295 -3751
## AverageWinnings  98230   5449  27230 34414 29452 27103   9660  36636  1031
## CV residual     -18170  33484 -56027 -2705  7947  -793 -10659 -37659  4782
##                    114    116    119    120    125    127    128    136
## Predicted        76053  80616  42048  82180  74679  51625  28589  46276
## cvpred           77990  87553  47721  88422  78210  49983  30784  49783
## AverageWinnings  65268  33499  18582  78394  15806  32685  18188  19901
## CV residual     -12722 -54054 -29139 -10028 -62404 -17298 -12596 -29882
##                    141    145    152    159    160   162    164   169
## Predicted        83115  23245  91975  90046  51658 73888  51285 40585
## cvpred           84771  23622  99045  94647  54010 74303  56222 43829
## AverageWinnings  26149   8600  66836  72904  29286 98566  35182 48607
## CV residual     -58622 -15022 -32209 -21743 -24724 24263 -21040  4778
##                   183    185   189    195   196
## Predicted       19332  35628 46879  66151 55148
## cvpred          19907  35682 51523  72213 56955
## AverageWinnings 37292 113473 47589  51586 80590
## CV residual     17385  77791 -3934 -20627 23635
## 
## Sum of squares = 3.56e+10    Mean square = 9.13e+08    n = 39 
## 
## fold 2 
## Observations in test set: 40 
##                     9     12    18     24    28    33    40     46     50
## Predicted       21334 -11114 43613 -10294 63589 53853 27995  63424 -61866
## cvpred          16022 -19294 43261 -23215 59618 52365 17952  63468 -79926
## AverageWinnings 25041  15727 36318   6664 72422 56206 15694  18862    850
## CV residual      9019  35021 -6943  29879 12804  3841 -2258 -44606  80776
##                     51     52     54    55     57     58    63     80
## Predicted       -14256  41375  72022 25140  75037 -28326 39322  39623
## cvpred          -18543  38921  72365 23938  71484 -38301 41649  39202
## AverageWinnings   6090  18441 128129 17010  32240  10014 46638  25230
## CV residual      24633 -20480  55764 -6928 -39244  48315  4989 -13972
##                     84     87    88     95     97    99    103    106
## Predicted        38634  -9670 67926  53059  85171 11673  61665 101025
## cvpred           33371 -14638 67512  53014  81841 11780  63792 102928
## AverageWinnings   7315  13203 76394  11076  43714 25447  38960  56217
## CV residual     -26056  27841  8882 -41938 -38127 13667 -24832 -46711
##                   109   115    126    131   134    135    137   142    143
## Predicted       88321  4504  -1804  87938  5902  52468 -13751 14571 124491
## cvpred          89133 -3007 -10218  90446  -454  54353 -21058  6114 127120
## AverageWinnings 84871  7891  17657 125524 12065  25371  12350 29356 262947
## CV residual     -4262 10898  27875  35078 12519 -28982  33408 23242 135827
##                   149   155   173    188   190    192
## Predicted        8361 49231 46029 102598 12505  22684
## cvpred           4952 44621 44453 106503  6437  17851
## AverageWinnings 33335 96169 42544  70699 30166   4123
## CV residual     28383 51548 -1909 -35804 23729 -13728
## 
## Sum of squares = 5.57e+10    Mean square = 1.39e+09    n = 40 
## 
## fold 3 
## Observations in test set: 39 
##                    14    15    17     19     21     27    30     47     62
## Predicted       55041 16070 37499  63453  60093  82057 27729  93785 -40463
## cvpred          54004 16428 37283  73892  64640  89159 25837  99431 -43542
## AverageWinnings 51794 19381 37175  15438  26980  48856 56783 125614   3200
## CV residual     -2210  2953  -108 -58454 -37660 -40303 30946  26183  46742
##                     66     67     68      69     71     74     77    79
## Predicted        82411 -10035 101035 14586.9  77656  77107 -47910 31315
## cvpred           89192 -13215 107818 15834.2  87464  81404 -55483 28564
## AverageWinnings  52460   9572  47573 15756.0  18290  57871   6004 28294
## CV residual     -36732  22787 -60245   -78.2 -69174 -23533  61487  -270
##                     89    90    93   101    104   105   108   113  117
## Predicted        61105 64613 30022 42006  97938 65577 15904 12764 -242
## cvpred           58870 63976 33192 47413 107585 67049 18131 11567 1329
## AverageWinnings  35987 86077 45105 41969  29121 61241 10044 34398 8548
## CV residual     -22883 22101 11913 -5444 -78464 -5808 -8087 22831 7219
##                    118   123    124    132   140   157   158    163   166
## Predicted       102113 50919  60961  46887 33696 43391 24363 125070 16472
## cvpred          106851 54332  64103  50760 34120 45138 27787 128775 15662
## AverageWinnings  45394 93751  31992  21250 37404 48253 18319 179956 41601
## CV residual     -61457 39419 -32111 -29510  3284  3115 -9468  51181 25939
##                    178   179    180    191
## Predicted       103928 29294 148859  43500
## cvpred          112664 25693 160865  48742
## AverageWinnings  34626 46768 282393   6074
## CV residual     -78038 21075 121528 -42668
## 
## Sum of squares = 6.67e+10    Mean square = 1.71e+09    n = 39 
## 
## fold 4 
## Observations in test set: 39 
##                     6      7     10    20    23     26    29     32     35
## Predicted       63819  54250  86642 32883 31955  49459 46448  72194 -29157
## cvpred          64119  53662  82097 33353 26868  49552 49512  69621 -26383
## AverageWinnings 58932  41833  62906 40638 20162  25973 80892 110068   7820
## CV residual     -5187 -11829 -19191  7285 -6706 -23579 31380  40447  34203
##                     37     45     49     61    65     86    94     96
## Predicted        63893  50134  30840 129588 25387  29020  8758 136454
## cvpred           66172  52475  35467 119015 27502  32325 12745 128127
## AverageWinnings  23833  14200  15484 361702 72542  20001 49577 107250
## CV residual     -42339 -38275 -19983 242687 45040 -12324 36832 -20877
##                     98    100   112    121    122    130   133    144
## Predicted        89237  77631  8329 -37499  59342  28279 57680 145732
## cvpred           83464  81380  9389 -36112  56554  22727 49866 138744
## AverageWinnings  24898  24936 28161   8519  29912   9776 53072 242848
## CV residual     -58566 -56444 18772  44631 -26642 -12951  3206 104104
##                    146    147    151    156   161    170    171   172
## Predicted        21931  29537  38562  22581 18848  41536  53011 46004
## cvpred           27509  28754  39261  22659 19515  45023  54029 45110
## AverageWinnings   8232  14412  13563   7404 10610  19651  12162 79413
## CV residual     -19277 -14342 -25698 -15255 -8905 -25372 -41867 34303
##                    174    175    177   182   186    193
## Predicted        39325  47808  69011 55812 20720  73291
## cvpred           40769  39220  66169 55199 21379  70263
## AverageWinnings   8315  16330 117969 66445 34926  43572
## CV residual     -32454 -22890  51800 11246 13547 -26691
## 
## Sum of squares = 1.03e+11    Mean square = 2.63e+09    n = 39 
## 
## fold 5 
## Observations in test set: 39 
##                     1      2     3      8     16     22     31     34
## Predicted       25081 128057 34314  70896  60639  70485   5108  38372
## cvpred          29095 116434 35031  68092  62829  67751  15888  34517
## AverageWinnings 23440 232812 54729  38406  45481  38302   4178  20993
## CV residual     -5655 116378 19698 -29686 -17348 -29449 -11710 -13524
##                     41    42     44     48     56    59    60    64   70
## Predicted        30666 20824  56408  19317  51215 44058 65723 97594 3743
## cvpred           28543 16880  52020  21135  53592 45992 64047 92501 7690
## AverageWinnings  13220 11269  23129   9787  15756 37158 57227 87257 8252
## CV residual     -15323 -5611 -28891 -11348 -37836 -8834 -6820 -5244  562
##                     82     83    85     91   102   107    110    129  138
## Predicted        40301  44855 93844  90715 35373 69989  34102  64166 3630
## cvpred           40855  40842 85859  83532 36310 67398  33263  60221 6246
## AverageWinnings  19584  11349 90071  64589 67813 86574  21091  31531 9356
## CV residual     -21271 -29493  4212 -18943 31503 19176 -12172 -28690 3110
##                    139   148    150    153    154   165   167    168
## Predicted        39243 72928  42852  26631  48329 68716 -8157  95131
## cvpred           35518 64717  41380  26479  46737 67342 -4830  85028
## AverageWinnings  22616 58213  23399   8363  10782 88527 11339 122341
## CV residual     -12902 -6504 -17981 -18116 -35955 21185 16169  37313
##                    176   181   184   187    194
## Predicted       115260 51378 26561  7883 132455
## cvpred          103735 50652 27337  9499 113545
## AverageWinnings 158938 42623 25697 11308 376040
## CV residual      55203 -8029 -1640  1809 262495
## 
## Sum of squares = 9.95e+10    Mean square = 2.55e+09    n = 39 
## 
## Overall (Sum over all 39 folds) 
##       ms 
## 1.84e+09

MSPE

# MSPE is PRESS divided by the number of observations, i.e. the mean of the
# squared cross-validation prediction errors. Dividing by the number of folds
# (5) overstates it; the value below agrees with the "Overall ms" that cv.lm
# reports for this model.
mean((PGA$AverageWinnings - KCV2$cvpred)^2)
## [1] 1.84e+09

PRESS can be calculated as

# PRESS: sum of squared differences between the cross-validated predictions
# and the observed average winnings.
sum((KCV2$cvpred - PGA$AverageWinnings)^2)
## [1] 3.6e+11

Prediction R squared can be calculated as

# Prediction R-squared: 1 - PRESS / total sum of squares of the response.
with(PGA, 1 - sum((AverageWinnings - KCV2$cvpred)^2) /
  sum((AverageWinnings - mean(AverageWinnings))^2))
## [1] 0.389
# Traditional (in-sample) R-squared of the same model, for comparison.
summary(Model_Pga2)[["r.squared"]]
## [1] 0.449
  1. Fit a third multiple linear regression to the data. Use the log of Average winnings as the response variable and use Average # of Putts, Save Percent, and # Events as covariates. Perform a 5-fold cross validation and obtain the PRESS statistic, MPSE, and prediction R-squared.
# Model 3: regress average winnings on putting average, save percentage and
# number of events played.
# NOTE(review): the exercise text asks for log(AverageWinnings) as the
# response, but this fit uses the untransformed AverageWinnings -- confirm
# which is intended before relying on the results below.
Model_Pga3 <- lm(
  formula = AverageWinnings ~ AverageNumofPutts + SavePercent + NumEvents,
  data = PGA
)
summary(Model_Pga3)
## 
## Call:
## lm(formula = AverageWinnings ~ AverageNumofPutts + SavePercent + 
##     NumEvents, data = PGA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -94583 -23983  -5520  11815 318377 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1735961     283752    6.12  5.2e-09 ***
## AverageNumofPutts  -915366     152848   -5.99  1.0e-08 ***
## SavePercent            521        654    0.80     0.43    
## NumEvents            -3328        720   -4.62  6.9e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47200 on 192 degrees of freedom
## Multiple R-squared:  0.275,  Adjusted R-squared:  0.264 
## F-statistic: 24.3 on 3 and 192 DF,  p-value: 2.2e-13

5-fold cross-validation

# 5-fold cross-validation of Model_Pga3 with a fixed seed; the result is kept
# so that its cvpred column can be used for PRESS, MSPE and prediction
# R-squared.
KCV3 <- cv.lm(data = PGA, Model_Pga3, m = 5, seed = 123)
## Analysis of Variance Table
## 
## Response: AverageWinnings
##                    Df   Sum Sq  Mean Sq F value  Pr(>F)    
## AverageNumofPutts   1 1.12e+11 1.12e+11   50.26 2.5e-11 ***
## SavePercent         1 2.92e+09 2.92e+09    1.31    0.25    
## NumEvents           1 4.76e+10 4.76e+10   21.38 6.9e-06 ***
## Residuals         192 4.27e+11 2.23e+09                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in cv.lm(data = PGA, Model_Pga3, m = 5, seed = 123): 
## 
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.  Lines that
##  are shown for the different folds are approximate

## 
## fold 1 
## Observations in test set: 39 
##                      4      5     11    13    25     36    38    39     43
## Predicted        58485  42245  36154 13158 28701  76436 24359 66598  26840
## cvpred           60387  43734  37222 14041 29914  78364 25064 68488  28016
## AverageWinnings  40419  16202  20359  6026 38551  38345 16486 81787   4378
## CV residual     -19968 -27532 -16863 -8015  8637 -40019 -8578 13299 -23638
##                    53     72     73    75     76    78     81    92    111
## Predicted       75885  21414  51386 24402  38423 35204  44480 25326  21840
## cvpred          77844  22541  52517 25047  39942 36439  45976 26543  22865
## AverageWinnings 98230   5449  27230 34414  29452 27103   9660 36636   1031
## CV residual     20386 -17092 -25287  9367 -10490 -9336 -36316 10093 -21834
##                    114    116   119   120    125    127   128    136
## Predicted        78045  49317 16420 53838  61105  79546 23968  36245
## cvpred           80292  50772 17105 55446  63074  81665 24937  37432
## AverageWinnings  65268  33499 18582 78394  15806  32685 18188  19901
## CV residual     -15024 -17273  1477 22948 -47268 -48980 -6749 -17531
##                    141    145   152   159    160   162   164   169   183
## Predicted        89030  22910 66319 70580  49963 85138 29439 32655 29404
## cvpred           91471  24303 67895 72743  51464 87636 30536 33584 30339
## AverageWinnings  26149   8600 66836 72904  29286 98566 35182 48607 37292
## CV residual     -65322 -15703 -1059   161 -22178 10930  4646 15023  6953
##                    185   189   195   196
## Predicted        45477 34128 39104 56912
## cvpred           47057 34968 40411 58690
## AverageWinnings 113473 47589 51586 80590
## CV residual      66416 12621 11175 21900
## 
## Sum of squares = 2.42e+10    Mean square = 6.19e+08    n = 39 
## 
## fold 2 
## Observations in test set: 40 
##                     9     12     18     24    28     33    40     46
## Predicted       35483 -18850  69355  -4711 24810  69755 11019  69260
## cvpred          33301 -24396  70955 -11642 22696  70161  6869  70052
## AverageWinnings 25041  15727  36318   6664 72422  56206 15694  18862
## CV residual     -8260  40123 -34637  18306 49726 -13955  8825 -51190
##                     50     51     52     54     55     57     58     63
## Predicted       -13075  36888  47040  78483  46965  74033  -5071  57229
## cvpred          -19730  35601  46334  80013  47871  72493 -10340  59583
## AverageWinnings    850   6090  18441 128129  17010  32240  10014  46638
## CV residual      20580 -29511 -27893  48116 -30861 -40253  20354 -12945
##                     80     84     87    88     95      97     99    103
## Predicted        35759  31240  -9832 57591  61179 47192.6  48757  95981
## cvpred           35856  28690 -13251 56273  59835 43617.6  49847  96253
## AverageWinnings  25230   7315  13203 76394  11076 43714.0  25447  38960
## CV residual     -10626 -21375  26454 20121 -48759    96.4 -24400 -57293
##                    106   109   115   126    131    134    135   137   142
## Predicted        75599 57582 16277 -5394  89737  30793  61326 12802 18898
## cvpred           76715 57847 13982 -8484  91475  29010  62271  7942 15002
## AverageWinnings  56217 84871  7891 17657 125524  12065  25371 12350 29356
## CV residual     -20498 27024 -6091 26141  34049 -16945 -36900  4408 14354
##                    143   149   155   173   188   190    192
## Predicted        81989 28172 34567 46310 68124 11422  42476
## cvpred           82435 26261 32806 47290 68613  8907  41293
## AverageWinnings 262947 33335 96169 42544 70699 30166   4123
## CV residual     180512  7074 63363 -4746  2086 21259 -37170
## 
## Sum of squares = 6.75e+10    Mean square = 1.69e+09    n = 40 
## 
## fold 3 
## Observations in test set: 39 
##                    14     15    17     19    21     27    30     47     62
## Predicted       40206  43872 37187  41642 30892  84001 12103 109459   54.9
## cvpred          41345  47617 37401  47586 33750  91770 11507 113849 1113.3
## AverageWinnings 51794  19381 37175  15438 26980  48856 56783 125614 3200.0
## CV residual     10449 -28236  -226 -32148 -6770 -42914 45276  11765 2086.7
##                     66     67     68     69      71     74    77    79
## Predicted        77064  21690  77481  49906  112873  79629   260 21170
## cvpred           80277  20635  85765  54386  122808  85456 -4069 19806
## AverageWinnings  52460   9572  47573  15756   18290  57871  6004 28294
## CV residual     -27817 -11063 -38192 -38630 -104518 -27585 10073  8488
##                    89    90    93    101    104   105    108   113    117
## Predicted       40081 53160 47845  64228 108735 61603  27144 27331  43778
## cvpred          41137 57798 51759  70149 114670 65964  28000 28505  47666
## AverageWinnings 35987 86077 45105  41969  29121 61241  10044 34398   8548
## CV residual     -5150 28279 -6654 -28180 -85549 -4723 -17956  5893 -39118
##                    118   123    124   132   140   157   158    163   166
## Predicted       102722 76669  88983 23165 38409 34492 11821  62602 47156
## cvpred          111907 84265  93307 25840 40468 38174 10835  62929 49994
## AverageWinnings  45394 93751  31992 21250 37404 48253 18319 179956 41601
## CV residual     -66513  9486 -61315 -4590 -3064 10079  7484 117027 -8393
##                    178   179    180    191
## Predicted        98978 24475 122500  49729
## cvpred          103744 26618 131331  50132
## AverageWinnings  34626 46768 282393   6074
## CV residual     -69118 20150 151062 -44058
## 
## Sum of squares = 8.48e+10    Mean square = 2.17e+09    n = 39 
## 
## fold 4 
## Observations in test set: 39 
##                     6     7     10    20     23     26    29     32   35
## Predicted       56575 53855  84276 33699  66665  39646 20047  68739  454
## cvpred          56030 51049  80842 34194  63262  39048 22183  65674 4101
## AverageWinnings 58932 41833  62906 40638  20162  25973 80892 110068 7820
## CV residual      2902 -9216 -17936  6444 -43100 -13075 58709  44394 3719
##                     37     45     49     61    65     86    94     96
## Predicted        44554  26194  31244 114921 48605  29772 -6647 102210
## cvpred           45714  27381  35786 107225 48834  32138 -4292  96723
## AverageWinnings  23833  14200  15484 361702 72542  20001 49577 107250
## CV residual     -21881 -13181 -20302 254477 23708 -12137 53869  10527
##                     98   100   112   121    122    130    133    144   146
## Predicted        76701 29712 30579 11456  67707  57443 118380 115663 11679
## cvpred           71676 31677 31390 13485  65816  53225 113122 109542 17512
## AverageWinnings  24898 24936 28161  8519  29912   9776  53072 242848  8232
## CV residual     -46778 -6741 -3229 -4966 -35904 -43449 -60050 133306 -9280
##                    147    151    156    161   170    171   172    174
## Predicted        46713  48507  37144  55062  8528  50923 44527  36437
## cvpred           45636  48381  37497  55811 11892  51270 43496  37789
## AverageWinnings  14412  13563   7404  10610 19651  12162 79413   8315
## CV residual     -31224 -34818 -30093 -45201  7759 -39108 35917 -29474
##                    175    177   182    186   193
## Predicted        74210  64826 38571  54415 55971
## cvpred           69961  62893 39055  54152 52444
## AverageWinnings  16330 117969 66445  34926 43572
## CV residual     -53631  55076 27390 -19226 -8872
## 
## Sum of squares = 1.2e+11    Mean square = 3.09e+09    n = 39 
## 
## fold 5 
## Observations in test set: 39 
##                      1      2     3      8     16     22     31     34
## Predicted        55165 105295 39413  81390  75368  64814  29908 -14292
## cvpred           51385  98369 37666  74840  72906  61810  33564  -9498
## AverageWinnings  23440 232812 54729  38406  45481  38302   4178  20993
## CV residual     -27945 134443 17063 -36434 -27425 -23508 -29386  30491
##                    41    42     44    48     56     59    60    64   70
## Predicted       15127 17496  53268 14628  60024  50155 50748 76975 7283
## cvpred          14685 12992  47643 15238  59167  49333 50299 75623 8536
## AverageWinnings 13220 11269  23129  9787  15756  37158 57227 87257 8252
## CV residual     -1465 -1723 -24514 -5451 -43411 -12175  6928 11634 -284
##                     82     83    85    91   102   107   110    129  138
## Predicted        33807  31617 79421 72991 24337 59746 27998  54458 2502
## cvpred           33607  28857 73439 68413 25859 57887 26138  51030 2960
## AverageWinnings  19584  11349 90071 64589 67813 86574 21091  31531 9356
## CV residual     -14023 -17508 16632 -3824 41954 28687 -5047 -19499 6396
##                   139   148    150    153    154   165   167    168    176
## Predicted       26731 27825  40501  24459  23722 76934 -6817  68024  94821
## cvpred          23656 27594  37471  23141  25799 72237 -6165  62265  86351
## AverageWinnings 22616 58213  23399   8363  10782 88527 11339 122341 158938
## CV residual     -1040 30619 -14072 -14778 -15017 16290 17504  60076  72587
##                    181   184   187    194
## Predicted        62488 31182  3222  57663
## cvpred           57751 28760  3473  53160
## AverageWinnings  42623 25697 11308 376040
## CV residual     -15128 -3063  7835 322880
## 
## Sum of squares = 1.46e+11    Mean square = 3.74e+09    n = 39 
## 
## Overall (Sum over all 39 folds) 
##       ms 
## 2.26e+09

#MSPE

# MSPE is PRESS divided by the number of observations, i.e. the mean of the
# squared cross-validation prediction errors. Dividing by the number of folds
# (5) overstates it; the value below agrees with the "Overall ms" that cv.lm
# reports for this model.
mean((PGA$AverageWinnings - KCV3$cvpred)^2)
## [1] 2.26e+09

PRESS can be calculated as

# PRESS: sum of squared differences between the cross-validated predictions
# and the observed average winnings.
sum((KCV3$cvpred - PGA$AverageWinnings)^2)
## [1] 4.43e+11

Prediction R squared can be calculated as

# Prediction R-squared: 1 - PRESS / total sum of squares of the response.
with(PGA, 1 - sum((AverageWinnings - KCV3$cvpred)^2) /
  sum((AverageWinnings - mean(AverageWinnings))^2))
## [1] 0.249
# Traditional (in-sample) R-squared of the same model, for comparison.
summary(Model_Pga3)[["r.squared"]]
## [1] 0.275
  1. Compare the prediction R-squared obtained from the previous three questions. Based on the comparison, which model is preferred in terms of model validation? Compare each prediction R-squared with its own traditional R-squared. Which one is higher and why?

The prediction R-squared is highest for the model in question 2, where Driving Accuracy (percent), Greens on Regulation (%), Average # of Putts, Save Percent, and # Events are the covariates.

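Each prediction R-squared is lower than its traditional R-squared (0.389 vs 0.449 for question 2, and 0.249 vs 0.275 for question 3). The traditional R-squared is higher because it is computed on the same data used to fit the model, whereas the prediction R-squared is computed on held-out folds. The gap between the two should be small; it is 0.060 for question 2 and 0.026 for question 3.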

From the above inference we can conclude that the model in question 2 is the best-fitting model of the three.

Part II

  1. The Iris flower data set iris.csv is a multivariate data set introduced by the British statistician and biologist Ronald Fisher. It consists of 50 samples from each of three species of Iris (Iris setosa, Iris virginica and Iris versicolor). Four features were measured from each sample: the length and the width of the sepals and petals, in centimeters. Please read the data set into R and visualize the data.
# Load the iris measurements. `header = TRUE` is spelled out in full instead
# of relying on partial argument matching (`h`) and the reassignable `T`.
Iris_Df <- read.csv("C:/Users/bhuvi iyer/Downloads/iris.csv", header = TRUE)
# Print the whole data frame for a first look at the data.
Iris_Df
##       X Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1     1          5.1         3.5          1.4         0.2     setosa
## 2     2          4.9         3.0          1.4         0.2     setosa
## 3     3          4.7         3.2          1.3         0.2     setosa
## 4     4          4.6         3.1          1.5         0.2     setosa
## 5     5          5.0         3.6          1.4         0.2     setosa
## 6     6          5.4         3.9          1.7         0.4     setosa
## 7     7          4.6         3.4          1.4         0.3     setosa
## 8     8          5.0         3.4          1.5         0.2     setosa
## 9     9          4.4         2.9          1.4         0.2     setosa
## 10   10          4.9         3.1          1.5         0.1     setosa
## 11   11          5.4         3.7          1.5         0.2     setosa
## 12   12          4.8         3.4          1.6         0.2     setosa
## 13   13          4.8         3.0          1.4         0.1     setosa
## 14   14          4.3         3.0          1.1         0.1     setosa
## 15   15          5.8         4.0          1.2         0.2     setosa
## 16   16          5.7         4.4          1.5         0.4     setosa
## 17   17          5.4         3.9          1.3         0.4     setosa
## 18   18          5.1         3.5          1.4         0.3     setosa
## 19   19          5.7         3.8          1.7         0.3     setosa
## 20   20          5.1         3.8          1.5         0.3     setosa
## 21   21          5.4         3.4          1.7         0.2     setosa
## 22   22          5.1         3.7          1.5         0.4     setosa
## 23   23          4.6         3.6          1.0         0.2     setosa
## 24   24          5.1         3.3          1.7         0.5     setosa
## 25   25          4.8         3.4          1.9         0.2     setosa
## 26   26          5.0         3.0          1.6         0.2     setosa
## 27   27          5.0         3.4          1.6         0.4     setosa
## 28   28          5.2         3.5          1.5         0.2     setosa
## 29   29          5.2         3.4          1.4         0.2     setosa
## 30   30          4.7         3.2          1.6         0.2     setosa
## 31   31          4.8         3.1          1.6         0.2     setosa
## 32   32          5.4         3.4          1.5         0.4     setosa
## 33   33          5.2         4.1          1.5         0.1     setosa
## 34   34          5.5         4.2          1.4         0.2     setosa
## 35   35          4.9         3.1          1.5         0.2     setosa
## 36   36          5.0         3.2          1.2         0.2     setosa
## 37   37          5.5         3.5          1.3         0.2     setosa
## 38   38          4.9         3.6          1.4         0.1     setosa
## 39   39          4.4         3.0          1.3         0.2     setosa
## 40   40          5.1         3.4          1.5         0.2     setosa
## 41   41          5.0         3.5          1.3         0.3     setosa
## 42   42          4.5         2.3          1.3         0.3     setosa
## 43   43          4.4         3.2          1.3         0.2     setosa
## 44   44          5.0         3.5          1.6         0.6     setosa
## 45   45          5.1         3.8          1.9         0.4     setosa
## 46   46          4.8         3.0          1.4         0.3     setosa
## 47   47          5.1         3.8          1.6         0.2     setosa
## 48   48          4.6         3.2          1.4         0.2     setosa
## 49   49          5.3         3.7          1.5         0.2     setosa
## 50   50          5.0         3.3          1.4         0.2     setosa
## 51   51          7.0         3.2          4.7         1.4 versicolor
## 52   52          6.4         3.2          4.5         1.5 versicolor
## 53   53          6.9         3.1          4.9         1.5 versicolor
## 54   54          5.5         2.3          4.0         1.3 versicolor
## 55   55          6.5         2.8          4.6         1.5 versicolor
## 56   56          5.7         2.8          4.5         1.3 versicolor
## 57   57          6.3         3.3          4.7         1.6 versicolor
## 58   58          4.9         2.4          3.3         1.0 versicolor
## 59   59          6.6         2.9          4.6         1.3 versicolor
## 60   60          5.2         2.7          3.9         1.4 versicolor
## 61   61          5.0         2.0          3.5         1.0 versicolor
## 62   62          5.9         3.0          4.2         1.5 versicolor
## 63   63          6.0         2.2          4.0         1.0 versicolor
## 64   64          6.1         2.9          4.7         1.4 versicolor
## 65   65          5.6         2.9          3.6         1.3 versicolor
## 66   66          6.7         3.1          4.4         1.4 versicolor
## 67   67          5.6         3.0          4.5         1.5 versicolor
## 68   68          5.8         2.7          4.1         1.0 versicolor
## 69   69          6.2         2.2          4.5         1.5 versicolor
## 70   70          5.6         2.5          3.9         1.1 versicolor
## 71   71          5.9         3.2          4.8         1.8 versicolor
## 72   72          6.1         2.8          4.0         1.3 versicolor
## 73   73          6.3         2.5          4.9         1.5 versicolor
## 74   74          6.1         2.8          4.7         1.2 versicolor
## 75   75          6.4         2.9          4.3         1.3 versicolor
## 76   76          6.6         3.0          4.4         1.4 versicolor
## 77   77          6.8         2.8          4.8         1.4 versicolor
## 78   78          6.7         3.0          5.0         1.7 versicolor
## 79   79          6.0         2.9          4.5         1.5 versicolor
## 80   80          5.7         2.6          3.5         1.0 versicolor
## 81   81          5.5         2.4          3.8         1.1 versicolor
## 82   82          5.5         2.4          3.7         1.0 versicolor
## 83   83          5.8         2.7          3.9         1.2 versicolor
## 84   84          6.0         2.7          5.1         1.6 versicolor
## 85   85          5.4         3.0          4.5         1.5 versicolor
## 86   86          6.0         3.4          4.5         1.6 versicolor
## 87   87          6.7         3.1          4.7         1.5 versicolor
## 88   88          6.3         2.3          4.4         1.3 versicolor
## 89   89          5.6         3.0          4.1         1.3 versicolor
## 90   90          5.5         2.5          4.0         1.3 versicolor
## 91   91          5.5         2.6          4.4         1.2 versicolor
## 92   92          6.1         3.0          4.6         1.4 versicolor
## 93   93          5.8         2.6          4.0         1.2 versicolor
## 94   94          5.0         2.3          3.3         1.0 versicolor
## 95   95          5.6         2.7          4.2         1.3 versicolor
## 96   96          5.7         3.0          4.2         1.2 versicolor
## 97   97          5.7         2.9          4.2         1.3 versicolor
## 98   98          6.2         2.9          4.3         1.3 versicolor
## 99   99          5.1         2.5          3.0         1.1 versicolor
## 100 100          5.7         2.8          4.1         1.3 versicolor
## 101 101          6.3         3.3          6.0         2.5  virginica
## 102 102          5.8         2.7          5.1         1.9  virginica
## 103 103          7.1         3.0          5.9         2.1  virginica
## 104 104          6.3         2.9          5.6         1.8  virginica
## 105 105          6.5         3.0          5.8         2.2  virginica
## 106 106          7.6         3.0          6.6         2.1  virginica
## 107 107          4.9         2.5          4.5         1.7  virginica
## 108 108          7.3         2.9          6.3         1.8  virginica
## 109 109          6.7         2.5          5.8         1.8  virginica
## 110 110          7.2         3.6          6.1         2.5  virginica
## 111 111          6.5         3.2          5.1         2.0  virginica
## 112 112          6.4         2.7          5.3         1.9  virginica
## 113 113          6.8         3.0          5.5         2.1  virginica
## 114 114          5.7         2.5          5.0         2.0  virginica
## 115 115          5.8         2.8          5.1         2.4  virginica
## 116 116          6.4         3.2          5.3         2.3  virginica
## 117 117          6.5         3.0          5.5         1.8  virginica
## 118 118          7.7         3.8          6.7         2.2  virginica
## 119 119          7.7         2.6          6.9         2.3  virginica
## 120 120          6.0         2.2          5.0         1.5  virginica
## 121 121          6.9         3.2          5.7         2.3  virginica
## 122 122          5.6         2.8          4.9         2.0  virginica
## 123 123          7.7         2.8          6.7         2.0  virginica
## 124 124          6.3         2.7          4.9         1.8  virginica
## 125 125          6.7         3.3          5.7         2.1  virginica
## 126 126          7.2         3.2          6.0         1.8  virginica
## 127 127          6.2         2.8          4.8         1.8  virginica
## 128 128          6.1         3.0          4.9         1.8  virginica
## 129 129          6.4         2.8          5.6         2.1  virginica
## 130 130          7.2         3.0          5.8         1.6  virginica
## 131 131          7.4         2.8          6.1         1.9  virginica
## 132 132          7.9         3.8          6.4         2.0  virginica
## 133 133          6.4         2.8          5.6         2.2  virginica
## 134 134          6.3         2.8          5.1         1.5  virginica
## 135 135          6.1         2.6          5.6         1.4  virginica
## 136 136          7.7         3.0          6.1         2.3  virginica
## 137 137          6.3         3.4          5.6         2.4  virginica
## 138 138          6.4         3.1          5.5         1.8  virginica
## 139 139          6.0         3.0          4.8         1.8  virginica
## 140 140          6.9         3.1          5.4         2.1  virginica
## 141 141          6.7         3.1          5.6         2.4  virginica
## 142 142          6.9         3.1          5.1         2.3  virginica
## 143 143          5.8         2.7          5.1         1.9  virginica
## 144 144          6.8         3.2          5.9         2.3  virginica
## 145 145          6.7         3.3          5.7         2.5  virginica
## 146 146          6.7         3.0          5.2         2.3  virginica
## 147 147          6.3         2.5          5.0         1.9  virginica
## 148 148          6.5         3.0          5.2         2.0  virginica
## 149 149          6.2         3.4          5.4         2.3  virginica
## 150 150          5.9         3.0          5.1         1.8  virginica
  1. Build a linear regression using Sepal.Length as response variable and Sepal.Width as covariate. Interpret the coefficient.
# Simple linear regression of sepal length on sepal width, using every row of
# Iris_Df (all species pooled).
model_Iris <- lm(
  formula = Sepal.Length ~ Sepal.Width,
  data = Iris_Df
)
summary(model_Iris)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = Iris_Df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.556 -0.633 -0.112  0.558  2.223 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.526      0.479   13.63   <2e-16 ***
## Sepal.Width   -0.223      0.155   -1.44     0.15    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.825 on 148 degrees of freedom
## Multiple R-squared:  0.0138, Adjusted R-squared:  0.00716 
## F-statistic: 2.07 on 1 and 148 DF,  p-value: 0.152
  1. Repeat the same analysis for only the species of setosa: Build a linear regression using Sepal.Length as response variable and Sepal.Width as covariate, and interpret the coefficient.

Filtering data for only setosa

# Keep only the rows whose Species is "setosa", then preview the first rows.
Setosa_Df <- Iris_Df[Iris_Df[["Species"]] == "setosa", ]
head(Setosa_Df, n = 6L)
##   X Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 1          5.1         3.5          1.4         0.2  setosa
## 2 2          4.9         3.0          1.4         0.2  setosa
## 3 3          4.7         3.2          1.3         0.2  setosa
## 4 4          4.6         3.1          1.5         0.2  setosa
## 5 5          5.0         3.6          1.4         0.2  setosa
## 6 6          5.4         3.9          1.7         0.4  setosa

Model for the filtered dataframe

# Setosa-only fit: Sepal.Length regressed on Sepal.Width.
model_Iris_Setosa <- lm(formula = Sepal.Length ~ Sepal.Width, data = Setosa_Df)
# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_Setosa)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = Setosa_Df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5248 -0.1629  0.0217  0.1383  0.4443 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.6390     0.3100    8.51  3.7e-11 ***
## Sepal.Width   0.6905     0.0899    7.68  6.7e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.239 on 48 degrees of freedom
## Multiple R-squared:  0.551,  Adjusted R-squared:  0.542 
## F-statistic:   59 on 1 and 48 DF,  p-value: 6.71e-10
  1. Repeat the same analysis for only the species of versicolor.

Filtering data for only versicolor.

# Keep only the rows whose Species is "versicolor"; all columns are retained.
Versicolor_Df <- Iris_Df[Iris_Df[["Species"]] == "versicolor", , drop = FALSE]
# Preview the first six rows of the subset.
head(Versicolor_Df, n = 6L)
##     X Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 51 51          7.0         3.2          4.7         1.4 versicolor
## 52 52          6.4         3.2          4.5         1.5 versicolor
## 53 53          6.9         3.1          4.9         1.5 versicolor
## 54 54          5.5         2.3          4.0         1.3 versicolor
## 55 55          6.5         2.8          4.6         1.5 versicolor
## 56 56          5.7         2.8          4.5         1.3 versicolor

Model for the filtered dataframe

# Versicolor-only fit: Sepal.Length regressed on Sepal.Width.
model_Iris_versicolor <- lm(formula = Sepal.Length ~ Sepal.Width, data = Versicolor_Df)
# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_versicolor)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = Versicolor_Df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7350 -0.2856 -0.0754  0.4367  0.8380 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.540      0.563    6.29  9.1e-08 ***
## Sepal.Width    0.865      0.202    4.28  8.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.444 on 48 degrees of freedom
## Multiple R-squared:  0.277,  Adjusted R-squared:  0.262 
## F-statistic: 18.4 on 1 and 48 DF,  p-value: 8.77e-05
  1. Repeat the same analysis for only the species of virginica.
# Keep only the rows whose Species is "virginica"; all columns are retained.
Virginica_df <- Iris_Df[Iris_Df[["Species"]] == "virginica", , drop = FALSE]
# Preview the first six rows of the subset.
head(Virginica_df, n = 6L)
##       X Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 101 101          6.3         3.3          6.0         2.5 virginica
## 102 102          5.8         2.7          5.1         1.9 virginica
## 103 103          7.1         3.0          5.9         2.1 virginica
## 104 104          6.3         2.9          5.6         1.8 virginica
## 105 105          6.5         3.0          5.8         2.2 virginica
## 106 106          7.6         3.0          6.6         2.1 virginica
# Virginica-only fit: Sepal.Length regressed on Sepal.Width.
model_Iris_virginica <- lm(formula = Sepal.Length ~ Sepal.Width, data = Virginica_df)
# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_virginica)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width, data = Virginica_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2607 -0.3692 -0.0361  0.1984  1.4492 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.907      0.757    5.16  4.7e-06 ***
## Sepal.Width    0.902      0.253    3.56  0.00084 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.571 on 48 degrees of freedom
## Multiple R-squared:  0.209,  Adjusted R-squared:  0.193 
## F-statistic: 12.7 on 1 and 48 DF,  p-value: 0.000843
  1. Compare the three results from questions 3,4,5 with the results in question 2. Are these results consistent? Why?

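In question 2, Sepal.Length and Sepal.Width do not appear to be correlated, judging by the p-value (0.152) and the slightly negative slope (-0.223). However, when each species is modelled separately, the regression shows a different result: the slope is positive and significant for every species (0.6905 for setosa, 0.865 for versicolor and 0.902 for virginica). The results are therefore not consistent, because pooling the three species mixes groups with different typical sepal sizes and masks the relationship that holds within each species (Simpson's paradox). The models fitted separately per species are more accurate and have appropriate p-values.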

  1. Build a simple linear regression using Sepal.Length as response variable and Petal.Length as covariate. If we run the regression for all three species combined what is the estimated slope? If we run the regression for all three species separately what are the estimated slopes respectively? Are these results consistent? Why?
# Combined-species scatter of Sepal.Length against Petal.Length with the
# pooled least-squares line drawn on top, followed by the model summary.

# One colour and one plotting symbol per species, in factor-level order.
species_palette <- c("#00AFBB", "#E7B800", "#FC4E07")
species_shapes <- c(16, 17, 18)

# Coerce Species to a factor before indexing: if the column was read as
# character (the read.csv default since R 4.0), as.numeric() would give NA for
# every row and levels() would give NULL, breaking both the points and legend.
species <- factor(Iris_Df$Species)
species_index <- as.integer(species)

# Per-observation colour and symbol, looked up by species.
colors <- species_palette[species_index]
shapes <- species_shapes[species_index]

Model_Iris_2 <- lm(Sepal.Length ~ Petal.Length, data = Iris_Df) # obtain least square estimate
plot(Iris_Df$Petal.Length, Iris_Df$Sepal.Length, pch = shapes, col = colors, frame = FALSE)
abline(Model_Iris_2)
# The legend reuses the lookup vectors so it cannot drift from the points.
legend("topright", legend = levels(species),
       col = species_palette,
       pch = species_shapes)

summary(Model_Iris_2)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = Iris_Df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2468 -0.2966 -0.0152  0.2768  1.0027 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    4.3066     0.0784    54.9   <2e-16 ***
## Petal.Length   0.4089     0.0189    21.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.407 on 148 degrees of freedom
## Multiple R-squared:  0.76,   Adjusted R-squared:  0.758 
## F-statistic:  469 on 1 and 148 DF,  p-value: <2e-16

Analysis for setosa

# Setosa-only fit: Sepal.Length regressed on Petal.Length.
model_Iris_Setosa_2 <- lm(formula = Sepal.Length ~ Petal.Length, data = Setosa_Df)

# Scatter of the setosa observations with the fitted line overlaid.
plot(Setosa_Df$Petal.Length, Setosa_Df$Sepal.Length, pch = 20)
abline(model_Iris_Setosa_2)

# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_Setosa_2)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = Setosa_Df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5724 -0.2067 -0.0308  0.1734  0.9361 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.213      0.416   10.14  1.6e-13 ***
## Petal.Length    0.542      0.282    1.92    0.061 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.343 on 48 degrees of freedom
## Multiple R-squared:  0.0714, Adjusted R-squared:  0.052 
## F-statistic: 3.69 on 1 and 48 DF,  p-value: 0.0607

Analysis for Versicolor

# Versicolor-only fit: Sepal.Length regressed on Petal.Length.
model_Iris_versicolor_2 <- lm(formula = Sepal.Length ~ Petal.Length, data = Versicolor_Df)

# Scatter of the versicolor observations with the fitted line overlaid.
plot(Versicolor_Df$Petal.Length, Versicolor_Df$Sepal.Length, pch = 20)
abline(model_Iris_versicolor_2)

# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_versicolor_2)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = Versicolor_Df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7348 -0.2027 -0.0206  0.2609  0.6996 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.408      0.446    5.39  2.1e-06 ***
## Petal.Length    0.828      0.104    7.95  2.6e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.343 on 48 degrees of freedom
## Multiple R-squared:  0.569,  Adjusted R-squared:  0.56 
## F-statistic: 63.3 on 1 and 48 DF,  p-value: 2.59e-10

Analysis for Virginica

# Virginica-only fit: Sepal.Length regressed on Petal.Length.
model_Iris_virginica_2 <- lm(formula = Sepal.Length ~ Petal.Length, data = Virginica_df)

# Scatter of the virginica observations with the fitted line overlaid.
plot(Virginica_df$Petal.Length, Virginica_df$Sepal.Length, pch = 20)
abline(model_Iris_virginica_2)

# Print the coefficient table and goodness-of-fit statistics.
summary(object = model_Iris_virginica_2)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = Virginica_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7341 -0.2364 -0.0313  0.2377  0.7621 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.0597     0.4668    2.27    0.028 *  
## Petal.Length   0.9957     0.0837   11.90  6.3e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.323 on 48 degrees of freedom
## Multiple R-squared:  0.747,  Adjusted R-squared:  0.742 
## F-statistic:  142 on 1 and 48 DF,  p-value: 6.3e-16

The slope for the data combining all three species is 0.4089. The slope for the setosa species is 0.542, for versicolor 0.828 and for virginica 0.9957.

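The slopes are positive in all cases, which shows that every fitted line is increasing: Sepal.Length rises with Petal.Length both in the combined data and within each species. The results are consistent in direction, although the combined slope (0.4089) is smaller than each of the per-species slopes.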