# Load the 50-startups data set.
# stringsAsFactors = TRUE is required on R >= 4.0, where read.csv() keeps
# text columns as character by default: contrasts() below only accepts
# factors and would otherwise stop with an error.
startups <- read.csv(
  "D:\\DataScience\\Assignments\\MultiLinearRegression\\50_Startups.csv",
  stringsAsFactors = TRUE
)

# View() opens a data viewer; only do that when someone is at the console.
if (interactive()) {
  View(startups)
}

# Show the dummy (treatment) coding R would use for the State factor.
contrasts(startups$State)
##            Florida New York
## California       0        0
## Florida          1        0
## New York         0        1
# Numeric copy of State: California -> 0, New York -> 1, Florida -> 2.
# Any other (or missing) State value is left as NA.
# as.character() matters: indexing a named vector with a factor would use the
# factor's integer codes instead of its labels.
# NOTE(review): these codes do not follow the factor level order shown by
# contrasts() (California, Florida, New York); the column is excluded from the
# correlation matrix and is not used by any model in this script.
state_codes <- c("California" = 0, "New York" = 1, "Florida" = 2)
startups$copyofState <- unname(state_codes[as.character(startups$State)])

# Modelling frame: every column of `startups` except the categorical State
# column. Dropping by name (instead of position 4) keeps this correct if the
# column order of the CSV ever changes.
STR <- startups[, setdiff(names(startups), "State"), drop = FALSE]

# View() opens a data viewer; only do that when someone is at the console.
if (interactive()) {
  View(STR)
}

# attach() puts STR's columns on the search path. Model calls further down
# that do not pass `data =` resolve Profit, R.D.Spend, ... through this, so it
# has to stay for as long as such calls exist.
attach(STR)

# Scatterplot matrix of all columns in STR.
plot(STR)

#install.packages("corpcor")
# Pairwise correlations of the numeric predictors and Profit. The manually
# coded copyofState column is excluded by name rather than by position, so a
# change in column layout cannot silently drop a different variable.
cor(STR[, setdiff(names(STR), "copyofState"), drop = FALSE])
##                 R.D.Spend Administration Marketing.Spend    Profit
## R.D.Spend       1.0000000     0.24195525      0.72424813 0.9729005
## Administration  0.2419552     1.00000000     -0.03215388 0.2007166
## Marketing.Spend 0.7242481    -0.03215388      1.00000000 0.7477657
## Profit          0.9729005     0.20071657      0.74776572 1.0000000
#cor2pcor(cor(STR[,-5]))
# Full model: Profit regressed on all three spend variables.
# `data = STR` makes the source of the variables explicit instead of relying
# on attach(STR) and whatever else happens to be on the search path; it also
# matches how MLQ1b is fitted.
MLQ1 <- lm(
  Profit ~ R.D.Spend + Administration + Marketing.Spend,
  data = STR
)
summary(MLQ1)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33534  -4795     63   6606  17275 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.012e+04  6.572e+03   7.626 1.06e-09 ***
## R.D.Spend        8.057e-01  4.515e-02  17.846  < 2e-16 ***
## Administration  -2.682e-02  5.103e-02  -0.526    0.602    
## Marketing.Spend  2.723e-02  1.645e-02   1.655    0.105    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9232 on 46 degrees of freedom
## Multiple R-squared:  0.9507, Adjusted R-squared:  0.9475 
## F-statistic:   296 on 3 and 46 DF,  p-value: < 2.2e-16
# Reduced model without R.D.Spend: Profit on Administration and
# Marketing.Spend only.
# `data = STR` makes the source of the variables explicit instead of relying
# on attach(STR).
MLQ1a <- lm(Profit ~ Administration + Marketing.Spend, data = STR)

summary(MLQ1a)
## 
## Call:
## lm(formula = Profit ~ Administration + Marketing.Spend)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -82155 -12168   2836  13650  56472 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.022e+04  1.770e+04   1.143   0.2589    
## Administration  3.237e-01  1.312e-01   2.468   0.0173 *  
## Marketing.Spend 2.488e-01  3.005e-02   8.281 9.73e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25710 on 47 degrees of freedom
## Multiple R-squared:  0.6097, Adjusted R-squared:  0.5931 
## F-statistic: 36.71 on 2 and 47 DF,  p-value: 2.496e-10
# Per-observation influence diagnostics for the full model MLQ1: DFBETAS for
# each coefficient, DFFITS, covariance ratio, Cook's distance and hat values.
# Rows flagged as influential carry a `*` in the `inf` column.
influence.measures(MLQ1)
## Influence measures of
##   lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend) :
## 
##       dfb.1_  dfb.R.D.  dfb.Admn  dfb.Mr.S    dffit cov.r   cook.d    hat
## 1   0.004431 -0.002040 -0.001828 -4.86e-03 -0.01122 1.248 3.22e-05 0.1249
## 2  -0.062584  0.018541  0.041065  4.46e-02  0.11333 1.236 3.28e-03 0.1235
## 3   0.081857  0.182130 -0.165823  2.39e-02  0.36009 1.119 3.24e-02 0.1103
## 4  -0.005134  0.128991 -0.056777  4.47e-02  0.29500 1.074 2.17e-02 0.0748
## 5  -0.103057 -0.136473  0.150239  1.97e-02 -0.23928 1.178 1.45e-02 0.1103
## 6  -0.069716 -0.097046  0.111396 -1.46e-02 -0.21541 1.129 1.17e-02 0.0782
## 7  -0.010014 -0.091486  0.004568  8.48e-02 -0.10692 1.312 2.92e-03 0.1720
## 8   0.054198 -0.035776 -0.043622 -1.64e-02 -0.11841 1.138 3.57e-03 0.0591
## 9  -0.006926  0.001890  0.006332  2.84e-03  0.01258 1.156 4.04e-05 0.0556
## 10 -0.046927 -0.083594  0.065087  2.18e-02 -0.13850 1.125 4.87e-03 0.0563
## 11  0.124367  0.164282 -0.123463 -1.09e-01  0.24686 1.009 1.51e-02 0.0420
## 12  0.181220  0.143325 -0.193694 -8.15e-02  0.25563 1.074 1.64e-02 0.0647
## 13 -0.010018  0.046846  0.023103  7.05e-03  0.21705 0.948 1.16e-02 0.0242
## 14 -0.039982  0.002448  0.050526  2.34e-02  0.12610 1.069 4.01e-03 0.0278
## 15  0.256593 -0.181700 -0.268033  5.91e-02 -0.51010 0.842 6.13e-02 0.0638
## 16 -0.062230 -0.242883  0.078834  1.10e-01 -0.37839 0.848 3.40e-02 0.0401
## 17 -0.010617 -0.057633  0.025599  8.95e-02  0.18305 1.007 8.34e-03 0.0265
## 18  0.065688  0.024044 -0.071189 -4.93e-02 -0.11408 1.114 3.31e-03 0.0443
## 19 -0.011150  0.004966  0.014323 -3.72e-02 -0.09238 1.100 2.17e-03 0.0308
## 20  0.043989  0.261698  0.035221 -3.47e-01  0.40250 1.252 4.07e-02 0.1822
## 21  0.000102 -0.018343  0.000474  2.74e-02  0.04081 1.133 4.25e-04 0.0397
## 22  0.140627  0.117170 -0.155388 -1.39e-01 -0.20837 1.148 1.10e-02 0.0866
## 23  0.024966  0.061810 -0.028686 -8.15e-02 -0.10712 1.123 2.92e-03 0.0476
## 24 -0.003676  0.017598  0.003252 -2.33e-02 -0.03287 1.151 2.76e-04 0.0529
## 25 -0.109661 -0.087336  0.092409  9.71e-02 -0.14097 1.139 5.05e-03 0.0650
## 26 -0.010284  0.008784  0.044543 -3.78e-02  0.10875 1.101 3.00e-03 0.0356
## 27  0.013462 -0.034455 -0.044048  5.70e-02 -0.11604 1.115 3.42e-03 0.0449
## 28  0.132540  0.239831 -0.132426 -3.05e-01 -0.34604 1.090 2.98e-02 0.0944
## 29 -0.043457 -0.008583  0.060294 -6.42e-03  0.07179 1.250 1.32e-03 0.1292
## 30  0.005453 -0.002942 -0.011993  9.09e-03 -0.02221 1.162 1.26e-04 0.0607
## 31  0.007462  0.006378 -0.004159 -9.90e-03  0.01280 1.154 4.19e-05 0.0542
## 32  0.001163 -0.000866 -0.003027  2.78e-03 -0.00602 1.169 9.27e-06 0.0661
## 33 -0.019167 -0.028418  0.005176  4.45e-02 -0.05268 1.186 7.09e-04 0.0822
## 34 -0.011897  0.006926  0.008144 -5.33e-03 -0.02239 1.126 1.28e-04 0.0317
## 35 -0.173631 -0.193610  0.231928  1.50e-01  0.28895 1.133 2.10e-02 0.0987
## 36  0.123050 -0.034659 -0.103225  2.06e-02  0.16264 1.114 6.70e-03 0.0570
## 37 -0.090124 -0.402279  0.203553  2.91e-01  0.48212 0.896 5.55e-02 0.0690
## 38  0.014721  0.000948 -0.014072 -1.47e-03  0.01601 1.283 6.55e-05 0.1489
## 39  0.369985 -0.154973 -0.312156  8.05e-02  0.47799 1.053 5.62e-02 0.1132
## 40 -0.064018  0.013047  0.051627  3.92e-05 -0.07778 1.155 1.54e-03 0.0624
## 41  0.006168 -0.062763  0.018554  3.59e-02  0.08518 1.133 1.85e-03 0.0487
## 42  0.065951 -0.028739 -0.048818  8.06e-03  0.08804 1.157 1.98e-03 0.0655
## 43  0.014072 -0.010602 -0.007914  2.93e-03  0.02308 1.153 1.36e-04 0.0539
## 44  0.069222 -0.068538  0.051283 -9.75e-02  0.28724 1.056 2.06e-02 0.0664
## 45 -0.004343 -0.005425  0.012671 -5.46e-03  0.02195 1.211 1.23e-04 0.0984
## 46  0.183015 -0.169881  0.078009 -2.10e-01  0.62093 0.844 9.03e-02 0.0862
## 47  0.096322  0.434681 -0.145428 -3.90e-01 -0.46543 1.354 5.45e-02 0.2406
## 48 -0.014007  0.049791 -0.046708  3.73e-02 -0.14485 1.187 5.34e-03 0.0962
## 49 -0.861899 -0.141175  0.693163  4.29e-01 -0.89031 1.093 1.91e-01 0.2180
## 50 -0.409479  0.599022 -0.107675  1.44e-01 -1.27816 0.268 2.88e-01 0.0748
##    inf
## 1     
## 2     
## 3     
## 4     
## 5     
## 6     
## 7    *
## 8     
## 9     
## 10    
## 11    
## 12    
## 13    
## 14    
## 15    
## 16    
## 17    
## 18    
## 19    
## 20    
## 21    
## 22    
## 23    
## 24    
## 25    
## 26    
## 27    
## 28    
## 29    
## 30    
## 31    
## 32    
## 33    
## 34    
## 35    
## 36    
## 37    
## 38   *
## 39    
## 40    
## 41    
## 42    
## 43    
## 44    
## 45    
## 46    
## 47   *
## 48    
## 49   *
## 50   *
#influenceIndexPlot(MLQ1a)

#influencePlot(MLQ1,id.n=3)
# Refit the full model with observations 46, 47, 49 and 50 left out, to see
# how much the coefficients and fit change without them.
# NOTE(review): the influence.measures() output stars rows 7, 38, 47, 49 and
# 50. Row 46 is not starred (though it has the third-largest Cook's distance)
# -- confirm that this exclusion list is the intended one.
MLQ1b <- lm(
  Profit ~ R.D.Spend + Administration + Marketing.Spend,
  data = STR[-c(46, 47, 49, 50), ]
)

summary(MLQ1b)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend, 
##     data = STR[-c(46, 47, 49, 50), ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15958.4  -5119.9   -919.9   6340.9  12986.9 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.690e+04  5.788e+03   9.830 1.87e-12 ***
## R.D.Spend        7.714e-01  3.993e-02  19.320  < 2e-16 ***
## Administration  -5.398e-02  4.303e-02  -1.254   0.2166    
## Marketing.Spend  2.771e-02  1.446e-02   1.916   0.0622 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7089 on 42 degrees of freedom
## Multiple R-squared:  0.9626, Adjusted R-squared:  0.9599 
## F-statistic: 360.3 on 3 and 42 DF,  p-value: < 2.2e-16
#vif(MLQ1)

#avPlots(MLQ1)
# Final model on all 50 observations: Administration is dropped, leaving
# R.D.Spend and Marketing.Spend as predictors of Profit.
# `data = STR` makes the source of the variables explicit instead of relying
# on attach(STR).
FINALmODEL50 <- lm(Profit ~ R.D.Spend + Marketing.Spend, data = STR)

summary(FINALmODEL50)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Marketing.Spend)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33645  -4632   -414   6484  17097 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.698e+04  2.690e+03  17.464   <2e-16 ***
## R.D.Spend       7.966e-01  4.135e-02  19.266   <2e-16 ***
## Marketing.Spend 2.991e-02  1.552e-02   1.927     0.06 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9161 on 47 degrees of freedom
## Multiple R-squared:  0.9505, Adjusted R-squared:  0.9483 
## F-statistic: 450.8 on 2 and 47 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots provided by the plot method for lm objects for
# the final model.
plot(FINALmODEL50)

#qqPlot(MLQ1,id.no=5)
#x<- stepAIC(MLQ1)

#x