# Load the 50-startups data set from its fixed location on disk.
# stringsAsFactors = TRUE is required on R >= 4.0.0: read.csv() now keeps text
# columns as character by default, and contrasts() only accepts factors
# (it stops with "contrasts apply only to factors" otherwise).
startups <- read.csv(
  "D:\\DataScience\\Assignments\\MultiLinearRegression\\50_Startups.csv",
  stringsAsFactors = TRUE
)
View(startups)
# Show the contrast (dummy-variable) matrix R associates with the State factor.
contrasts(startups$State)
## Florida New York
## California 0 0
## Florida 1 0
## New York 0 1
# Numeric copy of State: California -> 0, New York -> 1, Florida -> 2.
# match() yields NA for any other value, so unmatched rows stay NA.
startups$copyofState <-
  match(startups$State, c("California", "New York", "Florida")) - 1

# Working data frame: everything except the 4th column of startups
# (presumably the text State column -- verify against the CSV layout).
STR <- startups[-4]
View(STR)

# Put STR's columns on the search path; model calls in this script that do not
# pass `data =` resolve their variables through this attach().
attach(STR)

# Scatterplot matrix of every column in STR.
plot(STR)

#install.packages("corpcor")
# Pairwise correlations of STR's columns, leaving out the numeric State
# encoding. The column is excluded by name rather than by position so the
# result stays correct if the column order of STR ever changes;
# drop = FALSE keeps a data frame even if only one column remains.
cor(STR[, names(STR) != "copyofState", drop = FALSE])
## R.D.Spend Administration Marketing.Spend Profit
## R.D.Spend 1.0000000 0.24195525 0.72424813 0.9729005
## Administration 0.2419552 1.00000000 -0.03215388 0.2007166
## Marketing.Spend 0.7242481 -0.03215388 1.00000000 0.7477657
## Profit 0.9729005 0.20071657 0.74776572 1.0000000
#cor2pcor(cor(STR[,-5]))
# Full model: Profit regressed on all three spend variables.
# `data = STR` makes the model read its columns from STR directly instead of
# relying on attach(), so a same-named object in the workspace cannot mask a
# column, and it matches how MLQ1b is fitted.
MLQ1 <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend,
  data = STR
)
summary(MLQ1)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33534 -4795 63 6606 17275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.012e+04 6.572e+03 7.626 1.06e-09 ***
## R.D.Spend 8.057e-01 4.515e-02 17.846 < 2e-16 ***
## Administration -2.682e-02 5.103e-02 -0.526 0.602
## Marketing.Spend 2.723e-02 1.645e-02 1.655 0.105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9232 on 46 degrees of freedom
## Multiple R-squared: 0.9507, Adjusted R-squared: 0.9475
## F-statistic: 296 on 3 and 46 DF, p-value: < 2.2e-16
# Reduced model without R.D.Spend: Profit on Administration and
# Marketing.Spend only.
# `data = STR` makes the model read its columns from STR directly instead of
# relying on attach(), so a same-named object in the workspace cannot mask a
# column.
MLQ1a <- lm(
  formula = Profit ~ Administration + Marketing.Spend,
  data = STR
)
summary(MLQ1a)
##
## Call:
## lm(formula = Profit ~ Administration + Marketing.Spend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -82155 -12168 2836 13650 56472
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.022e+04 1.770e+04 1.143 0.2589
## Administration 3.237e-01 1.312e-01 2.468 0.0173 *
## Marketing.Spend 2.488e-01 3.005e-02 8.281 9.73e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25710 on 47 degrees of freedom
## Multiple R-squared: 0.6097, Adjusted R-squared: 0.5931
## F-statistic: 36.71 on 2 and 47 DF, p-value: 2.496e-10
# Per-observation influence diagnostics for the full model (DFBETAS, DFFIT,
# covariance ratio, Cook's distance, hat values); flagged rows are marked "*".
influence.measures(model = MLQ1)
## Influence measures of
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend) :
##
## dfb.1_ dfb.R.D. dfb.Admn dfb.Mr.S dffit cov.r cook.d hat
## 1 0.004431 -0.002040 -0.001828 -4.86e-03 -0.01122 1.248 3.22e-05 0.1249
## 2 -0.062584 0.018541 0.041065 4.46e-02 0.11333 1.236 3.28e-03 0.1235
## 3 0.081857 0.182130 -0.165823 2.39e-02 0.36009 1.119 3.24e-02 0.1103
## 4 -0.005134 0.128991 -0.056777 4.47e-02 0.29500 1.074 2.17e-02 0.0748
## 5 -0.103057 -0.136473 0.150239 1.97e-02 -0.23928 1.178 1.45e-02 0.1103
## 6 -0.069716 -0.097046 0.111396 -1.46e-02 -0.21541 1.129 1.17e-02 0.0782
## 7 -0.010014 -0.091486 0.004568 8.48e-02 -0.10692 1.312 2.92e-03 0.1720
## 8 0.054198 -0.035776 -0.043622 -1.64e-02 -0.11841 1.138 3.57e-03 0.0591
## 9 -0.006926 0.001890 0.006332 2.84e-03 0.01258 1.156 4.04e-05 0.0556
## 10 -0.046927 -0.083594 0.065087 2.18e-02 -0.13850 1.125 4.87e-03 0.0563
## 11 0.124367 0.164282 -0.123463 -1.09e-01 0.24686 1.009 1.51e-02 0.0420
## 12 0.181220 0.143325 -0.193694 -8.15e-02 0.25563 1.074 1.64e-02 0.0647
## 13 -0.010018 0.046846 0.023103 7.05e-03 0.21705 0.948 1.16e-02 0.0242
## 14 -0.039982 0.002448 0.050526 2.34e-02 0.12610 1.069 4.01e-03 0.0278
## 15 0.256593 -0.181700 -0.268033 5.91e-02 -0.51010 0.842 6.13e-02 0.0638
## 16 -0.062230 -0.242883 0.078834 1.10e-01 -0.37839 0.848 3.40e-02 0.0401
## 17 -0.010617 -0.057633 0.025599 8.95e-02 0.18305 1.007 8.34e-03 0.0265
## 18 0.065688 0.024044 -0.071189 -4.93e-02 -0.11408 1.114 3.31e-03 0.0443
## 19 -0.011150 0.004966 0.014323 -3.72e-02 -0.09238 1.100 2.17e-03 0.0308
## 20 0.043989 0.261698 0.035221 -3.47e-01 0.40250 1.252 4.07e-02 0.1822
## 21 0.000102 -0.018343 0.000474 2.74e-02 0.04081 1.133 4.25e-04 0.0397
## 22 0.140627 0.117170 -0.155388 -1.39e-01 -0.20837 1.148 1.10e-02 0.0866
## 23 0.024966 0.061810 -0.028686 -8.15e-02 -0.10712 1.123 2.92e-03 0.0476
## 24 -0.003676 0.017598 0.003252 -2.33e-02 -0.03287 1.151 2.76e-04 0.0529
## 25 -0.109661 -0.087336 0.092409 9.71e-02 -0.14097 1.139 5.05e-03 0.0650
## 26 -0.010284 0.008784 0.044543 -3.78e-02 0.10875 1.101 3.00e-03 0.0356
## 27 0.013462 -0.034455 -0.044048 5.70e-02 -0.11604 1.115 3.42e-03 0.0449
## 28 0.132540 0.239831 -0.132426 -3.05e-01 -0.34604 1.090 2.98e-02 0.0944
## 29 -0.043457 -0.008583 0.060294 -6.42e-03 0.07179 1.250 1.32e-03 0.1292
## 30 0.005453 -0.002942 -0.011993 9.09e-03 -0.02221 1.162 1.26e-04 0.0607
## 31 0.007462 0.006378 -0.004159 -9.90e-03 0.01280 1.154 4.19e-05 0.0542
## 32 0.001163 -0.000866 -0.003027 2.78e-03 -0.00602 1.169 9.27e-06 0.0661
## 33 -0.019167 -0.028418 0.005176 4.45e-02 -0.05268 1.186 7.09e-04 0.0822
## 34 -0.011897 0.006926 0.008144 -5.33e-03 -0.02239 1.126 1.28e-04 0.0317
## 35 -0.173631 -0.193610 0.231928 1.50e-01 0.28895 1.133 2.10e-02 0.0987
## 36 0.123050 -0.034659 -0.103225 2.06e-02 0.16264 1.114 6.70e-03 0.0570
## 37 -0.090124 -0.402279 0.203553 2.91e-01 0.48212 0.896 5.55e-02 0.0690
## 38 0.014721 0.000948 -0.014072 -1.47e-03 0.01601 1.283 6.55e-05 0.1489
## 39 0.369985 -0.154973 -0.312156 8.05e-02 0.47799 1.053 5.62e-02 0.1132
## 40 -0.064018 0.013047 0.051627 3.92e-05 -0.07778 1.155 1.54e-03 0.0624
## 41 0.006168 -0.062763 0.018554 3.59e-02 0.08518 1.133 1.85e-03 0.0487
## 42 0.065951 -0.028739 -0.048818 8.06e-03 0.08804 1.157 1.98e-03 0.0655
## 43 0.014072 -0.010602 -0.007914 2.93e-03 0.02308 1.153 1.36e-04 0.0539
## 44 0.069222 -0.068538 0.051283 -9.75e-02 0.28724 1.056 2.06e-02 0.0664
## 45 -0.004343 -0.005425 0.012671 -5.46e-03 0.02195 1.211 1.23e-04 0.0984
## 46 0.183015 -0.169881 0.078009 -2.10e-01 0.62093 0.844 9.03e-02 0.0862
## 47 0.096322 0.434681 -0.145428 -3.90e-01 -0.46543 1.354 5.45e-02 0.2406
## 48 -0.014007 0.049791 -0.046708 3.73e-02 -0.14485 1.187 5.34e-03 0.0962
## 49 -0.861899 -0.141175 0.693163 4.29e-01 -0.89031 1.093 1.91e-01 0.2180
## 50 -0.409479 0.599022 -0.107675 1.44e-01 -1.27816 0.268 2.88e-01 0.0748
## inf
## 1
## 2
## 3
## 4
## 5
## 6
## 7 *
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 32
## 33
## 34
## 35
## 36
## 37
## 38 *
## 39
## 40
## 41
## 42
## 43
## 44
## 45
## 46
## 47 *
## 48
## 49 *
## 50 *
#influenceIndexPlot(MLQ1a)
#influencePlot(MLQ1,id.n=3)
# Refit the full model with observations 46, 47, 49 and 50 left out.
# NOTE(review): the influence.measures() transcript in this file stars rows
# 7, 38, 47, 49 and 50; row 46 is excluded here without a star while 7 and 38
# are kept -- confirm this selection is intentional.
MLQ1b <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend,
  data = STR[-c(46, 47, 49, 50), ]
)
summary(MLQ1b)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend,
## data = STR[-c(46, 47, 49, 50), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -15958.4 -5119.9 -919.9 6340.9 12986.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.690e+04 5.788e+03 9.830 1.87e-12 ***
## R.D.Spend 7.714e-01 3.993e-02 19.320 < 2e-16 ***
## Administration -5.398e-02 4.303e-02 -1.254 0.2166
## Marketing.Spend 2.771e-02 1.446e-02 1.916 0.0622 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7089 on 42 degrees of freedom
## Multiple R-squared: 0.9626, Adjusted R-squared: 0.9599
## F-statistic: 360.3 on 3 and 42 DF, p-value: < 2.2e-16
#vif(MLQ1)
#avPlots(MLQ1)
# Final model: Profit on R.D.Spend and Marketing.Spend (Administration
# dropped), fitted on all rows of STR.
# `data = STR` makes the model read its columns from STR directly instead of
# relying on attach(), so a same-named object in the workspace cannot mask a
# column, and it matches how MLQ1b is fitted.
FINALmODEL50 <- lm(
  formula = Profit ~ R.D.Spend + Marketing.Spend,
  data = STR
)
summary(FINALmODEL50)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Marketing.Spend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33645 -4632 -414 6484 17097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.698e+04 2.690e+03 17.464 <2e-16 ***
## R.D.Spend 7.966e-01 4.135e-02 19.266 <2e-16 ***
## Marketing.Spend 2.991e-02 1.552e-02 1.927 0.06 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9161 on 47 degrees of freedom
## Multiple R-squared: 0.9505, Adjusted R-squared: 0.9483
## F-statistic: 450.8 on 2 and 47 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for the final model.
plot(x = FINALmODEL50)




#qqPlot(MLQ1,id.no=5)
#x<- stepAIC(MLQ1)
#x