# Load the 50-startups data set (spend figures, State, and Profit).
# The location is kept in one named variable so it is easy to repoint.
data_path <- "C:/Users/Shalini/Downloads/50_Startups.csv"

# stringsAsFactors = TRUE makes State a factor on every R version. Since
# R 4.0 the default is FALSE, which would turn the per-level counts in the
# summary() output recorded below into a plain "character" description.
start <- read.csv(data_path, stringsAsFactors = TRUE)

# The spreadsheet viewer is only useful in an interactive session.
if (interactive()) {
  View(start)
}
colnames(start)
## [1] "R.D.Spend" "Administration" "Marketing.Spend" "State"
## [5] "Profit"
summary(start)
## R.D.Spend Administration Marketing.Spend State
## Min. : 0 Min. : 51283 Min. : 0 California:17
## 1st Qu.: 39936 1st Qu.:103731 1st Qu.:129300 Florida :16
## Median : 73051 Median :122700 Median :212716 New York :17
## Mean : 73722 Mean :121345 Mean :211025
## 3rd Qu.:101603 3rd Qu.:144842 3rd Qu.:299469
## Max. :165349 Max. :182646 Max. :471784
## Profit
## Min. : 14681
## 1st Qu.: 90139
## Median :107978
## Mean :112013
## 3rd Qu.:139766
## Max. :192262
# Correlation between R&D spend and profit. with() scopes the column lookup
# to `start`, so the data frame no longer has to be attach()ed to the search
# path (attached columns can be masked by, or mask, other objects).
with(start, cor(R.D.Spend, Profit))
## [1] 0.9729005

# Full linear model: Profit on the three spend columns plus State.
# Passing `data = start` makes the fit self-contained and records the data
# source in the model call, matching how model2 / finalmodel are fitted.
model <- lm(
  Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start
)
summary(model)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend +
## State, data = start)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33504 -4736 90 6672 17338
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.013e+04 6.885e+03 7.281 4.44e-09 ***
## R.D.Spend 8.060e-01 4.641e-02 17.369 < 2e-16 ***
## Administration -2.700e-02 5.223e-02 -0.517 0.608
## Marketing.Spend 2.698e-02 1.714e-02 1.574 0.123
## StateFlorida 1.988e+02 3.371e+03 0.059 0.953
## StateNew York -4.189e+01 3.256e+03 -0.013 0.990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9439 on 44 degrees of freedom
## Multiple R-squared: 0.9508, Adjusted R-squared: 0.9452
## F-statistic: 169.9 on 5 and 44 DF, p-value: < 2.2e-16

# 95% confidence intervals for the coefficients.
confint(model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 3.624990e+04 6.400079e+04
## R.D.Spend 7.124960e-01 8.995502e-01
## Administration -1.322701e-01 7.826145e-02
## Marketing.Spend -7.567888e-03 6.152761e-02
## StateFlorida -6.595030e+03 6.992607e+03
## StateNew York -6.604003e+03 6.520229e+03
#predict(model,interval = "predict")

# Per-observation influence diagnostics (DFBETAS, DFFITS, covariance ratio,
# Cook's distance, hat values); the `inf` column stars flagged observations.
influence.measures(model)
## Influence measures of
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State, data = start) :
##
## dfb.1_ dfb.R.D. dfb.Admn dfb.Mr.S dfb.SttF dfb.StNY dffit
## 1 0.00263 -0.000680 -0.00108 -0.002943 0.001137 -0.002293 -0.00671
## 2 -0.04961 0.020841 0.04627 0.058280 -0.082437 -0.070955 0.15256
## 3 0.08011 0.197811 -0.17477 -0.013702 0.153248 -0.028648 0.40914
## 4 -0.03595 0.110000 -0.04970 0.073783 -0.047849 0.163879 0.37650
## 5 -0.10434 -0.153851 0.16342 0.047737 -0.115870 0.017478 -0.28836
## 6 -0.04629 -0.081405 0.10566 -0.033055 0.027220 -0.115816 -0.26364
## 7 -0.02130 -0.095187 0.00468 0.082074 0.037236 0.050562 -0.12099
## 8 0.05670 -0.045130 -0.04118 0.001538 -0.079214 0.007763 -0.15671
## 9 -0.01048 0.000992 0.00845 0.005091 -0.002030 0.010515 0.02068
## 10 -0.07333 -0.087676 0.06482 0.009105 0.113458 0.114051 -0.19312
## 11 0.11698 0.186811 -0.13740 -0.159583 0.227283 -0.009802 0.36595
## 12 0.22674 0.152695 -0.19750 -0.063320 -0.180498 -0.191722 0.34058
## 13 -0.01749 0.069420 0.01009 -0.048411 0.246939 -0.005732 0.36117
## 14 -0.00754 0.005568 0.05424 0.040526 -0.144106 -0.139683 0.20968
## 15 0.26742 -0.221204 -0.25724 0.142195 -0.359607 0.022591 -0.67871
## 16 -0.00258 -0.208289 0.06663 0.071114 0.021586 -0.300602 -0.52394
## 17 0.03723 -0.055403 0.02986 0.116725 -0.217604 -0.202959 0.30836
## 18 0.08149 0.035255 -0.07538 -0.061166 0.013546 -0.089993 -0.15985
## 19 -0.00866 -0.003027 0.01978 -0.017913 -0.089991 0.002396 -0.14249
## 20 0.00949 0.252210 0.03934 -0.342025 0.056267 0.163205 0.44287
## 21 0.00920 -0.018937 0.00127 0.034093 -0.043026 -0.038229 0.06425
## 22 0.16246 0.134008 -0.16322 -0.156986 0.024662 -0.123303 -0.26328
## 23 0.02844 0.057093 -0.02567 -0.065599 -0.080087 -0.001184 -0.14418
## 24 -0.00300 0.017345 0.00492 -0.019954 -0.025200 -0.000520 -0.04652
## 25 -0.08800 -0.077455 0.08926 0.087987 -0.013268 -0.093171 -0.17298
## 26 0.01269 0.012139 0.04637 -0.028528 -0.091010 -0.099577 0.15683
## 27 0.01876 -0.047078 -0.04093 0.086363 -0.120147 -0.002104 -0.18499
## 28 0.16992 0.271462 -0.14611 -0.339679 0.054174 -0.198665 -0.44228
## 29 -0.04142 -0.004892 0.05451 -0.014043 0.037581 0.001794 0.07940
## 30 0.00788 -0.001210 -0.01152 0.007045 -0.001871 -0.014675 -0.02593
## 31 0.00452 0.004911 -0.00318 -0.008237 0.008083 0.000222 0.01251
## 32 0.00132 -0.000298 -0.00210 0.001610 -0.000431 -0.002779 -0.00500
## 33 -0.02577 -0.029851 0.00520 0.042128 0.023004 0.031218 -0.06200
## 34 -0.01226 0.005720 0.01076 -0.000214 -0.026781 -0.001052 -0.03977
## 35 -0.13840 -0.196836 0.24231 0.173390 -0.173841 -0.153489 0.35397
## 36 0.09973 -0.049889 -0.10222 0.034443 0.001307 0.126044 0.22205
## 37 -0.10755 -0.379353 0.18952 0.218405 0.334034 0.033735 0.61014
## 38 0.01840 0.001322 -0.01609 -0.000850 -0.007621 -0.008050 0.02054
## 39 0.32020 -0.189819 -0.31345 0.109261 0.010955 0.273511 0.58408
## 40 -0.07434 0.011576 0.05092 -0.005414 0.049682 0.051171 -0.09721
## 41 0.02107 -0.063137 0.02014 0.044268 -0.066547 -0.063116 0.11560
## 42 0.05957 -0.022659 -0.05101 -0.006525 0.066841 0.005162 0.11358
## 43 0.01852 -0.010791 -0.00820 0.004902 -0.016530 -0.016757 0.03122
## 44 0.02429 -0.090858 0.05891 -0.085052 0.047181 0.220181 0.37544
## 45 -0.00188 -0.005257 0.01339 -0.004732 -0.009633 -0.011309 0.02616
## 46 0.09538 -0.212843 0.09139 -0.189969 0.108210 0.428337 0.77900
## 47 0.10683 0.434369 -0.14265 -0.364064 -0.144471 -0.025892 -0.50219
## 48 -0.02930 0.046774 -0.04694 0.031389 0.056945 0.068742 -0.16247
## 49 -0.78383 -0.112734 0.70160 0.418630 -0.124090 -0.373999 -0.98871
## 50 -0.56603 0.578956 -0.11423 0.080954 0.626360 0.703325 -1.50721
## cov.r cook.d hat inf
## 1 1.384 7.68e-06 0.1705
## 2 1.395 3.96e-03 0.1884
## 3 1.165 2.79e-02 0.1424
## 4 1.113 2.36e-02 0.1140
## 5 1.259 1.40e-02 0.1471
## 6 1.207 1.17e-02 0.1163
## 7 1.445 2.49e-03 0.2119 *
## 8 1.226 4.17e-03 0.0942
## 9 1.269 7.29e-05 0.0958
## 10 1.227 6.31e-03 0.1052
## 11 1.046 2.21e-02 0.0897
## 12 1.127 1.94e-02 0.1080
## 13 0.954 2.13e-02 0.0657
## 14 1.144 7.40e-03 0.0721
## 15 0.761 7.20e-02 0.1055
## 16 0.790 4.34e-02 0.0754
## 17 1.040 1.58e-02 0.0708
## 18 1.211 4.33e-03 0.0868
## 19 1.183 3.44e-03 0.0671
## 20 1.316 3.29e-02 0.2118
## 21 1.249 7.03e-04 0.0866
## 22 1.248 1.17e-02 0.1350
## 23 1.206 3.53e-03 0.0795
## 24 1.251 3.69e-04 0.0854
## 25 1.225 5.07e-03 0.0980
## 26 1.181 4.16e-03 0.0709
## 27 1.221 5.79e-03 0.0993
## 28 1.151 3.25e-02 0.1471
## 29 1.395 1.07e-03 0.1802
## 30 1.271 1.15e-04 0.0973
## 31 1.300 2.67e-05 0.1169
## 32 1.279 4.26e-06 0.1026
## 33 1.289 6.55e-04 0.1131
## 34 1.238 2.70e-04 0.0750
## 35 1.197 2.10e-02 0.1389
## 36 1.196 8.32e-03 0.0985
## 37 0.850 5.92e-02 0.1079
## 38 1.408 7.19e-05 0.1851
## 39 1.060 5.58e-02 0.1570
## 40 1.257 1.61e-03 0.0975
## 41 1.228 2.27e-03 0.0842
## 42 1.278 2.20e-03 0.1140
## 43 1.256 1.66e-04 0.0870
## 44 1.093 2.34e-02 0.1069
## 45 1.312 1.17e-04 0.1259
## 46 0.758 9.44e-02 0.1277
## 47 1.419 4.23e-02 0.2654 *
## 48 1.272 4.48e-03 0.1211
## 49 1.051 1.56e-01 0.2559
## 50 0.128 2.64e-01 0.1015 *
# mvinfluence is loaded as before; the messages below show it attaching car,
# carData and heplots in the session this output was recorded in.
library(mvinfluence)
## Loading required package: car
## Loading required package: carData
## Loading required package: heplots

# influenceIndexPlot() is exported by car, not mvinfluence. Calling it through
# its namespace keeps this line working even if car is not attached as a side
# effect of loading mvinfluence.
car::influenceIndexPlot(model)

# Same specification as the full model, refitted without observation 50
# (the row starred in the influence measures with the largest Cook's distance).
model2 <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start[-50, ]
)
summary(model2)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend +
## State, data = start[-50, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -16374 -4572 -1225 5304 15402
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.338e+04 5.794e+03 9.212 9.79e-12 ***
## R.D.Spend 7.836e-01 3.907e-02 20.056 < 2e-16 ***
## Administration -2.202e-02 4.363e-02 -0.505 0.6163
## Marketing.Spend 2.582e-02 1.432e-02 1.804 0.0783 .
## StateFlorida -1.564e+03 2.842e+03 -0.550 0.5849
## StateNew York -1.954e+03 2.752e+03 -0.710 0.4815
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7881 on 43 degrees of freedom
## Multiple R-squared: 0.9618, Adjusted R-squared: 0.9574
## F-statistic: 216.6 on 5 and 43 DF, p-value: < 2.2e-16
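# Disabled experiment: presumably meant to refit without rows 49 and 50.
# The rows must be dropped with a single negative index vector; the earlier
# form `start[-50,-49,]` put -49 in the column position instead.
#model3 <- lm(Profit~R.D.Spend+Administration+Marketing.Spend+State,data = start[-c(49, 50), ])
#summary(model3)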
# Final model: the full specification fitted on all 50 observations.
finalmodel <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start
)
summary(finalmodel)
##
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend +
## State, data = start)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33504 -4736 90 6672 17338
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.013e+04 6.885e+03 7.281 4.44e-09 ***
## R.D.Spend 8.060e-01 4.641e-02 17.369 < 2e-16 ***
## Administration -2.700e-02 5.223e-02 -0.517 0.608
## Marketing.Spend 2.698e-02 1.714e-02 1.574 0.123
## StateFlorida 1.988e+02 3.371e+03 0.059 0.953
## StateNew York -4.189e+01 3.256e+03 -0.013 0.990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9439 on 44 degrees of freedom
## Multiple R-squared: 0.9508, Adjusted R-squared: 0.9452
## F-statistic: 169.9 on 5 and 44 DF, p-value: < 2.2e-16
# Draw the standard lm diagnostic plots for the final model.
plot(x = finalmodel)



