# Load the 50-startups data set into `start`.
# stringsAsFactors = TRUE keeps State a factor on every R version (the
# read.csv default became FALSE in R 4.0.0); the per-level State counts in
# the summary() output recorded below depend on it being a factor.
# NOTE(review): the absolute path is machine-specific -- confirm it, or switch
# to a path relative to the project.
start <- read.csv(
  "C:/Users/Shalini/Downloads/50_Startups.csv",
  stringsAsFactors = TRUE
)
# View() opens a data viewer, so only call it when a user is at the console;
# this lets the script also run in batch mode.
if (interactive()) {
  View(start)
}
colnames(start)
## [1] "R.D.Spend"       "Administration"  "Marketing.Spend" "State"          
## [5] "Profit"
# Per-column summary: quartiles for the numeric columns, counts for State.
summary(start)
##    R.D.Spend      Administration   Marketing.Spend         State   
##  Min.   :     0   Min.   : 51283   Min.   :     0   California:17  
##  1st Qu.: 39936   1st Qu.:103731   1st Qu.:129300   Florida   :16  
##  Median : 73051   Median :122700   Median :212716   New York  :17  
##  Mean   : 73722   Mean   :121345   Mean   :211025                  
##  3rd Qu.:101603   3rd Qu.:144842   3rd Qu.:299469                  
##  Max.   :165349   Max.   :182646   Max.   :471784                  
##      Profit      
##  Min.   : 14681  
##  1st Qu.: 90139  
##  Median :107978  
##  Mean   :112013  
##  3rd Qu.:139766  
##  Max.   :192262
# Correlation between R&D spend and profit.
# Columns are read from `start` explicitly instead of via attach(), so the
# result cannot be affected by same-named objects on the search path.
cor(start$R.D.Spend, start$Profit)
## [1] 0.9729005
# Multiple linear regression of Profit on all four predictors, fitted on the
# full data set. Passing `data = start` ties the fit to the data frame itself,
# so the model does not depend on global state.
model <- lm(
  Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start
)
summary(model)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + 
##     State)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33504  -4736     90   6672  17338 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.013e+04  6.885e+03   7.281 4.44e-09 ***
## R.D.Spend        8.060e-01  4.641e-02  17.369  < 2e-16 ***
## Administration  -2.700e-02  5.223e-02  -0.517    0.608    
## Marketing.Spend  2.698e-02  1.714e-02   1.574    0.123    
## StateFlorida     1.988e+02  3.371e+03   0.059    0.953    
## StateNew York   -4.189e+01  3.256e+03  -0.013    0.990    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9439 on 44 degrees of freedom
## Multiple R-squared:  0.9508, Adjusted R-squared:  0.9452 
## F-statistic: 169.9 on 5 and 44 DF,  p-value: < 2.2e-16
# 95% confidence intervals for every coefficient of the full-data fit.
confint(object = model, level = 0.95)
##                         2.5 %       97.5 %
## (Intercept)      3.624990e+04 6.400079e+04
## R.D.Spend        7.124960e-01 8.995502e-01
## Administration  -1.322701e-01 7.826145e-02
## Marketing.Spend -7.567888e-03 6.152761e-02
## StateFlorida    -6.595030e+03 6.992607e+03
## StateNew York   -6.604003e+03 6.520229e+03
#predict(model,interval = "predict")
# Per-observation influence diagnostics (DFBETAS, DFFITS, covariance ratio,
# Cook's distance, hat values); rows judged influential are starred in the
# `inf` column of the printed table.
influence.measures(model = model)
## Influence measures of
##   lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend +      State) :
## 
##      dfb.1_  dfb.R.D. dfb.Admn  dfb.Mr.S  dfb.SttF  dfb.StNY    dffit
## 1   0.00263 -0.000680 -0.00108 -0.002943  0.001137 -0.002293 -0.00671
## 2  -0.04961  0.020841  0.04627  0.058280 -0.082437 -0.070955  0.15256
## 3   0.08011  0.197811 -0.17477 -0.013702  0.153248 -0.028648  0.40914
## 4  -0.03595  0.110000 -0.04970  0.073783 -0.047849  0.163879  0.37650
## 5  -0.10434 -0.153851  0.16342  0.047737 -0.115870  0.017478 -0.28836
## 6  -0.04629 -0.081405  0.10566 -0.033055  0.027220 -0.115816 -0.26364
## 7  -0.02130 -0.095187  0.00468  0.082074  0.037236  0.050562 -0.12099
## 8   0.05670 -0.045130 -0.04118  0.001538 -0.079214  0.007763 -0.15671
## 9  -0.01048  0.000992  0.00845  0.005091 -0.002030  0.010515  0.02068
## 10 -0.07333 -0.087676  0.06482  0.009105  0.113458  0.114051 -0.19312
## 11  0.11698  0.186811 -0.13740 -0.159583  0.227283 -0.009802  0.36595
## 12  0.22674  0.152695 -0.19750 -0.063320 -0.180498 -0.191722  0.34058
## 13 -0.01749  0.069420  0.01009 -0.048411  0.246939 -0.005732  0.36117
## 14 -0.00754  0.005568  0.05424  0.040526 -0.144106 -0.139683  0.20968
## 15  0.26742 -0.221204 -0.25724  0.142195 -0.359607  0.022591 -0.67871
## 16 -0.00258 -0.208289  0.06663  0.071114  0.021586 -0.300602 -0.52394
## 17  0.03723 -0.055403  0.02986  0.116725 -0.217604 -0.202959  0.30836
## 18  0.08149  0.035255 -0.07538 -0.061166  0.013546 -0.089993 -0.15985
## 19 -0.00866 -0.003027  0.01978 -0.017913 -0.089991  0.002396 -0.14249
## 20  0.00949  0.252210  0.03934 -0.342025  0.056267  0.163205  0.44287
## 21  0.00920 -0.018937  0.00127  0.034093 -0.043026 -0.038229  0.06425
## 22  0.16246  0.134008 -0.16322 -0.156986  0.024662 -0.123303 -0.26328
## 23  0.02844  0.057093 -0.02567 -0.065599 -0.080087 -0.001184 -0.14418
## 24 -0.00300  0.017345  0.00492 -0.019954 -0.025200 -0.000520 -0.04652
## 25 -0.08800 -0.077455  0.08926  0.087987 -0.013268 -0.093171 -0.17298
## 26  0.01269  0.012139  0.04637 -0.028528 -0.091010 -0.099577  0.15683
## 27  0.01876 -0.047078 -0.04093  0.086363 -0.120147 -0.002104 -0.18499
## 28  0.16992  0.271462 -0.14611 -0.339679  0.054174 -0.198665 -0.44228
## 29 -0.04142 -0.004892  0.05451 -0.014043  0.037581  0.001794  0.07940
## 30  0.00788 -0.001210 -0.01152  0.007045 -0.001871 -0.014675 -0.02593
## 31  0.00452  0.004911 -0.00318 -0.008237  0.008083  0.000222  0.01251
## 32  0.00132 -0.000298 -0.00210  0.001610 -0.000431 -0.002779 -0.00500
## 33 -0.02577 -0.029851  0.00520  0.042128  0.023004  0.031218 -0.06200
## 34 -0.01226  0.005720  0.01076 -0.000214 -0.026781 -0.001052 -0.03977
## 35 -0.13840 -0.196836  0.24231  0.173390 -0.173841 -0.153489  0.35397
## 36  0.09973 -0.049889 -0.10222  0.034443  0.001307  0.126044  0.22205
## 37 -0.10755 -0.379353  0.18952  0.218405  0.334034  0.033735  0.61014
## 38  0.01840  0.001322 -0.01609 -0.000850 -0.007621 -0.008050  0.02054
## 39  0.32020 -0.189819 -0.31345  0.109261  0.010955  0.273511  0.58408
## 40 -0.07434  0.011576  0.05092 -0.005414  0.049682  0.051171 -0.09721
## 41  0.02107 -0.063137  0.02014  0.044268 -0.066547 -0.063116  0.11560
## 42  0.05957 -0.022659 -0.05101 -0.006525  0.066841  0.005162  0.11358
## 43  0.01852 -0.010791 -0.00820  0.004902 -0.016530 -0.016757  0.03122
## 44  0.02429 -0.090858  0.05891 -0.085052  0.047181  0.220181  0.37544
## 45 -0.00188 -0.005257  0.01339 -0.004732 -0.009633 -0.011309  0.02616
## 46  0.09538 -0.212843  0.09139 -0.189969  0.108210  0.428337  0.77900
## 47  0.10683  0.434369 -0.14265 -0.364064 -0.144471 -0.025892 -0.50219
## 48 -0.02930  0.046774 -0.04694  0.031389  0.056945  0.068742 -0.16247
## 49 -0.78383 -0.112734  0.70160  0.418630 -0.124090 -0.373999 -0.98871
## 50 -0.56603  0.578956 -0.11423  0.080954  0.626360  0.703325 -1.50721
##    cov.r   cook.d    hat inf
## 1  1.384 7.68e-06 0.1705    
## 2  1.395 3.96e-03 0.1884    
## 3  1.165 2.79e-02 0.1424    
## 4  1.113 2.36e-02 0.1140    
## 5  1.259 1.40e-02 0.1471    
## 6  1.207 1.17e-02 0.1163    
## 7  1.445 2.49e-03 0.2119   *
## 8  1.226 4.17e-03 0.0942    
## 9  1.269 7.29e-05 0.0958    
## 10 1.227 6.31e-03 0.1052    
## 11 1.046 2.21e-02 0.0897    
## 12 1.127 1.94e-02 0.1080    
## 13 0.954 2.13e-02 0.0657    
## 14 1.144 7.40e-03 0.0721    
## 15 0.761 7.20e-02 0.1055    
## 16 0.790 4.34e-02 0.0754    
## 17 1.040 1.58e-02 0.0708    
## 18 1.211 4.33e-03 0.0868    
## 19 1.183 3.44e-03 0.0671    
## 20 1.316 3.29e-02 0.2118    
## 21 1.249 7.03e-04 0.0866    
## 22 1.248 1.17e-02 0.1350    
## 23 1.206 3.53e-03 0.0795    
## 24 1.251 3.69e-04 0.0854    
## 25 1.225 5.07e-03 0.0980    
## 26 1.181 4.16e-03 0.0709    
## 27 1.221 5.79e-03 0.0993    
## 28 1.151 3.25e-02 0.1471    
## 29 1.395 1.07e-03 0.1802    
## 30 1.271 1.15e-04 0.0973    
## 31 1.300 2.67e-05 0.1169    
## 32 1.279 4.26e-06 0.1026    
## 33 1.289 6.55e-04 0.1131    
## 34 1.238 2.70e-04 0.0750    
## 35 1.197 2.10e-02 0.1389    
## 36 1.196 8.32e-03 0.0985    
## 37 0.850 5.92e-02 0.1079    
## 38 1.408 7.19e-05 0.1851    
## 39 1.060 5.58e-02 0.1570    
## 40 1.257 1.61e-03 0.0975    
## 41 1.228 2.27e-03 0.0842    
## 42 1.278 2.20e-03 0.1140    
## 43 1.256 1.66e-04 0.0870    
## 44 1.093 2.34e-02 0.1069    
## 45 1.312 1.17e-04 0.1259    
## 46 0.758 9.44e-02 0.1277    
## 47 1.419 4.23e-02 0.2654   *
## 48 1.272 4.48e-03 0.1211    
## 49 1.051 1.56e-01 0.2559    
## 50 0.128 2.64e-01 0.1015   *
library(mvinfluence)
## Loading required package: car
## Loading required package: carData
## Loading required package: heplots
# influenceIndexPlot() is provided by car, not mvinfluence. Calling it through
# its namespace means the plot does not depend on mvinfluence happening to
# attach car to the search path.
car::influenceIndexPlot(model)

# Refit the same specification with row 50 excluded (row 50 is starred in the
# influence.measures() table and has the largest Cook's distance there).
model2 <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start[-50, ]
)
summary(object = model2)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + 
##     State, data = start[-50, ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -16374  -4572  -1225   5304  15402 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.338e+04  5.794e+03   9.212 9.79e-12 ***
## R.D.Spend        7.836e-01  3.907e-02  20.056  < 2e-16 ***
## Administration  -2.202e-02  4.363e-02  -0.505   0.6163    
## Marketing.Spend  2.582e-02  1.432e-02   1.804   0.0783 .  
## StateFlorida    -1.564e+03  2.842e+03  -0.550   0.5849    
## StateNew York   -1.954e+03  2.752e+03  -0.710   0.4815    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7881 on 43 degrees of freedom
## Multiple R-squared:  0.9618, Adjusted R-squared:  0.9574 
## F-statistic: 216.6 on 5 and 43 DF,  p-value: < 2.2e-16
#model3 <- lm(Profit~R.D.Spend+Administration+Marketing.Spend+State,data = start[-c(49, 50), ])
#summary(model3)
# Final model: the full specification fitted on all rows of `start`.
# NOTE(review): this is the same formula and data as `model`, i.e. row 50 is
# kept despite the refit without it above -- confirm that is intended.
finalmodel <- lm(
  formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + State,
  data = start
)
summary(object = finalmodel)
## 
## Call:
## lm(formula = Profit ~ R.D.Spend + Administration + Marketing.Spend + 
##     State, data = start)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33504  -4736     90   6672  17338 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.013e+04  6.885e+03   7.281 4.44e-09 ***
## R.D.Spend        8.060e-01  4.641e-02  17.369  < 2e-16 ***
## Administration  -2.700e-02  5.223e-02  -0.517    0.608    
## Marketing.Spend  2.698e-02  1.714e-02   1.574    0.123    
## StateFlorida     1.988e+02  3.371e+03   0.059    0.953    
## StateNew York   -4.189e+01  3.256e+03  -0.013    0.990    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9439 on 44 degrees of freedom
## Multiple R-squared:  0.9508, Adjusted R-squared:  0.9452 
## F-statistic: 169.9 on 5 and 44 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for the final model.
plot(x = finalmodel)