# Load the 50-startups data set. Per the summary printed below, it holds the
# columns R.D.Spend, Administration, Marketing.Spend, State and Profit.
fifty.st <- read.csv("C:\\Users\\bdee\\Videos\\Data science\\Linear Regression files\\50_startups.csv")

# The spreadsheet viewer only makes sense in an interactive session; skip it
# when the script is run non-interactively (e.g. through Rscript).
if (interactive()) {
  View(fifty.st)
}

# Statements further down refer to the columns by bare name, so the data frame
# is kept on the search path here.
attach(fifty.st)

summary(fifty.st)
##    R.D.Spend      Administration   Marketing.Spend         State   
##  Min.   :     0   Min.   : 51283   Min.   :     0   California:17  
##  1st Qu.: 39936   1st Qu.:103731   1st Qu.:129300   Florida   :16  
##  Median : 73051   Median :122700   Median :212716   New York  :17  
##  Mean   : 73722   Mean   :121345   Mean   :211025                  
##  3rd Qu.:101603   3rd Qu.:144842   3rd Qu.:299469                  
##  Max.   :165349   Max.   :182646   Max.   :471784                  
##      Profit      
##  Min.   : 14681  
##  1st Qu.: 90139  
##  Median :107978  
##  Mean   :112013  
##  3rd Qu.:139766  
##  Max.   :192262
# Keep only the predictor and the response used by the regressions.
fifty.st <- fifty.st[, c("Marketing.Spend", "Profit")]

# Descriptive statistics, read explicitly from the data frame instead of from
# whatever copy of the columns is attached to the search path.
mean(fifty.st$Profit)
## [1] 112012.6
mean(fifty.st$Marketing.Spend)
## [1] 211025.1
summary(fifty.st$Profit)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14681   90139  107978  112013  139766  192262
summary(fifty.st$Marketing.Spend)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  129300  212716  211025  299469  471784

# Normal Q-Q plots to eyeball the distribution of each variable.
qqnorm(fifty.st$Profit)

qqnorm(fifty.st$Marketing.Spend)

# Scatter plot of response against predictor; axis labels are set explicitly
# so they stay as the plain column names.
plot(fifty.st$Marketing.Spend, fifty.st$Profit,
     xlab = "Marketing.Spend", ylab = "Profit")

# Pearson correlation between predictor and response.
cor(fifty.st$Marketing.Spend, fifty.st$Profit)
## [1] 0.7477657
# Simple linear regression of Profit on Marketing.Spend.
# `data =` ties the fit to the data frame itself rather than to whichever
# vectors named Profit / Marketing.Spend are visible on the search path.
m1 <- lm(Profit ~ Marketing.Spend, data = fifty.st)

summary(m1)
## 
## Call:
## lm(formula = Profit ~ Marketing.Spend, data = fifty.st)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -83739 -18802   4925  15879  64642 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.000e+04  7.685e+03   7.808 4.29e-10 ***
## Marketing.Spend 2.465e-01  3.159e-02   7.803 4.38e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27040 on 48 degrees of freedom
## Multiple R-squared:  0.5592, Adjusted R-squared:   0.55 
## F-statistic: 60.88 on 1 and 48 DF,  p-value: 4.381e-10
# Fitted values of m1 for the training rows.
pv <- predict(m1, newdata = fifty.st)

# predict() hands back a plain numeric vector ...
class(pv)
## [1] "numeric"
# ... so wrap it in a one-column data frame (column name "pv") for binding.
pv <- as.data.frame(pv)

print(pv)
##           pv
## 1  176279.11
## 2  169406.45
## 3  160542.80
## 4  154446.65
## 5  150249.15
## 6  149434.09
## 7   91480.54
## 8  139825.96
## 9  136803.53
## 10 135169.09
## 11 116482.39
## 12 121555.41
## 13 121578.79
## 14 122275.16
## 15 123223.53
## 16 124520.73
## 17 125154.08
## 18 129646.61
## 19 132689.21
## 20  60003.55
## 21 133612.17
## 22 133876.58
## 23 134759.39
## 24 135116.63
## 25  94649.51
## 26  94005.71
## 27  93041.43
## 28 147048.97
## 29  89122.27
## 30  86408.80
## 31  82463.69
## 32  81745.75
## 33  71361.69
## 34 112902.29
## 35 111956.59
## 36 110655.28
## 37 109573.12
## 38 108563.27
## 39 105663.85
## 40 103133.75
## 41 102590.64
## 42 100538.88
## 43  96479.79
## 44  68761.27
## 45  66986.90
## 46  60472.79
## 47 133230.16
## 48  60003.55
## 49  60003.55
## 50  71136.87
# Put the predictions next to the observed data and save the result as a CSV
# in the current working directory.
final <- cbind(fifty.st, pv)

write.csv(x = final, file = "aug04.csv")
# Show the directory the relative path above resolved against.
getwd()
## [1] "C:/Users/bdee/Desktop"
# Score a separate file of new observations with m1.
test <- read.csv("C:\\Users\\bdee\\Videos\\Data science\\Linear Regression files\\newdata.csv")

# The earlier run captured below warned
#   "'newdata' had 4 rows but variables found have 50 rows"
# and returned 50 values: the predictor was evidently not found in `test`, so
# predict() fell back to the 50-row training vector visible on the search path
# and simply reproduced the training-set predictions. Fail fast instead of
# silently scoring the wrong data.
if (!"Marketing.Spend" %in% names(test)) {
  stop("newdata.csv must contain a 'Marketing.Spend' column; found: ",
       paste(names(test), collapse = ", "), call. = FALSE)
}
pv1 <- predict(m1, newdata = test)
pv1
##         1         2         3         4         5         6         7 
## 176279.11 169406.45 160542.80 154446.65 150249.15 149434.09  91480.54 
##         8         9        10        11        12        13        14 
## 139825.96 136803.53 135169.09 116482.39 121555.41 121578.79 122275.16 
##        15        16        17        18        19        20        21 
## 123223.53 124520.73 125154.08 129646.61 132689.21  60003.55 133612.17 
##        22        23        24        25        26        27        28 
## 133876.58 134759.39 135116.63  94649.51  94005.71  93041.43 147048.97 
##        29        30        31        32        33        34        35 
##  89122.27  86408.80  82463.69  81745.75  71361.69 112902.29 111956.59 
##        36        37        38        39        40        41        42 
## 110655.28 109573.12 108563.27 105663.85 103133.75 102590.64 100538.88 
##        43        44        45        46        47        48        49 
##  96479.79  68761.27  66986.90  60472.79 133230.16  60003.55  60003.55 
##        50 
##  71136.87
# `pv` is already a data frame at this point, so converting it again would be
# a no-op; the freshly computed new-data predictions live in `pv1`, so convert
# those instead.
pv1 <- as.data.frame(pv1)

getwd()
## [1] "C:/Users/bdee/Desktop"
# Logarithmic transformations

# Log-predictor model, left disabled.
# NOTE(review): presumably disabled because Marketing.Spend contains zeros
# (its minimum is 0), which makes log() return -Inf -- confirm.
#reg_log <- lm(Profit ~ log(Marketing.Spend))

# Exponential model: log(Profit) is linear in Marketing.Spend.
# `data =` ties the fit to the data frame rather than to the search path.
reg_exp <- lm(log(Profit) ~ Marketing.Spend, data = fifty.st)

summary(reg_exp)
## 
## Call:
## lm(formula = log(Profit) ~ Marketing.Spend, data = fifty.st)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5289 -0.1353  0.0459  0.2052  0.7091 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.101e+01  9.905e-02 111.147  < 2e-16 ***
## Marketing.Spend 2.527e-06  4.071e-07   6.207 1.21e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3485 on 48 degrees of freedom
## Multiple R-squared:  0.4453, Adjusted R-squared:  0.4337 
## F-statistic: 38.53 on 1 and 48 DF,  p-value: 1.208e-07
# Score the new observations with the exponential model. The values returned
# are on the log(Profit) scale; apply exp() to express them as profit.
#
# The earlier run captured below warned
#   "'newdata' had 4 rows but variables found have 50 rows"
# and printed 50 values, i.e. the predictor was not found in `test` and the
# training vector was used instead. Refuse to continue in that situation.
if (!"Marketing.Spend" %in% names(test)) {
  stop("newdata.csv must contain a 'Marketing.Spend' column; found: ",
       paste(names(test), collapse = ", "), call. = FALSE)
}
predict(reg_exp, newdata = test)
##        1        2        3        4        5        6        7        8 
## 12.20137 12.13089 12.04000 11.97749 11.93445 11.92609 11.33182 11.82757 
##        9       10       11       12       13       14       15       16 
## 11.79658 11.77982 11.58820 11.64022 11.64046 11.64760 11.65732 11.67062 
##       17       18       19       20       21       22       23       24 
## 11.67712 11.72319 11.75439 11.00905 11.76385 11.76656 11.77561 11.77928 
##       25       26       27       28       29       30       31       32 
## 11.36432 11.35772 11.34783 11.90163 11.30764 11.27982 11.23936 11.23200 
##       33       34       35       36       37       38       39       40 
## 11.12552 11.55149 11.54179 11.52845 11.51735 11.50699 11.47726 11.45132 
##       41       42       43       44       45       46       47       48 
## 11.44575 11.42471 11.38309 11.09885 11.08066 11.01386 11.75993 11.00905 
##       49       50 
## 11.00905 11.12321
# Reciprocal-log model: Profit regressed on 1 / log(Marketing.Spend).
#
# Inside a formula, `/` is a formula operator (nesting), not division, so the
# unwrapped `Profit ~ 1/log(Marketing.Spend)` collapses to an intercept-only
# model. I() makes R evaluate the expression arithmetically so the transformed
# predictor actually enters the fit.
# Rows with Marketing.Spend == 0 give log() = -Inf and therefore a regressor
# value of 0, which is finite and accepted by lm().
#
# NOTE: the summary captured below came from the old intercept-only fit (only
# an (Intercept) row, 49 residual df); re-run to refresh it.
reg_exl <- lm(Profit ~ I(1 / log(Marketing.Spend)), data = fifty.st)

summary(reg_exl)
## 
## Call:
## lm(formula = Profit ~ 1/log(Marketing.Spend))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -97331 -21874  -4034  27753  80249 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   112013       5700   19.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40310 on 49 degrees of freedom
# Square-root response model: sqrt(Profit) is linear in Marketing.Spend.
# `data =` ties the fit to the data frame rather than to the search path.
reg_exr <- lm(sqrt(Profit) ~ Marketing.Spend, data = fifty.st)

summary(reg_exr)
## 
## Call:
## lm(formula = sqrt(Profit) ~ Marketing.Spend, data = fifty.st)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -143.69  -27.51    8.49   27.84  102.86 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.475e+02  1.287e+01  19.229  < 2e-16 ***
## Marketing.Spend 3.835e-04  5.291e-05   7.247 3.07e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.3 on 48 degrees of freedom
## Multiple R-squared:  0.5225, Adjusted R-squared:  0.5126 
## F-statistic: 52.53 on 1 and 48 DF,  p-value: 3.073e-09
#reg_sqr <- lm(Profit ~ sqrt(Marketing.Spend))


# Reciprocal-log response model: 1 / log(Profit) is linear in Marketing.Spend.
# On the left-hand side of a formula the expression is evaluated
# arithmetically, so no I() wrapper is needed here.
# `data =` ties the fit to the data frame rather than to the search path.
reg_tr <- lm(1/log(Profit) ~ Marketing.Spend, data = fifty.st)
summary(reg_tr)
## 
## Call:
## lm(formula = 1/log(Profit) ~ Marketing.Spend, data = fifty.st)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0056117 -0.0015993 -0.0003705  0.0008909  0.0141699 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.095e-02  8.371e-04 108.642  < 2e-16 ***
## Marketing.Spend -1.973e-08  3.441e-09  -5.734 6.37e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002946 on 48 degrees of freedom
## Multiple R-squared:  0.4065, Adjusted R-squared:  0.3942 
## F-statistic: 32.88 on 1 and 48 DF,  p-value: 6.366e-07