# Read the marketing data set (path is relative to the working directory).
mydata <- read.csv(file = "data/marketing.csv")
head(mydata)

# Correlation matrix of the five variables of interest.
# Columns are selected by name rather than by position; the names are the ones
# shown in the printed matrix below. An earlier cor() call on columns 2, 4 and
# 6 was overwritten on the very next line, so it has been dropped.
corr <- cor(mydata[c("sales", "radio", "paper", "tv", "pos")])
corr
# Recorded console output:
#>            sales       radio       paper          tv         pos
#> sales  1.0000000  0.97713807 -0.28306828  0.95797025  0.01264860
#> radio  0.9771381  1.00000000 -0.23835848  0.96609579  0.06040209
#> paper -0.2830683 -0.23835848  1.00000000 -0.24587896 -0.09006241
#> tv     0.9579703  0.96609579 -0.24587896  1.00000000 -0.03602314
#> pos    0.0126486  0.06040209 -0.09006241 -0.03602314  1.00000000

#install.packages("corrplot")
#install.packages("corrgram")
#library(corrgram)

# corrplot() lives in the corrplot package, which must be attached before the
# call. The original run left library(corrplot) commented out and failed with:
#   Error in corrplot(corr) : could not find function "corrplot"
# If the package is missing, run install.packages("corrplot") once.
library(corrplot)

# Visualise the correlation matrix computed above.
corrplot(corr)

# Extract all variables
# Pull each column used by the models below out of the data frame into its
# own vector, so the model formulas can refer to them directly.
sales <- mydata[["sales"]]
radio <- mydata[["radio"]]
tv <- mydata[["tv"]]
paper <- mydata[["paper"]]
pos <- mydata[["pos"]]
# Scatter plot of radio against sales (plot command from Worksheet 4).
plot(radio, sales)

# Simple linear regression: sales modelled as a linear function of radio.
reg <- lm(sales ~ radio)

# Summary of the model. The console output recorded in the worksheet is kept
# below as comments so that the script remains runnable.
summary(reg)
#> Call:
#> lm(formula = sales ~ radio)
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -1732.85  -198.88    62.64   415.26   637.70
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -9741.92    1362.94  -7.148 1.17e-06 ***
#> radio         347.69      17.83  19.499 1.49e-13 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> Residual standard error: 571.6 on 18 degrees of freedom
#> Multiple R-squared:  0.9548,    Adjusted R-squared:  0.9523
#> F-statistic: 380.2 on 1 and 18 DF,  p-value: 1.492e-13

# Plot Radio and Sales
plot(radio, sales)
# Add a trend line to the plot using the linear model created above.
abline(reg, col = "blue", lwd = 2)

# Question: List some observations from this plot.
# Answer: This graph shows a positive linear relationship: as radio increases,
# sales increase as well, and the fitted trend line slopes upward.
# Multiple linear regression model: sales predicted by radio and tv.
mlr1 <- lm(sales ~ radio + tv)

# Summary of the multiple linear regression model. The console output recorded
# in the worksheet is kept below as comments so that the script stays runnable.
summary(mlr1)
#> Call:
#> lm(formula = sales ~ radio + tv)
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -1729.58  -205.97    56.95   335.15   759.26
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -17150.46    6965.59  -2.462 0.024791 *
#> radio          275.69      68.73   4.011 0.000905 ***
#> tv              48.34      44.58   1.084 0.293351
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> Residual standard error: 568.9 on 17 degrees of freedom
#> Multiple R-squared:  0.9577,    Adjusted R-squared:  0.9527
#> F-statistic: 192.6 on 2 and 17 DF,  p-value: 2.098e-12

# For mlr1, the R-squared value is 0.9577 and the adjusted R-squared is 0.9527.
# mlr2 = sales predicted by radio, tv, and pos.
mlr2 <- lm(sales ~ radio + tv + pos)

# Model summary; the recorded console output is kept below as comments.
summary(mlr2)
#> Call:
#> lm(formula = sales ~ radio + tv + pos)
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -1748.20  -187.42   -61.14   352.07   734.20
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -15491.23    7697.08  -2.013  0.06130 .
#> radio          291.36      75.48   3.860  0.00139 **
#> tv              38.26      48.90   0.782  0.44538
#> pos           -107.62     191.25  -0.563  0.58142
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> Residual standard error: 580.7 on 16 degrees of freedom
#> Multiple R-squared:  0.9585,    Adjusted R-squared:  0.9508
#> F-statistic: 123.3 on 3 and 16 DF,  p-value: 2.859e-11

# mlr3 = sales predicted by radio, tv, pos, and paper.
# Fit the four-predictor model: sales on radio, tv, pos and paper.
mlr3 <- lm(sales ~ radio + tv + pos + paper)

# Model summary; the recorded console output is kept below as comments.
summary(mlr3)
#> Call:
#> lm(formula = sales ~ radio + tv + pos + paper)
#> Residuals:
#>      Min       1Q   Median       3Q      Max
#> -1558.13  -239.35     7.25   387.02   728.02
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -13801.015   7865.017  -1.755  0.09970 .
#> radio          294.224     75.442   3.900  0.00142 **
#> tv              33.369     49.080   0.680  0.50693
#> pos           -128.875    192.156  -0.671  0.51262
#> paper           -9.159      8.991  -1.019  0.32449
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> Residual standard error: 580 on 15 degrees of freedom
#> Multiple R-squared:  0.9612,    Adjusted R-squared:  0.9509
#> F-statistic: 92.96 on 4 and 15 DF,  p-value: 2.13e-10

# Question: Based purely on the values for R-squared and adjusted R-squared,
# which linear regression model is best at predicting sales? Explain why.
# Answer: This shows the relationship between sales and paper. It shows that
# as paper increases, sales actually decrease.
# NOTE(review): the answer above describes the negative paper coefficient but
# does not compare the models. From the recorded summaries in this worksheet:
#   mlr1: R-squared 0.9577, adjusted R-squared 0.9527
#   mlr2: R-squared 0.9585, adjusted R-squared 0.9508
#   mlr3: R-squared 0.9612, adjusted R-squared 0.9509
# mlr3 has the highest R-squared, but R-squared cannot decrease when
# predictors are added. Adjusted R-squared penalises extra predictors and is
# highest for mlr1, so mlr1 is the best choice on these criteria.