# 1. Import the Advertising data from the textbook website.
ads <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/Advertising.csv",
  header = TRUE
)
# Put the columns of `ads` on the search path so they can be referenced by
# bare name below; this is paired with the detach(ads) at the end.
attach(ads)
# 2. Learn about the data set: what kinds of variables are there?
# Column names of the data frame.
colnames(ads)
## [1] "X" "TV" "radio" "newspaper" "sales"
# First six rows, to see what the values in each column look like.
head(ads, n = 6L)
## X TV radio newspaper sales
## 1 1 230.1 37.8 69.2 22.1
## 2 2 44.5 39.3 45.1 10.4
## 3 3 17.2 45.9 69.3 9.3
## 4 4 151.5 41.3 58.5 18.5
## 5 5 180.8 10.8 58.4 12.9
## 6 6 8.7 48.9 75.0 7.2
# 3. Describe the relationship between sales and TV using cor() and plot():
# Correlation between the TV and sales columns. with() looks the names up in
# `ads` first, so the result does not depend on the attach()ed search path
# (where a global variable called TV or sales would silently take precedence).
with(ads, cor(TV, sales))
## [1] 0.7822244
# Scatterplot of sales against TV; the expression is still plot(TV, sales),
# so the axis labels are unchanged.
with(ads, plot(TV, sales))

# 4. Create a linear model for sales as a function of TV:
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Fit a simple linear regression of sales on TV. Passing data = ads makes the
# model take its variables from the data frame itself rather than from the
# attach()ed search path, consistent with the ggplot(ads, ...) call below.
mod <- lm(sales ~ TV, data = ads)
mod
##
## Call:
## lm(formula = sales ~ TV, data = ads)
##
## Coefficients:
## (Intercept) TV
## 7.03259 0.04754
# Scatterplot of the data with the fitted regression line overlaid as a dashed
# blue line. Coefficients are pulled by name via coef() so the slope and
# intercept cannot be swapped by a positional-index mistake.
TvSales <- ggplot(ads, aes(x = TV, y = sales)) +
  geom_point() +
  geom_abline(
    slope = coef(mod)[["TV"]],
    intercept = coef(mod)[["(Intercept)"]],
    color = "blue", lty = 2, lwd = 1
  ) +
  theme_bw()
TvSales

# 5. Interpret the slope coefficient for the context of the problem:
# Slope guesstimate: 5/100 --> 0.05
mod
##
## Call:
## lm(formula = sales ~ TV, data = ads)
##
## Coefficients:
## (Intercept) TV
## 7.03259 0.04754
# 0.04754 --> for each additional unit of money spent on TV advertising, we
# would expect sales to increase by 0.04754 units on average.
coef(mod)["TV"]
## TV
## 0.04753664
# 6. Create a confidence interval for the slope coefficient:
# 95% confidence intervals for both coefficients; the TV row is the interval
# for the slope.
confint(mod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 6.12971927 7.93546783
## TV 0.04223072 0.05284256
# 8. Predict the response for a new observation of TV at 100. Include the
# prediction and confidence intervals. Why are these intervals different?
# New observation at which to predict: TV = 100.
newdata <- data.frame(TV = 100)
# Prediction interval for a single new response at TV = 100. The interval type
# is spelled out in full ("prediction") instead of relying on partial matching
# of the abbreviated value "predict".
predict(mod, newdata = newdata, interval = "prediction")
## fit lwr upr
## 1 11.78626 5.339251 18.23326
# Confidence interval for the mean response at TV = 100.
predict(mod, newdata = newdata, interval = "confidence")
## fit lwr upr
## 1 11.78626 11.26782 12.3047
# These intervals are different because the prediction interval covers a single
# new observation, so it includes the extra variability of an individual
# response around the regression line, whereas the confidence interval only
# covers the average/mean response at that TV value and is therefore narrower.
# Remove `ads` from the search path again (pairs with the earlier attach(ads)).
detach(ads)