library(readr)
## Warning: package 'readr' was built under R version 4.1.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
# Load the magazine advertising data (55 rows, 5 columns according to the
# read_csv() report echoed below).
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists -- consider a project-relative path.
MagazineAdCost <- read_csv("C:/Users/ywang/Desktop/MagazineAdCost.csv")
## Rows: 55 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Magazine
## dbl (4): pagecost, audience, male, income
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script is run in batch or knitted.
if (interactive()) {
  View(MagazineAdCost)
}

# Preview the first 10 rows on their original (untransformed) scale.
head(MagazineAdCost, n = 10)
## # A tibble: 10 x 5
## Magazine pagecost audience male income
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Audubon 25315 1645 51.1 38787
## 2 Better Homes & Gardens 198000 34797 22.1 41933
## 3 Business Week 103300 4760 68.1 63667
## 4 Cosmopolitan 94100 15452 17.3 44237
## 5 Elle 55540 3735 12.5 47211
## 6 Entrepreneur 40355 2476 60.4 47579
## 7 Esquire 51559 3037 71.3 44715
## 8 Family Circle 147500 24539 13 38759
## 9 First For Women 28059 3856 3.6 43850
## 10 Forbes 59340 4191 68.8 66606
# Replace audience, male and income with their natural logarithms, in place.
# pagecost is not transformed and stays on its original scale.
MagazineAdCost[c("audience", "male", "income")] <-
  lapply(MagazineAdCost[c("audience", "male", "income")], log)

# Preview the first 10 rows again, now showing the logged columns.
head(MagazineAdCost, n = 10)
## # A tibble: 10 x 5
## Magazine pagecost audience male income
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Audubon 25315 7.41 3.93 10.6
## 2 Better Homes & Gardens 198000 10.5 3.10 10.6
## 3 Business Week 103300 8.47 4.22 11.1
## 4 Cosmopolitan 94100 9.65 2.85 10.7
## 5 Elle 55540 8.23 2.53 10.8
## 6 Entrepreneur 40355 7.81 4.10 10.8
## 7 Esquire 51559 8.02 4.27 10.7
## 8 Family Circle 147500 10.1 2.56 10.6
## 9 First For Women 28059 8.26 1.28 10.7
## 10 Forbes 59340 8.34 4.23 11.1
# Fit a multiple linear regression of (logged) audience on (logged) male and
# (logged) income.
# The formula uses bare column names resolved through `data =`; writing
# `MagazineAdCost$...` inside the formula bypasses `data`, breaks
# predict(newdata = ...) and yields unwieldy term labels. The fit itself is
# numerically the same; only the labels in the printed output differ.
# NOTE(review): the response here is audience, and pagecost does not appear in
# the model at all. The written conclusion at the end of the file talks about
# page-cost pricing relative to audience/male/income -- confirm that this is
# the intended model.
MultipleRegModel <- lm(audience ~ male + income, data = MagazineAdCost)

# Coefficient table, residual summary and overall fit statistics.
# NOTE(review): in the output below the male coefficient has p = 0.9894, i.e.
# this fit shows no evidence that male contributes once income is included --
# check this against the written conclusion.
summary(MultipleRegModel)
##
## Call:
## lm(formula = audience ~ male + income, data = MagazineAdCost)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9234 -0.6725 -0.1064 0.6064 1.7671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.204777 6.133958 3.457 0.0011 **
## male -0.002154 0.161266 -0.013 0.9894
## income -1.147846 0.594752 -1.930 0.0591 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8985 on 52 degrees of freedom
## Multiple R-squared: 0.08878, Adjusted R-squared: 0.05373
## F-statistic: 2.533 on 2 and 52 DF, p-value: 0.08916

# Sequential (Type I) sums of squares: male is entered first, then income.
anova(MultipleRegModel)
## Analysis of Variance Table
##
## Response: audience
## Df Sum Sq Mean Sq F value Pr(>F)
## male 1 1.083 1.08322 1.3417 0.25203
## income 1 3.007 3.00721 3.7247 0.05908 .
## Residuals 52 41.983 0.80736
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual diagnostics: standardized residuals against fitted values.
MultipleRegModel.StdRes <- rstandard(MultipleRegModel)
MultipleRegModel.Fit <- fitted.values(MultipleRegModel)

# Scatter plot of the two vectors above. They are taken from the calling
# environment rather than from columns of MagazineAdCost, so they must have
# one element per row of the data.
p3 <- ggplot(
  data = MagazineAdCost,
  aes(x = MultipleRegModel.Fit, y = MultipleRegModel.StdRes)
) +
  geom_point(color = "blue")

# Overlay a least-squares trend line without a confidence band. `FALSE` is
# spelled out because `F` is an ordinary variable that can be reassigned.
p3 +
  geom_smooth(method = "lm", se = FALSE, col = "red") +
  labs(x = "Fitted values", y = "Standard Residuals")
## `geom_smooth()` using formula 'y ~ x'

The magazines in the dataset do not have unusually high or low pricing for full-page colour ads relative to their audience/male/income profile. The male variable is useful in the model in the presence of the other variables.