# Load the Facebook page-post dataset and give its 19 columns short names.
# The former `rm(list = ls())` was dropped on purpose: sourcing this script
# should not wipe whatever else is in the caller's workspace.
library(RCurl)

# Download the CSV text over HTTPS and parse it. `stringsAsFactors = TRUE`
# keeps text columns (e.g. Type) as factors on R >= 4.0 too, which is what the
# class listing recorded further down in this script shows.
facebook_page <- read.table(
  text = getURL("https://raw.githubusercontent.com/Rajiv2806/SA-2-Mini-Project-1-Facebook-Page-Sales/master/Facebook.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Fail early if the file layout changed; the names below are positional.
stopifnot(ncol(facebook_page) == 19L)
names(facebook_page) <- c(
  "Page_Likes", "Type", "Category", "Month", "Weekday", "Hour", "Paid",
  "Reach", "Impressions", "Engaged", "Consumers", "Consumptions",
  "Impressions_Liked", "Reach_Liked", "Engaged_Liked",
  "Comment", "Like", "Share", "Interactions"
)

# Reorder columns by name (same order the positional index vector produced):
# Interactions first, descriptors next, then paired total / "_Liked" metrics.
facebook_page <- facebook_page[, c(
  "Interactions", "Type", "Category", "Paid", "Month", "Weekday", "Hour",
  "Reach", "Reach_Liked", "Impressions", "Impressions_Liked",
  "Engaged", "Engaged_Liked", "Consumers", "Consumptions",
  "Comment", "Like", "Share", "Page_Likes"
)]
# Derived features. Every "*Rate*" column is a ratio of two raw counts
# multiplied by 100; Impressions_in_100 is the impression count divided by 100.
facebook_page$Impressions_in_100 <- with(facebook_page, Impressions / 100)
facebook_page$ImpressionsRate_Liked <- with(
  facebook_page, (Impressions_Liked / Page_Likes) * 100
)
facebook_page$ReachRate <- with(facebook_page, (Reach / Impressions) * 100)
facebook_page$ReachRate_Liked <- with(
  facebook_page, (Reach_Liked / Impressions_Liked) * 100
)
facebook_page$EngagedRate <- with(facebook_page, (Engaged / Reach) * 100)
facebook_page$EngagedRate_Liked <- with(
  facebook_page, (Engaged_Liked / Reach_Liked) * 100
)
facebook_page$ConsumerRate <- with(facebook_page, (Consumers / Engaged) * 100)
facebook_page$consumptionRate <- with(facebook_page, (Consumptions / Reach) * 100)

# Keep only the modelling columns: the response (consumptionRate) first, then
# the categorical descriptors, the derived features, and Interactions last.
facebook_page <- facebook_page[, c(
  "consumptionRate", "Type", "Category", "Paid",
  "Impressions_in_100", "ImpressionsRate_Liked",
  "ReachRate", "ReachRate_Liked",
  "EngagedRate", "EngagedRate_Liked",
  "ConsumerRate", "Interactions"
)]
# Recode the numeric Paid / Category codes as labelled factors in one step.
# factor(levels =, labels =) avoids the implicit integer -> character coercion
# and NA-valued logical indices of a chain of `x[x == code] <- "label"`
# assignments. Missing values stay NA, and any code not listed in `levels`
# also becomes NA, so it is caught by the NA checks that follow.
facebook_page$Paid <- factor(
  facebook_page$Paid,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

# Levels are listed so that the resulting order is Action, Inspiration,
# Product (the alphabetical order as.factor() would have produced), keeping
# "Action" as the reference level in the regressions below.
facebook_page$Category <- factor(
  facebook_page$Category,
  levels = c(1, 3, 2),
  labels = c("Action", "Inspiration", "Product")
)
# Quick structural checks on the prepared data; the `##` lines are the console
# output recorded when the script was originally run.

# Container type of the dataset.
class(facebook_page)
## [1] "data.frame"
# Number of rows and columns.
dim(facebook_page)
## [1] 500 12
# Class of every column.
sapply(facebook_page,class)
## consumptionRate Type Category
## "numeric" "factor" "factor"
## Paid Impressions_in_100 ImpressionsRate_Liked
## "factor" "numeric" "numeric"
## ReachRate ReachRate_Liked EngagedRate
## "numeric" "numeric" "numeric"
## EngagedRate_Liked ConsumerRate Interactions
## "numeric" "numeric" "integer"
# Total count of missing cells in the whole data frame.
sum(is.na(facebook_page))
## [1] 1
# Missing cells per column (the recorded run shows the single NA is in Paid).
colSums(is.na(facebook_page))
## consumptionRate Type Category
## 0 0 0
## Paid Impressions_in_100 ImpressionsRate_Liked
## 1 0 0
## ReachRate ReachRate_Liked EngagedRate
## 0 0 0
## EngagedRate_Liked ConsumerRate Interactions
## 0 0 0
# Frequency of each Paid level (NA is excluded from table() by default).
table(facebook_page$Paid)
##
## No Yes
## 360 139
# Fill the missing Paid value with "No", the more frequent level in the
# frequency table recorded just above, then confirm no NA is left anywhere.
facebook_page$Paid <- replace(
  facebook_page$Paid,
  is.na(facebook_page$Paid),
  "No"
)
sum(is.na(facebook_page))
## [1] 0
# Per-column summary of the cleaned data.
summary(facebook_page)
## consumptionRate Type Category Paid
## Min. : 0.4878 Link : 22 Action :215 No :361
## 1st Qu.: 6.7254 Photo :426 Inspiration:155 Yes:139
## Median : 16.1678 Status: 45 Product :130
## Mean : 20.9524 Video : 7
## 3rd Qu.: 21.6428
## Max. :350.4202
## Impressions_in_100 ImpressionsRate_Liked ReachRate
## Min. : 5.70 Min. : 0.5553 Min. : 4.47
## 1st Qu.: 56.95 1st Qu.: 3.1164 1st Qu.: 53.32
## Median : 90.51 Median : 5.6189 Median : 56.78
## Mean : 295.86 Mean : 14.5853 Mean : 60.46
## 3rd Qu.: 220.85 3rd Qu.: 12.0230 3rd Qu.: 60.13
## Max. :11102.82 Max. :1064.5075 Max. :790.63
## ReachRate_Liked EngagedRate EngagedRate_Liked ConsumerRate
## Min. : 4.366 Min. : 0.4878 Min. : 0.8929 Min. : 35.73
## 1st Qu.:51.452 1st Qu.: 6.4101 1st Qu.: 7.6525 1st Qu.: 81.46
## Median :55.097 Median :12.1717 Median :12.3771 Median : 90.48
## Mean :53.984 Mean :12.4339 Mean :12.7234 Mean : 86.46
## 3rd Qu.:58.012 3rd Qu.:16.2629 3rd Qu.:16.1054 3rd Qu.: 95.33
## Max. :73.220 Max. :60.0840 Max. :49.8580 Max. :100.00
## Interactions
## Min. : 0.0
## 1st Qu.: 71.0
## Median : 123.5
## Mean : 212.1
## 3rd Qu.: 228.5
## Max. :6334.0
# Box plots of every numeric column, one panel per variable.
# Page 1: 3 x 2 grid with the impression, reach and engagement measures.
par(mfrow = c(3, 2))
invisible(Map(
  function(column, title) boxplot(facebook_page[[column]], main = title),
  c(
    "Impressions_in_100", "ImpressionsRate_Liked",
    "ReachRate", "ReachRate_Liked",
    "EngagedRate", "EngagedRate_Liked"
  ),
  c(
    "Total Impressions in 100's", "Impression Rate Who Liked or Page",
    "Reach Rate", "Reach Rate Who Liked or Page",
    "Engaged User Rate", "Engaged Users who Liked Our Page"
  )
))

# Page 2: 1 x 3 grid with interactions, consumer rate and the response.
par(mfrow = c(1, 3))
invisible(Map(
  function(column, title) boxplot(facebook_page[[column]], main = title),
  c("Interactions", "ConsumerRate", "consumptionRate"),
  c("Total Interactions", "Consumer Rate", "Consumption Rate")
))

library(PerformanceAnalytics)
# Correlation chart restricted to the numeric columns of the dataset
# (factor columns Type, Category and Paid are filtered out).
chart.Correlation(Filter(is.numeric, facebook_page))

# 80 / 20 train / validation split.
# The seed makes the split (and therefore every model below) reproducible;
# its value is arbitrary. The `##` outputs recorded in this script come from
# an earlier unseeded run, so re-running will not reproduce them digit for
# digit.
set.seed(123)
rownumbers <- sample(
  seq_len(nrow(facebook_page)),
  size = floor(0.8 * nrow(facebook_page))
)
facebook_train <- facebook_page[rownumbers, ]
facebook_Validation <- facebook_page[-rownumbers, ]

# Baseline model: consumptionRate regressed on every other column, all on
# their original (untransformed) scale.
Model0 <- lm(
  consumptionRate ~ Type + Category + Paid +
    Impressions_in_100 + ImpressionsRate_Liked +
    ReachRate + ReachRate_Liked +
    EngagedRate + EngagedRate_Liked +
    ConsumerRate + Interactions,
  data = facebook_train
)
summary(Model0)
##
## Call:
## lm(formula = consumptionRate ~ +Type + Category + Paid + Impressions_in_100 +
## ImpressionsRate_Liked + ReachRate + ReachRate_Liked + EngagedRate +
## EngagedRate_Liked + ConsumerRate + Interactions, data = facebook_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.030 -9.141 -2.539 2.832 294.351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.301489 20.608221 -0.354 0.723308
## TypePhoto 5.158267 8.957215 0.576 0.565034
## TypeStatus 6.653437 11.243788 0.592 0.554370
## TypeVideo -3.371814 16.866266 -0.200 0.841653
## CategoryInspiration -18.797940 4.278914 -4.393 1.45e-05 ***
## CategoryProduct -18.180765 4.759086 -3.820 0.000155 ***
## PaidYes 2.668188 3.645722 0.732 0.464694
## Impressions_in_100 0.001076 0.004314 0.250 0.803089
## ImpressionsRate_Liked -0.009812 0.048962 -0.200 0.841279
## ReachRate -0.007536 0.030860 -0.244 0.807200
## ReachRate_Liked 0.138020 0.247637 0.557 0.577612
## EngagedRate 3.277702 0.541239 6.056 3.32e-09 ***
## EngagedRate_Liked -1.062825 0.574488 -1.850 0.065074 .
## ConsumerRate -0.011237 0.188436 -0.060 0.952478
## Interactions 0.002520 0.004926 0.512 0.609258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32 on 385 degrees of freedom
## Multiple R-squared: 0.3152, Adjusted R-squared: 0.2903
## F-statistic: 12.66 on 14 and 385 DF, p-value: < 2.2e-16
# Residual diagnostics for the baseline model, using the car package.
library(car)
# Quantile-comparison plot for Model0's residuals.
qqPlot(Model0)

# Residual plots for Model0; the call also prints the per-term test
# statistics and the Tukey test recorded below.
residualPlots(Model0)


## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Paid NA NA
## Impressions_in_100 0.199 0.843
## ImpressionsRate_Liked 0.196 0.845
## ReachRate 1.568 0.118
## ReachRate_Liked 0.106 0.916
## EngagedRate 2.383 0.018
## EngagedRate_Liked -1.548 0.123
## ConsumerRate 0.281 0.779
## Interactions -0.675 0.500
## Tukey test 4.277 0.000
# Log-transform the response and the continuous predictors, writing each new
# column straight into facebook_train. Assigning in place (rather than building
# global temporaries, cbind()-ing them and rm()-ing them afterwards) leaves no
# stray objects behind and does not append duplicate columns if this section is
# re-run. Column order is unchanged: the eight "_Log" columns are appended in
# the order written here.
facebook_train$consumptionRate_Log <- log(facebook_train$consumptionRate)
facebook_train$Impressions_in_100_Log <- log(facebook_train$Impressions_in_100)
facebook_train$ImpressionsRate_Liked_Log <- log(facebook_train$ImpressionsRate_Liked)
facebook_train$ReachRate_Log <- log(facebook_train$ReachRate)
# ReachRate_Liked and ConsumerRate are reflected before logging
# (max + 1 - x), so larger original values map to smaller transformed values
# and the argument of log() is always >= 1 on the training data.
facebook_train$ReachRate_Liked_Log <- log(
  max(facebook_train$ReachRate_Liked) + 1 - facebook_train$ReachRate_Liked
)
facebook_train$EngagedRate_Log <- log(facebook_train$EngagedRate)
facebook_train$EngagedRate_Liked_Log <- log(facebook_train$EngagedRate_Liked)
facebook_train$ConsumerRate_Log <- log(
  max(facebook_train$ConsumerRate) + 1 - facebook_train$ConsumerRate
)

# Model1: log response on the factors, the logged predictors, and
# Interactions (left on its original scale).
Model1 <- lm(
  consumptionRate_Log ~ Type + Category + Paid +
    Impressions_in_100_Log + ImpressionsRate_Liked_Log +
    ReachRate_Log + ReachRate_Liked_Log +
    EngagedRate_Log + EngagedRate_Liked_Log +
    ConsumerRate_Log +
    Interactions,
  data = facebook_train
)
summary(Model1)
##
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid +
## Impressions_in_100_Log + ImpressionsRate_Liked_Log + ReachRate_Log +
## ReachRate_Liked_Log + EngagedRate_Log + EngagedRate_Liked_Log +
## ConsumerRate_Log + Interactions, data = facebook_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70985 -0.15695 -0.04284 0.06795 2.41531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.388e+00 6.392e-01 2.172 0.030464 *
## TypePhoto 1.347e-01 1.132e-01 1.190 0.234775
## TypeStatus -4.941e-02 1.461e-01 -0.338 0.735355
## TypeVideo -4.469e-03 2.066e-01 -0.022 0.982757
## CategoryInspiration -2.720e-01 5.289e-02 -5.142 4.35e-07 ***
## CategoryProduct -2.206e-01 5.961e-02 -3.701 0.000246 ***
## PaidYes 3.236e-02 4.343e-02 0.745 0.456595
## Impressions_in_100_Log -1.028e-01 9.080e-02 -1.132 0.258200
## ImpressionsRate_Liked_Log 1.764e-01 8.813e-02 2.001 0.046052 *
## ReachRate_Log -1.577e-01 8.404e-02 -1.876 0.061392 .
## ReachRate_Liked_Log -7.975e-02 5.253e-02 -1.518 0.129804
## EngagedRate_Log 9.247e-01 1.205e-01 7.672 1.40e-13 ***
## EngagedRate_Liked_Log 2.281e-01 1.316e-01 1.734 0.083741 .
## ConsumerRate_Log -1.686e-01 2.816e-02 -5.986 4.93e-09 ***
## Interactions -6.189e-05 5.780e-05 -1.071 0.284895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3785 on 385 degrees of freedom
## Multiple R-squared: 0.8566, Adjusted R-squared: 0.8514
## F-statistic: 164.2 on 14 and 385 DF, p-value: < 2.2e-16
# Quantile-comparison plot for Model1 with id.n = 10; the recorded output
# lists ten labelled observations (row labels on the first line).
# NOTE(review): `id.n` looks like the older car argument style; newer car
# releases may expect `id = list(n = 10)` -- confirm against the installed
# version.
qqPlot(Model1,id.n = 10)

## 436 428 437 89 97 424 425 287 269 279
## 391 392 393 394 395 396 397 398 399 400
# Single residual plot for Model1.
residualPlot(Model1)

# Per-term residual plots for Model1; also prints the test table recorded below.
residualPlots(Model1)


## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Paid NA NA
## Impressions_in_100_Log -2.775 0.006
## ImpressionsRate_Liked_Log -2.881 0.004
## ReachRate_Log -1.135 0.257
## ReachRate_Liked_Log -2.089 0.037
## EngagedRate_Log -2.571 0.011
## EngagedRate_Liked_Log -2.686 0.008
## ConsumerRate_Log -2.276 0.023
## Interactions 0.225 0.822
## Tukey test -2.084 0.037
# Index plots of influence diagnostics for Model1, with id.n = 5 passed to car
# (presumably labelling the five most extreme cases per panel -- confirm
# against the installed car version).
influenceIndexPlot(Model1,id.n=5)

# Add a squared version of each logged predictor directly to facebook_train.
# In-place assignment replaces the temporaries + cbind() + rm() sequence, so
# nothing leaks into the workspace and re-running does not duplicate columns.
# The seven "_Sq" columns are appended in the order written here.
facebook_train$Impressions_in_100_Log_Sq <- facebook_train$Impressions_in_100_Log^2
facebook_train$ImpressionsRate_Liked_Log_Sq <- facebook_train$ImpressionsRate_Liked_Log^2
facebook_train$ReachRate_Log_Sq <- facebook_train$ReachRate_Log^2
facebook_train$ReachRate_Liked_Log_Sq <- facebook_train$ReachRate_Liked_Log^2
facebook_train$EngagedRate_Log_Sq <- facebook_train$EngagedRate_Log^2
facebook_train$EngagedRate_Liked_Log_Sq <- facebook_train$EngagedRate_Liked_Log^2
facebook_train$ConsumerRate_Log_Sq <- facebook_train$ConsumerRate_Log^2

# Model2: Model1's terms plus the squared term of every logged predictor.
Model2 <- lm(
  consumptionRate_Log ~ Type + Category + Paid +
    Impressions_in_100_Log + Impressions_in_100_Log_Sq +
    ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
    ReachRate_Log + ReachRate_Log_Sq +
    ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
    EngagedRate_Log + EngagedRate_Log_Sq +
    EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
    ConsumerRate_Log + ConsumerRate_Log_Sq +
    Interactions,
  data = facebook_train
)
summary(Model2)
##
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid +
## Impressions_in_100_Log + Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log +
## ImpressionsRate_Liked_Log_Sq + ReachRate_Log + ReachRate_Log_Sq +
## ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log +
## EngagedRate_Log_Sq + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
## ConsumerRate_Log + ConsumerRate_Log_Sq + Interactions, data = facebook_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53883 -0.16885 -0.05037 0.06896 2.38652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.872e+00 1.421e+00 2.021 0.0440 *
## TypePhoto 1.867e-01 1.205e-01 1.549 0.1222
## TypeStatus -1.248e-02 1.554e-01 -0.080 0.9360
## TypeVideo 6.821e-02 2.138e-01 0.319 0.7499
## CategoryInspiration -3.033e-01 5.418e-02 -5.598 4.17e-08 ***
## CategoryProduct -2.458e-01 6.002e-02 -4.096 5.14e-05 ***
## PaidYes 3.225e-02 4.310e-02 0.748 0.4547
## Impressions_in_100_Log -1.828e-01 2.994e-01 -0.611 0.5419
## Impressions_in_100_Log_Sq -6.194e-03 2.638e-02 -0.235 0.8145
## ImpressionsRate_Liked_Log 3.843e-01 1.834e-01 2.095 0.0368 *
## ImpressionsRate_Liked_Log_Sq -2.710e-02 2.853e-02 -0.950 0.3428
## ReachRate_Log -7.292e-01 4.896e-01 -1.489 0.1372
## ReachRate_Log_Sq 3.894e-02 5.038e-02 0.773 0.4401
## ReachRate_Liked_Log 1.560e-01 1.777e-01 0.878 0.3807
## ReachRate_Liked_Log_Sq -5.620e-02 4.075e-02 -1.379 0.1687
## EngagedRate_Log 7.530e-01 3.005e-01 2.506 0.0126 *
## EngagedRate_Log_Sq -6.517e-03 6.038e-02 -0.108 0.9141
## EngagedRate_Liked_Log 5.622e-01 3.523e-01 1.596 0.1113
## EngagedRate_Liked_Log_Sq -4.372e-02 7.144e-02 -0.612 0.5409
## ConsumerRate_Log -5.397e-03 9.922e-02 -0.054 0.9567
## ConsumerRate_Log_Sq -4.534e-02 2.210e-02 -2.052 0.0409 *
## Interactions 5.948e-05 7.128e-05 0.834 0.4046
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.373 on 378 degrees of freedom
## Multiple R-squared: 0.8632, Adjusted R-squared: 0.8556
## F-statistic: 113.6 on 21 and 378 DF, p-value: < 2.2e-16
# Quantile-comparison plot for Model2 with id.n = 10; the recorded output
# lists ten labelled observations (row labels on the first line).
qqPlot(Model2,id.n = 10)

## 428 423 437 424 89 425 97 287 269 279
## 391 392 393 394 395 396 397 398 399 400
# Single residual plot for Model2.
residualPlot(Model2)

# Per-term residual plots for Model2; also prints the test table recorded below.
residualPlots(Model2,id.n=10)



## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Paid NA NA
## Impressions_in_100_Log 0.142 0.887
## Impressions_in_100_Log_Sq -2.606 0.010
## ImpressionsRate_Liked_Log 1.253 0.211
## ImpressionsRate_Liked_Log_Sq -3.779 0.000
## ReachRate_Log -0.001 0.999
## ReachRate_Log_Sq 3.501 0.001
## ReachRate_Liked_Log -0.093 0.926
## ReachRate_Liked_Log_Sq -1.192 0.234
## EngagedRate_Log 0.387 0.699
## EngagedRate_Log_Sq -0.578 0.564
## EngagedRate_Liked_Log 0.494 0.622
## EngagedRate_Liked_Log_Sq -1.622 0.106
## ConsumerRate_Log -1.393 0.164
## ConsumerRate_Log_Sq -2.710 0.007
## Interactions -1.171 0.242
## Tukey test 2.364 0.018
# influenceIndexPlot(Model2,id.n=5)
# Cook's distance plot for Model2 (which = 4), with a cutoff of
# 4 / (n - number of coefficients - 2) passed as cook.levels.
cutoff2 <- 4 / (nrow(facebook_train) - length(Model2$coefficients) - 2)
plot(Model2, which = 4, cook.levels = cutoff2)

# Leverage points: hat values of Model2's design matrix, plotted by index.
lev <- hat(model.matrix(Model2))
plot(lev)

# Row labels of the training observations whose leverage exceeds 0.2.
# Reading rownames() directly avoids coercing the whole data frame to a
# character matrix and transposing it just to recover the labels.
rownames(facebook_train)[lev > 0.2]
## [1] "478" "184" "244" "483" "373" "56" "447" "305" "309" "30" "245"
## [12] "464" "72" "477" "493" "461" "416" "403" "141"
# Bonferroni outlier screen for Model2.
# p.adjust() expects p-values, so each observation's studentized residual is
# first converted to a two-sided t-test p-value (df = residual df - 1) and only
# then Bonferroni-adjusted; raw residuals must not be passed to p.adjust().
# Points near 0 are candidate outliers.
plot(
  p.adjust(
    2 * pt(
      abs(rstudent(Model2)),
      df = df.residual(Model2) - 1,
      lower.tail = FALSE
    ),
    method = "bonferroni"
  ),
  ylab = "Bonferroni-adjusted p-value"
)

# Model3: same specification as Model2, refitted without the observations
# flagged in the diagnostics above.
# The previous `facebook_train[-c(97,245,279,413)]` had no comma, so it indexed
# COLUMNS and removed nothing -- which is why the output recorded below is
# identical to Model2's. The numbers are row labels from the diagnostic plots
# (413 exceeds the number of training rows), so rows are dropped by label.
# Labels that did not land in the training split are simply ignored.
Model3 <- lm(
  consumptionRate_Log ~ Type + Category + Paid +
    Impressions_in_100_Log + Impressions_in_100_Log_Sq +
    ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
    ReachRate_Log + ReachRate_Log_Sq +
    ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
    EngagedRate_Log + EngagedRate_Log_Sq +
    EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
    ConsumerRate_Log + ConsumerRate_Log_Sq +
    Interactions,
  data = facebook_train[
    !(rownames(facebook_train) %in% c("97", "245", "279", "413")),
  ]
)
summary(Model3)
##
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid +
## Impressions_in_100_Log + Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log +
## ImpressionsRate_Liked_Log_Sq + ReachRate_Log + ReachRate_Log_Sq +
## ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log +
## EngagedRate_Log_Sq + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
## ConsumerRate_Log + ConsumerRate_Log_Sq + Interactions, data = facebook_train[-c(97,
## 245, 279, 413)])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53883 -0.16885 -0.05037 0.06896 2.38652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.872e+00 1.421e+00 2.021 0.0440 *
## TypePhoto 1.867e-01 1.205e-01 1.549 0.1222
## TypeStatus -1.248e-02 1.554e-01 -0.080 0.9360
## TypeVideo 6.821e-02 2.138e-01 0.319 0.7499
## CategoryInspiration -3.033e-01 5.418e-02 -5.598 4.17e-08 ***
## CategoryProduct -2.458e-01 6.002e-02 -4.096 5.14e-05 ***
## PaidYes 3.225e-02 4.310e-02 0.748 0.4547
## Impressions_in_100_Log -1.828e-01 2.994e-01 -0.611 0.5419
## Impressions_in_100_Log_Sq -6.194e-03 2.638e-02 -0.235 0.8145
## ImpressionsRate_Liked_Log 3.843e-01 1.834e-01 2.095 0.0368 *
## ImpressionsRate_Liked_Log_Sq -2.710e-02 2.853e-02 -0.950 0.3428
## ReachRate_Log -7.292e-01 4.896e-01 -1.489 0.1372
## ReachRate_Log_Sq 3.894e-02 5.038e-02 0.773 0.4401
## ReachRate_Liked_Log 1.560e-01 1.777e-01 0.878 0.3807
## ReachRate_Liked_Log_Sq -5.620e-02 4.075e-02 -1.379 0.1687
## EngagedRate_Log 7.530e-01 3.005e-01 2.506 0.0126 *
## EngagedRate_Log_Sq -6.517e-03 6.038e-02 -0.108 0.9141
## EngagedRate_Liked_Log 5.622e-01 3.523e-01 1.596 0.1113
## EngagedRate_Liked_Log_Sq -4.372e-02 7.144e-02 -0.612 0.5409
## ConsumerRate_Log -5.397e-03 9.922e-02 -0.054 0.9567
## ConsumerRate_Log_Sq -4.534e-02 2.210e-02 -2.052 0.0409 *
## Interactions 5.948e-05 7.128e-05 0.834 0.4046
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.373 on 378 degrees of freedom
## Multiple R-squared: 0.8632, Adjusted R-squared: 0.8556
## F-statistic: 113.6 on 21 and 378 DF, p-value: < 2.2e-16
# Quantile-comparison plot for Model3 with id.n = 10; the recorded output
# lists ten labelled observations (row labels on the first line).
qqPlot(Model3,id.n=10)

## 428 423 437 424 89 425 97 287 269 279
## 391 392 393 394 395 396 397 398 399 400
# Per-term residual plots for Model3; also prints the test table recorded below.
residualPlots(Model3)



## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Paid NA NA
## Impressions_in_100_Log 0.142 0.887
## Impressions_in_100_Log_Sq -2.606 0.010
## ImpressionsRate_Liked_Log 1.253 0.211
## ImpressionsRate_Liked_Log_Sq -3.779 0.000
## ReachRate_Log -0.001 0.999
## ReachRate_Log_Sq 3.501 0.001
## ReachRate_Liked_Log -0.093 0.926
## ReachRate_Liked_Log_Sq -1.192 0.234
## EngagedRate_Log 0.387 0.699
## EngagedRate_Log_Sq -0.578 0.564
## EngagedRate_Liked_Log 0.494 0.622
## EngagedRate_Liked_Log_Sq -1.622 0.106
## ConsumerRate_Log -1.393 0.164
## ConsumerRate_Log_Sq -2.710 0.007
## Interactions -1.171 0.242
## Tukey test 2.364 0.018
# Leverage (hat values) of Model3's design matrix, plotted by index.
lev <- hat(model.matrix(Model3))
plot(lev)

# Positions (within the data Model3 was fitted on) with leverage above 0.2.
which(lev > 0.2)
## [1] 5 18 40 75 96 115 124 156 178 201 228 242 259 262 291 375 387
## [18] 397 400
# Cook's distance cutoff based on the number of observations Model3 actually
# used (nobs), instead of a hard-coded `nrow(facebook_train) - 1`.
cutoff <- 4 / (nobs(Model3) - length(Model3$coefficients) - 2)
plot(Model3, which = 4, cook.levels = cutoff)

# Multicollinearity check ----
# perturb is loaded for colldiag() and MASS for stepAIC(), both used below.
library(perturb)
library(MASS)
# (Generalized) variance inflation factors for Model2's terms; presumably
# car::vif, since car is attached earlier in this script.
vif(Model2)
## GVIF Df GVIF^(1/(2*Df))
## Type 3.990055 3 1.259398
## Category 2.158717 2 1.212129
## Paid 1.093573 1 1.045740
## Impressions_in_100_Log 370.229212 1 19.241341
## Impressions_in_100_Log_Sq 321.275733 1 17.924166
## ImpressionsRate_Liked_Log 113.341715 1 10.646207
## ImpressionsRate_Liked_Log_Sq 64.245229 1 8.015312
## ReachRate_Log 75.450738 1 8.686238
## ReachRate_Log_Sq 61.610364 1 7.849227
## ReachRate_Liked_Log 19.791796 1 4.448797
## ReachRate_Liked_Log_Sq 26.424604 1 5.140487
## EngagedRate_Log 168.750720 1 12.990409
## EngagedRate_Log_Sq 105.775408 1 10.284717
## EngagedRate_Liked_Log 145.779459 1 12.073916
## EngagedRate_Liked_Log_Sq 110.316670 1 10.503174
## ConsumerRate_Log 21.993609 1 4.689734
## ConsumerRate_Log_Sq 22.760082 1 4.770753
## Interactions 2.465581 1 1.570217
# Condition indexes and variance-decomposition proportions for the logged
# predictors and their squares: every "_Log" / "_Log_Sq" column except the
# response, selected by name rather than by column position.
colldiag(facebook_train[, setdiff(
  grep("_Log(_Sq)?$", names(facebook_train), value = TRUE),
  "consumptionRate_Log"
)])
## Condition
## Index Variance Decomposition Proportions
## intercept Impressions_in_100_Log ImpressionsRate_Liked_Log
## 1 1.000 0.000 0.000 0.000
## 2 3.239 0.000 0.000 0.000
## 3 5.223 0.000 0.000 0.000
## 4 9.826 0.000 0.000 0.000
## 5 12.013 0.000 0.000 0.001
## 6 15.298 0.000 0.000 0.000
## 7 23.532 0.000 0.000 0.036
## 8 38.660 0.003 0.000 0.068
## 9 44.567 0.001 0.000 0.005
## 10 54.580 0.000 0.000 0.002
## 11 71.759 0.021 0.002 0.034
## 12 119.259 0.006 0.001 0.001
## 13 216.567 0.015 0.023 0.156
## 14 319.562 0.018 0.753 0.411
## 15 460.012 0.934 0.220 0.284
## ReachRate_Log ReachRate_Liked_Log EngagedRate_Log EngagedRate_Liked_Log
## 1 0.000 0.000 0.000 0.000
## 2 0.000 0.000 0.000 0.000
## 3 0.000 0.000 0.000 0.000
## 4 0.000 0.000 0.000 0.000
## 5 0.000 0.003 0.000 0.000
## 6 0.000 0.000 0.000 0.001
## 7 0.000 0.000 0.004 0.000
## 8 0.000 0.000 0.001 0.002
## 9 0.000 0.000 0.013 0.012
## 10 0.000 0.002 0.021 0.008
## 11 0.001 0.000 0.025 0.016
## 12 0.000 0.944 0.000 0.006
## 13 0.000 0.030 0.819 0.868
## 14 0.158 0.013 0.069 0.031
## 15 0.840 0.007 0.047 0.057
## ConsumerRate_Log Impressions_in_100_Log_Sq ImpressionsRate_Liked_Log_Sq
## 1 0.000 0.000 0.000
## 2 0.000 0.000 0.002
## 3 0.003 0.000 0.002
## 4 0.001 0.000 0.013
## 5 0.000 0.000 0.001
## 6 0.000 0.005 0.017
## 7 0.001 0.000 0.066
## 8 0.002 0.007 0.011
## 9 0.148 0.006 0.033
## 10 0.719 0.000 0.003
## 11 0.033 0.039 0.003
## 12 0.022 0.002 0.005
## 13 0.012 0.021 0.004
## 14 0.049 0.672 0.291
## 15 0.009 0.247 0.547
## ReachRate_Log_Sq ReachRate_Liked_Log_Sq EngagedRate_Log_Sq
## 1 0.000 0.000 0.000
## 2 0.000 0.000 0.000
## 3 0.000 0.000 0.000
## 4 0.001 0.000 0.002
## 5 0.001 0.017 0.000
## 6 0.001 0.001 0.001
## 7 0.003 0.000 0.003
## 8 0.003 0.010 0.036
## 9 0.004 0.004 0.106
## 10 0.000 0.003 0.049
## 11 0.032 0.020 0.001
## 12 0.000 0.840 0.000
## 13 0.016 0.059 0.534
## 14 0.186 0.011 0.199
## 15 0.752 0.034 0.068
## EngagedRate_Liked_Log_Sq ConsumerRate_Log_Sq
## 1 0.000 0.000
## 2 0.000 0.000
## 3 0.000 0.015
## 4 0.001 0.015
## 5 0.000 0.001
## 6 0.005 0.000
## 7 0.011 0.000
## 8 0.043 0.002
## 9 0.000 0.252
## 10 0.007 0.579
## 11 0.111 0.040
## 12 0.006 0.016
## 13 0.624 0.029
## 14 0.103 0.034
## 15 0.088 0.016
# Stepwise model selection by AIC ----
# Starts from Model2 and, at each step, considers both dropping and re-adding
# terms (direction = "both"); the full trace is recorded below.
step <- stepAIC(Model2, direction="both")
## Start: AIC=-767.47
## consumptionRate_Log ~ +Type + Category + Paid + Impressions_in_100_Log +
## Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log +
## ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Log_Sq +
## EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log +
## ConsumerRate_Log_Sq + Interactions
##
## Df Sum of Sq RSS AIC
## - ConsumerRate_Log 1 0.0004 52.604 -769.47
## - EngagedRate_Log_Sq 1 0.0016 52.605 -769.46
## - Impressions_in_100_Log_Sq 1 0.0077 52.611 -769.41
## - Impressions_in_100_Log 1 0.0519 52.655 -769.08
## - EngagedRate_Liked_Log_Sq 1 0.0521 52.656 -769.08
## - Paid 1 0.0779 52.682 -768.88
## - ReachRate_Log_Sq 1 0.0831 52.687 -768.84
## - Interactions 1 0.0969 52.700 -768.74
## - ReachRate_Liked_Log 1 0.1072 52.711 -768.66
## - ImpressionsRate_Liked_Log_Sq 1 0.1256 52.729 -768.52
## <none> 52.604 -767.47
## - ReachRate_Liked_Log_Sq 1 0.2647 52.868 -767.46
## - ReachRate_Log 1 0.3087 52.912 -767.13
## - EngagedRate_Liked_Log 1 0.3545 52.958 -766.79
## - Type 3 0.9948 53.598 -765.98
## - ConsumerRate_Log_Sq 1 0.5860 53.190 -765.04
## - ImpressionsRate_Liked_Log 1 0.6109 53.215 -764.85
## - EngagedRate_Log 1 0.8736 53.477 -762.88
## - Category 2 4.5861 57.190 -738.04
##
## Step: AIC=-769.47
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log +
## Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log +
## ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Log_Sq +
## EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq +
## Interactions
##
## Df Sum of Sq RSS AIC
## - EngagedRate_Log_Sq 1 0.0017 52.606 -771.46
## - Impressions_in_100_Log_Sq 1 0.0074 52.611 -771.41
## - EngagedRate_Liked_Log_Sq 1 0.0517 52.656 -771.08
## - Impressions_in_100_Log 1 0.0535 52.658 -771.06
## - Paid 1 0.0777 52.682 -770.88
## - ReachRate_Log_Sq 1 0.0827 52.687 -770.84
## - Interactions 1 0.1013 52.705 -770.70
## - ReachRate_Liked_Log 1 0.1085 52.713 -770.64
## - ImpressionsRate_Liked_Log_Sq 1 0.1268 52.731 -770.51
## <none> 52.604 -769.47
## - ReachRate_Liked_Log_Sq 1 0.2667 52.871 -769.45
## - ReachRate_Log 1 0.3083 52.912 -769.13
## - EngagedRate_Liked_Log 1 0.3566 52.961 -768.77
## - Type 3 1.0047 53.609 -767.90
## + ConsumerRate_Log 1 0.0004 52.604 -767.47
## - ImpressionsRate_Liked_Log 1 0.6124 53.216 -766.84
## - EngagedRate_Log 1 0.8758 53.480 -764.86
## - Category 2 4.7041 57.308 -739.21
## - ConsumerRate_Log_Sq 1 5.4454 58.049 -732.07
##
## Step: AIC=-771.46
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log +
## Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log +
## ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log +
## EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
##
## Df Sum of Sq RSS AIC
## - Impressions_in_100_Log_Sq 1 0.0178 52.623 -773.32
## - Impressions_in_100_Log 1 0.0553 52.661 -773.04
## - Paid 1 0.0766 52.682 -772.87
## - ReachRate_Log_Sq 1 0.0826 52.688 -772.83
## - ReachRate_Liked_Log 1 0.1134 52.719 -772.60
## - Interactions 1 0.1165 52.722 -772.57
## - ImpressionsRate_Liked_Log_Sq 1 0.1378 52.743 -772.41
## - EngagedRate_Liked_Log_Sq 1 0.2161 52.822 -771.82
## <none> 52.606 -771.46
## - ReachRate_Liked_Log_Sq 1 0.2799 52.886 -771.33
## - ReachRate_Log 1 0.3117 52.917 -771.09
## - Type 3 1.0049 53.611 -769.89
## + EngagedRate_Log_Sq 1 0.0017 52.604 -769.47
## + ConsumerRate_Log 1 0.0004 52.605 -769.46
## - ImpressionsRate_Liked_Log 1 0.6162 53.222 -768.80
## - EngagedRate_Liked_Log 1 1.0095 53.615 -765.85
## - EngagedRate_Log 1 4.0029 56.609 -744.12
## - Category 2 4.7333 57.339 -740.99
## - ConsumerRate_Log_Sq 1 5.5724 58.178 -733.18
##
## Step: AIC=-773.32
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log +
## ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log +
## ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log +
## EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
##
## Df Sum of Sq RSS AIC
## - Paid 1 0.0755 52.699 -774.75
## - ReachRate_Log_Sq 1 0.0868 52.710 -774.66
## - Interactions 1 0.1021 52.726 -774.55
## - ReachRate_Liked_Log 1 0.1182 52.742 -774.42
## - EngagedRate_Liked_Log_Sq 1 0.2289 52.852 -773.58
## <none> 52.623 -773.32
## - ReachRate_Liked_Log_Sq 1 0.2970 52.920 -773.07
## - ReachRate_Log 1 0.3249 52.948 -772.86
## - Type 3 0.9877 53.611 -771.88
## - ImpressionsRate_Liked_Log_Sq 1 0.4909 53.114 -771.61
## + Impressions_in_100_Log_Sq 1 0.0178 52.606 -771.46
## + EngagedRate_Log_Sq 1 0.0121 52.611 -771.41
## + ConsumerRate_Log 1 0.0000 52.623 -771.32
## - Impressions_in_100_Log 1 0.8611 53.485 -768.83
## - EngagedRate_Liked_Log 1 1.0387 53.662 -767.50
## - ImpressionsRate_Liked_Log 1 1.3309 53.954 -765.33
## - EngagedRate_Log 1 4.0131 56.637 -745.92
## - Category 2 4.7471 57.371 -742.77
## - ConsumerRate_Log_Sq 1 6.2918 58.915 -730.15
##
## Step: AIC=-774.75
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log +
## ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log +
## ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log +
## EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
##
## Df Sum of Sq RSS AIC
## - ReachRate_Log_Sq 1 0.0822 52.781 -776.12
## - Interactions 1 0.1083 52.807 -775.93
## - ReachRate_Liked_Log 1 0.1139 52.813 -775.88
## - EngagedRate_Liked_Log_Sq 1 0.2156 52.915 -775.11
## <none> 52.699 -774.75
## - ReachRate_Liked_Log_Sq 1 0.2871 52.986 -774.57
## - ReachRate_Log 1 0.3140 53.013 -774.37
## + Paid 1 0.0755 52.623 -773.32
## - Type 3 1.0198 53.719 -773.08
## - ImpressionsRate_Liked_Log_Sq 1 0.5057 53.205 -772.93
## + Impressions_in_100_Log_Sq 1 0.0167 52.682 -772.87
## + EngagedRate_Log_Sq 1 0.0086 52.690 -772.81
## + ConsumerRate_Log 1 0.0000 52.699 -772.75
## - Impressions_in_100_Log 1 0.8314 53.530 -770.49
## - EngagedRate_Liked_Log 1 1.0079 53.707 -769.17
## - ImpressionsRate_Liked_Log 1 1.3275 54.026 -766.80
## - EngagedRate_Log 1 4.0420 56.741 -747.19
## - Category 2 4.7198 57.419 -744.44
## - ConsumerRate_Log_Sq 1 6.3285 59.027 -731.38
##
## Step: AIC=-776.12
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log +
## ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
## EngagedRate_Log + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
## ConsumerRate_Log_Sq + Interactions
##
## Df Sum of Sq RSS AIC
## - Interactions 1 0.0653 52.846 -777.63
## - ReachRate_Liked_Log 1 0.0954 52.877 -777.40
## - ReachRate_Liked_Log_Sq 1 0.2449 53.026 -776.27
## <none> 52.781 -776.12
## - EngagedRate_Liked_Log_Sq 1 0.2724 53.053 -776.07
## + ReachRate_Log_Sq 1 0.0822 52.699 -774.75
## - Type 3 0.9885 53.770 -774.70
## + Paid 1 0.0708 52.710 -774.66
## - ImpressionsRate_Liked_Log_Sq 1 0.4676 53.249 -774.60
## + Impressions_in_100_Log_Sq 1 0.0207 52.760 -774.28
## + EngagedRate_Log_Sq 1 0.0101 52.771 -774.20
## + ConsumerRate_Log 1 0.0004 52.781 -774.13
## - Impressions_in_100_Log 1 0.9488 53.730 -771.00
## - EngagedRate_Liked_Log 1 1.2261 54.007 -768.94
## - ImpressionsRate_Liked_Log 1 1.2658 54.047 -768.65
## - ReachRate_Log 1 1.5960 54.377 -766.21
## - EngagedRate_Log 1 3.9610 56.742 -749.18
## - Category 2 4.6569 57.438 -746.30
## - ConsumerRate_Log_Sq 1 6.3169 59.098 -732.91
##
## Step: AIC=-777.63
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log +
## ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
## EngagedRate_Log + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
## ConsumerRate_Log_Sq
##
## Df Sum of Sq RSS AIC
## - ReachRate_Liked_Log 1 0.1186 52.965 -778.73
## - EngagedRate_Liked_Log_Sq 1 0.2474 53.094 -777.76
## <none> 52.846 -777.63
## - ReachRate_Liked_Log_Sq 1 0.2783 53.125 -777.53
## - ImpressionsRate_Liked_Log_Sq 1 0.4069 53.253 -776.56
## + Paid 1 0.0769 52.769 -776.21
## - Type 3 0.9998 53.846 -776.13
## + Interactions 1 0.0653 52.781 -776.12
## + ReachRate_Log_Sq 1 0.0391 52.807 -775.93
## + EngagedRate_Log_Sq 1 0.0153 52.831 -775.75
## + Impressions_in_100_Log_Sq 1 0.0057 52.841 -775.67
## + ConsumerRate_Log 1 0.0014 52.845 -775.64
## - Impressions_in_100_Log 1 0.8843 53.731 -772.99
## - EngagedRate_Liked_Log 1 1.1620 54.008 -770.93
## - ImpressionsRate_Liked_Log 1 1.2008 54.047 -770.64
## - ReachRate_Log 1 1.5336 54.380 -768.19
## - Category 2 4.6457 57.492 -747.93
## - EngagedRate_Log 1 4.5315 57.378 -746.72
## - ConsumerRate_Log_Sq 1 7.0408 59.887 -729.60
##
## Step: AIC=-778.73
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log +
## ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
## ReachRate_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log +
## EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq
##
## Df Sum of Sq RSS AIC
## <none> 52.965 -778.73
## - EngagedRate_Liked_Log_Sq 1 0.2853 53.250 -778.59
## + ReachRate_Liked_Log 1 0.1186 52.846 -777.63
## - Type 3 0.9531 53.918 -777.60
## - ReachRate_Liked_Log_Sq 1 0.4418 53.407 -777.41
## + Interactions 1 0.0884 52.877 -777.40
## + Paid 1 0.0741 52.891 -777.29
## - ImpressionsRate_Liked_Log_Sq 1 0.4741 53.439 -777.17
## + EngagedRate_Log_Sq 1 0.0289 52.936 -776.95
## + ReachRate_Log_Sq 1 0.0219 52.943 -776.90
## + Impressions_in_100_Log_Sq 1 0.0064 52.959 -776.78
## + ConsumerRate_Log 1 0.0049 52.960 -776.77
## - Impressions_in_100_Log 1 0.8376 53.802 -774.46
## - ImpressionsRate_Liked_Log 1 1.1640 54.129 -772.04
## - EngagedRate_Liked_Log 1 1.1685 54.133 -772.01
## - ReachRate_Log 1 1.4161 54.381 -770.18
## - Category 2 4.5451 57.510 -749.80
## - EngagedRate_Log 1 4.8413 57.806 -745.75
## - ConsumerRate_Log_Sq 1 7.1453 60.110 -730.11
# Model4: reduced specification on the log scale.
# NOTE(review): this term list is not the same as the final model printed by
# the stepAIC trace above -- confirm which specification is intended.
Model4 <- lm(
  consumptionRate_Log ~ Type + Category +
    Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log +
    ReachRate_Log + ReachRate_Liked_Log +
    EngagedRate_Log + EngagedRate_Log_Sq + EngagedRate_Liked_Log +
    ConsumerRate_Log_Sq,
  data = facebook_train
)
summary(Model4)
##
## Call:
## lm(formula = consumptionRate_Log ~ Type + Category + Impressions_in_100_Log_Sq +
## ImpressionsRate_Liked_Log + ReachRate_Log + ReachRate_Liked_Log +
## EngagedRate_Log + EngagedRate_Log_Sq + EngagedRate_Liked_Log +
## ConsumerRate_Log_Sq, data = facebook_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55261 -0.17149 -0.04124 0.06534 2.39001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.275026 0.507344 2.513 0.0124 *
## TypePhoto 0.153497 0.114133 1.345 0.1794
## TypeStatus -0.062552 0.143319 -0.436 0.6628
## TypeVideo 0.002955 0.204198 0.014 0.9885
## CategoryInspiration -0.276135 0.051430 -5.369 1.37e-07 ***
## CategoryProduct -0.226375 0.057509 -3.936 9.81e-05 ***
## Impressions_in_100_Log_Sq -0.014532 0.008008 -1.815 0.0703 .
## ImpressionsRate_Liked_Log 0.189188 0.084512 2.239 0.0258 *
## ReachRate_Log -0.226380 0.089807 -2.521 0.0121 *
## ReachRate_Liked_Log -0.062315 0.051241 -1.216 0.2247
## EngagedRate_Log 0.994298 0.180939 5.495 7.10e-08 ***
## EngagedRate_Log_Sq -0.048218 0.029570 -1.631 0.1038
## EngagedRate_Liked_Log 0.307729 0.131581 2.339 0.0199 *
## ConsumerRate_Log_Sq -0.048211 0.006459 -7.464 5.61e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3717 on 386 degrees of freedom
## Multiple R-squared: 0.8613, Adjusted R-squared: 0.8566
## F-statistic: 184.4 on 13 and 386 DF, p-value: < 2.2e-16
# Quantile-comparison plot for Model4's residuals.
# NOTE(review): qqPlot/residualPlots are not defined in this chunk; presumably
# they come from the car package loaded earlier in the file -- confirm.
qqPlot(Model4)

# Residual plots for each term of Model4; the call also prints the per-term
# curvature test statistics and the Tukey test shown in the output below.
residualPlots(Model4)


## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Impressions_in_100_Log_Sq -1.068 0.286
## ImpressionsRate_Liked_Log -0.994 0.321
## ReachRate_Log -0.438 0.661
## ReachRate_Liked_Log -1.427 0.154
## EngagedRate_Log 0.305 0.761
## EngagedRate_Log_Sq -0.994 0.321
## EngagedRate_Liked_Log -0.960 0.338
## ConsumerRate_Log_Sq -0.984 0.326
## Tukey test 1.994 0.046
# Build the predictor columns that Model4 expects from a data frame of raw
# rates.
#
# Args:
#   df: data frame with columns Type, Category, Impressions_in_100,
#       ImpressionsRate_Liked, ReachRate, ReachRate_Liked, EngagedRate,
#       EngagedRate_Liked and ConsumerRate.
# Returns:
#   A data frame with df's row names holding Type, Category and the eight
#   log / squared-log terms, in the column order used by the rest of the
#   script.
#
# ReachRate_Liked and ConsumerRate use a reflected log, log(max(x) + 1 - x),
# where max() is taken from df itself.
# NOTE(review): the reflection constant therefore differs between data sets;
# confirm this matches how facebook_train was transformed.
build_model4_features <- function(df) {
  log_impressions <- log(df$Impressions_in_100)
  log_engaged <- log(df$EngagedRate)
  log_consumer <- log(max(df$ConsumerRate) + 1 - df$ConsumerRate)

  # Select the factors by name so a change in column order cannot silently
  # feed the wrong columns to the model.
  features <- df[, c("Type", "Category")]
  features$Impressions_in_100_Log_Sq <- log_impressions^2
  features$ImpressionsRate_Liked_Log <- log(df$ImpressionsRate_Liked)
  features$ReachRate_Log <- log(df$ReachRate)
  features$ReachRate_Liked_Log <-
    log(max(df$ReachRate_Liked) + 1 - df$ReachRate_Liked)
  features$EngagedRate_Log <- log_engaged
  features$EngagedRate_Log_Sq <- log_engaged^2
  features$EngagedRate_Liked_Log <- log(df$EngagedRate_Liked)
  features$ConsumerRate_Log_Sq <- log_consumer^2
  features
}

# Predicted R-squared: 1 - SSE / SST, with SST measured around mean(actual).
#
# Args:
#   actual:    numeric vector of observed responses.
#   predicted: numeric vector of model predictions, same length and order.
# Returns:
#   A single numeric value.
predicted_rsq <- function(actual, predicted) {
  sse <- sum((actual - predicted)^2)
  sst <- sum((actual - mean(actual))^2)
  1 - sse / sst
}

# Score Model4 on the validation split.
facebook_test <- build_model4_features(facebook_Validation)
consumptionRate_Log_Val <- log(facebook_Validation$consumptionRate)
# Use the predict() generic; only the fitted values are needed, so standard
# errors are not requested.
y_hat <- as.vector(predict(Model4, newdata = facebook_test))
Predicted.Rsq <- predicted_rsq(consumptionRate_Log_Val, y_hat)
Predicted.Rsq
## [1] 0.9101413
# Score Model4 on the complete data set. facebook_final and
# consumptionRate_Actual (the log of consumptionRate) are reused further down
# the script.
facebook_final <- build_model4_features(facebook_page)
consumptionRate_Actual <- log(facebook_page$consumptionRate)
y_hat <- as.vector(predict(Model4, newdata = facebook_final))
Predicted.Rsq <- predicted_rsq(consumptionRate_Actual, y_hat)
Predicted.Rsq
## [1] 0.8710698
# Refit on the complete data set. The response consumptionRate_Actual is a
# workspace vector (the log of facebook_page$consumptionRate), not a column of
# facebook_final, so `.` expands to every column of facebook_final and the
# vector must stay row-aligned with that data frame.
Final_Model <- lm(consumptionRate_Actual~., data = facebook_final)
# Coefficient table, residual quantiles and fit statistics for the refit.
summary(Final_Model)
##
## Call:
## lm(formula = consumptionRate_Actual ~ ., data = facebook_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55431 -0.16758 -0.03544 0.06822 2.42801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.318364 0.430055 3.066 0.00229 **
## TypePhoto 0.156413 0.089924 1.739 0.08260 .
## TypeStatus -0.061118 0.115563 -0.529 0.59714
## TypeVideo 0.011553 0.162050 0.071 0.94319
## CategoryInspiration -0.231600 0.043137 -5.369 1.23e-07 ***
## CategoryProduct -0.192807 0.048654 -3.963 8.52e-05 ***
## Impressions_in_100_Log_Sq -0.016981 0.006653 -2.552 0.01101 *
## ImpressionsRate_Liked_Log 0.201324 0.071244 2.826 0.00491 **
## ReachRate_Log -0.247120 0.078402 -3.152 0.00172 **
## ReachRate_Liked_Log -0.027519 0.043329 -0.635 0.52566
## EngagedRate_Log 0.982067 0.145539 6.748 4.28e-11 ***
## EngagedRate_Log_Sq -0.065394 0.024091 -2.714 0.00688 **
## EngagedRate_Liked_Log 0.348820 0.110056 3.169 0.00162 **
## ConsumerRate_Log_Sq -0.052692 0.005561 -9.476 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3542 on 486 degrees of freedom
## Multiple R-squared: 0.872, Adjusted R-squared: 0.8686
## F-statistic: 254.8 on 13 and 486 DF, p-value: < 2.2e-16
# Residual plots for each term of Final_Model; the call also prints the
# per-term curvature test statistics and the Tukey test shown below.
residualPlots(Final_Model)


## Test stat Pr(>|t|)
## Type NA NA
## Category NA NA
## Impressions_in_100_Log_Sq -0.821 0.412
## ImpressionsRate_Liked_Log -0.884 0.377
## ReachRate_Log -0.438 0.661
## ReachRate_Liked_Log -1.716 0.087
## EngagedRate_Log -0.653 0.514
## EngagedRate_Log_Sq 0.084 0.933
## EngagedRate_Liked_Log -0.940 0.348
## ConsumerRate_Log_Sq -0.449 0.654
## Tukey test 1.739 0.082
# Quantile-comparison plot for Final_Model's residuals.
qqPlot(Final_Model, main = "QQ Plot of residuals: Fmodel")

# Histogram of the studentized residuals on the density scale
# (freq = FALSE), overlaid with the standard normal density for comparison.
# NOTE(review): studres() is not defined in this chunk; presumably
# MASS::studres loaded earlier in the file -- confirm.
stu.resid1 <- studres(Final_Model)
hist(stu.resid1, freq = FALSE,
     main = "Distribution of Studentized Residuals")
# 40 evenly spaced points across the observed residual range. `length.out`
# is written in full rather than relying on partial matching of `length`.
xfit1 <- seq(min(stu.resid1), max(stu.resid1), length.out = 40)
yfit1 <- dnorm(xfit1)
lines(xfit1, yfit1)

# Variance inflation factors for Final_Model's terms; the output below
# reports GVIF, Df and GVIF^(1/(2*Df)) per term.
vif(Final_Model)
## GVIF Df GVIF^(1/(2*Df))
## Type 2.488291 3 1.164082
## Category 1.789611 2 1.156617
## Impressions_in_100_Log_Sq 27.282371 1 5.223253
## ImpressionsRate_Liked_Log 22.899746 1 4.785368
## ReachRate_Log 2.217130 1 1.489003
## ReachRate_Liked_Log 1.615168 1 1.270893
## EngagedRate_Log 56.301009 1 7.503400
## EngagedRate_Log_Sq 23.177996 1 4.814353
## EngagedRate_Liked_Log 20.539815 1 4.532087
## ConsumerRate_Log_Sq 1.980869 1 1.407434
# Condition indexes and variance-decomposition proportions for columns 3 to 10
# of facebook_final (the eight log / squared-log terms; the Type and Category
# factors in columns 1-2 are left out).
colldiag(facebook_final[, 3:10])
## Condition
## Index Variance Decomposition Proportions
## intercept Impressions_in_100_Log_Sq ImpressionsRate_Liked_Log
## 1 1.000 0.000 0.000 0.000
## 2 3.353 0.000 0.003 0.004
## 3 5.241 0.000 0.003 0.008
## 4 11.708 0.004 0.003 0.026
## 5 16.661 0.001 0.118 0.134
## 6 18.957 0.001 0.028 0.015
## 7 32.914 0.004 0.064 0.041
## 8 61.539 0.140 0.191 0.256
## 9 104.539 0.850 0.591 0.515
## ReachRate_Log ReachRate_Liked_Log EngagedRate_Log EngagedRate_Log_Sq
## 1 0.000 0.000 0.000 0.000
## 2 0.000 0.000 0.000 0.004
## 3 0.000 0.000 0.000 0.001
## 4 0.012 0.040 0.003 0.026
## 5 0.011 0.039 0.003 0.000
## 6 0.017 0.461 0.000 0.006
## 7 0.011 0.031 0.041 0.566
## 8 0.113 0.116 0.429 0.229
## 9 0.836 0.312 0.524 0.167
## EngagedRate_Liked_Log ConsumerRate_Log_Sq
## 1 0.000 0.002
## 2 0.000 0.022
## 3 0.000 0.481
## 4 0.000 0.246
## 5 0.015 0.012
## 6 0.021 0.008
## 7 0.188 0.155
## 8 0.363 0.058
## 9 0.413 0.016