# Load the Facebook post metrics CSV from GitHub into `facebook_page`.
# The workspace is deliberately not cleared: wiping the caller's global
# environment is a destructive side effect the rest of the script never needs.
library(RCurl)
# stringsAsFactors = TRUE keeps text columns such as Type as factors on
# R >= 4.0 too, where the read.table() default became FALSE.
facebook_page <- read.table(
  text = getURL("https://raw.githubusercontent.com/Rajiv2806/SA-2-Mini-Project-1-Facebook-Page-Sales/master/Facebook.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# The renaming below is positional, so fail early if the file layout changed.
stopifnot(ncol(facebook_page) == 19L)
names(facebook_page) <- c(
  "Page_Likes", "Type", "Category", "Month", "Weekday", "Hour", "Paid",
  "Reach", "Impressions", "Engaged", "Consumers", "Consumptions",
  "Impressions_Liked", "Reach_Liked", "Engaged_Liked",
  "Comment", "Like", "Share", "Interactions"
)

# Reorder columns by name (same order as the former positional index vector
# c(19,2,3,7,4,5,6,8,14,9,13,10,15,11,12,16,17,18,1)).
facebook_page <- facebook_page[, c(
  "Interactions", "Type", "Category", "Paid", "Month", "Weekday", "Hour",
  "Reach", "Reach_Liked", "Impressions", "Impressions_Liked",
  "Engaged", "Engaged_Liked", "Consumers", "Consumptions",
  "Comment", "Like", "Share", "Page_Likes"
)]
# Append the derived metrics: impressions scaled down by 100, and a set of
# ratios between the raw count columns expressed as percentages (ratio * 100).
facebook_page <- cbind(
  facebook_page,
  with(facebook_page, data.frame(
    Impressions_in_100 = Impressions / 100,
    ImpressionsRate_Liked = (Impressions_Liked / Page_Likes) * 100,
    ReachRate = (Reach / Impressions) * 100,
    ReachRate_Liked = (Reach_Liked / Impressions_Liked) * 100,
    EngagedRate = (Engaged / Reach) * 100,
    EngagedRate_Liked = (Engaged_Liked / Reach_Liked) * 100,
    ConsumerRate = (Consumers / Engaged) * 100,
    consumptionRate = (Consumptions / Reach) * 100
  ))
)

# Keep only the modelling columns, response (consumptionRate) first.
facebook_page <- facebook_page[, c(
  "consumptionRate", "Type", "Category", "Paid",
  "Impressions_in_100", "ImpressionsRate_Liked",
  "ReachRate", "ReachRate_Liked",
  "EngagedRate", "EngagedRate_Liked",
  "ConsumerRate", "Interactions"
)]
# Recode Paid (1 -> "Yes", 0 -> "No") and turn it into a factor. Missing
# values stay NA; any other code is kept as its own text level.
facebook_page$Paid <- as.factor(ifelse(
  facebook_page$Paid == 1, "Yes",
  ifelse(facebook_page$Paid == 0, "No", facebook_page$Paid)
))

# Recode Category (1 -> "Action", 2 -> "Product", 3 -> "Inspiration") and
# turn it into a factor; as.factor() orders the levels alphabetically.
facebook_page$Category <- as.factor(ifelse(
  facebook_page$Category == 1, "Action",
  ifelse(
    facebook_page$Category == 2, "Product",
    ifelse(facebook_page$Category == 3, "Inspiration", facebook_page$Category)
  )
))
# Structural checks on the prepared data: container class, dimensions and the
# class of every column.
class(facebook_page)
## [1] "data.frame"
dim(facebook_page)
## [1] 500  12
vapply(facebook_page, class, character(1))
##       consumptionRate                  Type              Category 
##             "numeric"              "factor"              "factor" 
##                  Paid    Impressions_in_100 ImpressionsRate_Liked 
##              "factor"             "numeric"             "numeric" 
##             ReachRate       ReachRate_Liked           EngagedRate 
##             "numeric"             "numeric"             "numeric" 
##     EngagedRate_Liked          ConsumerRate          Interactions 
##             "numeric"             "numeric"             "integer"

# Missing-value audit: total count first, then the count per column.
sum(is.na(facebook_page))
## [1] 1
colSums(is.na(facebook_page))
##       consumptionRate                  Type              Category 
##                     0                     0                     0 
##                  Paid    Impressions_in_100 ImpressionsRate_Liked 
##                     1                     0                     0 
##             ReachRate       ReachRate_Liked           EngagedRate 
##                     0                     0                     0 
##     EngagedRate_Liked          ConsumerRate          Interactions 
##                     0                     0                     0
table(facebook_page[["Paid"]])
## 
##  No Yes 
## 360 139

# The only missing value is in Paid; fill it with "No", the more frequent
# level in the table above, then confirm that no NA is left.
facebook_page$Paid <- replace(
  facebook_page$Paid, is.na(facebook_page$Paid), "No"
)
sum(is.na(facebook_page))
## [1] 0
summary(facebook_page)
##  consumptionRate        Type            Category    Paid    
##  Min.   :  0.4878   Link  : 22   Action     :215   No :361  
##  1st Qu.:  6.7254   Photo :426   Inspiration:155   Yes:139  
##  Median : 16.1678   Status: 45   Product    :130            
##  Mean   : 20.9524   Video :  7                              
##  3rd Qu.: 21.6428                                           
##  Max.   :350.4202                                           
##  Impressions_in_100 ImpressionsRate_Liked   ReachRate     
##  Min.   :    5.70   Min.   :   0.5553     Min.   :  4.47  
##  1st Qu.:   56.95   1st Qu.:   3.1164     1st Qu.: 53.32  
##  Median :   90.51   Median :   5.6189     Median : 56.78  
##  Mean   :  295.86   Mean   :  14.5853     Mean   : 60.46  
##  3rd Qu.:  220.85   3rd Qu.:  12.0230     3rd Qu.: 60.13  
##  Max.   :11102.82   Max.   :1064.5075     Max.   :790.63  
##  ReachRate_Liked   EngagedRate      EngagedRate_Liked  ConsumerRate   
##  Min.   : 4.366   Min.   : 0.4878   Min.   : 0.8929   Min.   : 35.73  
##  1st Qu.:51.452   1st Qu.: 6.4101   1st Qu.: 7.6525   1st Qu.: 81.46  
##  Median :55.097   Median :12.1717   Median :12.3771   Median : 90.48  
##  Mean   :53.984   Mean   :12.4339   Mean   :12.7234   Mean   : 86.46  
##  3rd Qu.:58.012   3rd Qu.:16.2629   3rd Qu.:16.1054   3rd Qu.: 95.33  
##  Max.   :73.220   Max.   :60.0840   Max.   :49.8580   Max.   :100.00  
##   Interactions   
##  Min.   :   0.0  
##  1st Qu.:  71.0  
##  Median : 123.5  
##  Mean   : 212.1  
##  3rd Qu.: 228.5  
##  Max.   :6334.0
# Boxplots of the numeric columns: a 3 x 2 panel for the impression, reach and
# engagement metrics, then a 1 x 3 panel for the remaining three.
par(mfrow = c(3, 2))
invisible(Map(
  function(metric, title) boxplot(facebook_page[[metric]], main = title),
  c(
    "Impressions_in_100", "ImpressionsRate_Liked",
    "ReachRate", "ReachRate_Liked",
    "EngagedRate", "EngagedRate_Liked"
  ),
  c(
    "Total Impressions in 100's", "Impression Rate Who Liked or Page",
    "Reach Rate", "Reach Rate Who Liked or Page",
    "Engaged User Rate", "Engaged Users who Liked Our Page"
  )
))

par(mfrow = c(1, 3))
invisible(Map(
  function(metric, title) boxplot(facebook_page[[metric]], main = title),
  c("Interactions", "ConsumerRate", "consumptionRate"),
  c("Total Interactions", "Consumer Rate", "Consumption Rate")
))

library(PerformanceAnalytics)
# Correlation chart restricted to the numeric columns of facebook_page.
chart.Correlation(Filter(is.numeric, facebook_page))

# Random 80/20 split into training and validation rows.
# A fixed (arbitrary) seed makes the split repeatable; without it every run
# draws different rows, so row labels quoted later in the script (for example
# the observations excluded when fitting Model3) would refer to a different
# training set on each run.
set.seed(123)
rownumbers <- sample(
  seq_len(nrow(facebook_page)),
  size = floor(0.8 * nrow(facebook_page))
)
facebook_train <- facebook_page[rownumbers, ]
facebook_Validation <- facebook_page[-rownumbers, ]
# Baseline linear model: consumptionRate regressed on every other column of
# the training data, all on their original scale.
Model0 <- lm(
  formula = consumptionRate ~ +Type + Category + Paid +
    Impressions_in_100 + ImpressionsRate_Liked +
    ReachRate + ReachRate_Liked +
    EngagedRate + EngagedRate_Liked +
    ConsumerRate + Interactions,
  data = facebook_train
)
summary(Model0)
## 
## Call:
## lm(formula = consumptionRate ~ +Type + Category + Paid + Impressions_in_100 + 
##     ImpressionsRate_Liked + ReachRate + ReachRate_Liked + EngagedRate + 
##     EngagedRate_Liked + ConsumerRate + Interactions, data = facebook_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.030  -9.141  -2.539   2.832 294.351 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -7.301489  20.608221  -0.354 0.723308    
## TypePhoto               5.158267   8.957215   0.576 0.565034    
## TypeStatus              6.653437  11.243788   0.592 0.554370    
## TypeVideo              -3.371814  16.866266  -0.200 0.841653    
## CategoryInspiration   -18.797940   4.278914  -4.393 1.45e-05 ***
## CategoryProduct       -18.180765   4.759086  -3.820 0.000155 ***
## PaidYes                 2.668188   3.645722   0.732 0.464694    
## Impressions_in_100      0.001076   0.004314   0.250 0.803089    
## ImpressionsRate_Liked  -0.009812   0.048962  -0.200 0.841279    
## ReachRate              -0.007536   0.030860  -0.244 0.807200    
## ReachRate_Liked         0.138020   0.247637   0.557 0.577612    
## EngagedRate             3.277702   0.541239   6.056 3.32e-09 ***
## EngagedRate_Liked      -1.062825   0.574488  -1.850 0.065074 .  
## ConsumerRate           -0.011237   0.188436  -0.060 0.952478    
## Interactions            0.002520   0.004926   0.512 0.609258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32 on 385 degrees of freedom
## Multiple R-squared:  0.3152, Adjusted R-squared:  0.2903 
## F-statistic: 12.66 on 14 and 385 DF,  p-value: < 2.2e-16
library(car)
# Quantile-quantile plot of Model0 (qqPlot comes from the car package loaded
# just above).
qqPlot(Model0)

# Residual plots for Model0, one per model term; the call also prints the
# per-term test table recorded in the output that follows.
residualPlots(Model0)

##                       Test stat Pr(>|t|)
## Type                         NA       NA
## Category                     NA       NA
## Paid                         NA       NA
## Impressions_in_100        0.199    0.843
## ImpressionsRate_Liked     0.196    0.845
## ReachRate                 1.568    0.118
## ReachRate_Liked           0.106    0.916
## EngagedRate               2.383    0.018
## EngagedRate_Liked        -1.548    0.123
## ConsumerRate              0.281    0.779
## Interactions             -0.675    0.500
## Tukey test                4.277    0.000
# Add log-scale versions of the response and the continuous predictors to the
# training data. ReachRate_Liked and ConsumerRate are reflected first
# (max + 1 - x) so that the log is taken of a positive quantity that grows as
# the original value shrinks.
facebook_train$consumptionRate_Log <- log(facebook_train$consumptionRate)
facebook_train$Impressions_in_100_Log <- log(facebook_train$Impressions_in_100)
facebook_train$ImpressionsRate_Liked_Log <-
  log(facebook_train$ImpressionsRate_Liked)
facebook_train$ReachRate_Log <- log(facebook_train$ReachRate)
facebook_train$ReachRate_Liked_Log <-
  log(max(facebook_train$ReachRate_Liked) + 1 - facebook_train$ReachRate_Liked)
facebook_train$EngagedRate_Log <- log(facebook_train$EngagedRate)
facebook_train$EngagedRate_Liked_Log <- log(facebook_train$EngagedRate_Liked)
facebook_train$ConsumerRate_Log <-
  log(max(facebook_train$ConsumerRate) + 1 - facebook_train$ConsumerRate)
# Log-scale model: log response on the categorical predictors, the
# log-transformed continuous predictors and the untransformed Interactions.
Model1 <- lm(
  formula = consumptionRate_Log ~ +Type + Category + Paid +
    Impressions_in_100_Log + ImpressionsRate_Liked_Log +
    ReachRate_Log + ReachRate_Liked_Log +
    EngagedRate_Log + EngagedRate_Liked_Log +
    ConsumerRate_Log +
    Interactions,
  data = facebook_train
)

summary(Model1)
## 
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid + 
##     Impressions_in_100_Log + ImpressionsRate_Liked_Log + ReachRate_Log + 
##     ReachRate_Liked_Log + EngagedRate_Log + EngagedRate_Liked_Log + 
##     ConsumerRate_Log + Interactions, data = facebook_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70985 -0.15695 -0.04284  0.06795  2.41531 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.388e+00  6.392e-01   2.172 0.030464 *  
## TypePhoto                  1.347e-01  1.132e-01   1.190 0.234775    
## TypeStatus                -4.941e-02  1.461e-01  -0.338 0.735355    
## TypeVideo                 -4.469e-03  2.066e-01  -0.022 0.982757    
## CategoryInspiration       -2.720e-01  5.289e-02  -5.142 4.35e-07 ***
## CategoryProduct           -2.206e-01  5.961e-02  -3.701 0.000246 ***
## PaidYes                    3.236e-02  4.343e-02   0.745 0.456595    
## Impressions_in_100_Log    -1.028e-01  9.080e-02  -1.132 0.258200    
## ImpressionsRate_Liked_Log  1.764e-01  8.813e-02   2.001 0.046052 *  
## ReachRate_Log             -1.577e-01  8.404e-02  -1.876 0.061392 .  
## ReachRate_Liked_Log       -7.975e-02  5.253e-02  -1.518 0.129804    
## EngagedRate_Log            9.247e-01  1.205e-01   7.672 1.40e-13 ***
## EngagedRate_Liked_Log      2.281e-01  1.316e-01   1.734 0.083741 .  
## ConsumerRate_Log          -1.686e-01  2.816e-02  -5.986 4.93e-09 ***
## Interactions              -6.189e-05  5.780e-05  -1.071 0.284895    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3785 on 385 degrees of freedom
## Multiple R-squared:  0.8566, Adjusted R-squared:  0.8514 
## F-statistic: 164.2 on 14 and 385 DF,  p-value: < 2.2e-16
# Quantile-quantile plot of Model1, asking for 10 points to be identified;
# the labels recorded below are row names of facebook_train.
# NOTE(review): `id.n` looks like the argument spelling of older car
# releases -- confirm it is still honoured by the installed version.
qqPlot(Model1,id.n = 10)

## 436 428 437  89  97 424 425 287 269 279 
## 391 392 393 394 395 396 397 398 399 400
# Single residual plot for Model1.
residualPlot(Model1)

# Residual plots for Model1, one per model term; also prints the per-term
# test table recorded below.
residualPlots(Model1)

##                           Test stat Pr(>|t|)
## Type                             NA       NA
## Category                         NA       NA
## Paid                             NA       NA
## Impressions_in_100_Log       -2.775    0.006
## ImpressionsRate_Liked_Log    -2.881    0.004
## ReachRate_Log                -1.135    0.257
## ReachRate_Liked_Log          -2.089    0.037
## EngagedRate_Log              -2.571    0.011
## EngagedRate_Liked_Log        -2.686    0.008
## ConsumerRate_Log             -2.276    0.023
## Interactions                  0.225    0.822
## Tukey test                   -2.084    0.037
# Influence index plots for Model1, asking for 5 points to be identified.
influenceIndexPlot(Model1,id.n=5)

# Add the square of each log-transformed predictor to the training data so
# that Model2 can include a quadratic term for every one of them.
facebook_train$Impressions_in_100_Log_Sq <-
  facebook_train$Impressions_in_100_Log^2
facebook_train$ImpressionsRate_Liked_Log_Sq <-
  facebook_train$ImpressionsRate_Liked_Log^2
facebook_train$ReachRate_Log_Sq <- facebook_train$ReachRate_Log^2
facebook_train$ReachRate_Liked_Log_Sq <- facebook_train$ReachRate_Liked_Log^2
facebook_train$EngagedRate_Log_Sq <- facebook_train$EngagedRate_Log^2
facebook_train$EngagedRate_Liked_Log_Sq <-
  facebook_train$EngagedRate_Liked_Log^2
facebook_train$ConsumerRate_Log_Sq <- facebook_train$ConsumerRate_Log^2
# Quadratic log-scale model: Model1's terms plus the squared version of every
# log-transformed predictor.
Model2 <- lm(
  formula = consumptionRate_Log ~ +Type + Category + Paid +
    Impressions_in_100_Log + Impressions_in_100_Log_Sq +
    ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
    ReachRate_Log + ReachRate_Log_Sq +
    ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
    EngagedRate_Log + EngagedRate_Log_Sq +
    EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
    ConsumerRate_Log + ConsumerRate_Log_Sq +
    Interactions,
  data = facebook_train
)

summary(Model2)
## 
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid + 
##     Impressions_in_100_Log + Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + 
##     ImpressionsRate_Liked_Log_Sq + ReachRate_Log + ReachRate_Log_Sq + 
##     ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log + 
##     EngagedRate_Log_Sq + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + 
##     ConsumerRate_Log + ConsumerRate_Log_Sq + Interactions, data = facebook_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53883 -0.16885 -0.05037  0.06896  2.38652 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.872e+00  1.421e+00   2.021   0.0440 *  
## TypePhoto                     1.867e-01  1.205e-01   1.549   0.1222    
## TypeStatus                   -1.248e-02  1.554e-01  -0.080   0.9360    
## TypeVideo                     6.821e-02  2.138e-01   0.319   0.7499    
## CategoryInspiration          -3.033e-01  5.418e-02  -5.598 4.17e-08 ***
## CategoryProduct              -2.458e-01  6.002e-02  -4.096 5.14e-05 ***
## PaidYes                       3.225e-02  4.310e-02   0.748   0.4547    
## Impressions_in_100_Log       -1.828e-01  2.994e-01  -0.611   0.5419    
## Impressions_in_100_Log_Sq    -6.194e-03  2.638e-02  -0.235   0.8145    
## ImpressionsRate_Liked_Log     3.843e-01  1.834e-01   2.095   0.0368 *  
## ImpressionsRate_Liked_Log_Sq -2.710e-02  2.853e-02  -0.950   0.3428    
## ReachRate_Log                -7.292e-01  4.896e-01  -1.489   0.1372    
## ReachRate_Log_Sq              3.894e-02  5.038e-02   0.773   0.4401    
## ReachRate_Liked_Log           1.560e-01  1.777e-01   0.878   0.3807    
## ReachRate_Liked_Log_Sq       -5.620e-02  4.075e-02  -1.379   0.1687    
## EngagedRate_Log               7.530e-01  3.005e-01   2.506   0.0126 *  
## EngagedRate_Log_Sq           -6.517e-03  6.038e-02  -0.108   0.9141    
## EngagedRate_Liked_Log         5.622e-01  3.523e-01   1.596   0.1113    
## EngagedRate_Liked_Log_Sq     -4.372e-02  7.144e-02  -0.612   0.5409    
## ConsumerRate_Log             -5.397e-03  9.922e-02  -0.054   0.9567    
## ConsumerRate_Log_Sq          -4.534e-02  2.210e-02  -2.052   0.0409 *  
## Interactions                  5.948e-05  7.128e-05   0.834   0.4046    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.373 on 378 degrees of freedom
## Multiple R-squared:  0.8632, Adjusted R-squared:  0.8556 
## F-statistic: 113.6 on 21 and 378 DF,  p-value: < 2.2e-16
# Quantile-quantile plot of Model2, asking for 10 points to be identified;
# the labels recorded below are row names of facebook_train.
# NOTE(review): `id.n` looks like the argument spelling of older car
# releases -- confirm it is still honoured by the installed version.
qqPlot(Model2,id.n = 10)

## 428 423 437 424  89 425  97 287 269 279 
## 391 392 393 394 395 396 397 398 399 400
# Single residual plot for Model2.
residualPlot(Model2)

# Residual plots for Model2, one per model term; also prints the per-term
# test table recorded in the output that follows.
residualPlots(Model2,id.n=10)

##                              Test stat Pr(>|t|)
## Type                                NA       NA
## Category                            NA       NA
## Paid                                NA       NA
## Impressions_in_100_Log           0.142    0.887
## Impressions_in_100_Log_Sq       -2.606    0.010
## ImpressionsRate_Liked_Log        1.253    0.211
## ImpressionsRate_Liked_Log_Sq    -3.779    0.000
## ReachRate_Log                   -0.001    0.999
## ReachRate_Log_Sq                 3.501    0.001
## ReachRate_Liked_Log             -0.093    0.926
## ReachRate_Liked_Log_Sq          -1.192    0.234
## EngagedRate_Log                  0.387    0.699
## EngagedRate_Log_Sq              -0.578    0.564
## EngagedRate_Liked_Log            0.494    0.622
## EngagedRate_Liked_Log_Sq        -1.622    0.106
## ConsumerRate_Log                -1.393    0.164
## ConsumerRate_Log_Sq             -2.710    0.007
## Interactions                    -1.171    0.242
## Tukey test                       2.364    0.018
# influenceIndexPlot(Model2,id.n=5)
# Cook's distance plot for Model2 (which = 4), passing a reference level of
# 4 / (rows - coefficients - 2).
cutoff2 <- 4 / (nrow(facebook_train) - length(coef(Model2)) - 2)
plot(Model2, which = 4, cook.levels = cutoff2)

# Leverage points: hat values of Model2's design matrix, followed by the row
# labels of the training observations whose leverage exceeds 0.2.
lev <- hat(model.matrix(Model2))
plot(lev)

rownames(facebook_train)[lev > 0.2]
##  [1] "478" "184" "244" "483" "373" "56"  "447" "305" "309" "30"  "245"
## [12] "464" "72"  "477" "493" "461" "416" "403" "141"
# Bonferroni outlier check for Model2. p.adjust() expects p-values, so each
# studentized residual is first turned into a two-sided t-test p-value
# (residual degrees of freedom minus one) instead of passing raw residuals,
# which are not probabilities and made the adjusted values meaningless.
plot(
  p.adjust(
    2 * pt(
      abs(rstudent(Model2)),
      df = df.residual(Model2) - 1,
      lower.tail = FALSE
    ),
    method = "bonferroni"
  ),
  ylab = "Bonferroni-adjusted p-value"
)

# Refit the quadratic log-scale model without observations 97, 245, 279 and
# 413. These are row labels of facebook_train (the training set has fewer
# rows than 413), so rows are matched by name. The previous
# `facebook_train[-c(97, 245, 279, 413)]` had no comma, which indexes
# columns; the out-of-range negative indices dropped nothing, so Model3 was
# an exact copy of Model2. The console output recorded after this call was
# produced by that earlier version and therefore repeats Model2's numbers.
Model3 <- lm(
  formula = consumptionRate_Log ~ +Type + Category + Paid +
    Impressions_in_100_Log + Impressions_in_100_Log_Sq +
    ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq +
    ReachRate_Log + ReachRate_Log_Sq +
    ReachRate_Liked_Log + ReachRate_Liked_Log_Sq +
    EngagedRate_Log + EngagedRate_Log_Sq +
    EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq +
    ConsumerRate_Log + ConsumerRate_Log_Sq +
    Interactions,
  data = facebook_train[
    !(rownames(facebook_train) %in% c("97", "245", "279", "413")),
  ]
)
summary(Model3)
## 
## Call:
## lm(formula = consumptionRate_Log ~ +Type + Category + Paid + 
##     Impressions_in_100_Log + Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + 
##     ImpressionsRate_Liked_Log_Sq + ReachRate_Log + ReachRate_Log_Sq + 
##     ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log + 
##     EngagedRate_Log_Sq + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + 
##     ConsumerRate_Log + ConsumerRate_Log_Sq + Interactions, data = facebook_train[-c(97, 
##     245, 279, 413)])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53883 -0.16885 -0.05037  0.06896  2.38652 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   2.872e+00  1.421e+00   2.021   0.0440 *  
## TypePhoto                     1.867e-01  1.205e-01   1.549   0.1222    
## TypeStatus                   -1.248e-02  1.554e-01  -0.080   0.9360    
## TypeVideo                     6.821e-02  2.138e-01   0.319   0.7499    
## CategoryInspiration          -3.033e-01  5.418e-02  -5.598 4.17e-08 ***
## CategoryProduct              -2.458e-01  6.002e-02  -4.096 5.14e-05 ***
## PaidYes                       3.225e-02  4.310e-02   0.748   0.4547    
## Impressions_in_100_Log       -1.828e-01  2.994e-01  -0.611   0.5419    
## Impressions_in_100_Log_Sq    -6.194e-03  2.638e-02  -0.235   0.8145    
## ImpressionsRate_Liked_Log     3.843e-01  1.834e-01   2.095   0.0368 *  
## ImpressionsRate_Liked_Log_Sq -2.710e-02  2.853e-02  -0.950   0.3428    
## ReachRate_Log                -7.292e-01  4.896e-01  -1.489   0.1372    
## ReachRate_Log_Sq              3.894e-02  5.038e-02   0.773   0.4401    
## ReachRate_Liked_Log           1.560e-01  1.777e-01   0.878   0.3807    
## ReachRate_Liked_Log_Sq       -5.620e-02  4.075e-02  -1.379   0.1687    
## EngagedRate_Log               7.530e-01  3.005e-01   2.506   0.0126 *  
## EngagedRate_Log_Sq           -6.517e-03  6.038e-02  -0.108   0.9141    
## EngagedRate_Liked_Log         5.622e-01  3.523e-01   1.596   0.1113    
## EngagedRate_Liked_Log_Sq     -4.372e-02  7.144e-02  -0.612   0.5409    
## ConsumerRate_Log             -5.397e-03  9.922e-02  -0.054   0.9567    
## ConsumerRate_Log_Sq          -4.534e-02  2.210e-02  -2.052   0.0409 *  
## Interactions                  5.948e-05  7.128e-05   0.834   0.4046    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.373 on 378 degrees of freedom
## Multiple R-squared:  0.8632, Adjusted R-squared:  0.8556 
## F-statistic: 113.6 on 21 and 378 DF,  p-value: < 2.2e-16
# Quantile-quantile plot of Model3, asking for 10 points to be identified.
# The labels recorded below are the same as those recorded for Model2.
# NOTE(review): `id.n` looks like the argument spelling of older car
# releases -- confirm it is still honoured by the installed version.
qqPlot(Model3,id.n=10)

## 428 423 437 424  89 425  97 287 269 279 
## 391 392 393 394 395 396 397 398 399 400
# Residual plots for Model3, one per model term; also prints the per-term
# test table recorded in the output that follows.
residualPlots(Model3)

##                              Test stat Pr(>|t|)
## Type                                NA       NA
## Category                            NA       NA
## Paid                                NA       NA
## Impressions_in_100_Log           0.142    0.887
## Impressions_in_100_Log_Sq       -2.606    0.010
## ImpressionsRate_Liked_Log        1.253    0.211
## ImpressionsRate_Liked_Log_Sq    -3.779    0.000
## ReachRate_Log                   -0.001    0.999
## ReachRate_Log_Sq                 3.501    0.001
## ReachRate_Liked_Log             -0.093    0.926
## ReachRate_Liked_Log_Sq          -1.192    0.234
## EngagedRate_Log                  0.387    0.699
## EngagedRate_Log_Sq              -0.578    0.564
## EngagedRate_Liked_Log            0.494    0.622
## EngagedRate_Liked_Log_Sq        -1.622    0.106
## ConsumerRate_Log                -1.393    0.164
## ConsumerRate_Log_Sq             -2.710    0.007
## Interactions                    -1.171    0.242
## Tukey test                       2.364    0.018
# Leverage for Model3: hat values of its design matrix, then the positions of
# the observations whose leverage exceeds 0.2.
lev <- hat(model.matrix(Model3))
plot(lev)

which(lev > 0.2)
##  [1]   5  18  40  75  96 115 124 156 178 201 228 242 259 262 291 375 387
## [18] 397 400

# Cook's distance plot for Model3 (which = 4) with a reference level.
# NOTE(review): the denominator starts from nrow(facebook_train) - 1 rather
# than the number of rows Model3 was actually fitted on -- confirm intent.
cutoff <- 4 / (nrow(facebook_train) - 1 - length(coef(Model3)) - 2)
plot(Model3, which = 4, cook.levels = cutoff)

####multicollinearity check
library(perturb)
library(MASS)
# Variance inflation factors for Model2's terms (GVIF table, car package).
car::vif(Model2)
##                                    GVIF Df GVIF^(1/(2*Df))
## Type                           3.990055  3        1.259398
## Category                       2.158717  2        1.212129
## Paid                           1.093573  1        1.045740
## Impressions_in_100_Log       370.229212  1       19.241341
## Impressions_in_100_Log_Sq    321.275733  1       17.924166
## ImpressionsRate_Liked_Log    113.341715  1       10.646207
## ImpressionsRate_Liked_Log_Sq  64.245229  1        8.015312
## ReachRate_Log                 75.450738  1        8.686238
## ReachRate_Log_Sq              61.610364  1        7.849227
## ReachRate_Liked_Log           19.791796  1        4.448797
## ReachRate_Liked_Log_Sq        26.424604  1        5.140487
## EngagedRate_Log              168.750720  1       12.990409
## EngagedRate_Log_Sq           105.775408  1       10.284717
## EngagedRate_Liked_Log        145.779459  1       12.073916
## EngagedRate_Liked_Log_Sq     110.316670  1       10.503174
## ConsumerRate_Log              21.993609  1        4.689734
## ConsumerRate_Log_Sq           22.760082  1        4.770753
## Interactions                   2.465581  1        1.570217
# Condition indexes and variance decomposition proportions for columns 14 to
# 27 of the training data (the log and squared-log predictors, per the
# variable names in the recorded output).
colldiag(facebook_train[, 14:27])
## Condition
## Index    Variance Decomposition Proportions
##             intercept Impressions_in_100_Log ImpressionsRate_Liked_Log
## 1     1.000 0.000     0.000                  0.000                    
## 2     3.239 0.000     0.000                  0.000                    
## 3     5.223 0.000     0.000                  0.000                    
## 4     9.826 0.000     0.000                  0.000                    
## 5    12.013 0.000     0.000                  0.001                    
## 6    15.298 0.000     0.000                  0.000                    
## 7    23.532 0.000     0.000                  0.036                    
## 8    38.660 0.003     0.000                  0.068                    
## 9    44.567 0.001     0.000                  0.005                    
## 10   54.580 0.000     0.000                  0.002                    
## 11   71.759 0.021     0.002                  0.034                    
## 12  119.259 0.006     0.001                  0.001                    
## 13  216.567 0.015     0.023                  0.156                    
## 14  319.562 0.018     0.753                  0.411                    
## 15  460.012 0.934     0.220                  0.284                    
##    ReachRate_Log ReachRate_Liked_Log EngagedRate_Log EngagedRate_Liked_Log
## 1  0.000         0.000               0.000           0.000                
## 2  0.000         0.000               0.000           0.000                
## 3  0.000         0.000               0.000           0.000                
## 4  0.000         0.000               0.000           0.000                
## 5  0.000         0.003               0.000           0.000                
## 6  0.000         0.000               0.000           0.001                
## 7  0.000         0.000               0.004           0.000                
## 8  0.000         0.000               0.001           0.002                
## 9  0.000         0.000               0.013           0.012                
## 10 0.000         0.002               0.021           0.008                
## 11 0.001         0.000               0.025           0.016                
## 12 0.000         0.944               0.000           0.006                
## 13 0.000         0.030               0.819           0.868                
## 14 0.158         0.013               0.069           0.031                
## 15 0.840         0.007               0.047           0.057                
##    ConsumerRate_Log Impressions_in_100_Log_Sq ImpressionsRate_Liked_Log_Sq
## 1  0.000            0.000                     0.000                       
## 2  0.000            0.000                     0.002                       
## 3  0.003            0.000                     0.002                       
## 4  0.001            0.000                     0.013                       
## 5  0.000            0.000                     0.001                       
## 6  0.000            0.005                     0.017                       
## 7  0.001            0.000                     0.066                       
## 8  0.002            0.007                     0.011                       
## 9  0.148            0.006                     0.033                       
## 10 0.719            0.000                     0.003                       
## 11 0.033            0.039                     0.003                       
## 12 0.022            0.002                     0.005                       
## 13 0.012            0.021                     0.004                       
## 14 0.049            0.672                     0.291                       
## 15 0.009            0.247                     0.547                       
##    ReachRate_Log_Sq ReachRate_Liked_Log_Sq EngagedRate_Log_Sq
## 1  0.000            0.000                  0.000             
## 2  0.000            0.000                  0.000             
## 3  0.000            0.000                  0.000             
## 4  0.001            0.000                  0.002             
## 5  0.001            0.017                  0.000             
## 6  0.001            0.001                  0.001             
## 7  0.003            0.000                  0.003             
## 8  0.003            0.010                  0.036             
## 9  0.004            0.004                  0.106             
## 10 0.000            0.003                  0.049             
## 11 0.032            0.020                  0.001             
## 12 0.000            0.840                  0.000             
## 13 0.016            0.059                  0.534             
## 14 0.186            0.011                  0.199             
## 15 0.752            0.034                  0.068             
##    EngagedRate_Liked_Log_Sq ConsumerRate_Log_Sq
## 1  0.000                    0.000              
## 2  0.000                    0.000              
## 3  0.000                    0.015              
## 4  0.001                    0.015              
## 5  0.000                    0.001              
## 6  0.005                    0.000              
## 7  0.011                    0.000              
## 8  0.043                    0.002              
## 9  0.000                    0.252              
## 10 0.007                    0.579              
## 11 0.111                    0.040              
## 12 0.006                    0.016              
## 13 0.624                    0.029              
## 14 0.103                    0.034              
## 15 0.088                    0.016
### Stepwise model selection by AIC (both directions), starting from Model2
# Stepwise AIC search starting from Model2, allowing terms to be dropped and
# re-added; the search trace is printed as it runs.
# NOTE(review): the result name `step` shadows stats::step() in this session.
step <- MASS::stepAIC(Model2, direction = "both")
## Start:  AIC=-767.47
## consumptionRate_Log ~ +Type + Category + Paid + Impressions_in_100_Log + 
##     Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log + 
##     ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Log_Sq + 
##     EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log + 
##     ConsumerRate_Log_Sq + Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - ConsumerRate_Log              1    0.0004 52.604 -769.47
## - EngagedRate_Log_Sq            1    0.0016 52.605 -769.46
## - Impressions_in_100_Log_Sq     1    0.0077 52.611 -769.41
## - Impressions_in_100_Log        1    0.0519 52.655 -769.08
## - EngagedRate_Liked_Log_Sq      1    0.0521 52.656 -769.08
## - Paid                          1    0.0779 52.682 -768.88
## - ReachRate_Log_Sq              1    0.0831 52.687 -768.84
## - Interactions                  1    0.0969 52.700 -768.74
## - ReachRate_Liked_Log           1    0.1072 52.711 -768.66
## - ImpressionsRate_Liked_Log_Sq  1    0.1256 52.729 -768.52
## <none>                                      52.604 -767.47
## - ReachRate_Liked_Log_Sq        1    0.2647 52.868 -767.46
## - ReachRate_Log                 1    0.3087 52.912 -767.13
## - EngagedRate_Liked_Log         1    0.3545 52.958 -766.79
## - Type                          3    0.9948 53.598 -765.98
## - ConsumerRate_Log_Sq           1    0.5860 53.190 -765.04
## - ImpressionsRate_Liked_Log     1    0.6109 53.215 -764.85
## - EngagedRate_Log               1    0.8736 53.477 -762.88
## - Category                      2    4.5861 57.190 -738.04
## 
## Step:  AIC=-769.47
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log + 
##     Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log + 
##     ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Log_Sq + 
##     EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + 
##     Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - EngagedRate_Log_Sq            1    0.0017 52.606 -771.46
## - Impressions_in_100_Log_Sq     1    0.0074 52.611 -771.41
## - EngagedRate_Liked_Log_Sq      1    0.0517 52.656 -771.08
## - Impressions_in_100_Log        1    0.0535 52.658 -771.06
## - Paid                          1    0.0777 52.682 -770.88
## - ReachRate_Log_Sq              1    0.0827 52.687 -770.84
## - Interactions                  1    0.1013 52.705 -770.70
## - ReachRate_Liked_Log           1    0.1085 52.713 -770.64
## - ImpressionsRate_Liked_Log_Sq  1    0.1268 52.731 -770.51
## <none>                                      52.604 -769.47
## - ReachRate_Liked_Log_Sq        1    0.2667 52.871 -769.45
## - ReachRate_Log                 1    0.3083 52.912 -769.13
## - EngagedRate_Liked_Log         1    0.3566 52.961 -768.77
## - Type                          3    1.0047 53.609 -767.90
## + ConsumerRate_Log              1    0.0004 52.604 -767.47
## - ImpressionsRate_Liked_Log     1    0.6124 53.216 -766.84
## - EngagedRate_Log               1    0.8758 53.480 -764.86
## - Category                      2    4.7041 57.308 -739.21
## - ConsumerRate_Log_Sq           1    5.4454 58.049 -732.07
## 
## Step:  AIC=-771.46
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log + 
##     Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log + 
##     ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log + 
##     EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - Impressions_in_100_Log_Sq     1    0.0178 52.623 -773.32
## - Impressions_in_100_Log        1    0.0553 52.661 -773.04
## - Paid                          1    0.0766 52.682 -772.87
## - ReachRate_Log_Sq              1    0.0826 52.688 -772.83
## - ReachRate_Liked_Log           1    0.1134 52.719 -772.60
## - Interactions                  1    0.1165 52.722 -772.57
## - ImpressionsRate_Liked_Log_Sq  1    0.1378 52.743 -772.41
## - EngagedRate_Liked_Log_Sq      1    0.2161 52.822 -771.82
## <none>                                      52.606 -771.46
## - ReachRate_Liked_Log_Sq        1    0.2799 52.886 -771.33
## - ReachRate_Log                 1    0.3117 52.917 -771.09
## - Type                          3    1.0049 53.611 -769.89
## + EngagedRate_Log_Sq            1    0.0017 52.604 -769.47
## + ConsumerRate_Log              1    0.0004 52.605 -769.46
## - ImpressionsRate_Liked_Log     1    0.6162 53.222 -768.80
## - EngagedRate_Liked_Log         1    1.0095 53.615 -765.85
## - EngagedRate_Log               1    4.0029 56.609 -744.12
## - Category                      2    4.7333 57.339 -740.99
## - ConsumerRate_Log_Sq           1    5.5724 58.178 -733.18
## 
## Step:  AIC=-773.32
## consumptionRate_Log ~ Type + Category + Paid + Impressions_in_100_Log + 
##     ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log + 
##     ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log + 
##     EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - Paid                          1    0.0755 52.699 -774.75
## - ReachRate_Log_Sq              1    0.0868 52.710 -774.66
## - Interactions                  1    0.1021 52.726 -774.55
## - ReachRate_Liked_Log           1    0.1182 52.742 -774.42
## - EngagedRate_Liked_Log_Sq      1    0.2289 52.852 -773.58
## <none>                                      52.623 -773.32
## - ReachRate_Liked_Log_Sq        1    0.2970 52.920 -773.07
## - ReachRate_Log                 1    0.3249 52.948 -772.86
## - Type                          3    0.9877 53.611 -771.88
## - ImpressionsRate_Liked_Log_Sq  1    0.4909 53.114 -771.61
## + Impressions_in_100_Log_Sq     1    0.0178 52.606 -771.46
## + EngagedRate_Log_Sq            1    0.0121 52.611 -771.41
## + ConsumerRate_Log              1    0.0000 52.623 -771.32
## - Impressions_in_100_Log        1    0.8611 53.485 -768.83
## - EngagedRate_Liked_Log         1    1.0387 53.662 -767.50
## - ImpressionsRate_Liked_Log     1    1.3309 53.954 -765.33
## - EngagedRate_Log               1    4.0131 56.637 -745.92
## - Category                      2    4.7471 57.371 -742.77
## - ConsumerRate_Log_Sq           1    6.2918 58.915 -730.15
## 
## Step:  AIC=-774.75
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log + 
##     ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Log_Sq + ReachRate_Liked_Log + 
##     ReachRate_Liked_Log_Sq + EngagedRate_Log + EngagedRate_Liked_Log + 
##     EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq + Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - ReachRate_Log_Sq              1    0.0822 52.781 -776.12
## - Interactions                  1    0.1083 52.807 -775.93
## - ReachRate_Liked_Log           1    0.1139 52.813 -775.88
## - EngagedRate_Liked_Log_Sq      1    0.2156 52.915 -775.11
## <none>                                      52.699 -774.75
## - ReachRate_Liked_Log_Sq        1    0.2871 52.986 -774.57
## - ReachRate_Log                 1    0.3140 53.013 -774.37
## + Paid                          1    0.0755 52.623 -773.32
## - Type                          3    1.0198 53.719 -773.08
## - ImpressionsRate_Liked_Log_Sq  1    0.5057 53.205 -772.93
## + Impressions_in_100_Log_Sq     1    0.0167 52.682 -772.87
## + EngagedRate_Log_Sq            1    0.0086 52.690 -772.81
## + ConsumerRate_Log              1    0.0000 52.699 -772.75
## - Impressions_in_100_Log        1    0.8314 53.530 -770.49
## - EngagedRate_Liked_Log         1    1.0079 53.707 -769.17
## - ImpressionsRate_Liked_Log     1    1.3275 54.026 -766.80
## - EngagedRate_Log               1    4.0420 56.741 -747.19
## - Category                      2    4.7198 57.419 -744.44
## - ConsumerRate_Log_Sq           1    6.3285 59.027 -731.38
## 
## Step:  AIC=-776.12
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log + 
##     ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + 
##     EngagedRate_Log + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + 
##     ConsumerRate_Log_Sq + Interactions
## 
##                                Df Sum of Sq    RSS     AIC
## - Interactions                  1    0.0653 52.846 -777.63
## - ReachRate_Liked_Log           1    0.0954 52.877 -777.40
## - ReachRate_Liked_Log_Sq        1    0.2449 53.026 -776.27
## <none>                                      52.781 -776.12
## - EngagedRate_Liked_Log_Sq      1    0.2724 53.053 -776.07
## + ReachRate_Log_Sq              1    0.0822 52.699 -774.75
## - Type                          3    0.9885 53.770 -774.70
## + Paid                          1    0.0708 52.710 -774.66
## - ImpressionsRate_Liked_Log_Sq  1    0.4676 53.249 -774.60
## + Impressions_in_100_Log_Sq     1    0.0207 52.760 -774.28
## + EngagedRate_Log_Sq            1    0.0101 52.771 -774.20
## + ConsumerRate_Log              1    0.0004 52.781 -774.13
## - Impressions_in_100_Log        1    0.9488 53.730 -771.00
## - EngagedRate_Liked_Log         1    1.2261 54.007 -768.94
## - ImpressionsRate_Liked_Log     1    1.2658 54.047 -768.65
## - ReachRate_Log                 1    1.5960 54.377 -766.21
## - EngagedRate_Log               1    3.9610 56.742 -749.18
## - Category                      2    4.6569 57.438 -746.30
## - ConsumerRate_Log_Sq           1    6.3169 59.098 -732.91
## 
## Step:  AIC=-777.63
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log + 
##     ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Liked_Log + ReachRate_Liked_Log_Sq + 
##     EngagedRate_Log + EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + 
##     ConsumerRate_Log_Sq
## 
##                                Df Sum of Sq    RSS     AIC
## - ReachRate_Liked_Log           1    0.1186 52.965 -778.73
## - EngagedRate_Liked_Log_Sq      1    0.2474 53.094 -777.76
## <none>                                      52.846 -777.63
## - ReachRate_Liked_Log_Sq        1    0.2783 53.125 -777.53
## - ImpressionsRate_Liked_Log_Sq  1    0.4069 53.253 -776.56
## + Paid                          1    0.0769 52.769 -776.21
## - Type                          3    0.9998 53.846 -776.13
## + Interactions                  1    0.0653 52.781 -776.12
## + ReachRate_Log_Sq              1    0.0391 52.807 -775.93
## + EngagedRate_Log_Sq            1    0.0153 52.831 -775.75
## + Impressions_in_100_Log_Sq     1    0.0057 52.841 -775.67
## + ConsumerRate_Log              1    0.0014 52.845 -775.64
## - Impressions_in_100_Log        1    0.8843 53.731 -772.99
## - EngagedRate_Liked_Log         1    1.1620 54.008 -770.93
## - ImpressionsRate_Liked_Log     1    1.2008 54.047 -770.64
## - ReachRate_Log                 1    1.5336 54.380 -768.19
## - Category                      2    4.6457 57.492 -747.93
## - EngagedRate_Log               1    4.5315 57.378 -746.72
## - ConsumerRate_Log_Sq           1    7.0408 59.887 -729.60
## 
## Step:  AIC=-778.73
## consumptionRate_Log ~ Type + Category + Impressions_in_100_Log + 
##     ImpressionsRate_Liked_Log + ImpressionsRate_Liked_Log_Sq + 
##     ReachRate_Log + ReachRate_Liked_Log_Sq + EngagedRate_Log + 
##     EngagedRate_Liked_Log + EngagedRate_Liked_Log_Sq + ConsumerRate_Log_Sq
## 
##                                Df Sum of Sq    RSS     AIC
## <none>                                      52.965 -778.73
## - EngagedRate_Liked_Log_Sq      1    0.2853 53.250 -778.59
## + ReachRate_Liked_Log           1    0.1186 52.846 -777.63
## - Type                          3    0.9531 53.918 -777.60
## - ReachRate_Liked_Log_Sq        1    0.4418 53.407 -777.41
## + Interactions                  1    0.0884 52.877 -777.40
## + Paid                          1    0.0741 52.891 -777.29
## - ImpressionsRate_Liked_Log_Sq  1    0.4741 53.439 -777.17
## + EngagedRate_Log_Sq            1    0.0289 52.936 -776.95
## + ReachRate_Log_Sq              1    0.0219 52.943 -776.90
## + Impressions_in_100_Log_Sq     1    0.0064 52.959 -776.78
## + ConsumerRate_Log              1    0.0049 52.960 -776.77
## - Impressions_in_100_Log        1    0.8376 53.802 -774.46
## - ImpressionsRate_Liked_Log     1    1.1640 54.129 -772.04
## - EngagedRate_Liked_Log         1    1.1685 54.133 -772.01
## - ReachRate_Log                 1    1.4161 54.381 -770.18
## - Category                      2    4.5451 57.510 -749.80
## - EngagedRate_Log               1    4.8413 57.806 -745.75
## - ConsumerRate_Log_Sq           1    7.1453 60.110 -730.11
# Model4: linear model for the log consumption rate, fitted on facebook_train.
# NOTE(review): the term set is not the one the stepwise AIC trace printed
# above ends on. That trace keeps Impressions_in_100_Log,
# ImpressionsRate_Liked_Log_Sq, ReachRate_Liked_Log_Sq and
# EngagedRate_Liked_Log_Sq, whereas this model uses Impressions_in_100_Log_Sq,
# ReachRate_Liked_Log and EngagedRate_Log_Sq -- confirm this is intentional.
Model4 <- lm(
  consumptionRate_Log ~ Type + Category +
    Impressions_in_100_Log_Sq + ImpressionsRate_Liked_Log +
    ReachRate_Log + ReachRate_Liked_Log +
    EngagedRate_Log + EngagedRate_Log_Sq + EngagedRate_Liked_Log +
    ConsumerRate_Log_Sq,
  data = facebook_train
)

# Coefficient table and overall fit statistics for Model4.
summary(Model4)
## 
## Call:
## lm(formula = consumptionRate_Log ~ Type + Category + Impressions_in_100_Log_Sq + 
##     ImpressionsRate_Liked_Log + ReachRate_Log + ReachRate_Liked_Log + 
##     EngagedRate_Log + EngagedRate_Log_Sq + EngagedRate_Liked_Log + 
##     ConsumerRate_Log_Sq, data = facebook_train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55261 -0.17149 -0.04124  0.06534  2.39001 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.275026   0.507344   2.513   0.0124 *  
## TypePhoto                  0.153497   0.114133   1.345   0.1794    
## TypeStatus                -0.062552   0.143319  -0.436   0.6628    
## TypeVideo                  0.002955   0.204198   0.014   0.9885    
## CategoryInspiration       -0.276135   0.051430  -5.369 1.37e-07 ***
## CategoryProduct           -0.226375   0.057509  -3.936 9.81e-05 ***
## Impressions_in_100_Log_Sq -0.014532   0.008008  -1.815   0.0703 .  
## ImpressionsRate_Liked_Log  0.189188   0.084512   2.239   0.0258 *  
## ReachRate_Log             -0.226380   0.089807  -2.521   0.0121 *  
## ReachRate_Liked_Log       -0.062315   0.051241  -1.216   0.2247    
## EngagedRate_Log            0.994298   0.180939   5.495 7.10e-08 ***
## EngagedRate_Log_Sq        -0.048218   0.029570  -1.631   0.1038    
## EngagedRate_Liked_Log      0.307729   0.131581   2.339   0.0199 *  
## ConsumerRate_Log_Sq       -0.048211   0.006459  -7.464 5.61e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3717 on 386 degrees of freedom
## Multiple R-squared:  0.8613, Adjusted R-squared:  0.8566 
## F-statistic: 184.4 on 13 and 386 DF,  p-value: < 2.2e-16
# Q-Q plot for Model4 (normality check of the residuals).
qqPlot(Model4)

# Residual plots for Model4; the call also prints the per-term test table
# (including the Tukey test) reproduced below.
residualPlots(Model4)

##                           Test stat Pr(>|t|)
## Type                             NA       NA
## Category                         NA       NA
## Impressions_in_100_Log_Sq    -1.068    0.286
## ImpressionsRate_Liked_Log    -0.994    0.321
## ReachRate_Log                -0.438    0.661
## ReachRate_Liked_Log          -1.427    0.154
## EngagedRate_Log               0.305    0.761
## EngagedRate_Log_Sq           -0.994    0.321
## EngagedRate_Liked_Log        -0.960    0.338
## ConsumerRate_Log_Sq          -0.984    0.326
## Tukey test                    1.994    0.046
# Build the predictor frame that Model4 needs for the validation rows:
# Type, Category and the eight transformed numeric predictors, in that order.
#
# Columns are selected and created by name rather than by position, so a
# change in the column layout of facebook_Validation cannot silently feed the
# wrong variables to predict(). Everything is evaluated inside local(), so no
# temporary vectors are created in (or have to be removed from) the global
# environment. Row names of facebook_Validation are carried over.
#
# NOTE(review): the two reflected logs use this data set's own maximum as the
# reflection constant. If the training features were built with a different
# maximum, the transformed values are not on the same scale as the ones Model4
# was fitted on -- confirm against the training transform.
facebook_test <- local({
  val <- facebook_Validation

  # Reflect-and-log: log(max + 1 - x).
  reflect_log <- function(x) log(max(x) + 1 - x)

  data.frame(
    val[, c("Type", "Category")],
    Impressions_in_100_Log_Sq = log(val$Impressions_in_100)^2,
    ImpressionsRate_Liked_Log = log(val$ImpressionsRate_Liked),
    ReachRate_Log             = log(val$ReachRate),
    ReachRate_Liked_Log       = reflect_log(val$ReachRate_Liked),
    EngagedRate_Log           = log(val$EngagedRate),
    EngagedRate_Log_Sq        = log(val$EngagedRate)^2,
    EngagedRate_Liked_Log     = log(val$EngagedRate_Liked),
    ConsumerRate_Log_Sq       = reflect_log(val$ConsumerRate)^2
  )
})

# R-squared of Model4 on the validation rows, computed on the log scale as
# 1 - (sum of squared prediction errors) / (total sum of squares).
consumptionRate_Log_Val <- log(facebook_Validation$consumptionRate)

# Point predictions only; as.vector() drops the row-name labels.
y_hat <- as.vector(predict(Model4, newdata = facebook_test))

dev  <- consumptionRate_Log_Val - y_hat
dev1 <- consumptionRate_Log_Val - mean(consumptionRate_Log_Val)
num  <- sum(dev^2)
den  <- sum(dev1^2)

Predicted.Rsq <- 1 - num / den
Predicted.Rsq
## [1] 0.9101413
# Build the predictor frame for the full data set (all rows of facebook_page):
# Type, Category and the eight transformed numeric predictors, in that order.
# The column order matters: the later lm(... ~ .) fit and the
# colldiag(facebook_final[, 3:10]) call both rely on it.
#
# Columns are selected and created by name rather than by position, so a
# change in the column layout of facebook_page cannot silently pick the wrong
# variables. Everything is evaluated inside local(), so no temporary vectors
# are created in (or have to be removed from) the global environment.
#
# NOTE(review): the two reflected logs use the full data set's maximum as the
# reflection constant. If Model4's training features were built with a
# different maximum, predictions from Model4 on this frame use shifted
# inputs -- confirm against the training transform.
facebook_final <- local({
  page <- facebook_page

  # Reflect-and-log: log(max + 1 - x).
  reflect_log <- function(x) log(max(x) + 1 - x)

  data.frame(
    page[, c("Type", "Category")],
    Impressions_in_100_Log_Sq = log(page$Impressions_in_100)^2,
    ImpressionsRate_Liked_Log = log(page$ImpressionsRate_Liked),
    ReachRate_Log             = log(page$ReachRate),
    ReachRate_Liked_Log       = reflect_log(page$ReachRate_Liked),
    EngagedRate_Log           = log(page$EngagedRate),
    EngagedRate_Log_Sq        = log(page$EngagedRate)^2,
    EngagedRate_Liked_Log     = log(page$EngagedRate_Liked),
    ConsumerRate_Log_Sq       = reflect_log(page$ConsumerRate)^2
  )
})

# R-squared of Model4 on the full data set, computed on the log scale as
# 1 - (sum of squared prediction errors) / (total sum of squares).
# consumptionRate_Actual is kept as a global because the final model fit
# below uses it as its response.
consumptionRate_Actual <- log(facebook_page$consumptionRate)

# Point predictions only; as.vector() drops the row-name labels.
y_hat <- as.vector(predict(Model4, newdata = facebook_final))

dev  <- consumptionRate_Actual - y_hat
dev1 <- consumptionRate_Actual - mean(consumptionRate_Actual)
num  <- sum(dev^2)
den  <- sum(dev1^2)

Predicted.Rsq <- 1 - num / den
Predicted.Rsq
## [1] 0.8710698
# Refit the Model4 specification on the full data set: `.` expands to every
# column of facebook_final (Type, Category and the eight transformed
# predictors).
# NOTE: the response consumptionRate_Actual is not a column of facebook_final;
# lm() picks it up from the enclosing environment. Both are derived from
# facebook_page, so they must stay row-aligned.
Final_Model <- lm(consumptionRate_Actual~., data = facebook_final)
# Coefficient table and overall fit statistics for the final model.
summary(Final_Model)
## 
## Call:
## lm(formula = consumptionRate_Actual ~ ., data = facebook_final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55431 -0.16758 -0.03544  0.06822  2.42801 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.318364   0.430055   3.066  0.00229 ** 
## TypePhoto                  0.156413   0.089924   1.739  0.08260 .  
## TypeStatus                -0.061118   0.115563  -0.529  0.59714    
## TypeVideo                  0.011553   0.162050   0.071  0.94319    
## CategoryInspiration       -0.231600   0.043137  -5.369 1.23e-07 ***
## CategoryProduct           -0.192807   0.048654  -3.963 8.52e-05 ***
## Impressions_in_100_Log_Sq -0.016981   0.006653  -2.552  0.01101 *  
## ImpressionsRate_Liked_Log  0.201324   0.071244   2.826  0.00491 ** 
## ReachRate_Log             -0.247120   0.078402  -3.152  0.00172 ** 
## ReachRate_Liked_Log       -0.027519   0.043329  -0.635  0.52566    
## EngagedRate_Log            0.982067   0.145539   6.748 4.28e-11 ***
## EngagedRate_Log_Sq        -0.065394   0.024091  -2.714  0.00688 ** 
## EngagedRate_Liked_Log      0.348820   0.110056   3.169  0.00162 ** 
## ConsumerRate_Log_Sq       -0.052692   0.005561  -9.476  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3542 on 486 degrees of freedom
## Multiple R-squared:  0.872,  Adjusted R-squared:  0.8686 
## F-statistic: 254.8 on 13 and 486 DF,  p-value: < 2.2e-16
# Residual plots for the final model; the call also prints the per-term test
# table (including the Tukey test) reproduced below.
residualPlots(Final_Model)

##                           Test stat Pr(>|t|)
## Type                             NA       NA
## Category                         NA       NA
## Impressions_in_100_Log_Sq    -0.821    0.412
## ImpressionsRate_Liked_Log    -0.884    0.377
## ReachRate_Log                -0.438    0.661
## ReachRate_Liked_Log          -1.716    0.087
## EngagedRate_Log              -0.653    0.514
## EngagedRate_Log_Sq            0.084    0.933
## EngagedRate_Liked_Log        -0.940    0.348
## ConsumerRate_Log_Sq          -0.449    0.654
## Tukey test                    1.739    0.082
# Q-Q plot for the final model.
qqPlot(Final_Model, main = "QQ Plot of residuals: Fmodel")

# Density-scaled histogram of the studentized residuals with a standard
# normal curve drawn over the observed residual range for comparison.
stu.resid1 <- studres(Final_Model)
hist(
  stu.resid1,
  freq = FALSE,
  main = "Distribution of Studentized Residuals"
)
xfit1 <- seq(min(stu.resid1), max(stu.resid1), length.out = 40)
yfit1 <- dnorm(xfit1)
lines(xfit1, yfit1)

# Variance inflation factors for the final model's terms.
vif(Final_Model)
##                                GVIF Df GVIF^(1/(2*Df))
## Type                       2.488291  3        1.164082
## Category                   1.789611  2        1.156617
## Impressions_in_100_Log_Sq 27.282371  1        5.223253
## ImpressionsRate_Liked_Log 22.899746  1        4.785368
## ReachRate_Log              2.217130  1        1.489003
## ReachRate_Liked_Log        1.615168  1        1.270893
## EngagedRate_Log           56.301009  1        7.503400
## EngagedRate_Log_Sq        23.177996  1        4.814353
## EngagedRate_Liked_Log     20.539815  1        4.532087
## ConsumerRate_Log_Sq        1.980869  1        1.407434
# Collinearity diagnostics (condition indexes and variance decomposition
# proportions) for columns 3-10 of facebook_final, i.e. the eight transformed
# numeric predictors; the factor columns Type and Category (1-2) are left out.
colldiag(facebook_final[,c(3:10)])
## Condition
## Index    Variance Decomposition Proportions
##            intercept Impressions_in_100_Log_Sq ImpressionsRate_Liked_Log
## 1    1.000 0.000     0.000                     0.000                    
## 2    3.353 0.000     0.003                     0.004                    
## 3    5.241 0.000     0.003                     0.008                    
## 4   11.708 0.004     0.003                     0.026                    
## 5   16.661 0.001     0.118                     0.134                    
## 6   18.957 0.001     0.028                     0.015                    
## 7   32.914 0.004     0.064                     0.041                    
## 8   61.539 0.140     0.191                     0.256                    
## 9  104.539 0.850     0.591                     0.515                    
##   ReachRate_Log ReachRate_Liked_Log EngagedRate_Log EngagedRate_Log_Sq
## 1 0.000         0.000               0.000           0.000             
## 2 0.000         0.000               0.000           0.004             
## 3 0.000         0.000               0.000           0.001             
## 4 0.012         0.040               0.003           0.026             
## 5 0.011         0.039               0.003           0.000             
## 6 0.017         0.461               0.000           0.006             
## 7 0.011         0.031               0.041           0.566             
## 8 0.113         0.116               0.429           0.229             
## 9 0.836         0.312               0.524           0.167             
##   EngagedRate_Liked_Log ConsumerRate_Log_Sq
## 1 0.000                 0.002              
## 2 0.000                 0.022              
## 3 0.000                 0.481              
## 4 0.000                 0.246              
## 5 0.015                 0.012              
## 6 0.021                 0.008              
## 7 0.188                 0.155              
## 8 0.363                 0.058              
## 9 0.413                 0.016