# Load the raw Online News Popularity CSV into a data frame; the first row
# of the file supplies the column names.
# NOTE(review): the path below is absolute and machine-specific -- adjust it
# before running this script on another machine.

popNews <- read.csv(
  file = 'D:/MLDS/Datasets/OnlineNewsPopularity(1)/OnlineNewsPopularity/OnlineNewsPopularity.csv',
  header = TRUE
)

# Per-column summary statistics of the data before any preprocessing:

summary(object = popNews)
##                                                              url       
##  http://mashable.com/2013/01/07/amazon-instant-video-browser/  :    1  
##  http://mashable.com/2013/01/07/ap-samsung-sponsored-tweets/   :    1  
##  http://mashable.com/2013/01/07/apple-40-billion-app-downloads/:    1  
##  http://mashable.com/2013/01/07/astronaut-notre-dame-bcs/      :    1  
##  http://mashable.com/2013/01/07/att-u-verse-apps/              :    1  
##  http://mashable.com/2013/01/07/beewi-smart-toys/              :    1  
##  (Other)                                                       :39638  
##    timedelta     n_tokens_title n_tokens_content n_unique_tokens   
##  Min.   :  8.0   Min.   : 2.0   Min.   :   0.0   Min.   :  0.0000  
##  1st Qu.:164.0   1st Qu.: 9.0   1st Qu.: 246.0   1st Qu.:  0.4709  
##  Median :339.0   Median :10.0   Median : 409.0   Median :  0.5392  
##  Mean   :354.5   Mean   :10.4   Mean   : 546.5   Mean   :  0.5482  
##  3rd Qu.:542.0   3rd Qu.:12.0   3rd Qu.: 716.0   3rd Qu.:  0.6087  
##  Max.   :731.0   Max.   :23.0   Max.   :8474.0   Max.   :701.0000  
##                                                                    
##  n_non_stop_words    n_non_stop_unique_tokens   num_hrefs     
##  Min.   :   0.0000   Min.   :  0.0000         Min.   :  0.00  
##  1st Qu.:   1.0000   1st Qu.:  0.6257         1st Qu.:  4.00  
##  Median :   1.0000   Median :  0.6905         Median :  8.00  
##  Mean   :   0.9965   Mean   :  0.6892         Mean   : 10.88  
##  3rd Qu.:   1.0000   3rd Qu.:  0.7546         3rd Qu.: 14.00  
##  Max.   :1042.0000   Max.   :650.0000         Max.   :304.00  
##                                                               
##  num_self_hrefs       num_imgs         num_videos    average_token_length
##  Min.   :  0.000   Min.   :  0.000   Min.   : 0.00   Min.   :0.000       
##  1st Qu.:  1.000   1st Qu.:  1.000   1st Qu.: 0.00   1st Qu.:4.478       
##  Median :  3.000   Median :  1.000   Median : 0.00   Median :4.664       
##  Mean   :  3.294   Mean   :  4.544   Mean   : 1.25   Mean   :4.548       
##  3rd Qu.:  4.000   3rd Qu.:  4.000   3rd Qu.: 1.00   3rd Qu.:4.855       
##  Max.   :116.000   Max.   :128.000   Max.   :91.00   Max.   :8.042       
##                                                                          
##   num_keywords    data_channel_is_lifestyle data_channel_is_entertainment
##  Min.   : 1.000   Min.   :0.00000           Min.   :0.000                
##  1st Qu.: 6.000   1st Qu.:0.00000           1st Qu.:0.000                
##  Median : 7.000   Median :0.00000           Median :0.000                
##  Mean   : 7.224   Mean   :0.05295           Mean   :0.178                
##  3rd Qu.: 9.000   3rd Qu.:0.00000           3rd Qu.:0.000                
##  Max.   :10.000   Max.   :1.00000           Max.   :1.000                
##                                                                          
##  data_channel_is_bus data_channel_is_socmed data_channel_is_tech
##  Min.   :0.0000      Min.   :0.0000         Min.   :0.0000      
##  1st Qu.:0.0000      1st Qu.:0.0000         1st Qu.:0.0000      
##  Median :0.0000      Median :0.0000         Median :0.0000      
##  Mean   :0.1579      Mean   :0.0586         Mean   :0.1853      
##  3rd Qu.:0.0000      3rd Qu.:0.0000         3rd Qu.:0.0000      
##  Max.   :1.0000      Max.   :1.0000         Max.   :1.0000      
##                                                                 
##  data_channel_is_world   kw_min_min       kw_max_min       kw_avg_min     
##  Min.   :0.0000        Min.   : -1.00   Min.   :     0   Min.   :   -1.0  
##  1st Qu.:0.0000        1st Qu.: -1.00   1st Qu.:   445   1st Qu.:  141.8  
##  Median :0.0000        Median : -1.00   Median :   660   Median :  235.5  
##  Mean   :0.2126        Mean   : 26.11   Mean   :  1154   Mean   :  312.4  
##  3rd Qu.:0.0000        3rd Qu.:  4.00   3rd Qu.:  1000   3rd Qu.:  357.0  
##  Max.   :1.0000        Max.   :377.00   Max.   :298400   Max.   :42827.9  
##                                                                           
##    kw_min_max       kw_max_max       kw_avg_max       kw_min_avg  
##  Min.   :     0   Min.   :     0   Min.   :     0   Min.   :  -1  
##  1st Qu.:     0   1st Qu.:843300   1st Qu.:172847   1st Qu.:   0  
##  Median :  1400   Median :843300   Median :244572   Median :1024  
##  Mean   : 13612   Mean   :752324   Mean   :259282   Mean   :1117  
##  3rd Qu.:  7900   3rd Qu.:843300   3rd Qu.:330980   3rd Qu.:2057  
##  Max.   :843300   Max.   :843300   Max.   :843300   Max.   :3613  
##                                                                   
##    kw_max_avg       kw_avg_avg    self_reference_min_shares
##  Min.   :     0   Min.   :    0   Min.   :     0           
##  1st Qu.:  3562   1st Qu.: 2382   1st Qu.:   639           
##  Median :  4356   Median : 2870   Median :  1200           
##  Mean   :  5657   Mean   : 3136   Mean   :  3999           
##  3rd Qu.:  6020   3rd Qu.: 3600   3rd Qu.:  2600           
##  Max.   :298400   Max.   :43568   Max.   :843300           
##                                                            
##  self_reference_max_shares self_reference_avg_sharess weekday_is_monday
##  Min.   :     0            Min.   :     0.0           Min.   :0.000    
##  1st Qu.:  1100            1st Qu.:   981.2           1st Qu.:0.000    
##  Median :  2800            Median :  2200.0           Median :0.000    
##  Mean   : 10329            Mean   :  6401.7           Mean   :0.168    
##  3rd Qu.:  8000            3rd Qu.:  5200.0           3rd Qu.:0.000    
##  Max.   :843300            Max.   :843300.0           Max.   :1.000    
##                                                                        
##  weekday_is_tuesday weekday_is_wednesday weekday_is_thursday
##  Min.   :0.0000     Min.   :0.0000       Min.   :0.0000     
##  1st Qu.:0.0000     1st Qu.:0.0000       1st Qu.:0.0000     
##  Median :0.0000     Median :0.0000       Median :0.0000     
##  Mean   :0.1864     Mean   :0.1875       Mean   :0.1833     
##  3rd Qu.:0.0000     3rd Qu.:0.0000       3rd Qu.:0.0000     
##  Max.   :1.0000     Max.   :1.0000       Max.   :1.0000     
##                                                             
##  weekday_is_friday weekday_is_saturday weekday_is_sunday   is_weekend    
##  Min.   :0.0000    Min.   :0.00000     Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000    1st Qu.:0.00000     1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000    Median :0.00000     Median :0.00000   Median :0.0000  
##  Mean   :0.1438    Mean   :0.06188     Mean   :0.06904   Mean   :0.1309  
##  3rd Qu.:0.0000    3rd Qu.:0.00000     3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.0000    Max.   :1.00000     Max.   :1.00000   Max.   :1.0000  
##                                                                          
##      LDA_00            LDA_01            LDA_02            LDA_03       
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.02505   1st Qu.:0.02501   1st Qu.:0.02857   1st Qu.:0.02857  
##  Median :0.03339   Median :0.03334   Median :0.04000   Median :0.04000  
##  Mean   :0.18460   Mean   :0.14126   Mean   :0.21632   Mean   :0.22377  
##  3rd Qu.:0.24096   3rd Qu.:0.15083   3rd Qu.:0.33422   3rd Qu.:0.37576  
##  Max.   :0.92699   Max.   :0.92595   Max.   :0.92000   Max.   :0.92653  
##                                                                         
##      LDA_04        global_subjectivity global_sentiment_polarity
##  Min.   :0.00000   Min.   :0.0000      Min.   :-0.39375         
##  1st Qu.:0.02857   1st Qu.:0.3962      1st Qu.: 0.05776         
##  Median :0.04073   Median :0.4535      Median : 0.11912         
##  Mean   :0.23403   Mean   :0.4434      Mean   : 0.11931         
##  3rd Qu.:0.39999   3rd Qu.:0.5083      3rd Qu.: 0.17783         
##  Max.   :0.92719   Max.   :1.0000      Max.   : 0.72784         
##                                                                 
##  global_rate_positive_words global_rate_negative_words rate_positive_words
##  Min.   :0.00000            Min.   :0.000000           Min.   :0.0000     
##  1st Qu.:0.02838            1st Qu.:0.009615           1st Qu.:0.6000     
##  Median :0.03902            Median :0.015337           Median :0.7105     
##  Mean   :0.03962            Mean   :0.016612           Mean   :0.6822     
##  3rd Qu.:0.05028            3rd Qu.:0.021739           3rd Qu.:0.8000     
##  Max.   :0.15549            Max.   :0.184932           Max.   :1.0000     
##                                                                           
##  rate_negative_words avg_positive_polarity min_positive_polarity
##  Min.   :0.0000      Min.   :0.0000        Min.   :0.00000      
##  1st Qu.:0.1852      1st Qu.:0.3062        1st Qu.:0.05000      
##  Median :0.2800      Median :0.3588        Median :0.10000      
##  Mean   :0.2879      Mean   :0.3538        Mean   :0.09545      
##  3rd Qu.:0.3846      3rd Qu.:0.4114        3rd Qu.:0.10000      
##  Max.   :1.0000      Max.   :1.0000        Max.   :1.00000      
##                                                                 
##  max_positive_polarity avg_negative_polarity min_negative_polarity
##  Min.   :0.0000        Min.   :-1.0000       Min.   :-1.0000      
##  1st Qu.:0.6000        1st Qu.:-0.3284       1st Qu.:-0.7000      
##  Median :0.8000        Median :-0.2533       Median :-0.5000      
##  Mean   :0.7567        Mean   :-0.2595       Mean   :-0.5219      
##  3rd Qu.:1.0000        3rd Qu.:-0.1869       3rd Qu.:-0.3000      
##  Max.   :1.0000        Max.   : 0.0000       Max.   : 0.0000      
##                                                                   
##  max_negative_polarity title_subjectivity title_sentiment_polarity
##  Min.   :-1.0000       Min.   :0.0000     Min.   :-1.00000        
##  1st Qu.:-0.1250       1st Qu.:0.0000     1st Qu.: 0.00000        
##  Median :-0.1000       Median :0.1500     Median : 0.00000        
##  Mean   :-0.1075       Mean   :0.2824     Mean   : 0.07143        
##  3rd Qu.:-0.0500       3rd Qu.:0.5000     3rd Qu.: 0.15000        
##  Max.   : 0.0000       Max.   :1.0000     Max.   : 1.00000        
##                                                                   
##  abs_title_subjectivity abs_title_sentiment_polarity     shares      
##  Min.   :0.0000         Min.   :0.0000               Min.   :     1  
##  1st Qu.:0.1667         1st Qu.:0.0000               1st Qu.:   946  
##  Median :0.5000         Median :0.0000               Median :  1400  
##  Mean   :0.3418         Mean   :0.1561               Mean   :  3395  
##  3rd Qu.:0.5000         3rd Qu.:0.2500               3rd Qu.:  2800  
##  Max.   :0.5000         Max.   :1.0000               Max.   :843300  
## 
# Compact listing of every column's type and leading values (raw data).
str(object = popNews)
## 'data.frame':    39644 obs. of  61 variables:
##  $ url                          : Factor w/ 39644 levels "http://mashable.com/2013/01/07/amazon-instant-video-browser/",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ timedelta                    : num  731 731 731 731 731 731 731 731 731 731 ...
##  $ n_tokens_title               : num  12 9 9 9 13 10 8 12 11 10 ...
##  $ n_tokens_content             : num  219 255 211 531 1072 ...
##  $ n_unique_tokens              : num  0.664 0.605 0.575 0.504 0.416 ...
##  $ n_non_stop_words             : num  1 1 1 1 1 ...
##  $ n_non_stop_unique_tokens     : num  0.815 0.792 0.664 0.666 0.541 ...
##  $ num_hrefs                    : num  4 3 3 9 19 2 21 20 2 4 ...
##  $ num_self_hrefs               : num  2 1 1 0 19 2 20 20 0 1 ...
##  $ num_imgs                     : num  1 1 1 1 20 0 20 20 0 1 ...
##  $ num_videos                   : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ average_token_length         : num  4.68 4.91 4.39 4.4 4.68 ...
##  $ num_keywords                 : num  5 4 6 7 7 9 10 9 7 5 ...
##  $ data_channel_is_lifestyle    : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ data_channel_is_entertainment: num  1 0 0 1 0 0 0 0 0 0 ...
##  $ data_channel_is_bus          : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ data_channel_is_socmed       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ data_channel_is_tech         : num  0 0 0 0 1 1 0 1 1 0 ...
##  $ data_channel_is_world        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ kw_min_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ self_reference_min_shares    : num  496 0 918 0 545 8500 545 545 0 0 ...
##  $ self_reference_max_shares    : num  496 0 918 0 16000 8500 16000 16000 0 0 ...
##  $ self_reference_avg_sharess   : num  496 0 918 0 3151 ...
##  $ weekday_is_monday            : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_tuesday           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_wednesday         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_thursday          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_friday            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_saturday          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_sunday            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_weekend                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ LDA_00                       : num  0.5003 0.7998 0.2178 0.0286 0.0286 ...
##  $ LDA_01                       : num  0.3783 0.05 0.0333 0.4193 0.0288 ...
##  $ LDA_02                       : num  0.04 0.0501 0.0334 0.4947 0.0286 ...
##  $ LDA_03                       : num  0.0413 0.0501 0.0333 0.0289 0.0286 ...
##  $ LDA_04                       : num  0.0401 0.05 0.6822 0.0286 0.8854 ...
##  $ global_subjectivity          : num  0.522 0.341 0.702 0.43 0.514 ...
##  $ global_sentiment_polarity    : num  0.0926 0.1489 0.3233 0.1007 0.281 ...
##  $ global_rate_positive_words   : num  0.0457 0.0431 0.0569 0.0414 0.0746 ...
##  $ global_rate_negative_words   : num  0.0137 0.01569 0.00948 0.02072 0.01213 ...
##  $ rate_positive_words          : num  0.769 0.733 0.857 0.667 0.86 ...
##  $ rate_negative_words          : num  0.231 0.267 0.143 0.333 0.14 ...
##  $ avg_positive_polarity        : num  0.379 0.287 0.496 0.386 0.411 ...
##  $ min_positive_polarity        : num  0.1 0.0333 0.1 0.1364 0.0333 ...
##  $ max_positive_polarity        : num  0.7 0.7 1 0.8 1 0.6 1 1 0.8 0.5 ...
##  $ avg_negative_polarity        : num  -0.35 -0.119 -0.467 -0.37 -0.22 ...
##  $ min_negative_polarity        : num  -0.6 -0.125 -0.8 -0.6 -0.5 -0.4 -0.5 -0.5 -0.125 -0.5 ...
##  $ max_negative_polarity        : num  -0.2 -0.1 -0.133 -0.167 -0.05 ...
##  $ title_subjectivity           : num  0.5 0 0 0 0.455 ...
##  $ title_sentiment_polarity     : num  -0.188 0 0 0 0.136 ...
##  $ abs_title_subjectivity       : num  0 0.5 0.5 0.5 0.0455 ...
##  $ abs_title_sentiment_polarity : num  0.188 0 0 0 0.136 ...
##  $ shares                       : int  593 711 1500 1200 505 855 556 891 3600 710 ...
# Exploratory Data Analysis and Cleaning Data:

# First, count the missing (NA) cells across the whole data frame:

sum(is.na(x = popNews))
## [1] 0
# There are no missing values

# The summary above shows implausible maxima in "n_unique_tokens" (701),
# "n_non_stop_words" (1042) and "n_non_stop_unique_tokens" (650), although
# the quartiles of all three columns lie within [0, 1]. The offending
# observation is removed.
#
# The filter keeps rows whose n_unique_tokens is within the valid range
# instead of testing for exact equality with 701: `==` on doubles only
# matches when the stored value is exactly the (rounded) number printed by
# summary(), and would otherwise silently remove nothing.

popNews <- popNews[popNews$n_unique_tokens <= 1, ]

# url and timedelta are non-predictive variables, so they are left out of the
# analysis. is_weekend repeats information already carried by the
# weekday_is_* columns, so it is dropped as well.

popNews <- subset(popNews, select = -c(url, timedelta, is_weekend))

# The weekday_is_* and data_channel_is_* columns are 0/1 indicators stored as
# numeric. Convert every one of them to a factor with 2 levels in a single
# pass instead of one hand-written factor() call per column.

indicator_cols <- grep(
  "^(weekday_is_|data_channel_is_)",
  names(popNews),
  value = TRUE
)
popNews[indicator_cols] <- lapply(popNews[indicator_cols], factor)

# Summary and structure of the data after preprocessing
# (outlier removed, unused columns dropped, indicators converted to factors):

summary(object = popNews)
##  n_tokens_title n_tokens_content n_unique_tokens  n_non_stop_words
##  Min.   : 2.0   Min.   :   0.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 9.0   1st Qu.: 246.0   1st Qu.:0.4709   1st Qu.:1.0000  
##  Median :10.0   Median : 409.0   Median :0.5392   Median :1.0000  
##  Mean   :10.4   Mean   : 546.5   Mean   :0.5305   Mean   :0.9702  
##  3rd Qu.:12.0   3rd Qu.: 716.0   3rd Qu.:0.6087   3rd Qu.:1.0000  
##  Max.   :23.0   Max.   :8474.0   Max.   :1.0000   Max.   :1.0000  
##  n_non_stop_unique_tokens   num_hrefs      num_self_hrefs   
##  Min.   :0.0000           Min.   :  0.00   Min.   :  0.000  
##  1st Qu.:0.6257           1st Qu.:  4.00   1st Qu.:  1.000  
##  Median :0.6905           Median :  8.00   Median :  3.000  
##  Mean   :0.6728           Mean   : 10.88   Mean   :  3.293  
##  3rd Qu.:0.7546           3rd Qu.: 14.00   3rd Qu.:  4.000  
##  Max.   :1.0000           Max.   :304.00   Max.   :116.000  
##     num_imgs         num_videos    average_token_length  num_keywords   
##  Min.   :  0.000   Min.   : 0.00   Min.   :0.000        Min.   : 1.000  
##  1st Qu.:  1.000   1st Qu.: 0.00   1st Qu.:4.478        1st Qu.: 6.000  
##  Median :  1.000   Median : 0.00   Median :4.664        Median : 7.000  
##  Mean   :  4.543   Mean   : 1.25   Mean   :4.548        Mean   : 7.224  
##  3rd Qu.:  4.000   3rd Qu.: 1.00   3rd Qu.:4.855        3rd Qu.: 9.000  
##  Max.   :128.000   Max.   :91.00   Max.   :8.042        Max.   :10.000  
##  data_channel_is_lifestyle data_channel_is_entertainment
##  0:37544                   0:32587                      
##  1: 2099                   1: 7056                      
##                                                         
##                                                         
##                                                         
##                                                         
##  data_channel_is_bus data_channel_is_socmed data_channel_is_tech
##  0:33385             0:37320                0:32297             
##  1: 6258             1: 2323                1: 7346             
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  data_channel_is_world   kw_min_min       kw_max_min       kw_avg_min     
##  0:31216               Min.   : -1.00   Min.   :     0   Min.   :   -1.0  
##  1: 8427               1st Qu.: -1.00   1st Qu.:   445   1st Qu.:  141.8  
##                        Median : -1.00   Median :   660   Median :  235.5  
##                        Mean   : 26.11   Mean   :  1154   Mean   :  312.4  
##                        3rd Qu.:  4.00   3rd Qu.:  1000   3rd Qu.:  357.0  
##                        Max.   :377.00   Max.   :298400   Max.   :42827.9  
##    kw_min_max       kw_max_max       kw_avg_max       kw_min_avg  
##  Min.   :     0   Min.   :     0   Min.   :     0   Min.   :  -1  
##  1st Qu.:     0   1st Qu.:843300   1st Qu.:172844   1st Qu.:   0  
##  Median :  1400   Median :843300   Median :244567   Median :1024  
##  Mean   : 13612   Mean   :752322   Mean   :259280   Mean   :1117  
##  3rd Qu.:  7900   3rd Qu.:843300   3rd Qu.:330980   3rd Qu.:2057  
##  Max.   :843300   Max.   :843300   Max.   :843300   Max.   :3613  
##    kw_max_avg       kw_avg_avg    self_reference_min_shares
##  Min.   :     0   Min.   :    0   Min.   :     0           
##  1st Qu.:  3562   1st Qu.: 2382   1st Qu.:   639           
##  Median :  4356   Median : 2870   Median :  1200           
##  Mean   :  5657   Mean   : 3136   Mean   :  3999           
##  3rd Qu.:  6020   3rd Qu.: 3600   3rd Qu.:  2600           
##  Max.   :298400   Max.   :43568   Max.   :843300           
##  self_reference_max_shares self_reference_avg_sharess weekday_is_monday
##  Min.   :     0            Min.   :     0.0           0:32982          
##  1st Qu.:  1100            1st Qu.:   981.1           1: 6661          
##  Median :  2800            Median :  2200.0                            
##  Mean   : 10330            Mean   :  6401.7                            
##  3rd Qu.:  8000            3rd Qu.:  5200.0                            
##  Max.   :843300            Max.   :843300.0                            
##  weekday_is_tuesday weekday_is_wednesday weekday_is_thursday
##  0:32254            0:32208              0:32376            
##  1: 7389            1: 7435              1: 7267            
##                                                             
##                                                             
##                                                             
##                                                             
##  weekday_is_friday weekday_is_saturday weekday_is_sunday     LDA_00       
##  0:33942           0:37190             0:36906           Min.   :0.01818  
##  1: 5701           1: 2453             1: 2737           1st Qu.:0.02505  
##                                                          Median :0.03339  
##                                                          Mean   :0.18460  
##                                                          3rd Qu.:0.24097  
##                                                          Max.   :0.92699  
##      LDA_01            LDA_02            LDA_03            LDA_04       
##  Min.   :0.01818   Min.   :0.01818   Min.   :0.01818   Min.   :0.01818  
##  1st Qu.:0.02501   1st Qu.:0.02857   1st Qu.:0.02857   1st Qu.:0.02857  
##  Median :0.03334   Median :0.04000   Median :0.04000   Median :0.04073  
##  Mean   :0.14126   Mean   :0.21633   Mean   :0.22378   Mean   :0.23404  
##  3rd Qu.:0.15084   3rd Qu.:0.33422   3rd Qu.:0.37578   3rd Qu.:0.39999  
##  Max.   :0.92595   Max.   :0.92000   Max.   :0.92653   Max.   :0.92719  
##  global_subjectivity global_sentiment_polarity global_rate_positive_words
##  Min.   :0.0000      Min.   :-0.39375          Min.   :0.00000           
##  1st Qu.:0.3962      1st Qu.: 0.05776          1st Qu.:0.02839           
##  Median :0.4535      Median : 0.11912          Median :0.03902           
##  Mean   :0.4434      Mean   : 0.11931          Mean   :0.03963           
##  3rd Qu.:0.5083      3rd Qu.: 0.17784          3rd Qu.:0.05028           
##  Max.   :1.0000      Max.   : 0.72784          Max.   :0.15549           
##  global_rate_negative_words rate_positive_words rate_negative_words
##  Min.   :0.000000           Min.   :0.0000      Min.   :0.0000     
##  1st Qu.:0.009615           1st Qu.:0.6000      1st Qu.:0.1852     
##  Median :0.015337           Median :0.7105      Median :0.2800     
##  Mean   :0.016613           Mean   :0.6822      Mean   :0.2879     
##  3rd Qu.:0.021739           3rd Qu.:0.8000      3rd Qu.:0.3846     
##  Max.   :0.184932           Max.   :1.0000      Max.   :1.0000     
##  avg_positive_polarity min_positive_polarity max_positive_polarity
##  Min.   :0.0000        Min.   :0.00000       Min.   :0.0000       
##  1st Qu.:0.3062        1st Qu.:0.05000       1st Qu.:0.6000       
##  Median :0.3588        Median :0.10000       Median :0.8000       
##  Mean   :0.3538        Mean   :0.09545       Mean   :0.7567       
##  3rd Qu.:0.4114        3rd Qu.:0.10000       3rd Qu.:1.0000       
##  Max.   :1.0000        Max.   :1.00000       Max.   :1.0000       
##  avg_negative_polarity min_negative_polarity max_negative_polarity
##  Min.   :-1.0000       Min.   :-1.000        Min.   :-1.0000      
##  1st Qu.:-0.3284       1st Qu.:-0.700        1st Qu.:-0.1250      
##  Median :-0.2533       Median :-0.500        Median :-0.1000      
##  Mean   :-0.2595       Mean   :-0.522        Mean   :-0.1075      
##  3rd Qu.:-0.1869       3rd Qu.:-0.300        3rd Qu.:-0.0500      
##  Max.   : 0.0000       Max.   : 0.000        Max.   : 0.0000      
##  title_subjectivity title_sentiment_polarity abs_title_subjectivity
##  Min.   :0.0000     Min.   :-1.00000         Min.   :0.0000        
##  1st Qu.:0.0000     1st Qu.: 0.00000         1st Qu.:0.1667        
##  Median :0.1500     Median : 0.00000         Median :0.5000        
##  Mean   :0.2824     Mean   : 0.07143         Mean   :0.3419        
##  3rd Qu.:0.5000     3rd Qu.: 0.15000         3rd Qu.:0.5000        
##  Max.   :1.0000     Max.   : 1.00000         Max.   :0.5000        
##  abs_title_sentiment_polarity     shares      
##  Min.   :0.0000               Min.   :     1  
##  1st Qu.:0.0000               1st Qu.:   946  
##  Median :0.0000               Median :  1400  
##  Mean   :0.1561               Mean   :  3395  
##  3rd Qu.:0.2500               3rd Qu.:  2800  
##  Max.   :1.0000               Max.   :843300
# Compact listing of every column's type and leading values (cleaned data).
str(object = popNews)
## 'data.frame':    39643 obs. of  58 variables:
##  $ n_tokens_title               : num  12 9 9 9 13 10 8 12 11 10 ...
##  $ n_tokens_content             : num  219 255 211 531 1072 ...
##  $ n_unique_tokens              : num  0.664 0.605 0.575 0.504 0.416 ...
##  $ n_non_stop_words             : num  1 1 1 1 1 ...
##  $ n_non_stop_unique_tokens     : num  0.815 0.792 0.664 0.666 0.541 ...
##  $ num_hrefs                    : num  4 3 3 9 19 2 21 20 2 4 ...
##  $ num_self_hrefs               : num  2 1 1 0 19 2 20 20 0 1 ...
##  $ num_imgs                     : num  1 1 1 1 20 0 20 20 0 1 ...
##  $ num_videos                   : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ average_token_length         : num  4.68 4.91 4.39 4.4 4.68 ...
##  $ num_keywords                 : num  5 4 6 7 7 9 10 9 7 5 ...
##  $ data_channel_is_lifestyle    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
##  $ data_channel_is_entertainment: Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 1 1 1 ...
##  $ data_channel_is_bus          : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 1 1 ...
##  $ data_channel_is_socmed       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ data_channel_is_tech         : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 1 2 2 1 ...
##  $ data_channel_is_world        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ kw_min_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ self_reference_min_shares    : num  496 0 918 0 545 8500 545 545 0 0 ...
##  $ self_reference_max_shares    : num  496 0 918 0 16000 8500 16000 16000 0 0 ...
##  $ self_reference_avg_sharess   : num  496 0 918 0 3151 ...
##  $ weekday_is_monday            : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ weekday_is_tuesday           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_wednesday         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_thursday          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_friday            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_saturday          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_sunday            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ LDA_00                       : num  0.5003 0.7998 0.2178 0.0286 0.0286 ...
##  $ LDA_01                       : num  0.3783 0.05 0.0333 0.4193 0.0288 ...
##  $ LDA_02                       : num  0.04 0.0501 0.0334 0.4947 0.0286 ...
##  $ LDA_03                       : num  0.0413 0.0501 0.0333 0.0289 0.0286 ...
##  $ LDA_04                       : num  0.0401 0.05 0.6822 0.0286 0.8854 ...
##  $ global_subjectivity          : num  0.522 0.341 0.702 0.43 0.514 ...
##  $ global_sentiment_polarity    : num  0.0926 0.1489 0.3233 0.1007 0.281 ...
##  $ global_rate_positive_words   : num  0.0457 0.0431 0.0569 0.0414 0.0746 ...
##  $ global_rate_negative_words   : num  0.0137 0.01569 0.00948 0.02072 0.01213 ...
##  $ rate_positive_words          : num  0.769 0.733 0.857 0.667 0.86 ...
##  $ rate_negative_words          : num  0.231 0.267 0.143 0.333 0.14 ...
##  $ avg_positive_polarity        : num  0.379 0.287 0.496 0.386 0.411 ...
##  $ min_positive_polarity        : num  0.1 0.0333 0.1 0.1364 0.0333 ...
##  $ max_positive_polarity        : num  0.7 0.7 1 0.8 1 0.6 1 1 0.8 0.5 ...
##  $ avg_negative_polarity        : num  -0.35 -0.119 -0.467 -0.37 -0.22 ...
##  $ min_negative_polarity        : num  -0.6 -0.125 -0.8 -0.6 -0.5 -0.4 -0.5 -0.5 -0.125 -0.5 ...
##  $ max_negative_polarity        : num  -0.2 -0.1 -0.133 -0.167 -0.05 ...
##  $ title_subjectivity           : num  0.5 0 0 0 0.455 ...
##  $ title_sentiment_polarity     : num  -0.188 0 0 0 0.136 ...
##  $ abs_title_subjectivity       : num  0 0.5 0.5 0.5 0.0455 ...
##  $ abs_title_sentiment_polarity : num  0.188 0 0 0 0.136 ...
##  $ shares                       : int  593 711 1500 1200 505 855 556 891 3600 710 ...
# Split the data at random: 75% of the rows form the training set and the
# remaining 25% form the test set. The seed is fixed so the split is
# reproducible.

set.seed(174004689)

# popNewsTrain holds the row indices drawn for the training set.
popNewsTrain <- sample(
  x = nrow(popNews),
  size = as.integer(0.75 * nrow(popNews))
)
train.news <- popNews[popNewsTrain, ]
test.news <- popNews[-popNewsTrain, ]

# Fit a linear model on the training data with shares as the dependent
# variable and every other column of the data set as a predictor.

fit_mlm1 <- lm(formula = shares ~ ., data = train.news)

summary(object = fit_mlm1)
## 
## Call:
## lm(formula = shares ~ ., data = train.news)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29232  -2190  -1164    -55 837485 
## 
## Coefficients: (2 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -2.573e+02  9.709e+02  -0.265 0.791027    
## n_tokens_title                  6.275e+01  3.122e+01   2.010 0.044410 *  
## n_tokens_content                2.350e-01  2.434e-01   0.965 0.334303    
## n_unique_tokens                 2.892e+03  2.086e+03   1.386 0.165664    
## n_non_stop_words               -2.597e+03  6.443e+03  -0.403 0.686897    
## n_non_stop_unique_tokens        3.559e+02  1.773e+03   0.201 0.840935    
## num_hrefs                       2.788e+01  7.300e+00   3.819 0.000134 ***
## num_self_hrefs                 -4.806e+01  1.949e+01  -2.466 0.013679 *  
## num_imgs                        2.112e+01  9.709e+00   2.176 0.029585 *  
## num_videos                      1.976e+01  1.685e+01   1.172 0.241037    
## average_token_length           -5.188e+02  2.638e+02  -1.966 0.049285 *  
## num_keywords                    6.355e+01  4.045e+01   1.571 0.116148    
## data_channel_is_lifestyle1     -6.180e+02  4.297e+02  -1.438 0.150446    
## data_channel_is_entertainment1 -1.127e+03  2.783e+02  -4.051 5.11e-05 ***
## data_channel_is_bus1           -7.572e+02  4.169e+02  -1.816 0.069340 .  
## data_channel_is_socmed1        -5.402e+02  4.084e+02  -1.323 0.185902    
## data_channel_is_tech1          -5.256e+02  4.039e+02  -1.302 0.193097    
## data_channel_is_world1         -3.584e+02  4.100e+02  -0.874 0.382060    
## kw_min_min                      3.234e-01  1.779e+00   0.182 0.855739    
## kw_max_min                      7.133e-02  5.402e-02   1.320 0.186711    
## kw_avg_min                     -3.732e-01  3.382e-01  -1.103 0.269839    
## kw_min_max                     -2.194e-03  1.297e-03  -1.691 0.090811 .  
## kw_max_max                     -1.043e-03  6.350e-04  -1.643 0.100432    
## kw_avg_max                      5.755e-04  9.085e-04   0.633 0.526424    
## kw_min_avg                     -4.003e-01  8.275e-02  -4.838 1.32e-06 ***
## kw_max_avg                     -2.095e-01  2.763e-02  -7.582 3.51e-14 ***
## kw_avg_avg                      1.640e+00  1.579e-01  10.390  < 2e-16 ***
## self_reference_min_shares       3.250e-02  7.776e-03   4.179 2.93e-05 ***
## self_reference_max_shares       6.261e-03  4.275e-03   1.465 0.142999    
## self_reference_avg_sharess     -9.708e-03  1.077e-02  -0.901 0.367519    
## weekday_is_monday1              2.837e+02  2.878e+02   0.986 0.324298    
## weekday_is_tuesday1            -3.535e+02  2.834e+02  -1.247 0.212245    
## weekday_is_wednesday1          -1.731e+02  2.836e+02  -0.610 0.541578    
## weekday_is_thursday1           -5.327e+02  2.845e+02  -1.872 0.061181 .  
## weekday_is_friday1             -2.698e+02  2.944e+02  -0.916 0.359433    
## weekday_is_saturday1            1.397e+02  3.508e+02   0.398 0.690555    
## weekday_is_sunday1                     NA         NA      NA       NA    
## LDA_00                          3.678e+02  4.981e+02   0.739 0.460208    
## LDA_01                         -3.728e+02  5.510e+02  -0.677 0.498700    
## LDA_02                         -7.676e+02  4.978e+02  -1.542 0.123098    
## LDA_03                         -1.046e+02  5.230e+02  -0.200 0.841463    
## LDA_04                                 NA         NA      NA       NA    
## global_subjectivity             2.651e+03  9.258e+02   2.864 0.004190 ** 
## global_sentiment_polarity       1.040e+03  1.812e+03   0.574 0.565901    
## global_rate_positive_words     -1.081e+04  7.842e+03  -1.378 0.168234    
## global_rate_negative_words      1.234e+04  1.476e+04   0.836 0.403039    
## rate_positive_words             2.035e+03  6.306e+03   0.323 0.746898    
## rate_negative_words             1.575e+03  6.354e+03   0.248 0.804195    
## avg_positive_polarity          -1.415e+03  1.488e+03  -0.951 0.341846    
## min_positive_polarity          -2.133e+03  1.245e+03  -1.713 0.086701 .  
## max_positive_polarity           3.069e+02  4.713e+02   0.651 0.514998    
## avg_negative_polarity          -1.347e+03  1.373e+03  -0.981 0.326714    
## min_negative_polarity           1.903e+01  5.019e+02   0.038 0.969760    
## max_negative_polarity           3.454e+02  1.145e+03   0.302 0.762832    
## title_subjectivity             -1.111e+02  2.999e+02  -0.370 0.711100    
## title_sentiment_polarity        3.380e+02  2.721e+02   1.242 0.214168    
## abs_title_subjectivity          1.000e+03  3.975e+02   2.516 0.011879 *  
## abs_title_sentiment_polarity    5.015e+02  4.325e+02   1.160 0.246190    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10860 on 29676 degrees of freedom
## Multiple R-squared:  0.0262, Adjusted R-squared:  0.02439 
## F-statistic: 14.52 on 55 and 29676 DF,  p-value: < 2.2e-16
# Predict `shares` for the held-out rows using the baseline model.
pred_mlm1 <- predict(fit_mlm1, test.news)
## Warning in predict.lm(fit_mlm1, test.news): prediction from a rank-
## deficient fit may be misleading
# Test-set RMSE: square root of the mean squared prediction error.
sqrt(mean((pred_mlm1 - test.news[["shares"]])^2))
## [1] 13242.71
# This model has a very low R-squared value (about 0.026), so we try transforming the response variable.

# Next, I apply a log transformation to the target variable (shares) to improve the fit:

# Overwrite `shares` with its natural logarithm, in place.
# NOTE: this is not idempotent; re-running this line applies the log a second time.
popNews[["shares"]] <- log(popNews[["shares"]])

# Draw a new 75/25 split. The seed is not reset here, so this draw continues the
# RNG stream and is generally not the same set of rows used for fit_mlm1.
popNewsTrain <- sample.int(n = nrow(popNews), size = as.integer(0.75 * nrow(popNews)))
train.news <- popNews[popNewsTrain, ]
test.news <- popNews[-popNewsTrain, ]

# Refit the all-predictors linear model, now with log(shares) as the response:

fit_m3 <- lm(formula = shares ~ ., data = train.news)

summary(fit_m3)
## 
## Call:
## lm(formula = shares ~ ., data = train.news)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1050 -0.5455 -0.1645  0.3860  5.6075 
## 
## Coefficients: (2 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     6.906e+00  7.801e-02  88.521  < 2e-16 ***
## n_tokens_title                  5.493e-03  2.509e-03   2.189 0.028595 *  
## n_tokens_content                3.611e-05  1.934e-05   1.866 0.061988 .  
## n_unique_tokens                 1.524e-01  1.664e-01   0.916 0.359479    
## n_non_stop_words                1.207e-01  5.158e-01   0.234 0.815032    
## n_non_stop_unique_tokens       -2.271e-01  1.413e-01  -1.607 0.108162    
## num_hrefs                       3.608e-03  5.938e-04   6.076 1.25e-09 ***
## num_self_hrefs                 -7.094e-03  1.583e-03  -4.482 7.43e-06 ***
## num_imgs                        2.521e-03  7.815e-04   3.225 0.001259 ** 
## num_videos                      2.258e-03  1.373e-03   1.645 0.099989 .  
## average_token_length           -8.190e-02  2.109e-02  -3.883 0.000103 ***
## num_keywords                    1.173e-02  3.238e-03   3.622 0.000293 ***
## data_channel_is_lifestyle1     -9.525e-02  3.441e-02  -2.768 0.005648 ** 
## data_channel_is_entertainment1 -1.813e-01  2.222e-02  -8.157 3.57e-16 ***
## data_channel_is_bus1           -1.616e-01  3.344e-02  -4.832 1.36e-06 ***
## data_channel_is_socmed1         1.656e-01  3.276e-02   5.054 4.35e-07 ***
## data_channel_is_tech1           1.195e-01  3.243e-02   3.685 0.000229 ***
## data_channel_is_world1         -2.753e-02  3.295e-02  -0.836 0.403366    
## kw_min_min                      9.604e-04  1.406e-04   6.831 8.61e-12 ***
## kw_max_min                      1.937e-05  4.709e-06   4.114 3.89e-05 ***
## kw_avg_min                     -1.411e-04  2.738e-05  -5.152 2.60e-07 ***
## kw_min_max                     -4.398e-07  1.027e-07  -4.283 1.85e-05 ***
## kw_max_max                      8.330e-09  5.012e-08   0.166 0.867997    
## kw_avg_max                     -2.695e-07  7.278e-08  -3.702 0.000214 ***
## kw_min_avg                     -4.634e-05  6.584e-06  -7.037 2.00e-12 ***
## kw_max_avg                     -4.090e-05  2.182e-06 -18.741  < 2e-16 ***
## kw_avg_avg                      3.334e-04  1.250e-05  26.664  < 2e-16 ***
## self_reference_min_shares       1.009e-06  7.017e-07   1.438 0.150552    
## self_reference_max_shares      -8.002e-08  3.916e-07  -0.204 0.838104    
## self_reference_avg_sharess      1.470e-06  9.976e-07   1.473 0.140747    
## weekday_is_monday1             -2.322e-01  2.287e-02 -10.155  < 2e-16 ***
## weekday_is_tuesday1            -2.972e-01  2.257e-02 -13.166  < 2e-16 ***
## weekday_is_wednesday1          -2.970e-01  2.255e-02 -13.168  < 2e-16 ***
## weekday_is_thursday1           -3.015e-01  2.259e-02 -13.347  < 2e-16 ***
## weekday_is_friday1             -2.277e-01  2.343e-02  -9.717  < 2e-16 ***
## weekday_is_saturday1           -2.830e-03  2.780e-02  -0.102 0.918929    
## weekday_is_sunday1                     NA         NA      NA       NA    
## LDA_00                          2.432e-01  3.983e-02   6.106 1.04e-09 ***
## LDA_01                         -1.352e-01  4.428e-02  -3.053 0.002266 ** 
## LDA_02                         -2.384e-01  3.984e-02  -5.982 2.22e-09 ***
## LDA_03                         -1.138e-01  4.201e-02  -2.709 0.006749 ** 
## LDA_04                                 NA         NA      NA       NA    
## global_subjectivity             4.147e-01  7.452e-02   5.565 2.64e-08 ***
## global_sentiment_polarity      -1.498e-01  1.458e-01  -1.027 0.304200    
## global_rate_positive_words     -8.402e-01  6.245e-01  -1.345 0.178493    
## global_rate_negative_words     -4.637e-02  1.205e+00  -0.038 0.969312    
## rate_positive_words             1.634e-01  5.048e-01   0.324 0.746127    
## rate_negative_words             2.990e-02  5.087e-01   0.059 0.953120    
## avg_positive_polarity          -5.408e-03  1.195e-01  -0.045 0.963899    
## min_positive_polarity          -2.643e-01  9.974e-02  -2.650 0.008050 ** 
## max_positive_polarity          -8.057e-03  3.765e-02  -0.214 0.830542    
## avg_negative_polarity          -1.443e-01  1.099e-01  -1.313 0.189123    
## min_negative_polarity          -6.796e-03  4.012e-02  -0.169 0.865466    
## max_negative_polarity           4.416e-02  9.167e-02   0.482 0.629967    
## title_subjectivity              5.839e-02  2.392e-02   2.441 0.014650 *  
## title_sentiment_polarity        6.969e-02  2.185e-02   3.189 0.001429 ** 
## abs_title_subjectivity          1.372e-01  3.181e-02   4.312 1.62e-05 ***
## abs_title_sentiment_polarity   -5.147e-03  3.446e-02  -0.149 0.881269    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8696 on 29676 degrees of freedom
## Multiple R-squared:  0.1292, Adjusted R-squared:  0.1276 
## F-statistic: 80.04 on 55 and 29676 DF,  p-value: < 2.2e-16
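# This model gives a higher R-squared value (about 0.129 vs. 0.026) than the previous model, although the two values are measured on different response scales (log(shares) vs. shares).

# Next, I want a more parsimonious model, so I use stepwise regression via step(), which selects terms by AIC rather than by statistical significance: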

# Stepwise selection by AIC starting from the full log(shares) model; with no
# `scope` supplied, step() searches backward only, which is made explicit here.
fit_mlm2 <- step(fit_m3, direction = "backward")
## Start:  AIC=-8253.38
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + weekday_is_sunday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + LDA_04 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + global_rate_negative_words + 
##     rate_positive_words + rate_negative_words + avg_positive_polarity + 
##     min_positive_polarity + max_positive_polarity + avg_negative_polarity + 
##     min_negative_polarity + max_negative_polarity + title_subjectivity + 
##     title_sentiment_polarity + abs_title_subjectivity + abs_title_sentiment_polarity
## 
## 
## Step:  AIC=-8253.38
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + weekday_is_sunday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + global_rate_negative_words + 
##     rate_positive_words + rate_negative_words + avg_positive_polarity + 
##     min_positive_polarity + max_positive_polarity + avg_negative_polarity + 
##     min_negative_polarity + max_negative_polarity + title_subjectivity + 
##     title_sentiment_polarity + abs_title_subjectivity + abs_title_sentiment_polarity
## 
## 
## Step:  AIC=-8253.38
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + LDA_00 + LDA_01 + LDA_02 + LDA_03 + 
##     global_subjectivity + global_sentiment_polarity + global_rate_positive_words + 
##     global_rate_negative_words + rate_positive_words + rate_negative_words + 
##     avg_positive_polarity + min_positive_polarity + max_positive_polarity + 
##     avg_negative_polarity + min_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity + 
##     abs_title_sentiment_polarity
## 
##                                 Df Sum of Sq   RSS     AIC
## - global_rate_negative_words     1      0.00 22440 -8255.4
## - avg_positive_polarity          1      0.00 22440 -8255.4
## - rate_negative_words            1      0.00 22440 -8255.4
## - weekday_is_saturday            1      0.01 22440 -8255.4
## - abs_title_sentiment_polarity   1      0.02 22441 -8255.4
## - kw_max_max                     1      0.02 22441 -8255.4
## - min_negative_polarity          1      0.02 22441 -8255.4
## - self_reference_max_shares      1      0.03 22441 -8255.3
## - max_positive_polarity          1      0.03 22441 -8255.3
## - n_non_stop_words               1      0.04 22441 -8255.3
## - rate_positive_words            1      0.08 22441 -8255.3
## - max_negative_polarity          1      0.18 22441 -8255.1
## - data_channel_is_world          1      0.53 22441 -8254.7
## - n_unique_tokens                1      0.63 22441 -8254.5
## - global_sentiment_polarity      1      0.80 22441 -8254.3
## - avg_negative_polarity          1      1.30 22442 -8253.7
## - global_rate_positive_words     1      1.37 22442 -8253.6
## <none>                                       22440 -8253.4
## - self_reference_min_shares      1      1.56 22442 -8253.3
## - self_reference_avg_sharess     1      1.64 22442 -8253.2
## - n_non_stop_unique_tokens       1      1.95 22442 -8252.8
## - num_videos                     1      2.05 22443 -8252.7
## - n_tokens_content               1      2.63 22443 -8251.9
## - n_tokens_title                 1      3.62 22444 -8250.6
## - title_subjectivity             1      4.51 22445 -8249.4
## - min_positive_polarity          1      5.31 22446 -8248.3
## - LDA_03                         1      5.55 22446 -8248.0
## - data_channel_is_lifestyle      1      5.79 22446 -8247.7
## - LDA_01                         1      7.05 22448 -8246.0
## - title_sentiment_polarity       1      7.69 22448 -8245.2
## - num_imgs                       1      7.87 22448 -8245.0
## - num_keywords                   1      9.92 22450 -8242.2
## - data_channel_is_tech           1     10.27 22451 -8241.8
## - kw_avg_max                     1     10.36 22451 -8241.7
## - average_token_length           1     11.40 22452 -8240.3
## - kw_max_min                     1     12.80 22453 -8238.4
## - kw_min_max                     1     13.87 22454 -8237.0
## - abs_title_subjectivity         1     14.06 22455 -8236.8
## - num_self_hrefs                 1     15.19 22456 -8235.3
## - data_channel_is_bus            1     17.65 22458 -8232.0
## - data_channel_is_socmed         1     19.32 22460 -8229.8
## - kw_avg_min                     1     20.07 22461 -8228.8
## - global_subjectivity            1     23.42 22464 -8224.4
## - LDA_02                         1     27.06 22468 -8219.5
## - num_hrefs                      1     27.92 22468 -8218.4
## - LDA_00                         1     28.19 22469 -8218.1
## - kw_min_min                     1     35.28 22476 -8208.7
## - kw_min_avg                     1     37.45 22478 -8205.8
## - data_channel_is_entertainment  1     50.31 22491 -8188.8
## - weekday_is_friday              1     71.40 22512 -8160.9
## - weekday_is_monday              1     77.98 22518 -8152.2
## - weekday_is_tuesday             1    131.08 22572 -8082.2
## - weekday_is_wednesday           1    131.11 22572 -8082.2
## - weekday_is_thursday            1    134.70 22575 -8077.4
## - kw_max_avg                     1    265.58 22706 -7905.6
## - kw_avg_avg                     1    537.64 22978 -7551.4
## 
## Step:  AIC=-8255.38
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + LDA_00 + LDA_01 + LDA_02 + LDA_03 + 
##     global_subjectivity + global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + rate_negative_words + avg_positive_polarity + 
##     min_positive_polarity + max_positive_polarity + avg_negative_polarity + 
##     min_negative_polarity + max_negative_polarity + title_subjectivity + 
##     title_sentiment_polarity + abs_title_subjectivity + abs_title_sentiment_polarity
## 
##                                 Df Sum of Sq   RSS     AIC
## - avg_positive_polarity          1      0.00 22440 -8257.4
## - rate_negative_words            1      0.00 22440 -8257.4
## - weekday_is_saturday            1      0.01 22440 -8257.4
## - abs_title_sentiment_polarity   1      0.02 22441 -8257.4
## - kw_max_max                     1      0.02 22441 -8257.4
## - min_negative_polarity          1      0.02 22441 -8257.3
## - self_reference_max_shares      1      0.03 22441 -8257.3
## - max_positive_polarity          1      0.04 22441 -8257.3
## - n_non_stop_words               1      0.04 22441 -8257.3
## - rate_positive_words            1      0.08 22441 -8257.3
## - max_negative_polarity          1      0.18 22441 -8257.1
## - data_channel_is_world          1      0.53 22441 -8256.7
## - n_unique_tokens                1      0.63 22441 -8256.5
## - global_sentiment_polarity      1      0.86 22441 -8256.2
## - avg_negative_polarity          1      1.33 22442 -8255.6
## <none>                                       22440 -8255.4
## - self_reference_min_shares      1      1.56 22442 -8255.3
## - self_reference_avg_sharess     1      1.64 22442 -8255.2
## - n_non_stop_unique_tokens       1      1.95 22442 -8254.8
## - num_videos                     1      2.06 22443 -8254.6
## - global_rate_positive_words     1      2.60 22443 -8253.9
## - n_tokens_content               1      2.64 22443 -8253.9
## - n_tokens_title                 1      3.63 22444 -8252.6
## - title_subjectivity             1      4.51 22445 -8251.4
## - min_positive_polarity          1      5.36 22446 -8250.3
## - LDA_03                         1      5.55 22446 -8250.0
## - data_channel_is_lifestyle      1      5.79 22446 -8249.7
## - LDA_01                         1      7.05 22448 -8248.0
## - title_sentiment_polarity       1      7.71 22448 -8247.2
## - num_imgs                       1      7.87 22448 -8247.0
## - num_keywords                   1      9.92 22450 -8244.2
## - data_channel_is_tech           1     10.28 22451 -8243.8
## - kw_avg_max                     1     10.37 22451 -8243.6
## - average_token_length           1     11.42 22452 -8242.2
## - kw_max_min                     1     12.80 22453 -8240.4
## - kw_min_max                     1     13.87 22454 -8239.0
## - abs_title_subjectivity         1     14.08 22455 -8238.7
## - num_self_hrefs                 1     15.19 22456 -8237.3
## - data_channel_is_bus            1     17.66 22458 -8234.0
## - data_channel_is_socmed         1     19.33 22460 -8231.8
## - kw_avg_min                     1     20.07 22461 -8230.8
## - global_subjectivity            1     23.99 22464 -8225.6
## - LDA_02                         1     27.06 22468 -8221.5
## - num_hrefs                      1     28.02 22469 -8220.3
## - LDA_00                         1     28.19 22469 -8220.1
## - kw_min_min                     1     35.28 22476 -8210.7
## - kw_min_avg                     1     37.45 22478 -8207.8
## - data_channel_is_entertainment  1     50.31 22491 -8190.8
## - weekday_is_friday              1     71.43 22512 -8162.9
## - weekday_is_monday              1     78.02 22519 -8154.2
## - weekday_is_wednesday           1    131.14 22572 -8084.1
## - weekday_is_tuesday             1    131.15 22572 -8084.1
## - weekday_is_thursday            1    134.77 22575 -8079.3
## - kw_max_avg                     1    265.59 22706 -7907.6
## - kw_avg_avg                     1    537.69 22978 -7553.4
## 
## Step:  AIC=-8257.38
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + LDA_00 + LDA_01 + LDA_02 + LDA_03 + 
##     global_subjectivity + global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + rate_negative_words + min_positive_polarity + 
##     max_positive_polarity + avg_negative_polarity + min_negative_polarity + 
##     max_negative_polarity + title_subjectivity + title_sentiment_polarity + 
##     abs_title_subjectivity + abs_title_sentiment_polarity
## 
##                                 Df Sum of Sq   RSS     AIC
## - rate_negative_words            1      0.00 22440 -8259.4
## - weekday_is_saturday            1      0.01 22440 -8259.4
## - abs_title_sentiment_polarity   1      0.02 22441 -8259.4
## - kw_max_max                     1      0.02 22441 -8259.3
## - min_negative_polarity          1      0.02 22441 -8259.3
## - self_reference_max_shares      1      0.03 22441 -8259.3
## - n_non_stop_words               1      0.04 22441 -8259.3
## - max_positive_polarity          1      0.06 22441 -8259.3
## - rate_positive_words            1      0.08 22441 -8259.3
## - max_negative_polarity          1      0.17 22441 -8259.1
## - data_channel_is_world          1      0.53 22441 -8258.7
## - n_unique_tokens                1      0.63 22441 -8258.5
## - avg_negative_polarity          1      1.35 22442 -8257.6
## - global_sentiment_polarity      1      1.35 22442 -8257.6
## <none>                                       22440 -8257.4
## - self_reference_min_shares      1      1.56 22442 -8257.3
## - self_reference_avg_sharess     1      1.64 22442 -8257.2
## - n_non_stop_unique_tokens       1      1.96 22442 -8256.8
## - num_videos                     1      2.06 22443 -8256.6
## - n_tokens_content               1      2.64 22443 -8255.9
## - global_rate_positive_words     1      2.68 22443 -8255.8
## - n_tokens_title                 1      3.63 22444 -8254.6
## - title_subjectivity             1      4.52 22445 -8253.4
## - LDA_03                         1      5.55 22446 -8252.0
## - data_channel_is_lifestyle      1      5.79 22446 -8251.7
## - LDA_01                         1      7.05 22448 -8250.0
## - min_positive_polarity          1      7.11 22448 -8250.0
## - title_sentiment_polarity       1      7.73 22448 -8249.1
## - num_imgs                       1      7.86 22448 -8249.0
## - num_keywords                   1      9.92 22450 -8246.2
## - data_channel_is_tech           1     10.28 22451 -8245.8
## - kw_avg_max                     1     10.36 22451 -8245.6
## - average_token_length           1     11.44 22452 -8244.2
## - kw_max_min                     1     12.80 22453 -8242.4
## - kw_min_max                     1     13.88 22454 -8241.0
## - abs_title_subjectivity         1     14.08 22455 -8240.7
## - num_self_hrefs                 1     15.19 22456 -8239.3
## - data_channel_is_bus            1     17.66 22458 -8236.0
## - data_channel_is_socmed         1     19.33 22460 -8233.8
## - kw_avg_min                     1     20.07 22461 -8232.8
## - global_subjectivity            1     24.27 22465 -8227.2
## - LDA_02                         1     27.08 22468 -8223.5
## - num_hrefs                      1     28.05 22469 -8222.2
## - LDA_00                         1     28.19 22469 -8222.0
## - kw_min_min                     1     35.28 22476 -8212.7
## - kw_min_avg                     1     37.45 22478 -8209.8
## - data_channel_is_entertainment  1     50.32 22491 -8192.8
## - weekday_is_friday              1     71.43 22512 -8164.9
## - weekday_is_monday              1     78.03 22519 -8156.2
## - weekday_is_tuesday             1    131.15 22572 -8086.1
## - weekday_is_wednesday           1    131.15 22572 -8086.1
## - weekday_is_thursday            1    134.77 22575 -8081.3
## - kw_max_avg                     1    265.60 22706 -7909.5
## - kw_avg_avg                     1    537.78 22978 -7555.3
## 
## Step:  AIC=-8259.37
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     weekday_is_saturday + LDA_00 + LDA_01 + LDA_02 + LDA_03 + 
##     global_subjectivity + global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + max_positive_polarity + 
##     avg_negative_polarity + min_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity + 
##     abs_title_sentiment_polarity
## 
##                                 Df Sum of Sq   RSS     AIC
## - weekday_is_saturday            1      0.01 22440 -8261.4
## - abs_title_sentiment_polarity   1      0.02 22441 -8261.3
## - kw_max_max                     1      0.02 22441 -8261.3
## - min_negative_polarity          1      0.02 22441 -8261.3
## - self_reference_max_shares      1      0.03 22441 -8261.3
## - max_positive_polarity          1      0.06 22441 -8261.3
## - max_negative_polarity          1      0.17 22441 -8261.1
## - data_channel_is_world          1      0.53 22441 -8260.7
## - n_unique_tokens                1      0.63 22441 -8260.5
## - n_non_stop_words               1      0.94 22441 -8260.1
## - avg_negative_polarity          1      1.35 22442 -8259.6
## - global_sentiment_polarity      1      1.37 22442 -8259.6
## <none>                                       22440 -8259.4
## - self_reference_min_shares      1      1.56 22442 -8259.3
## - self_reference_avg_sharess     1      1.64 22442 -8259.2
## - n_non_stop_unique_tokens       1      1.95 22442 -8258.8
## - num_videos                     1      2.06 22443 -8258.6
## - n_tokens_content               1      2.64 22443 -8257.9
## - global_rate_positive_words     1      2.67 22443 -8257.8
## - rate_positive_words            1      3.63 22444 -8256.6
## - n_tokens_title                 1      3.63 22444 -8256.6
## - title_subjectivity             1      4.52 22445 -8255.4
## - LDA_03                         1      5.55 22446 -8254.0
## - data_channel_is_lifestyle      1      5.79 22446 -8253.7
## - LDA_01                         1      7.06 22448 -8252.0
## - min_positive_polarity          1      7.11 22448 -8252.0
## - title_sentiment_polarity       1      7.73 22448 -8251.1
## - num_imgs                       1      7.87 22448 -8251.0
## - num_keywords                   1      9.92 22450 -8248.2
## - data_channel_is_tech           1     10.28 22451 -8247.8
## - kw_avg_max                     1     10.36 22451 -8247.6
## - average_token_length           1     11.43 22452 -8246.2
## - kw_max_min                     1     12.80 22453 -8244.4
## - kw_min_max                     1     13.88 22454 -8243.0
## - abs_title_subjectivity         1     14.08 22455 -8242.7
## - num_self_hrefs                 1     15.19 22456 -8241.3
## - data_channel_is_bus            1     17.67 22458 -8238.0
## - data_channel_is_socmed         1     19.33 22460 -8235.8
## - kw_avg_min                     1     20.07 22461 -8234.8
## - global_subjectivity            1     24.33 22465 -8229.2
## - LDA_02                         1     27.08 22468 -8225.5
## - num_hrefs                      1     28.05 22469 -8224.2
## - LDA_00                         1     28.19 22469 -8224.0
## - kw_min_min                     1     35.29 22476 -8214.7
## - kw_min_avg                     1     37.46 22478 -8211.8
## - data_channel_is_entertainment  1     50.32 22491 -8194.8
## - weekday_is_friday              1     71.44 22512 -8166.9
## - weekday_is_monday              1     78.03 22519 -8158.2
## - weekday_is_wednesday           1    131.15 22572 -8088.1
## - weekday_is_tuesday             1    131.16 22572 -8088.1
## - weekday_is_thursday            1    134.77 22575 -8083.3
## - kw_max_avg                     1    265.60 22706 -7911.5
## - kw_avg_avg                     1    537.79 22978 -7557.2
## 
## Step:  AIC=-8261.36
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + max_positive_polarity + 
##     avg_negative_polarity + min_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity + 
##     abs_title_sentiment_polarity
## 
##                                 Df Sum of Sq   RSS     AIC
## - abs_title_sentiment_polarity   1      0.02 22441 -8263.3
## - kw_max_max                     1      0.02 22441 -8263.3
## - min_negative_polarity          1      0.02 22441 -8263.3
## - self_reference_max_shares      1      0.03 22441 -8263.3
## - max_positive_polarity          1      0.06 22441 -8263.3
## - max_negative_polarity          1      0.17 22441 -8263.1
## - data_channel_is_world          1      0.52 22441 -8262.7
## - n_unique_tokens                1      0.63 22441 -8262.5
## - n_non_stop_words               1      0.94 22441 -8262.1
## - avg_negative_polarity          1      1.35 22442 -8261.6
## - global_sentiment_polarity      1      1.37 22442 -8261.6
## <none>                                       22440 -8261.4
## - self_reference_min_shares      1      1.56 22442 -8261.3
## - self_reference_avg_sharess     1      1.64 22442 -8261.2
## - n_non_stop_unique_tokens       1      1.96 22442 -8260.8
## - num_videos                     1      2.06 22443 -8260.6
## - n_tokens_content               1      2.65 22443 -8259.9
## - global_rate_positive_words     1      2.67 22443 -8259.8
## - rate_positive_words            1      3.63 22444 -8258.6
## - n_tokens_title                 1      3.64 22444 -8258.5
## - title_subjectivity             1      4.51 22445 -8257.4
## - LDA_03                         1      5.55 22446 -8256.0
## - data_channel_is_lifestyle      1      5.79 22446 -8255.7
## - LDA_01                         1      7.05 22448 -8254.0
## - min_positive_polarity          1      7.11 22448 -8253.9
## - title_sentiment_polarity       1      7.72 22448 -8253.1
## - num_imgs                       1      7.87 22448 -8252.9
## - num_keywords                   1      9.92 22450 -8250.2
## - data_channel_is_tech           1     10.27 22451 -8249.8
## - kw_avg_max                     1     10.37 22451 -8249.6
## - average_token_length           1     11.43 22452 -8248.2
## - kw_max_min                     1     12.80 22453 -8246.4
## - kw_min_max                     1     13.88 22454 -8245.0
## - abs_title_subjectivity         1     14.08 22455 -8244.7
## - num_self_hrefs                 1     15.20 22456 -8243.2
## - data_channel_is_bus            1     17.66 22458 -8240.0
## - data_channel_is_socmed         1     19.32 22460 -8237.8
## - kw_avg_min                     1     20.07 22461 -8236.8
## - global_subjectivity            1     24.33 22465 -8231.1
## - LDA_02                         1     27.09 22468 -8227.5
## - num_hrefs                      1     28.04 22469 -8226.2
## - LDA_00                         1     28.19 22469 -8226.0
## - kw_min_min                     1     35.30 22476 -8216.6
## - kw_min_avg                     1     37.46 22478 -8213.8
## - data_channel_is_entertainment  1     50.31 22491 -8196.8
## - weekday_is_friday              1    103.25 22544 -8126.9
## - weekday_is_monday              1    115.47 22556 -8110.8
## - weekday_is_wednesday           1    197.35 22638 -8003.0
## - weekday_is_tuesday             1    197.36 22638 -8003.0
## - weekday_is_thursday            1    202.50 22643 -7996.3
## - kw_max_avg                     1    265.62 22706 -7913.5
## - kw_avg_avg                     1    537.78 22978 -7559.2
## 
## Step:  AIC=-8263.34
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_max_max + kw_avg_max + kw_min_avg + kw_max_avg + 
##     kw_avg_avg + self_reference_min_shares + self_reference_max_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + max_positive_polarity + 
##     avg_negative_polarity + min_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - kw_max_max                     1      0.02 22441 -8265.3
## - min_negative_polarity          1      0.02 22441 -8265.3
## - self_reference_max_shares      1      0.03 22441 -8265.3
## - max_positive_polarity          1      0.06 22441 -8265.3
## - max_negative_polarity          1      0.17 22441 -8265.1
## - data_channel_is_world          1      0.53 22441 -8264.6
## - n_unique_tokens                1      0.63 22441 -8264.5
## - n_non_stop_words               1      0.93 22441 -8264.1
## - avg_negative_polarity          1      1.33 22442 -8263.6
## - global_sentiment_polarity      1      1.38 22442 -8263.5
## <none>                                       22441 -8263.3
## - self_reference_min_shares      1      1.56 22442 -8263.3
## - self_reference_avg_sharess     1      1.64 22442 -8263.2
## - n_non_stop_unique_tokens       1      1.95 22442 -8262.8
## - num_videos                     1      2.06 22443 -8262.6
## - n_tokens_content               1      2.65 22443 -8261.8
## - global_rate_positive_words     1      2.67 22443 -8261.8
## - n_tokens_title                 1      3.65 22444 -8260.5
## - rate_positive_words            1      3.66 22444 -8260.5
## - LDA_03                         1      5.56 22446 -8258.0
## - data_channel_is_lifestyle      1      5.80 22446 -8257.7
## - title_subjectivity             1      6.95 22447 -8256.1
## - LDA_01                         1      7.05 22448 -8256.0
## - min_positive_polarity          1      7.11 22448 -8255.9
## - num_imgs                       1      7.87 22448 -8254.9
## - title_sentiment_polarity       1      8.51 22449 -8254.1
## - num_keywords                   1      9.92 22450 -8252.2
## - data_channel_is_tech           1     10.27 22451 -8251.7
## - kw_avg_max                     1     10.37 22451 -8251.6
## - average_token_length           1     11.42 22452 -8250.2
## - kw_max_min                     1     12.79 22453 -8248.4
## - kw_min_max                     1     13.88 22454 -8247.0
## - abs_title_subjectivity         1     14.16 22455 -8246.6
## - num_self_hrefs                 1     15.19 22456 -8245.2
## - data_channel_is_bus            1     17.66 22458 -8241.9
## - data_channel_is_socmed         1     19.32 22460 -8239.8
## - kw_avg_min                     1     20.05 22461 -8238.8
## - global_subjectivity            1     24.40 22465 -8233.0
## - LDA_02                         1     27.08 22468 -8229.5
## - num_hrefs                      1     28.04 22469 -8228.2
## - LDA_00                         1     28.18 22469 -8228.0
## - kw_min_min                     1     35.29 22476 -8218.6
## - kw_min_avg                     1     37.48 22478 -8215.7
## - data_channel_is_entertainment  1     50.31 22491 -8198.8
## - weekday_is_friday              1    103.24 22544 -8128.9
## - weekday_is_monday              1    115.46 22556 -8112.8
## - weekday_is_wednesday           1    197.35 22638 -8005.0
## - weekday_is_tuesday             1    197.35 22638 -8005.0
## - weekday_is_thursday            1    202.48 22643 -7998.3
## - kw_max_avg                     1    265.60 22706 -7915.5
## - kw_avg_avg                     1    537.77 22978 -7561.2
## 
## Step:  AIC=-8265.31
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_max_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     max_positive_polarity + avg_negative_polarity + min_negative_polarity + 
##     max_negative_polarity + title_subjectivity + title_sentiment_polarity + 
##     abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - min_negative_polarity          1      0.02 22441 -8267.3
## - self_reference_max_shares      1      0.03 22441 -8267.3
## - max_positive_polarity          1      0.06 22441 -8267.2
## - max_negative_polarity          1      0.17 22441 -8267.1
## - data_channel_is_world          1      0.51 22441 -8266.6
## - n_unique_tokens                1      0.63 22441 -8266.5
## - n_non_stop_words               1      0.94 22441 -8266.1
## - avg_negative_polarity          1      1.33 22442 -8265.6
## - global_sentiment_polarity      1      1.39 22442 -8265.5
## <none>                                       22441 -8265.3
## - self_reference_min_shares      1      1.57 22442 -8265.2
## - self_reference_avg_sharess     1      1.64 22442 -8265.1
## - n_non_stop_unique_tokens       1      1.95 22442 -8264.7
## - num_videos                     1      2.05 22443 -8264.6
## - n_tokens_content               1      2.64 22443 -8263.8
## - global_rate_positive_words     1      2.67 22443 -8263.8
## - n_tokens_title                 1      3.66 22444 -8262.5
## - rate_positive_words            1      3.66 22444 -8262.5
## - LDA_03                         1      5.65 22446 -8259.8
## - data_channel_is_lifestyle      1      5.78 22446 -8259.7
## - title_subjectivity             1      6.95 22447 -8258.1
## - LDA_01                         1      7.06 22448 -8258.0
## - min_positive_polarity          1      7.10 22448 -8257.9
## - num_imgs                       1      7.90 22448 -8256.9
## - title_sentiment_polarity       1      8.50 22449 -8256.0
## - num_keywords                   1     10.28 22451 -8253.7
## - data_channel_is_tech           1     10.35 22451 -8253.6
## - kw_avg_max                     1     11.39 22452 -8252.2
## - average_token_length           1     11.42 22452 -8252.2
## - kw_max_min                     1     12.85 22453 -8250.3
## - abs_title_subjectivity         1     14.15 22455 -8248.6
## - kw_min_max                     1     14.31 22455 -8248.4
## - num_self_hrefs                 1     15.24 22456 -8247.1
## - data_channel_is_bus            1     17.65 22458 -8243.9
## - data_channel_is_socmed         1     19.54 22460 -8241.4
## - kw_avg_min                     1     20.15 22461 -8240.6
## - global_subjectivity            1     24.40 22465 -8235.0
## - LDA_02                         1     27.11 22468 -8231.4
## - num_hrefs                      1     28.11 22469 -8230.1
## - LDA_00                         1     28.16 22469 -8230.0
## - kw_min_avg                     1     37.68 22478 -8217.4
## - data_channel_is_entertainment  1     50.89 22491 -8200.0
## - kw_min_min                     1     75.83 22516 -8167.0
## - weekday_is_friday              1    103.29 22544 -8130.8
## - weekday_is_monday              1    115.67 22556 -8114.5
## - weekday_is_tuesday             1    197.53 22638 -8006.7
## - weekday_is_wednesday           1    197.60 22638 -8006.6
## - weekday_is_thursday            1    202.64 22643 -8000.0
## - kw_max_avg                     1    267.54 22708 -7914.9
## - kw_avg_avg                     1    543.77 22984 -7555.4
## 
## Step:  AIC=-8267.28
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_max_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     max_positive_polarity + avg_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - self_reference_max_shares      1      0.03 22441 -8269.2
## - max_positive_polarity          1      0.06 22441 -8269.2
## - max_negative_polarity          1      0.30 22441 -8268.9
## - data_channel_is_world          1      0.51 22441 -8268.6
## - n_unique_tokens                1      0.61 22441 -8268.5
## - n_non_stop_words               1      0.96 22442 -8268.0
## - global_sentiment_polarity      1      1.39 22442 -8267.4
## <none>                                       22441 -8267.3
## - self_reference_min_shares      1      1.57 22442 -8267.2
## - self_reference_avg_sharess     1      1.64 22442 -8267.1
## - n_non_stop_unique_tokens       1      1.92 22442 -8266.7
## - num_videos                     1      2.05 22443 -8266.6
## - global_rate_positive_words     1      2.65 22443 -8265.8
## - n_tokens_content               1      2.89 22443 -8265.4
## - avg_negative_polarity          1      3.63 22444 -8264.5
## - n_tokens_title                 1      3.66 22444 -8264.4
## - rate_positive_words            1      3.68 22444 -8264.4
## - LDA_03                         1      5.65 22446 -8261.8
## - data_channel_is_lifestyle      1      5.78 22446 -8261.6
## - title_subjectivity             1      6.93 22447 -8260.1
## - LDA_01                         1      7.06 22448 -8259.9
## - min_positive_polarity          1      7.13 22448 -8259.8
## - num_imgs                       1      7.88 22448 -8258.8
## - title_sentiment_polarity       1      8.55 22449 -8258.0
## - num_keywords                   1     10.28 22451 -8255.7
## - data_channel_is_tech           1     10.34 22451 -8255.6
## - kw_avg_max                     1     11.40 22452 -8254.2
## - average_token_length           1     11.41 22452 -8254.2
## - kw_max_min                     1     12.85 22453 -8252.3
## - abs_title_subjectivity         1     14.15 22455 -8250.5
## - kw_min_max                     1     14.30 22455 -8250.3
## - num_self_hrefs                 1     15.28 22456 -8249.0
## - data_channel_is_bus            1     17.66 22458 -8245.9
## - data_channel_is_socmed         1     19.54 22460 -8243.4
## - kw_avg_min                     1     20.15 22461 -8242.6
## - global_subjectivity            1     24.39 22465 -8237.0
## - LDA_02                         1     27.09 22468 -8233.4
## - LDA_00                         1     28.17 22469 -8232.0
## - num_hrefs                      1     28.23 22469 -8231.9
## - kw_min_avg                     1     37.68 22478 -8219.4
## - data_channel_is_entertainment  1     50.94 22491 -8201.9
## - kw_min_min                     1     75.81 22516 -8169.0
## - weekday_is_friday              1    103.29 22544 -8132.7
## - weekday_is_monday              1    115.69 22556 -8116.4
## - weekday_is_tuesday             1    197.53 22638 -8008.7
## - weekday_is_wednesday           1    197.61 22638 -8008.6
## - weekday_is_thursday            1    202.63 22643 -8002.0
## - kw_max_avg                     1    267.58 22708 -7916.9
## - kw_avg_avg                     1    543.85 22984 -7557.3
## 
## Step:  AIC=-8269.24
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     max_positive_polarity + avg_negative_polarity + max_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - max_positive_polarity          1      0.06 22441 -8271.2
## - max_negative_polarity          1      0.30 22441 -8270.8
## - data_channel_is_world          1      0.51 22441 -8270.6
## - n_unique_tokens                1      0.61 22441 -8270.4
## - n_non_stop_words               1      0.96 22442 -8270.0
## - global_sentiment_polarity      1      1.39 22442 -8269.4
## <none>                                       22441 -8269.2
## - n_non_stop_unique_tokens       1      1.94 22443 -8268.7
## - num_videos                     1      2.03 22443 -8268.5
## - global_rate_positive_words     1      2.66 22443 -8267.7
## - n_tokens_content               1      2.90 22443 -8267.4
## - avg_negative_polarity          1      3.62 22444 -8266.4
## - n_tokens_title                 1      3.67 22444 -8266.4
## - rate_positive_words            1      3.68 22444 -8266.4
## - self_reference_min_shares      1      4.58 22445 -8265.2
## - LDA_03                         1      5.65 22446 -8263.8
## - data_channel_is_lifestyle      1      5.79 22446 -8263.6
## - title_subjectivity             1      6.92 22448 -8262.1
## - LDA_01                         1      7.06 22448 -8261.9
## - min_positive_polarity          1      7.15 22448 -8261.8
## - num_imgs                       1      7.89 22448 -8260.8
## - title_sentiment_polarity       1      8.54 22449 -8259.9
## - self_reference_avg_sharess     1      8.71 22449 -8259.7
## - num_keywords                   1     10.29 22451 -8257.6
## - data_channel_is_tech           1     10.34 22451 -8257.5
## - kw_avg_max                     1     11.40 22452 -8256.1
## - average_token_length           1     11.42 22452 -8256.1
## - kw_max_min                     1     12.85 22453 -8254.2
## - abs_title_subjectivity         1     14.15 22455 -8252.5
## - kw_min_max                     1     14.31 22455 -8252.3
## - num_self_hrefs                 1     16.13 22457 -8249.9
## - data_channel_is_bus            1     17.69 22458 -8247.8
## - data_channel_is_socmed         1     19.52 22460 -8245.4
## - kw_avg_min                     1     20.14 22461 -8244.6
## - global_subjectivity            1     24.40 22465 -8238.9
## - LDA_02                         1     27.10 22468 -8235.4
## - LDA_00                         1     28.17 22469 -8233.9
## - num_hrefs                      1     28.20 22469 -8233.9
## - kw_min_avg                     1     37.66 22478 -8221.4
## - data_channel_is_entertainment  1     50.98 22492 -8203.8
## - kw_min_min                     1     75.81 22516 -8171.0
## - weekday_is_friday              1    103.27 22544 -8134.7
## - weekday_is_monday              1    115.66 22556 -8118.4
## - weekday_is_tuesday             1    197.50 22638 -8010.7
## - weekday_is_wednesday           1    197.58 22638 -8010.6
## - weekday_is_thursday            1    202.60 22643 -8004.0
## - kw_max_avg                     1    267.59 22708 -7918.8
## - kw_avg_avg                     1    543.82 22984 -7559.3
## 
## Step:  AIC=-8271.16
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     avg_negative_polarity + max_negative_polarity + title_subjectivity + 
##     title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - max_negative_polarity          1      0.27 22441 -8272.8
## - data_channel_is_world          1      0.50 22441 -8272.5
## - n_unique_tokens                1      0.70 22441 -8272.2
## - n_non_stop_words               1      0.91 22442 -8272.0
## <none>                                       22441 -8271.2
## - global_sentiment_polarity      1      1.96 22443 -8270.6
## - num_videos                     1      1.99 22443 -8270.5
## - n_non_stop_unique_tokens       1      2.05 22443 -8270.4
## - global_rate_positive_words     1      2.76 22443 -8269.5
## - n_tokens_content               1      2.85 22443 -8269.4
## - avg_negative_polarity          1      3.59 22444 -8268.4
## - n_tokens_title                 1      3.66 22444 -8268.3
## - rate_positive_words            1      4.06 22445 -8267.8
## - self_reference_min_shares      1      4.58 22445 -8267.1
## - LDA_03                         1      5.65 22446 -8265.7
## - data_channel_is_lifestyle      1      5.76 22446 -8265.5
## - title_subjectivity             1      6.90 22448 -8264.0
## - LDA_01                         1      7.05 22448 -8263.8
## - min_positive_polarity          1      7.11 22448 -8263.7
## - num_imgs                       1      7.84 22448 -8262.8
## - title_sentiment_polarity       1      8.57 22449 -8261.8
## - self_reference_avg_sharess     1      8.71 22449 -8261.6
## - num_keywords                   1     10.27 22451 -8259.6
## - data_channel_is_tech           1     10.40 22451 -8259.4
## - kw_avg_max                     1     11.39 22452 -8258.1
## - average_token_length           1     11.43 22452 -8258.0
## - kw_max_min                     1     12.84 22453 -8256.1
## - abs_title_subjectivity         1     14.16 22455 -8254.4
## - kw_min_max                     1     14.33 22455 -8254.2
## - num_self_hrefs                 1     16.11 22457 -8251.8
## - data_channel_is_bus            1     17.65 22458 -8249.8
## - data_channel_is_socmed         1     19.67 22460 -8247.1
## - kw_avg_min                     1     20.15 22461 -8246.5
## - global_subjectivity            1     24.41 22465 -8240.8
## - LDA_02                         1     27.07 22468 -8237.3
## - num_hrefs                      1     28.14 22469 -8235.9
## - LDA_00                         1     28.14 22469 -8235.9
## - kw_min_avg                     1     37.66 22478 -8223.3
## - data_channel_is_entertainment  1     50.97 22492 -8205.7
## - kw_min_min                     1     75.85 22516 -8172.8
## - weekday_is_friday              1    103.27 22544 -8136.7
## - weekday_is_monday              1    115.70 22556 -8120.3
## - weekday_is_tuesday             1    197.50 22638 -8012.6
## - weekday_is_wednesday           1    197.56 22638 -8012.6
## - weekday_is_thursday            1    202.56 22643 -8006.0
## - kw_max_avg                     1    267.54 22708 -7920.8
## - kw_avg_avg                     1    543.78 22984 -7561.3
## 
## Step:  AIC=-8272.81
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     data_channel_is_world + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     avg_negative_polarity + title_subjectivity + title_sentiment_polarity + 
##     abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - data_channel_is_world          1      0.51 22441 -8274.1
## - n_unique_tokens                1      0.55 22441 -8274.1
## - n_non_stop_words               1      0.97 22442 -8273.5
## <none>                                       22441 -8272.8
## - n_non_stop_unique_tokens       1      1.86 22443 -8272.3
## - num_videos                     1      2.10 22443 -8272.0
## - global_sentiment_polarity      1      2.17 22443 -8271.9
## - global_rate_positive_words     1      2.56 22443 -8271.4
## - n_tokens_content               1      2.99 22444 -8270.9
## - n_tokens_title                 1      3.67 22445 -8269.9
## - rate_positive_words            1      3.80 22445 -8269.8
## - avg_negative_polarity          1      4.39 22445 -8269.0
## - self_reference_min_shares      1      4.55 22445 -8268.8
## - LDA_03                         1      5.62 22447 -8267.4
## - data_channel_is_lifestyle      1      5.78 22447 -8267.2
## - title_subjectivity             1      6.97 22448 -8265.6
## - LDA_01                         1      7.02 22448 -8265.5
## - min_positive_polarity          1      7.26 22448 -8265.2
## - num_imgs                       1      7.90 22449 -8264.3
## - title_sentiment_polarity       1      8.50 22449 -8263.6
## - self_reference_avg_sharess     1      8.71 22450 -8263.3
## - num_keywords                   1     10.26 22451 -8261.2
## - data_channel_is_tech           1     10.34 22451 -8261.1
## - average_token_length           1     11.37 22452 -8259.7
## - kw_avg_max                     1     11.38 22452 -8259.7
## - kw_max_min                     1     12.85 22454 -8257.8
## - abs_title_subjectivity         1     14.24 22455 -8256.0
## - kw_min_max                     1     14.33 22455 -8255.8
## - num_self_hrefs                 1     16.14 22457 -8253.4
## - data_channel_is_bus            1     17.74 22459 -8251.3
## - data_channel_is_socmed         1     19.59 22461 -8248.9
## - kw_avg_min                     1     20.15 22461 -8248.1
## - global_subjectivity            1     25.69 22467 -8240.8
## - LDA_02                         1     27.08 22468 -8238.9
## - LDA_00                         1     28.15 22469 -8237.5
## - num_hrefs                      1     28.74 22470 -8236.8
## - kw_min_avg                     1     37.62 22479 -8225.0
## - data_channel_is_entertainment  1     50.90 22492 -8207.4
## - kw_min_min                     1     75.83 22517 -8174.5
## - weekday_is_friday              1    103.33 22544 -8138.2
## - weekday_is_monday              1    115.63 22557 -8122.0
## - weekday_is_wednesday           1    197.50 22638 -8014.3
## - weekday_is_tuesday             1    197.56 22638 -8014.2
## - weekday_is_thursday            1    202.55 22643 -8007.7
## - kw_max_avg                     1    267.42 22708 -7922.6
## - kw_avg_avg                     1    543.65 22985 -7563.1
## 
## Step:  AIC=-8274.14
## shares ~ n_tokens_title + n_tokens_content + n_unique_tokens + 
##     n_non_stop_words + n_non_stop_unique_tokens + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     kw_min_min + kw_max_min + kw_avg_min + kw_min_max + kw_avg_max + 
##     kw_min_avg + kw_max_avg + kw_avg_avg + self_reference_min_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + avg_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - n_unique_tokens                1      0.66 22442 -8275.3
## - n_non_stop_words               1      0.98 22442 -8274.8
## <none>                                       22441 -8274.1
## - n_non_stop_unique_tokens       1      2.04 22443 -8273.4
## - num_videos                     1      2.11 22444 -8273.3
## - global_sentiment_polarity      1      2.17 22444 -8273.3
## - global_rate_positive_words     1      2.43 22444 -8272.9
## - n_tokens_content               1      3.00 22444 -8272.2
## - n_tokens_title                 1      3.61 22445 -8271.4
## - rate_positive_words            1      3.76 22445 -8271.2
## - avg_negative_polarity          1      4.40 22446 -8270.3
## - self_reference_min_shares      1      4.56 22446 -8270.1
## - LDA_03                         1      5.18 22447 -8269.3
## - data_channel_is_lifestyle      1      5.71 22447 -8268.6
## - LDA_01                         1      6.53 22448 -8267.5
## - title_subjectivity             1      6.95 22448 -8266.9
## - min_positive_polarity          1      7.15 22449 -8266.7
## - num_imgs                       1      8.11 22450 -8265.4
## - title_sentiment_polarity       1      8.49 22450 -8264.9
## - self_reference_avg_sharess     1      8.71 22450 -8264.6
## - num_keywords                   1     10.36 22452 -8262.4
## - kw_avg_max                     1     11.06 22452 -8261.5
## - average_token_length           1     11.63 22453 -8260.7
## - kw_max_min                     1     12.88 22454 -8259.1
## - abs_title_subjectivity         1     14.19 22456 -8257.3
## - kw_min_max                     1     14.61 22456 -8256.8
## - num_self_hrefs                 1     15.99 22457 -8255.0
## - kw_avg_min                     1     20.22 22462 -8249.4
## - data_channel_is_bus            1     21.21 22463 -8248.1
## - data_channel_is_tech           1     22.91 22464 -8245.8
## - global_subjectivity            1     26.04 22467 -8241.7
## - LDA_00                         1     28.64 22470 -8238.2
## - num_hrefs                      1     29.03 22470 -8237.7
## - LDA_02                         1     32.72 22474 -8232.8
## - data_channel_is_socmed         1     34.45 22476 -8230.5
## - kw_min_avg                     1     38.41 22480 -8225.3
## - data_channel_is_entertainment  1     58.20 22500 -8199.1
## - kw_min_min                     1     78.39 22520 -8172.5
## - weekday_is_friday              1    103.29 22545 -8139.6
## - weekday_is_monday              1    115.73 22557 -8123.2
## - weekday_is_wednesday           1    197.42 22639 -8015.7
## - weekday_is_tuesday             1    197.55 22639 -8015.6
## - weekday_is_thursday            1    202.59 22644 -8008.9
## - kw_max_avg                     1    274.77 22716 -7914.3
## - kw_avg_avg                     1    561.26 23003 -7541.7
## 
## Step:  AIC=-8275.26
## shares ~ n_tokens_title + n_tokens_content + n_non_stop_words + 
##     n_non_stop_unique_tokens + num_hrefs + num_self_hrefs + num_imgs + 
##     num_videos + average_token_length + num_keywords + data_channel_is_lifestyle + 
##     data_channel_is_entertainment + data_channel_is_bus + data_channel_is_socmed + 
##     data_channel_is_tech + kw_min_min + kw_max_min + kw_avg_min + 
##     kw_min_max + kw_avg_max + kw_min_avg + kw_max_avg + kw_avg_avg + 
##     self_reference_min_shares + self_reference_avg_sharess + 
##     weekday_is_monday + weekday_is_tuesday + weekday_is_wednesday + 
##     weekday_is_thursday + weekday_is_friday + LDA_00 + LDA_01 + 
##     LDA_02 + LDA_03 + global_subjectivity + global_sentiment_polarity + 
##     global_rate_positive_words + rate_positive_words + min_positive_polarity + 
##     avg_negative_polarity + title_subjectivity + title_sentiment_polarity + 
##     abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - n_non_stop_words               1      0.76 22443 -8276.3
## <none>                                       22442 -8275.3
## - n_non_stop_unique_tokens       1      2.04 22444 -8274.6
## - global_rate_positive_words     1      2.28 22444 -8274.2
## - num_videos                     1      2.34 22444 -8274.2
## - global_sentiment_polarity      1      2.34 22444 -8274.2
## - n_tokens_content               1      2.37 22444 -8274.1
## - n_tokens_title                 1      3.59 22446 -8272.5
## - rate_positive_words            1      4.04 22446 -8271.9
## - avg_negative_polarity          1      4.22 22446 -8271.7
## - self_reference_min_shares      1      4.59 22447 -8271.2
## - LDA_03                         1      4.82 22447 -8270.9
## - data_channel_is_lifestyle      1      5.92 22448 -8269.4
## - LDA_01                         1      6.28 22448 -8268.9
## - min_positive_polarity          1      6.53 22449 -8268.6
## - title_subjectivity             1      7.00 22449 -8268.0
## - title_sentiment_polarity       1      8.45 22451 -8266.1
## - self_reference_avg_sharess     1      8.68 22451 -8265.8
## - num_imgs                       1      9.50 22452 -8264.7
## - num_keywords                   1     10.44 22453 -8263.4
## - kw_avg_max                     1     10.92 22453 -8262.8
## - average_token_length           1     10.99 22453 -8262.7
## - kw_max_min                     1     12.90 22455 -8260.2
## - abs_title_subjectivity         1     14.03 22456 -8258.7
## - kw_min_max                     1     14.64 22457 -8257.9
## - num_self_hrefs                 1     15.93 22458 -8256.2
## - kw_avg_min                     1     20.25 22462 -8250.4
## - data_channel_is_bus            1     21.28 22463 -8249.1
## - data_channel_is_tech           1     22.85 22465 -8247.0
## - global_subjectivity            1     26.02 22468 -8242.8
## - LDA_00                         1     28.49 22471 -8239.5
## - num_hrefs                      1     29.82 22472 -8237.8
## - LDA_02                         1     33.11 22475 -8233.4
## - data_channel_is_socmed         1     34.64 22477 -8231.4
## - kw_min_avg                     1     38.27 22480 -8226.6
## - data_channel_is_entertainment  1     58.65 22501 -8199.7
## - kw_min_min                     1     79.27 22521 -8172.4
## - weekday_is_friday              1    103.36 22545 -8140.6
## - weekday_is_monday              1    115.72 22558 -8124.3
## - weekday_is_wednesday           1    197.47 22640 -8016.8
## - weekday_is_tuesday             1    197.54 22640 -8016.7
## - weekday_is_thursday            1    202.57 22645 -8010.1
## - kw_max_avg                     1    274.84 22717 -7915.4
## - kw_avg_avg                     1    561.43 23004 -7542.6
## 
## Step:  AIC=-8276.25
## shares ~ n_tokens_title + n_tokens_content + n_non_stop_unique_tokens + 
##     num_hrefs + num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     kw_min_min + kw_max_min + kw_avg_min + kw_min_max + kw_avg_max + 
##     kw_min_avg + kw_max_avg + kw_avg_avg + self_reference_min_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + avg_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## - n_non_stop_unique_tokens       1      1.34 22444 -8276.5
## <none>                                       22443 -8276.3
## - global_rate_positive_words     1      2.22 22445 -8275.3
## - num_videos                     1      2.38 22445 -8275.1
## - global_sentiment_polarity      1      2.92 22446 -8274.4
## - n_tokens_title                 1      3.88 22447 -8273.1
## - n_tokens_content               1      4.08 22447 -8272.8
## - avg_negative_polarity          1      4.36 22447 -8272.5
## - self_reference_min_shares      1      4.61 22447 -8272.1
## - LDA_03                         1      4.96 22448 -8271.7
## - rate_positive_words            1      5.31 22448 -8271.2
## - data_channel_is_lifestyle      1      5.77 22449 -8270.6
## - min_positive_polarity          1      6.14 22449 -8270.1
## - LDA_01                         1      6.51 22449 -8269.6
## - title_subjectivity             1      6.90 22450 -8269.1
## - title_sentiment_polarity       1      8.51 22451 -8267.0
## - self_reference_avg_sharess     1      8.71 22452 -8266.7
## - num_imgs                       1      9.94 22453 -8265.1
## - num_keywords                   1     10.15 22453 -8264.8
## - kw_avg_max                     1     11.15 22454 -8263.5
## - kw_max_min                     1     12.81 22456 -8261.3
## - abs_title_subjectivity         1     14.06 22457 -8259.6
## - kw_min_max                     1     14.65 22457 -8258.9
## - num_self_hrefs                 1     15.61 22458 -8257.6
## - average_token_length           1     18.68 22462 -8253.5
## - kw_avg_min                     1     20.16 22463 -8251.6
## - data_channel_is_bus            1     21.07 22464 -8250.4
## - data_channel_is_tech           1     23.40 22466 -8247.3
## - LDA_00                         1     28.12 22471 -8241.0
## - num_hrefs                      1     29.11 22472 -8239.7
## - global_subjectivity            1     30.48 22473 -8237.9
## - LDA_02                         1     33.89 22477 -8233.4
## - data_channel_is_socmed         1     35.27 22478 -8231.6
## - kw_min_avg                     1     38.07 22481 -8227.9
## - data_channel_is_entertainment  1     57.96 22501 -8201.6
## - kw_min_min                     1     79.00 22522 -8173.8
## - weekday_is_friday              1    103.13 22546 -8141.9
## - weekday_is_monday              1    115.43 22558 -8125.7
## - weekday_is_tuesday             1    197.20 22640 -8018.1
## - weekday_is_wednesday           1    197.21 22640 -8018.1
## - weekday_is_thursday            1    202.28 22645 -8011.5
## - kw_max_avg                     1    274.23 22717 -7917.2
## - kw_avg_avg                     1    560.72 23004 -7544.6
## 
## Step:  AIC=-8276.48
## shares ~ n_tokens_title + n_tokens_content + num_hrefs + num_self_hrefs + 
##     num_imgs + num_videos + average_token_length + num_keywords + 
##     data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     kw_min_min + kw_max_min + kw_avg_min + kw_min_max + kw_avg_max + 
##     kw_min_avg + kw_max_avg + kw_avg_avg + self_reference_min_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + avg_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity
## 
##                                 Df Sum of Sq   RSS     AIC
## <none>                                       22444 -8276.5
## - num_videos                     1      2.31 22446 -8275.4
## - global_sentiment_polarity      1      2.49 22447 -8275.2
## - global_rate_positive_words     1      2.88 22447 -8274.7
## - n_tokens_title                 1      3.79 22448 -8273.5
## - avg_negative_polarity          1      4.32 22449 -8272.8
## - self_reference_min_shares      1      4.61 22449 -8272.4
## - rate_positive_words            1      4.80 22449 -8272.1
## - LDA_03                         1      5.01 22449 -8271.8
## - data_channel_is_lifestyle      1      6.04 22450 -8270.5
## - n_tokens_content               1      6.48 22451 -8269.9
## - LDA_01                         1      6.77 22451 -8269.5
## - title_subjectivity             1      7.01 22451 -8269.2
## - min_positive_polarity          1      7.34 22452 -8268.7
## - self_reference_avg_sharess     1      8.65 22453 -8267.0
## - title_sentiment_polarity       1      8.65 22453 -8267.0
## - num_keywords                   1     10.49 22455 -8264.6
## - kw_avg_max                     1     11.28 22455 -8263.5
## - kw_max_min                     1     12.89 22457 -8261.4
## - num_imgs                       1     13.47 22458 -8260.6
## - abs_title_subjectivity         1     13.97 22458 -8260.0
## - kw_min_max                     1     14.60 22459 -8259.1
## - num_self_hrefs                 1     16.20 22460 -8257.0
## - kw_avg_min                     1     20.25 22464 -8251.7
## - data_channel_is_bus            1     21.19 22465 -8250.4
## - data_channel_is_tech           1     23.29 22467 -8247.6
## - LDA_00                         1     28.21 22472 -8241.1
## - global_subjectivity            1     29.24 22473 -8239.8
## - num_hrefs                      1     31.66 22476 -8236.6
## - LDA_02                         1     33.75 22478 -8233.8
## - average_token_length           1     34.09 22478 -8233.3
## - data_channel_is_socmed         1     35.11 22479 -8232.0
## - kw_min_avg                     1     38.06 22482 -8228.1
## - data_channel_is_entertainment  1     58.45 22503 -8201.1
## - kw_min_min                     1     78.48 22523 -8174.7
## - weekday_is_friday              1    103.71 22548 -8141.4
## - weekday_is_monday              1    115.78 22560 -8125.5
## - weekday_is_wednesday           1    197.50 22642 -8018.0
## - weekday_is_tuesday             1    197.57 22642 -8017.9
## - weekday_is_thursday            1    202.53 22647 -8011.4
## - kw_max_avg                     1    275.11 22719 -7916.3
## - kw_avg_avg                     1    562.10 23006 -7543.0
# Print the model summary for fit_mlm2: the fitted call, residual quantiles,
# coefficient table, residual standard error, R-squared and F-statistic.
summary(fit_mlm2)
## 
## Call:
## lm(formula = shares ~ n_tokens_title + n_tokens_content + num_hrefs + 
##     num_self_hrefs + num_imgs + num_videos + average_token_length + 
##     num_keywords + data_channel_is_lifestyle + data_channel_is_entertainment + 
##     data_channel_is_bus + data_channel_is_socmed + data_channel_is_tech + 
##     kw_min_min + kw_max_min + kw_avg_min + kw_min_max + kw_avg_max + 
##     kw_min_avg + kw_max_avg + kw_avg_avg + self_reference_min_shares + 
##     self_reference_avg_sharess + weekday_is_monday + weekday_is_tuesday + 
##     weekday_is_wednesday + weekday_is_thursday + weekday_is_friday + 
##     LDA_00 + LDA_01 + LDA_02 + LDA_03 + global_subjectivity + 
##     global_sentiment_polarity + global_rate_positive_words + 
##     rate_positive_words + min_positive_polarity + avg_negative_polarity + 
##     title_subjectivity + title_sentiment_polarity + abs_title_subjectivity, 
##     data = train.news)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1150 -0.5461 -0.1649  0.3857  5.5981 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     6.886e+00  6.650e-02 103.553  < 2e-16 ***
## n_tokens_title                  5.593e-03  2.497e-03   2.240 0.025119 *  
## n_tokens_content                3.988e-05  1.362e-05   2.929 0.003408 ** 
## num_hrefs                       3.733e-03  5.768e-04   6.472 9.85e-11 ***
## num_self_hrefs                 -7.158e-03  1.547e-03  -4.629 3.70e-06 ***
## num_imgs                        3.061e-03  7.251e-04   4.221 2.44e-05 ***
## num_videos                      2.354e-03  1.348e-03   1.747 0.080635 .  
## average_token_length           -6.956e-02  1.036e-02  -6.716 1.91e-11 ***
## num_keywords                    1.189e-02  3.191e-03   3.726 0.000195 ***
## data_channel_is_lifestyle1     -8.216e-02  2.907e-02  -2.826 0.004713 ** 
## data_channel_is_entertainment1 -1.723e-01  1.960e-02  -8.793  < 2e-16 ***
## data_channel_is_bus1           -1.462e-01  2.762e-02  -5.294 1.20e-07 ***
## data_channel_is_socmed1         1.825e-01  2.678e-02   6.815 9.60e-12 ***
## data_channel_is_tech1           1.375e-01  2.477e-02   5.550 2.87e-08 ***
## kw_min_min                      9.509e-04  9.332e-05  10.189  < 2e-16 ***
## kw_max_min                      1.942e-05  4.703e-06   4.129 3.65e-05 ***
## kw_avg_min                     -1.415e-04  2.734e-05  -5.176 2.29e-07 ***
## kw_min_max                     -4.463e-07  1.016e-07  -4.394 1.12e-05 ***
## kw_avg_max                     -2.627e-07  6.801e-08  -3.863 0.000112 ***
## kw_min_avg                     -4.651e-05  6.554e-06  -7.096 1.32e-12 ***
## kw_max_avg                     -4.114e-05  2.156e-06 -19.077  < 2e-16 ***
## kw_avg_avg                      3.351e-04  1.229e-05  27.268  < 2e-16 ***
## self_reference_min_shares       1.124e-06  4.549e-07   2.471 0.013495 *  
## self_reference_avg_sharess      1.275e-06  3.771e-07   3.382 0.000721 ***
## weekday_is_monday1             -2.310e-01  1.867e-02 -12.376  < 2e-16 ***
## weekday_is_tuesday1            -2.959e-01  1.830e-02 -16.166  < 2e-16 ***
## weekday_is_wednesday1          -2.956e-01  1.829e-02 -16.164  < 2e-16 ***
## weekday_is_thursday1           -3.001e-01  1.833e-02 -16.368  < 2e-16 ***
## weekday_is_friday1             -2.268e-01  1.936e-02 -11.713  < 2e-16 ***
## LDA_00                          2.425e-01  3.970e-02   6.109 1.01e-09 ***
## LDA_01                         -1.250e-01  4.176e-02  -2.993 0.002766 ** 
## LDA_02                         -2.517e-01  3.767e-02  -6.682 2.40e-11 ***
## LDA_03                         -9.811e-02  3.813e-02  -2.573 0.010082 *  
## global_subjectivity             4.276e-01  6.876e-02   6.219 5.06e-10 ***
## global_sentiment_polarity      -1.815e-01  1.001e-01  -1.814 0.069745 .  
## global_rate_positive_words     -8.480e-01  4.343e-01  -1.953 0.050885 .  
## rate_positive_words             1.431e-01  5.678e-02   2.520 0.011742 *  
## min_positive_polarity          -2.575e-01  8.261e-02  -3.117 0.001829 ** 
## avg_negative_polarity          -1.226e-01  5.129e-02  -2.390 0.016859 *  
## title_subjectivity              5.628e-02  1.848e-02   3.046 0.002321 ** 
## title_sentiment_polarity        6.904e-02  2.041e-02   3.382 0.000720 ***
## abs_title_subjectivity          1.364e-01  3.173e-02   4.299 1.72e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8695 on 29690 degrees of freedom
## Multiple R-squared:  0.129,  Adjusted R-squared:  0.1278 
## F-statistic: 107.3 on 41 and 29690 DF,  p-value: < 2.2e-16
# Predict shares for the rows of test.news using fit_mlm2, then report the
# root mean squared error between the predictions and the observed values.
pred_mlm2 <- predict(fit_mlm2, newdata = test.news)

# RMSE: square root of the mean squared prediction error (auto-printed).
sqrt(mean((pred_mlm2 - test.news$shares)^2))
## [1] 0.8725425
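# We thus get an optimized model with an R-squared value of approximately 0.13 and a root mean square error of approximately 0.87 on the test set. This model includes only the variables retained by the stepwise (AIC-based) selection, most of which are statistically significant.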