library(readxl)
# Load the 2019 social-media feature workbook into a tibble.
# NOTE: the path is relative, so it is resolved against the current working
# directory at the time the script runs.
socailmedia_2019_features <- read_excel(
  path = "Downloads/socailmedia_2019_features.xlsx"
)
# Optional interactive inspection (kept disabled for non-interactive runs):
# View(socailmedia_2019_features)

# Report the table size as c(rows, columns).
dim(socailmedia_2019_features)
## [1] 49587   350
# Preview the first six rows (the default for head(), made explicit here).
head(x = socailmedia_2019_features, n = 6L)
## # A tibble: 6 x 350
##   subreddit author date                post  automated_reada… coleman_liau_in…
##   <chr>     <chr>  <dttm>              <chr>            <dbl>            <dbl>
## 1 depressi… anona… 2019-01-01 00:00:00 "Any…           10.2              5.47 
## 2 depressi… gimli… 2019-01-01 00:00:00 "Cra…            9.13             9.90 
## 3 depressi… Wreck… 2019-01-01 00:00:00 "Cal…           -1.16             0.873
## 4 depressi… danie… 2019-01-01 00:00:00 "Onl…            0.830            3.05 
## 5 depressi… emman… 2019-01-01 00:00:00 "Any…            1.35             3.60 
## 6 depressi… Lunak… 2019-01-01 00:00:00 "How…            4.25             4.67 
## # … with 344 more variables: flesch_kincaid_grade_level <dbl>,
## #   flesch_reading_ease <dbl>, gulpease_index <dbl>, gunning_fog_index <dbl>,
## #   lix <dbl>, smog_index <dbl>, wiener_sachtextformel <dbl>, n_chars <dbl>,
## #   n_long_words <dbl>, n_monosyllable_words <dbl>, n_polysyllable_words <dbl>,
## #   n_sents <dbl>, n_syllables <dbl>, n_unique_words <dbl>, n_words <dbl>,
## #   sent_neg <dbl>, sent_neu <dbl>, sent_pos <dbl>, sent_compound <dbl>,
## #   economic_stress_total <dbl>, isolation_total <dbl>,
## #   substance_use_total <dbl>, guns_total <dbl>, domestic_stress_total <dbl>,
## #   suicidality_total <dbl>, punctuation <dbl>, liwc_1st_pers <dbl>,
## #   liwc_2nd_pers <dbl>, liwc_3rd_pers <dbl>, liwc_achievement <dbl>,
## #   liwc_adverbs <dbl>, liwc_affective_processes <dbl>, liwc_anger <dbl>,
## #   liwc_anxiety <dbl>, liwc_articles_article <dbl>, liwc_assent <dbl>,
## #   liwc_auxiliary_verbs <dbl>, liwc_biological <dbl>, liwc_body <dbl>,
## #   liwc_causation <dbl>, liwc_certainty <dbl>, liwc_cognitive <dbl>,
## #   liwc_common_verbs <dbl>, liwc_conjunctions <dbl>, liwc_death <dbl>,
## #   liwc_discrepancy <dbl>, liwc_exclusive <dbl>, liwc_family <dbl>,
## #   liwc_feel <dbl>, liwc_fillers <dbl>, liwc_friends <dbl>,
## #   liwc_future_tense <dbl>, liwc_health <dbl>, liwc_hear <dbl>,
## #   liwc_home <dbl>, liwc_humans <dbl>, liwc_impersonal_pronouns <dbl>,
## #   liwc_inclusive <dbl>, liwc_ingestion <dbl>, liwc_inhibition <dbl>,
## #   liwc_insight <dbl>, liwc_leisure <dbl>, liwc_money <dbl>,
## #   liwc_motion <dbl>, liwc_negations <dbl>, liwc_negative_emotion <dbl>,
## #   liwc_nonfluencies <dbl>, liwc_numbers <dbl>, liwc_past_tense <dbl>,
## #   liwc_perceptual_processes <dbl>, liwc_personal_pronouns <dbl>,
## #   liwc_positive_emotion <dbl>, liwc_prepositions <dbl>,
## #   liwc_present_tense <dbl>, liwc_quantifiers <dbl>, liwc_relativity <dbl>,
## #   liwc_religion <dbl>, liwc_sadness <dbl>, liwc_see <dbl>, liwc_sexual <dbl>,
## #   liwc_social_processes <dbl>, liwc_space <dbl>, liwc_swear_words <dbl>,
## #   liwc_tentative <dbl>, liwc_time <dbl>, liwc_total_functional <dbl>,
## #   liwc_total_pronouns <dbl>, liwc_work <dbl>, tfidf_abl <dbl>,
## #   tfidf_abus <dbl>, tfidf_actual <dbl>, tfidf_addict <dbl>, tfidf_adhd <dbl>,
## #   tfidf_advic <dbl>, tfidf_ago <dbl>, tfidf_alcohol <dbl>,
## #   tfidf_almost <dbl>, tfidf_alon <dbl>, tfidf_alreadi <dbl>,
## #   tfidf_also <dbl>, …
# Per-column summary: length/class/mode for character columns and
# min/quartiles/mean/max for numeric and date-time columns.
summary(object = socailmedia_2019_features)
##   subreddit            author               date                    
##  Length:49587       Length:49587       Min.   :2019-01-01 00:00:00  
##  Class :character   Class :character   1st Qu.:2019-01-25 00:00:00  
##  Mode  :character   Mode  :character   Median :2019-02-21 00:00:00  
##                                        Mean   :2019-02-21 22:50:39  
##                                        3rd Qu.:2019-03-22 00:00:00  
##                                        Max.   :2019-04-20 00:00:00  
##      post           automated_readability_index coleman_liau_index
##  Length:49587       Min.   : -8.940             Min.   :-19.846   
##  Class :character   1st Qu.:  1.482             1st Qu.:  3.332   
##  Mode  :character   Median :  3.045             Median :  4.605   
##                     Mean   :  3.349             Mean   :  4.747   
##                     3rd Qu.:  4.773             3rd Qu.:  5.906   
##                     Max.   :243.975             Max.   :306.833   
##  flesch_kincaid_grade_level flesch_reading_ease gulpease_index   
##  Min.   : -3.205            Min.   :-1496.72    Min.   :-428.50  
##  1st Qu.:  3.112            1st Qu.:   80.02    1st Qu.:  69.91  
##  Median :  4.269            Median :   85.95    Median :  75.20  
##  Mean   :  4.483            Mean   :   84.98    Mean   :  76.46  
##  3rd Qu.:  5.579            3rd Qu.:   91.34    3rd Qu.:  81.43  
##  Max.   :222.708            Max.   :  120.71    Max.   : 266.37  
##  gunning_fog_index      lix           smog_index     wiener_sachtextformel
##  Min.   : 0.600    Min.   :  1.50   Min.   : 3.129   Min.   :-3.8942      
##  1st Qu.: 6.006    1st Qu.: 21.79   1st Qu.: 6.938   1st Qu.: 0.5845      
##  Median : 7.287    Median : 25.64   Median : 7.909   Median : 1.4737      
##  Mean   : 7.476    Mean   : 26.08   Mean   : 7.916   Mean   : 1.6058      
##  3rd Qu.: 8.700    3rd Qu.: 29.79   3rd Qu.: 8.842   3rd Qu.: 2.4620      
##  Max.   :45.243    Max.   :121.86   Max.   :20.267   Max.   :20.7409      
##     n_chars         n_long_words    n_monosyllable_words n_polysyllable_words
##  Min.   :    8.0   Min.   :  0.00   Min.   :   1.0       Min.   :  0.00      
##  1st Qu.:  296.0   1st Qu.: 10.00   1st Qu.:  60.0       1st Qu.:  4.00      
##  Median :  543.0   Median : 18.00   Median : 110.0       Median :  8.00      
##  Mean   :  753.3   Mean   : 26.03   Mean   : 152.5       Mean   : 11.91      
##  3rd Qu.:  954.0   3rd Qu.: 33.00   3rd Qu.: 194.0       3rd Qu.: 15.00      
##  Max.   :20679.0   Max.   :791.00   Max.   :3745.0       Max.   :343.00      
##     n_sents        n_syllables     n_unique_words      n_words      
##  Min.   :  1.00   Min.   :   2.0   Min.   :   1.0   Min.   :   2.0  
##  1st Qu.:  7.00   1st Qu.:  97.0   1st Qu.:  54.0   1st Qu.:  76.0  
##  Median : 12.00   Median : 179.0   Median :  87.0   Median : 139.0  
##  Mean   : 15.75   Mean   : 247.2   Mean   : 103.5   Mean   : 192.2  
##  3rd Qu.: 20.00   3rd Qu.: 314.0   3rd Qu.: 134.0   3rd Qu.: 244.0  
##  Max.   :614.00   Max.   :6501.0   Max.   :1394.0   Max.   :4900.0  
##     sent_neg         sent_neu         sent_pos     sent_compound    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :-1.0000  
##  1st Qu.:0.1070   1st Qu.:0.6650   1st Qu.:0.076   1st Qu.:-0.9403  
##  Median :0.1520   Median :0.7220   Median :0.115   Median :-0.6440  
##  Mean   :0.1596   Mean   :0.7184   Mean   :0.122   Mean   :-0.2503  
##  3rd Qu.:0.2030   3rd Qu.:0.7760   3rd Qu.:0.160   3rd Qu.: 0.6071  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   : 0.9998  
##  economic_stress_total isolation_total  substance_use_total   guns_total      
##  Min.   : 0.0000       Min.   : 0.000   Min.   : 0.0000     Min.   : 0.00000  
##  1st Qu.: 0.0000       1st Qu.: 0.000   1st Qu.: 0.0000     1st Qu.: 0.00000  
##  Median : 0.0000       Median : 0.000   Median : 0.0000     Median : 0.00000  
##  Mean   : 0.8374       Mean   : 0.428   Mean   : 0.4756     Mean   : 0.01978  
##  3rd Qu.: 1.0000       3rd Qu.: 0.000   3rd Qu.: 1.0000     3rd Qu.: 0.00000  
##  Max.   :30.0000       Max.   :37.000   Max.   :45.0000     Max.   :10.00000  
##  domestic_stress_total suicidality_total  punctuation     liwc_1st_pers    
##  Min.   : 0.0000       Min.   : 0.0000   Min.   :  0.00   Min.   : 0.0000  
##  1st Qu.: 0.0000       1st Qu.: 0.0000   1st Qu.:  8.00   1st Qu.: 0.0000  
##  Median : 0.0000       Median : 0.0000   Median : 15.00   Median : 0.0000  
##  Mean   : 0.1229       Mean   : 0.2914   Mean   : 22.24   Mean   : 0.4635  
##  3rd Qu.: 0.0000       3rd Qu.: 0.0000   3rd Qu.: 27.50   3rd Qu.: 0.0000  
##  Max.   :16.0000       Max.   :23.0000   Max.   :776.00   Max.   :41.0000  
##  liwc_2nd_pers     liwc_3rd_pers    liwc_achievement  liwc_adverbs    
##  Min.   :  0.000   Min.   : 0.000   Min.   : 0.000   Min.   :  0.000  
##  1st Qu.:  0.000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.:  3.000  
##  Median :  0.000   Median : 0.000   Median : 1.000   Median :  7.000  
##  Mean   :  1.067   Mean   : 1.057   Mean   : 2.247   Mean   :  9.575  
##  3rd Qu.:  1.000   3rd Qu.: 1.000   3rd Qu.: 3.000   3rd Qu.: 13.000  
##  Max.   :132.000   Max.   :62.000   Max.   :61.000   Max.   :182.000  
##  liwc_affective_processes   liwc_anger       liwc_anxiety   
##  Min.   :  0.00           Min.   :  0.000   Min.   : 0.000  
##  1st Qu.:  4.00           1st Qu.:  0.000   1st Qu.: 0.000  
##  Median :  8.00           Median :  1.000   Median : 0.000  
##  Mean   : 11.52           Mean   :  1.573   Mean   : 1.012  
##  3rd Qu.: 15.00           3rd Qu.:  2.000   3rd Qu.: 1.000  
##  Max.   :614.00           Max.   :614.000   Max.   :37.000  
##  liwc_articles_article  liwc_assent      liwc_auxiliary_verbs liwc_biological 
##  Min.   :  0.000       Min.   : 0.0000   Min.   :  0.00       Min.   :  0.00  
##  1st Qu.:  2.000       1st Qu.: 0.0000   1st Qu.:  6.00       1st Qu.:  1.00  
##  Median :  5.000       Median : 0.0000   Median : 11.00       Median :  3.00  
##  Mean   :  8.079       Mean   : 0.2185   Mean   : 15.56       Mean   :  4.11  
##  3rd Qu.: 10.000       3rd Qu.: 0.0000   3rd Qu.: 20.00       3rd Qu.:  6.00  
##  Max.   :355.000       Max.   :28.0000   Max.   :338.00       Max.   :114.00  
##    liwc_body      liwc_causation   liwc_certainty   liwc_cognitive  
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 12.00  
##  Median : 1.000   Median : 1.000   Median : 1.000   Median : 22.00  
##  Mean   : 1.269   Mean   : 2.177   Mean   : 1.846   Mean   : 30.66  
##  3rd Qu.: 2.000   3rd Qu.: 3.000   3rd Qu.: 3.000   3rd Qu.: 39.00  
##  Max.   :55.000   Max.   :51.000   Max.   :53.000   Max.   :688.00  
##  liwc_common_verbs liwc_conjunctions   liwc_death      liwc_discrepancy 
##  Min.   :  0.00    Min.   :  0.00    Min.   : 0.0000   Min.   :  0.000  
##  1st Qu.: 13.00    1st Qu.:  5.00    1st Qu.: 0.0000   1st Qu.:  1.000  
##  Median : 23.00    Median : 10.00    Median : 0.0000   Median :  2.000  
##  Mean   : 31.59    Mean   : 13.57    Mean   : 0.5511   Mean   :  3.728  
##  3rd Qu.: 40.00    3rd Qu.: 18.00    3rd Qu.: 1.0000   3rd Qu.:  5.000  
##  Max.   :627.00    Max.   :301.00    Max.   :37.0000   Max.   :103.000  
##  liwc_exclusive     liwc_family        liwc_feel       liwc_fillers   
##  Min.   :  0.000   Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:  2.000   1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median :  5.000   Median : 0.0000   Median : 1.000   Median : 1.000  
##  Mean   :  6.419   Mean   : 0.6097   Mean   : 2.274   Mean   : 1.349  
##  3rd Qu.:  9.000   3rd Qu.: 0.0000   3rd Qu.: 3.000   3rd Qu.: 2.000  
##  Max.   :141.000   Max.   :62.0000   Max.   :42.000   Max.   :36.000  
##   liwc_friends     liwc_future_tense  liwc_health       liwc_hear      
##  Min.   : 0.0000   Min.   : 0.000    Min.   : 0.000   Min.   : 0.0000  
##  1st Qu.: 0.0000   1st Qu.: 0.000    1st Qu.: 0.000   1st Qu.: 0.0000  
##  Median : 0.0000   Median : 1.000    Median : 1.000   Median : 0.0000  
##  Mean   : 0.7083   Mean   : 1.399    Mean   : 1.831   Mean   : 0.7658  
##  3rd Qu.: 1.0000   3rd Qu.: 2.000    3rd Qu.: 3.000   3rd Qu.: 1.0000  
##  Max.   :44.0000   Max.   :49.000    Max.   :40.000   Max.   :36.0000  
##    liwc_home        liwc_humans      liwc_impersonal_pronouns liwc_inclusive   
##  Min.   : 0.0000   Min.   : 0.0000   Min.   :  0.000          Min.   :  0.000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.:  3.000          1st Qu.:  3.000  
##  Median : 0.0000   Median : 0.0000   Median :  7.000          Median :  6.000  
##  Mean   : 0.4881   Mean   : 0.8326   Mean   :  9.467          Mean   :  9.136  
##  3rd Qu.: 1.0000   3rd Qu.: 1.0000   3rd Qu.: 12.000          3rd Qu.: 12.000  
##  Max.   :37.0000   Max.   :53.0000   Max.   :247.000          Max.   :252.000  
##  liwc_ingestion    liwc_inhibition    liwc_insight      liwc_leisure    
##  Min.   : 0.0000   Min.   : 0.0000   Min.   :  0.000   Min.   : 0.0000  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.:  2.000   1st Qu.: 0.0000  
##  Median : 0.0000   Median : 0.0000   Median :  4.000   Median : 0.0000  
##  Mean   : 0.4329   Mean   : 0.8145   Mean   :  5.494   Mean   : 0.9098  
##  3rd Qu.: 0.0000   3rd Qu.: 1.0000   3rd Qu.:  7.000   3rd Qu.: 1.0000  
##  Max.   :32.0000   Max.   :24.0000   Max.   :100.000   Max.   :71.0000  
##    liwc_money       liwc_motion      liwc_negations   liwc_negative_emotion
##  Min.   : 0.0000   Min.   :  0.000   Min.   : 0.000   Min.   :  0.000      
##  1st Qu.: 0.0000   1st Qu.:  0.000   1st Qu.: 1.000   1st Qu.:  2.000      
##  Median : 0.0000   Median :  2.000   Median : 3.000   Median :  4.000      
##  Mean   : 0.5724   Mean   :  2.825   Mean   : 4.911   Mean   :  5.676      
##  3rd Qu.: 1.0000   3rd Qu.:  4.000   3rd Qu.: 7.000   3rd Qu.:  8.000      
##  Max.   :51.0000   Max.   :103.000   Max.   :81.000   Max.   :614.000      
##  liwc_nonfluencies  liwc_numbers    liwc_past_tense   liwc_perceptual_processes
##  Min.   :0.0000    Min.   : 0.000   Min.   :  0.000   Min.   : 0.000           
##  1st Qu.:0.0000    1st Qu.: 0.000   1st Qu.:  1.000   1st Qu.: 1.000           
##  Median :0.0000    Median : 0.000   Median :  3.000   Median : 3.000           
##  Mean   :0.1805    Mean   : 1.029   Mean   :  5.778   Mean   : 3.964           
##  3rd Qu.:0.0000    3rd Qu.: 1.000   3rd Qu.:  7.000   3rd Qu.: 5.000           
##  Max.   :8.0000    Max.   :35.000   Max.   :241.000   Max.   :98.000           
##  liwc_personal_pronouns liwc_positive_emotion liwc_prepositions
##  Min.   :   0.00        Min.   :  0.000       Min.   :  0.00   
##  1st Qu.:   8.00        1st Qu.:  2.000       1st Qu.:  8.00   
##  Median :  15.00        Median :  4.000       Median : 16.00   
##  Mean   :  22.04        Mean   :  5.838       Mean   : 22.36   
##  3rd Qu.:  28.00        3rd Qu.:  8.000       3rd Qu.: 29.00   
##  Max.   :1228.00        Max.   :118.000       Max.   :619.00   
##  liwc_present_tense liwc_quantifiers  liwc_relativity  liwc_religion    
##  Min.   :  0.00     Min.   :  0.000   Min.   :  0.00   Min.   : 0.0000  
##  1st Qu.:  9.00     1st Qu.:  1.000   1st Qu.:  8.00   1st Qu.: 0.0000  
##  Median : 17.00     Median :  3.000   Median : 16.00   Median : 0.0000  
##  Mean   : 22.47     Mean   :  4.125   Mean   : 22.72   Mean   : 0.1215  
##  3rd Qu.: 29.00     3rd Qu.:  6.000   3rd Qu.: 29.00   3rd Qu.: 0.0000  
##  Max.   :614.00     Max.   :122.000   Max.   :686.00   Max.   :17.0000  
##   liwc_sadness       liwc_see        liwc_sexual       liwc_social_processes
##  Min.   : 0.000   Min.   : 0.0000   Min.   :  0.0000   Min.   :  0.00       
##  1st Qu.: 0.000   1st Qu.: 0.0000   1st Qu.:  0.0000   1st Qu.:  3.00       
##  Median : 1.000   Median : 0.0000   Median :  0.0000   Median :  6.00       
##  Mean   : 1.825   Mean   : 0.7747   Mean   :  0.6665   Mean   : 11.16       
##  3rd Qu.: 3.000   3rd Qu.: 1.0000   3rd Qu.:  1.0000   3rd Qu.: 14.00       
##  Max.   :60.000   Max.   :42.0000   Max.   :110.0000   Max.   :389.00       
##    liwc_space     liwc_swear_words   liwc_tentative     liwc_time     
##  Min.   :  0.00   Min.   :  0.0000   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.:  3.00   1st Qu.:  0.0000   1st Qu.: 1.000   1st Qu.:  4.00  
##  Median :  6.00   Median :  0.0000   Median : 2.000   Median :  8.00  
##  Mean   :  8.99   Mean   :  0.6212   Mean   : 3.584   Mean   : 11.48  
##  3rd Qu.: 12.00   3rd Qu.:  1.0000   3rd Qu.: 5.000   3rd Qu.: 15.00  
##  Max.   :319.00   Max.   :110.0000   Max.   :85.000   Max.   :261.00  
##  liwc_total_functional liwc_total_pronouns   liwc_work        tfidf_abl      
##  Min.   :   0.0        Min.   :   0.00     Min.   : 0.000   Min.   :0.00000  
##  1st Qu.:  39.0        1st Qu.:  11.00     1st Qu.: 0.000   1st Qu.:0.00000  
##  Median :  73.0        Median :  22.00     Median : 1.000   Median :0.00000  
##  Mean   : 102.5        Mean   :  31.51     Mean   : 2.556   Mean   :0.01367  
##  3rd Qu.: 130.0        3rd Qu.:  40.00     3rd Qu.: 3.000   3rd Qu.:0.00000  
##  Max.   :2631.0        Max.   :1228.00     Max.   :69.000   Max.   :0.78904  
##    tfidf_abus        tfidf_actual      tfidf_addict        tfidf_adhd      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.00000   Median :0.000000   Median :0.000000  
##  Mean   :0.004557   Mean   :0.01497   Mean   :0.002222   Mean   :0.001375  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :0.767328   Max.   :0.81180   Max.   :0.785988   Max.   :0.686174  
##   tfidf_advic       tfidf_ago       tfidf_alcohol      tfidf_almost    
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.0131   Mean   :0.01462   Mean   :0.00292   Mean   :0.01293  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :0.53289   Max.   :1.00000   Max.   :0.76166  
##    tfidf_alon      tfidf_alreadi        tfidf_also       tfidf_alway     
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.02368   Mean   :0.009661   Mean   :0.01756   Mean   :0.02736  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.652063   Max.   :0.50616   Max.   :0.76236  
##    tfidf_amp       tfidf_amp x200b     tfidf_ani        tfidf_anoth     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01728   Mean   :0.01437   Mean   :0.03061   Mean   :0.01124  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.04063   3rd Qu.:0.00000  
##  Max.   :0.99987   Max.   :0.59814   Max.   :1.00000   Max.   :0.79570  
##  tfidf_anxieti     tfidf_anxious      tfidf_anymor      tfidf_anyon     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.05328   Mean   :0.01684   Mean   :0.02398   Mean   :0.03405  
##  3rd Qu.:0.06473   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.03864  
##  Max.   :1.00000   Max.   :1.00000   Max.   :0.83999   Max.   :0.81033  
##  tfidf_anyon els    tfidf_anyth       tfidf_around       tfidf_ask      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01481   Mean   :0.02933   Mean   :0.01773   Mean   :0.01573  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.68512   Max.   :0.85654   Max.   :1.00000   Max.   :0.76233  
##   tfidf_attack       tfidf_away        tfidf_back        tfidf_bad      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01695   Mean   :0.01747   Mean   :0.02625   Mean   :0.02477  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.80686   Max.   :0.85578   Max.   :0.64889   Max.   :0.76346  
##   tfidf_becaus      tfidf_becom       tfidf_befor       tfidf_believ     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.04586   Mean   :0.01058   Mean   :0.01706   Mean   :0.008021  
##  3rd Qu.:0.08256   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :0.63720   Max.   :0.80263   Max.   :0.61233   Max.   :1.000000  
##    tfidf_best       tfidf_better       tfidf_bit          tfidf_bodi     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.01432   Mean   :0.02542   Mean   :0.009737   Mean   :0.00789  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :0.671279   Max.   :0.75450  
##    tfidf_bpd         tfidf_brain         tfidf_call        tfidf_came      
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.000000   Median :0.00000   Median :0.000000  
##  Mean   :0.000405   Mean   :0.008372   Mean   :0.01372   Mean   :0.006829  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :0.515416   Max.   :0.877988   Max.   :0.88243   Max.   :0.576998  
##    tfidf_care        tfidf_caus       tfidf_chang        tfidf_come     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02164   Mean   :0.01159   Mean   :0.01315   Mean   :0.01964  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.88316   Max.   :0.80204   Max.   :0.84197   Max.   :0.68171  
##  tfidf_complet     tfidf_constant   tfidf_control       tfidf_could     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.000000   Median :0.00000  
##  Mean   :0.01136   Mean   :0.0141   Mean   :0.006696   Mean   :0.02276  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.66823   Max.   :0.6824   Max.   :0.769004   Max.   :1.00000  
##   tfidf_coupl         tfidf_cri         tfidf_day         tfidf_deal     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.008814   Mean   :0.01907   Mean   :0.04250   Mean   :0.01376  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.06943   3rd Qu.:0.00000  
##  Max.   :0.605043   Max.   :1.00000   Max.   :0.83482   Max.   :0.84814  
##  tfidf_depress    tfidf_diagnos        tfidf_die        tfidf_differ    
##  Min.   :0.0000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.0735   Mean   :0.006242   Mean   :0.02042   Mean   :0.01011  
##  3rd Qu.:0.1204   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :0.662233   Max.   :1.00000   Max.   :0.71660  
##   tfidf_disord       tfidf_doctor       tfidf_doe        tfidf_done     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.004843   Mean   :0.01039   Mean   :0.0187   Mean   :0.01227  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :0.845321   Max.   :0.70416   Max.   :0.7782   Max.   :0.98321  
##    tfidf_dont       tfidf_drink         tfidf_drug         tfidf_eat      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.000000   Median :0.00000  
##  Mean   :0.01708   Mean   :0.006001   Mean   :0.005097   Mean   :0.01062  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.83453   Max.   :0.742348   Max.   :0.851183   Max.   :0.85491  
##    tfidf_els         tfidf_emot        tfidf_end        tfidf_enough    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02361   Mean   :0.01145   Mean   :0.02267   Mean   :0.01421  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.59447   Max.   :0.80117   Max.   :0.85770   Max.   :0.93971  
##    tfidf_etc          tfidf_even        tfidf_ever       tfidf_everi    
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.008731   Mean   :0.04364   Mean   :0.02117   Mean   :0.0228  
##  3rd Qu.:0.000000   3rd Qu.:0.07812   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :0.883921   Max.   :1.00000   Max.   :0.72983   Max.   :1.0000  
##  tfidf_everyon     tfidf_everyth      tfidf_experi     tfidf_famili    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.01987   Mean   :0.02618   Mean   :0.0116   Mean   :0.02032  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.76360   Max.   :0.9426   Max.   :0.77908  
##    tfidf_fear         tfidf_feel      tfidf_feel like     tfidf_felt     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.07718   Median :0.00000   Median :0.00000  
##  Mean   :0.009628   Mean   :0.10353   Mean   :0.04638   Mean   :0.01851  
##  3rd Qu.:0.000000   3rd Qu.:0.16698   3rd Qu.:0.08094   3rd Qu.:0.00000  
##  Max.   :0.920862   Max.   :1.00000   Max.   :0.58754   Max.   :0.70959  
##   tfidf_final       tfidf_find       tfidf_first        tfidf_food      
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.0117   Mean   :0.02098   Mean   :0.01916   Mean   :0.004817  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :0.75928   Max.   :0.74519   Max.   :0.913467  
##   tfidf_found        tfidf_friend       tfidf_fuck        tfidf_get      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.02889  
##  Mean   :0.008784   Mean   :0.05110   Mean   :0.02874   Mean   :0.06323  
##  3rd Qu.:0.000000   3rd Qu.:0.07479   3rd Qu.:0.00000   3rd Qu.:0.10601  
##  Max.   :0.833085   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##    tfidf_give         tfidf_go         tfidf_good        tfidf_got      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01685   Mean   :0.05570   Mean   :0.02752   Mean   :0.02326  
##  3rd Qu.:0.00000   3rd Qu.:0.09589   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.92784   Max.   :0.78979   Max.   :0.89742   Max.   :0.77857  
##   tfidf_great       tfidf_guess        tfidf_guy        tfidf_happen    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01123   Mean   :0.01056   Mean   :0.01425   Mean   :0.01925  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.76037   Max.   :0.73251   Max.   :1.00000   Max.   :0.82963  
##   tfidf_happi        tfidf_hard        tfidf_hate        tfidf_head     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02743   Mean   :0.01952   Mean   :0.02492   Mean   :0.01239  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.65414   Max.   :1.00000   Max.   :1.00000  
##   tfidf_health        tfidf_hear       tfidf_heart        tfidf_help     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.009106   Mean   :0.00681   Mean   :0.01026   Mean   :0.04073  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.06004  
##  Max.   :0.710469   Max.   :0.86296   Max.   :0.91243   Max.   :0.93148  
##    tfidf_high        tfidf_home        tfidf_hope        tfidf_hour     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01139   Mean   :0.01619   Mean   :0.01728   Mean   :0.01444  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.78574   Max.   :0.77763   Max.   :0.86821   Max.   :0.77211  
##    tfidf_hous         tfidf_hurt        tfidf_idea          tfidf_im      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.009715   Mean   :0.01354   Mean   :0.009232   Mean   :0.02383  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.753271   Max.   :0.84672   Max.   :0.879889   Max.   :0.93437  
##    tfidf_issu        tfidf_job         tfidf_keep        tfidf_kill    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.01113   Mean   :0.02705   Mean   :0.02101   Mean   :0.0167  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :0.84307   Max.   :1.00000   Max.   :1.0000  
##    tfidf_kind        tfidf_know        tfidf_last        tfidf_late     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01065   Mean   :0.05781   Mean   :0.02125   Mean   :0.01163  
##  3rd Qu.:0.00000   3rd Qu.:0.09920   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.67493   Max.   :1.00000   Max.   :0.59494   Max.   :0.75625  
##    tfidf_leav        tfidf_left        tfidf_let         tfidf_life     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01382   Mean   :0.01188   Mean   :0.01405   Mean   :0.05362  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.09165  
##  Max.   :0.77236   Max.   :0.77499   Max.   :0.79742   Max.   :1.00000  
##    tfidf_like       tfidf_littl       tfidf_live        tfidf_long     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.05854   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.07678   Mean   :0.0133   Mean   :0.02761   Mean   :0.01858  
##  3rd Qu.:0.12708   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.63412   Max.   :0.8021   Max.   :1.00000   Max.   :0.82238  
##    tfidf_look       tfidf_lose        tfidf_lost        tfidf_lot     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.0211   Mean   :0.00928   Mean   :0.01434   Mean   :0.0189  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :0.8337   Max.   :0.95191   Max.   :0.86246   Max.   :0.6612  
##    tfidf_love        tfidf_made        tfidf_make        tfidf_mani     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02608   Mean   :0.01585   Mean   :0.04156   Mean   :0.01353  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.07263   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.78266   Max.   :1.00000   Max.   :1.00000  
##    tfidf_mayb        tfidf_mean        tfidf_med         tfidf_medic     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.01444   Mean   :0.01059   Mean   :0.008513   Mean   :0.01279  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.74442   Max.   :0.74482   Max.   :0.888200   Max.   :0.84878  
##   tfidf_mental      tfidf_might       tfidf_mind        tfidf_mom      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.01412   Mean   :0.0101   Mean   :0.01416   Mean   :0.01142  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.78343   Max.   :0.8454   Max.   :0.75367   Max.   :0.84310  
##   tfidf_month        tfidf_move        tfidf_much        tfidf_need     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02166   Mean   :0.01377   Mean   :0.02993   Mean   :0.02956  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.02281   3rd Qu.:0.00000  
##  Max.   :0.78364   Max.   :0.74906   Max.   :0.68366   Max.   :0.75431  
##   tfidf_never        tfidf_new         tfidf_next       tfidf_night     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.03134   Mean   :0.01742   Mean   :0.00955   Mean   :0.01475  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.75473   Max.   :0.83215   Max.   :0.76809   Max.   :0.94573  
##   tfidf_normal       tfidf_noth       tfidf_notic         tfidf_old      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.01154   Mean   :0.02396   Mean   :0.007037   Mean   :0.01166  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.77903   Max.   :1.00000   Max.   :0.712938   Max.   :1.00000  
##    tfidf_onc         tfidf_one         tfidf_onli        tfidf_pain     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01002   Mean   :0.03736   Mean   :0.02836   Mean   :0.01307  
##  3rd Qu.:0.00000   3rd Qu.:0.06170   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.69046   Max.   :0.83676   Max.   :0.78887   Max.   :0.93075  
##   tfidf_panic       tfidf_parent      tfidf_part        tfidf_past     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.01449   Mean   :0.0161   Mean   :0.01048   Mean   :0.01522  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.83953   Max.   :0.8148   Max.   :1.00000   Max.   :0.76669  
##   tfidf_peopl       tfidf_person      tfidf_place       tfidf_pleas     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.04535   Mean   :0.02222   Mean   :0.01267   Mean   :0.01165  
##  3rd Qu.:0.07178   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.79645   Max.   :0.71672   Max.   :1.00000  
##   tfidf_point      tfidf_possibl        tfidf_post       tfidf_pretti    
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.01856   Mean   :0.006681   Mean   :0.01603   Mean   :0.01306  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.74205   Max.   :0.816572   Max.   :0.81536   Max.   :0.69090  
##  tfidf_probabl     tfidf_problem       tfidf_ptsd         tfidf_put      
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000000   Median :0.00000  
##  Mean   :0.01005   Mean   :0.01485   Mean   :0.001472   Mean   :0.01289  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :0.71161   Max.   :0.77379   Max.   :0.841565   Max.   :0.80629  
##  tfidf_question       tfidf_quit        tfidf_read        tfidf_real     
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.007794   Mean   :0.00875   Mean   :0.01161   Mean   :0.01004  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.792766   Max.   :0.71659   Max.   :1.00000   Max.   :0.85961  
##   tfidf_realli      tfidf_reason      tfidf_recent     tfidf_relationship
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000   
##  Mean   :0.04526   Mean   :0.01516   Mean   :0.01234   Mean   :0.01434   
##  3rd Qu.:0.07742   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   
##  Max.   :0.76107   Max.   :0.78364   Max.   :0.63988   Max.   :0.89918   
##   tfidf_rememb       tfidf_right        tfidf_said        tfidf_say      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.008799   Mean   :0.02105   Mean   :0.01227   Mean   :0.02492  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.965981   Max.   :0.82153   Max.   :0.69062   Max.   :1.00000  
##   tfidf_scare       tfidf_school       tfidf_see         tfidf_seem     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01361   Mean   :0.02881   Mean   :0.02481   Mean   :0.01986  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.96271   Max.   :1.00000   Max.   :0.84921   Max.   :0.60346  
##    tfidf_self       tfidf_sever         tfidf_shit        tfidf_sinc     
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.01498   Mean   :0.008448   Mean   :0.01816   Mean   :0.02036  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.79557   Max.   :0.793802   Max.   :0.76601   Max.   :0.63103  
##   tfidf_situat       tfidf_sleep       tfidf_social      tfidf_someon    
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.009818   Mean   :0.02136   Mean   :0.01466   Mean   :0.02816  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.825902   Max.   :0.89062   Max.   :0.80150   Max.   :1.00000  
##   tfidf_someth     tfidf_sometim      tfidf_sorri       tfidf_start     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02796   Mean   :0.01661   Mean   :0.01085   Mean   :0.03123  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.91995   Max.   :0.94216   Max.   :0.75764  
##    tfidf_stay       tfidf_still        tfidf_stop       tfidf_stress    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01132   Mean   :0.02362   Mean   :0.01959   Mean   :0.01169  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.90071   Max.   :0.93304   Max.   :0.78812   Max.   :0.88526  
##  tfidf_struggl      tfidf_stuff        tfidf_suicid    tfidf_support     
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.000000   Median :0.0000   Median :0.000000  
##  Mean   :0.01358   Mean   :0.009114   Mean   :0.0204   Mean   :0.008628  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:0.000000  
##  Max.   :0.80640   Max.   :0.862957   Max.   :0.8863   Max.   :0.776566  
##    tfidf_sure      tfidf_symptom        tfidf_take        tfidf_talk     
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.01397   Mean   :0.006048   Mean   :0.02769   Mean   :0.03563  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :0.822635   Max.   :1.00000   Max.   :1.00000  
##    tfidf_tell       tfidf_thank      tfidf_therapi    tfidf_therapist  
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.02116   Mean   :0.01327   Mean   :0.0106   Mean   :0.01039  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :0.83719   Max.   :0.87651   Max.   :0.8441   Max.   :0.84280  
##   tfidf_thing       tfidf_think       tfidf_though     tfidf_thought    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.03999   Mean   :0.04145   Mean   :0.01334   Mean   :0.02906  
##  3rd Qu.:0.07027   3rd Qu.:0.07154   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.63738   Max.   :0.84618   Max.   :0.65806   Max.   :0.78063  
##    tfidf_time        tfidf_tire       tfidf_today        tfidf_told     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.04792   Mean   :0.01635   Mean   :0.01923   Mean   :0.01422  
##  3rd Qu.:0.08537   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.68757   Max.   :0.97665   Max.   :0.93192   Max.   :0.87700  
##    tfidf_took         tfidf_tri         tfidf_turn        tfidf_two      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.007907   Mean   :0.03885   Mean   :0.01053   Mean   :0.01224  
##  3rd Qu.:0.000000   3rd Qu.:0.06555   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.786568   Max.   :1.00000   Max.   :0.76578   Max.   :0.70316  
##  tfidf_understand     tfidf_us          tfidf_use        tfidf_usual      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.000000  
##  Mean   :0.01208   Mean   :0.007248   Mean   :0.01868   Mean   :0.009395  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :0.88774   Max.   :0.831979   Max.   :0.70982   Max.   :0.908200  
##    tfidf_veri        tfidf_want        tfidf_way         tfidf_week     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.02026   Mean   :0.06381   Mean   :0.02559   Mean   :0.02064  
##  3rd Qu.:0.00000   3rd Qu.:0.10585   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.72539   Max.   :1.00000   Max.   :0.74705   Max.   :0.68281  
##   tfidf_weight        tfidf_well        tfidf_went        tfidf_whi      
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.000000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.004925   Mean   :0.01665   Mean   :0.01334   Mean   :0.02553  
##  3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :0.912800   Max.   :0.63109   Max.   :0.67659   Max.   :0.99580  
##   tfidf_whole        tfidf_wish      tfidf_without      tfidf_wonder     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.000000  
##  Mean   :0.00893   Mean   :0.01645   Mean   :0.01268   Mean   :0.009652  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :0.88639   Max.   :0.94930   Max.   :0.73502   Max.   :0.735612  
##    tfidf_work       tfidf_worri        tfidf_wors       tfidf_would     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.03941   Mean   :0.01233   Mean   :0.01724   Mean   :0.03249  
##  3rd Qu.:0.05001   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.03201  
##  Max.   :0.80059   Max.   :0.88022   Max.   :0.86105   Max.   :1.00000  
##   tfidf_wrong       tfidf_x200b        tfidf_year     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :0.01235   Mean   :0.01437   Mean   :0.04120  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.07234  
##  Max.   :0.82475   Max.   :0.59814   Max.   :1.00000
# Compact overview of every column: name, type and the first few values.
str(object = socailmedia_2019_features)
## tibble [49,587 × 350] (S3: tbl_df/tbl/data.frame)
##  $ subreddit                  : chr [1:49587] "depression" "depression" "depression" "depression" ...
##  $ author                     : chr [1:49587] "anonaccount131" "gimlis_beard" "WreckDotNet" "danieltargaryean" ...
##  $ date                       : POSIXct[1:49587], format: "2019-01-01" "2019-01-01" ...
##  $ post                       : chr [1:49587] "Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart a"| __truncated__ "Craving validation from others while immediately rejecting anything positive that other people say about me is "| __truncated__ "Calling the distress line while living at home? How? How can I?  \r\nI can't really afford therapy or anything "| __truncated__ "Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on impro"| __truncated__ ...
##  $ automated_readability_index: num [1:49587] 10.22 9.13 -1.16 0.83 1.35 ...
##  $ coleman_liau_index         : num [1:49587] 5.467 9.896 0.873 3.047 3.598 ...
##  $ flesch_kincaid_grade_level : num [1:49587] 9.94 9.78 1.88 2.36 2.86 ...
##  $ flesch_reading_ease        : num [1:49587] 72.8 55.6 92.8 94.4 91.4 ...
##  $ gulpease_index             : num [1:49587] 61.9 59.8 100.4 84.3 82.3 ...
##  $ gunning_fog_index          : num [1:49587] 13.91 13.39 4.33 5.45 5.38 ...
##  $ lix                        : num [1:49587] 38.4 41.6 17.6 20.6 21.3 ...
##  $ smog_index                 : num [1:49587] 11.21 12.69 6.18 6.78 6.63 ...
##  $ wiener_sachtextformel      : num [1:49587] 3.81027 6.18863 0.00256 0.30255 0.34551 ...
##  $ n_chars                    : num [1:49587] 209 401 160 755 291 354 383 420 110 221 ...
##  $ n_long_words               : num [1:49587] 6 21 5 23 9 10 8 15 3 5 ...
##  $ n_monosyllable_words       : num [1:49587] 46 56 34 166 62 75 96 82 24 57 ...
##  $ n_polysyllable_words       : num [1:49587] 4 14 2 9 3 6 3 6 1 1 ...
##  $ n_sents                    : num [1:49587] 2 5 7 22 8 6 13 9 3 7 ...
##  $ n_syllables                : num [1:49587] 69 136 56 245 95 119 131 133 37 78 ...
##  $ n_unique_words             : num [1:49587] 45 67 35 117 55 65 66 63 27 42 ...
##  $ n_words                    : num [1:49587] 55 86 44 201 76 93 111 104 30 67 ...
##  $ sent_neg                   : num [1:49587] 0.129 0.121 0.09 0.157 0.066 0.132 0.133 0.102 0.104 0.034 ...
##  $ sent_neu                   : num [1:49587] 0.775 0.663 0.847 0.559 0.817 ...
##  $ sent_pos                   : num [1:49587] 0.096 0.217 0.063 0.284 0.117 ...
##  $ sent_compound              : num [1:49587] -0.421 0.866 -0.291 0.985 0.44 ...
##  $ economic_stress_total      : num [1:49587] 0 2 1 2 0 0 0 0 0 0 ...
##  $ isolation_total            : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ substance_use_total        : num [1:49587] 0 0 0 0 0 0 0 1 0 0 ...
##  $ guns_total                 : num [1:49587] 0 1 0 0 0 0 0 0 0 0 ...
##  $ domestic_stress_total      : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ suicidality_total          : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ punctuation                : num [1:49587] 5 13 10 22 6 10 16 15 1 9 ...
##  $ liwc_1st_pers              : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
##  $ liwc_2nd_pers              : num [1:49587] 0 0 0 2 0 2 0 0 0 0 ...
##  $ liwc_3rd_pers              : num [1:49587] 0 1 0 0 0 0 0 3 0 0 ...
##  $ liwc_achievement           : num [1:49587] 0 0 0 3 0 2 4 1 1 0 ...
##  $ liwc_adverbs               : num [1:49587] 4 7 4 7 7 6 8 6 2 5 ...
##  $ liwc_affective_processes   : num [1:49587] 4 8 2 19 2 7 10 6 3 5 ...
##  $ liwc_anger                 : num [1:49587] 2 1 0 2 0 1 1 0 1 1 ...
##  $ liwc_anxiety               : num [1:49587] 0 0 1 0 0 0 0 0 0 0 ...
##  $ liwc_articles_article      : num [1:49587] 3 1 1 4 3 2 4 3 2 1 ...
##  $ liwc_assent                : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ liwc_auxiliary_verbs       : num [1:49587] 5 7 5 20 8 6 8 7 4 6 ...
##  $ liwc_biological            : num [1:49587] 2 0 0 3 1 6 3 1 1 4 ...
##  $ liwc_body                  : num [1:49587] 1 0 0 1 1 0 0 0 0 0 ...
##  $ liwc_causation             : num [1:49587] 1 1 2 0 0 3 1 1 0 1 ...
##  $ liwc_certainty             : num [1:49587] 0 1 0 2 0 1 1 0 0 0 ...
##  $ liwc_cognitive             : num [1:49587] 7 11 6 44 13 17 24 22 5 10 ...
##  $ liwc_common_verbs          : num [1:49587] 12 12 6 45 14 16 27 25 7 16 ...
##  $ liwc_conjunctions          : num [1:49587] 2 3 8 15 9 9 6 8 2 1 ...
##  $ liwc_death                 : num [1:49587] 0 0 0 2 0 0 0 0 0 0 ...
##  $ liwc_discrepancy           : num [1:49587] 0 0 0 11 3 1 3 3 0 5 ...
##  $ liwc_exclusive             : num [1:49587] 0 3 2 10 4 3 7 4 1 3 ...
##  $ liwc_family                : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
##  $ liwc_feel                  : num [1:49587] 1 2 0 1 0 0 5 3 0 0 ...
##  $ liwc_fillers               : num [1:49587] 1 2 1 1 0 0 3 4 0 1 ...
##  $ liwc_friends               : num [1:49587] 0 0 0 1 0 0 1 0 1 0 ...
##  $ liwc_future_tense          : num [1:49587] 0 0 0 4 1 1 2 3 0 1 ...
##  $ liwc_health                : num [1:49587] 0 0 0 1 0 6 2 1 0 1 ...
##  $ liwc_hear                  : num [1:49587] 0 2 0 0 2 2 0 0 0 0 ...
##  $ liwc_home                  : num [1:49587] 0 0 2 1 0 0 0 0 0 0 ...
##  $ liwc_humans                : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
##  $ liwc_impersonal_pronouns   : num [1:49587] 1 9 1 9 1 5 3 4 1 1 ...
##  $ liwc_inclusive             : num [1:49587] 2 1 2 12 5 5 3 7 3 1 ...
##  $ liwc_ingestion             : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ liwc_inhibition            : num [1:49587] 0 0 0 0 0 1 2 3 0 0 ...
##  $ liwc_insight               : num [1:49587] 3 3 0 7 1 3 8 4 1 0 ...
##  $ liwc_leisure               : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
##  $ liwc_money                 : num [1:49587] 0 0 0 0 0 0 1 0 0 0 ...
##  $ liwc_motion                : num [1:49587] 0 1 1 2 0 0 1 2 0 0 ...
##  $ liwc_negations             : num [1:49587] 2 3 2 7 0 3 4 3 2 5 ...
##  $ liwc_negative_emotion      : num [1:49587] 2 2 1 5 0 3 3 2 1 1 ...
##  $ liwc_nonfluencies          : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
##  $ liwc_numbers               : num [1:49587] 0 0 0 0 0 0 0 1 0 0 ...
##  $ liwc_past_tense            : num [1:49587] 0 0 0 7 3 2 1 4 5 0 ...
##  $ liwc_perceptual_processes  : num [1:49587] 2 5 0 3 3 2 5 3 0 2 ...
##  $ liwc_personal_pronouns     : num [1:49587] 4 10 5 26 5 12 13 16 4 13 ...
##  $ liwc_positive_emotion      : num [1:49587] 2 6 1 14 2 3 7 4 2 4 ...
##  $ liwc_prepositions          : num [1:49587] 6 13 5 19 9 11 8 8 3 7 ...
##  $ liwc_present_tense         : num [1:49587] 11 11 6 30 9 13 22 18 2 14 ...
##  $ liwc_quantifiers           : num [1:49587] 2 1 0 7 0 2 3 3 0 0 ...
##  $ liwc_relativity            : num [1:49587] 9 4 7 11 5 11 11 18 5 3 ...
##  $ liwc_religion              : num [1:49587] 0 1 0 0 0 0 0 0 0 0 ...
##  $ liwc_sadness               : num [1:49587] 0 1 0 2 0 1 1 1 0 0 ...
##  $ liwc_see                   : num [1:49587] 1 1 0 2 0 0 0 0 0 2 ...
##  $ liwc_sexual                : num [1:49587] 1 0 0 1 0 0 1 0 1 3 ...
##  $ liwc_social_processes      : num [1:49587] 0 3 2 9 8 5 2 3 1 3 ...
##  $ liwc_space                 : num [1:49587] 6 2 4 3 3 5 3 4 3 1 ...
##  $ liwc_swear_words           : num [1:49587] 2 1 0 2 0 0 1 0 1 0 ...
##  $ liwc_tentative             : num [1:49587] 1 3 1 9 1 1 0 1 0 0 ...
##  $ liwc_time                  : num [1:49587] 4 3 3 7 2 6 9 16 2 3 ...
##  $ liwc_total_functional      : num [1:49587] 27 48 25 109 40 50 52 53 17 34 ...
##  $ liwc_total_pronouns        : num [1:49587] 5 19 6 35 6 17 16 20 5 14 ...
##  $ liwc_work                  : num [1:49587] 1 0 0 1 1 2 2 1 0 0 ...
##  $ tfidf_abl                  : num [1:49587] 0.259 0 0 0 0 ...
##  $ tfidf_abus                 : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ tfidf_actual               : num [1:49587] 0.253 0.265 0 0 0 ...
##  $ tfidf_addict               : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##  $ tfidf_adhd                 : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

Missing Data

  1. Describe missing data, provide summary of missing data, similar to the analysis in the Chapter 2 (table 3): Count of missing data/percent per variable, type of missing data (NA, null), total percent of missingness per dataset [ - 20pts] If you find that there is no missing data, you still have to report your findings. Your reader does not know your data and you have to show how reliable your data is
# sum(is.na(socailmedia_2019_features))
# Work on a subset: the identifier/text columns and first readability indices
# (positions 1-11) plus the four sentiment scores (positions 22-25:
# sent_neg, sent_neu, sent_pos, sent_compound).
# The positions must be combined into ONE index vector. In the earlier form
# `df[, 1:11, 22:25]` the third argument is matched to `drop`, not to columns,
# so columns 22-25 were silently left out of the subset.
# NOTE(review): re-knit after this change so the printed outputs and plots
# below reflect the 15-column subset.
socailmedia_2019_features_subset <- socailmedia_2019_features[, c(1:11, 22:25)]
# Total number of missing cells in the subset.
sum(is.na(socailmedia_2019_features_subset))
## [1] 39
# install.packages("naniar")
library(naniar)
# Plot which cells of the subset are missing and which are present.
vis_miss(x = socailmedia_2019_features_subset)

# Count how many variables (columns) contain at least one missing value.
n_var_miss(data = socailmedia_2019_features_subset)
## [1] 1

All of the NAs are found in the author column: 39 of its 49,587 values (under 0.1%) are missing, which is very minimal.

  1. Plot visualization of missing data pattern [ - 20pts] If you do not have missing data, you still need to plot it to show it to your reader
library(visdat)
# Plot the class of every column in the subset, with missing cells marked.
vis_dat(x = socailmedia_2019_features_subset)

This visual shows the data types in the dataset. More than half of the columns in the subset are numeric, followed by character columns and one date column. No NAs are prominent here because there is only a very small amount of them, all in the author column.

  1. Describe what type of missing data you have observed: MCAR, MAR, MNAR or no missing data [- 10pts] If you do not have missing data, simply state that you did not observe any missing values
# install.packages("mice")
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Tabulate (and plot) the missing-data patterns of the subset: one row per
# pattern, where 1 marks an observed variable and 0 a missing one.
md.pattern(x = socailmedia_2019_features_subset)

##       subreddit date post automated_readability_index coleman_liau_index
## 49548         1    1    1                           1                  1
## 39            1    1    1                           1                  1
##               0    0    0                           0                  0
##       flesch_kincaid_grade_level flesch_reading_ease gulpease_index
## 49548                          1                   1              1
## 39                             1                   1              1
##                                0                   0              0
##       gunning_fog_index lix author   
## 49548                 1   1      1  0
## 39                    1   1      0  1
##                       0   0     39 39
#install.packages("VIM")
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Aggregate missingness per variable and per combination of variables.
# prop = FALSE reports absolute counts instead of proportions;
# numbers = TRUE prints the frequency of each combination on the plot.
aggr(
  x = socailmedia_2019_features_subset,
  prop = FALSE,
  numbers = TRUE
)

Missing data were found only in the “author” variable of the dataset. It has 39 missing values, as shown in the visual above.

  1. Select Imputation method that will be performing and explain why [20pts]: list-wise/pair-wise deletions, mean imputation, regression imputation etc [ - 10pts] perform imputation and provide data statistics. For example, if you perform list-wise deletion, how many observations will you use for consequent analysis. If you perform regression imputation, provide statistic summary [-10pts]
# List-wise deletion: drop every row of the subset that contains an NA.
socailmedia_2019_features_subset_clean <- na.omit(
  object = socailmedia_2019_features_subset
)
# Confirm that no missing cells remain after the deletion.
sum(is.na(socailmedia_2019_features_subset_clean))
## [1] 0

Here, I applied list-wise deletion with na.omit(), which removes every row of the subset that contains an NA found previously. As you can see, there are now 0 NAs in the cleaned subset.

  1. Outlier analysis
# Fix the random seed for reproducibility.
# NOTE(review): nothing in this step appears to draw random numbers; the seed
# is kept so that any later RNG-dependent code is unaffected.
set.seed(seed = 482)
# Boxplot of the Flesch-Kincaid grade level to reveal potential outliers.
boxplot(
  socailmedia_2019_features_subset_clean[["flesch_kincaid_grade_level"]]
)

As the boxplot above shows, the flesch_kincaid_grade_level variable has a few outliers.