library(readxl)
# Import the 2019 social-media feature workbook. The path is relative, so it
# is resolved against the current working directory.
socailmedia_2019_features <- read_excel(
  path = "Downloads/socailmedia_2019_features.xlsx"
)
# Uncomment to browse the table interactively in the data viewer:
# View(socailmedia_2019_features)
# Report the table size as (rows, columns).
dim(socailmedia_2019_features)
## [1] 49587 350
# Preview the first six rows (the default row count, spelled out explicitly).
head(socailmedia_2019_features, n = 6L)
## # A tibble: 6 x 350
## subreddit author date post automated_reada… coleman_liau_in…
## <chr> <chr> <dttm> <chr> <dbl> <dbl>
## 1 depressi… anona… 2019-01-01 00:00:00 "Any… 10.2 5.47
## 2 depressi… gimli… 2019-01-01 00:00:00 "Cra… 9.13 9.90
## 3 depressi… Wreck… 2019-01-01 00:00:00 "Cal… -1.16 0.873
## 4 depressi… danie… 2019-01-01 00:00:00 "Onl… 0.830 3.05
## 5 depressi… emman… 2019-01-01 00:00:00 "Any… 1.35 3.60
## 6 depressi… Lunak… 2019-01-01 00:00:00 "How… 4.25 4.67
## # … with 344 more variables: flesch_kincaid_grade_level <dbl>,
## # flesch_reading_ease <dbl>, gulpease_index <dbl>, gunning_fog_index <dbl>,
## # lix <dbl>, smog_index <dbl>, wiener_sachtextformel <dbl>, n_chars <dbl>,
## # n_long_words <dbl>, n_monosyllable_words <dbl>, n_polysyllable_words <dbl>,
## # n_sents <dbl>, n_syllables <dbl>, n_unique_words <dbl>, n_words <dbl>,
## # sent_neg <dbl>, sent_neu <dbl>, sent_pos <dbl>, sent_compound <dbl>,
## # economic_stress_total <dbl>, isolation_total <dbl>,
## # substance_use_total <dbl>, guns_total <dbl>, domestic_stress_total <dbl>,
## # suicidality_total <dbl>, punctuation <dbl>, liwc_1st_pers <dbl>,
## # liwc_2nd_pers <dbl>, liwc_3rd_pers <dbl>, liwc_achievement <dbl>,
## # liwc_adverbs <dbl>, liwc_affective_processes <dbl>, liwc_anger <dbl>,
## # liwc_anxiety <dbl>, liwc_articles_article <dbl>, liwc_assent <dbl>,
## # liwc_auxiliary_verbs <dbl>, liwc_biological <dbl>, liwc_body <dbl>,
## # liwc_causation <dbl>, liwc_certainty <dbl>, liwc_cognitive <dbl>,
## # liwc_common_verbs <dbl>, liwc_conjunctions <dbl>, liwc_death <dbl>,
## # liwc_discrepancy <dbl>, liwc_exclusive <dbl>, liwc_family <dbl>,
## # liwc_feel <dbl>, liwc_fillers <dbl>, liwc_friends <dbl>,
## # liwc_future_tense <dbl>, liwc_health <dbl>, liwc_hear <dbl>,
## # liwc_home <dbl>, liwc_humans <dbl>, liwc_impersonal_pronouns <dbl>,
## # liwc_inclusive <dbl>, liwc_ingestion <dbl>, liwc_inhibition <dbl>,
## # liwc_insight <dbl>, liwc_leisure <dbl>, liwc_money <dbl>,
## # liwc_motion <dbl>, liwc_negations <dbl>, liwc_negative_emotion <dbl>,
## # liwc_nonfluencies <dbl>, liwc_numbers <dbl>, liwc_past_tense <dbl>,
## # liwc_perceptual_processes <dbl>, liwc_personal_pronouns <dbl>,
## # liwc_positive_emotion <dbl>, liwc_prepositions <dbl>,
## # liwc_present_tense <dbl>, liwc_quantifiers <dbl>, liwc_relativity <dbl>,
## # liwc_religion <dbl>, liwc_sadness <dbl>, liwc_see <dbl>, liwc_sexual <dbl>,
## # liwc_social_processes <dbl>, liwc_space <dbl>, liwc_swear_words <dbl>,
## # liwc_tentative <dbl>, liwc_time <dbl>, liwc_total_functional <dbl>,
## # liwc_total_pronouns <dbl>, liwc_work <dbl>, tfidf_abl <dbl>,
## # tfidf_abus <dbl>, tfidf_actual <dbl>, tfidf_addict <dbl>, tfidf_adhd <dbl>,
## # tfidf_advic <dbl>, tfidf_ago <dbl>, tfidf_alcohol <dbl>,
## # tfidf_almost <dbl>, tfidf_alon <dbl>, tfidf_alreadi <dbl>,
## # tfidf_also <dbl>, …
# Per-column overview: length/class/mode for the character columns, and
# min/quartiles/mean/max for the numeric and date-time columns.
summary(object = socailmedia_2019_features)
## subreddit author date
## Length:49587 Length:49587 Min. :2019-01-01 00:00:00
## Class :character Class :character 1st Qu.:2019-01-25 00:00:00
## Mode :character Mode :character Median :2019-02-21 00:00:00
## Mean :2019-02-21 22:50:39
## 3rd Qu.:2019-03-22 00:00:00
## Max. :2019-04-20 00:00:00
## post automated_readability_index coleman_liau_index
## Length:49587 Min. : -8.940 Min. :-19.846
## Class :character 1st Qu.: 1.482 1st Qu.: 3.332
## Mode :character Median : 3.045 Median : 4.605
## Mean : 3.349 Mean : 4.747
## 3rd Qu.: 4.773 3rd Qu.: 5.906
## Max. :243.975 Max. :306.833
## flesch_kincaid_grade_level flesch_reading_ease gulpease_index
## Min. : -3.205 Min. :-1496.72 Min. :-428.50
## 1st Qu.: 3.112 1st Qu.: 80.02 1st Qu.: 69.91
## Median : 4.269 Median : 85.95 Median : 75.20
## Mean : 4.483 Mean : 84.98 Mean : 76.46
## 3rd Qu.: 5.579 3rd Qu.: 91.34 3rd Qu.: 81.43
## Max. :222.708 Max. : 120.71 Max. : 266.37
## gunning_fog_index lix smog_index wiener_sachtextformel
## Min. : 0.600 Min. : 1.50 Min. : 3.129 Min. :-3.8942
## 1st Qu.: 6.006 1st Qu.: 21.79 1st Qu.: 6.938 1st Qu.: 0.5845
## Median : 7.287 Median : 25.64 Median : 7.909 Median : 1.4737
## Mean : 7.476 Mean : 26.08 Mean : 7.916 Mean : 1.6058
## 3rd Qu.: 8.700 3rd Qu.: 29.79 3rd Qu.: 8.842 3rd Qu.: 2.4620
## Max. :45.243 Max. :121.86 Max. :20.267 Max. :20.7409
## n_chars n_long_words n_monosyllable_words n_polysyllable_words
## Min. : 8.0 Min. : 0.00 Min. : 1.0 Min. : 0.00
## 1st Qu.: 296.0 1st Qu.: 10.00 1st Qu.: 60.0 1st Qu.: 4.00
## Median : 543.0 Median : 18.00 Median : 110.0 Median : 8.00
## Mean : 753.3 Mean : 26.03 Mean : 152.5 Mean : 11.91
## 3rd Qu.: 954.0 3rd Qu.: 33.00 3rd Qu.: 194.0 3rd Qu.: 15.00
## Max. :20679.0 Max. :791.00 Max. :3745.0 Max. :343.00
## n_sents n_syllables n_unique_words n_words
## Min. : 1.00 Min. : 2.0 Min. : 1.0 Min. : 2.0
## 1st Qu.: 7.00 1st Qu.: 97.0 1st Qu.: 54.0 1st Qu.: 76.0
## Median : 12.00 Median : 179.0 Median : 87.0 Median : 139.0
## Mean : 15.75 Mean : 247.2 Mean : 103.5 Mean : 192.2
## 3rd Qu.: 20.00 3rd Qu.: 314.0 3rd Qu.: 134.0 3rd Qu.: 244.0
## Max. :614.00 Max. :6501.0 Max. :1394.0 Max. :4900.0
## sent_neg sent_neu sent_pos sent_compound
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :-1.0000
## 1st Qu.:0.1070 1st Qu.:0.6650 1st Qu.:0.076 1st Qu.:-0.9403
## Median :0.1520 Median :0.7220 Median :0.115 Median :-0.6440
## Mean :0.1596 Mean :0.7184 Mean :0.122 Mean :-0.2503
## 3rd Qu.:0.2030 3rd Qu.:0.7760 3rd Qu.:0.160 3rd Qu.: 0.6071
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. : 0.9998
## economic_stress_total isolation_total substance_use_total guns_total
## Min. : 0.0000 Min. : 0.000 Min. : 0.0000 Min. : 0.00000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.00000
## Median : 0.0000 Median : 0.000 Median : 0.0000 Median : 0.00000
## Mean : 0.8374 Mean : 0.428 Mean : 0.4756 Mean : 0.01978
## 3rd Qu.: 1.0000 3rd Qu.: 0.000 3rd Qu.: 1.0000 3rd Qu.: 0.00000
## Max. :30.0000 Max. :37.000 Max. :45.0000 Max. :10.00000
## domestic_stress_total suicidality_total punctuation liwc_1st_pers
## Min. : 0.0000 Min. : 0.0000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 8.00 1st Qu.: 0.0000
## Median : 0.0000 Median : 0.0000 Median : 15.00 Median : 0.0000
## Mean : 0.1229 Mean : 0.2914 Mean : 22.24 Mean : 0.4635
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 27.50 3rd Qu.: 0.0000
## Max. :16.0000 Max. :23.0000 Max. :776.00 Max. :41.0000
## liwc_2nd_pers liwc_3rd_pers liwc_achievement liwc_adverbs
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 3.000
## Median : 0.000 Median : 0.000 Median : 1.000 Median : 7.000
## Mean : 1.067 Mean : 1.057 Mean : 2.247 Mean : 9.575
## 3rd Qu.: 1.000 3rd Qu.: 1.000 3rd Qu.: 3.000 3rd Qu.: 13.000
## Max. :132.000 Max. :62.000 Max. :61.000 Max. :182.000
## liwc_affective_processes liwc_anger liwc_anxiety
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 8.00 Median : 1.000 Median : 0.000
## Mean : 11.52 Mean : 1.573 Mean : 1.012
## 3rd Qu.: 15.00 3rd Qu.: 2.000 3rd Qu.: 1.000
## Max. :614.00 Max. :614.000 Max. :37.000
## liwc_articles_article liwc_assent liwc_auxiliary_verbs liwc_biological
## Min. : 0.000 Min. : 0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 0.0000 1st Qu.: 6.00 1st Qu.: 1.00
## Median : 5.000 Median : 0.0000 Median : 11.00 Median : 3.00
## Mean : 8.079 Mean : 0.2185 Mean : 15.56 Mean : 4.11
## 3rd Qu.: 10.000 3rd Qu.: 0.0000 3rd Qu.: 20.00 3rd Qu.: 6.00
## Max. :355.000 Max. :28.0000 Max. :338.00 Max. :114.00
## liwc_body liwc_causation liwc_certainty liwc_cognitive
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 12.00
## Median : 1.000 Median : 1.000 Median : 1.000 Median : 22.00
## Mean : 1.269 Mean : 2.177 Mean : 1.846 Mean : 30.66
## 3rd Qu.: 2.000 3rd Qu.: 3.000 3rd Qu.: 3.000 3rd Qu.: 39.00
## Max. :55.000 Max. :51.000 Max. :53.000 Max. :688.00
## liwc_common_verbs liwc_conjunctions liwc_death liwc_discrepancy
## Min. : 0.00 Min. : 0.00 Min. : 0.0000 Min. : 0.000
## 1st Qu.: 13.00 1st Qu.: 5.00 1st Qu.: 0.0000 1st Qu.: 1.000
## Median : 23.00 Median : 10.00 Median : 0.0000 Median : 2.000
## Mean : 31.59 Mean : 13.57 Mean : 0.5511 Mean : 3.728
## 3rd Qu.: 40.00 3rd Qu.: 18.00 3rd Qu.: 1.0000 3rd Qu.: 5.000
## Max. :627.00 Max. :301.00 Max. :37.0000 Max. :103.000
## liwc_exclusive liwc_family liwc_feel liwc_fillers
## Min. : 0.000 Min. : 0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.000 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 5.000 Median : 0.0000 Median : 1.000 Median : 1.000
## Mean : 6.419 Mean : 0.6097 Mean : 2.274 Mean : 1.349
## 3rd Qu.: 9.000 3rd Qu.: 0.0000 3rd Qu.: 3.000 3rd Qu.: 2.000
## Max. :141.000 Max. :62.0000 Max. :42.000 Max. :36.000
## liwc_friends liwc_future_tense liwc_health liwc_hear
## Min. : 0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 0.0000 Median : 1.000 Median : 1.000 Median : 0.0000
## Mean : 0.7083 Mean : 1.399 Mean : 1.831 Mean : 0.7658
## 3rd Qu.: 1.0000 3rd Qu.: 2.000 3rd Qu.: 3.000 3rd Qu.: 1.0000
## Max. :44.0000 Max. :49.000 Max. :40.000 Max. :36.0000
## liwc_home liwc_humans liwc_impersonal_pronouns liwc_inclusive
## Min. : 0.0000 Min. : 0.0000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 3.000 1st Qu.: 3.000
## Median : 0.0000 Median : 0.0000 Median : 7.000 Median : 6.000
## Mean : 0.4881 Mean : 0.8326 Mean : 9.467 Mean : 9.136
## 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 12.000 3rd Qu.: 12.000
## Max. :37.0000 Max. :53.0000 Max. :247.000 Max. :252.000
## liwc_ingestion liwc_inhibition liwc_insight liwc_leisure
## Min. : 0.0000 Min. : 0.0000 Min. : 0.000 Min. : 0.0000
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 2.000 1st Qu.: 0.0000
## Median : 0.0000 Median : 0.0000 Median : 4.000 Median : 0.0000
## Mean : 0.4329 Mean : 0.8145 Mean : 5.494 Mean : 0.9098
## 3rd Qu.: 0.0000 3rd Qu.: 1.0000 3rd Qu.: 7.000 3rd Qu.: 1.0000
## Max. :32.0000 Max. :24.0000 Max. :100.000 Max. :71.0000
## liwc_money liwc_motion liwc_negations liwc_negative_emotion
## Min. : 0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 2.000
## Median : 0.0000 Median : 2.000 Median : 3.000 Median : 4.000
## Mean : 0.5724 Mean : 2.825 Mean : 4.911 Mean : 5.676
## 3rd Qu.: 1.0000 3rd Qu.: 4.000 3rd Qu.: 7.000 3rd Qu.: 8.000
## Max. :51.0000 Max. :103.000 Max. :81.000 Max. :614.000
## liwc_nonfluencies liwc_numbers liwc_past_tense liwc_perceptual_processes
## Min. :0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 1.000
## Median :0.0000 Median : 0.000 Median : 3.000 Median : 3.000
## Mean :0.1805 Mean : 1.029 Mean : 5.778 Mean : 3.964
## 3rd Qu.:0.0000 3rd Qu.: 1.000 3rd Qu.: 7.000 3rd Qu.: 5.000
## Max. :8.0000 Max. :35.000 Max. :241.000 Max. :98.000
## liwc_personal_pronouns liwc_positive_emotion liwc_prepositions
## Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 8.00 1st Qu.: 2.000 1st Qu.: 8.00
## Median : 15.00 Median : 4.000 Median : 16.00
## Mean : 22.04 Mean : 5.838 Mean : 22.36
## 3rd Qu.: 28.00 3rd Qu.: 8.000 3rd Qu.: 29.00
## Max. :1228.00 Max. :118.000 Max. :619.00
## liwc_present_tense liwc_quantifiers liwc_relativity liwc_religion
## Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 9.00 1st Qu.: 1.000 1st Qu.: 8.00 1st Qu.: 0.0000
## Median : 17.00 Median : 3.000 Median : 16.00 Median : 0.0000
## Mean : 22.47 Mean : 4.125 Mean : 22.72 Mean : 0.1215
## 3rd Qu.: 29.00 3rd Qu.: 6.000 3rd Qu.: 29.00 3rd Qu.: 0.0000
## Max. :614.00 Max. :122.000 Max. :686.00 Max. :17.0000
## liwc_sadness liwc_see liwc_sexual liwc_social_processes
## Min. : 0.000 Min. : 0.0000 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 3.00
## Median : 1.000 Median : 0.0000 Median : 0.0000 Median : 6.00
## Mean : 1.825 Mean : 0.7747 Mean : 0.6665 Mean : 11.16
## 3rd Qu.: 3.000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 14.00
## Max. :60.000 Max. :42.0000 Max. :110.0000 Max. :389.00
## liwc_space liwc_swear_words liwc_tentative liwc_time
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 1.000 1st Qu.: 4.00
## Median : 6.00 Median : 0.0000 Median : 2.000 Median : 8.00
## Mean : 8.99 Mean : 0.6212 Mean : 3.584 Mean : 11.48
## 3rd Qu.: 12.00 3rd Qu.: 1.0000 3rd Qu.: 5.000 3rd Qu.: 15.00
## Max. :319.00 Max. :110.0000 Max. :85.000 Max. :261.00
## liwc_total_functional liwc_total_pronouns liwc_work tfidf_abl
## Min. : 0.0 Min. : 0.00 Min. : 0.000 Min. :0.00000
## 1st Qu.: 39.0 1st Qu.: 11.00 1st Qu.: 0.000 1st Qu.:0.00000
## Median : 73.0 Median : 22.00 Median : 1.000 Median :0.00000
## Mean : 102.5 Mean : 31.51 Mean : 2.556 Mean :0.01367
## 3rd Qu.: 130.0 3rd Qu.: 40.00 3rd Qu.: 3.000 3rd Qu.:0.00000
## Max. :2631.0 Max. :1228.00 Max. :69.000 Max. :0.78904
## tfidf_abus tfidf_actual tfidf_addict tfidf_adhd
## Min. :0.000000 Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.000000 Median :0.00000 Median :0.000000 Median :0.000000
## Mean :0.004557 Mean :0.01497 Mean :0.002222 Mean :0.001375
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :0.767328 Max. :0.81180 Max. :0.785988 Max. :0.686174
## tfidf_advic tfidf_ago tfidf_alcohol tfidf_almost
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0131 Mean :0.01462 Mean :0.00292 Mean :0.01293
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :0.53289 Max. :1.00000 Max. :0.76166
## tfidf_alon tfidf_alreadi tfidf_also tfidf_alway
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.02368 Mean :0.009661 Mean :0.01756 Mean :0.02736
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.652063 Max. :0.50616 Max. :0.76236
## tfidf_amp tfidf_amp x200b tfidf_ani tfidf_anoth
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01728 Mean :0.01437 Mean :0.03061 Mean :0.01124
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.04063 3rd Qu.:0.00000
## Max. :0.99987 Max. :0.59814 Max. :1.00000 Max. :0.79570
## tfidf_anxieti tfidf_anxious tfidf_anymor tfidf_anyon
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05328 Mean :0.01684 Mean :0.02398 Mean :0.03405
## 3rd Qu.:0.06473 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.03864
## Max. :1.00000 Max. :1.00000 Max. :0.83999 Max. :0.81033
## tfidf_anyon els tfidf_anyth tfidf_around tfidf_ask
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01481 Mean :0.02933 Mean :0.01773 Mean :0.01573
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.68512 Max. :0.85654 Max. :1.00000 Max. :0.76233
## tfidf_attack tfidf_away tfidf_back tfidf_bad
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01695 Mean :0.01747 Mean :0.02625 Mean :0.02477
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.80686 Max. :0.85578 Max. :0.64889 Max. :0.76346
## tfidf_becaus tfidf_becom tfidf_befor tfidf_believ
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.04586 Mean :0.01058 Mean :0.01706 Mean :0.008021
## 3rd Qu.:0.08256 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :0.63720 Max. :0.80263 Max. :0.61233 Max. :1.000000
## tfidf_best tfidf_better tfidf_bit tfidf_bodi
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01432 Mean :0.02542 Mean :0.009737 Mean :0.00789
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :0.671279 Max. :0.75450
## tfidf_bpd tfidf_brain tfidf_call tfidf_came
## Min. :0.000000 Min. :0.000000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.000000 Median :0.000000 Median :0.00000 Median :0.000000
## Mean :0.000405 Mean :0.008372 Mean :0.01372 Mean :0.006829
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :0.515416 Max. :0.877988 Max. :0.88243 Max. :0.576998
## tfidf_care tfidf_caus tfidf_chang tfidf_come
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02164 Mean :0.01159 Mean :0.01315 Mean :0.01964
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.88316 Max. :0.80204 Max. :0.84197 Max. :0.68171
## tfidf_complet tfidf_constant tfidf_control tfidf_could
## Min. :0.00000 Min. :0.0000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.000000 Median :0.00000
## Mean :0.01136 Mean :0.0141 Mean :0.006696 Mean :0.02276
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.66823 Max. :0.6824 Max. :0.769004 Max. :1.00000
## tfidf_coupl tfidf_cri tfidf_day tfidf_deal
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.008814 Mean :0.01907 Mean :0.04250 Mean :0.01376
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.06943 3rd Qu.:0.00000
## Max. :0.605043 Max. :1.00000 Max. :0.83482 Max. :0.84814
## tfidf_depress tfidf_diagnos tfidf_die tfidf_differ
## Min. :0.0000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.0735 Mean :0.006242 Mean :0.02042 Mean :0.01011
## 3rd Qu.:0.1204 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :0.662233 Max. :1.00000 Max. :0.71660
## tfidf_disord tfidf_doctor tfidf_doe tfidf_done
## Min. :0.000000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.004843 Mean :0.01039 Mean :0.0187 Mean :0.01227
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :0.845321 Max. :0.70416 Max. :0.7782 Max. :0.98321
## tfidf_dont tfidf_drink tfidf_drug tfidf_eat
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.000000 Median :0.00000
## Mean :0.01708 Mean :0.006001 Mean :0.005097 Mean :0.01062
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.83453 Max. :0.742348 Max. :0.851183 Max. :0.85491
## tfidf_els tfidf_emot tfidf_end tfidf_enough
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02361 Mean :0.01145 Mean :0.02267 Mean :0.01421
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.59447 Max. :0.80117 Max. :0.85770 Max. :0.93971
## tfidf_etc tfidf_even tfidf_ever tfidf_everi
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.008731 Mean :0.04364 Mean :0.02117 Mean :0.0228
## 3rd Qu.:0.000000 3rd Qu.:0.07812 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :0.883921 Max. :1.00000 Max. :0.72983 Max. :1.0000
## tfidf_everyon tfidf_everyth tfidf_experi tfidf_famili
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.01987 Mean :0.02618 Mean :0.0116 Mean :0.02032
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.76360 Max. :0.9426 Max. :0.77908
## tfidf_fear tfidf_feel tfidf_feel like tfidf_felt
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.07718 Median :0.00000 Median :0.00000
## Mean :0.009628 Mean :0.10353 Mean :0.04638 Mean :0.01851
## 3rd Qu.:0.000000 3rd Qu.:0.16698 3rd Qu.:0.08094 3rd Qu.:0.00000
## Max. :0.920862 Max. :1.00000 Max. :0.58754 Max. :0.70959
## tfidf_final tfidf_find tfidf_first tfidf_food
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.0117 Mean :0.02098 Mean :0.01916 Mean :0.004817
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :0.75928 Max. :0.74519 Max. :0.913467
## tfidf_found tfidf_friend tfidf_fuck tfidf_get
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.02889
## Mean :0.008784 Mean :0.05110 Mean :0.02874 Mean :0.06323
## 3rd Qu.:0.000000 3rd Qu.:0.07479 3rd Qu.:0.00000 3rd Qu.:0.10601
## Max. :0.833085 Max. :1.00000 Max. :1.00000 Max. :1.00000
## tfidf_give tfidf_go tfidf_good tfidf_got
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01685 Mean :0.05570 Mean :0.02752 Mean :0.02326
## 3rd Qu.:0.00000 3rd Qu.:0.09589 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.92784 Max. :0.78979 Max. :0.89742 Max. :0.77857
## tfidf_great tfidf_guess tfidf_guy tfidf_happen
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01123 Mean :0.01056 Mean :0.01425 Mean :0.01925
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.76037 Max. :0.73251 Max. :1.00000 Max. :0.82963
## tfidf_happi tfidf_hard tfidf_hate tfidf_head
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02743 Mean :0.01952 Mean :0.02492 Mean :0.01239
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.65414 Max. :1.00000 Max. :1.00000
## tfidf_health tfidf_hear tfidf_heart tfidf_help
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.009106 Mean :0.00681 Mean :0.01026 Mean :0.04073
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.06004
## Max. :0.710469 Max. :0.86296 Max. :0.91243 Max. :0.93148
## tfidf_high tfidf_home tfidf_hope tfidf_hour
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01139 Mean :0.01619 Mean :0.01728 Mean :0.01444
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.78574 Max. :0.77763 Max. :0.86821 Max. :0.77211
## tfidf_hous tfidf_hurt tfidf_idea tfidf_im
## Min. :0.000000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.009715 Mean :0.01354 Mean :0.009232 Mean :0.02383
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.753271 Max. :0.84672 Max. :0.879889 Max. :0.93437
## tfidf_issu tfidf_job tfidf_keep tfidf_kill
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.01113 Mean :0.02705 Mean :0.02101 Mean :0.0167
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :0.84307 Max. :1.00000 Max. :1.0000
## tfidf_kind tfidf_know tfidf_last tfidf_late
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01065 Mean :0.05781 Mean :0.02125 Mean :0.01163
## 3rd Qu.:0.00000 3rd Qu.:0.09920 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.67493 Max. :1.00000 Max. :0.59494 Max. :0.75625
## tfidf_leav tfidf_left tfidf_let tfidf_life
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01382 Mean :0.01188 Mean :0.01405 Mean :0.05362
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.09165
## Max. :0.77236 Max. :0.77499 Max. :0.79742 Max. :1.00000
## tfidf_like tfidf_littl tfidf_live tfidf_long
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.05854 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.07678 Mean :0.0133 Mean :0.02761 Mean :0.01858
## 3rd Qu.:0.12708 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.63412 Max. :0.8021 Max. :1.00000 Max. :0.82238
## tfidf_look tfidf_lose tfidf_lost tfidf_lot
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.0211 Mean :0.00928 Mean :0.01434 Mean :0.0189
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :0.8337 Max. :0.95191 Max. :0.86246 Max. :0.6612
## tfidf_love tfidf_made tfidf_make tfidf_mani
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02608 Mean :0.01585 Mean :0.04156 Mean :0.01353
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.07263 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.78266 Max. :1.00000 Max. :1.00000
## tfidf_mayb tfidf_mean tfidf_med tfidf_medic
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01444 Mean :0.01059 Mean :0.008513 Mean :0.01279
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.74442 Max. :0.74482 Max. :0.888200 Max. :0.84878
## tfidf_mental tfidf_might tfidf_mind tfidf_mom
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.01412 Mean :0.0101 Mean :0.01416 Mean :0.01142
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.78343 Max. :0.8454 Max. :0.75367 Max. :0.84310
## tfidf_month tfidf_move tfidf_much tfidf_need
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02166 Mean :0.01377 Mean :0.02993 Mean :0.02956
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.02281 3rd Qu.:0.00000
## Max. :0.78364 Max. :0.74906 Max. :0.68366 Max. :0.75431
## tfidf_never tfidf_new tfidf_next tfidf_night
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03134 Mean :0.01742 Mean :0.00955 Mean :0.01475
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.75473 Max. :0.83215 Max. :0.76809 Max. :0.94573
## tfidf_normal tfidf_noth tfidf_notic tfidf_old
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01154 Mean :0.02396 Mean :0.007037 Mean :0.01166
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.77903 Max. :1.00000 Max. :0.712938 Max. :1.00000
## tfidf_onc tfidf_one tfidf_onli tfidf_pain
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01002 Mean :0.03736 Mean :0.02836 Mean :0.01307
## 3rd Qu.:0.00000 3rd Qu.:0.06170 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.69046 Max. :0.83676 Max. :0.78887 Max. :0.93075
## tfidf_panic tfidf_parent tfidf_part tfidf_past
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.01449 Mean :0.0161 Mean :0.01048 Mean :0.01522
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.83953 Max. :0.8148 Max. :1.00000 Max. :0.76669
## tfidf_peopl tfidf_person tfidf_place tfidf_pleas
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04535 Mean :0.02222 Mean :0.01267 Mean :0.01165
## 3rd Qu.:0.07178 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.79645 Max. :0.71672 Max. :1.00000
## tfidf_point tfidf_possibl tfidf_post tfidf_pretti
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.01856 Mean :0.006681 Mean :0.01603 Mean :0.01306
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.74205 Max. :0.816572 Max. :0.81536 Max. :0.69090
## tfidf_probabl tfidf_problem tfidf_ptsd tfidf_put
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.00000
## Mean :0.01005 Mean :0.01485 Mean :0.001472 Mean :0.01289
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :0.71161 Max. :0.77379 Max. :0.841565 Max. :0.80629
## tfidf_question tfidf_quit tfidf_read tfidf_real
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.007794 Mean :0.00875 Mean :0.01161 Mean :0.01004
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.792766 Max. :0.71659 Max. :1.00000 Max. :0.85961
## tfidf_realli tfidf_reason tfidf_recent tfidf_relationship
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04526 Mean :0.01516 Mean :0.01234 Mean :0.01434
## 3rd Qu.:0.07742 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.76107 Max. :0.78364 Max. :0.63988 Max. :0.89918
## tfidf_rememb tfidf_right tfidf_said tfidf_say
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.008799 Mean :0.02105 Mean :0.01227 Mean :0.02492
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.965981 Max. :0.82153 Max. :0.69062 Max. :1.00000
## tfidf_scare tfidf_school tfidf_see tfidf_seem
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01361 Mean :0.02881 Mean :0.02481 Mean :0.01986
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.96271 Max. :1.00000 Max. :0.84921 Max. :0.60346
## tfidf_self tfidf_sever tfidf_shit tfidf_sinc
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.01498 Mean :0.008448 Mean :0.01816 Mean :0.02036
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.79557 Max. :0.793802 Max. :0.76601 Max. :0.63103
## tfidf_situat tfidf_sleep tfidf_social tfidf_someon
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.009818 Mean :0.02136 Mean :0.01466 Mean :0.02816
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.825902 Max. :0.89062 Max. :0.80150 Max. :1.00000
## tfidf_someth tfidf_sometim tfidf_sorri tfidf_start
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02796 Mean :0.01661 Mean :0.01085 Mean :0.03123
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.91995 Max. :0.94216 Max. :0.75764
## tfidf_stay tfidf_still tfidf_stop tfidf_stress
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01132 Mean :0.02362 Mean :0.01959 Mean :0.01169
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.90071 Max. :0.93304 Max. :0.78812 Max. :0.88526
## tfidf_struggl tfidf_stuff tfidf_suicid tfidf_support
## Min. :0.00000 Min. :0.000000 Min. :0.0000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.0000 Median :0.000000
## Mean :0.01358 Mean :0.009114 Mean :0.0204 Mean :0.008628
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :0.80640 Max. :0.862957 Max. :0.8863 Max. :0.776566
## tfidf_sure tfidf_symptom tfidf_take tfidf_talk
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.01397 Mean :0.006048 Mean :0.02769 Mean :0.03563
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :0.822635 Max. :1.00000 Max. :1.00000
## tfidf_tell tfidf_thank tfidf_therapi tfidf_therapist
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.02116 Mean :0.01327 Mean :0.0106 Mean :0.01039
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :0.83719 Max. :0.87651 Max. :0.8441 Max. :0.84280
## tfidf_thing tfidf_think tfidf_though tfidf_thought
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03999 Mean :0.04145 Mean :0.01334 Mean :0.02906
## 3rd Qu.:0.07027 3rd Qu.:0.07154 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.63738 Max. :0.84618 Max. :0.65806 Max. :0.78063
## tfidf_time tfidf_tire tfidf_today tfidf_told
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04792 Mean :0.01635 Mean :0.01923 Mean :0.01422
## 3rd Qu.:0.08537 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.68757 Max. :0.97665 Max. :0.93192 Max. :0.87700
## tfidf_took tfidf_tri tfidf_turn tfidf_two
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.007907 Mean :0.03885 Mean :0.01053 Mean :0.01224
## 3rd Qu.:0.000000 3rd Qu.:0.06555 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.786568 Max. :1.00000 Max. :0.76578 Max. :0.70316
## tfidf_understand tfidf_us tfidf_use tfidf_usual
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.000000
## Mean :0.01208 Mean :0.007248 Mean :0.01868 Mean :0.009395
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :0.88774 Max. :0.831979 Max. :0.70982 Max. :0.908200
## tfidf_veri tfidf_want tfidf_way tfidf_week
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02026 Mean :0.06381 Mean :0.02559 Mean :0.02064
## 3rd Qu.:0.00000 3rd Qu.:0.10585 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.72539 Max. :1.00000 Max. :0.74705 Max. :0.68281
## tfidf_weight tfidf_well tfidf_went tfidf_whi
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.004925 Mean :0.01665 Mean :0.01334 Mean :0.02553
## 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :0.912800 Max. :0.63109 Max. :0.67659 Max. :0.99580
## tfidf_whole tfidf_wish tfidf_without tfidf_wonder
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.00893 Mean :0.01645 Mean :0.01268 Mean :0.009652
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :0.88639 Max. :0.94930 Max. :0.73502 Max. :0.735612
## tfidf_work tfidf_worri tfidf_wors tfidf_would
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03941 Mean :0.01233 Mean :0.01724 Mean :0.03249
## 3rd Qu.:0.05001 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.03201
## Max. :0.80059 Max. :0.88022 Max. :0.86105 Max. :1.00000
## tfidf_wrong tfidf_x200b tfidf_year
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.01235 Mean :0.01437 Mean :0.04120
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.07234
## Max. :0.82475 Max. :0.59814 Max. :1.00000
# Inspect the structure of the full feature table: column names, types,
# and the first few values of each column.
str(socailmedia_2019_features)
## tibble [49,587 × 350] (S3: tbl_df/tbl/data.frame)
## $ subreddit : chr [1:49587] "depression" "depression" "depression" "depression" ...
## $ author : chr [1:49587] "anonaccount131" "gimlis_beard" "WreckDotNet" "danieltargaryean" ...
## $ date : POSIXct[1:49587], format: "2019-01-01" "2019-01-01" ...
## $ post : chr [1:49587] "Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart a"| __truncated__ "Craving validation from others while immediately rejecting anything positive that other people say about me is "| __truncated__ "Calling the distress line while living at home? How? How can I? \r\nI can't really afford therapy or anything "| __truncated__ "Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on impro"| __truncated__ ...
## $ automated_readability_index: num [1:49587] 10.22 9.13 -1.16 0.83 1.35 ...
## $ coleman_liau_index : num [1:49587] 5.467 9.896 0.873 3.047 3.598 ...
## $ flesch_kincaid_grade_level : num [1:49587] 9.94 9.78 1.88 2.36 2.86 ...
## $ flesch_reading_ease : num [1:49587] 72.8 55.6 92.8 94.4 91.4 ...
## $ gulpease_index : num [1:49587] 61.9 59.8 100.4 84.3 82.3 ...
## $ gunning_fog_index : num [1:49587] 13.91 13.39 4.33 5.45 5.38 ...
## $ lix : num [1:49587] 38.4 41.6 17.6 20.6 21.3 ...
## $ smog_index : num [1:49587] 11.21 12.69 6.18 6.78 6.63 ...
## $ wiener_sachtextformel : num [1:49587] 3.81027 6.18863 0.00256 0.30255 0.34551 ...
## $ n_chars : num [1:49587] 209 401 160 755 291 354 383 420 110 221 ...
## $ n_long_words : num [1:49587] 6 21 5 23 9 10 8 15 3 5 ...
## $ n_monosyllable_words : num [1:49587] 46 56 34 166 62 75 96 82 24 57 ...
## $ n_polysyllable_words : num [1:49587] 4 14 2 9 3 6 3 6 1 1 ...
## $ n_sents : num [1:49587] 2 5 7 22 8 6 13 9 3 7 ...
## $ n_syllables : num [1:49587] 69 136 56 245 95 119 131 133 37 78 ...
## $ n_unique_words : num [1:49587] 45 67 35 117 55 65 66 63 27 42 ...
## $ n_words : num [1:49587] 55 86 44 201 76 93 111 104 30 67 ...
## $ sent_neg : num [1:49587] 0.129 0.121 0.09 0.157 0.066 0.132 0.133 0.102 0.104 0.034 ...
## $ sent_neu : num [1:49587] 0.775 0.663 0.847 0.559 0.817 ...
## $ sent_pos : num [1:49587] 0.096 0.217 0.063 0.284 0.117 ...
## $ sent_compound : num [1:49587] -0.421 0.866 -0.291 0.985 0.44 ...
## $ economic_stress_total : num [1:49587] 0 2 1 2 0 0 0 0 0 0 ...
## $ isolation_total : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ substance_use_total : num [1:49587] 0 0 0 0 0 0 0 1 0 0 ...
## $ guns_total : num [1:49587] 0 1 0 0 0 0 0 0 0 0 ...
## $ domestic_stress_total : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ suicidality_total : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ punctuation : num [1:49587] 5 13 10 22 6 10 16 15 1 9 ...
## $ liwc_1st_pers : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
## $ liwc_2nd_pers : num [1:49587] 0 0 0 2 0 2 0 0 0 0 ...
## $ liwc_3rd_pers : num [1:49587] 0 1 0 0 0 0 0 3 0 0 ...
## $ liwc_achievement : num [1:49587] 0 0 0 3 0 2 4 1 1 0 ...
## $ liwc_adverbs : num [1:49587] 4 7 4 7 7 6 8 6 2 5 ...
## $ liwc_affective_processes : num [1:49587] 4 8 2 19 2 7 10 6 3 5 ...
## $ liwc_anger : num [1:49587] 2 1 0 2 0 1 1 0 1 1 ...
## $ liwc_anxiety : num [1:49587] 0 0 1 0 0 0 0 0 0 0 ...
## $ liwc_articles_article : num [1:49587] 3 1 1 4 3 2 4 3 2 1 ...
## $ liwc_assent : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ liwc_auxiliary_verbs : num [1:49587] 5 7 5 20 8 6 8 7 4 6 ...
## $ liwc_biological : num [1:49587] 2 0 0 3 1 6 3 1 1 4 ...
## $ liwc_body : num [1:49587] 1 0 0 1 1 0 0 0 0 0 ...
## $ liwc_causation : num [1:49587] 1 1 2 0 0 3 1 1 0 1 ...
## $ liwc_certainty : num [1:49587] 0 1 0 2 0 1 1 0 0 0 ...
## $ liwc_cognitive : num [1:49587] 7 11 6 44 13 17 24 22 5 10 ...
## $ liwc_common_verbs : num [1:49587] 12 12 6 45 14 16 27 25 7 16 ...
## $ liwc_conjunctions : num [1:49587] 2 3 8 15 9 9 6 8 2 1 ...
## $ liwc_death : num [1:49587] 0 0 0 2 0 0 0 0 0 0 ...
## $ liwc_discrepancy : num [1:49587] 0 0 0 11 3 1 3 3 0 5 ...
## $ liwc_exclusive : num [1:49587] 0 3 2 10 4 3 7 4 1 3 ...
## $ liwc_family : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
## $ liwc_feel : num [1:49587] 1 2 0 1 0 0 5 3 0 0 ...
## $ liwc_fillers : num [1:49587] 1 2 1 1 0 0 3 4 0 1 ...
## $ liwc_friends : num [1:49587] 0 0 0 1 0 0 1 0 1 0 ...
## $ liwc_future_tense : num [1:49587] 0 0 0 4 1 1 2 3 0 1 ...
## $ liwc_health : num [1:49587] 0 0 0 1 0 6 2 1 0 1 ...
## $ liwc_hear : num [1:49587] 0 2 0 0 2 2 0 0 0 0 ...
## $ liwc_home : num [1:49587] 0 0 2 1 0 0 0 0 0 0 ...
## $ liwc_humans : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
## $ liwc_impersonal_pronouns : num [1:49587] 1 9 1 9 1 5 3 4 1 1 ...
## $ liwc_inclusive : num [1:49587] 2 1 2 12 5 5 3 7 3 1 ...
## $ liwc_ingestion : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ liwc_inhibition : num [1:49587] 0 0 0 0 0 1 2 3 0 0 ...
## $ liwc_insight : num [1:49587] 3 3 0 7 1 3 8 4 1 0 ...
## $ liwc_leisure : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
## $ liwc_money : num [1:49587] 0 0 0 0 0 0 1 0 0 0 ...
## $ liwc_motion : num [1:49587] 0 1 1 2 0 0 1 2 0 0 ...
## $ liwc_negations : num [1:49587] 2 3 2 7 0 3 4 3 2 5 ...
## $ liwc_negative_emotion : num [1:49587] 2 2 1 5 0 3 3 2 1 1 ...
## $ liwc_nonfluencies : num [1:49587] 0 0 0 1 0 0 0 0 0 0 ...
## $ liwc_numbers : num [1:49587] 0 0 0 0 0 0 0 1 0 0 ...
## $ liwc_past_tense : num [1:49587] 0 0 0 7 3 2 1 4 5 0 ...
## $ liwc_perceptual_processes : num [1:49587] 2 5 0 3 3 2 5 3 0 2 ...
## $ liwc_personal_pronouns : num [1:49587] 4 10 5 26 5 12 13 16 4 13 ...
## $ liwc_positive_emotion : num [1:49587] 2 6 1 14 2 3 7 4 2 4 ...
## $ liwc_prepositions : num [1:49587] 6 13 5 19 9 11 8 8 3 7 ...
## $ liwc_present_tense : num [1:49587] 11 11 6 30 9 13 22 18 2 14 ...
## $ liwc_quantifiers : num [1:49587] 2 1 0 7 0 2 3 3 0 0 ...
## $ liwc_relativity : num [1:49587] 9 4 7 11 5 11 11 18 5 3 ...
## $ liwc_religion : num [1:49587] 0 1 0 0 0 0 0 0 0 0 ...
## $ liwc_sadness : num [1:49587] 0 1 0 2 0 1 1 1 0 0 ...
## $ liwc_see : num [1:49587] 1 1 0 2 0 0 0 0 0 2 ...
## $ liwc_sexual : num [1:49587] 1 0 0 1 0 0 1 0 1 3 ...
## $ liwc_social_processes : num [1:49587] 0 3 2 9 8 5 2 3 1 3 ...
## $ liwc_space : num [1:49587] 6 2 4 3 3 5 3 4 3 1 ...
## $ liwc_swear_words : num [1:49587] 2 1 0 2 0 0 1 0 1 0 ...
## $ liwc_tentative : num [1:49587] 1 3 1 9 1 1 0 1 0 0 ...
## $ liwc_time : num [1:49587] 4 3 3 7 2 6 9 16 2 3 ...
## $ liwc_total_functional : num [1:49587] 27 48 25 109 40 50 52 53 17 34 ...
## $ liwc_total_pronouns : num [1:49587] 5 19 6 35 6 17 16 20 5 14 ...
## $ liwc_work : num [1:49587] 1 0 0 1 1 2 2 1 0 0 ...
## $ tfidf_abl : num [1:49587] 0.259 0 0 0 0 ...
## $ tfidf_abus : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ tfidf_actual : num [1:49587] 0.253 0.265 0 0 0 ...
## $ tfidf_addict : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## $ tfidf_adhd : num [1:49587] 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
#sum(is.na(socailmedia_2019_features))
# Keep the first 11 columns (subreddit, author, date, post and the first
# seven readability indices) for the missing-data checks below.
# NOTE: this was previously written as `[, 1:11, 22:25]`. The third index of
# `[` on a data frame/tibble is `drop`, not a second column range, so 22:25
# never selected any columns, and a length-4 `drop` is invalid and may fail
# on current R/tibble versions. If the sentiment columns (22:25, sent_neg to
# sent_compound) were meant to be included, use `[, c(1:11, 22:25)]` instead.
socailmedia_2019_features_subset <- socailmedia_2019_features[, 1:11]
# Total number of missing cells in the subset.
sum(is.na(socailmedia_2019_features_subset))
## [1] 39
# install.packages("naniar")
library(naniar)
# Plot where the missing values fall across the subset.
vis_miss(socailmedia_2019_features_subset)
# Count how many variables contain at least one missing value.
n_var_miss(socailmedia_2019_features_subset)
## [1] 1
The NAs are all found in the author column. Only about 0.1% of the values are missing, which is minimal.
library(visdat)
# Visualise the type of each column in the subset, along with any NAs.
vis_dat(socailmedia_2019_features_subset)
This visual shows the type of each variable in the dataset. More than half of the variables are numeric, followed by character variables and one date variable. No NAs stand out here, since there is only a very small amount of them and they are all in the author column.
# install.packages("mice")
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Tabulate the missing-data patterns: each row of the result is one pattern
# (1 = observed, 0 = missing), labelled with the number of rows that share it.
md.pattern(socailmedia_2019_features_subset)
## subreddit date post automated_readability_index coleman_liau_index
## 49548 1 1 1 1 1
## 39 1 1 1 1 1
## 0 0 0 0 0
## flesch_kincaid_grade_level flesch_reading_ease gulpease_index
## 49548 1 1 1
## 39 1 1 1
## 0 0 0
## gunning_fog_index lix author
## 49548 1 1 1 0
## 39 1 1 0 1
## 0 0 39 39
#install.packages("VIM")
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Aggregate view of missingness per variable, shown as counts rather than
# proportions (prop = FALSE) and with the numbers printed on the plot.
aggr(
  socailmedia_2019_features_subset,
  prop = FALSE,
  numbers = TRUE
)
The missing data was only found under the “author” variable in the dataset. It has 39 missing values as shown in the visual above.
# Drop every row that has a missing value in any column of the subset.
socailmedia_2019_features_subset_clean <- na.omit(
  socailmedia_2019_features_subset
)
# Confirm that no missing cells remain after the row-wise removal.
sum(is.na(socailmedia_2019_features_subset_clean))
## [1] 0
Here, I’ve omitted the NAs found previously from the subset of my data set. As you can see, there are 0 NAs now.
# Fix the RNG seed so that any later random steps are reproducible
# (the boxplot below does not itself draw random numbers).
set.seed(482)
# Boxplot of the Flesch-Kincaid grade level to check for outliers.
boxplot(
  socailmedia_2019_features_subset_clean[["flesch_kincaid_grade_level"]]
)
The data has a few outliers as seen in the image above.