library(readr)
library(psych)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ dplyr   1.0.5
## ✓ tibble  3.0.6     ✓ stringr 1.4.0
## ✓ tidyr   1.1.3     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
#install.packages("Hmisc")
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:psych':
## 
##     logit
library(ggplot2)
library(readxl)
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
## The following object is masked from 'package:purrr':
## 
##     lift
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#install.packages("LogicReg")
library(LogicReg)
#install.packages("tidyverse")
library(tidyverse) 
# Read the 2019 social-media feature table from a comma-separated file.
socailmedia_2019_features <- read.csv(
  "/Users/pallavisaitu/Downloads/socailmedia_2019_features.csv",
  sep = ","
)
# View(socailmedia_2019_features)

# Report the table size (rows, columns).
dim(socailmedia_2019_features)
## [1] 7742   36
# Preview the first six rows.
head(socailmedia_2019_features, n = 6L)
##    subreddit           author   date
## 1 depression   anonaccount131 1/1/19
## 2 depression     gimlis_beard 1/1/19
## 3 depression      WreckDotNet 1/1/19
## 4 depression danieltargaryean 1/1/19
## 5 depression      emmanuel169 1/1/19
## 6 depression         Lunakinn 1/1/19
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        post
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart and able back in high school, now I can't do fucking anything. I look at a piece of work/theory and don't even understand it, thinking where tf do I start
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Craving validation from others while immediately rejecting anything positive that other people say about me is a special kind of hell I have no confidence in myself, especially about my physical appearance, so i often look to others for things I can be positive about. However, I imeadiately shoot down any complement with statements like, "It isn't actually all that impressive," or, "they are only saying that to placate me." It feels like I'm just not allowed to feel happy about anything I do. 
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Calling the distress line while living at home? How? How can I?  \nI can't really afford therapy or anything so this is like my last resort. But I live at home, and I can't go out in public to do so.\n\n&amp;#x200B;
## 4 Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on improving myself. I don't want to be depressed 24/7 anymore. I'm done with people treating me like shit. I'm still my own worst enemy but I'm hoping that will change this year. I already delete social media from my phone, save reddit and messenger, and I hope that this will improve my life somehow. I know everyone here seems to have lost hope and I have too but I don't know, I just hope someone will read this and decide to try and change with me. 2018 was fucking trash and honestly do i expect 2019 to be different? No, but it's not going to be because I didn't try. I wish all of you the best in 2019 and I hope we all live to see 2020. \n\nP.S. If you feel suicidal, a pet might help. I have had suicidal thoughts but I couldn't go through with it knowing I not only leave my parents but my best friend as well. Try looking into it guys. Have a great 2019.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Anyone just want someone to talk to? I\x89Ûªm stuck in the desert because I was forced to join the military and am now stuck with 1000\x89Ûªs who live so loosely and find joy in anything and then there is me. So any type of conversation would be nice. Venting ranting or just light conversation. I can listen and talk. Anyone in need of help I can offer only an ear but I am here. 
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    How do you know you're on the right medication? I hated my last medication, it seemed to make everything worse and my doctor wouldnt listen to me. When I said it was wrong, he just doubled the dose. So for months I endured it and waited for my anxiety to calm down then ended those attrocious pills with my doctor's approval. \n\nI dont suffer from anxiety anymore, and my mood is much better, but I'm not sure I'm out of this place yet, how do you know?
##   automated_readability_index coleman_liau_index flesch_kincaid_grade_level
## 1                  10.2180000          5.4667287                   9.938636
## 2                   9.1317442          9.8955198                   9.778465
## 3                  -1.1598701          0.8734051                   1.879610
## 4                   0.8299729          3.0467947                   2.356266
## 5                   1.3543421          3.5983855                   2.865000
## 6                   4.2483871          4.6717075                   5.553925
##   flesch_reading_ease gulpease_index gunning_fog_index      lix smog_index
## 1            72.78795       61.90909         13.909091 38.40909  11.208143
## 2            55.59095       59.81395         13.391628 41.61860  12.688353
## 3            92.78227      100.36364          4.332468 17.64935   6.182691
## 4            94.44219       84.27363          5.445590 20.57915   6.782985
## 5            91.44250       82.28947          5.378947 21.34211   6.627428
## 6            82.85089       70.29032          8.780645 26.25269   8.841846
##   wiener_sachtextformel n_chars n_long_words n_monosyllable_words
## 1           3.810272727     209            6                   46
## 2           6.188630698     401           21                   56
## 3           0.002562338     160            5                   34
## 4           0.302550249     755           23                  166
## 5           0.345505263     291            9                   62
## 6           1.722513978     354           10                   75
##   n_polysyllable_words n_sents n_syllables n_unique_words n_words sent_neg
## 1                    4       2          69             45      55    0.129
## 2                   14       5         136             67      86    0.121
## 3                    2       7          56             35      44    0.090
## 4                    9      22         245            117     201    0.157
## 5                    3       8          95             55      76    0.066
## 6                    6       6         119             65      93    0.132
##   sent_neu sent_pos sent_compound economic_stress_total domestic_stress_total
## 1    0.775    0.096       -0.4215                     0                     0
## 2    0.663    0.217        0.8658                     2                     0
## 3    0.847    0.063       -0.2906                     1                     0
## 4    0.559    0.284        0.9846                     2                     0
## 5    0.817    0.117        0.4404                     0                     0
## 6    0.787    0.082       -0.5615                     0                     0
##   suicidality_total liwc_negative_emotion liwc_social_processes tfidf_anxieti
## 1                 0                     2                     0     0.0000000
## 2                 0                     2                     3     0.0000000
## 3                 0                     1                     2     0.0000000
## 4                 0                     5                     9     0.0000000
## 5                 0                     0                     8     0.0000000
## 6                 0                     3                     5     0.2973842
##   tfidf_depress tfidf_stress tfidf_struggl tfidf_suicid depression
## 1    0.00000000            0             0    0.0000000          1
## 2    0.00000000            0             0    0.0000000          1
## 3    0.00000000            0             0    0.0000000          1
## 4    0.09623711            0             0    0.2444397          1
## 5    0.00000000            0             0    0.0000000          1
## 6    0.00000000            0             0    0.0000000          1
# Optional quick looks at the raw table (left disabled).
# summary(socailmedia_2019_features)
# str(socailmedia_2019_features)

# Count the missing cells in the raw table.
socailmedia_2019_features %>% is.na() %>% sum()
## [1] 0
# sum(is.na(socailmedia_2019_features))
socailmedia_2019_features %>% is.na() %>% sum()
## [1] 0
# Keep only rows without any NA, then confirm that none remain.
socailmedia_2019_features_clean <- socailmedia_2019_features %>% na.omit()
socailmedia_2019_features_clean %>% is.na() %>% sum()
## [1] 0
# Show the column types of the cleaned table.
str(object = socailmedia_2019_features_clean)
## 'data.frame':    7742 obs. of  36 variables:
##  $ subreddit                  : Factor w/ 3 levels "anxiety","depression",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ author                     : Factor w/ 7638 levels "__dark__throw__away_",..: 496 2582 7414 1628 2125 4128 6517 3129 2718 5500 ...
##  $ date                       : Factor w/ 23 levels "1/1/19","1/10/19",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ post                       : Factor w/ 7739 levels "_\xd9\xf7\xd3 Scared of seeking help with psych about what I am feeling right know. ",..: 564 1033 850 6015 656 2455 3052 3826 5060 94 ...
##  $ automated_readability_index: num  10.22 9.13 -1.16 0.83 1.35 ...
##  $ coleman_liau_index         : num  5.467 9.896 0.873 3.047 3.598 ...
##  $ flesch_kincaid_grade_level : num  9.94 9.78 1.88 2.36 2.87 ...
##  $ flesch_reading_ease        : num  72.8 55.6 92.8 94.4 91.4 ...
##  $ gulpease_index             : num  61.9 59.8 100.4 84.3 82.3 ...
##  $ gunning_fog_index          : num  13.91 13.39 4.33 5.45 5.38 ...
##  $ lix                        : num  38.4 41.6 17.6 20.6 21.3 ...
##  $ smog_index                 : num  11.21 12.69 6.18 6.78 6.63 ...
##  $ wiener_sachtextformel      : num  3.81027 6.18863 0.00256 0.30255 0.34551 ...
##  $ n_chars                    : int  209 401 160 755 291 354 383 420 110 221 ...
##  $ n_long_words               : int  6 21 5 23 9 10 8 15 3 5 ...
##  $ n_monosyllable_words       : int  46 56 34 166 62 75 96 82 24 57 ...
##  $ n_polysyllable_words       : int  4 14 2 9 3 6 3 6 1 1 ...
##  $ n_sents                    : int  2 5 7 22 8 6 13 9 3 7 ...
##  $ n_syllables                : int  69 136 56 245 95 119 131 133 37 78 ...
##  $ n_unique_words             : int  45 67 35 117 55 65 66 63 27 42 ...
##  $ n_words                    : int  55 86 44 201 76 93 111 104 30 67 ...
##  $ sent_neg                   : num  0.129 0.121 0.09 0.157 0.066 0.132 0.133 0.102 0.104 0.034 ...
##  $ sent_neu                   : num  0.775 0.663 0.847 0.559 0.817 0.787 0.704 0.762 0.592 0.634 ...
##  $ sent_pos                   : num  0.096 0.217 0.063 0.284 0.117 0.082 0.164 0.136 0.305 0.331 ...
##  $ sent_compound              : num  -0.421 0.866 -0.291 0.985 0.44 ...
##  $ economic_stress_total      : int  0 2 1 2 0 0 0 0 0 0 ...
##  $ domestic_stress_total      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ suicidality_total          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ liwc_negative_emotion      : int  2 2 1 5 0 3 3 2 1 1 ...
##  $ liwc_social_processes      : int  0 3 2 9 8 5 2 3 1 3 ...
##  $ tfidf_anxieti              : num  0 0 0 0 0 ...
##  $ tfidf_depress              : num  0 0 0 0.0962 0 ...
##  $ tfidf_stress               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ tfidf_struggl              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ tfidf_suicid               : num  0 0 0 0.244 0 ...
##  $ depression                 : int  1 1 1 1 1 1 1 1 1 1 ...
# Build the numeric feature table used for the correlation analysis.
# Step 1 keeps only the numeric columns of the cleaned data frame; the
# non-numeric columns shown by str() (subreddit, author, date, post) drop
# out. The integer column `depression` is numeric, so it stays in `dataset`.
library(dplyr)
dataset <- dplyr::select_if(socailmedia_2019_features_clean, is.numeric)

# Step 2 removes the Gulpease readability index. It used to be removed by
# position (`-c(5)`), which silently drops the wrong column if the column
# order of the input ever changes; selecting by name fails loudly instead.
# NOTE(review): the reason for excluding gulpease_index is not visible here.
dataset <- dplyr::select(dataset, -gulpease_index)

# install.packages("corrplot")
library(corrplot)
## corrplot 0.84 loaded
# Pearson correlation matrix of every column in `dataset`.
datamatrix <- cor(dataset, method = "pearson")

# Draw the upper triangle, with variables ordered by hierarchical
# clustering and the text labels rotated by 45 degrees.
corrplot(
  corr = datamatrix,
  type = "upper",
  order = "hclust",
  tl.srt = 45
)

# Pairwise Pearson correlations (with observation counts and p-values) between
# the columns of `dataset`.
# rcorr() correlates the columns of the matrix it is given, so it must receive
# the observations themselves. The earlier call passed `datamatrix`, which is
# already cor(dataset); that produced correlations of correlation
# coefficients, not of the features.
res2 <- Hmisc::rcorr(as.matrix(dataset), type = "pearson")

# Correlation coefficients.
# NOTE(review): any rendered output that follows this statement was produced
# by the earlier call on `datamatrix`; re-render the document to refresh it.
res2$r
##                             automated_readability_index coleman_liau_index
## automated_readability_index                 1.000000000         0.98931973
## coleman_liau_index                          0.989319730         1.00000000
## flesch_kincaid_grade_level                  0.990699285         0.96577749
## flesch_reading_ease                        -0.990248998        -0.98341065
## gunning_fog_index                           0.970648650         0.93795782
## lix                                         0.981155036         0.95858900
## smog_index                                  0.945396439         0.91707082
## wiener_sachtextformel                       0.974377306         0.95889014
## n_chars                                    -0.096438060        -0.13767645
## n_long_words                                0.015964726        -0.02617195
## n_monosyllable_words                       -0.168319022        -0.20921511
## n_polysyllable_words                        0.105649466         0.06191993
## n_sents                                    -0.301370955        -0.33194202
## n_syllables                                -0.102537721        -0.14410887
## n_unique_words                             -0.090835791        -0.13418334
## n_words                                    -0.138677394        -0.17990308
## sent_neg                                   -0.123779577        -0.09370482
## sent_neu                                    0.246587047         0.20606541
## sent_pos                                   -0.163504343        -0.14950172
## sent_compound                              -0.006516122        -0.00884736
## economic_stress_total                      -0.097589640        -0.13916592
## domestic_stress_total                      -0.173184889        -0.19185685
## suicidality_total                          -0.317545083        -0.31839093
## liwc_negative_emotion                      -0.165093749        -0.19653448
## liwc_social_processes                      -0.166806263        -0.20639122
## tfidf_anxieti                               0.329402291         0.33439282
## tfidf_depress                               0.364851125         0.39278186
## tfidf_stress                               -0.004095684        -0.00949735
## tfidf_struggl                               0.049343511         0.05112505
## tfidf_suicid                                0.033858340         0.06000411
## depression                                  0.024477226         0.02963546
##                             flesch_kincaid_grade_level flesch_reading_ease
## automated_readability_index                0.990699285       -0.9902489976
## coleman_liau_index                         0.965777485       -0.9834106476
## flesch_kincaid_grade_level                 1.000000000       -0.9907684425
## flesch_reading_ease                       -0.990768442        1.0000000000
## gunning_fog_index                          0.993382206       -0.9805146544
## lix                                        0.993027284       -0.9888181471
## smog_index                                 0.975595153       -0.9680266556
## wiener_sachtextformel                      0.987812384       -0.9928522663
## n_chars                                   -0.061616650        0.1287219936
## n_long_words                               0.053683437        0.0113778283
## n_monosyllable_words                      -0.134216558        0.2020556016
## n_polysyllable_words                       0.148537074       -0.0854409170
## n_sents                                   -0.271369447        0.3304284917
## n_syllables                               -0.066974171        0.1341976448
## n_unique_words                            -0.054003534        0.1229233636
## n_words                                   -0.103922908        0.1715234100
## sent_neg                                  -0.127747375        0.0810251886
## sent_neu                                   0.264783221       -0.2110928963
## sent_pos                                  -0.182385705        0.1729121387
## sent_compound                             -0.021815296        0.0313199966
## economic_stress_total                     -0.059198538        0.1215424245
## domestic_stress_total                     -0.159880040        0.1950214823
## suicidality_total                         -0.310848132        0.3236630855
## liwc_negative_emotion                     -0.132646789        0.1862972368
## liwc_social_processes                     -0.139022245        0.2072377149
## tfidf_anxieti                              0.361568191       -0.3865460593
## tfidf_depress                              0.365822243       -0.4079030312
## tfidf_stress                               0.002447042        0.0004958451
## tfidf_struggl                              0.056510840       -0.0634911380
## tfidf_suicid                               0.017752008       -0.0471086573
## depression                                 0.040734854       -0.0531732905
##                             gunning_fog_index          lix   smog_index
## automated_readability_index      0.9706486502  0.981155036  0.945396439
## coleman_liau_index               0.9379578241  0.958589005  0.917070824
## flesch_kincaid_grade_level       0.9933822064  0.993027284  0.975595153
## flesch_reading_ease             -0.9805146544 -0.988818147 -0.968026656
## gunning_fog_index                1.0000000000  0.989929241  0.991276217
## lix                              0.9899292406  1.000000000  0.976059397
## smog_index                       0.9912762169  0.976059397  1.000000000
## wiener_sachtextformel            0.9892592949  0.995136381  0.983496595
## n_chars                         -0.0528585935 -0.084656172 -0.006758319
## n_long_words                     0.0636667410  0.034465823  0.111536431
## n_monosyllable_words            -0.1251441489 -0.157837596 -0.079616041
## n_polysyllable_words             0.1632822830  0.129119659  0.215080821
## n_sents                         -0.2624910279 -0.291534495 -0.212145059
## n_syllables                     -0.0575975087 -0.089914669 -0.011059597
## n_unique_words                  -0.0438310778 -0.077518414  0.002669595
## n_words                         -0.0947782211 -0.127183417 -0.048826950
## sent_neg                        -0.1269766395 -0.079162357 -0.108231551
## sent_neu                         0.2730993379  0.238004782  0.258800212
## sent_pos                        -0.1944278027 -0.211048000 -0.200223232
## sent_compound                   -0.0315950859 -0.063467539 -0.056073112
## economic_stress_total           -0.0494235209 -0.080747378 -0.008924442
## domestic_stress_total           -0.1614285589 -0.171161445 -0.138067999
## suicidality_total               -0.3204037182 -0.307984203 -0.297605722
## liwc_negative_emotion           -0.1234976707 -0.142345475 -0.071980024
## liwc_social_processes           -0.1327639714 -0.164897157 -0.092852086
## tfidf_anxieti                    0.3879011933  0.379939652  0.416365204
## tfidf_depress                    0.3828302473  0.403651741  0.410680302
## tfidf_stress                     0.0047402573  0.005322529  0.006600530
## tfidf_struggl                    0.0566017348  0.062405444  0.065882030
## tfidf_suicid                     0.0001173876  0.038581125  0.001765730
## depression                       0.0523730794  0.050899572  0.065050182
##                             wiener_sachtextformel       n_chars  n_long_words
## automated_readability_index          0.9743773060 -0.0964380596  0.0159647259
## coleman_liau_index                   0.9588901384 -0.1376764541 -0.0261719508
## flesch_kincaid_grade_level           0.9878123842 -0.0616166504  0.0536834367
## flesch_reading_ease                 -0.9928522663  0.1287219936  0.0113778283
## gunning_fog_index                    0.9892592949 -0.0528585935  0.0636667410
## lix                                  0.9951363809 -0.0846561724  0.0344658234
## smog_index                           0.9834965945 -0.0067583185  0.1115364310
## wiener_sachtextformel                1.0000000000 -0.1206703566 -0.0003933138
## n_chars                             -0.1206703566  1.0000000000  0.9919137489
## n_long_words                        -0.0003933138  0.9919137489  1.0000000000
## n_monosyllable_words                -0.1941762297  0.9970761977  0.9795372562
## n_polysyllable_words                 0.0992516351  0.9725141736  0.9932151702
## n_sents                             -0.3232967958  0.9761132879  0.9440341629
## n_syllables                         -0.1256496056  0.9999614991  0.9913566626
## n_unique_words                      -0.1137351474  0.9993298246  0.9920376325
## n_words                             -0.1633406943  0.9990172930  0.9855204853
## sent_neg                            -0.0626030286 -0.1355891156 -0.1305371118
## sent_neu                             0.2133080151  0.1654598939  0.1858490804
## sent_pos                            -0.2001769879 -0.0402208793 -0.0739287595
## sent_compound                       -0.0603298749 -0.1184550634 -0.1406862884
## economic_stress_total               -0.1128804795  0.8909186571  0.8851110989
## domestic_stress_total               -0.1945628232  0.5932068798  0.5774570941
## suicidality_total                   -0.3256545300  0.4145478121  0.3807025748
## liwc_negative_emotion               -0.1733099855  0.9506950770  0.9395590412
## liwc_social_processes               -0.2026489662  0.9779251322  0.9560647371
## tfidf_anxieti                        0.4103315704 -0.1362706055 -0.0771747683
## tfidf_depress                        0.4316887453 -0.1316552404 -0.0711865040
## tfidf_stress                         0.0036396798 -0.0180337439 -0.0135615012
## tfidf_struggl                        0.0666315394 -0.0006563571  0.0125063420
## tfidf_suicid                         0.0405142083 -0.1138205435 -0.1044475636
## depression                           0.0639043077 -0.0726959326 -0.0589678534
##                             n_monosyllable_words n_polysyllable_words
## automated_readability_index         -0.168319022           0.10564947
## coleman_liau_index                  -0.209215109           0.06191993
## flesch_kincaid_grade_level          -0.134216558           0.14853707
## flesch_reading_ease                  0.202055602          -0.08544092
## gunning_fog_index                   -0.125144149           0.16328228
## lix                                 -0.157837596           0.12911966
## smog_index                          -0.079616041           0.21508082
## wiener_sachtextformel               -0.194176230           0.09925164
## n_chars                              0.997076198           0.97251417
## n_long_words                         0.979537256           0.99321517
## n_monosyllable_words                 1.000000000           0.95265001
## n_polysyllable_words                 0.952650010           1.00000000
## n_sents                              0.988532722           0.90521407
## n_syllables                          0.997429241           0.97174220
## n_unique_words                       0.996027044           0.97373814
## n_words                              0.999473723           0.96184577
## sent_neg                            -0.131723144          -0.13971798
## sent_neu                             0.150582162           0.20560776
## sent_pos                            -0.025599272          -0.08800526
## sent_compound                       -0.111509722          -0.14019112
## economic_stress_total                0.888753604           0.86929978
## domestic_stress_total                0.598883842           0.54702726
## suicidality_total                    0.434797972           0.33578668
## liwc_negative_emotion                0.951591512           0.91376694
## liwc_social_processes                0.982590121           0.92580386
## tfidf_anxieti                       -0.167670049          -0.01832355
## tfidf_depress                       -0.164871965          -0.01974876
## tfidf_stress                        -0.018670508          -0.01270078
## tfidf_struggl                       -0.007817339           0.02060667
## tfidf_suicid                        -0.115705880          -0.10923294
## depression                          -0.076674831          -0.04862408
##                                  n_sents   n_syllables n_unique_words
## automated_readability_index -0.301370955 -0.1025377213   -0.090835791
## coleman_liau_index          -0.331942021 -0.1441088708   -0.134183338
## flesch_kincaid_grade_level  -0.271369447 -0.0669741709   -0.054003534
## flesch_reading_ease          0.330428492  0.1341976448    0.122923364
## gunning_fog_index           -0.262491028 -0.0575975087   -0.043831078
## lix                         -0.291534495 -0.0899146686   -0.077518414
## smog_index                  -0.212145059 -0.0110595969    0.002669595
## wiener_sachtextformel       -0.323296796 -0.1256496056   -0.113735147
## n_chars                      0.976113288  0.9999614991    0.999329825
## n_long_words                 0.944034163  0.9913566626    0.992037632
## n_monosyllable_words         0.988532722  0.9974292411    0.996027044
## n_polysyllable_words         0.905214073  0.9717422027    0.973738139
## n_sents                      1.000000000  0.9771908545    0.973680696
## n_syllables                  0.977190854  1.0000000000    0.999362049
## n_unique_words               0.973680696  0.9993620485    1.000000000
## n_words                      0.984045561  0.9992268241    0.998166489
## sent_neg                    -0.091064337 -0.1341811689   -0.145748988
## sent_neu                     0.091924989  0.1642173354    0.182349824
## sent_pos                    -0.001523116 -0.0404325862   -0.049196191
## sent_compound               -0.119064089 -0.1194978466   -0.119639996
## economic_stress_total        0.868603789  0.8920016784    0.891412097
## domestic_stress_total        0.605529222  0.5931290010    0.589715201
## suicidality_total            0.475413018  0.4166199670    0.411687829
## liwc_negative_emotion        0.948325586  0.9511560000    0.948354536
## liwc_social_processes        0.969737337  0.9776199414    0.974389393
## tfidf_anxieti               -0.203258669 -0.1360198342   -0.128960258
## tfidf_depress               -0.194939145 -0.1337233470   -0.134008081
## tfidf_stress                -0.018667308 -0.0173713210   -0.012040809
## tfidf_struggl               -0.012844531 -0.0006961643    0.005519864
## tfidf_suicid                -0.103260070 -0.1138330096   -0.114620797
## depression                  -0.071617697 -0.0713132081   -0.068515101
##                                  n_words    sent_neg      sent_neu     sent_pos
## automated_readability_index -0.138677394 -0.12377958  0.2465870474 -0.163504343
## coleman_liau_index          -0.179903076 -0.09370482  0.2060654108 -0.149501721
## flesch_kincaid_grade_level  -0.103922908 -0.12774737  0.2647832210 -0.182385705
## flesch_reading_ease          0.171523410  0.08102519 -0.2110928963  0.172912139
## gunning_fog_index           -0.094778221 -0.12697664  0.2730993379 -0.194427803
## lix                         -0.127183417 -0.07916236  0.2380047817 -0.211048000
## smog_index                  -0.048826950 -0.10823155  0.2588002117 -0.200223232
## wiener_sachtextformel       -0.163340694 -0.06260303  0.2133080151 -0.200176988
## n_chars                      0.999017293 -0.13558912  0.1654598939 -0.040220879
## n_long_words                 0.985520485 -0.13053711  0.1858490804 -0.073928760
## n_monosyllable_words         0.999473723 -0.13172314  0.1505821621 -0.025599272
## n_polysyllable_words         0.961845766 -0.13971798  0.2056077564 -0.088005262
## n_sents                      0.984045561 -0.09106434  0.0919249885 -0.001523116
## n_syllables                  0.999226824 -0.13418117  0.1642173354 -0.040432586
## n_unique_words               0.998166489 -0.14574899  0.1823498244 -0.049196191
## n_words                      1.000000000 -0.13234642  0.1564691233 -0.032581910
## sent_neg                    -0.132346421  1.00000000 -0.7143900456 -0.373496113
## sent_neu                     0.156469123 -0.71439005  1.0000000000 -0.382286271
## sent_pos                    -0.032581910 -0.37349611 -0.3822862710  1.000000000
## sent_compound               -0.115701142 -0.79857541  0.2001823661  0.789162434
## economic_stress_total        0.890807234 -0.14187230  0.2124161138 -0.094160341
## domestic_stress_total        0.596888266  0.03027215  0.0376954105 -0.089872627
## suicidality_total            0.427248291  0.34860062 -0.2851830701 -0.082315391
## liwc_negative_emotion        0.952183338  0.14111825 -0.0466972241 -0.124365173
## liwc_social_processes        0.980902927 -0.18304550  0.1647076148  0.023446532
## tfidf_anxieti               -0.153517504  0.05331424  0.1267184510 -0.238404436
## tfidf_depress               -0.150909305  0.36746591 -0.1830954612 -0.242535850
## tfidf_stress                -0.018055840  0.08153457 -0.0006740162 -0.106695519
## tfidf_struggl               -0.004447442  0.11048382 -0.0374965961 -0.096223304
## tfidf_suicid                -0.114620203  0.35648462 -0.2573792628 -0.129674940
## depression                  -0.074223806  0.28320568 -0.0397383263 -0.321246190
##                             sent_compound economic_stress_total
## automated_readability_index  -0.006516122         -0.0975896398
## coleman_liau_index           -0.008847360         -0.1391659245
## flesch_kincaid_grade_level   -0.021815296         -0.0591985383
## flesch_reading_ease           0.031319997          0.1215424245
## gunning_fog_index            -0.031595086         -0.0494235209
## lix                          -0.063467539         -0.0807473781
## smog_index                   -0.056073112         -0.0089244418
## wiener_sachtextformel        -0.060329875         -0.1128804795
## n_chars                      -0.118455063          0.8909186571
## n_long_words                 -0.140686288          0.8851110989
## n_monosyllable_words         -0.111509722          0.8887536042
## n_polysyllable_words         -0.140191125          0.8692997799
## n_sents                      -0.119064089          0.8686037888
## n_syllables                  -0.119497847          0.8920016784
## n_unique_words               -0.119639996          0.8914120965
## n_words                      -0.115701142          0.8908072340
## sent_neg                     -0.798575406         -0.1418722976
## sent_neu                      0.200182366          0.2124161138
## sent_pos                      0.789162434         -0.0941603412
## sent_compound                 1.000000000         -0.1275440361
## economic_stress_total        -0.127544036          1.0000000000
## domestic_stress_total        -0.189000554          0.5220658033
## suicidality_total            -0.367881229          0.3328408113
## liwc_negative_emotion        -0.338141953          0.8246801846
## liwc_social_processes        -0.042010708          0.8542174439
## tfidf_anxieti                -0.151860795         -0.1256804896
## tfidf_depress                -0.348129960         -0.1547261439
## tfidf_stress                 -0.134935283          0.0251576157
## tfidf_struggl                -0.137538001          0.0005381508
## tfidf_suicid                 -0.303939964         -0.1275830158
## depression                   -0.371973659         -0.0265968741
##                             domestic_stress_total suicidality_total
## automated_readability_index           -0.17318489       -0.31754508
## coleman_liau_index                    -0.19185685       -0.31839093
## flesch_kincaid_grade_level            -0.15988004       -0.31084813
## flesch_reading_ease                    0.19502148        0.32366309
## gunning_fog_index                     -0.16142856       -0.32040372
## lix                                   -0.17116144       -0.30798420
## smog_index                            -0.13806800       -0.29760572
## wiener_sachtextformel                 -0.19456282       -0.32565453
## n_chars                                0.59320688        0.41454781
## n_long_words                           0.57745709        0.38070257
## n_monosyllable_words                   0.59888384        0.43479797
## n_polysyllable_words                   0.54702726        0.33578668
## n_sents                                0.60552922        0.47541302
## n_syllables                            0.59312900        0.41661997
## n_unique_words                         0.58971520        0.41168783
## n_words                                0.59688827        0.42724829
## sent_neg                               0.03027215        0.34860062
## sent_neu                               0.03769541       -0.28518307
## sent_pos                              -0.08987263       -0.08231539
## sent_compound                         -0.18900055       -0.36788123
## economic_stress_total                  0.52206580        0.33284081
## domestic_stress_total                  1.00000000        0.30331940
## suicidality_total                      0.30331940        1.00000000
## liwc_negative_emotion                  0.61391814        0.51487644
## liwc_social_processes                  0.61177441        0.38328003
## tfidf_anxieti                         -0.18108957       -0.30862588
## tfidf_depress                         -0.10761703       -0.11456051
## tfidf_stress                          -0.07054069       -0.07993940
## tfidf_struggl                         -0.01058888       -0.03757874
## tfidf_suicid                          -0.06671180        0.59143662
## depression                            -0.04024530        0.09018527
##                             liwc_negative_emotion liwc_social_processes
## automated_readability_index          -0.165093749           -0.16680626
## coleman_liau_index                   -0.196534475           -0.20639122
## flesch_kincaid_grade_level           -0.132646789           -0.13902225
## flesch_reading_ease                   0.186297237            0.20723771
## gunning_fog_index                    -0.123497671           -0.13276397
## lix                                  -0.142345475           -0.16489716
## smog_index                           -0.071980024           -0.09285209
## wiener_sachtextformel                -0.173309986           -0.20264897
## n_chars                               0.950695077            0.97792513
## n_long_words                          0.939559041            0.95606474
## n_monosyllable_words                  0.951591512            0.98259012
## n_polysyllable_words                  0.913766938            0.92580386
## n_sents                               0.948325586            0.96973734
## n_syllables                           0.951156000            0.97761994
## n_unique_words                        0.948354536            0.97438939
## n_words                               0.952183338            0.98090293
## sent_neg                              0.141118248           -0.18304550
## sent_neu                             -0.046697224            0.16470761
## sent_pos                             -0.124365173            0.02344653
## sent_compound                        -0.338141953           -0.04201071
## economic_stress_total                 0.824680185            0.85421744
## domestic_stress_total                 0.613918137            0.61177441
## suicidality_total                     0.514876443            0.38328003
## liwc_negative_emotion                 1.000000000            0.91651953
## liwc_social_processes                 0.916519532            1.00000000
## tfidf_anxieti                        -0.145983011           -0.19535857
## tfidf_depress                        -0.008312494           -0.18174662
## tfidf_stress                          0.018575807           -0.04707125
## tfidf_struggl                         0.048324617           -0.02519996
## tfidf_suicid                         -0.039070948           -0.14154731
## depression                            0.008436090           -0.13383659
##                             tfidf_anxieti tfidf_depress  tfidf_stress
## automated_readability_index    0.32940229   0.364851125 -0.0040956841
## coleman_liau_index             0.33439282   0.392781861 -0.0094973496
## flesch_kincaid_grade_level     0.36156819   0.365822243  0.0024470420
## flesch_reading_ease           -0.38654606  -0.407903031  0.0004958451
## gunning_fog_index              0.38790119   0.382830247  0.0047402573
## lix                            0.37993965   0.403651741  0.0053225287
## smog_index                     0.41636520   0.410680302  0.0066005297
## wiener_sachtextformel          0.41033157   0.431688745  0.0036396798
## n_chars                       -0.13627061  -0.131655240 -0.0180337439
## n_long_words                  -0.07717477  -0.071186504 -0.0135615012
## n_monosyllable_words          -0.16767005  -0.164871965 -0.0186705083
## n_polysyllable_words          -0.01832355  -0.019748756 -0.0127007846
## n_sents                       -0.20325867  -0.194939145 -0.0186673083
## n_syllables                   -0.13601983  -0.133723347 -0.0173713210
## n_unique_words                -0.12896026  -0.134008081 -0.0120408091
## n_words                       -0.15351750  -0.150909305 -0.0180558399
## sent_neg                       0.05331424   0.367465908  0.0815345728
## sent_neu                       0.12671845  -0.183095461 -0.0006740162
## sent_pos                      -0.23840444  -0.242535850 -0.1066955188
## sent_compound                 -0.15186080  -0.348129960 -0.1349352830
## economic_stress_total         -0.12568049  -0.154726144  0.0251576157
## domestic_stress_total         -0.18108957  -0.107617034 -0.0705406900
## suicidality_total             -0.30862588  -0.114560511 -0.0799394002
## liwc_negative_emotion         -0.14598301  -0.008312494  0.0185758075
## liwc_social_processes         -0.19535857  -0.181746619 -0.0470712458
## tfidf_anxieti                  1.00000000   0.076229923  0.0633771934
## tfidf_depress                  0.07622992   1.000000000 -0.0664883971
## tfidf_stress                   0.06337719  -0.066488397  1.0000000000
## tfidf_struggl                  0.04095322   0.076152399  0.0042549727
## tfidf_suicid                  -0.15837080   0.060119651 -0.0940051280
## depression                     0.13632309   0.236235097  0.0078282218
##                             tfidf_struggl  tfidf_suicid   depression
## automated_readability_index  0.0493435112  0.0338583396  0.024477226
## coleman_liau_index           0.0511250506  0.0600041094  0.029635458
## flesch_kincaid_grade_level   0.0565108403  0.0177520081  0.040734854
## flesch_reading_ease         -0.0634911380 -0.0471086573 -0.053173291
## gunning_fog_index            0.0566017348  0.0001173876  0.052373079
## lix                          0.0624054444  0.0385811254  0.050899572
## smog_index                   0.0658820300  0.0017657297  0.065050182
## wiener_sachtextformel        0.0666315394  0.0405142083  0.063904308
## n_chars                     -0.0006563571 -0.1138205435 -0.072695933
## n_long_words                 0.0125063420 -0.1044475636 -0.058967853
## n_monosyllable_words        -0.0078173393 -0.1157058795 -0.076674831
## n_polysyllable_words         0.0206066738 -0.1092329432 -0.048624082
## n_sents                     -0.0128445306 -0.1032600698 -0.071617697
## n_syllables                 -0.0006961643 -0.1138330096 -0.071313208
## n_unique_words               0.0055198642 -0.1146207970 -0.068515101
## n_words                     -0.0044474416 -0.1146202032 -0.074223806
## sent_neg                     0.1104838198  0.3564846199  0.283205679
## sent_neu                    -0.0374965961 -0.2573792628 -0.039738326
## sent_pos                    -0.0962233037 -0.1296749405 -0.321246190
## sent_compound               -0.1375380010 -0.3039399636 -0.371973659
## economic_stress_total        0.0005381508 -0.1275830158 -0.026596874
## domestic_stress_total       -0.0105888808 -0.0667118002 -0.040245303
## suicidality_total           -0.0375787359  0.5914366242  0.090185270
## liwc_negative_emotion        0.0483246170 -0.0390709481  0.008436090
## liwc_social_processes       -0.0251999618 -0.1415473143 -0.133836590
## tfidf_anxieti                0.0409532185 -0.1583708043  0.136323094
## tfidf_depress                0.0761523989  0.0601196505  0.236235097
## tfidf_stress                 0.0042549727 -0.0940051280  0.007828222
## tfidf_struggl                1.0000000000 -0.0558296255  0.014105794
## tfidf_suicid                -0.0558296255  1.0000000000  0.111750491
## depression                   0.0141057944  0.1117504914  1.000000000
# Print the pairwise p-value matrix held in the `P` element of `res2`.
# NOTE(review): `res2` is created earlier in the file, presumably by
# Hmisc::rcorr() -- confirm. Diagonal entries appear as NA in the output below.
res2[["P"]]
##                             automated_readability_index coleman_liau_index
## automated_readability_index                          NA       0.000000e+00
## coleman_liau_index                         0.000000e+00                 NA
## flesch_kincaid_grade_level                 0.000000e+00       0.000000e+00
## flesch_reading_ease                        0.000000e+00       0.000000e+00
## gunning_fog_index                          0.000000e+00       7.105427e-15
## lix                                        0.000000e+00       0.000000e+00
## smog_index                                 1.332268e-15       4.183320e-13
## wiener_sachtextformel                      0.000000e+00       0.000000e+00
## n_chars                                    6.057953e-01       4.601637e-01
## n_long_words                               9.320703e-01       8.888538e-01
## n_monosyllable_words                       3.653964e-01       2.586670e-01
## n_polysyllable_words                       5.716349e-01       7.407156e-01
## n_sents                                    9.944127e-02       6.809997e-02
## n_syllables                                5.830774e-01       4.392609e-01
## n_unique_words                             6.269845e-01       4.717318e-01
## n_words                                    4.568768e-01       3.328308e-01
## sent_neg                                   5.070661e-01       6.160953e-01
## sent_neu                                   1.811322e-01       2.660677e-01
## sent_pos                                   3.794668e-01       4.221422e-01
## sent_compound                              9.722475e-01       9.623247e-01
## economic_stress_total                      6.014776e-01       4.552771e-01
## domestic_stress_total                      3.514946e-01       3.011544e-01
## suicidality_total                          8.172991e-02       8.087530e-02
## liwc_negative_emotion                      3.747874e-01       2.892956e-01
## liwc_social_processes                      3.697836e-01       2.652958e-01
## tfidf_anxieti                              7.036605e-02       6.596751e-02
## tfidf_depress                              4.358118e-02       2.883429e-02
## tfidf_stress                               9.825543e-01       9.595590e-01
## tfidf_struggl                              7.920854e-01       7.847508e-01
## tfidf_suicid                               8.565089e-01       7.484766e-01
## depression                                 8.960108e-01       8.742542e-01
##                             flesch_kincaid_grade_level flesch_reading_ease
## automated_readability_index                 0.00000000          0.00000000
## coleman_liau_index                          0.00000000          0.00000000
## flesch_kincaid_grade_level                          NA          0.00000000
## flesch_reading_ease                         0.00000000                  NA
## gunning_fog_index                           0.00000000          0.00000000
## lix                                         0.00000000          0.00000000
## smog_index                                  0.00000000          0.00000000
## wiener_sachtextformel                       0.00000000          0.00000000
## n_chars                                     0.74194255          0.49011792
## n_long_words                                0.77424970          0.95156020
## n_monosyllable_words                        0.47162106          0.27568725
## n_polysyllable_words                        0.42517682          0.64766887
## n_sents                                     0.13975513          0.06944347
## n_syllables                                 0.72036213          0.47168410
## n_unique_words                              0.77293854          0.51003153
## n_words                                     0.57797129          0.35620531
## sent_neg                                    0.49343700          0.66479392
## sent_neu                                    0.15000704          0.25431975
## sent_pos                                    0.32609027          0.35226535
## sent_compound                               0.90726844          0.86716781
## economic_stress_total                       0.75174730          0.51483230
## domestic_stress_total                       0.39026363          0.29309826
## suicidality_total                           0.08874325          0.07570246
## liwc_negative_emotion                       0.47686800          0.31564202
## liwc_social_processes                       0.45574731          0.26329725
## tfidf_anxieti                               0.04565238          0.03171056
## tfidf_depress                               0.04298312          0.02273415
## tfidf_stress                                0.98957621          0.99788777
## tfidf_struggl                               0.76268978          0.73436917
## tfidf_suicid                                0.92448595          0.80131094
## depression                                  0.82776268          0.77634058
##                             gunning_fog_index        lix   smog_index
## automated_readability_index      0.000000e+00 0.00000000 1.332268e-15
## coleman_liau_index               7.105427e-15 0.00000000 4.183320e-13
## flesch_kincaid_grade_level       0.000000e+00 0.00000000 0.000000e+00
## flesch_reading_ease              0.000000e+00 0.00000000 0.000000e+00
## gunning_fog_index                          NA 0.00000000 0.000000e+00
## lix                              0.000000e+00         NA 0.000000e+00
## smog_index                       0.000000e+00 0.00000000           NA
## wiener_sachtextformel            0.000000e+00 0.00000000 0.000000e+00
## n_chars                          7.776312e-01 0.65069970 9.712164e-01
## n_long_words                     7.336609e-01 0.85396157 5.502688e-01
## n_monosyllable_words             5.023579e-01 0.39642514 6.702945e-01
## n_polysyllable_words             3.801233e-01 0.48876694 2.452479e-01
## n_sents                          1.536993e-01 0.11154676 2.519051e-01
## n_syllables                      7.582601e-01 0.63049712 9.529135e-01
## n_unique_words                   8.148881e-01 0.67851346 9.886282e-01
## n_words                          6.120416e-01 0.49536276 7.942154e-01
## sent_neg                         4.960698e-01 0.67206897 5.622176e-01
## sent_neu                         1.371492e-01 0.19728699 1.597812e-01
## sent_pos                         2.945990e-01 0.25442312 2.801569e-01
## sent_compound                    8.660115e-01 0.73446436 7.644762e-01
## economic_stress_total            7.917556e-01 0.66587700 9.619967e-01
## domestic_stress_total            3.856291e-01 0.35723655 4.588764e-01
## suicidality_total                7.886929e-02 0.09187896 1.039519e-01
## liwc_negative_emotion            5.080415e-01 0.44493919 7.003868e-01
## liwc_social_processes            4.764753e-01 0.37536439 6.193236e-01
## tfidf_anxieti                    3.106649e-02 0.03500753 1.981247e-02
## tfidf_depress                    3.353242e-02 0.02433057 2.173856e-02
## tfidf_stress                     9.798092e-01 0.97732963 9.718881e-01
## tfidf_struggl                    7.623190e-01 0.73875270 7.247449e-01
## tfidf_suicid                     9.994999e-01 0.83674434 9.924783e-01
## depression                       7.796234e-01 0.78567813 7.280888e-01
##                             wiener_sachtextformel      n_chars n_long_words
## automated_readability_index            0.00000000 6.057953e-01 9.320703e-01
## coleman_liau_index                     0.00000000 4.601637e-01 8.888538e-01
## flesch_kincaid_grade_level             0.00000000 7.419426e-01 7.742497e-01
## flesch_reading_ease                    0.00000000 4.901179e-01 9.515602e-01
## gunning_fog_index                      0.00000000 7.776312e-01 7.336609e-01
## lix                                    0.00000000 6.506997e-01 8.539616e-01
## smog_index                             0.00000000 9.712164e-01 5.502688e-01
## wiener_sachtextformel                          NA 5.178754e-01 9.983245e-01
## n_chars                                0.51787541           NA 0.000000e+00
## n_long_words                           0.99832454 0.000000e+00           NA
## n_monosyllable_words                   0.29523645 0.000000e+00 0.000000e+00
## n_polysyllable_words                   0.59526957 0.000000e+00 0.000000e+00
## n_sents                                0.07605336 0.000000e+00 1.776357e-15
## n_syllables                            0.50061946 0.000000e+00 0.000000e+00
## n_unique_words                         0.54238551 0.000000e+00 0.000000e+00
## n_words                                0.37995055 0.000000e+00 0.000000e+00
## sent_neg                               0.73795435 4.670581e-01 4.839670e-01
## sent_neu                               0.24925380 3.737143e-01 3.168284e-01
## sent_pos                               0.28027035 8.299042e-01 6.926625e-01
## sent_compound                          0.74715523 5.256452e-01 4.503180e-01
## economic_stress_total                  0.54544347 1.873812e-11 3.824918e-11
## domestic_stress_total                  0.29425728 4.362382e-04 6.703720e-04
## suicidality_total                      0.07381648 2.041224e-02 3.461320e-02
## liwc_negative_emotion                  0.35114145 2.220446e-16 4.884981e-15
## liwc_social_processes                  0.27424979 0.000000e+00 0.000000e+00
## tfidf_anxieti                          0.02186157 4.648012e-01 6.798634e-01
## tfidf_depress                          0.01531150 4.801978e-01 7.035406e-01
## tfidf_stress                           0.98449638 9.232910e-01 9.422776e-01
## tfidf_struggl                          0.72173618 9.972040e-01 9.467621e-01
## tfidf_suicid                           0.82868188 5.420804e-01 5.760425e-01
## depression                             0.73270312 6.975457e-01 7.526847e-01
##                             n_monosyllable_words n_polysyllable_words
## automated_readability_index         3.653964e-01         5.716349e-01
## coleman_liau_index                  2.586670e-01         7.407156e-01
## flesch_kincaid_grade_level          4.716211e-01         4.251768e-01
## flesch_reading_ease                 2.756872e-01         6.476689e-01
## gunning_fog_index                   5.023579e-01         3.801233e-01
## lix                                 3.964251e-01         4.887669e-01
## smog_index                          6.702945e-01         2.452479e-01
## wiener_sachtextformel               2.952364e-01         5.952696e-01
## n_chars                             0.000000e+00         0.000000e+00
## n_long_words                        0.000000e+00         0.000000e+00
## n_monosyllable_words                          NA         0.000000e+00
## n_polysyllable_words                0.000000e+00                   NA
## n_sents                             0.000000e+00         2.686296e-12
## n_syllables                         0.000000e+00         0.000000e+00
## n_unique_words                      0.000000e+00         0.000000e+00
## n_words                             0.000000e+00         0.000000e+00
## sent_neg                            4.799694e-01         4.534731e-01
## sent_neu                            4.187577e-01         2.671544e-01
## sent_pos                            8.912713e-01         6.378036e-01
## sent_compound                       5.503648e-01         4.519299e-01
## economic_stress_total               2.456124e-11         2.232314e-10
## domestic_stress_total               3.716096e-04         1.450014e-03
## suicidality_total                   1.451121e-02         6.477816e-02
## liwc_negative_emotion               2.220446e-16         7.212009e-13
## liwc_social_processes               0.000000e+00         8.837375e-14
## tfidf_anxieti                       3.672747e-01         9.220619e-01
## tfidf_depress                       3.754384e-01         9.160204e-01
## tfidf_stress                        9.205907e-01         9.459356e-01
## tfidf_struggl                       9.667083e-01         9.123858e-01
## tfidf_suicid                        5.353653e-01         5.585845e-01
## depression                          6.818291e-01         7.950523e-01
##                                  n_sents  n_syllables n_unique_words
## automated_readability_index 9.944127e-02 5.830774e-01   6.269845e-01
## coleman_liau_index          6.809997e-02 4.392609e-01   4.717318e-01
## flesch_kincaid_grade_level  1.397551e-01 7.203621e-01   7.729385e-01
## flesch_reading_ease         6.944347e-02 4.716841e-01   5.100315e-01
## gunning_fog_index           1.536993e-01 7.582601e-01   8.148881e-01
## lix                         1.115468e-01 6.304971e-01   6.785135e-01
## smog_index                  2.519051e-01 9.529135e-01   9.886282e-01
## wiener_sachtextformel       7.605336e-02 5.006195e-01   5.423855e-01
## n_chars                     0.000000e+00 0.000000e+00   0.000000e+00
## n_long_words                1.776357e-15 0.000000e+00   0.000000e+00
## n_monosyllable_words        0.000000e+00 0.000000e+00   0.000000e+00
## n_polysyllable_words        2.686296e-12 0.000000e+00   0.000000e+00
## n_sents                               NA 0.000000e+00   0.000000e+00
## n_syllables                 0.000000e+00           NA   0.000000e+00
## n_unique_words              0.000000e+00 0.000000e+00             NA
## n_words                     0.000000e+00 0.000000e+00   0.000000e+00
## sent_neg                    6.261142e-01 4.717390e-01   4.340151e-01
## sent_neu                    6.228413e-01 3.773635e-01   3.261871e-01
## sent_pos                    9.935118e-01 8.290220e-01   7.926927e-01
## sent_compound               5.235035e-01 5.219808e-01   5.214822e-01
## economic_stress_total       2.399796e-10 1.633094e-11   1.760325e-11
## domestic_stress_total       3.068296e-04 4.371898e-04   4.807466e-04
## suicidality_total           6.873520e-03 1.972957e-02   2.138639e-02
## liwc_negative_emotion       4.440892e-16 2.220446e-16   4.440892e-16
## liwc_social_processes       0.000000e+00 0.000000e+00   0.000000e+00
## tfidf_anxieti               2.727778e-01 4.656310e-01   4.893082e-01
## tfidf_depress               2.933061e-01 4.732664e-01   4.723161e-01
## tfidf_stress                9.206043e-01 9.261009e-01   9.487412e-01
## tfidf_struggl               9.453246e-01 9.970344e-01   9.764893e-01
## tfidf_suicid                5.804122e-01 5.420359e-01   5.392253e-01
## depression                  7.018263e-01 7.030367e-01   7.141932e-01
##                                  n_words     sent_neg     sent_neu     sent_pos
## automated_readability_index 4.568768e-01 5.070661e-01 1.811322e-01 3.794668e-01
## coleman_liau_index          3.328308e-01 6.160953e-01 2.660677e-01 4.221422e-01
## flesch_kincaid_grade_level  5.779713e-01 4.934370e-01 1.500070e-01 3.260903e-01
## flesch_reading_ease         3.562053e-01 6.647939e-01 2.543197e-01 3.522653e-01
## gunning_fog_index           6.120416e-01 4.960698e-01 1.371492e-01 2.945990e-01
## lix                         4.953628e-01 6.720690e-01 1.972870e-01 2.544231e-01
## smog_index                  7.942154e-01 5.622176e-01 1.597812e-01 2.801569e-01
## wiener_sachtextformel       3.799505e-01 7.379543e-01 2.492538e-01 2.802704e-01
## n_chars                     0.000000e+00 4.670581e-01 3.737143e-01 8.299042e-01
## n_long_words                0.000000e+00 4.839670e-01 3.168284e-01 6.926625e-01
## n_monosyllable_words        0.000000e+00 4.799694e-01 4.187577e-01 8.912713e-01
## n_polysyllable_words        0.000000e+00 4.534731e-01 2.671544e-01 6.378036e-01
## n_sents                     0.000000e+00 6.261142e-01 6.228413e-01 9.935118e-01
## n_syllables                 0.000000e+00 4.717390e-01 3.773635e-01 8.290220e-01
## n_unique_words              0.000000e+00 4.340151e-01 3.261871e-01 7.926927e-01
## n_words                               NA 4.778754e-01 4.005844e-01 8.618659e-01
## sent_neg                    4.778754e-01           NA 6.364724e-06 3.848485e-02
## sent_neu                    4.005844e-01 6.364724e-06           NA 3.380610e-02
## sent_pos                    8.618659e-01 3.848485e-02 3.380610e-02           NA
## sent_compound               5.353821e-01 7.296827e-08 2.802572e-01 1.325466e-07
## economic_stress_total       1.900347e-11 4.464696e-01 2.512855e-01 6.143736e-01
## domestic_stress_total       3.932909e-04 8.715747e-01 8.404439e-01 6.306576e-01
## suicidality_total           1.651795e-02 5.461292e-02 1.199282e-01 6.597726e-01
## liwc_negative_emotion       2.220446e-16 4.489143e-01 8.030122e-01 5.050429e-01
## liwc_social_processes       0.000000e+00 3.243131e-01 3.759211e-01 9.003675e-01
## tfidf_anxieti               4.096394e-01 7.757627e-01 4.969533e-01 1.965133e-01
## tfidf_depress               4.177359e-01 4.198586e-02 3.241788e-01 1.886379e-01
## tfidf_stress                9.231973e-01 6.628097e-01 9.971288e-01 5.678112e-01
## tfidf_struggl               9.810562e-01 5.540616e-01 8.412748e-01 6.066019e-01
## tfidf_suicid                5.392274e-01 4.901380e-02 1.621681e-01 4.868837e-01
## depression                  6.914956e-01 1.226305e-01 8.319159e-01 7.804112e-02
##                             sent_compound economic_stress_total
## automated_readability_index  9.722475e-01          6.014776e-01
## coleman_liau_index           9.623247e-01          4.552771e-01
## flesch_kincaid_grade_level   9.072684e-01          7.517473e-01
## flesch_reading_ease          8.671678e-01          5.148323e-01
## gunning_fog_index            8.660115e-01          7.917556e-01
## lix                          7.344644e-01          6.658770e-01
## smog_index                   7.644762e-01          9.619967e-01
## wiener_sachtextformel        7.471552e-01          5.454435e-01
## n_chars                      5.256452e-01          1.873812e-11
## n_long_words                 4.503180e-01          3.824918e-11
## n_monosyllable_words         5.503648e-01          2.456124e-11
## n_polysyllable_words         4.519299e-01          2.232314e-10
## n_sents                      5.235035e-01          2.399796e-10
## n_syllables                  5.219808e-01          1.633094e-11
## n_unique_words               5.214822e-01          1.760325e-11
## n_words                      5.353821e-01          1.900347e-11
## sent_neg                     7.296827e-08          4.464696e-01
## sent_neu                     2.802572e-01          2.512855e-01
## sent_pos                     1.325466e-07          6.143736e-01
## sent_compound                          NA          4.941309e-01
## economic_stress_total        4.941309e-01                    NA
## domestic_stress_total        3.085442e-01          2.591706e-03
## suicidality_total            4.173683e-02          6.731178e-02
## liwc_negative_emotion        6.280660e-02          1.167073e-08
## liwc_social_processes        8.224520e-01          9.827357e-10
## tfidf_anxieti                4.147720e-01          5.005133e-01
## tfidf_depress                5.496251e-02          4.059177e-01
## tfidf_stress                 4.692288e-01          8.931364e-01
## tfidf_struggl                4.606193e-01          9.977076e-01
## tfidf_suicid                 9.644965e-02          4.939978e-01
## depression                   3.934575e-02          8.870606e-01
##                             domestic_stress_total suicidality_total
## automated_readability_index          0.3514946094      0.0817299063
## coleman_liau_index                   0.3011543562      0.0808752999
## flesch_kincaid_grade_level           0.3902636257      0.0887432455
## flesch_reading_ease                  0.2930982581      0.0757024552
## gunning_fog_index                    0.3856291285      0.0788692866
## lix                                  0.3572365511      0.0918789554
## smog_index                           0.4588764368      0.1039518624
## wiener_sachtextformel                0.2942572776      0.0738164778
## n_chars                              0.0004362382      0.0204122380
## n_long_words                         0.0006703720      0.0346131980
## n_monosyllable_words                 0.0003716096      0.0145112068
## n_polysyllable_words                 0.0014500144      0.0647781632
## n_sents                              0.0003068296      0.0068735203
## n_syllables                          0.0004371898      0.0197295709
## n_unique_words                       0.0004807466      0.0213863859
## n_words                              0.0003932909      0.0165179528
## sent_neg                             0.8715746864      0.0546129219
## sent_neu                             0.8404439443      0.1199281505
## sent_pos                             0.6306576315      0.6597726253
## sent_compound                        0.3085442442      0.0417368331
## economic_stress_total                0.0025917058      0.0673117800
## domestic_stress_total                          NA      0.0971659710
## suicidality_total                    0.0971659710                NA
## liwc_negative_emotion                0.0002394507      0.0030391705
## liwc_social_processes                0.0002552831      0.0333074835
## tfidf_anxieti                        0.3295988485      0.0911691627
## tfidf_depress                        0.5644523592      0.5394400966
## tfidf_stress                         0.7061109316      0.6690307430
## tfidf_struggl                        0.9549155419      0.8409315412
## tfidf_suicid                         0.7214142328      0.0004583266
## depression                           0.8298024168      0.6294643831
##                             liwc_negative_emotion liwc_social_processes
## automated_readability_index          3.747874e-01          3.697836e-01
## coleman_liau_index                   2.892956e-01          2.652958e-01
## flesch_kincaid_grade_level           4.768680e-01          4.557473e-01
## flesch_reading_ease                  3.156420e-01          2.632972e-01
## gunning_fog_index                    5.080415e-01          4.764753e-01
## lix                                  4.449392e-01          3.753644e-01
## smog_index                           7.003868e-01          6.193236e-01
## wiener_sachtextformel                3.511414e-01          2.742498e-01
## n_chars                              2.220446e-16          0.000000e+00
## n_long_words                         4.884981e-15          0.000000e+00
## n_monosyllable_words                 2.220446e-16          0.000000e+00
## n_polysyllable_words                 7.212009e-13          8.837375e-14
## n_sents                              4.440892e-16          0.000000e+00
## n_syllables                          2.220446e-16          0.000000e+00
## n_unique_words                       4.440892e-16          0.000000e+00
## n_words                              2.220446e-16          0.000000e+00
## sent_neg                             4.489143e-01          3.243131e-01
## sent_neu                             8.030122e-01          3.759211e-01
## sent_pos                             5.050429e-01          9.003675e-01
## sent_compound                        6.280660e-02          8.224520e-01
## economic_stress_total                1.167073e-08          9.827357e-10
## domestic_stress_total                2.394507e-04          2.552831e-04
## suicidality_total                    3.039170e-03          3.330748e-02
## liwc_negative_emotion                          NA          4.587442e-13
## liwc_social_processes                4.587442e-13                    NA
## tfidf_anxieti                        4.332694e-01          2.922483e-01
## tfidf_depress                        9.646009e-01          3.278174e-01
## tfidf_stress                         9.209923e-01          8.014656e-01
## tfidf_struggl                        7.962881e-01          8.929576e-01
## tfidf_suicid                         8.346998e-01          4.475223e-01
## depression                           9.640749e-01          4.728883e-01
##                             tfidf_anxieti tfidf_depress tfidf_stress
## automated_readability_index    0.07036605    0.04358118    0.9825543
## coleman_liau_index             0.06596751    0.02883429    0.9595590
## flesch_kincaid_grade_level     0.04565238    0.04298312    0.9895762
## flesch_reading_ease            0.03171056    0.02273415    0.9978878
## gunning_fog_index              0.03106649    0.03353242    0.9798092
## lix                            0.03500753    0.02433057    0.9773296
## smog_index                     0.01981247    0.02173856    0.9718881
## wiener_sachtextformel          0.02186157    0.01531150    0.9844964
## n_chars                        0.46480123    0.48019777    0.9232910
## n_long_words                   0.67986343    0.70354061    0.9422776
## n_monosyllable_words           0.36727470    0.37543836    0.9205907
## n_polysyllable_words           0.92206191    0.91602039    0.9459356
## n_sents                        0.27277781    0.29330611    0.9206043
## n_syllables                    0.46563104    0.47326636    0.9261009
## n_unique_words                 0.48930824    0.47231615    0.9487412
## n_words                        0.40963941    0.41773592    0.9231973
## sent_neg                       0.77576272    0.04198586    0.6628097
## sent_neu                       0.49695334    0.32417878    0.9971288
## sent_pos                       0.19651325    0.18863790    0.5678112
## sent_compound                  0.41477196    0.05496251    0.4692288
## economic_stress_total          0.50051334    0.40591767    0.8931364
## domestic_stress_total          0.32959885    0.56445236    0.7061109
## suicidality_total              0.09116916    0.53944010    0.6690307
## liwc_negative_emotion          0.43326938    0.96460090    0.9209923
## liwc_social_processes          0.29224831    0.32781736    0.8014656
## tfidf_anxieti                          NA    0.68358016    0.7348288
## tfidf_depress                  0.68358016            NA    0.7223105
## tfidf_stress                   0.73482884    0.72231047           NA
## tfidf_struggl                  0.82685321    0.68388544    0.9818759
## tfidf_suicid                   0.39481125    0.74800788    0.6149600
## depression                     0.46462764    0.20073852    0.9666620
##                             tfidf_struggl tfidf_suicid depression
## automated_readability_index     0.7920854 0.8565088953 0.89601080
## coleman_liau_index              0.7847508 0.7484766368 0.87425423
## flesch_kincaid_grade_level      0.7626898 0.9244859455 0.82776268
## flesch_reading_ease             0.7343692 0.8013109399 0.77634058
## gunning_fog_index               0.7623190 0.9994999446 0.77962342
## lix                             0.7387527 0.8367443390 0.78567813
## smog_index                      0.7247449 0.9924783344 0.72808881
## wiener_sachtextformel           0.7217362 0.8286818777 0.73270312
## n_chars                         0.9972040 0.5420804086 0.69754567
## n_long_words                    0.9467621 0.5760425127 0.75268467
## n_monosyllable_words            0.9667083 0.5353653224 0.68182912
## n_polysyllable_words            0.9123858 0.5585845161 0.79505229
## n_sents                         0.9453246 0.5804121707 0.70182626
## n_syllables                     0.9970344 0.5420358771 0.70303671
## n_unique_words                  0.9764893 0.5392252554 0.71419322
## n_words                         0.9810562 0.5392273711 0.69149564
## sent_neg                        0.5540616 0.0490137997 0.12263049
## sent_neu                        0.8412748 0.1621680749 0.83191590
## sent_pos                        0.6066019 0.4868836832 0.07804112
## sent_compound                   0.4606193 0.0964496535 0.03934575
## economic_stress_total           0.9977076 0.4939978468 0.88706060
## domestic_stress_total           0.9549155 0.7214142328 0.82980242
## suicidality_total               0.8409315 0.0004583266 0.62946438
## liwc_negative_emotion           0.7962881 0.8346998262 0.96407490
## liwc_social_processes           0.8929576 0.4475223281 0.47288833
## tfidf_anxieti                   0.8268532 0.3948112537 0.46462764
## tfidf_depress                   0.6838854 0.7480078839 0.20073852
## tfidf_stress                    0.9818759 0.6149600290 0.96666202
## tfidf_struggl                          NA 0.7654704655 0.93996498
## tfidf_suicid                    0.7654705           NA 0.54949892
## depression                      0.9399650 0.5494989219         NA
# Upper-triangle correlation plot, variables ordered by hierarchical
# clustering. Cells whose p-value (res2$P) exceeds 0.01 are left blank.
corrplot(
  res2$r,
  type = "upper",
  order = "hclust",
  p.mat = res2$P,
  sig.level = 0.01,
  insig = "blank"
)

# Keep a data-frame copy of the matrix used for the correlation analysis.
df <- data.frame(datamatrix)

#install.packages("MASS")
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(caret)

# Fix the RNG seed so the random train/test split below -- and therefore every
# model summary that depends on it -- is reproducible from run to run.
set.seed(2019)

# 80/20 train/test partition; createDataPartition() samples within the levels
# of the outcome (subreddit) so both sets keep a similar class balance.
trainIndex <- createDataPartition(
  y = socailmedia_2019_features_clean$subreddit,
  p = 0.8,
  list = FALSE,
  times = 1
)
train.data <- socailmedia_2019_features_clean[trainIndex, ]
test.data <- socailmedia_2019_features_clean[-trainIndex, ]

# Estimate the centering/scaling parameters from the training rows only, so no
# information from the held-out test rows leaks into preprocessing.
# NOTE(review): the transformation is only estimated here, not applied; use
# predict(preProcValues, <data>) on train.data/test.data if scaled predictors
# are wanted for modelling.
preProcValues <- preProcess(train.data, method = c("center", "scale"))

# Full logistic regression (binomial GLM) of subreddit on the sentiment,
# economic-stress, suicidality and tf-idf predictors, fitted on train.data.
# The formula is written inline so the "Call:" line printed by summary()
# shows the actual model terms.
SM_lr <- glm(
  subreddit ~ sent_neg + sent_pos + sent_compound + economic_stress_total +
    suicidality_total + tfidf_anxieti + tfidf_stress + tfidf_suicid,
  data = train.data,
  family = binomial
)

summary(SM_lr)
## 
## Call:
## glm(formula = subreddit ~ sent_neg + sent_pos + sent_compound + 
##     economic_stress_total + suicidality_total + tfidf_anxieti + 
##     tfidf_stress + tfidf_suicid, family = binomial, data = train.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.2890   0.1894   0.2953   0.3753   3.5921  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             1.49292    0.16327   9.144  < 2e-16 ***
## sent_neg                5.75172    0.92499   6.218 5.03e-10 ***
## sent_pos                4.00313    1.03621   3.863 0.000112 ***
## sent_compound           0.22058    0.11561   1.908 0.056390 .  
## economic_stress_total   0.06474    0.03637   1.780 0.075073 .  
## suicidality_total       0.75749    0.15315   4.946 7.57e-07 ***
## tfidf_anxieti         -15.74648    0.57834 -27.227  < 2e-16 ***
## tfidf_stress           -5.23959    0.79392  -6.600 4.12e-11 ***
## tfidf_suicid            4.62944    1.64446   2.815 0.004875 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4423.4  on 6193  degrees of freedom
## Residual deviance: 2885.4  on 6185  degrees of freedom
## AIC: 2903.4
## 
## Number of Fisher Scoring iterations: 7
# Variance inflation factors for the predictors of the full model.
car::vif(SM_lr)
##              sent_neg              sent_pos         sent_compound 
##              1.968267              1.758100              2.896438 
## economic_stress_total     suicidality_total         tfidf_anxieti 
##              1.032235              1.088967              1.061492 
##          tfidf_stress          tfidf_suicid 
##              1.019619              1.068945
# Sentiment-only logistic regression on the same training data.
# This reassigns SM_lr, replacing the full model fitted earlier.
SM_lr <- glm(
  subreddit ~ sent_neg + sent_pos + sent_compound,
  data = train.data,
  family = binomial
)
summary(SM_lr)
## 
## Call:
## glm(formula = subreddit ~ sent_neg + sent_pos + sent_compound, 
##     family = binomial, data = train.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7266   0.3953   0.4672   0.5316   0.8059  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.95787    0.12773   7.499 6.43e-14 ***
## sent_neg       3.32452    0.65465   5.078 3.81e-07 ***
## sent_pos       5.05652    0.80735   6.263 3.77e-10 ***
## sent_compound  0.12666    0.08878   1.427    0.154    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4423.4  on 6193  degrees of freedom
## Residual deviance: 4329.1  on 6190  degrees of freedom
## AIC: 4337.1
## 
## Number of Fisher Scoring iterations: 5
# Variance inflation factors for the sentiment-only model.
car::vif(SM_lr)
##      sent_neg      sent_pos sent_compound 
##      1.730900      1.644989      2.550332

The social media dataset was found to be unsuitable for factor analysis, so a logistic regression model was built instead. In the full model, most predictors are statistically significant at the 0.05 level. The variable sent_compound has a p-value of 0.056 in the full model (0.154 in the sentiment-only model), which is above 0.05, so it is only marginally significant at best. The VIF value for sent_compound is about 2.9 in the full model (2.6 in the sentiment-only model), which is still acceptable with respect to multicollinearity.