library(readr)
library(psych)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ dplyr 1.0.5
## ✓ tibble 3.0.6 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x ggplot2::%+%() masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#install.packages("Hmisc")
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, units
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:psych':
##
## logit
library(ggplot2)
library(readxl)
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
#install.packages("LogicReg")
library(LogicReg)
#install.packages("tidyverse")
library(tidyverse)
# Load the 2019 social-media feature table from a comma-separated file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
socailmedia_2019_features <- read.csv(
  '/Users/pallavisaitu/Downloads/socailmedia_2019_features.csv',
  sep = ","
)
# View(socailmedia_2019_features)
# Report the table size as (rows, columns).
dim(socailmedia_2019_features)
## [1] 7742 36
# Preview the first rows of the table.
head(socailmedia_2019_features)
## subreddit author date
## 1 depression anonaccount131 1/1/19
## 2 depression gimlis_beard 1/1/19
## 3 depression WreckDotNet 1/1/19
## 4 depression danieltargaryean 1/1/19
## 5 depression emmanuel169 1/1/19
## 6 depression Lunakinn 1/1/19
## post
## 1 Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart and able back in high school, now I can't do fucking anything. I look at a piece of work/theory and don't even understand it, thinking where tf do I start
## 2 Craving validation from others while immediately rejecting anything positive that other people say about me is a special kind of hell I have no confidence in myself, especially about my physical appearance, so i often look to others for things I can be positive about. However, I imeadiately shoot down any complement with statements like, "It isn't actually all that impressive," or, "they are only saying that to placate me." It feels like I'm just not allowed to feel happy about anything I do.
## 3 Calling the distress line while living at home? How? How can I? \nI can't really afford therapy or anything so this is like my last resort. But I live at home, and I can't go out in public to do so.\n\n&#x200B;
## 4 Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on improving myself. I don't want to be depressed 24/7 anymore. I'm done with people treating me like shit. I'm still my own worst enemy but I'm hoping that will change this year. I already delete social media from my phone, save reddit and messenger, and I hope that this will improve my life somehow. I know everyone here seems to have lost hope and I have too but I don't know, I just hope someone will read this and decide to try and change with me. 2018 was fucking trash and honestly do i expect 2019 to be different? No, but it's not going to be because I didn't try. I wish all of you the best in 2019 and I hope we all live to see 2020. \n\nP.S. If you feel suicidal, a pet might help. I have had suicidal thoughts but I couldn't go through with it knowing I not only leave my parents but my best friend as well. Try looking into it guys. Have a great 2019.
## 5 Anyone just want someone to talk to? I\x89Ûªm stuck in the desert because I was forced to join the military and am now stuck with 1000\x89Ûªs who live so loosely and find joy in anything and then there is me. So any type of conversation would be nice. Venting ranting or just light conversation. I can listen and talk. Anyone in need of help I can offer only an ear but I am here.
## 6 How do you know you're on the right medication? I hated my last medication, it seemed to make everything worse and my doctor wouldnt listen to me. When I said it was wrong, he just doubled the dose. So for months I endured it and waited for my anxiety to calm down then ended those attrocious pills with my doctor's approval. \n\nI dont suffer from anxiety anymore, and my mood is much better, but I'm not sure I'm out of this place yet, how do you know?
## automated_readability_index coleman_liau_index flesch_kincaid_grade_level
## 1 10.2180000 5.4667287 9.938636
## 2 9.1317442 9.8955198 9.778465
## 3 -1.1598701 0.8734051 1.879610
## 4 0.8299729 3.0467947 2.356266
## 5 1.3543421 3.5983855 2.865000
## 6 4.2483871 4.6717075 5.553925
## flesch_reading_ease gulpease_index gunning_fog_index lix smog_index
## 1 72.78795 61.90909 13.909091 38.40909 11.208143
## 2 55.59095 59.81395 13.391628 41.61860 12.688353
## 3 92.78227 100.36364 4.332468 17.64935 6.182691
## 4 94.44219 84.27363 5.445590 20.57915 6.782985
## 5 91.44250 82.28947 5.378947 21.34211 6.627428
## 6 82.85089 70.29032 8.780645 26.25269 8.841846
## wiener_sachtextformel n_chars n_long_words n_monosyllable_words
## 1 3.810272727 209 6 46
## 2 6.188630698 401 21 56
## 3 0.002562338 160 5 34
## 4 0.302550249 755 23 166
## 5 0.345505263 291 9 62
## 6 1.722513978 354 10 75
## n_polysyllable_words n_sents n_syllables n_unique_words n_words sent_neg
## 1 4 2 69 45 55 0.129
## 2 14 5 136 67 86 0.121
## 3 2 7 56 35 44 0.090
## 4 9 22 245 117 201 0.157
## 5 3 8 95 55 76 0.066
## 6 6 6 119 65 93 0.132
## sent_neu sent_pos sent_compound economic_stress_total domestic_stress_total
## 1 0.775 0.096 -0.4215 0 0
## 2 0.663 0.217 0.8658 2 0
## 3 0.847 0.063 -0.2906 1 0
## 4 0.559 0.284 0.9846 2 0
## 5 0.817 0.117 0.4404 0 0
## 6 0.787 0.082 -0.5615 0 0
## suicidality_total liwc_negative_emotion liwc_social_processes tfidf_anxieti
## 1 0 2 0 0.0000000
## 2 0 2 3 0.0000000
## 3 0 1 2 0.0000000
## 4 0 5 9 0.0000000
## 5 0 0 8 0.0000000
## 6 0 3 5 0.2973842
## tfidf_depress tfidf_stress tfidf_struggl tfidf_suicid depression
## 1 0.00000000 0 0 0.0000000 1
## 2 0.00000000 0 0 0.0000000 1
## 3 0.00000000 0 0 0.0000000 1
## 4 0.09623711 0 0 0.2444397 1
## 5 0.00000000 0 0 0.0000000 1
## 6 0.00000000 0 0 0.0000000 1
#summary(socailmedia_2019_features)
#str(socailmedia_2019_features)
# Count missing cells across the whole raw table (0 in the recorded run).
# The original script ran this identical check twice in a row; once is enough.
sum(is.na(socailmedia_2019_features))
## [1] 0
# Drop every row that contains at least one NA so that later steps can
# assume complete cases. With zero NAs this keeps all rows.
socailmedia_2019_features_clean <- na.omit(socailmedia_2019_features)
# Confirm that no missing cells remain in the cleaned table.
sum(is.na(socailmedia_2019_features_clean))
## [1] 0
# Inspect the column types of the cleaned table.
str(socailmedia_2019_features_clean)
## 'data.frame': 7742 obs. of 36 variables:
## $ subreddit : Factor w/ 3 levels "anxiety","depression",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ author : Factor w/ 7638 levels "__dark__throw__away_",..: 496 2582 7414 1628 2125 4128 6517 3129 2718 5500 ...
## $ date : Factor w/ 23 levels "1/1/19","1/10/19",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ post : Factor w/ 7739 levels "_\xd9\xf7\xd3 Scared of seeking help with psych about what I am feeling right know. ",..: 564 1033 850 6015 656 2455 3052 3826 5060 94 ...
## $ automated_readability_index: num 10.22 9.13 -1.16 0.83 1.35 ...
## $ coleman_liau_index : num 5.467 9.896 0.873 3.047 3.598 ...
## $ flesch_kincaid_grade_level : num 9.94 9.78 1.88 2.36 2.87 ...
## $ flesch_reading_ease : num 72.8 55.6 92.8 94.4 91.4 ...
## $ gulpease_index : num 61.9 59.8 100.4 84.3 82.3 ...
## $ gunning_fog_index : num 13.91 13.39 4.33 5.45 5.38 ...
## $ lix : num 38.4 41.6 17.6 20.6 21.3 ...
## $ smog_index : num 11.21 12.69 6.18 6.78 6.63 ...
## $ wiener_sachtextformel : num 3.81027 6.18863 0.00256 0.30255 0.34551 ...
## $ n_chars : int 209 401 160 755 291 354 383 420 110 221 ...
## $ n_long_words : int 6 21 5 23 9 10 8 15 3 5 ...
## $ n_monosyllable_words : int 46 56 34 166 62 75 96 82 24 57 ...
## $ n_polysyllable_words : int 4 14 2 9 3 6 3 6 1 1 ...
## $ n_sents : int 2 5 7 22 8 6 13 9 3 7 ...
## $ n_syllables : int 69 136 56 245 95 119 131 133 37 78 ...
## $ n_unique_words : int 45 67 35 117 55 65 66 63 27 42 ...
## $ n_words : int 55 86 44 201 76 93 111 104 30 67 ...
## $ sent_neg : num 0.129 0.121 0.09 0.157 0.066 0.132 0.133 0.102 0.104 0.034 ...
## $ sent_neu : num 0.775 0.663 0.847 0.559 0.817 0.787 0.704 0.762 0.592 0.634 ...
## $ sent_pos : num 0.096 0.217 0.063 0.284 0.117 0.082 0.164 0.136 0.305 0.331 ...
## $ sent_compound : num -0.421 0.866 -0.291 0.985 0.44 ...
## $ economic_stress_total : int 0 2 1 2 0 0 0 0 0 0 ...
## $ domestic_stress_total : int 0 0 0 0 0 0 0 0 0 0 ...
## $ suicidality_total : int 0 0 0 0 0 0 0 0 0 0 ...
## $ liwc_negative_emotion : int 2 2 1 5 0 3 3 2 1 1 ...
## $ liwc_social_processes : int 0 3 2 9 8 5 2 3 1 3 ...
## $ tfidf_anxieti : num 0 0 0 0 0 ...
## $ tfidf_depress : num 0 0 0 0.0962 0 ...
## $ tfidf_stress : num 0 0 0 0 0 0 0 0 0 0 ...
## $ tfidf_struggl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ tfidf_suicid : num 0 0 0 0.244 0 ...
## $ depression : int 1 1 1 1 1 1 1 1 1 1 ...
# Build the numeric feature table used for the correlation analysis.
library(dplyr)
# Keep only the numeric columns; this removes the non-numeric columns
# subreddit, author, date and post.
dataset <- select_if(socailmedia_2019_features_clean, is.numeric)
# Drop gulpease_index by name. The original removed it as "column 5" of the
# numeric subset, which silently removes a different column if the column
# order of the input file ever changes.
# NOTE(review): the reason for excluding gulpease_index is not recorded in
# this script -- confirm it is intentional.
dataset <- select(dataset, -gulpease_index)
#install.packages("corrplot")
library(corrplot)
## corrplot 0.84 loaded
# Pairwise correlations between all remaining features (cor() defaults to
# Pearson), drawn as an upper-triangle plot with hierarchical ordering.
datamatrix <- cor(dataset)
corrplot(datamatrix, order = "hclust", type = "upper", tl.srt = 45)
# rcorr() needs the observations (rows = posts, columns = features). The
# original passed `datamatrix`, i.e. the correlation matrix itself, which
# yields correlations of correlations with n = ncol(dataset) and meaningless
# P-values.
# NOTE: rcorr output recorded in this file from that earlier call is stale
# and must be regenerated by re-running the script.
res2 <- rcorr(as.matrix(dataset), type = "pearson")
res2$r
## automated_readability_index coleman_liau_index
## automated_readability_index 1.000000000 0.98931973
## coleman_liau_index 0.989319730 1.00000000
## flesch_kincaid_grade_level 0.990699285 0.96577749
## flesch_reading_ease -0.990248998 -0.98341065
## gunning_fog_index 0.970648650 0.93795782
## lix 0.981155036 0.95858900
## smog_index 0.945396439 0.91707082
## wiener_sachtextformel 0.974377306 0.95889014
## n_chars -0.096438060 -0.13767645
## n_long_words 0.015964726 -0.02617195
## n_monosyllable_words -0.168319022 -0.20921511
## n_polysyllable_words 0.105649466 0.06191993
## n_sents -0.301370955 -0.33194202
## n_syllables -0.102537721 -0.14410887
## n_unique_words -0.090835791 -0.13418334
## n_words -0.138677394 -0.17990308
## sent_neg -0.123779577 -0.09370482
## sent_neu 0.246587047 0.20606541
## sent_pos -0.163504343 -0.14950172
## sent_compound -0.006516122 -0.00884736
## economic_stress_total -0.097589640 -0.13916592
## domestic_stress_total -0.173184889 -0.19185685
## suicidality_total -0.317545083 -0.31839093
## liwc_negative_emotion -0.165093749 -0.19653448
## liwc_social_processes -0.166806263 -0.20639122
## tfidf_anxieti 0.329402291 0.33439282
## tfidf_depress 0.364851125 0.39278186
## tfidf_stress -0.004095684 -0.00949735
## tfidf_struggl 0.049343511 0.05112505
## tfidf_suicid 0.033858340 0.06000411
## depression 0.024477226 0.02963546
## flesch_kincaid_grade_level flesch_reading_ease
## automated_readability_index 0.990699285 -0.9902489976
## coleman_liau_index 0.965777485 -0.9834106476
## flesch_kincaid_grade_level 1.000000000 -0.9907684425
## flesch_reading_ease -0.990768442 1.0000000000
## gunning_fog_index 0.993382206 -0.9805146544
## lix 0.993027284 -0.9888181471
## smog_index 0.975595153 -0.9680266556
## wiener_sachtextformel 0.987812384 -0.9928522663
## n_chars -0.061616650 0.1287219936
## n_long_words 0.053683437 0.0113778283
## n_monosyllable_words -0.134216558 0.2020556016
## n_polysyllable_words 0.148537074 -0.0854409170
## n_sents -0.271369447 0.3304284917
## n_syllables -0.066974171 0.1341976448
## n_unique_words -0.054003534 0.1229233636
## n_words -0.103922908 0.1715234100
## sent_neg -0.127747375 0.0810251886
## sent_neu 0.264783221 -0.2110928963
## sent_pos -0.182385705 0.1729121387
## sent_compound -0.021815296 0.0313199966
## economic_stress_total -0.059198538 0.1215424245
## domestic_stress_total -0.159880040 0.1950214823
## suicidality_total -0.310848132 0.3236630855
## liwc_negative_emotion -0.132646789 0.1862972368
## liwc_social_processes -0.139022245 0.2072377149
## tfidf_anxieti 0.361568191 -0.3865460593
## tfidf_depress 0.365822243 -0.4079030312
## tfidf_stress 0.002447042 0.0004958451
## tfidf_struggl 0.056510840 -0.0634911380
## tfidf_suicid 0.017752008 -0.0471086573
## depression 0.040734854 -0.0531732905
## gunning_fog_index lix smog_index
## automated_readability_index 0.9706486502 0.981155036 0.945396439
## coleman_liau_index 0.9379578241 0.958589005 0.917070824
## flesch_kincaid_grade_level 0.9933822064 0.993027284 0.975595153
## flesch_reading_ease -0.9805146544 -0.988818147 -0.968026656
## gunning_fog_index 1.0000000000 0.989929241 0.991276217
## lix 0.9899292406 1.000000000 0.976059397
## smog_index 0.9912762169 0.976059397 1.000000000
## wiener_sachtextformel 0.9892592949 0.995136381 0.983496595
## n_chars -0.0528585935 -0.084656172 -0.006758319
## n_long_words 0.0636667410 0.034465823 0.111536431
## n_monosyllable_words -0.1251441489 -0.157837596 -0.079616041
## n_polysyllable_words 0.1632822830 0.129119659 0.215080821
## n_sents -0.2624910279 -0.291534495 -0.212145059
## n_syllables -0.0575975087 -0.089914669 -0.011059597
## n_unique_words -0.0438310778 -0.077518414 0.002669595
## n_words -0.0947782211 -0.127183417 -0.048826950
## sent_neg -0.1269766395 -0.079162357 -0.108231551
## sent_neu 0.2730993379 0.238004782 0.258800212
## sent_pos -0.1944278027 -0.211048000 -0.200223232
## sent_compound -0.0315950859 -0.063467539 -0.056073112
## economic_stress_total -0.0494235209 -0.080747378 -0.008924442
## domestic_stress_total -0.1614285589 -0.171161445 -0.138067999
## suicidality_total -0.3204037182 -0.307984203 -0.297605722
## liwc_negative_emotion -0.1234976707 -0.142345475 -0.071980024
## liwc_social_processes -0.1327639714 -0.164897157 -0.092852086
## tfidf_anxieti 0.3879011933 0.379939652 0.416365204
## tfidf_depress 0.3828302473 0.403651741 0.410680302
## tfidf_stress 0.0047402573 0.005322529 0.006600530
## tfidf_struggl 0.0566017348 0.062405444 0.065882030
## tfidf_suicid 0.0001173876 0.038581125 0.001765730
## depression 0.0523730794 0.050899572 0.065050182
## wiener_sachtextformel n_chars n_long_words
## automated_readability_index 0.9743773060 -0.0964380596 0.0159647259
## coleman_liau_index 0.9588901384 -0.1376764541 -0.0261719508
## flesch_kincaid_grade_level 0.9878123842 -0.0616166504 0.0536834367
## flesch_reading_ease -0.9928522663 0.1287219936 0.0113778283
## gunning_fog_index 0.9892592949 -0.0528585935 0.0636667410
## lix 0.9951363809 -0.0846561724 0.0344658234
## smog_index 0.9834965945 -0.0067583185 0.1115364310
## wiener_sachtextformel 1.0000000000 -0.1206703566 -0.0003933138
## n_chars -0.1206703566 1.0000000000 0.9919137489
## n_long_words -0.0003933138 0.9919137489 1.0000000000
## n_monosyllable_words -0.1941762297 0.9970761977 0.9795372562
## n_polysyllable_words 0.0992516351 0.9725141736 0.9932151702
## n_sents -0.3232967958 0.9761132879 0.9440341629
## n_syllables -0.1256496056 0.9999614991 0.9913566626
## n_unique_words -0.1137351474 0.9993298246 0.9920376325
## n_words -0.1633406943 0.9990172930 0.9855204853
## sent_neg -0.0626030286 -0.1355891156 -0.1305371118
## sent_neu 0.2133080151 0.1654598939 0.1858490804
## sent_pos -0.2001769879 -0.0402208793 -0.0739287595
## sent_compound -0.0603298749 -0.1184550634 -0.1406862884
## economic_stress_total -0.1128804795 0.8909186571 0.8851110989
## domestic_stress_total -0.1945628232 0.5932068798 0.5774570941
## suicidality_total -0.3256545300 0.4145478121 0.3807025748
## liwc_negative_emotion -0.1733099855 0.9506950770 0.9395590412
## liwc_social_processes -0.2026489662 0.9779251322 0.9560647371
## tfidf_anxieti 0.4103315704 -0.1362706055 -0.0771747683
## tfidf_depress 0.4316887453 -0.1316552404 -0.0711865040
## tfidf_stress 0.0036396798 -0.0180337439 -0.0135615012
## tfidf_struggl 0.0666315394 -0.0006563571 0.0125063420
## tfidf_suicid 0.0405142083 -0.1138205435 -0.1044475636
## depression 0.0639043077 -0.0726959326 -0.0589678534
## n_monosyllable_words n_polysyllable_words
## automated_readability_index -0.168319022 0.10564947
## coleman_liau_index -0.209215109 0.06191993
## flesch_kincaid_grade_level -0.134216558 0.14853707
## flesch_reading_ease 0.202055602 -0.08544092
## gunning_fog_index -0.125144149 0.16328228
## lix -0.157837596 0.12911966
## smog_index -0.079616041 0.21508082
## wiener_sachtextformel -0.194176230 0.09925164
## n_chars 0.997076198 0.97251417
## n_long_words 0.979537256 0.99321517
## n_monosyllable_words 1.000000000 0.95265001
## n_polysyllable_words 0.952650010 1.00000000
## n_sents 0.988532722 0.90521407
## n_syllables 0.997429241 0.97174220
## n_unique_words 0.996027044 0.97373814
## n_words 0.999473723 0.96184577
## sent_neg -0.131723144 -0.13971798
## sent_neu 0.150582162 0.20560776
## sent_pos -0.025599272 -0.08800526
## sent_compound -0.111509722 -0.14019112
## economic_stress_total 0.888753604 0.86929978
## domestic_stress_total 0.598883842 0.54702726
## suicidality_total 0.434797972 0.33578668
## liwc_negative_emotion 0.951591512 0.91376694
## liwc_social_processes 0.982590121 0.92580386
## tfidf_anxieti -0.167670049 -0.01832355
## tfidf_depress -0.164871965 -0.01974876
## tfidf_stress -0.018670508 -0.01270078
## tfidf_struggl -0.007817339 0.02060667
## tfidf_suicid -0.115705880 -0.10923294
## depression -0.076674831 -0.04862408
## n_sents n_syllables n_unique_words
## automated_readability_index -0.301370955 -0.1025377213 -0.090835791
## coleman_liau_index -0.331942021 -0.1441088708 -0.134183338
## flesch_kincaid_grade_level -0.271369447 -0.0669741709 -0.054003534
## flesch_reading_ease 0.330428492 0.1341976448 0.122923364
## gunning_fog_index -0.262491028 -0.0575975087 -0.043831078
## lix -0.291534495 -0.0899146686 -0.077518414
## smog_index -0.212145059 -0.0110595969 0.002669595
## wiener_sachtextformel -0.323296796 -0.1256496056 -0.113735147
## n_chars 0.976113288 0.9999614991 0.999329825
## n_long_words 0.944034163 0.9913566626 0.992037632
## n_monosyllable_words 0.988532722 0.9974292411 0.996027044
## n_polysyllable_words 0.905214073 0.9717422027 0.973738139
## n_sents 1.000000000 0.9771908545 0.973680696
## n_syllables 0.977190854 1.0000000000 0.999362049
## n_unique_words 0.973680696 0.9993620485 1.000000000
## n_words 0.984045561 0.9992268241 0.998166489
## sent_neg -0.091064337 -0.1341811689 -0.145748988
## sent_neu 0.091924989 0.1642173354 0.182349824
## sent_pos -0.001523116 -0.0404325862 -0.049196191
## sent_compound -0.119064089 -0.1194978466 -0.119639996
## economic_stress_total 0.868603789 0.8920016784 0.891412097
## domestic_stress_total 0.605529222 0.5931290010 0.589715201
## suicidality_total 0.475413018 0.4166199670 0.411687829
## liwc_negative_emotion 0.948325586 0.9511560000 0.948354536
## liwc_social_processes 0.969737337 0.9776199414 0.974389393
## tfidf_anxieti -0.203258669 -0.1360198342 -0.128960258
## tfidf_depress -0.194939145 -0.1337233470 -0.134008081
## tfidf_stress -0.018667308 -0.0173713210 -0.012040809
## tfidf_struggl -0.012844531 -0.0006961643 0.005519864
## tfidf_suicid -0.103260070 -0.1138330096 -0.114620797
## depression -0.071617697 -0.0713132081 -0.068515101
## n_words sent_neg sent_neu sent_pos
## automated_readability_index -0.138677394 -0.12377958 0.2465870474 -0.163504343
## coleman_liau_index -0.179903076 -0.09370482 0.2060654108 -0.149501721
## flesch_kincaid_grade_level -0.103922908 -0.12774737 0.2647832210 -0.182385705
## flesch_reading_ease 0.171523410 0.08102519 -0.2110928963 0.172912139
## gunning_fog_index -0.094778221 -0.12697664 0.2730993379 -0.194427803
## lix -0.127183417 -0.07916236 0.2380047817 -0.211048000
## smog_index -0.048826950 -0.10823155 0.2588002117 -0.200223232
## wiener_sachtextformel -0.163340694 -0.06260303 0.2133080151 -0.200176988
## n_chars 0.999017293 -0.13558912 0.1654598939 -0.040220879
## n_long_words 0.985520485 -0.13053711 0.1858490804 -0.073928760
## n_monosyllable_words 0.999473723 -0.13172314 0.1505821621 -0.025599272
## n_polysyllable_words 0.961845766 -0.13971798 0.2056077564 -0.088005262
## n_sents 0.984045561 -0.09106434 0.0919249885 -0.001523116
## n_syllables 0.999226824 -0.13418117 0.1642173354 -0.040432586
## n_unique_words 0.998166489 -0.14574899 0.1823498244 -0.049196191
## n_words 1.000000000 -0.13234642 0.1564691233 -0.032581910
## sent_neg -0.132346421 1.00000000 -0.7143900456 -0.373496113
## sent_neu 0.156469123 -0.71439005 1.0000000000 -0.382286271
## sent_pos -0.032581910 -0.37349611 -0.3822862710 1.000000000
## sent_compound -0.115701142 -0.79857541 0.2001823661 0.789162434
## economic_stress_total 0.890807234 -0.14187230 0.2124161138 -0.094160341
## domestic_stress_total 0.596888266 0.03027215 0.0376954105 -0.089872627
## suicidality_total 0.427248291 0.34860062 -0.2851830701 -0.082315391
## liwc_negative_emotion 0.952183338 0.14111825 -0.0466972241 -0.124365173
## liwc_social_processes 0.980902927 -0.18304550 0.1647076148 0.023446532
## tfidf_anxieti -0.153517504 0.05331424 0.1267184510 -0.238404436
## tfidf_depress -0.150909305 0.36746591 -0.1830954612 -0.242535850
## tfidf_stress -0.018055840 0.08153457 -0.0006740162 -0.106695519
## tfidf_struggl -0.004447442 0.11048382 -0.0374965961 -0.096223304
## tfidf_suicid -0.114620203 0.35648462 -0.2573792628 -0.129674940
## depression -0.074223806 0.28320568 -0.0397383263 -0.321246190
## sent_compound economic_stress_total
## automated_readability_index -0.006516122 -0.0975896398
## coleman_liau_index -0.008847360 -0.1391659245
## flesch_kincaid_grade_level -0.021815296 -0.0591985383
## flesch_reading_ease 0.031319997 0.1215424245
## gunning_fog_index -0.031595086 -0.0494235209
## lix -0.063467539 -0.0807473781
## smog_index -0.056073112 -0.0089244418
## wiener_sachtextformel -0.060329875 -0.1128804795
## n_chars -0.118455063 0.8909186571
## n_long_words -0.140686288 0.8851110989
## n_monosyllable_words -0.111509722 0.8887536042
## n_polysyllable_words -0.140191125 0.8692997799
## n_sents -0.119064089 0.8686037888
## n_syllables -0.119497847 0.8920016784
## n_unique_words -0.119639996 0.8914120965
## n_words -0.115701142 0.8908072340
## sent_neg -0.798575406 -0.1418722976
## sent_neu 0.200182366 0.2124161138
## sent_pos 0.789162434 -0.0941603412
## sent_compound 1.000000000 -0.1275440361
## economic_stress_total -0.127544036 1.0000000000
## domestic_stress_total -0.189000554 0.5220658033
## suicidality_total -0.367881229 0.3328408113
## liwc_negative_emotion -0.338141953 0.8246801846
## liwc_social_processes -0.042010708 0.8542174439
## tfidf_anxieti -0.151860795 -0.1256804896
## tfidf_depress -0.348129960 -0.1547261439
## tfidf_stress -0.134935283 0.0251576157
## tfidf_struggl -0.137538001 0.0005381508
## tfidf_suicid -0.303939964 -0.1275830158
## depression -0.371973659 -0.0265968741
## domestic_stress_total suicidality_total
## automated_readability_index -0.17318489 -0.31754508
## coleman_liau_index -0.19185685 -0.31839093
## flesch_kincaid_grade_level -0.15988004 -0.31084813
## flesch_reading_ease 0.19502148 0.32366309
## gunning_fog_index -0.16142856 -0.32040372
## lix -0.17116144 -0.30798420
## smog_index -0.13806800 -0.29760572
## wiener_sachtextformel -0.19456282 -0.32565453
## n_chars 0.59320688 0.41454781
## n_long_words 0.57745709 0.38070257
## n_monosyllable_words 0.59888384 0.43479797
## n_polysyllable_words 0.54702726 0.33578668
## n_sents 0.60552922 0.47541302
## n_syllables 0.59312900 0.41661997
## n_unique_words 0.58971520 0.41168783
## n_words 0.59688827 0.42724829
## sent_neg 0.03027215 0.34860062
## sent_neu 0.03769541 -0.28518307
## sent_pos -0.08987263 -0.08231539
## sent_compound -0.18900055 -0.36788123
## economic_stress_total 0.52206580 0.33284081
## domestic_stress_total 1.00000000 0.30331940
## suicidality_total 0.30331940 1.00000000
## liwc_negative_emotion 0.61391814 0.51487644
## liwc_social_processes 0.61177441 0.38328003
## tfidf_anxieti -0.18108957 -0.30862588
## tfidf_depress -0.10761703 -0.11456051
## tfidf_stress -0.07054069 -0.07993940
## tfidf_struggl -0.01058888 -0.03757874
## tfidf_suicid -0.06671180 0.59143662
## depression -0.04024530 0.09018527
## liwc_negative_emotion liwc_social_processes
## automated_readability_index -0.165093749 -0.16680626
## coleman_liau_index -0.196534475 -0.20639122
## flesch_kincaid_grade_level -0.132646789 -0.13902225
## flesch_reading_ease 0.186297237 0.20723771
## gunning_fog_index -0.123497671 -0.13276397
## lix -0.142345475 -0.16489716
## smog_index -0.071980024 -0.09285209
## wiener_sachtextformel -0.173309986 -0.20264897
## n_chars 0.950695077 0.97792513
## n_long_words 0.939559041 0.95606474
## n_monosyllable_words 0.951591512 0.98259012
## n_polysyllable_words 0.913766938 0.92580386
## n_sents 0.948325586 0.96973734
## n_syllables 0.951156000 0.97761994
## n_unique_words 0.948354536 0.97438939
## n_words 0.952183338 0.98090293
## sent_neg 0.141118248 -0.18304550
## sent_neu -0.046697224 0.16470761
## sent_pos -0.124365173 0.02344653
## sent_compound -0.338141953 -0.04201071
## economic_stress_total 0.824680185 0.85421744
## domestic_stress_total 0.613918137 0.61177441
## suicidality_total 0.514876443 0.38328003
## liwc_negative_emotion 1.000000000 0.91651953
## liwc_social_processes 0.916519532 1.00000000
## tfidf_anxieti -0.145983011 -0.19535857
## tfidf_depress -0.008312494 -0.18174662
## tfidf_stress 0.018575807 -0.04707125
## tfidf_struggl 0.048324617 -0.02519996
## tfidf_suicid -0.039070948 -0.14154731
## depression 0.008436090 -0.13383659
## tfidf_anxieti tfidf_depress tfidf_stress
## automated_readability_index 0.32940229 0.364851125 -0.0040956841
## coleman_liau_index 0.33439282 0.392781861 -0.0094973496
## flesch_kincaid_grade_level 0.36156819 0.365822243 0.0024470420
## flesch_reading_ease -0.38654606 -0.407903031 0.0004958451
## gunning_fog_index 0.38790119 0.382830247 0.0047402573
## lix 0.37993965 0.403651741 0.0053225287
## smog_index 0.41636520 0.410680302 0.0066005297
## wiener_sachtextformel 0.41033157 0.431688745 0.0036396798
## n_chars -0.13627061 -0.131655240 -0.0180337439
## n_long_words -0.07717477 -0.071186504 -0.0135615012
## n_monosyllable_words -0.16767005 -0.164871965 -0.0186705083
## n_polysyllable_words -0.01832355 -0.019748756 -0.0127007846
## n_sents -0.20325867 -0.194939145 -0.0186673083
## n_syllables -0.13601983 -0.133723347 -0.0173713210
## n_unique_words -0.12896026 -0.134008081 -0.0120408091
## n_words -0.15351750 -0.150909305 -0.0180558399
## sent_neg 0.05331424 0.367465908 0.0815345728
## sent_neu 0.12671845 -0.183095461 -0.0006740162
## sent_pos -0.23840444 -0.242535850 -0.1066955188
## sent_compound -0.15186080 -0.348129960 -0.1349352830
## economic_stress_total -0.12568049 -0.154726144 0.0251576157
## domestic_stress_total -0.18108957 -0.107617034 -0.0705406900
## suicidality_total -0.30862588 -0.114560511 -0.0799394002
## liwc_negative_emotion -0.14598301 -0.008312494 0.0185758075
## liwc_social_processes -0.19535857 -0.181746619 -0.0470712458
## tfidf_anxieti 1.00000000 0.076229923 0.0633771934
## tfidf_depress 0.07622992 1.000000000 -0.0664883971
## tfidf_stress 0.06337719 -0.066488397 1.0000000000
## tfidf_struggl 0.04095322 0.076152399 0.0042549727
## tfidf_suicid -0.15837080 0.060119651 -0.0940051280
## depression 0.13632309 0.236235097 0.0078282218
## tfidf_struggl tfidf_suicid depression
## automated_readability_index 0.0493435112 0.0338583396 0.024477226
## coleman_liau_index 0.0511250506 0.0600041094 0.029635458
## flesch_kincaid_grade_level 0.0565108403 0.0177520081 0.040734854
## flesch_reading_ease -0.0634911380 -0.0471086573 -0.053173291
## gunning_fog_index 0.0566017348 0.0001173876 0.052373079
## lix 0.0624054444 0.0385811254 0.050899572
## smog_index 0.0658820300 0.0017657297 0.065050182
## wiener_sachtextformel 0.0666315394 0.0405142083 0.063904308
## n_chars -0.0006563571 -0.1138205435 -0.072695933
## n_long_words 0.0125063420 -0.1044475636 -0.058967853
## n_monosyllable_words -0.0078173393 -0.1157058795 -0.076674831
## n_polysyllable_words 0.0206066738 -0.1092329432 -0.048624082
## n_sents -0.0128445306 -0.1032600698 -0.071617697
## n_syllables -0.0006961643 -0.1138330096 -0.071313208
## n_unique_words 0.0055198642 -0.1146207970 -0.068515101
## n_words -0.0044474416 -0.1146202032 -0.074223806
## sent_neg 0.1104838198 0.3564846199 0.283205679
## sent_neu -0.0374965961 -0.2573792628 -0.039738326
## sent_pos -0.0962233037 -0.1296749405 -0.321246190
## sent_compound -0.1375380010 -0.3039399636 -0.371973659
## economic_stress_total 0.0005381508 -0.1275830158 -0.026596874
## domestic_stress_total -0.0105888808 -0.0667118002 -0.040245303
## suicidality_total -0.0375787359 0.5914366242 0.090185270
## liwc_negative_emotion 0.0483246170 -0.0390709481 0.008436090
## liwc_social_processes -0.0251999618 -0.1415473143 -0.133836590
## tfidf_anxieti 0.0409532185 -0.1583708043 0.136323094
## tfidf_depress 0.0761523989 0.0601196505 0.236235097
## tfidf_stress 0.0042549727 -0.0940051280 0.007828222
## tfidf_struggl 1.0000000000 -0.0558296255 0.014105794
## tfidf_suicid -0.0558296255 1.0000000000 0.111750491
## depression 0.0141057944 0.1117504914 1.000000000
# Print the P-value matrix stored in res2, one entry per pair of variables
# (the diagonal is NA).
res2[["P"]]
## automated_readability_index coleman_liau_index
## automated_readability_index NA 0.000000e+00
## coleman_liau_index 0.000000e+00 NA
## flesch_kincaid_grade_level 0.000000e+00 0.000000e+00
## flesch_reading_ease 0.000000e+00 0.000000e+00
## gunning_fog_index 0.000000e+00 7.105427e-15
## lix 0.000000e+00 0.000000e+00
## smog_index 1.332268e-15 4.183320e-13
## wiener_sachtextformel 0.000000e+00 0.000000e+00
## n_chars 6.057953e-01 4.601637e-01
## n_long_words 9.320703e-01 8.888538e-01
## n_monosyllable_words 3.653964e-01 2.586670e-01
## n_polysyllable_words 5.716349e-01 7.407156e-01
## n_sents 9.944127e-02 6.809997e-02
## n_syllables 5.830774e-01 4.392609e-01
## n_unique_words 6.269845e-01 4.717318e-01
## n_words 4.568768e-01 3.328308e-01
## sent_neg 5.070661e-01 6.160953e-01
## sent_neu 1.811322e-01 2.660677e-01
## sent_pos 3.794668e-01 4.221422e-01
## sent_compound 9.722475e-01 9.623247e-01
## economic_stress_total 6.014776e-01 4.552771e-01
## domestic_stress_total 3.514946e-01 3.011544e-01
## suicidality_total 8.172991e-02 8.087530e-02
## liwc_negative_emotion 3.747874e-01 2.892956e-01
## liwc_social_processes 3.697836e-01 2.652958e-01
## tfidf_anxieti 7.036605e-02 6.596751e-02
## tfidf_depress 4.358118e-02 2.883429e-02
## tfidf_stress 9.825543e-01 9.595590e-01
## tfidf_struggl 7.920854e-01 7.847508e-01
## tfidf_suicid 8.565089e-01 7.484766e-01
## depression 8.960108e-01 8.742542e-01
## flesch_kincaid_grade_level flesch_reading_ease
## automated_readability_index 0.00000000 0.00000000
## coleman_liau_index 0.00000000 0.00000000
## flesch_kincaid_grade_level NA 0.00000000
## flesch_reading_ease 0.00000000 NA
## gunning_fog_index 0.00000000 0.00000000
## lix 0.00000000 0.00000000
## smog_index 0.00000000 0.00000000
## wiener_sachtextformel 0.00000000 0.00000000
## n_chars 0.74194255 0.49011792
## n_long_words 0.77424970 0.95156020
## n_monosyllable_words 0.47162106 0.27568725
## n_polysyllable_words 0.42517682 0.64766887
## n_sents 0.13975513 0.06944347
## n_syllables 0.72036213 0.47168410
## n_unique_words 0.77293854 0.51003153
## n_words 0.57797129 0.35620531
## sent_neg 0.49343700 0.66479392
## sent_neu 0.15000704 0.25431975
## sent_pos 0.32609027 0.35226535
## sent_compound 0.90726844 0.86716781
## economic_stress_total 0.75174730 0.51483230
## domestic_stress_total 0.39026363 0.29309826
## suicidality_total 0.08874325 0.07570246
## liwc_negative_emotion 0.47686800 0.31564202
## liwc_social_processes 0.45574731 0.26329725
## tfidf_anxieti 0.04565238 0.03171056
## tfidf_depress 0.04298312 0.02273415
## tfidf_stress 0.98957621 0.99788777
## tfidf_struggl 0.76268978 0.73436917
## tfidf_suicid 0.92448595 0.80131094
## depression 0.82776268 0.77634058
## gunning_fog_index lix smog_index
## automated_readability_index 0.000000e+00 0.00000000 1.332268e-15
## coleman_liau_index 7.105427e-15 0.00000000 4.183320e-13
## flesch_kincaid_grade_level 0.000000e+00 0.00000000 0.000000e+00
## flesch_reading_ease 0.000000e+00 0.00000000 0.000000e+00
## gunning_fog_index NA 0.00000000 0.000000e+00
## lix 0.000000e+00 NA 0.000000e+00
## smog_index 0.000000e+00 0.00000000 NA
## wiener_sachtextformel 0.000000e+00 0.00000000 0.000000e+00
## n_chars 7.776312e-01 0.65069970 9.712164e-01
## n_long_words 7.336609e-01 0.85396157 5.502688e-01
## n_monosyllable_words 5.023579e-01 0.39642514 6.702945e-01
## n_polysyllable_words 3.801233e-01 0.48876694 2.452479e-01
## n_sents 1.536993e-01 0.11154676 2.519051e-01
## n_syllables 7.582601e-01 0.63049712 9.529135e-01
## n_unique_words 8.148881e-01 0.67851346 9.886282e-01
## n_words 6.120416e-01 0.49536276 7.942154e-01
## sent_neg 4.960698e-01 0.67206897 5.622176e-01
## sent_neu 1.371492e-01 0.19728699 1.597812e-01
## sent_pos 2.945990e-01 0.25442312 2.801569e-01
## sent_compound 8.660115e-01 0.73446436 7.644762e-01
## economic_stress_total 7.917556e-01 0.66587700 9.619967e-01
## domestic_stress_total 3.856291e-01 0.35723655 4.588764e-01
## suicidality_total 7.886929e-02 0.09187896 1.039519e-01
## liwc_negative_emotion 5.080415e-01 0.44493919 7.003868e-01
## liwc_social_processes 4.764753e-01 0.37536439 6.193236e-01
## tfidf_anxieti 3.106649e-02 0.03500753 1.981247e-02
## tfidf_depress 3.353242e-02 0.02433057 2.173856e-02
## tfidf_stress 9.798092e-01 0.97732963 9.718881e-01
## tfidf_struggl 7.623190e-01 0.73875270 7.247449e-01
## tfidf_suicid 9.994999e-01 0.83674434 9.924783e-01
## depression 7.796234e-01 0.78567813 7.280888e-01
## wiener_sachtextformel n_chars n_long_words
## automated_readability_index 0.00000000 6.057953e-01 9.320703e-01
## coleman_liau_index 0.00000000 4.601637e-01 8.888538e-01
## flesch_kincaid_grade_level 0.00000000 7.419426e-01 7.742497e-01
## flesch_reading_ease 0.00000000 4.901179e-01 9.515602e-01
## gunning_fog_index 0.00000000 7.776312e-01 7.336609e-01
## lix 0.00000000 6.506997e-01 8.539616e-01
## smog_index 0.00000000 9.712164e-01 5.502688e-01
## wiener_sachtextformel NA 5.178754e-01 9.983245e-01
## n_chars 0.51787541 NA 0.000000e+00
## n_long_words 0.99832454 0.000000e+00 NA
## n_monosyllable_words 0.29523645 0.000000e+00 0.000000e+00
## n_polysyllable_words 0.59526957 0.000000e+00 0.000000e+00
## n_sents 0.07605336 0.000000e+00 1.776357e-15
## n_syllables 0.50061946 0.000000e+00 0.000000e+00
## n_unique_words 0.54238551 0.000000e+00 0.000000e+00
## n_words 0.37995055 0.000000e+00 0.000000e+00
## sent_neg 0.73795435 4.670581e-01 4.839670e-01
## sent_neu 0.24925380 3.737143e-01 3.168284e-01
## sent_pos 0.28027035 8.299042e-01 6.926625e-01
## sent_compound 0.74715523 5.256452e-01 4.503180e-01
## economic_stress_total 0.54544347 1.873812e-11 3.824918e-11
## domestic_stress_total 0.29425728 4.362382e-04 6.703720e-04
## suicidality_total 0.07381648 2.041224e-02 3.461320e-02
## liwc_negative_emotion 0.35114145 2.220446e-16 4.884981e-15
## liwc_social_processes 0.27424979 0.000000e+00 0.000000e+00
## tfidf_anxieti 0.02186157 4.648012e-01 6.798634e-01
## tfidf_depress 0.01531150 4.801978e-01 7.035406e-01
## tfidf_stress 0.98449638 9.232910e-01 9.422776e-01
## tfidf_struggl 0.72173618 9.972040e-01 9.467621e-01
## tfidf_suicid 0.82868188 5.420804e-01 5.760425e-01
## depression 0.73270312 6.975457e-01 7.526847e-01
## n_monosyllable_words n_polysyllable_words
## automated_readability_index 3.653964e-01 5.716349e-01
## coleman_liau_index 2.586670e-01 7.407156e-01
## flesch_kincaid_grade_level 4.716211e-01 4.251768e-01
## flesch_reading_ease 2.756872e-01 6.476689e-01
## gunning_fog_index 5.023579e-01 3.801233e-01
## lix 3.964251e-01 4.887669e-01
## smog_index 6.702945e-01 2.452479e-01
## wiener_sachtextformel 2.952364e-01 5.952696e-01
## n_chars 0.000000e+00 0.000000e+00
## n_long_words 0.000000e+00 0.000000e+00
## n_monosyllable_words NA 0.000000e+00
## n_polysyllable_words 0.000000e+00 NA
## n_sents 0.000000e+00 2.686296e-12
## n_syllables 0.000000e+00 0.000000e+00
## n_unique_words 0.000000e+00 0.000000e+00
## n_words 0.000000e+00 0.000000e+00
## sent_neg 4.799694e-01 4.534731e-01
## sent_neu 4.187577e-01 2.671544e-01
## sent_pos 8.912713e-01 6.378036e-01
## sent_compound 5.503648e-01 4.519299e-01
## economic_stress_total 2.456124e-11 2.232314e-10
## domestic_stress_total 3.716096e-04 1.450014e-03
## suicidality_total 1.451121e-02 6.477816e-02
## liwc_negative_emotion 2.220446e-16 7.212009e-13
## liwc_social_processes 0.000000e+00 8.837375e-14
## tfidf_anxieti 3.672747e-01 9.220619e-01
## tfidf_depress 3.754384e-01 9.160204e-01
## tfidf_stress 9.205907e-01 9.459356e-01
## tfidf_struggl 9.667083e-01 9.123858e-01
## tfidf_suicid 5.353653e-01 5.585845e-01
## depression 6.818291e-01 7.950523e-01
## n_sents n_syllables n_unique_words
## automated_readability_index 9.944127e-02 5.830774e-01 6.269845e-01
## coleman_liau_index 6.809997e-02 4.392609e-01 4.717318e-01
## flesch_kincaid_grade_level 1.397551e-01 7.203621e-01 7.729385e-01
## flesch_reading_ease 6.944347e-02 4.716841e-01 5.100315e-01
## gunning_fog_index 1.536993e-01 7.582601e-01 8.148881e-01
## lix 1.115468e-01 6.304971e-01 6.785135e-01
## smog_index 2.519051e-01 9.529135e-01 9.886282e-01
## wiener_sachtextformel 7.605336e-02 5.006195e-01 5.423855e-01
## n_chars 0.000000e+00 0.000000e+00 0.000000e+00
## n_long_words 1.776357e-15 0.000000e+00 0.000000e+00
## n_monosyllable_words 0.000000e+00 0.000000e+00 0.000000e+00
## n_polysyllable_words 2.686296e-12 0.000000e+00 0.000000e+00
## n_sents NA 0.000000e+00 0.000000e+00
## n_syllables 0.000000e+00 NA 0.000000e+00
## n_unique_words 0.000000e+00 0.000000e+00 NA
## n_words 0.000000e+00 0.000000e+00 0.000000e+00
## sent_neg 6.261142e-01 4.717390e-01 4.340151e-01
## sent_neu 6.228413e-01 3.773635e-01 3.261871e-01
## sent_pos 9.935118e-01 8.290220e-01 7.926927e-01
## sent_compound 5.235035e-01 5.219808e-01 5.214822e-01
## economic_stress_total 2.399796e-10 1.633094e-11 1.760325e-11
## domestic_stress_total 3.068296e-04 4.371898e-04 4.807466e-04
## suicidality_total 6.873520e-03 1.972957e-02 2.138639e-02
## liwc_negative_emotion 4.440892e-16 2.220446e-16 4.440892e-16
## liwc_social_processes 0.000000e+00 0.000000e+00 0.000000e+00
## tfidf_anxieti 2.727778e-01 4.656310e-01 4.893082e-01
## tfidf_depress 2.933061e-01 4.732664e-01 4.723161e-01
## tfidf_stress 9.206043e-01 9.261009e-01 9.487412e-01
## tfidf_struggl 9.453246e-01 9.970344e-01 9.764893e-01
## tfidf_suicid 5.804122e-01 5.420359e-01 5.392253e-01
## depression 7.018263e-01 7.030367e-01 7.141932e-01
## n_words sent_neg sent_neu sent_pos
## automated_readability_index 4.568768e-01 5.070661e-01 1.811322e-01 3.794668e-01
## coleman_liau_index 3.328308e-01 6.160953e-01 2.660677e-01 4.221422e-01
## flesch_kincaid_grade_level 5.779713e-01 4.934370e-01 1.500070e-01 3.260903e-01
## flesch_reading_ease 3.562053e-01 6.647939e-01 2.543197e-01 3.522653e-01
## gunning_fog_index 6.120416e-01 4.960698e-01 1.371492e-01 2.945990e-01
## lix 4.953628e-01 6.720690e-01 1.972870e-01 2.544231e-01
## smog_index 7.942154e-01 5.622176e-01 1.597812e-01 2.801569e-01
## wiener_sachtextformel 3.799505e-01 7.379543e-01 2.492538e-01 2.802704e-01
## n_chars 0.000000e+00 4.670581e-01 3.737143e-01 8.299042e-01
## n_long_words 0.000000e+00 4.839670e-01 3.168284e-01 6.926625e-01
## n_monosyllable_words 0.000000e+00 4.799694e-01 4.187577e-01 8.912713e-01
## n_polysyllable_words 0.000000e+00 4.534731e-01 2.671544e-01 6.378036e-01
## n_sents 0.000000e+00 6.261142e-01 6.228413e-01 9.935118e-01
## n_syllables 0.000000e+00 4.717390e-01 3.773635e-01 8.290220e-01
## n_unique_words 0.000000e+00 4.340151e-01 3.261871e-01 7.926927e-01
## n_words NA 4.778754e-01 4.005844e-01 8.618659e-01
## sent_neg 4.778754e-01 NA 6.364724e-06 3.848485e-02
## sent_neu 4.005844e-01 6.364724e-06 NA 3.380610e-02
## sent_pos 8.618659e-01 3.848485e-02 3.380610e-02 NA
## sent_compound 5.353821e-01 7.296827e-08 2.802572e-01 1.325466e-07
## economic_stress_total 1.900347e-11 4.464696e-01 2.512855e-01 6.143736e-01
## domestic_stress_total 3.932909e-04 8.715747e-01 8.404439e-01 6.306576e-01
## suicidality_total 1.651795e-02 5.461292e-02 1.199282e-01 6.597726e-01
## liwc_negative_emotion 2.220446e-16 4.489143e-01 8.030122e-01 5.050429e-01
## liwc_social_processes 0.000000e+00 3.243131e-01 3.759211e-01 9.003675e-01
## tfidf_anxieti 4.096394e-01 7.757627e-01 4.969533e-01 1.965133e-01
## tfidf_depress 4.177359e-01 4.198586e-02 3.241788e-01 1.886379e-01
## tfidf_stress 9.231973e-01 6.628097e-01 9.971288e-01 5.678112e-01
## tfidf_struggl 9.810562e-01 5.540616e-01 8.412748e-01 6.066019e-01
## tfidf_suicid 5.392274e-01 4.901380e-02 1.621681e-01 4.868837e-01
## depression 6.914956e-01 1.226305e-01 8.319159e-01 7.804112e-02
## sent_compound economic_stress_total
## automated_readability_index 9.722475e-01 6.014776e-01
## coleman_liau_index 9.623247e-01 4.552771e-01
## flesch_kincaid_grade_level 9.072684e-01 7.517473e-01
## flesch_reading_ease 8.671678e-01 5.148323e-01
## gunning_fog_index 8.660115e-01 7.917556e-01
## lix 7.344644e-01 6.658770e-01
## smog_index 7.644762e-01 9.619967e-01
## wiener_sachtextformel 7.471552e-01 5.454435e-01
## n_chars 5.256452e-01 1.873812e-11
## n_long_words 4.503180e-01 3.824918e-11
## n_monosyllable_words 5.503648e-01 2.456124e-11
## n_polysyllable_words 4.519299e-01 2.232314e-10
## n_sents 5.235035e-01 2.399796e-10
## n_syllables 5.219808e-01 1.633094e-11
## n_unique_words 5.214822e-01 1.760325e-11
## n_words 5.353821e-01 1.900347e-11
## sent_neg 7.296827e-08 4.464696e-01
## sent_neu 2.802572e-01 2.512855e-01
## sent_pos 1.325466e-07 6.143736e-01
## sent_compound NA 4.941309e-01
## economic_stress_total 4.941309e-01 NA
## domestic_stress_total 3.085442e-01 2.591706e-03
## suicidality_total 4.173683e-02 6.731178e-02
## liwc_negative_emotion 6.280660e-02 1.167073e-08
## liwc_social_processes 8.224520e-01 9.827357e-10
## tfidf_anxieti 4.147720e-01 5.005133e-01
## tfidf_depress 5.496251e-02 4.059177e-01
## tfidf_stress 4.692288e-01 8.931364e-01
## tfidf_struggl 4.606193e-01 9.977076e-01
## tfidf_suicid 9.644965e-02 4.939978e-01
## depression 3.934575e-02 8.870606e-01
## domestic_stress_total suicidality_total
## automated_readability_index 0.3514946094 0.0817299063
## coleman_liau_index 0.3011543562 0.0808752999
## flesch_kincaid_grade_level 0.3902636257 0.0887432455
## flesch_reading_ease 0.2930982581 0.0757024552
## gunning_fog_index 0.3856291285 0.0788692866
## lix 0.3572365511 0.0918789554
## smog_index 0.4588764368 0.1039518624
## wiener_sachtextformel 0.2942572776 0.0738164778
## n_chars 0.0004362382 0.0204122380
## n_long_words 0.0006703720 0.0346131980
## n_monosyllable_words 0.0003716096 0.0145112068
## n_polysyllable_words 0.0014500144 0.0647781632
## n_sents 0.0003068296 0.0068735203
## n_syllables 0.0004371898 0.0197295709
## n_unique_words 0.0004807466 0.0213863859
## n_words 0.0003932909 0.0165179528
## sent_neg 0.8715746864 0.0546129219
## sent_neu 0.8404439443 0.1199281505
## sent_pos 0.6306576315 0.6597726253
## sent_compound 0.3085442442 0.0417368331
## economic_stress_total 0.0025917058 0.0673117800
## domestic_stress_total NA 0.0971659710
## suicidality_total 0.0971659710 NA
## liwc_negative_emotion 0.0002394507 0.0030391705
## liwc_social_processes 0.0002552831 0.0333074835
## tfidf_anxieti 0.3295988485 0.0911691627
## tfidf_depress 0.5644523592 0.5394400966
## tfidf_stress 0.7061109316 0.6690307430
## tfidf_struggl 0.9549155419 0.8409315412
## tfidf_suicid 0.7214142328 0.0004583266
## depression 0.8298024168 0.6294643831
## liwc_negative_emotion liwc_social_processes
## automated_readability_index 3.747874e-01 3.697836e-01
## coleman_liau_index 2.892956e-01 2.652958e-01
## flesch_kincaid_grade_level 4.768680e-01 4.557473e-01
## flesch_reading_ease 3.156420e-01 2.632972e-01
## gunning_fog_index 5.080415e-01 4.764753e-01
## lix 4.449392e-01 3.753644e-01
## smog_index 7.003868e-01 6.193236e-01
## wiener_sachtextformel 3.511414e-01 2.742498e-01
## n_chars 2.220446e-16 0.000000e+00
## n_long_words 4.884981e-15 0.000000e+00
## n_monosyllable_words 2.220446e-16 0.000000e+00
## n_polysyllable_words 7.212009e-13 8.837375e-14
## n_sents 4.440892e-16 0.000000e+00
## n_syllables 2.220446e-16 0.000000e+00
## n_unique_words 4.440892e-16 0.000000e+00
## n_words 2.220446e-16 0.000000e+00
## sent_neg 4.489143e-01 3.243131e-01
## sent_neu 8.030122e-01 3.759211e-01
## sent_pos 5.050429e-01 9.003675e-01
## sent_compound 6.280660e-02 8.224520e-01
## economic_stress_total 1.167073e-08 9.827357e-10
## domestic_stress_total 2.394507e-04 2.552831e-04
## suicidality_total 3.039170e-03 3.330748e-02
## liwc_negative_emotion NA 4.587442e-13
## liwc_social_processes 4.587442e-13 NA
## tfidf_anxieti 4.332694e-01 2.922483e-01
## tfidf_depress 9.646009e-01 3.278174e-01
## tfidf_stress 9.209923e-01 8.014656e-01
## tfidf_struggl 7.962881e-01 8.929576e-01
## tfidf_suicid 8.346998e-01 4.475223e-01
## depression 9.640749e-01 4.728883e-01
## tfidf_anxieti tfidf_depress tfidf_stress
## automated_readability_index 0.07036605 0.04358118 0.9825543
## coleman_liau_index 0.06596751 0.02883429 0.9595590
## flesch_kincaid_grade_level 0.04565238 0.04298312 0.9895762
## flesch_reading_ease 0.03171056 0.02273415 0.9978878
## gunning_fog_index 0.03106649 0.03353242 0.9798092
## lix 0.03500753 0.02433057 0.9773296
## smog_index 0.01981247 0.02173856 0.9718881
## wiener_sachtextformel 0.02186157 0.01531150 0.9844964
## n_chars 0.46480123 0.48019777 0.9232910
## n_long_words 0.67986343 0.70354061 0.9422776
## n_monosyllable_words 0.36727470 0.37543836 0.9205907
## n_polysyllable_words 0.92206191 0.91602039 0.9459356
## n_sents 0.27277781 0.29330611 0.9206043
## n_syllables 0.46563104 0.47326636 0.9261009
## n_unique_words 0.48930824 0.47231615 0.9487412
## n_words 0.40963941 0.41773592 0.9231973
## sent_neg 0.77576272 0.04198586 0.6628097
## sent_neu 0.49695334 0.32417878 0.9971288
## sent_pos 0.19651325 0.18863790 0.5678112
## sent_compound 0.41477196 0.05496251 0.4692288
## economic_stress_total 0.50051334 0.40591767 0.8931364
## domestic_stress_total 0.32959885 0.56445236 0.7061109
## suicidality_total 0.09116916 0.53944010 0.6690307
## liwc_negative_emotion 0.43326938 0.96460090 0.9209923
## liwc_social_processes 0.29224831 0.32781736 0.8014656
## tfidf_anxieti NA 0.68358016 0.7348288
## tfidf_depress 0.68358016 NA 0.7223105
## tfidf_stress 0.73482884 0.72231047 NA
## tfidf_struggl 0.82685321 0.68388544 0.9818759
## tfidf_suicid 0.39481125 0.74800788 0.6149600
## depression 0.46462764 0.20073852 0.9666620
## tfidf_struggl tfidf_suicid depression
## automated_readability_index 0.7920854 0.8565088953 0.89601080
## coleman_liau_index 0.7847508 0.7484766368 0.87425423
## flesch_kincaid_grade_level 0.7626898 0.9244859455 0.82776268
## flesch_reading_ease 0.7343692 0.8013109399 0.77634058
## gunning_fog_index 0.7623190 0.9994999446 0.77962342
## lix 0.7387527 0.8367443390 0.78567813
## smog_index 0.7247449 0.9924783344 0.72808881
## wiener_sachtextformel 0.7217362 0.8286818777 0.73270312
## n_chars 0.9972040 0.5420804086 0.69754567
## n_long_words 0.9467621 0.5760425127 0.75268467
## n_monosyllable_words 0.9667083 0.5353653224 0.68182912
## n_polysyllable_words 0.9123858 0.5585845161 0.79505229
## n_sents 0.9453246 0.5804121707 0.70182626
## n_syllables 0.9970344 0.5420358771 0.70303671
## n_unique_words 0.9764893 0.5392252554 0.71419322
## n_words 0.9810562 0.5392273711 0.69149564
## sent_neg 0.5540616 0.0490137997 0.12263049
## sent_neu 0.8412748 0.1621680749 0.83191590
## sent_pos 0.6066019 0.4868836832 0.07804112
## sent_compound 0.4606193 0.0964496535 0.03934575
## economic_stress_total 0.9977076 0.4939978468 0.88706060
## domestic_stress_total 0.9549155 0.7214142328 0.82980242
## suicidality_total 0.8409315 0.0004583266 0.62946438
## liwc_negative_emotion 0.7962881 0.8346998262 0.96407490
## liwc_social_processes 0.8929576 0.4475223281 0.47288833
## tfidf_anxieti 0.8268532 0.3948112537 0.46462764
## tfidf_depress 0.6838854 0.7480078839 0.20073852
## tfidf_stress 0.9818759 0.6149600290 0.96666202
## tfidf_struggl NA 0.7654704655 0.93996498
## tfidf_suicid 0.7654705 NA 0.54949892
## depression 0.9399650 0.5494989219 NA
# Correlation plot of res2$r, clustered hierarchically, upper triangle only.
# Correlations whose p-value in res2$P is above 0.01 are left blank.
# NOTE(review): res2 looks like the result of Hmisc::rcorr() ($r, $P) - confirm.
corrplot(
  res2$r,
  type = "upper",
  order = "hclust",
  p.mat = res2$P,
  sig.level = 0.01,
  insig = "blank"
)

# Data-frame copy of datamatrix.
df <- data.frame(datamatrix)
#install.packages("MASS")
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
# Centering/scaling parameters estimated on the cleaned data set.
# NOTE(review): preProcValues is not applied to train.data/test.data in this
# section, so the model below is fitted on the unscaled predictors.
preProcValues <- preProcess(
  socailmedia_2019_features_clean,
  method = c("center", "scale")
)

# Fix the RNG seed so the random split, and therefore the fitted
# coefficients, p-values and VIFs, are reproducible from run to run.
set.seed(123)

# 80/20 train/test split; createDataPartition samples on the outcome
# `subreddit` and returns the row indices of the training set.
trainIndex <- createDataPartition(
  y = socailmedia_2019_features_clean$subreddit,
  p = 0.8,
  list = FALSE,
  times = 1
)
train.data <- socailmedia_2019_features_clean[trainIndex, ]
test.data <- socailmedia_2019_features_clean[-trainIndex, ]

# Full logistic regression of subreddit on the sentiment, stress,
# suicidality and tf-idf predictors, fitted on the training rows only.
SM_lr <- glm(
  subreddit ~ sent_neg + sent_pos + sent_compound + economic_stress_total +
    suicidality_total + tfidf_anxieti + tfidf_stress + tfidf_suicid,
  data = train.data,
  family = binomial
)
summary(SM_lr)
##
## Call:
## glm(formula = subreddit ~ sent_neg + sent_pos + sent_compound +
## economic_stress_total + suicidality_total + tfidf_anxieti +
## tfidf_stress + tfidf_suicid, family = binomial, data = train.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2890 0.1894 0.2953 0.3753 3.5921
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.49292 0.16327 9.144 < 2e-16 ***
## sent_neg 5.75172 0.92499 6.218 5.03e-10 ***
## sent_pos 4.00313 1.03621 3.863 0.000112 ***
## sent_compound 0.22058 0.11561 1.908 0.056390 .
## economic_stress_total 0.06474 0.03637 1.780 0.075073 .
## suicidality_total 0.75749 0.15315 4.946 7.57e-07 ***
## tfidf_anxieti -15.74648 0.57834 -27.227 < 2e-16 ***
## tfidf_stress -5.23959 0.79392 -6.600 4.12e-11 ***
## tfidf_suicid 4.62944 1.64446 2.815 0.004875 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4423.4 on 6193 degrees of freedom
## Residual deviance: 2885.4 on 6185 degrees of freedom
## AIC: 2903.4
##
## Number of Fisher Scoring iterations: 7
# Variance inflation factors for the predictors of SM_lr
# (multicollinearity check on the full model).
vif(SM_lr)
## sent_neg sent_pos sent_compound
## 1.968267 1.758100 2.896438
## economic_stress_total suicidality_total tfidf_anxieti
## 1.032235 1.088967 1.061492
## tfidf_stress tfidf_suicid
## 1.019619 1.068945
# Reduced logistic regression using only the three sentiment scores.
# NOTE: this reuses the name SM_lr, so the previously fitted full model
# object is overwritten from this point on.
SM_lr <- glm(
  subreddit ~ sent_neg + sent_pos + sent_compound,
  data = train.data,
  family = binomial
)
summary(SM_lr)
##
## Call:
## glm(formula = subreddit ~ sent_neg + sent_pos + sent_compound,
## family = binomial, data = train.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7266 0.3953 0.4672 0.5316 0.8059
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.95787 0.12773 7.499 6.43e-14 ***
## sent_neg 3.32452 0.65465 5.078 3.81e-07 ***
## sent_pos 5.05652 0.80735 6.263 3.77e-10 ***
## sent_compound 0.12666 0.08878 1.427 0.154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4423.4 on 6193 degrees of freedom
## Residual deviance: 4329.1 on 6190 degrees of freedom
## AIC: 4337.1
##
## Number of Fisher Scoring iterations: 5
# Variance inflation factors for the predictors of the current SM_lr
# (the sentiment-only model fitted just above).
vif(SM_lr)
## sent_neg sent_pos sent_compound
## 1.730900 1.644989 2.550332
The social media dataset was found to be unsuitable for factor analysis. Hence, instead of factor analysis, a logistic regression model was built. In the full model, most predictor variables are statistically significant. In the output shown above, the variable sent_compound has a p-value of 0.056, which is slightly above 0.05, so it is only marginally significant (significant at the 0.10 level but not at the 0.05 level). The VIF value for sent_compound is 2.9, which is still acceptable with respect to multicollinearity.