#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the 2019 social-media feature table from a local CSV export.
# NOTE(review): the path is absolute and user-specific, so this only runs on a
# machine that has the file at exactly this location.
socailmedia_2019_features <- read.csv(
  "/Users/pallavisaitu/Downloads/socailmedia_2019_features.csv",
  sep = ","
)
# View(socailmedia_2019_features)
# Report the table's dimensions (rows, columns).
dim(socailmedia_2019_features)
## [1] 7742 35
# Preview the first rows of every column.
head(socailmedia_2019_features)
## subreddit author date
## 1 depression anonaccount131 1/1/19
## 2 depression gimlis_beard 1/1/19
## 3 depression WreckDotNet 1/1/19
## 4 depression danieltargaryean 1/1/19
## 5 depression emmanuel169 1/1/19
## 6 depression Lunakinn 1/1/19
## post
## 1 Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart and able back in high school, now I can't do fucking anything. I look at a piece of work/theory and don't even understand it, thinking where tf do I start
## 2 Craving validation from others while immediately rejecting anything positive that other people say about me is a special kind of hell I have no confidence in myself, especially about my physical appearance, so i often look to others for things I can be positive about. However, I imeadiately shoot down any complement with statements like, "It isn't actually all that impressive," or, "they are only saying that to placate me." It feels like I'm just not allowed to feel happy about anything I do.
## 3 Calling the distress line while living at home? How? How can I? \nI can't really afford therapy or anything so this is like my last resort. But I live at home, and I can't go out in public to do so.\n\n&#x200B;
## 4 Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on improving myself. I don't want to be depressed 24/7 anymore. I'm done with people treating me like shit. I'm still my own worst enemy but I'm hoping that will change this year. I already delete social media from my phone, save reddit and messenger, and I hope that this will improve my life somehow. I know everyone here seems to have lost hope and I have too but I don't know, I just hope someone will read this and decide to try and change with me. 2018 was fucking trash and honestly do i expect 2019 to be different? No, but it's not going to be because I didn't try. I wish all of you the best in 2019 and I hope we all live to see 2020. \n\nP.S. If you feel suicidal, a pet might help. I have had suicidal thoughts but I couldn't go through with it knowing I not only leave my parents but my best friend as well. Try looking into it guys. Have a great 2019.
## 5 Anyone just want someone to talk to? I\x89۪m stuck in the desert because I was forced to join the military and am now stuck with 1000\x89۪s who live so loosely and find joy in anything and then there is me. So any type of conversation would be nice. Venting ranting or just light conversation. I can listen and talk. Anyone in need of help I can offer only an ear but I am here.
## 6 How do you know you're on the right medication? I hated my last medication, it seemed to make everything worse and my doctor wouldnt listen to me. When I said it was wrong, he just doubled the dose. So for months I endured it and waited for my anxiety to calm down then ended those attrocious pills with my doctor's approval. \n\nI dont suffer from anxiety anymore, and my mood is much better, but I'm not sure I'm out of this place yet, how do you know?
## automated_readability_index coleman_liau_index flesch_kincaid_grade_level
## 1 10.2180000 5.4667287 9.938636
## 2 9.1317442 9.8955198 9.778465
## 3 -1.1598701 0.8734051 1.879610
## 4 0.8299729 3.0467947 2.356266
## 5 1.3543421 3.5983855 2.865000
## 6 4.2483871 4.6717075 5.553925
## flesch_reading_ease gulpease_index gunning_fog_index lix smog_index
## 1 72.78795 61.90909 13.909091 38.40909 11.208143
## 2 55.59095 59.81395 13.391628 41.61860 12.688353
## 3 92.78227 100.36364 4.332468 17.64935 6.182691
## 4 94.44219 84.27363 5.445590 20.57915 6.782985
## 5 91.44250 82.28947 5.378947 21.34211 6.627428
## 6 82.85089 70.29032 8.780645 26.25269 8.841846
## wiener_sachtextformel n_chars n_long_words n_monosyllable_words
## 1 3.810272727 209 6 46
## 2 6.188630698 401 21 56
## 3 0.002562338 160 5 34
## 4 0.302550249 755 23 166
## 5 0.345505263 291 9 62
## 6 1.722513978 354 10 75
## n_polysyllable_words n_sents n_syllables n_unique_words n_words sent_neg
## 1 4 2 69 45 55 0.129
## 2 14 5 136 67 86 0.121
## 3 2 7 56 35 44 0.090
## 4 9 22 245 117 201 0.157
## 5 3 8 95 55 76 0.066
## 6 6 6 119 65 93 0.132
## sent_neu sent_pos sent_compound economic_stress_total domestic_stress_total
## 1 0.775 0.096 -0.4215 0 0
## 2 0.663 0.217 0.8658 2 0
## 3 0.847 0.063 -0.2906 1 0
## 4 0.559 0.284 0.9846 2 0
## 5 0.817 0.117 0.4404 0 0
## 6 0.787 0.082 -0.5615 0 0
## suicidality_total liwc_negative_emotion liwc_social_processes tfidf_anxieti
## 1 0 2 0 0.0000000
## 2 0 2 3 0.0000000
## 3 0 1 2 0.0000000
## 4 0 5 9 0.0000000
## 5 0 0 8 0.0000000
## 6 0 3 5 0.2973842
## tfidf_depress tfidf_stress tfidf_struggl tfidf_suicid
## 1 0.00000000 0 0 0.0000000
## 2 0.00000000 0 0 0.0000000
## 3 0.00000000 0 0 0.0000000
## 4 0.09623711 0 0 0.2444397
## 5 0.00000000 0 0 0.0000000
## 6 0.00000000 0 0 0.0000000
#summary(socailmedia_2019_features)
#str(socailmedia_2019_features)
# Count missing values across the whole feature table (computed once; the
# original repeated the identical call twice in a row).
sum(is.na(socailmedia_2019_features))
## [1] 0
# Drop any rows that contain an NA, then confirm none remain in the cleaned
# copy that the rest of the analysis uses.
socailmedia_2019_features_clean <- na.omit(socailmedia_2019_features)
sum(is.na(socailmedia_2019_features_clean))
## [1] 0
#install.packages("caret")
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Estimate centering and scaling parameters for the cleaned table.
# NOTE(review): preProcValues is never applied (e.g. via predict()) in the
# visible code, so the models below are fit on the unscaled columns of
# socailmedia_2019_features_clean - confirm whether scaling was intended.
preProcValues <- preProcess(socailmedia_2019_features_clean, method = c("center", "scale"))
Nominal (multinomial) logistic regression models the relationship between a set of independent variables and a nominal dependent variable. A nominal variable has three or more categories with no natural order, such as scratch, dent, and tear; in this analysis the nominal outcome is the subreddit (depression, anxiety, or lonely).
library(caret)
# Fix the RNG seed so the random 80/20 split (and therefore every model fit and
# metric derived from it) is reproducible from run to run.
set.seed(123)
# Stratified split on subreddit: 80% of the rows of each class go to training.
# list = FALSE returns the selected row indices as a matrix rather than a list.
trainIndex <- createDataPartition(
  y = socailmedia_2019_features_clean$subreddit,
  p = 0.8,
  list = FALSE,
  times = 1
)
# Rows selected by the partition form the training set; the rest are held out.
train.data <- socailmedia_2019_features_clean[trainIndex, ]
test.data <- socailmedia_2019_features_clean[-trainIndex, ]
#install.packages("nnet")
library(nnet)
# Make "depression" the reference outcome for the multinomial model.
# relevel() only accepts factors, and read.csv() returns character columns
# under the R >= 4.0 default (stringsAsFactors = FALSE), so coerce explicitly;
# factor() is a no-op on the level labels when the column is already a factor.
train.data$subreddit <- relevel(factor(train.data$subreddit), ref = "depression")
# Multinomial logistic regression of subreddit on sentiment, stress/suicidality
# counts and selected tf-idf terms, fitted on the training rows only.
model <- nnet::multinom(
  subreddit ~ sent_neg + economic_stress_total + suicidality_total +
    tfidf_anxieti + tfidf_stress + tfidf_suicid,
  data = train.data
)
## # weights: 24 (14 variable)
## initial value 6804.804516
## iter 10 value 3035.201009
## iter 20 value 2937.351797
## final value 2937.097270
## converged
# Print the fitted model's summary: the call, per-outcome coefficients and
# standard errors, residual deviance and AIC.
summary(model)
## Call:
## nnet::multinom(formula = subreddit ~ sent_neg + economic_stress_total +
## suicidality_total + tfidf_anxieti + tfidf_stress + tfidf_suicid,
## data = train.data)
##
## Coefficients:
## (Intercept) sent_neg economic_stress_total suicidality_total
## anxiety -2.093620 -3.764962 -0.09808071 -0.9719750
## lonely -1.220264 -4.550001 -0.26423708 -0.7835405
## tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety 16.002402 5.423802 -5.490855
## lonely -8.557884 -2.753378 -5.748509
##
## Std. Errors:
## (Intercept) sent_neg economic_stress_total suicidality_total
## anxiety 0.1167143 0.6927750 0.03822321 0.1776349
## lonely 0.1000413 0.6284113 0.05271532 0.1624789
## tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety 0.5875291 0.8213883 1.761306
## lonely 2.0766939 1.8743026 1.826388
##
## Residual Deviance: 5874.195
## AIC: 5902.195
# Predicted subreddit class for every held-out row ("class" is the default
# prediction type for multinom fits; spelled out here for clarity).
predictions <- predict(model, newdata = test.data, type = "class")
#install.packages('DescTools')
library(DescTools)
##
## Attaching package: 'DescTools'
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
# Binomial GLM with the same predictors as the multinomial model, used only as
# input to PseudoR2() below.
# NOTE(review): subreddit has three outcomes here (depression as reference plus
# anxiety and lonely, per the multinom summary). With family=binomial, R's glm
# documentation says a factor response is read as "first level" vs. "any other
# level", so this looks like a depression-vs-rest model - TODO confirm that is
# intended rather than computing the pseudo R-squared of `model` itself.
r.glm <- glm(subreddit ~ sent_neg + economic_stress_total + suicidality_total + tfidf_anxieti + tfidf_stress + tfidf_suicid, data = train.data, family=binomial)
# Default pseudo R-squared (McFadden) for r.glm.
PseudoR2(r.glm)
## McFadden
## 0.1971462
# Every available pseudo R-squared variant, plus AIC, BIC and log-likelihoods.
PseudoR2(r.glm, which='all')
## McFadden McFaddenAdj CoxSnell Nagelkerke AldrichNelson
## 0.1971462 0.1948440 0.1759792 0.2814017 0.1621700
## VeallZimmermann Efron McKelveyZavoina Tjur AIC
## 0.3273449 0.2198631 0.4111034 0.2183098 4896.4052969
## BIC logLik logLik0 G2
## 4943.5246514 -2441.2026485 -3040.6564139 1198.9075309
# Keep the model summary so its coefficient and standard-error matrices can be
# reused below.
mlogit_output1 <- summary(model)
#mlogit_output1$coefficients
#mlogit_output1$standard.errors
# Wald z statistic: each coefficient divided by its standard error.
z <- mlogit_output1[["coefficients"]] / mlogit_output1[["standard.errors"]]
# Two-tailed p-value of each z statistic under the standard normal.
p <- 2 * (1 - pnorm(abs(z), mean = 0, sd = 1))
p
## (Intercept) sent_neg economic_stress_total suicidality_total
## anxiety 0 5.491652e-08 1.028792e-02 4.455949e-08
## lonely 0 4.471978e-13 5.371916e-07 1.418306e-06
## tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety 0.000000e+00 4.023426e-11 0.001823974
## lonely 3.773666e-05 1.418288e-01 0.001646874
# Stack the coefficient matrix on top of the standard-error matrix. Each matrix
# has one row per non-reference outcome (anxiety, lonely), so the result has
# four rows: two of coefficients followed by two of standard errors.
Pquality5 <- rbind(mlogit_output1$coefficients, mlogit_output1$standard.errors)
Pquality5
## (Intercept) sent_neg economic_stress_total suicidality_total
## anxiety -2.0936203 -3.7649617 -0.09808071 -0.9719750
## lonely -1.2202643 -4.5500007 -0.26423708 -0.7835405
## anxiety 0.1167143 0.6927750 0.03822321 0.1776349
## lonely 0.1000413 0.6284113 0.05271532 0.1624789
## tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety 16.0024024 5.4238018 -5.490855
## lonely -8.5578844 -2.7533780 -5.748509
## anxiety 0.5875291 0.8213883 1.761306
## lonely 2.0766939 1.8743026 1.826388
# Label each row with its statistic AND its outcome. The previous labels
# ("Coefficient", "Std. Errors", "z stat", "p value") did not match the rows:
# the matrix holds no z or p values, and rows 2-4 were the lonely coefficients
# and the two standard-error rows. The z statistics and p-values live in the
# separate objects `z` and `p`.
outcome_names <- rownames(mlogit_output1$coefficients)
rownames(Pquality5) <- c(
  paste0("Coefficient (", outcome_names, ")"),
  paste0("Std. Error (", outcome_names, ")")
)
knitr::kable(Pquality5)
| | (Intercept) | sent_neg | economic_stress_total | suicidality_total | tfidf_anxieti | tfidf_stress | tfidf_suicid |
|---|---|---|---|---|---|---|---|
| Coefficient (anxiety) | -2.0936203 | -3.7649617 | -0.0980807 | -0.9719750 | 16.0024024 | 5.4238018 | -5.490855 |
| Coefficient (lonely) | -1.2202643 | -4.5500007 | -0.2642371 | -0.7835405 | -8.5578844 | -2.7533780 | -5.748509 |
| Std. Error (anxiety) | 0.1167143 | 0.6927750 | 0.0382232 | 0.1776349 | 0.5875291 | 0.8213883 | 1.761305 |
| Std. Error (lonely) | 0.1000413 | 0.6284113 | 0.0527153 | 0.1624789 | 2.0766939 | 1.8743026 | 1.826388 |
#' Root-mean-square of a vector of errors.
#'
#' @param error Numeric vector of residuals (observed minus predicted).
#' @return The square root of the mean of the squared errors.
rmse <- function(error) {
  mean_sq <- mean(error^2)
  sqrt(mean_sq)
}
#error <- model$residuals
# `predictions` was produced from test.data, so the reference labels must come
# from test.data as well. Comparing against train.data subtracted vectors of
# different lengths (hence the former "longer object length is not a multiple
# of shorter object length" warning) and paired unrelated rows via recycling.
# The labels are encoded with the level order of `predictions` so the integer
# codes of both vectors refer to the same classes.
test_labels <- factor(test.data$subreddit, levels = levels(predictions))
error <- as.numeric(test_labels) - as.numeric(predictions)
# NOTE(review): subreddit is nominal, so an RMSE over class codes depends on
# the arbitrary level order; a classification metric such as accuracy may be
# more meaningful here.
svrPredictionRMSE <- rmse(error)
svrPredictionRMSE
# The previously recorded value (0.6627942) came from the train/test mismatch
# and needs to be regenerated by re-running this chunk.
Model evaluation criteria:

- RMSE = 66.3% (0.6627942)
- R-squared: higher is better; 0.70 for sent_neg, tfidf_depress, tfidf_anxieti, tfidf_stress, tfidf_suicid
- Adjusted R-squared: higher is better
- F-statistic: higher is better
- Std. error: closer to zero is better
- t-statistic: greater than 1.96 for the p-value to be less than 0.05
- AIC = 4940.6733357
- BIC = 4987.7926902