#install.packages("tidyverse")
library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the 2019 social-media feature table (one row per post) from the
# author's local Downloads folder; read.csv() already defaults to a comma
# separator.
socailmedia_2019_features <- read.csv(
  "/Users/pallavisaitu/Downloads/socailmedia_2019_features.csv"
)
# View(socailmedia_2019_features)
# Rows x columns of the raw table, followed by a preview of the first rows.
dim(socailmedia_2019_features)
## [1] 7742   35
head(socailmedia_2019_features)
##    subreddit           author   date
## 1 depression   anonaccount131 1/1/19
## 2 depression     gimlis_beard 1/1/19
## 3 depression      WreckDotNet 1/1/19
## 4 depression danieltargaryean 1/1/19
## 5 depression      emmanuel169 1/1/19
## 6 depression         Lunakinn 1/1/19
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        post
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Anyone else feel like they're the stupidest, most inept sack of shit in the world I used to actually be smart and able back in high school, now I can't do fucking anything. I look at a piece of work/theory and don't even understand it, thinking where tf do I start
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Craving validation from others while immediately rejecting anything positive that other people say about me is a special kind of hell I have no confidence in myself, especially about my physical appearance, so i often look to others for things I can be positive about. However, I imeadiately shoot down any complement with statements like, "It isn't actually all that impressive," or, "they are only saying that to placate me." It feels like I'm just not allowed to feel happy about anything I do. 
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Calling the distress line while living at home? How? How can I?  \nI can't really afford therapy or anything so this is like my last resort. But I live at home, and I can't go out in public to do so.\n\n&amp;#x200B;
## 4 Only been here less than a month but I'm ready to unsubscribe Starting this year, I want time to focus on improving myself. I don't want to be depressed 24/7 anymore. I'm done with people treating me like shit. I'm still my own worst enemy but I'm hoping that will change this year. I already delete social media from my phone, save reddit and messenger, and I hope that this will improve my life somehow. I know everyone here seems to have lost hope and I have too but I don't know, I just hope someone will read this and decide to try and change with me. 2018 was fucking trash and honestly do i expect 2019 to be different? No, but it's not going to be because I didn't try. I wish all of you the best in 2019 and I hope we all live to see 2020. \n\nP.S. If you feel suicidal, a pet might help. I have had suicidal thoughts but I couldn't go through with it knowing I not only leave my parents but my best friend as well. Try looking into it guys. Have a great 2019.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Anyone just want someone to talk to? I\x89۪m stuck in the desert because I was forced to join the military and am now stuck with 1000\x89۪s who live so loosely and find joy in anything and then there is me. So any type of conversation would be nice. Venting ranting or just light conversation. I can listen and talk. Anyone in need of help I can offer only an ear but I am here. 
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    How do you know you're on the right medication? I hated my last medication, it seemed to make everything worse and my doctor wouldnt listen to me. When I said it was wrong, he just doubled the dose. So for months I endured it and waited for my anxiety to calm down then ended those attrocious pills with my doctor's approval. \n\nI dont suffer from anxiety anymore, and my mood is much better, but I'm not sure I'm out of this place yet, how do you know?
##   automated_readability_index coleman_liau_index flesch_kincaid_grade_level
## 1                  10.2180000          5.4667287                   9.938636
## 2                   9.1317442          9.8955198                   9.778465
## 3                  -1.1598701          0.8734051                   1.879610
## 4                   0.8299729          3.0467947                   2.356266
## 5                   1.3543421          3.5983855                   2.865000
## 6                   4.2483871          4.6717075                   5.553925
##   flesch_reading_ease gulpease_index gunning_fog_index      lix smog_index
## 1            72.78795       61.90909         13.909091 38.40909  11.208143
## 2            55.59095       59.81395         13.391628 41.61860  12.688353
## 3            92.78227      100.36364          4.332468 17.64935   6.182691
## 4            94.44219       84.27363          5.445590 20.57915   6.782985
## 5            91.44250       82.28947          5.378947 21.34211   6.627428
## 6            82.85089       70.29032          8.780645 26.25269   8.841846
##   wiener_sachtextformel n_chars n_long_words n_monosyllable_words
## 1           3.810272727     209            6                   46
## 2           6.188630698     401           21                   56
## 3           0.002562338     160            5                   34
## 4           0.302550249     755           23                  166
## 5           0.345505263     291            9                   62
## 6           1.722513978     354           10                   75
##   n_polysyllable_words n_sents n_syllables n_unique_words n_words sent_neg
## 1                    4       2          69             45      55    0.129
## 2                   14       5         136             67      86    0.121
## 3                    2       7          56             35      44    0.090
## 4                    9      22         245            117     201    0.157
## 5                    3       8          95             55      76    0.066
## 6                    6       6         119             65      93    0.132
##   sent_neu sent_pos sent_compound economic_stress_total domestic_stress_total
## 1    0.775    0.096       -0.4215                     0                     0
## 2    0.663    0.217        0.8658                     2                     0
## 3    0.847    0.063       -0.2906                     1                     0
## 4    0.559    0.284        0.9846                     2                     0
## 5    0.817    0.117        0.4404                     0                     0
## 6    0.787    0.082       -0.5615                     0                     0
##   suicidality_total liwc_negative_emotion liwc_social_processes tfidf_anxieti
## 1                 0                     2                     0     0.0000000
## 2                 0                     2                     3     0.0000000
## 3                 0                     1                     2     0.0000000
## 4                 0                     5                     9     0.0000000
## 5                 0                     0                     8     0.0000000
## 6                 0                     3                     5     0.2973842
##   tfidf_depress tfidf_stress tfidf_struggl tfidf_suicid
## 1    0.00000000            0             0    0.0000000
## 2    0.00000000            0             0    0.0000000
## 3    0.00000000            0             0    0.0000000
## 4    0.09623711            0             0    0.2444397
## 5    0.00000000            0             0    0.0000000
## 6    0.00000000            0             0    0.0000000
#summary(socailmedia_2019_features)
#str(socailmedia_2019_features)
# Total number of missing cells in the raw table.
socailmedia_2019_features %>% is.na() %>% sum()
## [1] 0
#sum(is.na(socailmedia_2019_features))
socailmedia_2019_features %>% is.na() %>% sum()
## [1] 0
# Drop any row containing an NA, then confirm nothing missing remains.
socailmedia_2019_features_clean <- na.omit(socailmedia_2019_features)
socailmedia_2019_features_clean %>% is.na() %>% sum()
## [1] 0

Step 1. Normalization

#install.packages("caret")
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
# Estimate centering and scaling parameters from the cleaned table.
# NOTE(review): this only *estimates* the transformation. Nothing in the
# script as shown calls predict(preProcValues, ...), so the data passed to the
# models below is not actually normalized. The parameters are also estimated
# on the full table before the train/test split -- confirm whether that is
# intended.
preProcValues <- caret::preProcess(
  x = socailmedia_2019_features_clean,
  method = c("center", "scale")
)

Step 2. Choosing a Regression Model

Nominal logistic regression models the relationship between a set of independent variables and a nominal dependent variable. A nominal variable has at least three groups which do not have a natural order, such as scratch, dent, and tear.

Step 3. Train/Test Split

library(caret)
# createDataPartition() samples at random, so fix the RNG seed first; without
# it every run produces a different 80/20 split and different model output.
# (The outputs recorded in this document come from an unseeded run and will
# differ slightly from a re-run.)
set.seed(2019)
trainIndex <- createDataPartition(
  y = socailmedia_2019_features_clean$subreddit,
  p = 0.8,
  list = FALSE,
  times = 1
)
# Rows selected by the partition form the training set; the rest are held out.
train.data <- socailmedia_2019_features_clean[trainIndex, ]
test.data <- socailmedia_2019_features_clean[-trainIndex, ]

Step 4. Building Model

#install.packages("nnet")
library(nnet)
# Make "depression" the reference class for the multinomial fit.
# relevel() only accepts factors, and read.csv() returns character columns on
# R >= 4.0 (stringsAsFactors defaults to FALSE), so coerce explicitly; this is
# a no-op when the column is already a factor.
train.data$subreddit <- relevel(factor(train.data$subreddit), ref = "depression")
# Multinomial logistic regression of subreddit on the selected sentiment,
# stress, suicidality and tf-idf features.
model <- nnet::multinom(
  subreddit ~ sent_neg + economic_stress_total + suicidality_total +
    tfidf_anxieti + tfidf_stress + tfidf_suicid,
  data = train.data
)
## # weights:  24 (14 variable)
## initial  value 6804.804516 
## iter  10 value 3035.201009
## iter  20 value 2937.351797
## final  value 2937.097270 
## converged

Step 5. Model Summary

## Display the fitted model: per-class coefficients, standard errors,
## residual deviance and AIC.
print(summary(model))
## Call:
## nnet::multinom(formula = subreddit ~ sent_neg + economic_stress_total + 
##     suicidality_total + tfidf_anxieti + tfidf_stress + tfidf_suicid, 
##     data = train.data)
## 
## Coefficients:
##         (Intercept)  sent_neg economic_stress_total suicidality_total
## anxiety   -2.093620 -3.764962           -0.09808071        -0.9719750
## lonely    -1.220264 -4.550001           -0.26423708        -0.7835405
##         tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety     16.002402     5.423802    -5.490855
## lonely      -8.557884    -2.753378    -5.748509
## 
## Std. Errors:
##         (Intercept)  sent_neg economic_stress_total suicidality_total
## anxiety   0.1167143 0.6927750            0.03822321         0.1776349
## lonely    0.1000413 0.6284113            0.05271532         0.1624789
##         tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety     0.5875291    0.8213883     1.761306
## lonely      2.0766939    1.8743026     1.826388
## 
## Residual Deviance: 5874.195 
## AIC: 5902.195

Step 6. Predictions

# Predicted subreddit class for every held-out row.
predictions <- predict(model, newdata = test.data, type = "class")

Step 7. Evaluation

#install.packages('DescTools')
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
# Pseudo-R^2 statistics from a binomial GLM on the same predictors.
# NOTE(review): subreddit has three classes (depression, anxiety, lonely).
# With family = binomial a factor response is treated as "first level" vs.
# "any other level", so this is a depression-vs-rest model and its pseudo-R^2
# values do not describe the multinomial `model` fitted above. The DescTools
# documentation lists multinom objects as supported input, so
# PseudoR2(model) is probably what is wanted -- confirm and re-run.
r.glm <- glm(
  subreddit ~ sent_neg + economic_stress_total + suicidality_total +
    tfidf_anxieti + tfidf_stress + tfidf_suicid,
  family = binomial,
  data = train.data
)
PseudoR2(r.glm)
##  McFadden 
## 0.1971462
PseudoR2(r.glm, which = "all")
##        McFadden     McFaddenAdj        CoxSnell      Nagelkerke   AldrichNelson 
##       0.1971462       0.1948440       0.1759792       0.2814017       0.1621700 
## VeallZimmermann           Efron McKelveyZavoina            Tjur             AIC 
##       0.3273449       0.2198631       0.4111034       0.2183098    4896.4052969 
##             BIC          logLik         logLik0              G2 
##    4943.5246514   -2441.2026485   -3040.6564139    1198.9075309
# Wald tests for the multinomial coefficients.
mlogit_output1 <- summary(model)
#mlogit_output1$coefficients
#mlogit_output1$standard.errors
# z statistic = estimate / standard error, element-wise per class and term.
z <- mlogit_output1[["coefficients"]] / mlogit_output1[["standard.errors"]]
# Two-sided p-value from the standard normal distribution; very small values
# print as exactly 0 because of the 1 - pnorm() subtraction.
p <- 2 * (1 - pnorm(abs(z)))
p
##         (Intercept)     sent_neg economic_stress_total suicidality_total
## anxiety           0 5.491652e-08          1.028792e-02      4.455949e-08
## lonely            0 4.471978e-13          5.371916e-07      1.418306e-06
##         tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety  0.000000e+00 4.023426e-11  0.001823974
## lonely   3.773666e-05 1.418288e-01  0.001646874
# Stack coefficients on top of standard errors. Each input has one row per
# non-reference class (anxiety, lonely), so the result has 2 x 2 = 4 rows.
Pquality5 <- rbind(mlogit_output1$coefficients,mlogit_output1$standard.errors)
Pquality5
##         (Intercept)   sent_neg economic_stress_total suicidality_total
## anxiety  -2.0936203 -3.7649617           -0.09808071        -0.9719750
## lonely   -1.2202643 -4.5500007           -0.26423708        -0.7835405
## anxiety   0.1167143  0.6927750            0.03822321         0.1776349
## lonely    0.1000413  0.6284113            0.05271532         0.1624789
##         tfidf_anxieti tfidf_stress tfidf_suicid
## anxiety    16.0024024    5.4238018    -5.490855
## lonely     -8.5578844   -2.7533780    -5.748509
## anxiety     0.5875291    0.8213883     1.761306
## lonely      2.0766939    1.8743026     1.826388
# Label every row with the statistic AND the class it belongs to. The former
# labels ("Coefficient", "Std. Errors", "z stat", "p value") did not match
# the row contents: rows 1-2 are coefficients and rows 3-4 are standard
# errors; z and p are not part of this table (they live in `z` and `p`).
rownames(Pquality5) <- paste0(
  rep(c("Coefficient", "Std. Errors"), each = nrow(mlogit_output1$coefficients)),
  ": ",
  rownames(Pquality5)
)
knitr::kable(Pquality5)
(Intercept) sent_neg economic_stress_total suicidality_total tfidf_anxieti tfidf_stress tfidf_suicid
Coefficient: anxiety -2.0936203 -3.7649617 -0.0980807 -0.9719750 16.0024024 5.4238018 -5.490855
Coefficient: lonely -1.2202643 -4.5500007 -0.2642371 -0.7835405 -8.5578844 -2.7533780 -5.748509
Std. Errors: anxiety 0.1167143 0.6927750 0.0382232 0.1776349 0.5875291 0.8213883 1.761305
Std. Errors: lonely 0.1000413 0.6284113 0.0527153 0.1624789 2.0766939 1.8743026 1.826388
# Root-mean-square of a numeric vector of errors.
#   error: numeric vector of differences (observed - predicted).
#   Returns a single number: sqrt of the mean squared error.
rmse <- function(error) {
  mean_squared <- mean(error^2)
  sqrt(mean_squared)
}
 
#error <- model$residuals
# `predictions` were produced from test.data, so they must be compared with
# the test labels. Subtracting them from the training labels mixed vectors of
# different lengths and triggered R's recycling warning.
# Both sides are encoded on the same level order (that of `predictions`),
# because only train.data$subreddit was releveled and test.data may order the
# classes differently.
actual_codes <- as.integer(factor(test.data$subreddit, levels = levels(predictions)))
predicted_codes <- as.integer(predictions)
error <- actual_codes - predicted_codes
svrPredictionRMSE <- rmse(error)
svrPredictionRMSE
# NOTE: the value 0.6627942 recorded in the original render came from the
# mismatched train/test vectors; re-run to obtain the corrected figure.
# RMSE on integer class codes depends on the arbitrary level order; accuracy
# or a confusion matrix is the more meaningful metric for a nominal outcome.

Step 8. Results

RMSE = 0.663; R-Squared = Higher 0.70 for sent_neg, tfidf_depress, tfidf_anxieti, tfidf_stress, tfidf_suicid; Adj R-Squared = Higher; F-Statistic = Higher; Std. Error = Closer to zero; t-statistic = greater than 1.96 for the p-value to be less than 0.05; AIC = 4940.6733357
BIC = 4987.7926902