library(tidyverse)
library(psych)

Data Preparation

# Read the survey CSV directly from its GitHub copy, then preview the first rows.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/christianthieme/MSDS-DATA606/master/Analysis%20Project/depression.csv"
)
head(data, n = 6L)
## # A tibble: 6 x 50
##   inter_dom Region Gender Academic   Age Age_cate  Stay Stay_Cate Japanese
##   <chr>     <chr>  <chr>  <chr>    <dbl>    <dbl> <dbl> <chr>        <dbl>
## 1 Inter     SEA    Male   Grad        24        4     5 Long             3
## 2 Inter     SEA    Male   Grad        28        5     1 Short            4
## 3 Inter     SEA    Male   Grad        25        4     6 Long             4
## 4 Inter     EA     Female Grad        29        5     1 Short            2
## 5 Inter     EA     Female Grad        28        5     1 Short            1
## 6 Inter     SEA    Male   Grad        24        4     6 Long             3
## # ... with 41 more variables: Japanese_cate <chr>, English <dbl>,
## #   English_cate <chr>, Intimate <chr>, Religion <chr>, Suicide <chr>,
## #   Dep <chr>, DepType <chr>, ToDep <dbl>, DepSev <chr>, ToSC <dbl>, APD <dbl>,
## #   AHome <dbl>, APH <dbl>, Afear <dbl>, ACS <dbl>, AGuilt <dbl>,
## #   AMiscell <dbl>, ToAS <dbl>, Partner <dbl>, Friends <dbl>, Parents <dbl>,
## #   Relative <dbl>, Profess <dbl>, Phone <dbl>, Doctor <dbl>, Reli <dbl>,
## #   Alone <dbl>, Others <dbl>, Internet <dbl>, Partner_bi <chr>,
## #   Friends_bi <chr>, Parents_bi <chr>, Relative_bi <chr>,
## #   Professional_bi <chr>, Phone_bi <chr>, Doctor_bi <chr>, religion_bi <chr>,
## #   Alone_bi <chr>, Others_bi <chr>, Internet_bi <chr>

Research question

Which factors are most predictive of depression in international students? Are there differences between genders in depression rate? Does having an intimate relationship affect depression rate? Does being religious affect depression rate?

Cases

The cases are international and domestic students from an international university in Japan. There are 268 cases.

Data collection

This data set was collected through a survey as part of a research paper by Minh-Hoang Nguyen, Manh-Toan Ho, Quynh-Yen T. Nguyen, and Quan-Hoang Vuong in an effort to investigate “Help-Seeking Behaviors in a Multicultural Environment”. This research was requested by the Research Office of Ritsumeikan Asia Pacific University. The study can be found here.

Type of study

This is an observational study.

Data Source

The data set can be downloaded here. For this project, I will store the data set in my GitHub account here.

Response Variable

The response variable is the “Total score of depression measured by PHQ-9” (the ToDep column). PHQ-9 is a “Patient Health Questionnaire” used to assess/diagnose depression. More info about PHQ-9 can be found here. The PHQ-9 total ranges from 0 to 27 (nine items, each scored 0–3), with higher scores indicating more severe depression. The variable is numeric.

Explanatory Variable(s)

For my first question, the point of my analysis is to find which of the variables is the strongest predictor of depression (that is, the independent variable with the greatest effect). Several variables may play a role; I would like to find which one plays the largest. The variables are as follows (all are numeric):

Relevant summary statistics

PHQ-9 summary data:

# Descriptive statistics for the PHQ-9 total score column (ToDep).
psych::describe(data[["ToDep"]])
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis  se
## X1    1 268 8.19 4.95      8    7.89 4.45   0  25    25 0.61     0.52 0.3
# Histogram of the PHQ-9 total score (ToDep). Explicit title and axis label
# replace the defaults, which would otherwise show the raw expression
# "data$ToDep".
hist(
  data$ToDep,
  main = "Distribution of PHQ-9 total depression score",
  xlab = "PHQ-9 total score (ToDep)"
)

Total social connectedness summary statistics:

# Descriptive statistics for the total social connectedness score (ToSC).
psych::describe(data[["ToSC"]])
##    vars   n  mean   sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 268 37.47 9.23     40   38.44 10.38   8  48    40 -0.78    -0.11 0.56

Perceived discrimination:

# Descriptive statistics for the perceived discrimination score (APD).
psych::describe(data[["APD"]])
##    vars   n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 15.41 6.17     16   14.88 7.41   8  39    31 0.67     0.16 0.38

Homesickness:

# Descriptive statistics for the homesickness score (AHome).
psych::describe(data[["AHome"]])
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 9.61 4.01      9    9.44 4.45   4  20    16 0.26    -0.68 0.25

Perceived Hatred:

# Descriptive statistics for the perceived hatred score (APH).
psych::describe(data[["APH"]])
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 9.14 4.19      9    8.62 5.93   5  25    20 0.86      0.2 0.26

Fear:

# Descriptive statistics for the fear score (Afear).
psych::describe(data[["Afear"]])
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 7.26 3.11      7    6.91 4.45   4  17    13 0.74     -0.2 0.19

Culture Shock:

# Descriptive statistics for the culture shock score (ACS).
psych::describe(data[["ACS"]])
##    vars   n mean  sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 6.06 2.6      6    5.81 2.97   3  13    10 0.55    -0.49 0.16

Guilt:

# Descriptive statistics for the guilt score (AGuilt).
psych::describe(data[["AGuilt"]])
##    vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 3.78 1.91      4    3.51 2.97   2  10     8 0.98     0.26 0.12

Miscellaneous:

# Descriptive statistics for the miscellaneous stress score (AMiscell).
psych::describe(data[["AMiscell"]])
##    vars   n  mean  sd median trimmed  mad min max range skew kurtosis   se
## X1    1 268 21.12 7.4     20   20.75 7.41  10  47    37 0.47    -0.03 0.45

Total score of Acculturative Stress:

# Descriptive statistics for the total acculturative stress score (ToAS).
psych::describe(data[["ToAS"]])
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 268 72.38 22.64     72    71.5 23.72  36 145   109 0.35    -0.26 1.38

Correlation to depression:

# Correlation of each scale score with the PHQ-9 total (ToDep).
# Columns are selected by name instead of by position (18, 20:28) so that a
# change in the CSV's column order cannot silently pull in the wrong variables.
# Rows with any missing value in these columns are dropped before correlating.
dep_cor_cols <- c(
  "ToDep", "ToSC", "APD", "AHome", "APH", "Afear", "ACS", "AGuilt",
  "AMiscell", "ToAS"
)
cor(na.omit(data[, dep_cor_cols]))[, "ToDep"]
##      ToDep       ToSC        APD      AHome        APH      Afear        ACS 
##  1.0000000 -0.5517953  0.3402557  0.1685867  0.3092537  0.3486300  0.2743267 
##     AGuilt   AMiscell       ToAS 
##  0.3059476  0.3333420  0.3940945
# Bar chart of each scale score's correlation with the PHQ-9 total (ToDep).
# Columns are selected by name rather than by position so the plot stays
# correct if the CSV's column order changes; rows with missing values in these
# columns are dropped first. The ToDep bar is its correlation with itself (1).
dep_cor <- cor(na.omit(data[, c(
  "ToDep", "ToSC", "APD", "AHome", "APH", "Afear", "ACS", "AGuilt",
  "AMiscell", "ToAS"
)]))[, "ToDep"]
barplot(
  dep_cor,
  main = "Correlation with PHQ-9 total score (ToDep)",
  ylab = "Pearson correlation",
  las = 2  # rotate bar names so all ten labels are drawn
)