library(tidyverse)
library(psych)
# Import the depression survey CSV from its GitHub-hosted copy.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/christianthieme/MSDS-DATA606/master/Analysis%20Project/depression.csv"
)
# Preview the first six rows to check the imported columns.
head(data, n = 6L)
## # A tibble: 6 x 50
## inter_dom Region Gender Academic Age Age_cate Stay Stay_Cate Japanese
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Inter SEA Male Grad 24 4 5 Long 3
## 2 Inter SEA Male Grad 28 5 1 Short 4
## 3 Inter SEA Male Grad 25 4 6 Long 4
## 4 Inter EA Female Grad 29 5 1 Short 2
## 5 Inter EA Female Grad 28 5 1 Short 1
## 6 Inter SEA Male Grad 24 4 6 Long 3
## # ... with 41 more variables: Japanese_cate <chr>, English <dbl>,
## # English_cate <chr>, Intimate <chr>, Religion <chr>, Suicide <chr>,
## # Dep <chr>, DepType <chr>, ToDep <dbl>, DepSev <chr>, ToSC <dbl>, APD <dbl>,
## # AHome <dbl>, APH <dbl>, Afear <dbl>, ACS <dbl>, AGuilt <dbl>,
## # AMiscell <dbl>, ToAS <dbl>, Partner <dbl>, Friends <dbl>, Parents <dbl>,
## # Relative <dbl>, Profess <dbl>, Phone <dbl>, Doctor <dbl>, Reli <dbl>,
## # Alone <dbl>, Others <dbl>, Internet <dbl>, Partner_bi <chr>,
## # Friends_bi <chr>, Parents_bi <chr>, Relative_bi <chr>,
## # Professional_bi <chr>, Phone_bi <chr>, Doctor_bi <chr>, religion_bi <chr>,
## # Alone_bi <chr>, Others_bi <chr>, Internet_bi <chr>
Which factors are most predictive of depression in international students? Are there differences between genders in depression rate? Does having an intimate relationship affect depression rate? Does being religious affect depression rate?
The cases are international and domestic students from an international university in Japan. There are 268 cases.
This data set was collected through a survey as part of a research paper by Minh-Hoang Nguyen, Manh-Toan Ho, Quynh-Yen T. Nguyen, and Quan-Hoang Vuong in an effort to determine “Help-Seeking Behaviors in a Multicultural Environment”. This research was requested by the Research Office of Ritsumeikan Asia Pacific University. The study can be found here.
This is an observational study.
The data set can be downloaded here. For this project, I will store the data set in my GitHub account here.
The response variable is the “Total score of depression measured by PHQ-9”. PHQ-9 is a “Patient Health Questionnaire” used to assess/diagnose depression. More info about PHQ-9 can be found here. The PHQ-9 consists of nine items, each scored from 0 to 3, so the total score ranges from 0 to 27, with higher scores indicating more severe depression. The variable is numeric.
For my first question, the point of my analysis is to find which of the variables is the strongest predictor of depression (i.e., the independent variable with the greatest effect). Several variables may play a role; I would like to find which one plays the largest. The variables are as follows (all are numeric):
PHQ-9 summary data:
# Summary statistics for the PHQ-9 total depression score (ToDep column).
describe(data$ToDep)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 8.19 4.95 8 7.89 4.45 0 25 25 0.61 0.52 0.3
# Label the histogram explicitly: with the defaults, hist() would print the
# raw expression "data$ToDep" as both the plot title and the x-axis label.
hist(
  data$ToDep,
  main = "Distribution of PHQ-9 total scores",
  xlab = "PHQ-9 total score (ToDep)"
)
Total social connectedness summary statistics:
# describe() summary of the ToSC column, extracted by name with [[ ]].
describe(data[["ToSC"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 37.47 9.23 40 38.44 10.38 8 48 40 -0.78 -0.11 0.56
Perceived discrimination:
# describe() summary of the APD column, extracted by name with [[ ]].
describe(data[["APD"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 15.41 6.17 16 14.88 7.41 8 39 31 0.67 0.16 0.38
Homesickness:
# describe() summary of the AHome column, extracted by name with [[ ]].
describe(data[["AHome"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 9.61 4.01 9 9.44 4.45 4 20 16 0.26 -0.68 0.25
Perceived Hatred:
# describe() summary of the APH column, extracted by name with [[ ]].
describe(data[["APH"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 9.14 4.19 9 8.62 5.93 5 25 20 0.86 0.2 0.26
Fear:
# describe() summary of the Afear column, extracted by name with [[ ]].
describe(data[["Afear"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 7.26 3.11 7 6.91 4.45 4 17 13 0.74 -0.2 0.19
Culture Shock:
# describe() summary of the ACS column, extracted by name with [[ ]].
describe(data[["ACS"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 6.06 2.6 6 5.81 2.97 3 13 10 0.55 -0.49 0.16
Guilt:
# describe() summary of the AGuilt column, extracted by name with [[ ]].
describe(data[["AGuilt"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 3.78 1.91 4 3.51 2.97 2 10 8 0.98 0.26 0.12
Miscellaneous:
# describe() summary of the AMiscell column, extracted by name with [[ ]].
describe(data[["AMiscell"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 21.12 7.4 20 20.75 7.41 10 47 37 0.47 -0.03 0.45
Total score of Acculturative Stress:
# describe() summary of the ToAS column, extracted by name with [[ ]].
describe(data[["ToAS"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 268 72.38 22.64 72 71.5 23.72 36 145 109 0.35 -0.26 1.38
Correlation to depression:
# Correlate the depression total (ToDep) with social connectedness (ToSC) and
# each acculturative-stress measure. Columns are selected by name instead of
# by position (previously c(18, 20:28)) so a change in the CSV's column order
# cannot silently pull in the wrong variables.
dep_cor_vars <- c(
  "ToDep", "ToSC", "APD", "AHome", "APH", "Afear", "ACS", "AGuilt",
  "AMiscell", "ToAS"
)
# Drop rows containing any missing value, then keep only the ToDep column of
# the correlation matrix. Computed once and reused for the printout and plot.
dep_cor <- cor(na.omit(data[, dep_cor_vars]))[, "ToDep"]
dep_cor
## ToDep ToSC APD AHome APH Afear ACS
## 1.0000000 -0.5517953 0.3402557 0.1685867 0.3092537 0.3486300 0.2743267
## AGuilt AMiscell ToAS
## 0.3059476 0.3333420 0.3940945
# las = 2 draws the bar labels perpendicular to the axis so that all ten
# variable names fit; title and y-axis label make the figure self-explanatory.
barplot(
  dep_cor,
  las = 2,
  main = "Correlation with depression total score (ToDep)",
  ylab = "Pearson correlation"
)