library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
data <- read_excel("C:/Users/mgmetz/Downloads/onlinecourse.xlsx")
str(data)
## tibble [94 × 13] (S3: tbl_df/tbl/data.frame)
## $ ID : chr [1:94] "respondent_1" "respondent_2" "respondent_3" "respondent_4" ...
## $ cstanding : num [1:94] 3 3 2 3 1 3 3 2 4 3 ...
## $ transfer : chr [1:94] "yes" "yes" "no" "yes" ...
## $ first : chr [1:94] "yes" "yes" "no" "yes" ...
## $ classpref : num [1:94] 3 7 7 6 3 1 7 6 3 1 ...
## $ stress : num [1:94] 4 7 7 6 7 2 7 5 5 2 ...
## $ academic : num [1:94] 4 6 7 6 6 1 4 2 2 1 ...
## $ relationships: num [1:94] 7 7 7 7 7 3 7 7 6 3 ...
## $ wellbeing : num [1:94] 5 6 7 7 3 1 5 7 1 1 ...
## $ gender : chr [1:94] "female" "female" "male" "female" ...
## $ race : chr [1:94] "latino" "asian" "latino" "latino" ...
## $ employed : chr [1:94] "unemployed" "part" "unemployed" "part" ...
## $ age : num [1:94] 21 19 19 19 25 21 20 19 21 21 ...
colnames(data)
## [1] "ID" "cstanding" "transfer" "first"
## [5] "classpref" "stress" "academic" "relationships"
## [9] "wellbeing" "gender" "race" "employed"
## [13] "age"
manifest_vars <- c("classpref", "stress", "academic", "relationships", "wellbeing")
data <- data %>%
mutate(depd = rowSums(select(., all_of(manifest_vars)), na.rm = TRUE))
head(data)
## # A tibble: 6 × 14
## ID cstanding transfer first classpref stress academic relationships
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 respondent_1 3 yes yes 3 4 4 7
## 2 respondent_2 3 yes yes 7 7 6 7
## 3 respondent_3 2 no no 7 7 7 7
## 4 respondent_4 3 yes yes 6 6 6 7
## 5 respondent_5 1 yes yes 3 7 6 7
## 6 respondent_6 3 yes yes 1 2 1 3
## # ℹ 6 more variables: wellbeing <dbl>, gender <chr>, race <chr>,
## # employed <chr>, age <dbl>, depd <dbl>
summary(data$depd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.00 22.25 27.00 26.16 31.00 35.00
library(corrr)
## Warning: package 'corrr' was built under R version 4.4.2
correlation_data <- data %>%
select(all_of(manifest_vars), depd)
correlation_matrix <- cor(correlation_data, use = "pairwise.complete.obs")
print(correlation_matrix)
## classpref stress academic relationships wellbeing depd
## classpref 1.0000000 0.5671495 0.5954152 0.6168661 0.5947816 0.8319547
## stress 0.5671495 1.0000000 0.6119831 0.3947068 0.6220138 0.7898455
## academic 0.5954152 0.6119831 1.0000000 0.4879970 0.7047065 0.8551581
## relationships 0.6168661 0.3947068 0.4879970 1.0000000 0.5860952 0.7153559
## wellbeing 0.5947816 0.6220138 0.7047065 0.5860952 1.0000000 0.8681474
## depd 0.8319547 0.7898455 0.8551581 0.7153559 0.8681474 1.0000000
correlation_table <- correlation_data %>%
correlate() %>%
fashion()
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
correlation_table
## term classpref stress academic relationships wellbeing depd
## 1 classpref .57 .60 .62 .59 .83
## 2 stress .57 .61 .39 .62 .79
## 3 academic .60 .61 .49 .70 .86
## 4 relationships .62 .39 .49 .59 .72
## 5 wellbeing .59 .62 .70 .59 .87
## 6 depd .83 .79 .86 .72 .87
library(psych)
reliability_data <- data %>%
select(all_of(manifest_vars))
cronbach_result <- psych::alpha(reliability_data)
cronbach_result
##
## Reliability analysis
## Call: psych::alpha(x = reliability_data)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.87 0.87 0.86 0.58 6.9 0.02 5.2 1.4 0.6
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.82 0.87 0.91
## Duhachek 0.83 0.87 0.91
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## classpref 0.84 0.84 0.81 0.57 5.3 0.024 0.0121 0.60
## stress 0.84 0.86 0.83 0.60 5.9 0.024 0.0048 0.60
## academic 0.83 0.84 0.81 0.56 5.2 0.027 0.0073 0.59
## relationships 0.86 0.87 0.83 0.62 6.4 0.023 0.0022 0.60
## wellbeing 0.82 0.83 0.80 0.55 4.8 0.028 0.0077 0.58
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## classpref 94 0.83 0.83 0.77 0.71 4.9 1.9
## stress 94 0.79 0.79 0.71 0.67 5.4 1.6
## academic 94 0.86 0.84 0.79 0.74 4.5 2.0
## relationships 94 0.72 0.76 0.67 0.62 6.3 1.1
## wellbeing 94 0.87 0.86 0.83 0.77 5.1 1.8
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7 miss
## classpref 0.05 0.07 0.18 0.07 0.14 0.22 0.26 0
## stress 0.00 0.06 0.10 0.13 0.14 0.23 0.34 0
## academic 0.09 0.14 0.07 0.17 0.16 0.17 0.20 0
## relationships 0.01 0.00 0.02 0.01 0.18 0.18 0.60 0
## wellbeing 0.05 0.09 0.06 0.10 0.20 0.19 0.31 0
cronbach_result$total$raw_alpha
## [1] 0.8670815
cronbach_result$total$std.alpha
## [1] 0.8726627
independent_vars <- data %>%
select(classpref, stress, relationships)
head(independent_vars)
## # A tibble: 6 × 3
## classpref stress relationships
## <dbl> <dbl> <dbl>
## 1 3 4 7
## 2 7 7 7
## 3 7 7 7
## 4 6 6 7
## 5 3 7 7
## 6 1 2 3
descriptive_stats <- psych::describe(independent_vars)
descriptive_stats
## vars n mean sd median trimmed mad min max range skew
## classpref 1 94 4.86 1.89 5 5.01 2.97 1 7 6 -0.47
## stress 2 94 5.40 1.60 6 5.58 1.48 2 7 5 -0.68
## relationships 3 94 6.28 1.10 7 6.46 0.00 1 7 6 -1.94
## kurtosis se
## classpref -1.07 0.20
## stress -0.76 0.16
## relationships 4.90 0.11
data <- data %>%
mutate(depd = rowSums(select(., classpref, stress, relationships), na.rm = TRUE))
bivariate_data <- data %>%
select(classpref, stress, relationships, depd) # Include only relevant variables
head(bivariate_data)
## # A tibble: 6 × 4
## classpref stress relationships depd
## <dbl> <dbl> <dbl> <dbl>
## 1 3 4 7 14
## 2 7 7 7 21
## 3 7 7 7 21
## 4 6 6 7 19
## 5 3 7 7 17
## 6 1 2 3 6
correlation_matrix <- cor(bivariate_data, use = "pairwise.complete.obs")
print(correlation_matrix)
## classpref stress relationships depd
## classpref 1.0000000 0.5671495 0.6168661 0.9061107
## stress 0.5671495 1.0000000 0.3947068 0.8087657
## relationships 0.6168661 0.3947068 1.0000000 0.7553981
## depd 0.9061107 0.8087657 0.7553981 1.0000000
library(corrr)
bivariate_correlations <- bivariate_data %>%
correlate()
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
bivariate_correlations
## # A tibble: 4 × 5
## term classpref stress relationships depd
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 classpref NA 0.567 0.617 0.906
## 2 stress 0.567 NA 0.395 0.809
## 3 relationships 0.617 0.395 NA 0.755
## 4 depd 0.906 0.809 0.755 NA
ggplot(bivariate_data, aes(x = classpref, y = depd)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = "Classpref vs. depd", x = "Class Preference", y = "depd")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(bivariate_data, aes(x = stress, y = depd)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Stress vs. depd", x = "Stress", y = "depd")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(bivariate_data, aes(x = relationships, y = depd)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "green") +
labs(title = "Relationships vs. depd", x = "Relationships", y = "depd")
## `geom_smooth()` using formula = 'y ~ x'

data <- data %>%
mutate(depd = rowSums(select(., classpref, stress, relationships), na.rm = TRUE))
model_data <- data %>%
select(classpref, stress, relationships, depd)
str(model_data)
## tibble [94 × 4] (S3: tbl_df/tbl/data.frame)
## $ classpref : num [1:94] 3 7 7 6 3 1 7 6 3 1 ...
## $ stress : num [1:94] 4 7 7 6 7 2 7 5 5 2 ...
## $ relationships: num [1:94] 7 7 7 7 7 3 7 7 6 3 ...
## $ depd : num [1:94] 14 21 21 19 17 6 21 18 14 6 ...
model <- lm(depd ~ classpref + stress + relationships, data = model_data)
summary(model)
## Warning in summary.lm(model): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = depd ~ classpref + stress + relationships, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.491e-15 -2.151e-15 -9.100e-16 2.100e-17 7.979e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.932e-15 5.718e-15 5.130e-01 0.609
## classpref 1.000e+00 6.885e-16 1.453e+15 <2e-16 ***
## stress 1.000e+00 6.998e-16 1.429e+15 <2e-16 ***
## relationships 1.000e+00 1.061e-15 9.429e+14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.845e-15 on 90 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 5.834e+30 on 3 and 90 DF, p-value: < 2.2e-16
model_interaction <- lm(depd ~ classpref + stress * relationships, data = model_data)
summary(model_interaction)
## Warning in summary.lm(model_interaction): essentially perfect fit: summary may
## be unreliable
##
## Call:
## lm(formula = depd ~ classpref + stress * relationships, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.405e-15 -2.020e-15 -8.550e-16 3.120e-16 7.952e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.130e-15 1.521e-14 -3.370e-01 0.737
## classpref 1.000e+00 7.006e-16 1.427e+15 <2e-16 ***
## stress 1.000e+00 3.200e-15 3.125e+14 <2e-16 ***
## relationships 1.000e+00 2.589e-15 3.862e+14 <2e-16 ***
## stress:relationships -2.472e-16 5.245e-16 -4.710e-01 0.639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.883e-15 on 89 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 4.338e+30 on 4 and 89 DF, p-value: < 2.2e-16
ggplot(model_data, aes(x = stress, y = depd, color = relationships)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Interaction Between Stress and Relationships",
x = "Stress",
y = "Dependent Variable (depd)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

# 1. Residual Plot (Linearity and Homoscedasticity)
ggplot(model, aes(.fitted, .resid)) +
geom_point() +
geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
labs(title = "Residuals vs Fitted", x = "Fitted Values", y = "Residuals")

# 2. Normal Q-Q Plot (Normality of Residuals)
ggplot(model, aes(sample = .stdresid)) +
stat_qq() +
stat_qq_line() +
labs(title = "Normal Q-Q Plot", x = "Theoretical Quantiles", y = "Standardized Residuals")

# 3. Variance Inflation Factor (Multicollinearity)
library(car)
## Warning: package 'car' was built under R version 4.4.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
vif(model)
## Warning in summary.lm(object, ...): essentially perfect fit: summary may be
## unreliable
## classpref stress relationships
## 2.018644 1.481275 1.622032
r_squared <- summary(model)$r.squared
## Warning in summary.lm(model): essentially perfect fit: summary may be
## unreliable
adj_r_squared <- summary(model)$adj.r.squared
## Warning in summary.lm(model): essentially perfect fit: summary may be
## unreliable
cat("R-squared:", r_squared, "\n")
## R-squared: 1
cat("Adjusted R-squared:", adj_r_squared, "\n")
## Adjusted R-squared: 1
f_statistic <- summary(model)$fstatistic
## Warning in summary.lm(model): essentially perfect fit: summary may be
## unreliable
f_value <- f_statistic[1]
df1 <- f_statistic[2]
df2 <- f_statistic[3]
p_value <- pf(f_value, df1, df2, lower.tail = FALSE)
cat("F-Statistic:", f_value, "\n")
## F-Statistic: 5.834357e+30
cat("Degrees of Freedom 1 (Numerator):", df1, "\n")
## Degrees of Freedom 1 (Numerator): 3
cat("Degrees of Freedom 2 (Denominator):", df2, "\n")
## Degrees of Freedom 2 (Denominator): 90
cat("p-value:", p_value, "\n")
## p-value: 0