#assign section information
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
s1=read.csv("Bio1010S1.csv")
s2=read.csv("Bio1010S2.csv")
s2 <- s2[, -4]
user.treatment <- bind_rows(list(s1 = s1, s2 = s2), .id = "source")
section_info <- unique(user.treatment)
quizzes=read.csv("Quiz_data_combined.csv") #228 unique users with quiz data
#Create a "type" column that labels each question as either an "attention" question or a "viewpoint"
quizzes$Type <- ifelse(grepl("TRUE|FALSE", quizzes$Response.Text, ignore.case = TRUE), "attention", "viewpoint")
quizzes$User.Email <- substr(quizzes$User.Email, 1, 7)
#add in user data for each occurrence
quizzes <- merge(quizzes, section_info[, c("SIS.Login.ID", "source")], by.x = "User.Email", by.y = "SIS.Login.ID", all.x = TRUE)
#Assess quiz responses for engagement using the "attention" check questions
###subset quiz data to only include the attention check questions
att.check=subset(quizzes, Type == "attention")
#Create a "score" column, where if their response matches the correct response, we give them a score of 1, and otherwise they get a score of 0.
att.check$score <- ifelse(att.check$Correct. == "TRUE", 1, 0)
#get the average score for each section
att.check %>%
group_by(Topic, Section) %>%
summarize(average_score = mean(score))
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 3
## # Groups: Topic [6]
## Topic Section average_score
## <chr> <chr> <dbl>
## 1 BioDiv Control 0.769
## 2 BioDiv IA 0.863
## 3 Digestion Control 1
## 4 Digestion IA 0.993
## 5 EnvInjustice Control 0.887
## 6 EnvInjustice IA 0.981
## 7 MolBiology Control 0.927
## 8 MolBiology IA 0.705
## 9 Respiration Control 0.787
## 10 Respiration IA 0.738
## 11 Vaccines Control 0.815
## 12 Vaccines IA 0.865
att.check %>%
group_by(Section) %>%
summarize(average_score = mean(score))
## # A tibble: 2 × 2
## Section average_score
## <chr> <dbl>
## 1 Control 0.852
## 2 IA 0.839
# Calculate average score per section grouped by topic and section
avg_scores <- att.check %>%
group_by(Topic, Section) %>%
summarize(avg_score = mean(score, na.rm = TRUE))
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
# Create a ggplot object to visualize the data
plot <- ggplot(avg_scores, aes(x = Section, y = avg_score)) +
geom_bar(stat = "identity", position = "dodge", fill = "blue") +
facet_wrap(~ Topic, scales = "free") +
labs(title = "Average Score per Section Faceted by Topic",
x = "Section", y = "Average Score") +
theme_minimal()
# Display the plot
print(plot)
summary(lme(fixed = score ~ Section, random = ~ 1 | User.Email, data = att.check))
## Linear mixed-effects model fit by REML
## Data: att.check
## AIC BIC logLik
## 1852.5 1875.401 -922.2498
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.04760064 0.3595739
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8519613 0.01315351 2040 64.77062 0.0000
## SectionIA -0.0127315 0.01684294 225 -0.75590 0.4505
## Correlation:
## (Intr)
## SectionIA -0.781
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3950415 0.3748525 0.4157131 0.4565736 0.6311915
##
## Number of Observations: 2267
## Number of Groups: 227
split_data <- split(att.check, att.check$Topic)
# Function to fit a linear mixed-effects model for each topic
fit_model <- function(df) {
lme(fixed = score ~ Section, random = ~ 1 | User.Email, data = df)
}
# Apply the function to each subset of data and store the models in a list
models <- lapply(split_data, fit_model)
# To see the summary for each model
summaries <- lapply(models, summary)
# Optional: print summaries for review
summaries
## $BioDiv
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 379.2015 395.2269 -185.6008
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.21428e-05 0.377255
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7687500 0.02982463 204 25.775680 0.0000
## SectionIA 0.0941532 0.03825421 202 2.461251 0.0147
## Correlation:
## (Intr)
## SectionIA -0.78
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.2873209 0.3634061 0.3634061 0.6129806 0.6129806
##
## Number of Observations: 408
## Number of Groups: 204
##
## $Digestion
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -553.6341 -540.0234 280.817
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.0626107 0.02347944
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 1.0000000 0.007128196 222 140.28796 0.0000
## SectionIA -0.0073529 0.009148169 222 -0.80376 0.4224
## Correlation:
## (Intr)
## SectionIA -0.779
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.212435e+00 3.782793e-14 3.861063e-02 3.861063e-02 3.861063e-02
##
## Number of Observations: 224
## Number of Groups: 224
##
## $EnvInjustice
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -21.44766 -6.050361 14.72383
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.02535476 0.2271112
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8873239 0.01929483 174 45.98766 0e+00
## SectionIA 0.0933535 0.02505320 173 3.72621 3e-04
## Correlation:
## (Intr)
## SectionIA -0.77
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.26657464 0.08301035 0.08301035 0.48406097 0.53760489
##
## Number of Observations: 349
## Number of Groups: 175
##
## $MolBiology
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 433.7067 449.9244 -212.8533
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 4.512674e-06 0.393839
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9268293 0.03075366 214 30.137204 0
## SectionIA -0.2222838 0.03915766 212 -5.676637 0
## Correlation:
## (Intr)
## SectionIA -0.785
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3533203 0.1857884 0.1857884 0.7501912 0.7501912
##
## Number of Observations: 428
## Number of Groups: 214
##
## $Respiration
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 516.3417 532.6339 -254.1709
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.652571e-05 0.4292832
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7869822 0.03302179 217 23.832212 0.0000
## SectionIA -0.0491545 0.04219764 217 -1.164864 0.2454
## Correlation:
## (Intr)
## SectionIA -0.783
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8332471 0.4962173 0.4962173 0.6107210 0.6107210
##
## Number of Observations: 436
## Number of Groups: 219
##
## $Vaccines
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 354.685 370.846 -173.3425
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 8.920891e-06 0.3609937
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8148148 0.02836234 211 28.728756 0.0000
## SectionIA 0.0505698 0.03613363 209 1.399522 0.1631
## Correlation:
## (Intr)
## SectionIA -0.785
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3972292 0.3729023 0.3729023 0.5129873 0.5129873
##
## Number of Observations: 422
## Number of Groups: 211
#Remove data for each user by topic if they have an attention check of “0”.
# Sum total scores for each "User.Email" by "Topic"
total_scores <- att.check %>%
group_by(User.Email, Topic) %>%
summarize(total_score = sum(score, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'User.Email'. You can override using the
## `.groups` argument.
# Identify users and topics where the score is zero
zero_scores <- total_scores %>%
filter(total_score == 0)
# Remove these users' data for topics where they scored zero
att.check <- att.check %>%
anti_join(zero_scores, by = c("User.Email", "Topic"))
# Calculate sum total scores for each "User.Email" by "topic" in the filtered data
#Check to make sure none of these are 0
sum_scores <- att.check %>%
group_by(User.Email, Topic) %>%
summarize(total_score = sum(score, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'User.Email'. You can override using the
## `.groups` argument.
#get the average score for each section
att.check %>%
group_by(Topic, Section) %>%
summarize(average_score = mean(score))
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 3
## # Groups: Topic [6]
## Topic Section average_score
## <chr> <chr> <dbl>
## 1 BioDiv Control 0.778
## 2 BioDiv IA 0.877
## 3 Digestion Control 1
## 4 Digestion IA 1
## 5 EnvInjustice Control 0.9
## 6 EnvInjustice IA 0.981
## 7 MolBiology Control 0.95
## 8 MolBiology IA 0.710
## 9 Respiration Control 0.796
## 10 Respiration IA 0.749
## 11 Vaccines Control 0.825
## 12 Vaccines IA 0.886
att.check %>%
group_by(Section) %>%
summarize(average_score = mean(score))
## # A tibble: 2 × 2
## Section average_score
## <chr> <dbl>
## 1 Control 0.864
## 2 IA 0.850
library(nlme)
summary(lme(score ~ Section, random = ~ 1 | User.Email, data = att.check))
## Linear mixed-effects model fit by REML
## Data: att.check
## AIC BIC logLik
## 1697.693 1720.543 -844.8465
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.0001214032 0.3519632
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8636884 0.01191216 2011 72.50480 0.0000
## SectionIA -0.0138716 0.01525296 225 -0.90944 0.3641
## Correlation:
## (Intr)
## SectionIA -0.781
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.4539174 0.3872892 0.4267008 0.4267012 0.4267021
##
## Number of Observations: 2238
## Number of Groups: 227
split_data <- split(att.check, att.check$Topic)
# Function to fit a linear mixed-effects model for each topic
fit_model <- function(df) {
lme(fixed = score ~ Section, random = ~ 1 | User.Email, data = df)
}
# Apply the function to each subset of data and store the models in a list
models <- lapply(split_data, fit_model)
# To see the summary for each model
summaries <- lapply(models, summary)
# Optional: print summaries for review
summaries
## $BioDiv
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 349.4332 365.399 -170.7166
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 9.492587e-06 0.3659177
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7784810 0.02911085 201 26.741953 0.000
## SectionIA 0.0985682 0.03736569 199 2.637932 0.009
## Correlation:
## (Intr)
## SectionIA -0.779
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3968481 0.3360067 0.3360067 0.6053792 0.6053792
##
## Number of Observations: 402
## Number of Groups: 201
##
## $Digestion
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -15503.43 -15489.83 7755.714
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.331307e-16 2.777743e-17
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 1 1.44974e-17 221 6.897788e+16 0.0000
## SectionIA 0 1.86327e-17 221 -1.000000e+00 0.4207
## Correlation:
## (Intr)
## SectionIA -0.778
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## 0 0 0 0 0
##
## Number of Observations: 223
## Number of Groups: 223
##
## $EnvInjustice
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -51.04712 -35.67294 29.52356
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.484812e-05 0.2188422
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9000000 0.01849554 173 48.66038 0e+00
## SectionIA 0.0806763 0.02394675 172 3.36899 9e-04
## Correlation:
## (Intr)
## SectionIA -0.772
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.48120344 0.08829957 0.08829957 0.45695030 0.45695032
##
## Number of Observations: 347
## Number of Groups: 174
##
## $MolBiology
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 404.0105 420.1715 -198.0052
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.207599e-05 0.3828284
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9500000 0.03026525 211 31.389139 0
## SectionIA -0.2400763 0.03841048 209 -6.250281 0
## Correlation:
## (Intr)
## SectionIA -0.788
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.4815293 0.1306068 0.1306068 0.7577189 0.7577189
##
## Number of Observations: 422
## Number of Groups: 211
##
## $Respiration
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 496.4418 512.6782 -244.2209
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.523438e-05 0.4228163
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7964072 0.03271851 214 24.341184 0.0000
## SectionIA -0.0473578 0.04183597 214 -1.131987 0.2589
## Correlation:
## (Intr)
## SectionIA -0.782
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8835774 0.4815160 0.4815160 0.5935215 0.5935215
##
## Number of Observations: 430
## Number of Groups: 216
##
## $Vaccines
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 308.808 324.8921 -150.404
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.685284e-05 0.3441218
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8250000 0.02720522 207 30.325062 0.0000
## SectionIA 0.0608268 0.03473246 205 1.751295 0.0814
## Correlation:
## (Intr)
## SectionIA -0.783
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.5741660 0.3317814 0.3317814 0.5085408 0.5085408
##
## Number of Observations: 414
## Number of Groups: 207
library(ggplot2)
# Calculate average score per section grouped by topic and section
avg_scores <- att.check %>%
group_by(Topic, Section) %>%
summarize(avg_score = mean(score, na.rm = TRUE))
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
# Create a ggplot object to visualize the data
plot <- ggplot(avg_scores, aes(x = Section, y = avg_score)) +
geom_bar(stat = "identity", position = "dodge", fill = "blue") +
facet_wrap(~ Topic, scales = "free") +
labs(title = "Average Score per Section Faceted by Topic",
x = "Section", y = "Average Score") +
theme_minimal()
# Display the plot
print(plot)
#Assess viewpoint questions only
viewpoint=subset(quizzes, Type == "viewpoint")
#Remove all user info on a per topic basis if they scored a "0" on the attention check for that topic
viewpoint <- viewpoint %>%
anti_join(zero_scores, by = c("User.Email", "Topic"))
###add in a column called "score" with likert ranging from 1-5 (strongly disagree to strongly agree)
viewpoint$score <- ifelse(grepl("Neither Agree nor Disagree", viewpoint$Response.Text, ignore.case = TRUE), 3,
ifelse(grepl("Agree", viewpoint$Response.Text, ignore.case = TRUE) & !grepl("Disagree", viewpoint$Response.Text, ignore.case = TRUE), 4,
ifelse(grepl("Strongly Agree", viewpoint$Response.Text, ignore.case = TRUE), 5,
ifelse(grepl("Strongly Disagree", viewpoint$Response.Text, ignore.case = TRUE), 1,
ifelse(grepl("Disagree", viewpoint$Response.Text, ignore.case = TRUE), 2, NA)))))
#Calculate the average score per question per section
average_scores <- viewpoint %>%
group_by(Section, Question.Text, Topic) %>%
summarize(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Section', 'Question.Text'. You can
## override using the `.groups` argument.
#calculate the difference for each question between sections
library(tidyr)
# Assuming that `average_scores` is the data frame obtained from your previous calculations
# Ensure that there are exactly two Section subcategories per Question.Text
# Pivot to have each Section in separate columns
pivoted_scores <- average_scores %>%
pivot_wider(names_from = Section, values_from = avg_score)
# Check the column names to confirm the two subcategories are present
print(colnames(pivoted_scores))
## [1] "Question.Text" "Topic" "Control" "IA"
# Assuming the two subcategories are, for example, 'Section1' and 'Section2'
# Calculate the difference between them for each Question.Text
pivoted_scores <- pivoted_scores %>%
mutate(score_difference = IA - Control)
# View the results with differences
print(pivoted_scores)
## # A tibble: 47 × 5
## Question.Text Topic Control IA score_difference
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 "Active advocacy for and support of pol… BioD… 3.81 NA NA
## 2 "Choosing to consume products that are … BioD… 3.91 NA NA
## 3 "Communities affected by environmental … EnvI… 3.81 3.87 0.0595
## 4 "Dietary supplements, including vitamin… Dige… 3.29 3.75 0.465
## 5 "Economic development justifies environ… EnvI… 2.97 3.03 0.0577
## 6 "Editing the human genome is a risk to … MolB… 3.75 3.84 0.0885
## 7 "Education about the food web and human… BioD… 3.86 3.85 -0.00773
## 8 "Environmental injustices are primarily… EnvI… 3.63 3.69 0.0607
## 9 "Ethical considerations, such as equity… Vacc… 3.81 3.63 -0.183
## 10 "Gene editing technology has already sa… MolB… 3.55 NA NA
## # ℹ 37 more rows
# library(DT)
#
# # Create an interactive HTML table with DT
# datatable(
# pivoted_scores,
# options = list(
# pageLength = 57, # Adjust this to change the number of rows per page
# scrollX = TRUE, # Enables horizontal scrolling if necessary
# columnDefs = list(
# list(targets = 0:ncol(pivoted_scores), className = 'wrap') # Apply text wrapping
# )
# )
# )
pivoted_scores$AbsValDiff <- abs(pivoted_scores$score_difference)
average_abs_val_diff <- aggregate(AbsValDiff ~ Topic, data = pivoted_scores, mean)
average_scores_topic <- viewpoint %>%
group_by(Section, Topic) %>%
summarize(avg_score = mean(score, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Section'. You can override using the
## `.groups` argument.
#calculate the difference for each question between sections
library(tidyr)
# Assuming that `average_scores` is the data frame obtained from your previous calculations
# Ensure that there are exactly two Section subcategories per Question.Text
# Pivot to have each Section in separate columns
pivoted_scores <- average_scores %>%
pivot_wider(names_from = Section, values_from = avg_score)
# Check the column names to confirm the two subcategories are present
print(colnames(pivoted_scores))
## [1] "Question.Text" "Topic" "Control" "IA"
# Assuming the two subcategories are, for example, 'Section1' and 'Section2'
# Calculate the difference between them for each Question.Text
pivoted_scores <- pivoted_scores %>%
mutate(score_difference = IA - Control)
# View the results with differences
print(pivoted_scores)
## # A tibble: 47 × 5
## Question.Text Topic Control IA score_difference
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 "Active advocacy for and support of pol… BioD… 3.81 NA NA
## 2 "Choosing to consume products that are … BioD… 3.91 NA NA
## 3 "Communities affected by environmental … EnvI… 3.81 3.87 0.0595
## 4 "Dietary supplements, including vitamin… Dige… 3.29 3.75 0.465
## 5 "Economic development justifies environ… EnvI… 2.97 3.03 0.0577
## 6 "Editing the human genome is a risk to … MolB… 3.75 3.84 0.0885
## 7 "Education about the food web and human… BioD… 3.86 3.85 -0.00773
## 8 "Environmental injustices are primarily… EnvI… 3.63 3.69 0.0607
## 9 "Ethical considerations, such as equity… Vacc… 3.81 3.63 -0.183
## 10 "Gene editing technology has already sa… MolB… 3.55 NA NA
## # ℹ 37 more rows
library(DT)
#
# # Create an interactive HTML table with DT
# datatable(
# pivoted_scores,
# options = list(
# pageLength = 57, # Adjust this to change the number of rows per page
# scrollX = TRUE, # Enables horizontal scrolling if necessary
# columnDefs = list(
# list(targets = 0:ncol(pivoted_scores), className = 'wrap') # Apply text wrapping
# )
# )
# )
library(lavaan)
## This is lavaan 0.6-21
## lavaan is FREE software! Please report any bugs.
surv=read.csv("survey.quant.final.csv")
#remove any empty rows
surv <- surv[apply(surv, 1, function(x) !all(x == "" | is.na(x))), ]
#Remove rows with additional question text (non-data rows)
surv <- surv[-c(2, 3), ]
#only keep data rows who complete the survey and who have consented to participation
surv=subset(surv, Consent == "I Consent")
surv=subset(surv, Progress == "100")
#change construct likert scales to numeric
surv[, 88:146] <- lapply(surv[, 88:146], as.numeric)
#Q141 is the studentID in email form
surv$Email <- tolower(substr(surv$Email, 1, 7))
surv <- merge(surv, section_info[, c("SIS.Login.ID", "source")], by.x = "Email", by.y = "SIS.Login.ID", all.x = TRUE)
# CE.1 through CE.8
# reverse #5-8
columns_to_reverse <- paste0("CE.", 5:8)
surv[columns_to_reverse] <- 8 - surv[columns_to_reverse]
#Create model that contains all questions related to grit
(CE.model <- '
CogEng =~ CE.1 + CE.2 + CE.3 + CE.4 + CE.5 + CE.6 + CE.7 + CE.8')
## [1] "\nCogEng =~ CE.1 + CE.2 + CE.3 + CE.4 + CE.5 + CE.6 + CE.7 + CE.8"
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionCE <- cfa(CE.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionCE, fit.measures=T, standardized=TRUE)
## lavaan 0.6-21 ended normally after 31 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 16
##
## Used Total
## Number of observations 292 392
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 268.246 175.429
## Degrees of freedom 20 20
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.529
## Satorra-Bentler correction
##
## Model Test Baseline Model:
##
## Test statistic 840.594 652.303
## Degrees of freedom 28 28
## P-value 0.000 0.000
## Scaling correction factor 1.289
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.695 0.751
## Tucker-Lewis Index (TLI) 0.572 0.651
##
## Robust Comparative Fit Index (CFI) 0.705
## Robust Tucker-Lewis Index (TLI) 0.586
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -3623.834 -3623.834
## Loglikelihood unrestricted model (H1) -3489.711 -3489.711
##
## Akaike (AIC) 7279.669 7279.669
## Bayesian (BIC) 7338.497 7338.497
## Sample-size adjusted Bayesian (SABIC) 7287.757 7287.757
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.206 0.163
## 90 Percent confidence interval - lower 0.185 0.145
## 90 Percent confidence interval - upper 0.228 0.181
## P-value H_0: RMSEA <= 0.050 0.000 0.000
## P-value H_0: RMSEA >= 0.080 1.000 1.000
##
## Robust RMSEA 0.202
## 90 Percent confidence interval - lower 0.175
## 90 Percent confidence interval - upper 0.230
## P-value H_0: Robust RMSEA <= 0.050 0.000
## P-value H_0: Robust RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.131 0.131
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CogEng =~
## CE.1 1.000 0.777 0.627
## CE.2 0.942 0.112 8.384 0.000 0.732 0.633
## CE.3 0.863 0.102 8.448 0.000 0.671 0.711
## CE.4 0.944 0.106 8.904 0.000 0.734 0.760
## CE.5 0.999 0.145 6.870 0.000 0.776 0.493
## CE.6 0.757 0.126 5.999 0.000 0.589 0.375
## CE.7 1.138 0.126 9.014 0.000 0.884 0.597
## CE.8 1.086 0.147 7.375 0.000 0.844 0.526
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CE.1 0.934 0.151 6.189 0.000 0.934 0.607
## .CE.2 0.801 0.095 8.427 0.000 0.801 0.599
## .CE.3 0.440 0.058 7.556 0.000 0.440 0.494
## .CE.4 0.393 0.059 6.695 0.000 0.393 0.422
## .CE.5 1.881 0.164 11.440 0.000 1.881 0.757
## .CE.6 2.120 0.202 10.506 0.000 2.120 0.860
## .CE.7 1.414 0.181 7.833 0.000 1.414 0.644
## .CE.8 1.858 0.170 10.963 0.000 1.858 0.723
## CogEng 0.604 0.111 5.440 0.000 1.000 1.000
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionCE, "std", edge.label.cex =1, label.cex=1)
#Create model that contains all questions related to grit
(CEb.model <- '
CogEng =~ CE.1 + CE.5 + CE.6 + CE.7 + CE.8')
## [1] "\nCogEng =~ CE.1 + CE.5 + CE.6 + CE.7 + CE.8"
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionCEb <- cfa(CEb.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionCEb, fit.measures=T, standardized=TRUE)
## lavaan 0.6-21 ended normally after 33 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Used Total
## Number of observations 303 392
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 4.797 4.300
## Degrees of freedom 5 5
## P-value (Chi-square) 0.441 0.507
## Scaling correction factor 1.116
## Satorra-Bentler correction
##
## Model Test Baseline Model:
##
## Test statistic 385.857 290.818
## Degrees of freedom 10 10
## P-value 0.000 0.000
## Scaling correction factor 1.327
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.001 1.005
##
## Robust Comparative Fit Index (CFI) 1.000
## Robust Tucker-Lewis Index (TLI) 1.004
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2556.977 -2556.977
## Loglikelihood unrestricted model (H1) -2554.578 -2554.578
##
## Akaike (AIC) 5133.954 5133.954
## Bayesian (BIC) 5171.091 5171.091
## Sample-size adjusted Bayesian (SABIC) 5139.377 5139.377
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.078 0.071
## P-value H_0: RMSEA <= 0.050 0.774 0.834
## P-value H_0: RMSEA >= 0.080 0.044 0.025
##
## Robust RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.078
## P-value H_0: Robust RMSEA <= 0.050 0.795
## P-value H_0: Robust RMSEA >= 0.080 0.044
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.019 0.019
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CogEng =~
## CE.1 1.000 0.379 0.307
## CE.5 2.743 0.698 3.928 0.000 1.038 0.663
## CE.6 2.428 0.646 3.759 0.000 0.919 0.588
## CE.7 3.127 0.801 3.905 0.000 1.184 0.798
## CE.8 3.205 0.815 3.931 0.000 1.213 0.757
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CE.1 1.382 0.149 9.296 0.000 1.382 0.906
## .CE.5 1.377 0.150 9.199 0.000 1.377 0.561
## .CE.6 1.597 0.212 7.545 0.000 1.597 0.654
## .CE.7 0.801 0.137 5.847 0.000 0.801 0.364
## .CE.8 1.098 0.152 7.221 0.000 1.098 0.427
## CogEng 0.143 0.065 2.215 0.027 1.000 1.000
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionCEb, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionCEb, "case.idx")
fscores <- lavPredict(solutionCEb, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(CogEng ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
## Linear mixed-effects model fit by REML
## Data: surv
## AIC BIC logLik
## 223.1895 238.0179 -107.5947
##
## Random effects:
## Formula: ~1 | Email
## (Intercept) Residual
## StdDev: 0.1445638 0.3099086
##
## Fixed effects: CogEng ~ Timepoint
## Value Std.Error DF t-value p-value
## (Intercept) -0.04031050 0.03029389 207 -1.330648 0.1848
## TimepointPre 0.06565277 0.03737536 94 1.756579 0.0822
## Correlation:
## (Intr)
## TimepointPre -0.732
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.9509088 -0.4653659 0.1101215 0.6423174 2.0215912
##
## Number of Observations: 303
## Number of Groups: 208
columns_to_reverse <- paste0("EE.", 5:10)
surv[columns_to_reverse] <- 8 - surv[columns_to_reverse]
#Create model that contains all questions related to grit
(EE.model <- '
EmEng =~ EE.1 + EE.2 + EE.3 + EE.4 + EE.5 + EE.6 + EE.7 + EE.8 + EE.9 + EE.10')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionEE <- cfa(EE.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionEE, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionEE, "std", edge.label.cex =1, label.cex=1)
#Create model that contains all questions related to grit
(EEb.model <- '
EmEng =~ EE.3 + EE.4 + EE.5 + EE.8 + EE.9 + EE.10')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionEEb <- cfa(EEb.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionEEb, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionEEb, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionEEb, "case.idx")
fscores <- lavPredict(solutionEEb, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(EmEng ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
columns_to_reverse <- paste0("AEE.", 3)
surv[columns_to_reverse] <- 8 - surv[columns_to_reverse]
#Create model that contains all questions related to grit
(AEE.model <- '
AdEmp =~ AEE.1 + AEE.2 + AEE.3 + AEE.4 + AEE.5 + AEE.6 + AEE.7 + AEE.8 + AEE.9 + AEE.10')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionAEE <- cfa(AEE.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionAEE, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionAEE, "std", edge.label.cex =1, label.cex=1)
#Create model that contains all questions related to grit
(AEEb.model <- '
AdEmp =~ AEE.1 + AEE.2 + AEE.4 + AEE.6 + AEE.7 + AEE.9 ')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionAEEb <- cfa(AEEb.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionAEEb, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionAEEb, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionAEEb, "case.idx")
fscores <- lavPredict(solutionAEEb, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(AdEmp ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
columns_to_reverse <- paste0("IRI.", c(2,3,7,8,9))
surv[columns_to_reverse] <- 8 - surv[columns_to_reverse]
#Create model that contains all questions related to grit
(IRI.model <- '
IntReact =~ IRI.1 + IRI.2 + IRI.3 + IRI.4 + IRI.5 + IRI.6 + IRI.7 + IRI.8 + IRI.9 + IRI.10 + IRI.11 + IRI.12 + IRI.13 + IRI.14')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionIRI <- cfa(IRI.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionIRI, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionIRI, "std", edge.label.cex =1, label.cex=1)
#Create model that contains all questions related to grit
(IRIb.model <- '
IntReact =~ IRI.3 + IRI.4 + IRI.5 + IRI.7 + IRI.8 + IRI.9 + IRI.10 + IRI.11 + IRI.12 + IRI.13 + IRI.14')
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionIRIb <- cfa(IRIb.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionIRIb, fit.measures=T, standardized=TRUE)
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionIRIb, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionIRIb, "case.idx")
fscores <- lavPredict(solutionIRIb, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(IntReact ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
#Create model that contains all questions related to grit
(SI.model <- '
SciId =~ SI.1 + SI.2 + SI.3 + SI.4 + SI.5')
## [1] "\nSciId =~ SI.1 + SI.2 + SI.3 + SI.4 + SI.5"
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionSI <- cfa(SI.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionSI, fit.measures=T, standardized=TRUE)
## lavaan 0.6-21 ended normally after 25 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Used Total
## Number of observations 294 392
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 38.708 30.035
## Degrees of freedom 5 5
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.289
## Satorra-Bentler correction
##
## Model Test Baseline Model:
##
## Test statistic 568.889 370.321
## Degrees of freedom 10 10
## P-value 0.000 0.000
## Scaling correction factor 1.536
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.940 0.931
## Tucker-Lewis Index (TLI) 0.879 0.861
##
## Robust Comparative Fit Index (CFI) 0.942
## Robust Tucker-Lewis Index (TLI) 0.883
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2471.029 -2471.029
## Loglikelihood unrestricted model (H1) -2451.675 -2451.675
##
## Akaike (AIC) 4962.057 4962.057
## Bayesian (BIC) 4998.893 4998.893
## Sample-size adjusted Bayesian (SABIC) 4967.181 4967.181
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.151 0.131
## 90 Percent confidence interval - lower 0.109 0.093
## 90 Percent confidence interval - upper 0.198 0.172
## P-value H_0: RMSEA <= 0.050 0.000 0.000
## P-value H_0: RMSEA >= 0.080 0.997 0.985
##
## Robust RMSEA 0.148
## 90 Percent confidence interval - lower 0.100
## 90 Percent confidence interval - upper 0.201
## P-value H_0: Robust RMSEA <= 0.050 0.001
## P-value H_0: Robust RMSEA >= 0.080 0.988
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.045 0.045
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## SciId =~
## SI.1 1.000 1.253 0.791
## SI.2 0.323 0.068 4.770 0.000 0.405 0.304
## SI.3 0.973 0.071 13.716 0.000 1.219 0.814
## SI.4 1.095 0.083 13.142 0.000 1.372 0.815
## SI.5 0.893 0.082 10.942 0.000 1.119 0.651
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .SI.1 0.937 0.131 7.159 0.000 0.937 0.374
## .SI.2 1.610 0.149 10.808 0.000 1.610 0.908
## .SI.3 0.757 0.122 6.201 0.000 0.757 0.337
## .SI.4 0.949 0.215 4.422 0.000 0.949 0.335
## .SI.5 1.700 0.223 7.620 0.000 1.700 0.576
## SciId 1.570 0.197 7.971 0.000 1.000 1.000
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionSI, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionSI, "case.idx")
fscores <- lavPredict(solutionSI, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(SciId ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
## Linear mixed-effects model fit by REML
## Data: surv
## AIC BIC logLik
## 880.3411 895.0481 -436.1706
##
## Random effects:
## Formula: ~1 | Email
## (Intercept) Residual
## StdDev: 0.9961904 0.6346924
##
## Fixed effects: SciId ~ Timepoint
## Value Std.Error DF t-value p-value
## (Intercept) 0.2955271 0.09753661 202 3.029910 0.0028
## TimepointPre -0.3964644 0.08768428 90 -4.521499 0.0000
## Correlation:
## (Intr)
## TimepointPre -0.573
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.94877302 -0.44828314 -0.08856004 0.44676570 2.50132821
##
## Number of Observations: 294
## Number of Groups: 203
#Create model that contains all questions related to grit
(SCV.model <- '
SciComVal=~ SCE.1 + SCE.2 + SCE.3 + SCE.4 + SCE.5')
## [1] "\nSciComVal=~ SCE.1 + SCE.2 + SCE.3 + SCE.4 + SCE.5"
#run confirmatory factor analysis on those questions using the MLM mode explained above
solutionSCV <- cfa(SCV.model, data=surv, estimator= "MLM")
#Collect summary output, including all fitness measures based on the standardized outputs.
summary(solutionSCV, fit.measures=T, standardized=TRUE)
## lavaan 0.6-21 ended normally after 27 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 10
##
## Used Total
## Number of observations 340 392
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 5.143 3.842
## Degrees of freedom 5 5
## P-value (Chi-square) 0.399 0.572
## Scaling correction factor 1.339
## Satorra-Bentler correction
##
## Model Test Baseline Model:
##
## Test statistic 415.314 335.697
## Degrees of freedom 10 10
## P-value 0.000 0.000
## Scaling correction factor 1.237
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 0.999 1.007
##
## Robust Comparative Fit Index (CFI) 1.000
## Robust Tucker-Lewis Index (TLI) 1.008
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2225.301 -2225.301
## Loglikelihood unrestricted model (H1) -2222.729 -2222.729
##
## Akaike (AIC) 4470.601 4470.601
## Bayesian (BIC) 4508.891 4508.891
## Sample-size adjusted Bayesian (SABIC) 4477.169 4477.169
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.009 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.076 0.058
## P-value H_0: RMSEA <= 0.050 0.772 0.914
## P-value H_0: RMSEA >= 0.080 0.038 0.006
##
## Robust RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.076
## P-value H_0: Robust RMSEA <= 0.050 0.823
## P-value H_0: Robust RMSEA >= 0.080 0.039
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.020 0.020
##
## Parameter Estimates:
##
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## SciComVal =~
## SCE.1 1.000 0.463 0.511
## SCE.2 1.716 0.198 8.683 0.000 0.794 0.766
## SCE.3 1.422 0.144 9.906 0.000 0.658 0.746
## SCE.4 1.283 0.179 7.150 0.000 0.594 0.491
## SCE.5 1.490 0.187 7.987 0.000 0.690 0.656
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .SCE.1 0.606 0.045 13.588 0.000 0.606 0.739
## .SCE.2 0.444 0.078 5.729 0.000 0.444 0.414
## .SCE.3 0.346 0.060 5.755 0.000 0.346 0.444
## .SCE.4 1.111 0.151 7.343 0.000 1.111 0.759
## .SCE.5 0.631 0.080 7.854 0.000 0.631 0.570
## SciComVal 0.214 0.045 4.768 0.000 1.000 1.000
#Produce SEM plot showing all estimates and standard deviations using the standardized values.
#label and text sizes were increased from default 0.8 to 1.0
semPlot::semPaths(solutionSCV, "std", edge.label.cex =1, label.cex=1)
#Create index file of the factor scores collected from cfa. These will be adjust values to meet the loading determined for each survey question
idx <- lavInspect(solutionSCV, "case.idx")
fscores <- lavPredict(solutionSCV, type="lv")
## loop over factors and merge them with the original data set. All missing values will be input as "NA".
for (fs in colnames(fscores)) {
surv[idx, fs] <- fscores[ , fs]
}
summary(lme(SciComVal ~ Timepoint, random = ~ 1 | Email, data = surv, na.action = na.omit))
## Linear mixed-effects model fit by REML
## Data: surv
## AIC BIC logLik
## 365.7951 381.0872 -178.8975
##
## Random effects:
## Formula: ~1 | Email
## (Intercept) Residual
## StdDev: 0.2658323 0.3209356
##
## Fixed effects: SciComVal ~ Timepoint
## Value Std.Error DF t-value p-value
## (Intercept) -0.03692936 0.03460149 228 -1.067277 0.2870
## TimepointPre 0.05151883 0.03828727 110 1.345587 0.1812
## Correlation:
## (Intr)
## TimepointPre -0.689
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.5597954 -0.4627069 0.0837014 0.5986544 1.6859895
##
## Number of Observations: 340
## Number of Groups: 229
#Calculate change in pre/post values for variables that passed CFA criteria
#Cognitive Engagement
CE.selected_columns <- c(1,147:149)
CogEng=surv[, CE.selected_columns]
CogEng <- CogEng %>%
group_by(Email, Timepoint, source) %>%
summarise(CogEng = mean(CogEng), .groups = 'drop')
CE.wide_data <- CogEng %>%
pivot_wider(
names_from = Timepoint,
values_from = CogEng,
names_prefix = "CogEng_"
)
CE.wide_data <- CE.wide_data %>%
mutate(CogEng_diff = ifelse(is.na(CogEng_Pre) | is.na(CogEng_Post), NA, CogEng_Post - CogEng_Pre))
summary(lm(CogEng_diff ~ source, data = CE.wide_data, na.action = na.omit))
##
## Call:
## lm(formula = CogEng_diff ~ source, data = CE.wide_data, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52267 -0.14393 0.05461 0.24922 1.22329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07075 0.05821 -1.215 0.227
## sources2 -0.01114 0.09591 -0.116 0.908
##
## Residual standard error: 0.4509 on 93 degrees of freedom
## (155 observations deleted due to missingness)
## Multiple R-squared: 0.000145, Adjusted R-squared: -0.01061
## F-statistic: 0.01348 on 1 and 93 DF, p-value: 0.9078
###################################
ID.selected_columns <- c(1,147:148, 150)
SciID=surv[, ID.selected_columns]
SciID <- SciID %>%
group_by(Email, Timepoint, source) %>%
summarise(SciId = mean(SciId), .groups = 'drop')
SI.wide_data <- SciID %>%
pivot_wider(
names_from = Timepoint,
values_from = SciId,
names_prefix = "SI_"
)
SI.wide_data <- SI.wide_data %>%
mutate(SI_diff = ifelse(is.na(SI_Pre) | is.na(SI_Post), NA, SI_Post - SI_Pre))
summary(lm(SI_diff ~ source, data = SI.wide_data, na.action = na.omit))
##
## Call:
## lm(formula = SI_diff ~ source, data = SI.wide_data, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.13889 -0.63852 -0.08524 0.49341 2.79219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4287 0.1127 3.803 0.000261 ***
## sources2 -0.1611 0.1963 -0.820 0.414200
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8804 on 89 degrees of freedom
## (159 observations deleted due to missingness)
## Multiple R-squared: 0.007505, Adjusted R-squared: -0.003647
## F-statistic: 0.673 on 1 and 89 DF, p-value: 0.4142
#######################
SCV.selected_columns <- c(1,147:148, 151)
SCV=surv[, SCV.selected_columns]
SCV <- SCV %>%
group_by(Email, Timepoint, source) %>%
summarise(SCV = mean(SciComVal), .groups = 'drop')
SCV.wide_data <- SCV %>%
pivot_wider(
names_from = Timepoint,
values_from = SCV,
names_prefix = "SCV_"
)
SCV.wide_data <- SCV.wide_data %>%
mutate(SCV_diff = ifelse(is.na(SCV_Pre) | is.na(SCV_Post), NA, SCV_Post - SCV_Pre))
summary(lm(SCV_diff ~ source, data = SCV.wide_data, na.action = na.omit))
##
## Call:
## lm(formula = SCV_diff ~ source, data = SCV.wide_data, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.25953 -0.27792 0.08473 0.30680 1.03728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08697 0.05394 -1.612 0.110
## sources2 0.12240 0.08729 1.402 0.164
##
## Residual standard error: 0.4448 on 108 degrees of freedom
## (140 observations deleted due to missingness)
## Multiple R-squared: 0.01788, Adjusted R-squared: 0.008787
## F-statistic: 1.966 on 1 and 108 DF, p-value: 0.1637
write.csv(surv, file = "quant_calc.csv", row.names = F)
for notes on how to produce and interpret semPlots, see : https://www.r-bloggers.com/2018/04/statistics-sunday-using-semplot/ https://cran.r-project.org/web/packages/semPlot/semPlot.pdf http://sachaepskamp.com/documentation/semPlot/semPaths.html
#CFI: Measures whether the model fits the data better than a more restricted baseline model. Higher is better, with okay fit > .9. Find CFI under User Model vs. Baseline model
#TLI (Tucker-Lewis index): Similar to CFI, but it penalizes overly complex models (making it more conservative than CFI). Measures whether the model fits the data better than a more restricted baseline model. Higher is better, with okay fit > .9.
#Find Test statistic, DOF, and P value under Model Test User Model (chi-squared goodness of fit test)
#RMSEA p value:The RMSEA P-value is the Probability that RMSEA <= .05. If that P-value is greater than 5% you can argue that the RMSEA value does not indicate a model rejection (the RMSEA value doesn’t reject the model if the RMSEA value is between 0 and 0.05). Usually this is useful when the RMSEA value is near the cutoff value of 0.05. If the RMSEA value is not near 0.05 one can typically ignore the confidence limit and the P-value and simply use the actual RMSEA value.
#Interpretation:
#RMSEA < 0.05: Indicates a close fit of the model to the data. #0.05 ≤ RMSEA < 0.08: Indicates a reasonable fit. #0.08 ≤ RMSEA < 0.10: Indicates a mediocre fit. #RMSEA ≥ 0.10: Indicates a poor fit.
#p-value > 0.05: The null hypothesis cannot be rejected, suggesting that the model provides a close fit to the data. #p-value ≤ 0.05: The null hypothesis is rejected, indicating that the model does not provide a close fit to the data.
#std.all are the standardized coefficients
See this publication and references within on reporting CFA results properly: https://www.researchgate.net/publication/24187223_Reporting_Practices_in_Confirmatory_Factor_Analysis_An_Overview_and_Some_Recommendations
#Does view of importance (IA.Importance) & IA.Comfort & IA.Grad.Imp change the outcome of constructs above within the IA group? #Same but Science.Career #Same by Gender & Race & First Gen
#Change in comfort per student for each Comfort. topic (bigger change for IA group?) #Does this change based on views of explicit links or demographics?
resp=read.csv("Qual_textresp.csv")
des=read.csv("Qual_Designations.csv")
des_clean <- des %>%
filter(!if_all(starts_with("Designation."), ~ is.na(.)))
library(dplyr)
library(tidyr)
library(purrr)
des_long <- des_clean %>%
pivot_longer(
cols = starts_with("Designation."),
names_to = "Topic",
values_to = "Response"
) %>%
filter(!is.na(Response)) %>%
mutate(
Section = factor(Section),
Response = factor(Response),
Topic = factor(Topic)
)
# results <- des_long %>%
# split(.$Topic) %>%
# map_dfr(function(dat) {
#
# tab <- table(dat$Section, dat$Response)
#
# # remove empty rows/columns
# tab <- tab[rowSums(tab) > 0, colSums(tab) > 0, drop = FALSE]
#
# # check table is still testable
# if (nrow(tab) < 2 || ncol(tab) < 2) {
# return(data.frame(
# Topic = as.character(unique(dat$Topic)),
# test = NA,
# statistic = NA,
# p_value = NA,
# df = NA,
# note = "Not enough variation"
# ))
# }
#
# chi <- suppressWarnings(chisq.test(tab))
#
# data.frame(
# Topic = as.character(unique(dat$Topic)),
# test = "Chi-square",
# statistic = unname(chi$statistic),
# p_value = chi$p.value,
# df = unname(chi$parameter),
# note = "OK"
# )
# })
#
# results
results_fisher <- des_long %>%
split(.$Topic) %>%
map_dfr(function(dat) {
tab <- table(dat$Section, dat$Response)
tab <- tab[rowSums(tab) > 0, colSums(tab) > 0, drop = FALSE]
if (nrow(tab) < 2 || ncol(tab) < 2) {
return(data.frame(
Topic = as.character(unique(dat$Topic)),
p_value = NA,
note = "Not enough variation"
))
}
fish <- fisher.test(tab)
data.frame(
Topic = as.character(unique(dat$Topic)),
p_value = fish$p.value,
note = "Fisher exact test"
)
})
results_fisher
## Topic p_value note
## 1 Designation.DB 0.1112633218 Fisher exact test
## 2 Designation.Env 0.4896338538 Fisher exact test
## 3 Designation.GE 0.2990936696 Fisher exact test
## 4 Designation.Mar 0.0002512388 Fisher exact test
## 5 Designation.Vac 0.1863084072 Fisher exact test
## 6 Designation.Vap 0.4164906739 Fisher exact test
des_long <- des_long %>%
mutate(Response_ord = factor(Response,
levels = c(
"Negative",
"Negative-Neutral",
"Neutral/No opinion/Freedom",
"Positive-Neutral",
"Positive"
),
ordered = TRUE
))
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
model <- polr(Response_ord ~ Section + Topic, data = des_long, Hess = TRUE)
summary(model)
## Call:
## polr(formula = Response_ord ~ Section + Topic, data = des_long,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## Section2 0.49686 0.1661 2.9915
## TopicDesignation.Env 0.03744 0.3008 0.1245
## TopicDesignation.GE 0.08432 0.2795 0.3017
## TopicDesignation.Mar 0.63632 0.2615 2.4336
## TopicDesignation.Vac 4.18553 0.3383 12.3705
## TopicDesignation.Vap -1.14137 0.3195 -3.5718
##
## Intercepts:
## Value Std. Error t value
## Negative|Negative-Neutral 0.7857 0.2142 3.6685
## Negative-Neutral|Neutral/No opinion/Freedom 2.0531 0.2334 8.7978
## Neutral/No opinion/Freedom|Positive-Neutral 2.0533 0.2334 8.7982
## Positive-Neutral|Positive 4.0427 0.3084 13.1068
##
## Residual Deviance: 1179.775
## AIC: 1199.775
## (1335 observations deleted due to missingness)
coef_table <- coef(summary(model))
p <- pnorm(abs(coef_table[, "t value"]), lower.tail = FALSE) * 2
cbind(coef_table, "p value" = p)
## Value Std. Error t value
## Section2 0.49685980 0.1660929 2.9914577
## TopicDesignation.Env 0.03744155 0.3007908 0.1244771
## TopicDesignation.GE 0.08432139 0.2795291 0.3016551
## TopicDesignation.Mar 0.63632010 0.2614763 2.4335670
## TopicDesignation.Vac 4.18553063 0.3383491 12.3704509
## TopicDesignation.Vap -1.14137216 0.3195484 -3.5718284
## Negative|Negative-Neutral 0.78566185 0.2141649 3.6684898
## Negative-Neutral|Neutral/No opinion/Freedom 2.05314450 0.2333707 8.7977833
## Neutral/No opinion/Freedom|Positive-Neutral 2.05327306 0.2333734 8.7982308
## Positive-Neutral|Positive 4.04274339 0.3084459 13.1068162
## p value
## Section2 2.776490e-03
## TopicDesignation.Env 9.009376e-01
## TopicDesignation.GE 7.629150e-01
## TopicDesignation.Mar 1.495087e-02
## TopicDesignation.Vac 3.776951e-35
## TopicDesignation.Vap 3.544976e-04
## Negative|Negative-Neutral 2.439874e-04
## Negative-Neutral|Neutral/No opinion/Freedom 1.395452e-18
## Neutral/No opinion/Freedom|Positive-Neutral 1.389899e-18
## Positive-Neutral|Positive 3.009701e-39
#why are there extra columns here?
des.table= des_long %>%
split(.$Topic) %>%
purrr::walk(~ print(table(.x$Section, .x$Response)))
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 46 7 12 1
## 2 0 24 13 6 1
##
## Positive-Neutral
## 1 11
## 2 5
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 32 10 26 1
## 2 0 17 11 15 2
##
## Positive-Neutral
## 1 3
## 2 3
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 45 8 12 1
## 2 0 24 12 7 1
##
## Positive-Neutral
## 1 12
## 2 6
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 45 19 7 2
## 2 0 13 17 3 2
##
## Positive-Neutral
## 1 6
## 2 17
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 0 4 15 38
## 2 0 4 2 8 25
##
## Positive-Neutral
## 1 22
## 2 13
##
## Negative Negative-Neutral Neutral/No opinon/Freedom Positive
## 1 0 61 12 5 0
## 2 0 41 5 6 1
##
## Positive-Neutral
## 1 1
## 2 0
When I converted the designation scale to an ordered score: The marijuana topic shows a substantial ideological framing difference between sections.
Students in Section 2 gave responses that were much less negative / more nuanced.
This suggests:
IA may be most influential when the topic is politically or culturally loaded.
Students do not automatically apply IA to all topics.
library(dplyr)
des_long <- des_long %>%
mutate(Response_ord = factor(Response,
levels = c(
"Negative",
"Negative-Neutral",
"Neutral/No opinion/Freedom",
"Positive-Neutral",
"Positive"
),
ordered = TRUE
),
Response_num = as.numeric(Response_ord))
des_long <- des_long %>%
mutate(
Response = na_if(as.character(Response), "NA"),
Section = na_if(as.character(Section), "NA")
) %>%
filter(!is.na(Response), !is.na(Section))
means <- des_long %>%
group_by(Topic, Section) %>%
summarise(
mean_score = mean(Response_num, na.rm = TRUE),
sd = sd(Response_num, na.rm = TRUE),
n = n(),
se = sd / sqrt(n),
.groups = "drop"
)
means
## # A tibble: 12 × 6
## Topic Section mean_score sd n se
## <fct> <chr> <dbl> <dbl> <int> <dbl>
## 1 Designation.DB 1 1.68 1.20 77 0.137
## 2 Designation.DB 2 1.74 1.09 49 0.156
## 3 Designation.Env 1 1.5 0.960 72 0.113
## 4 Designation.Env 2 1.85 1.20 48 0.174
## 5 Designation.GE 1 1.73 1.22 78 0.138
## 6 Designation.GE 2 1.79 1.15 50 0.162
## 7 Designation.Mar 1 1.62 1.04 79 0.117
## 8 Designation.Mar 2 2.55 1.32 52 0.184
## 9 Designation.Vac 1 4.47 0.796 79 0.0896
## 10 Designation.Vac 2 4.20 1.25 52 0.173
## 11 Designation.Vap 1 1.20 0.496 79 0.0558
## 12 Designation.Vap 2 1.19 0.647 53 0.0889
library(ggplot2)
ggplot(means, aes(x = Topic, y = mean_score, fill = Section)) +
geom_col(position = position_dodge(width = 0.7)) +
geom_errorbar(
aes(ymin = mean_score - se, ymax = mean_score + se),
width = .2,
position = position_dodge(.7)
) +
coord_flip() +
labs(
x = "Topic",
y = "Average Ideological Framing Score"
) +
theme_bw()
Across most topics (environmental justice, vaccines, GE, vaping, etc.), the average ideological framing scores were very similar between sections.
This suggests that students do not necessarily change their stance on socio-scientific issues simply because ideological awareness is introduced.
In other words:
AI instruction appears to influence how students reason, rather than what position they ultimately take.
The marijuana topic shows the clearest difference between sections.(1.62-2.55)
Students in the control section expressed more neutral or mixed framing, while the AI section tended to give more negative responses.
Possible explanation:
The AI section may have encouraged students to critically interrogate ideological narratives surrounding the issue, which may have led some students to articulate stronger evaluative positions rather than neutral ones.
Another possibility is that marijuana is a highly culturally framed issue, and the ideological discussion may have prompted students to engage more directly with those frames.
AI section shows more consistent framing
In several topics, the AI section responses cluster more tightly around specific categories (often negative or negative-neutral), while the control section spreads more across categories.
This suggests that AI instruction may have helped students:
recognize ideological framing more clearly
articulate positions more decisively
Rather than defaulting to neutral responses.
ggplot(des_long, aes(x = Section, y = Response_num, fill = Section)) +
geom_violin(alpha = 0.4, trim = FALSE) +
geom_boxplot(width = 0.15, outlier.shape = NA, alpha = 0.6) +
facet_wrap(~Topic) +
theme_bw() +
labs(
y = "Ideological Framing Score",
x = "Section",
title = "Distribution of Ideological Framing Responses by Section"
)
## Warning: Removed 122 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 122 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Control section uses neutral categories more often. This pattern is common when students lack an explicit analytic framework for interpreting socio-scientific issues.
Without ideological awareness instruction, students may:
hedge their answers
default to neutrality
frame responses as individual opinion rather than ideological analysis.
Looking at the qualitative responses:
AI section (Section 1)
Students more often reference:
systemic issues
ideological framing
social structures
policy implications
Responses tend to focus on how the issue is constructed or framed.
Control section (Section 2)
Students more often focus on:
individual harms
fairness
environmental consequences
personal opinion
Responses tend to focus on what the issue is, rather than how it is framed.
resp <- read.csv("Qual_textresp.csv")
library(tidytext)
library(dplyr)
words <- resp %>%
unnest_tokens(word, Vid.EnvInj.Text)
library(stopwords)
words_clean <- words %>%
filter(!word %in% stop_words$word)
word_counts <- words_clean %>%
count(Section, word, sort = TRUE)
word_counts
## Section word n
## 1 2 environmental 34
## 2 1 environmental 30
## 3 2 racism 30
## 4 1 racism 21
## 5 1 injustice 17
## 6 1 people 14
## 7 2 communities 14
## 8 1 communities 13
## 9 2 injustice 12
## 10 2 people 12
## 11 2 health 11
## 12 1 government 10
## 13 2 understanding 10
## 14 1 water 9
## 15 2 water 9
## 16 1 community 8
## 17 1 topic 8
## 18 1 understanding 8
## 19 2 clean 8
## 20 1 environment 7
## 21 1 health 7
## 22 2 issue 7
## 23 2 marginalized 7
## 24 2 resources 7
## 25 1 clean 6
## 26 1 due 6
## 27 1 issues 6
## 28 1 live 6
## 29 1 lot 6
## 30 2 access 6
## 31 2 affected 6
## 32 2 environment 6
## 33 1 affected 5
## 34 1 income 5
## 35 1 money 5
## 36 2 community 5
## 37 2 disparities 5
## 38 2 flint 5
## 39 2 waste 5
## 40 1 access 4
## 41 1 don 4
## 42 1 educated 4
## 43 1 feel 4
## 44 1 issue 4
## 45 1 lower 4
## 46 1 marginalized 4
## 47 1 poor 4
## 48 1 resources 4
## 49 1 time 4
## 50 2 air 4
## 51 2 concerns 4
## 52 2 crisis 4
## 53 2 exposure 4
## 54 2 issues 4
## 55 2 lack 4
## 56 2 michigan 4
## 57 2 race 4
## 58 1 attention 3
## 59 1 care 3
## 60 1 dont 3
## 61 1 flint 3
## 62 1 healthy 3
## 63 1 impact 3
## 64 1 informed 3
## 65 1 involved 3
## 66 1 low 3
## 67 1 michigan 3
## 68 1 pollution 3
## 69 1 poverty 3
## 70 1 protect 3
## 71 1 race 3
## 72 1 risks 3
## 73 1 sad 3
## 74 1 society 3
## 75 1 speak 3
## 76 1 world 3
## 77 2 color 3
## 78 2 concept 3
## 79 2 dont 3
## 80 2 due 3
## 81 2 environments 3
## 82 2 equal 3
## 83 2 hazards 3
## 84 2 healthy 3
## 85 2 leading 3
## 86 2 pollutants 3
## 87 2 pollution 3
## 88 2 social 3
## 89 2 systemic 3
## 90 1 advantage 2
## 91 1 afford 2
## 92 1 air 2
## 93 1 answer 2
## 94 1 cancer 2
## 95 1 city 2
## 96 1 class 2
## 97 1 concerns 2
## 98 1 cycle 2
## 99 1 disadvantaged 2
## 100 1 disasters 2
## 101 1 disturbing 2
## 102 1 fields 2
## 103 1 fight 2
## 104 1 found 2
## 105 1 generation 2
## 106 1 harder 2
## 107 1 hardships 2
## 108 1 hate 2
## 109 1 hazards 2
## 110 1 hit 2
## 111 1 homeless 2
## 112 1 huge 2
## 113 1 inequalities 2
## 114 1 land 2
## 115 1 looked 2
## 116 1 major 2
## 117 1 makes 2
## 118 1 minorities 2
## 119 1 necessarily 2
## 120 1 person 2
## 121 1 pollutants 2
## 122 1 poorly 2
## 123 1 rates 2
## 124 1 socioeconomic 2
## 125 1 status 2
## 126 1 stop 2
## 127 1 subject 2
## 128 1 super 2
## 129 1 systemic 2
## 130 1 topics 2
## 131 1 trash 2
## 132 1 understand 2
## 133 1 worse 2
## 134 2 addressing 2
## 135 2 adverse 2
## 136 2 biggest 2
## 137 2 burdening 2
## 138 2 cancer 2
## 139 2 country 2
## 140 2 don 2
## 141 2 earth 2
## 142 2 economic 2
## 143 2 effects 2
## 144 2 ensure 2
## 145 2 equitable 2
## 146 2 exacerbating 2
## 147 2 existing 2
## 148 2 family 2
## 149 2 green 2
## 150 2 honestly 2
## 151 2 impactful 2
## 152 2 importance 2
## 153 2 increased 2
## 154 2 industrial 2
## 155 2 inequalities 2
## 156 2 involve 2
## 157 2 leads 2
## 158 2 live 2
## 159 2 lot 2
## 160 2 neighborhoods 2
## 161 2 perpetuating 2
## 162 2 poor 2
## 163 2 poverty 2
## 164 2 races 2
## 165 2 racist 2
## 166 2 rates 2
## 167 2 respiratory 2
## 168 2 sites 2
## 169 2 situations 2
## 170 2 society 2
## 171 2 spaces 2
## 172 2 system 2
## 173 2 targeted 2
## 174 2 topic 2
## 175 2 underscores 2
## 176 2 understand 2
## 177 2 unfairly 2
## 178 2 united 2
## 179 NA racism 2
## 180 1 2024 1
## 181 1 3 1
## 182 1 5x 1
## 183 1 ability 1
## 184 1 achieving 1
## 185 1 addressing 1
## 186 1 advocate 1
## 187 1 affect 1
## 188 1 age 1
## 189 1 ago 1
## 190 1 agricultural 1
## 191 1 amount 1
## 192 1 amounts 1
## 193 1 animals 1
## 194 1 apartments 1
## 195 1 arising 1
## 196 1 aspect 1
## 197 1 aware 1
## 198 1 awareness 1
## 199 1 awful 1
## 200 1 bad 1
## 201 1 bear 1
## 202 1 becasue 1
## 203 1 bit 1
## 204 1 brought 1
## 205 1 brunt 1
## 206 1 build 1
## 207 1 call 1
## 208 1 caring 1
## 209 1 caught 1
## 210 1 caused 1
## 211 1 change 1
## 212 1 cheap 1
## 213 1 chemical 1
## 214 1 children 1
## 215 1 choices 1
## 216 1 cities 1
## 217 1 clue 1
## 218 1 colored 1
## 219 1 coming 1
## 220 1 companies 1
## 221 1 company 1
## 222 1 conditions 1
## 223 1 condos 1
## 224 1 considered 1
## 225 1 contributes 1
## 226 1 country 1
## 227 1 created 1
## 228 1 crime 1
## 229 1 crucial 1
## 230 1 current 1
## 231 1 damage 1
## 232 1 day 1
## 233 1 deal 1
## 234 1 dealt 1
## 235 1 decent 1
## 236 1 decision 1
## 237 1 decrease 1
## 238 1 depends 1
## 239 1 difficult 1
## 240 1 directly 1
## 241 1 discrimination 1
## 242 1 discussed 1
## 243 1 discussing 1
## 244 1 disparities 1
## 245 1 disproportionate 1
## 246 1 diversity 1
## 247 1 doesn 1
## 248 1 drink 1
## 249 1 drinking 1
## 250 1 drop 1
## 251 1 eat 1
## 252 1 economic 1
## 253 1 economics 1
## 254 1 ecosystem 1
## 255 1 education 1
## 256 1 effected 1
## 257 1 effects 1
## 258 1 ehtically 1
## 259 1 eliminate 1
## 260 1 endure 1
## 261 1 enhance 1
## 262 1 ensuring 1
## 263 1 entire 1
## 264 1 enviroment 1
## 265 1 enviromental 1
## 266 1 environmentally 1
## 267 1 environtmental 1
## 268 1 equally 1
## 269 1 equitable 1
## 270 1 exacerbates 1
## 271 1 exact 1
## 272 1 existing 1
## 273 1 experience 1
## 274 1 experiences 1
## 275 1 exposed 1
## 276 1 exposure 1
## 277 1 extremely 1
## 278 1 eyes 1
## 279 1 facing 1
## 280 1 families 1
## 281 1 fault 1
## 282 1 financially 1
## 283 1 fix 1
## 284 1 fixes 1
## 285 1 food 1
## 286 1 form 1
## 287 1 formed 1
## 288 1 fought 1
## 289 1 freaks 1
## 290 1 garbage 1
## 291 1 goal 1
## 292 1 god 1
## 293 1 grocery 1
## 294 1 habitats 1
## 295 1 handled 1
## 296 1 hard 1
## 297 1 harmful 1
## 298 1 helping 1
## 299 1 highlights 1
## 300 1 history 1
## 301 1 honest 1
## 302 1 honestly 1
## 303 1 hopeless 1
## 304 1 horrible 1
## 305 1 hospitals 1
## 306 1 human 1
## 307 1 hurt 1
## 308 1 idea 1
## 309 1 ill 1
## 310 1 illnesses 1
## 311 1 impactful 1
## 312 1 impacting 1
## 313 1 impacts 1
## 314 1 impoverished 1
## 315 1 incident 1
## 316 1 insightful 1
## 317 1 instances 1
## 318 1 invalidate 1
## 319 1 invest 1
## 320 1 justice 1
## 321 1 knowledge 1
## 322 1 knowledgeable 1
## 323 1 lack 1
## 324 1 landfill 1
## 325 1 landfills 1
## 326 1 leading 1
## 327 1 learn 1
## 328 1 learned 1
## 329 1 levels 1
## 330 1 lights 1
## 331 1 linger 1
## 332 1 lives 1
## 333 1 living 1
## 334 1 local 1
## 335 1 location 1
## 336 1 louisiana 1
## 337 1 maintain 1
## 338 1 maintained 1
## 339 1 managed 1
## 340 1 means 1
## 341 1 mi 1
## 342 1 minority 1
## 343 1 move 1
## 344 1 negative 1
## 345 1 negatively 1
## 346 1 officials 1
## 347 1 opinions 1
## 348 1 opportunity 1
## 349 1 passed 1
## 350 1 peoples 1
## 351 1 plants 1
## 352 1 plays 1
## 353 1 policies 1
## 354 1 poorer 1
## 355 1 predominant 1
## 356 1 prejudice 1
## 357 1 pressing 1
## 358 1 prevalent 1
## 359 1 prevent 1
## 360 1 prices 1
## 361 1 properly 1
## 362 1 provided 1
## 363 1 public 1
## 364 1 races 1
## 365 1 rational 1
## 366 1 reason 1
## 367 1 refers 1
## 368 1 regulation 1
## 369 1 reoccurrence 1
## 370 1 represent 1
## 371 1 residents 1
## 372 1 respiratory 1
## 373 1 responsibility 1
## 374 1 restaurants 1
## 375 1 restrictions 1
## 376 1 rising 1
## 377 1 risk 1
## 378 1 river 1
## 379 1 roads 1
## 380 1 role 1
## 381 1 run 1
## 382 1 save 1
## 383 1 scapegoat 1
## 384 1 segregated 1
## 385 1 sidewalks 1
## 386 1 simple 1
## 387 1 situation 1
## 388 1 solution 1
## 389 1 someting 1
## 390 1 sooner 1
## 391 1 spend 1
## 392 1 spread 1
## 393 1 stand 1
## 394 1 standpoint 1
## 395 1 status's 1
## 396 1 stems 1
## 397 1 step 1
## 398 1 stores 1
## 399 1 support 1
## 400 1 supported 1
## 401 1 takes 1
## 402 1 themself 1
## 403 1 therefor 1
## 404 1 times 1
## 405 1 today's 1
## 406 1 treat 1
## 407 1 treated 1
## 408 1 unfair 1
## 409 1 unique 1
## 410 1 united 1
## 411 1 unjust 1
## 412 1 vulnerable 1
## 413 1 weapon 1
## 414 1 willpower 1
## 415 1 wrong 1
## 416 1 zoning 1
## 417 1 <NA> 1
## 418 2 absolutely 1
## 419 2 address 1
## 420 2 addressed 1
## 421 2 adequate 1
## 422 2 advantage 1
## 423 2 affecting 1
## 424 2 affects 1
## 425 2 allowing 1
## 426 2 amount 1
## 427 2 amounts 1
## 428 2 animal 1
## 429 2 answer 1
## 430 2 apparently 1
## 431 2 aspect 1
## 432 2 bad 1
## 433 2 based 1
## 434 2 biases 1
## 435 2 birth 1
## 436 2 black 1
## 437 2 blind 1
## 438 2 born 1
## 439 2 broad 1
## 440 2 broken 1
## 441 2 building 1
## 442 2 care 1
## 443 2 causation 1
## 444 2 caused 1
## 445 2 change 1
## 446 2 choose 1
## 447 2 city 1
## 448 2 claim 1
## 449 2 class 1
## 450 2 climate 1
## 451 2 concentrated 1
## 452 2 constantly 1
## 453 2 contributes 1
## 454 2 correlation 1
## 455 2 countries 1
## 456 2 crazy 1
## 457 2 created 1
## 458 2 creates 1
## 459 2 credible 1
## 460 2 cuases 1
## 461 2 current 1
## 462 2 cycles 1
## 463 2 death 1
## 464 2 decade 1
## 465 2 depleting 1
## 466 2 deserves 1
## 467 2 destroys 1
## 468 2 destruction 1
## 469 2 developmental 1
## 470 2 disadvantaged 1
## 471 2 disagree 1
## 472 2 discriminated 1
## 473 2 disposal 1
## 474 2 disproportionately 1
## 475 2 distributed 1
## 476 2 distribution 1
## 477 2 diverse 1
## 478 2 documentary 1
## 479 2 dumped 1
## 480 2 easily 1
## 481 2 economical 1
## 482 2 ecosystem 1
## 483 2 educate 1
## 484 2 educated 1
## 485 2 effected 1
## 486 2 emissions 1
## 487 2 enviormental 1
## 488 2 equally 1
## 489 2 examine 1
## 490 2 examples 1
## 491 2 existed 1
## 492 2 exists 1
## 493 2 extinction 1
## 494 2 extreme 1
## 495 2 eye 1
## 496 2 eyes 1
## 497 2 facilities 1
## 498 2 facing 1
## 499 2 failure 1
## 500 2 failures 1
## 501 2 families 1
## 502 2 feel 1
## 503 2 field 1
## 504 2 frequently 1
## 505 2 gender 1
## 506 2 giving 1
## 507 2 god's 1
## 508 2 government 1
## 509 2 growing 1
## 510 2 happening 1
## 511 2 harm 1
## 512 2 harmful 1
## 513 2 hate 1
## 514 2 hazardous 1
## 515 2 healthcare 1
## 516 2 hierarchy 1
## 517 2 highly 1
## 518 2 horrible 1
## 519 2 housing 1
## 520 2 illnesses 1
## 521 2 imminent 1
## 522 2 impact 1
## 523 2 impacts 1
## 524 2 incident 1
## 525 2 income 1
## 526 2 incomes 1
## 527 2 industry 1
## 528 2 inequity 1
## 529 2 information 1
## 530 2 infrastructure 1
## 531 2 institution 1
## 532 2 judging 1
## 533 2 keeping 1
## 534 2 lakes 1
## 535 2 layer 1
## 536 2 leave 1
## 537 2 life 1
## 538 2 living 1
## 539 2 located 1
## 540 2 low 1
## 541 2 main 1
## 542 2 management 1
## 543 2 market 1
## 544 2 matter 1
## 545 2 means 1
## 546 2 meat 1
## 547 2 mention 1
## 548 2 minimal 1
## 549 2 minority 1
## 550 2 mistaken 1
## 551 2 money 1
## 552 2 move 1
## 553 2 natural 1
## 554 2 negatively 1
## 555 2 neglect 1
## 556 2 occupied 1
## 557 2 occurring 1
## 558 2 officials 1
## 559 2 opinion 1
## 560 2 outcomes 1
## 561 2 outwards 1
## 562 2 ozone 1
## 563 2 perfect 1
## 564 2 personally 1
## 565 2 personlly 1
## 566 2 phenomenon 1
## 567 2 phrase 1
## 568 2 physical 1
## 569 2 policies 1
## 570 2 policy 1
## 571 2 politics 1
## 572 2 pollutes 1
## 573 2 polluting 1
## 574 2 ponds 1
## 575 2 poorer 1
## 576 2 population 1
## 577 2 predominantly 1
## 578 2 prevalent 1
## 579 2 prioritized 1
## 580 2 privilege 1
## 581 2 progressively 1
## 582 2 properly 1
## 583 2 push 1
## 584 2 qualities 1
## 585 2 quicker 1
## 586 2 real 1
## 587 2 recently 1
## 588 2 refer 1
## 589 2 resolve 1
## 590 2 resource's 1
## 591 2 respond 1
## 592 2 result 1
## 593 2 ridiculous 1
## 594 2 sad 1
## 595 2 school 1
## 596 2 severely 1
## 597 2 shouldn 1
## 598 2 significant 1
## 599 2 single 1
## 600 2 socioeconomic 1
## 601 2 solved 1
## 602 2 speak 1
## 603 2 species 1
## 604 2 status 1
## 605 2 steady 1
## 606 2 stereotype 1
## 607 2 stewards 1
## 608 2 studied 1
## 609 2 superior 1
## 610 2 sustainable 1
## 611 2 systemically 1
## 612 2 tend 1
## 613 2 time 1
## 614 2 today's 1
## 615 2 toxins 1
## 616 2 treated 1
## 617 2 treatment 1
## 618 2 types 1
## 619 2 unable 1
## 620 2 unaware 1
## 621 2 underrepresented 1
## 622 2 understnd 1
## 623 2 unequal 1
## 624 2 unfair 1
## 625 2 unfamiliar 1
## 626 2 ungodly 1
## 627 2 views 1
## 628 2 vulnerable 1
## 629 2 watched 1
## 630 2 white 1
## 631 2 whomever 1
## 632 2 workplace 1
## 633 2 world 1
## 634 2 worldy 1
## 635 2 worrying 1
## 636 2 worse 1
## 637 2 worship 1
## 638 2 wrong 1
## 639 NA aspects 1
## 640 NA aspire 1
## 641 NA current 1
## 642 NA discuss 1
## 643 NA environmental 1
## 644 NA honestly 1
## 645 NA impactful 1
## 646 NA injustice 1
## 647 NA understanding 1
## 648 NA ups 1
top_words <- word_counts %>%
group_by(Section) %>%
slice_max(n, n = 20)
top_words
## # A tibble: 58 × 3
## # Groups: Section [3]
## Section word n
## <int> <chr> <int>
## 1 1 environmental 30
## 2 1 racism 21
## 3 1 injustice 17
## 4 1 people 14
## 5 1 communities 13
## 6 1 government 10
## 7 1 water 9
## 8 1 community 8
## 9 1 topic 8
## 10 1 understanding 8
## # ℹ 48 more rows
ggplot(top_words, aes(x = reorder(word, n), y = n, fill = factor(Section))) +
geom_col(show.legend = FALSE) +
facet_wrap(~Section, scales = "free") +
coord_flip() +
labs(
x = "",
y = "Word Frequency",
title = "Most Common Words in Student Responses by Section"
) +
theme_bw()
keywords <- c("system","policy","government","industry","media","bias","justice","freedom")
words_clean %>%
filter(word %in% keywords) %>%
count(Section, word)
## Section word n
## 1 1 government 10
## 2 1 justice 1
## 3 2 government 1
## 4 2 industry 1
## 5 2 policy 1
## 6 2 system 2
ia_terms <- c(
"system","systemic","structure","structural",
"policy","policies",
"government","state",
"industry","corporate","corporation",
"power","control",
"bias","biased",
"media","narrative","framing",
"justice","inequality","equity",
"rights","freedom",
"capitalism","market",
"regulation","regulated"
)
library(dplyr)
library(tidyr)
library(tidytext)
text_long <- resp %>%
pivot_longer(
cols = starts_with("Vid."),
names_to = "Topic",
values_to = "Text"
)
words <- text_long %>%
unnest_tokens(word, Text)
data(stop_words)
words_clean <- words %>%
anti_join(stop_words, by = "word")
ia_words <- words_clean %>%
filter(word %in% ia_terms)
ia_counts <- ia_words %>%
count(Section, Topic, word, sort = TRUE)
ia_counts
## # A tibble: 28 × 4
## Section Topic word n
## <int> <chr> <chr> <int>
## 1 1 Vid.Resp.Text regulated 16
## 2 2 Vid.Resp.Text regulated 12
## 3 1 Vid.EnvInj.Text government 10
## 4 2 Vid.EnvInj.Text systemic 3
## 5 2 Vid.Resp.Text regulation 3
## 6 1 Vid.EnvInj.Text systemic 2
## 7 1 Vid.MolBio.Text control 2
## 8 1 Vid.Resp.Text government 2
## 9 1 Vid.Resp.Text regulation 2
## 10 2 Vid.EnvInj.Text system 2
## # ℹ 18 more rows
ia_summary <- ia_words %>%
count(Section, Topic)
ia_summary
## # A tibble: 8 × 3
## Section Topic n
## <int> <chr> <int>
## 1 1 Vid.EnvInj.Text 15
## 2 1 Vid.MolBio.Text 2
## 3 1 Vid.Resp.Text 22
## 4 1 Vid.Vaccines.Text 3
## 5 2 Vid.EnvInj.Text 10
## 6 2 Vid.MolBio.Text 2
## 7 2 Vid.Resp.Text 16
## 8 2 Vid.Vaccines.Text 3
library(ggplot2)
ggplot(ia_summary,
aes(x = Topic, y = n, fill = factor(Section))) +
geom_col(position = "dodge") +
coord_flip() +
theme_bw() +
labs(
y = "Number of IA terms used",
x = "Topic",
fill = "Section"
)
total_words <- words_clean %>%
count(Section, Topic)
ia_rate <- ia_summary %>%
left_join(total_words, by = c("Section","Topic"),
suffix = c("_ia","_total")) %>%
mutate(rate = n_ia / n_total)
ia_rate
## # A tibble: 8 × 5
## Section Topic n_ia n_total rate
## <int> <chr> <int> <int> <dbl>
## 1 1 Vid.EnvInj.Text 15 620 0.0242
## 2 1 Vid.MolBio.Text 2 534 0.00375
## 3 1 Vid.Resp.Text 22 546 0.0403
## 4 1 Vid.Vaccines.Text 3 526 0.00570
## 5 2 Vid.EnvInj.Text 10 581 0.0172
## 6 2 Vid.MolBio.Text 2 514 0.00389
## 7 2 Vid.Resp.Text 16 657 0.0244
## 8 2 Vid.Vaccines.Text 3 529 0.00567
ia_rate %>%
arrange(desc(rate))
## # A tibble: 8 × 5
## Section Topic n_ia n_total rate
## <int> <chr> <int> <int> <dbl>
## 1 1 Vid.Resp.Text 22 546 0.0403
## 2 2 Vid.Resp.Text 16 657 0.0244
## 3 1 Vid.EnvInj.Text 15 620 0.0242
## 4 2 Vid.EnvInj.Text 10 581 0.0172
## 5 1 Vid.Vaccines.Text 3 526 0.00570
## 6 2 Vid.Vaccines.Text 3 529 0.00567
## 7 2 Vid.MolBio.Text 2 514 0.00389
## 8 1 Vid.MolBio.Text 2 534 0.00375
ggplot(ia_rate,
aes(x = Topic, y = rate, fill = factor(Section))) +
geom_col(position = "dodge") +
coord_flip() +
theme_bw() +
labs(
y = "Rate of IA terms used",
x = "Topic",
fill = "Section"
)
library(dplyr)
library(tidytext)
library(tidyr)
#install.packages("tidylo")
library(tidylo)
log_odds <- word_counts %>%
bind_log_odds(set = Section, feature = word, n = n)
log_odds_plot <- log_odds %>%
slice_max(abs(log_odds_weighted), n = 20) %>%
mutate(
more_common = ifelse(log_odds_weighted > 0, "AI", "Control"),
word = reorder(word, log_odds_weighted)
)
xmax <- max(abs(log_odds_plot$log_odds_weighted)) * 1.15
ggplot(log_odds_plot,
aes(x = word, y = log_odds_weighted, fill = more_common)) +
geom_col(width = 0.8) +
coord_flip() +
geom_hline(yintercept = 0, linewidth = 0.6) +
annotate("text", x = 1, y = xmax * 0.85, label = "AI", hjust = 1, fontface = "bold") +
annotate("text", x = 1, y = -xmax * 0.85, label = "Control", hjust = 0, fontface = "bold") +
scale_fill_manual(values = c("AI" = "#D95F5F", "Control" = "#2C7FB8")) +
scale_y_continuous(limits = c(-xmax, xmax)) +
labs(
x = NULL,
y = "Weighted log odds",
fill = "More common in",
title = "Words most characteristic of each section"
) +
theme_bw() +
theme(
panel.grid.major.y = element_blank(),
legend.position = "none"
)
library(tidylo)
log_odds_topic <- words_clean %>%
count(Topic, Section, word) %>%
group_by(Topic) %>%
filter(sum(n) >= 3) %>%
bind_log_odds(set = Section, feature = word, n = n) %>%
ungroup()
plot_topic <- log_odds_topic %>%
group_by(Topic) %>%
slice_max(abs(log_odds_weighted), n = 8) %>%
mutate(more_common = ifelse(log_odds_weighted > 0, "AI", "Control"))
ggplot(plot_topic,
aes(x = reorder(word, log_odds_weighted), y = log_odds_weighted, fill = more_common)) +
geom_col() +
coord_flip() +
facet_wrap(~Topic, scales = "free_y") +
scale_fill_manual(values = c("AI" = "#D95F5F", "Control" = "#2C7FB8")) +
labs(
x = NULL,
y = "Weighted log odds",
fill = "More common in",
title = "Topic-specific words distinguishing AI and Control sections"
) +
theme_bw()
The results suggest that explicit ideological awareness instruction influences students’ interpretive reasoning about socio-scientific issues, but does not consistently change the direction of their attitudes.
Students exposed to AI instruction appear more likely to:
articulate clearer positions
recognize ideological structures
analyze issues beyond individual harms.
Explicit instruction in ideological awareness does not necessarily change students’ positions on socio-scientific issues, but it appears to influence how students interpret and articulate those issues. Students in the AI section demonstrated more structured ideological reasoning, whereas students in the control section more frequently expressed neutral or opinion-based responses.
Your data suggest:
Students can recognize ideological issues even without explicit instruction, but explicit IA instruction shifts how they reason about those issues.
Specifically, IA instruction appears to:
increase emotional engagement
strengthen science identity
encourage structural interpretation of socio-scientific issues
rather than simply increasing advocacy or opinion strength.
#focus in on long response more clearly
structural_terms <- c(
"system","systemic","structure","structural",
"policy","policies","regulation","regulated",
"government","state","institution","institutions",
"industry","corporate","corporation","companies",
"power","powerful","control",
"inequality","inequity","equity",
"justice","racism","discrimination",
"community","communities","neighborhood","neighborhoods",
"environmental","environment","pollution",
"media","narrative","framing",
"economic","economics","capitalism","market"
)
library(dplyr)
library(tidytext)
library(tidyr)
text_long <- resp %>%
pivot_longer(
cols = starts_with("Vid."),
names_to = "Topic",
values_to = "Text"
)
words <- text_long %>%
unnest_tokens(word, Text)
data(stop_words)
words_clean <- words %>%
anti_join(stop_words, by = "word")
structural_words <- words_clean %>%
filter(word %in% structural_terms)
structural_counts <- structural_words %>%
count(SortingID, Section, Topic)
total_words <- words_clean %>%
count(SortingID, Section, Topic)
structural_rate <- structural_counts %>%
left_join(total_words, by = c("SortingID","Section","Topic"),
suffix = c("_struct","_total")) %>%
mutate(rate = n_struct / n_total)
structural_summary <- structural_rate %>%
filter(!is.na(Section)) %>% # remove NA section rows
group_by(Section, Topic) %>%
summarise(
mean_rate = mean(rate, na.rm = TRUE),
sd = sd(rate, na.rm = TRUE),
n = n(),
se = sd / sqrt(n), # <-- this creates the SE column
.groups = "drop"
)
structural_summary <- structural_summary %>%
filter(!is.na(Section))
library(ggplot2)
ggplot(structural_summary,
aes(x = Topic, y = mean_rate, fill = factor(Section))) +
geom_col(position = "dodge") +
coord_flip() +
theme_bw() +
labs(
y = "Structural reasoning rate",
x = "Topic",
fill = "Section",
title = "Use of structural reasoning language in responses"
)
lm(rate ~ Section + Topic, data = structural_rate)
##
## Call:
## lm(formula = rate ~ Section + Topic, data = structural_rate)
##
## Coefficients:
## (Intercept) Section TopicVid.MolBio.Text
## 0.281361 -0.005751 -0.207970
## TopicVid.Resp.Text TopicVid.Vaccines.Text
## -0.120820 -0.179134
high_structural <- structural_rate %>%
arrange(desc(rate)) %>%
slice_head(n = 10)
resp %>%
filter(SortingID %in% high_structural$SortingID)
## SortingID Section Timepoint Consent Email
## 1 15 1 Post I Consent kme0042@auburn.edu
## 2 19 2 Post I Consent emo0021@auburn.edu
## 3 28 1 Post I Consent rer0061@auburn.edu
## 4 39 2 Post I Consent Hno0009@auburn.edu
## 5 47 2 Post I Consent lrp0037@auburn.edu
## 6 48 2 Post I Consent mzs0263@auburn.edu
## 7 69 1 Post I Consent akd0082@auburn.edu
## 8 86 1 Post I Consent mzp0147@auburn.edu
## 9 104 2 Post I Consent mar0147@auburn.edu
## 10 119 1 Post I Consent meg0156@auburn.edu
## Learn.Science
## 1 So we can know how the world works
## 2 To learn about how the world around them works.
## 3 Learning science is somethings that is important because it can help you learn more about how your body works and how the earth works.
## 4 To understand basic concepts of how the world and things around us work
## 5 to understand our world and how it works
## 6 Because science is objective truth
## 7
## 8
## 9 It is important for students to learn science because science greatly effects their own life and the world around them.
## 10 It is important to learn biology so that people can have basic knowledge of life.
## Benefits
## 1 It would help people have a better understanding of the more political parts of the science field so they can form their own opinions.
## 2 Make people more aware of what's going on in today's society.
## 3 Having different ideal that others might not of heard about which might open their eye to new ideals.
## 4 Biology would be inclusive and appealing to all groups if this was included
## 5 to have a more unbiased education
## 6 With my biology class being so introductory, I'm not sure which materials would benefit from ideological awareness, but maybe making sure to emphasize the difference in sex and gender when teaching about the reproductive system.
## 7 Students would know some things are not soley based on facts.
## 8 to have awareness.
## 9 More ideologically aware resources would benefit biology content by helping people learn more about their own anatomy and life.
## 10 \nI think this may be able to help students understand a few things better
## Downsides
## 1 You could offend people if you are not careful.
## 2 It can deteriorate from class material.
## 3 Some people not be open to learning new ideas, which might lead them to not evey listening to other ideas.
## 4 I think some people would get annoyed with the fact that it has to be mentioned
## 5 take time away from material
## 6 I don't see any downsides
## 7 Some people would argue these topics.
## 8 may sway people\xd5s thought processes
## 9 One downside would be that many aspects of ideologies can lead to controversy and take away from the facts.
## 10 this could greatly offend people.
## Vid.Resp.Text
## 1 I think that they should be highly regulated.
## 2 Vaping and using marijuana is bad for the public's health.
## 3 Vaping and marijuana use is not good for the health of user, and the government needed to regulate them more.
## 4 I think vaping should be banned but marijuana should be available for prescription use
## 5 I do not believe in the recreational use of vapes/marijuana because it has been proven harmful for us
## 6 I think vaping is horrible. I think that right now, marijuana use is dangerous in our state because it is not regulated because it is illegal. I think that there are safe ways to use marijuana, but I do not think that vaping is ever safe.
## 7 I do not do it because it is bad for health.
## 8 it\xd5s not good for you, it could harm those around you
## 9 Marijuana can be very beneficial to those suffering with chronic pain. However, Marijuana should be well regulated and only accessible to those with a prescription.
## 10 I think that it has not been around long enough to know the harmful side effects of vaping
## Vid.MolBio.Text
## 1 I believe we should not allow this because it will create an even bigger divide in society because not everyone will have access to the technology.
## 2 I don't know enough information on human genome editing to give my opinion.
## 3 Designer babies can be bad for the world.
## 4 I think that it is wrong and having a baby should be a natural process unless altering it to not have death related diseases
## 5 I think the ethical and societal implications outweigh the other pros for genetic editing
## 6 I don't really know enough about this topic to make a thoughtful comment.
## 7 Not sure what this is.
## 8 i don\xd5t understand it enough to have a strong thought about it
## 9 I do not believe genetic editing is ethical if people are trying to make children look a certain way. This practice can be dangerous and is unnatural. Genetic editing should only be used when necessary to prevent a life threatening mutation.
## 10 I think that this is wrong based on my religion
## Vid.Vaccines.Text
## 1 We should all try to get vaccinated but we shouldn\xd5t be forced to do anything we don\xd5t want to do. We also need to have correct information about it.
## 2 Vaccines are good for society's health.
## 3 Misinformation has a really negative impact on whether or not people get vaccines.
## 4 I think vaccinations are important to the general public and stop disease from spreading
## 5 I do not know enough information on these vaccinations
## 6 I think that it is important to get vaccinated for the benefit of the public.
## 7 It can help people not spread diseases that they may have.
## 8 vaccines should be required, for not only the person\xd5s safety but those around them
## 9 Vaccinations are soemtimes useful when preventing possible disease. Vaccines should be well studied and should not be experimental.
## 10 I think that vaccines are good but they SHOULD NOT be forced on the public
## Vid.EnvInj.Text
## 1 I find the most concerning impact to be that these things are passed down from generation to generation.
## 2 I do not know enough about environmental racism to give my opinion.
## 3 Environmental racism and injustice is something that need to be worked on within the United State.
## 4 I don\xd5t know much about environmental racism and injustice
## 5 I find environmental racism concerning and impactful
## 6 I don't know much about environmental racism, but I do find the concept very concerning.
## 7 I am not aware of any environmental racism concerns.
## 8 just take care of the environment and each other.
## 9 I am unfamiliar with environmental racism
## 10 I have no clue what environmental racism is
## X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8 X.9 X.10 X.11 X.12 X.13 X.14 X.15
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 7 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 8 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 9 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## 10 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
summary(lm(rate ~ Section * Topic, data = structural_rate))
##
## Call:
## lm(formula = rate ~ Section * Topic, data = structural_rate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24113 -0.08461 -0.02008 0.04872 0.40360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.24153 0.04950 4.880 3.28e-06 ***
## Section 0.02154 0.03210 0.671 0.503
## TopicVid.MolBio.Text -0.05913 0.17742 -0.333 0.740
## TopicVid.Resp.Text -0.03934 0.08607 -0.457 0.648
## TopicVid.Vaccines.Text -0.07817 0.13059 -0.599 0.551
## Section:TopicVid.MolBio.Text -0.09438 0.10549 -0.895 0.373
## Section:TopicVid.Resp.Text -0.05704 0.05731 -0.995 0.322
## Section:TopicVid.Vaccines.Text -0.07044 0.08672 -0.812 0.418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1376 on 121 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2532, Adjusted R-squared: 0.21
## F-statistic: 5.862 on 7 and 121 DF, p-value: 7.124e-06
structural_summary$Topic <- factor(
structural_summary$Topic,
levels = c(
"Vid.EnvInj.Text",
"Vid.Resp.Text",
"Vid.Vaccines.Text",
"Vid.MolBio.Text"
)
)
ggplot(structural_summary,
aes(x = Topic, y = mean_rate, color = factor(Section))) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = mean_rate - se,
ymax = mean_rate + se),
width = .1) +
coord_flip() +
theme_bw() +
labs(
y = "Structural reasoning rate",
x = "Topic",
color = "Section"
)
AI students appear more likely to:
connect science to systems
recognize social structures
interpret responsibility beyond individuals
library(dplyr)
library(tidyr)
library(tidytext)
structural_terms <- c(
"system","systemic","structure","structural",
"policy","policies","regulation","regulated",
"government","state","institution","institutions",
"industry","corporate","corporation","companies",
"power","control",
"inequality","inequity","equity",
"justice","racism","discrimination",
"community","communities","neighborhood","neighborhoods",
"environmental","environment",
"media","narrative","framing",
"economic","economics","capitalism","market"
)
text_long <- resp %>%
pivot_longer(
cols = starts_with("Vid."),
names_to = "Topic",
values_to = "Text"
) %>%
mutate(
Section = na_if(as.character(Section), "NA"),
Section = recode(Section, "1" = "AI", "2" = "Control")
) %>%
filter(!is.na(Section), !is.na(Text), Text != "")
words_clean <- text_long %>%
unnest_tokens(word, Text) %>%
anti_join(stop_words, by = "word")
structural_by_response <- words_clean %>%
group_by(SortingID, Section, Topic) %>%
summarise(
total_words = n(),
structural_n = sum(word %in% structural_terms),
structural_present = as.integer(structural_n > 0),
structural_rate = structural_n / total_words,
.groups = "drop"
)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
model_presence <- glmer(
structural_present ~ Section + (1 | Topic) + (1 | SortingID),
data = structural_by_response,
family = binomial
)
summary(model_presence)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: structural_present ~ Section + (1 | Topic) + (1 | SortingID)
## Data: structural_by_response
##
## AIC BIC logLik -2*log(L) df.resid
## 455.8 472.7 -223.9 447.8 500
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8812 -0.4372 -0.2473 0.3373 4.4931
##
## Random effects:
## Groups Name Variance Std.Dev.
## SortingID (Intercept) 1.040 1.020
## Topic (Intercept) 2.209 1.486
## Number of obs: 504, groups: SortingID, 133; Topic, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7854 0.7781 -2.295 0.0218 *
## SectionControl 0.3311 0.3119 1.061 0.2885
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## SectinCntrl -0.175
exp(fixef(model_presence))
## (Intercept) SectionControl
## 0.1677246 1.3924618
exp(confint(model_presence, parm = "beta_", method = "Wald"))
## 2.5 % 97.5 %
## (Intercept) 0.03649859 0.7707571
## SectionControl 0.75555417 2.5662618
library(lme4)
model_rate <- lmer(
structural_rate ~ Section + (1 | Topic) + (1 | SortingID),
data = structural_by_response
)
summary(model_rate)
## Linear mixed model fit by REML ['lmerMod']
## Formula: structural_rate ~ Section + (1 | Topic) + (1 | SortingID)
## Data: structural_by_response
##
## REML criterion at convergence: -853.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7268 -0.3660 -0.0739 0.0050 4.9553
##
## Random effects:
## Groups Name Variance Std.Dev.
## SortingID (Intercept) 0.0001068 0.01033
## Topic (Intercept) 0.0059033 0.07683
## Residual 0.0101001 0.10050
## Number of obs: 504, groups: SortingID, 133; Topic, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.052119 0.038868 1.341
## SectionControl 0.008608 0.009320 0.924
##
## Correlation of Fixed Effects:
## (Intr)
## SectinCntrl -0.096
model_rate_topic <- lmer(
structural_rate ~ Section * Topic + (1 | SortingID),
data = structural_by_response
)
summary(model_rate_topic)
## Linear mixed model fit by REML ['lmerMod']
## Formula: structural_rate ~ Section * Topic + (1 | SortingID)
## Data: structural_by_response
##
## REML criterion at convergence: -838.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0270 -0.3505 -0.0757 -0.0184 5.1401
##
## Random effects:
## Groups Name Variance Std.Dev.
## SortingID (Intercept) 0.0001306 0.01143
## Residual 0.0099822 0.09991
## Number of obs: 504, groups: SortingID, 133
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.14740 0.01202 12.264
## SectionControl 0.05403 0.01896 2.849
## TopicVid.MolBio.Text -0.14300 0.01661 -8.610
## TopicVid.Resp.Text -0.10253 0.01645 -6.231
## TopicVid.Vaccines.Text -0.13727 0.01641 -8.367
## SectionControl:TopicVid.MolBio.Text -0.05513 0.02623 -2.101
## SectionControl:TopicVid.Resp.Text -0.06425 0.02592 -2.479
## SectionControl:TopicVid.Vaccines.Text -0.05787 0.02596 -2.229
##
## Correlation of Fixed Effects:
## (Intr) SctnCn TV.MB. TV.R.T TV.V.T SC:TV.M SC:TV.R
## SectinCntrl -0.634
## TpcVd.MlB.T -0.715 0.453
## TpcVd.Rsp.T -0.722 0.458 0.522
## TpcVd.Vcc.T -0.724 0.459 0.524 0.529
## ScC:TV.MB.T 0.453 -0.714 -0.633 -0.331 -0.332
## SctC:TV.R.T 0.458 -0.723 -0.332 -0.635 -0.336 0.523
## SctC:TV.V.T 0.458 -0.722 -0.331 -0.334 -0.632 0.522 0.528
library(ggeffects)
library(ggplot2)
model_topic <- glmer(
structural_present ~ Section * Topic + (1 | SortingID),
data = structural_by_response,
family = binomial
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00267592 (tol = 0.002, component 1)
summary(model_topic)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: structural_present ~ Section * Topic + (1 | SortingID)
## Data: structural_by_response
##
## AIC BIC logLik -2*log(L) df.resid
## 444.4 482.4 -213.2 426.4 495
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5620 -0.3672 -0.2174 0.2520 6.3521
##
## Random effects:
## Groups Name Variance Std.Dev.
## SortingID (Intercept) 1.406 1.186
## Number of obs: 504, groups: SortingID, 133
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3842 0.3127 1.229 0.219
## SectionControl 0.7216 0.5116 1.411 0.158
## TopicVid.MolBio.Text -4.1764 0.7307 -5.715 1.09e-08 ***
## TopicVid.Resp.Text -1.6779 0.4263 -3.936 8.29e-05 ***
## TopicVid.Vaccines.Text -3.2420 0.5603 -5.786 7.19e-09 ***
## SectionControl:TopicVid.MolBio.Text 0.0754 0.9546 0.079 0.937
## SectionControl:TopicVid.Resp.Text -0.7639 0.6555 -1.165 0.244
## SectionControl:TopicVid.Vaccines.Text -0.6430 0.8137 -0.790 0.429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SctnCn TV.MB. TV.R.T TV.V.T SC:TV.M SC:TV.R
## SectinCntrl -0.593
## TpcVd.MlB.T -0.389 0.171
## TpcVd.Rsp.T -0.627 0.325 0.389
## TpcVd.Vcc.T -0.499 0.231 0.369 0.480
## ScC:TV.MB.T 0.265 -0.451 -0.646 -0.193 -0.151
## SctC:TV.R.T 0.375 -0.656 -0.135 -0.548 -0.183 0.367
## SctC:TV.V.T 0.306 -0.534 -0.117 -0.211 -0.538 0.309 0.434
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00267592 (tol = 0.002, component 1)
VarCorr(model_presence)
## Groups Name Std.Dev.
## SortingID (Intercept) 1.0196
## Topic (Intercept) 1.4861
student_structural <- structural_by_response %>%
group_by(SortingID, Section) %>%
summarise(
mean_rate = mean(structural_rate, na.rm = TRUE),
.groups = "drop"
)
ggplot(student_structural,
aes(x = as.factor(Section), y = mean_rate)) +
geom_violin(fill = "grey80") +
geom_boxplot(width = .1) +
geom_jitter(width = .1, alpha = .5) +
theme_bw()
student_structural <- student_structural %>%
mutate(level = case_when(
mean_rate == 0 ~ "None",
mean_rate < 0.05 ~ "Low",
mean_rate < 0.10 ~ "Moderate",
TRUE ~ "High"
))
student_structural <- student_structural %>%
filter(!is.na(Section))
ggplot(student_structural,
aes(x = level, fill = factor(Section))) +
geom_bar(position = "dodge") +
theme_bw() +
labs(
x = "Structural reasoning level",
y = "Number of students",
fill = "Section"
)
student_structural$level <- factor(
student_structural$level,
levels = c("None","Low","Moderate","High")
)
ggplot(student_structural,
aes(x = factor(Section), fill = level)) +
geom_bar(position = "fill") +
theme_bw() +
labs(
x = "Section",
y = "Proportion of students",
fill = "Structural reasoning level"
)
table_levels <- table(student_structural$Section,
student_structural$level)
chisq.test(table_levels)
##
## Pearson's Chi-squared test
##
## data: table_levels
## X-squared = 1.695, df = 3, p-value = 0.638
This means many students never used structural reasoning language in any response.
That is actually a major finding:
Structural reasoning is not a default explanation style for most students.
This is very consistent with research showing students tend to default to proximate explanations (pollution, waste, sickness) rather than systemic explanations.
library(dplyr)
library(tidyr)
library(tidytext)
structural_terms <- c(
"system","systemic","structure","structural",
"policy","policies","regulation","regulated",
"government","state","institution","institutions",
"industry","corporate","corporation","companies",
"power","control",
"inequality","inequity","equity",
"justice","racism","discrimination",
"community","communities","neighborhood","neighborhoods",
"environmental","environment",
"media","narrative","framing",
"economic","economics","capitalism","market"
)
text_long <- resp %>%
pivot_longer(
cols = starts_with("Vid."),
names_to = "Topic",
values_to = "Text"
) %>%
mutate(
Section = na_if(as.character(Section), "NA")
) %>%
filter(!is.na(Section), !is.na(Text), Text != "")
words_clean <- text_long %>%
unnest_tokens(word, Text) %>%
anti_join(stop_words, by = "word")
structural_by_response <- words_clean %>%
group_by(SortingID, Section, Topic) %>%
summarise(
total_words = n(),
structural_n = sum(word %in% structural_terms),
structural_present = structural_n > 0,
structural_rate = structural_n / total_words,
.groups = "drop"
)
library(dplyr)
library(tidyr)
library(tidytext)
structural_terms <- c(
"system","systemic","structure","structural",
"policy","policies","regulation","regulated",
"government","state","institution","institutions",
"industry","corporate","corporation","companies",
"power","control",
"inequality","inequity","equity",
"justice","racism","discrimination",
"community","communities","neighborhood","neighborhoods",
"environmental","environment",
"media","narrative","framing",
"economic","economics","capitalism","market"
)
text_long <- resp %>%
pivot_longer(
cols = starts_with("Vid."),
names_to = "Topic",
values_to = "Text"
) %>%
mutate(
Section = na_if(as.character(Section), "NA")
) %>%
filter(!is.na(Section), !is.na(Text), Text != "")
words_clean <- text_long %>%
unnest_tokens(word, Text) %>%
anti_join(stop_words, by = "word")
structural_by_response <- words_clean %>%
group_by(SortingID, Section, Topic) %>%
summarise(
total_words = n(),
structural_n = sum(word %in% structural_terms),
structural_present = structural_n > 0,
structural_rate = structural_n / total_words,
.groups = "drop"
)
structural_responses <- structural_by_response %>%
filter(structural_present)
structural_words_only <- words_clean %>%
semi_join(structural_responses, by = c("SortingID", "Section", "Topic"))
library(tidylo)
word_counts_structural <- structural_words_only %>%
count(Section, word) %>%
group_by(word) %>%
filter(sum(n) >= 3) %>%
ungroup()
log_odds_structural <- word_counts_structural %>%
bind_log_odds(set = Section, feature = word, n = n)
library(ggplot2)
plot_words <- log_odds_structural %>%
slice_max(abs(log_odds_weighted), n = 20) %>%
mutate(
more_common = ifelse(log_odds_weighted > 0, "AI", "Control"),
word = reorder(word, log_odds_weighted)
)
ggplot(plot_words,
aes(x = word, y = log_odds_weighted, fill = more_common)) +
geom_col() +
coord_flip() +
scale_fill_manual(values = c("AI" = "#D95F5F", "Control" = "#2C7FB8")) +
theme_bw() +
labs(
x = NULL,
y = "Weighted log odds",
fill = "More common in",
title = "How AI vs Control articulate structural reasoning"
)
student_profiles <- structural_by_response %>%
group_by(SortingID, Section) %>%
summarise(
n_topics = n(),
n_structural = sum(structural_present),
prop_structural = mean(structural_present),
mean_rate = mean(structural_rate, na.rm = TRUE),
max_rate = max(structural_rate, na.rm = TRUE),
.groups = "drop"
)
student_profiles <- student_profiles %>%
mutate(archetype = case_when(
n_structural == 0 ~ "None",
prop_structural <= 0.25 ~ "Occasional",
prop_structural <= 0.75 ~ "Mixed",
prop_structural > 0.75 ~ "Consistent"
))
ggplot(student_profiles,
aes(x = factor(Section), fill = archetype)) +
geom_bar(position = "fill") +
theme_bw() +
labs(
x = "Section",
y = "Proportion of students",
fill = "Reasoning archetype",
title = "Student structural reasoning archetypes by section"
)
archetype_tab <- table(student_profiles$Section, student_profiles$archetype)
chisq.test(archetype_tab)
## Warning in chisq.test(archetype_tab): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: archetype_tab
## X-squared = 2.1101, df = 3, p-value = 0.5499
archetype_tab
##
## Consistent Mixed None Occasional
## 1 1 18 30 31
## 2 0 17 17 19
ggplot(student_profiles, aes(x = factor(Section), y = prop_structural)) +
geom_violin(fill = "grey85") +
geom_boxplot(width = 0.15, outlier.shape = NA) +
geom_jitter(width = 0.08, alpha = 0.6) +
theme_bw() +
labs(
x = "Section",
y = "Proportion of responses with structural reasoning",
title = "Cross-topic consistency in structural reasoning"
)
Social Engagement