library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
# Load the combined quiz export (228 unique users with quiz data).
quizzes <- read.csv("Quiz_data_combined.csv")

# Tag each row with a question "Type": a response whose text contains
# TRUE or FALSE (any case) is an "attention" check, anything else is a
# "viewpoint" question.
quizzes$Type <- ifelse(
  grepl("TRUE|FALSE", quizzes$Response.Text, ignore.case = TRUE),
  "attention",
  "viewpoint"
)

# Engagement is assessed on the attention-check rows only.
att.check <- quizzes[quizzes$Type == "attention", , drop = FALSE]

# score = 1 when the `Correct.` column equals "TRUE", 0 otherwise
# (a missing `Correct.` value stays NA).
att.check$score <- as.numeric(att.check$Correct. == "TRUE")
# Mean attention-check score for every Topic x Section combination.
summarize(
  group_by(att.check, Topic, Section),
  average_score = mean(score)
)
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 3
## # Groups: Topic [6]
## Topic Section average_score
## <chr> <chr> <dbl>
## 1 BioDiv Control 0.769
## 2 BioDiv IA 0.863
## 3 Digestion Control 1
## 4 Digestion IA 0.993
## 5 EnvInjustice Control 0.887
## 6 EnvInjustice IA 0.981
## 7 MolBiology Control 0.927
## 8 MolBiology IA 0.705
## 9 Respiration Control 0.787
## 10 Respiration IA 0.738
## 11 Vaccines Control 0.815
## 12 Vaccines IA 0.865
# Mean attention-check score per Section, pooled over all topics.
summarize(
  group_by(att.check, Section),
  average_score = mean(score)
)
## # A tibble: 2 × 2
## Section average_score
## <chr> <dbl>
## 1 Control 0.852
## 2 IA 0.839
# Mean score for each Topic x Section cell, stored for plotting.
# Missing scores are dropped before averaging (na.rm = TRUE).
avg_scores <- summarize(
  group_by(att.check, Topic, Section),
  avg_score = mean(score, na.rm = TRUE)
)
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
# Bar chart of the mean score per Section, one panel per Topic.
# geom_col() draws bars whose height is the value in the data, i.e. the
# same thing as geom_bar(stat = "identity").
plot <- ggplot(data = avg_scores, mapping = aes(x = Section, y = avg_score)) +
  geom_col(position = "dodge", fill = "blue") +
  facet_wrap(vars(Topic), scales = "free") +
  ggtitle("Average Score per Section Faceted by Topic") +
  xlab("Section") +
  ylab("Average Score") +
  theme_minimal()

# Render the chart.
print(plot)

# Linear mixed-effects model over all topics: Section as the fixed effect,
# with a random intercept for each user (User.Email).
summary(
  lme(
    fixed = score ~ Section,
    random = ~ 1 | User.Email,
    data = att.check
  )
)
## Linear mixed-effects model fit by REML
## Data: att.check
## AIC BIC logLik
## 1852.5 1875.401 -922.2498
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.04760064 0.3595739
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8519613 0.01315351 2040 64.77062 0.0000
## SectionIA -0.0127315 0.01684294 225 -0.75590 0.4505
## Correlation:
## (Intr)
## SectionIA -0.781
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3950415 0.3748525 0.4157131 0.4565736 0.6311915
##
## Number of Observations: 2267
## Number of Groups: 227
# One data frame per Topic.
split_data <- split(att.check, f = att.check$Topic)

# Fit the Section model to a single topic's rows.
#   df: data frame holding `score`, `Section` and `User.Email` columns.
# Returns the fitted `lme` object (fixed effect: Section; random intercept
# per User.Email).
fit_model <- function(df) {
  lme(
    fixed = score ~ Section,
    random = ~ 1 | User.Email,
    data = df
  )
}

# Fit every topic, then summarise each fit; both lists are named by Topic.
models <- lapply(X = split_data, FUN = fit_model)
summaries <- lapply(X = models, FUN = summary)

# Print all summaries for review.
summaries
## $BioDiv
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 379.2015 395.2269 -185.6008
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 4.557599e-06 0.377255
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7687500 0.02982463 204 25.775680 0.0000
## SectionIA 0.0941532 0.03825421 202 2.461251 0.0147
## Correlation:
## (Intr)
## SectionIA -0.78
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.2873209 0.3634061 0.3634061 0.6129806 0.6129806
##
## Number of Observations: 408
## Number of Groups: 204
##
## $Digestion
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -553.6341 -540.0234 280.817
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.06261081 0.02347914
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 1.0000000 0.007128196 222 140.28796 0.0000
## SectionIA -0.0073529 0.009148169 222 -0.80376 0.4224
## Correlation:
## (Intr)
## SectionIA -0.779
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.212369e+00 2.364275e-14 3.861014e-02 3.861014e-02 3.861014e-02
##
## Number of Observations: 224
## Number of Groups: 224
##
## $EnvInjustice
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -21.44766 -6.050361 14.72383
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.02535476 0.2271112
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8873239 0.01929483 174 45.98766 0e+00
## SectionIA 0.0933535 0.02505320 173 3.72621 3e-04
## Correlation:
## (Intr)
## SectionIA -0.77
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.26657464 0.08301035 0.08301035 0.48406097 0.53760489
##
## Number of Observations: 349
## Number of Groups: 175
##
## $MolBiology
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 433.7067 449.9244 -212.8533
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.519978e-05 0.393839
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9268293 0.03075366 214 30.137204 0
## SectionIA -0.2222838 0.03915766 212 -5.676637 0
## Correlation:
## (Intr)
## SectionIA -0.785
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3533203 0.1857884 0.1857884 0.7501912 0.7501912
##
## Number of Observations: 428
## Number of Groups: 214
##
## $Respiration
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 516.3417 532.6339 -254.1709
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.652548e-05 0.4292832
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7869822 0.03302179 217 23.832212 0.0000
## SectionIA -0.0491545 0.04219764 217 -1.164864 0.2454
## Correlation:
## (Intr)
## SectionIA -0.783
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8332471 0.4962173 0.4962173 0.6107210 0.6107210
##
## Number of Observations: 436
## Number of Groups: 219
##
## $Vaccines
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 354.685 370.846 -173.3425
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 3.733875e-05 0.3609937
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8148148 0.02836234 211 28.728755 0.0000
## SectionIA 0.0505698 0.03613364 209 1.399522 0.1631
## Correlation:
## (Intr)
## SectionIA -0.785
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3972292 0.3729023 0.3729023 0.5129873 0.5129873
##
## Number of Observations: 422
## Number of Groups: 211
# Goal of this section: drop a user's data for a topic when their
# attention-check total for that topic is 0.
# First step: total score per User.Email x Topic (missing scores ignored),
# returned as an ungrouped table.
total_scores <- ungroup(
  summarize(
    group_by(att.check, User.Email, Topic),
    total_score = sum(score, na.rm = TRUE)
  )
)
## `summarise()` has grouped output by 'User.Email'. You can override using the
## `.groups` argument.
# User.Email x Topic pairs whose attention-check total is zero.
zero_scores <- total_scores %>%
  filter(total_score == 0) %>%
  select(User.Email, Topic)

# Drop every attention-check row belonging to one of those pairs.
att.check <- att.check %>%
  anti_join(zero_scores, by = c("User.Email", "Topic"))

# Recompute the per-user, per-topic totals on the filtered data ...
sum_scores <- att.check %>%
  group_by(User.Email, Topic) %>%
  summarize(total_score = sum(score, na.rm = TRUE)) %>%
  ungroup()

# ... and actually verify that none of them is 0, so the script stops here
# instead of silently carrying unfiltered rows into the later models.
stopifnot(all(sum_scores$total_score != 0))
## `summarise()` has grouped output by 'User.Email'. You can override using the
## `.groups` argument.
# Mean score per Topic x Section, now computed on the filtered data.
summarize(
  group_by(att.check, Topic, Section),
  average_score = mean(score)
)
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 3
## # Groups: Topic [6]
## Topic Section average_score
## <chr> <chr> <dbl>
## 1 BioDiv Control 0.778
## 2 BioDiv IA 0.877
## 3 Digestion Control 1
## 4 Digestion IA 1
## 5 EnvInjustice Control 0.9
## 6 EnvInjustice IA 0.981
## 7 MolBiology Control 0.95
## 8 MolBiology IA 0.710
## 9 Respiration Control 0.796
## 10 Respiration IA 0.749
## 11 Vaccines Control 0.825
## 12 Vaccines IA 0.886
# Mean score per Section over all topics, on the filtered data.
summarize(
  group_by(att.check, Section),
  average_score = mean(score)
)
## # A tibble: 2 × 2
## Section average_score
## <chr> <dbl>
## 1 Control 0.864
## 2 IA 0.850
library(nlme)
# Refit the all-topics mixed model on the filtered data: Section as the
# fixed effect, random intercept per user (User.Email).
summary(
  lme(
    fixed = score ~ Section,
    random = ~ 1 | User.Email,
    data = att.check
  )
)
## Linear mixed-effects model fit by REML
## Data: att.check
## AIC BIC logLik
## 1697.693 1720.543 -844.8465
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 0.0001212476 0.3519632
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8636884 0.01191216 2011 72.50480 0.0000
## SectionIA -0.0138716 0.01525296 225 -0.90944 0.3641
## Correlation:
## (Intr)
## SectionIA -0.781
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.4539173 0.3872892 0.4267008 0.4267012 0.4267021
##
## Number of Observations: 2238
## Number of Groups: 227
# One data frame per Topic, built from the filtered attention-check rows.
split_data <- split(att.check, f = att.check$Topic)

# Fit the Section model to a single topic's rows.
#   df: data frame holding `score`, `Section` and `User.Email` columns.
# Returns the fitted `lme` object (fixed effect: Section; random intercept
# per User.Email).
fit_model <- function(df) {
  lme(
    fixed = score ~ Section,
    random = ~ 1 | User.Email,
    data = df
  )
}

# Fit every topic, then summarise each fit; both lists are named by Topic.
models <- lapply(X = split_data, FUN = fit_model)
summaries <- lapply(X = models, FUN = summary)

# Print all summaries for review.
summaries
## $BioDiv
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 349.4332 365.399 -170.7166
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 8.703664e-06 0.3659177
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7784810 0.02911085 201 26.741953 0.000
## SectionIA 0.0985682 0.03736569 199 2.637932 0.009
## Correlation:
## (Intr)
## SectionIA -0.779
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.3968481 0.3360067 0.3360067 0.6053792 0.6053792
##
## Number of Observations: 402
## Number of Groups: 201
##
## $Digestion
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -15313.71 -15300.11 7660.853
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 2.084409e-16 1.388871e-17
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 1 2.226914e-17 221 4.490518e+16 0.0000
## SectionIA 0 2.862128e-17 221 -1.000000e+00 0.4207
## Correlation:
## (Intr)
## SectionIA -0.778
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## 0 0 0 0 0
##
## Number of Observations: 223
## Number of Groups: 223
##
## $EnvInjustice
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## -51.04712 -35.67294 29.52356
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.48505e-05 0.2188422
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9000000 0.01849554 173 48.66038 0e+00
## SectionIA 0.0806763 0.02394675 172 3.36899 9e-04
## Correlation:
## (Intr)
## SectionIA -0.772
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.48120344 0.08829957 0.08829957 0.45695030 0.45695032
##
## Number of Observations: 347
## Number of Groups: 174
##
## $MolBiology
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 404.0105 420.1715 -198.0052
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.207588e-05 0.3828284
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.9500000 0.03026525 211 31.389139 0
## SectionIA -0.2400763 0.03841048 209 -6.250281 0
## Correlation:
## (Intr)
## SectionIA -0.788
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.4815293 0.1306068 0.1306068 0.7577189 0.7577189
##
## Number of Observations: 422
## Number of Groups: 211
##
## $Respiration
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 496.4418 512.6782 -244.2209
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 7.42436e-06 0.4228163
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.7964072 0.03271851 214 24.341184 0.0000
## SectionIA -0.0473578 0.04183597 214 -1.131987 0.2589
## Correlation:
## (Intr)
## SectionIA -0.782
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.8835774 0.4815160 0.4815160 0.5935215 0.5935215
##
## Number of Observations: 430
## Number of Groups: 216
##
## $Vaccines
## Linear mixed-effects model fit by REML
## Data: df
## AIC BIC logLik
## 308.808 324.8921 -150.404
##
## Random effects:
## Formula: ~1 | User.Email
## (Intercept) Residual
## StdDev: 1.685294e-05 0.3441218
##
## Fixed effects: score ~ Section
## Value Std.Error DF t-value p-value
## (Intercept) 0.8250000 0.02720522 207 30.325062 0.0000
## SectionIA 0.0608268 0.03473246 205 1.751295 0.0814
## Correlation:
## (Intr)
## SectionIA -0.783
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.5741660 0.3317814 0.3317814 0.5085408 0.5085408
##
## Number of Observations: 414
## Number of Groups: 207
library(ggplot2)
# Mean score for each Topic x Section cell of the filtered data, stored for
# plotting. Missing scores are dropped before averaging (na.rm = TRUE).
avg_scores <- summarize(
  group_by(att.check, Topic, Section),
  avg_score = mean(score, na.rm = TRUE)
)
## `summarise()` has grouped output by 'Topic'. You can override using the
## `.groups` argument.
# Bar chart of the mean score per Section, one panel per Topic.
# geom_col() draws bars whose height is the value in the data, i.e. the
# same thing as geom_bar(stat = "identity").
plot <- ggplot(data = avg_scores, mapping = aes(x = Section, y = avg_score)) +
  geom_col(position = "dodge", fill = "blue") +
  facet_wrap(vars(Topic), scales = "free") +
  ggtitle("Average Score per Section Faceted by Topic") +
  xlab("Section") +
  ylab("Average Score") +
  theme_minimal()

# Render the chart.
print(plot)
# Viewpoint questions only, minus every User.Email x Topic pair listed in
# `zero_scores` (users whose attention-check total for that topic was 0).
viewpoint <- anti_join(
  quizzes[quizzes$Type == "viewpoint", , drop = FALSE],
  zero_scores,
  by = c("User.Email", "Topic")
)
### Add a "score" column holding the Likert value of each response, from
### 1 (strongly disagree) to 5 (strongly agree); unrecognised text gives NA.
# Matching is case-insensitive and the most specific phrases are tested
# first, because the shorter patterns also match the longer responses:
# "Agree" matches "Strongly Agree", and (ignoring case) "Disagree" too.
# Previously the generic "Agree" test ran before "Strongly Agree", so
# strongly-agree responses were scored 4 and the value 5 was never assigned.
viewpoint <- viewpoint %>%
  mutate(
    score = case_when(
      grepl("Neither Agree nor Disagree", Response.Text, ignore.case = TRUE) ~ 3,
      grepl("Strongly Agree", Response.Text, ignore.case = TRUE) ~ 5,
      grepl("Strongly Disagree", Response.Text, ignore.case = TRUE) ~ 1,
      grepl("Disagree", Response.Text, ignore.case = TRUE) ~ 2,
      grepl("Agree", Response.Text, ignore.case = TRUE) ~ 4,
      TRUE ~ NA_real_
    )
  )
# Mean Likert score for each question within each Section (Topic is carried
# along as a grouping key); rows with an NA score are ignored.
average_scores <- ungroup(
  summarize(
    group_by(viewpoint, Section, Question.Text, Topic),
    avg_score = mean(score, na.rm = TRUE)
  )
)
## `summarise()` has grouped output by 'Section', 'Question.Text'. You can
## override using the `.groups` argument.
# Prepare the per-question comparison between sections.
library(tidyr)
# Reshape `average_scores` so that each Section value becomes its own
# column holding that section's avg_score.
pivoted_scores <- pivot_wider(
  average_scores,
  names_from = Section,
  values_from = avg_score
)
# Show the column names so the section columns can be confirmed.
print(colnames(pivoted_scores))
## [1] "Question.Text" "Topic" "Control" "IA"
# The two section columns are `IA` and `Control`; add their difference
# (IA minus Control) for every row of the pivoted table.
pivoted_scores <- mutate(pivoted_scores, score_difference = IA - Control)
# Show the table including the new difference column.
print(pivoted_scores)
## # A tibble: 47 × 5
## Question.Text Topic Control IA score_difference
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 "Active advocacy for and support of pol… BioD… 3.81 NA NA
## 2 "Choosing to consume products that are … BioD… 3.91 NA NA
## 3 "Communities affected by environmental … EnvI… 3.81 3.87 0.0595
## 4 "Dietary supplements, including vitamin… Dige… 3.29 3.75 0.465
## 5 "Economic development justifies environ… EnvI… 2.97 3.03 0.0577
## 6 "Editing the human genome is a risk to … MolB… 3.75 3.84 0.0885
## 7 "Education about the food web and human… BioD… 3.86 3.85 -0.00773
## 8 "Environmental injustices are primarily… EnvI… 3.63 3.69 0.0607
## 9 "Ethical considerations, such as equity… Vacc… 3.81 3.63 -0.183
## 10 "Gene editing technology has already sa… MolB… 3.55 NA NA
## # ℹ 37 more rows
library(DT)
# Interactive HTML table of `pivoted_scores`.
datatable(
  pivoted_scores,
  options = list(
    # Rows shown per page.
    pageLength = 57,
    # Allow horizontal scrolling when the table is wider than the page.
    scrollX = TRUE,
    # Give columns 0..ncol(pivoted_scores) the 'wrap' class for text wrapping.
    columnDefs = list(
      list(
        targets = seq.int(0L, ncol(pivoted_scores)),
        className = 'wrap'
      )
    )
  )
)
# Mean Likert score per Section x Topic, pooling all questions of a topic;
# rows with an NA score are ignored.
average_scores_topic <- ungroup(
  summarize(
    group_by(viewpoint, Section, Topic),
    avg_score = mean(score, na.rm = TRUE)
  )
)
## `summarise()` has grouped output by 'Section'. You can override using the
## `.groups` argument.
# Prepare the per-topic comparison between sections.
library(tidyr)
# Pivot the topic-level means (`average_scores_topic`, one row per
# Section x Topic) so each Section becomes its own column. This used to
# re-pivot the per-question table `average_scores`, which merely repeated
# the earlier per-question result and left `average_scores_topic` unused.
pivoted_scores <- average_scores_topic %>%
  pivot_wider(names_from = Section, values_from = avg_score)
# Show the column names so the section columns can be confirmed.
print(colnames(pivoted_scores))
## [1] "Question.Text" "Topic" "Control" "IA"
# The two section columns are `IA` and `Control`; add their difference
# (IA minus Control) for every row of the pivoted table.
pivoted_scores <- mutate(pivoted_scores, score_difference = IA - Control)
# Show the table including the new difference column.
print(pivoted_scores)
## # A tibble: 47 × 5
## Question.Text Topic Control IA score_difference
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 "Active advocacy for and support of pol… BioD… 3.81 NA NA
## 2 "Choosing to consume products that are … BioD… 3.91 NA NA
## 3 "Communities affected by environmental … EnvI… 3.81 3.87 0.0595
## 4 "Dietary supplements, including vitamin… Dige… 3.29 3.75 0.465
## 5 "Economic development justifies environ… EnvI… 2.97 3.03 0.0577
## 6 "Editing the human genome is a risk to … MolB… 3.75 3.84 0.0885
## 7 "Education about the food web and human… BioD… 3.86 3.85 -0.00773
## 8 "Environmental injustices are primarily… EnvI… 3.63 3.69 0.0607
## 9 "Ethical considerations, such as equity… Vacc… 3.81 3.63 -0.183
## 10 "Gene editing technology has already sa… MolB… 3.55 NA NA
## # ℹ 37 more rows
library(DT)
# Interactive HTML table of `pivoted_scores`.
datatable(
  pivoted_scores,
  options = list(
    # Rows shown per page.
    pageLength = 57,
    # Allow horizontal scrolling when the table is wider than the page.
    scrollX = TRUE,
    # Give columns 0..ncol(pivoted_scores) the 'wrap' class for text wrapping.
    columnDefs = list(
      list(
        targets = seq.int(0L, ncol(pivoted_scores)),
        className = 'wrap'
      )
    )
  )
)
# Merge the available viewpoint scores and data?
# Read the two section rosters and the pre-survey responses.
s1 <- read.csv("Bio1010S1.csv")
s2 <- read.csv("Bio1010S2.csv")
pre <- read.csv("PreResponses.csv")
library("dplyr")

# Stack both rosters; `source` records which one ("s1"/"s2") a row came from.
combined_df <- bind_rows(list(s1 = s1, s2 = s2), .id = "source")
# Drop the fifth column of the stacked roster.
combined_df <- combined_df[, -5]

# Drop the first two rows of the survey export
# (presumably extra header rows -- TODO confirm).
pre <- pre[-(1:2), ]
# Q141 is the student ID in email form: keep its first 7 characters,
# lower-cased, so it can be compared with SIS.Login.ID.
pre$Q141 <- tolower(substr(pre$Q141, 1, 7))
# Keep consenting (Q1.2) respondents whose Progress is "100".
pre <- subset(pre, Q1.2 == "I Consent" & Progress == "100")

# Flag roster rows whose login ID appears among the kept pre-survey IDs.
combined_df <- mutate(
  combined_df,
  Pre_Survey = if_else(SIS.Login.ID %in% pre$Q141, "yes", "no")
)
write.csv(combined_df, "combined_df.csv", row.names = FALSE)

# Distinct IDs on each side.
unique_Q141 <- unique(pre$Q141)
unique_SIS_Login_ID <- unique(combined_df$SIS.Login.ID)
# Survey IDs (Q141) that match no roster SIS.Login.ID.
values_not_present <- setdiff(unique_Q141, unique_SIS_Login_ID)
as.data.frame(values_not_present)

# Roster rows flagged as having a pre-survey.
pre_only <- subset(combined_df, Pre_Survey == "yes")
# Read the two section rosters again, plus the post-survey responses.
s1 <- read.csv("Bio1010S1.csv")
s2 <- read.csv("Bio1010S2.csv")
post <- read.csv("PostResponses.csv")
library("dplyr")

# Rebuild the stacked roster from scratch; `source` is "s1" or "s2".
# NOTE(review): unlike the pre-survey merge, column 5 is not dropped here,
# and this fresh table has no Pre_Survey column -- confirm this is intended.
combined_df <- bind_rows(list(s1 = s1, s2 = s2), .id = "source")

# Drop the first two rows of the survey export
# (presumably extra header rows -- TODO confirm).
post <- post[-(1:2), ]
# Q141 is the student ID in email form: keep its first 7 characters,
# lower-cased, so it can be compared with SIS.Login.ID.
post$Q141 <- tolower(substr(post$Q141, 1, 7))
# Keep consenting (Q1.2) respondents whose Progress is "100".
post <- subset(post, Q1.2 == "I Consent" & Progress == "100")

# Flag roster rows whose login ID appears among the kept post-survey IDs.
combined_df <- mutate(
  combined_df,
  Post_Survey = if_else(SIS.Login.ID %in% post$Q141, "yes", "no")
)
write.csv(combined_df, "combined_df_post.csv", row.names = FALSE)

# Distinct IDs on each side.
unique_Q141 <- unique(post$Q141)
unique_SIS_Login_ID <- unique(combined_df$SIS.Login.ID)
# Survey IDs (Q141) that match no roster SIS.Login.ID.
values_not_present <- setdiff(unique_Q141, unique_SIS_Login_ID)
as.data.frame(values_not_present)

# Roster rows flagged as having a post-survey.
post_only <- subset(combined_df, Post_Survey == "yes")
# archive combined
# write.csv(combined_df, "combined_archived.csv")
# `combined_df` was rebuilt from the raw rosters for the post-survey merge,
# so at this point it only carries Post_Survey. Re-derive Pre_Survey from the
# filtered pre-survey IDs (pre$Q141) so the filter below can use both flags;
# without it, subset() fails because the Pre_Survey column does not exist.
combined <- combined_df %>%
  mutate(Pre_Survey = if_else(SIS.Login.ID %in% pre$Q141, "yes", "no"))
# Participants with both a pre- and a post-survey (141 total).
both <- subset(combined, Pre_Survey == "yes" & Post_Survey == "yes")
# Survey column key: Q134_1 = age, Q138 = year, Q140 = major, W146 = country of birth, Q149_1 = (unlabelled), Q2.4 = gender, Q2.6 = Race/Ethnicity, Q2.7 = First Gen
# Stack the pre- and post-survey responses; `source` is "pre" or "post".
pre_post_all <- bind_rows(list(pre = pre, post = post), .id = "source")
# Keep consenting (Q1.2) respondents whose Progress is "100".
pre_post_all <- subset(pre_post_all, Q1.2 == "I Consent" & Progress == "100")

# Distinct participant IDs across both surveys.
unique_pre_post_all <- unique(pre_post_all$Q141)
unique_pre_post_all # 250 in "all", 141 for people who did "both"

# Row counts by gender (Q2.4) and by year (Q138).
# NOTE(review): pre and post rows are stacked, so a participant who answered
# both surveys contributes two rows to these counts -- confirm that is
# intended.
count(pre_post_all, Q2.4)
count(pre_post_all, Q138)