library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/ethandunn/Documents/University/2025/Semester 2/Psychology Capstone/Assignements/VerticalCB_avg
# Load the raw survey export ----
# The path is built with here(), so it resolves from the project root
# rather than from whatever the current working directory happens to be.
cc_loc <- here("CCdata.csv")
# Every column is read as character: the first two data rows hold question
# text and ImportId metadata (see the printout below), not responses.
cc_data <- read_csv(cc_loc)
## New names:
## Rows: 8 Columns: 66
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (66): StartDate, EndDate, Status, IPAddress, Progress, Duration (in seco...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...18`
# Quick visual check of the imported table.
print(cc_data)
## # A tibble: 8 × 66
## StartDate EndDate Status IPAddress Progress Duration (in seconds…¹ Finished
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Start Date" "End D… "Resp… "IP Addr… "Progre… "Duration (in seconds… "Finish…
## 2 "{\"ImportI… "{\"Im… "{\"I… "{\"Impo… "{\"Imp… "{\"ImportId\":\"dura… "{\"Imp…
## 3 "2025-09-01… "2025-… "IP A… "27.32.7… "100" "693" "True"
## 4 "2025-09-01… "2025-… "IP A… "49.193.… "100" "755" "True"
## 5 "2025-09-01… "2025-… "IP A… "172.225… "100" "1265" "True"
## 6 "2025-09-01… "2025-… "IP A… "128.250… "100" "1730" "True"
## 7 "2025-09-01… "2025-… "IP A… "194.127… "100" "841" "True"
## 8 "2025-09-01… "2025-… "IP A… "106.70.… "100" "432" "True"
## # ℹ abbreviated name: ¹`Duration (in seconds)`
## # ℹ 59 more variables: RecordedDate <chr>, ResponseId <chr>,
## # RecipientLastName <chr>, RecipientFirstName <chr>, RecipientEmail <chr>,
## # ExternalReference <chr>, LocationLatitude <chr>, LocationLongitude <chr>,
## # DistributionChannel <chr>, UserLanguage <chr>, ...18 <chr>, Age <chr>,
## # Gender <chr>, Student <chr>, Q1_1 <chr>, Q2_1 <chr>, Q3_1 <chr>,
## # Q4_1 <chr>, Q5_1 <chr>, Q6_1 <chr>, Q7_1 <chr>, Q8_1 <chr>, Q9_1 <chr>, …
# Vertical Concept Breadth ----
# Keep the ten vertical concept breadth items (Q1_1 ... Q10_1) plus the
# participant identifier. Columns are selected by NAME rather than by
# position, so the selection stays correct if the export gains, loses, or
# reorders columns; no column-number lookup is needed any more.
# all_of() raises an error if any expected column is missing, instead of
# silently picking up a neighbouring column.
vertical_cb <- cc_data %>%
  select(all_of(paste0("Q", 1:10, "_1")), ResponseId)
print(vertical_cb)
## # A tibble: 8 × 11
## Q1_1 Q2_1 Q3_1 Q4_1 Q5_1 Q6_1 Q7_1 Q8_1 Q9_1 Q10_1 ResponseId
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "For each of… "2. … "3. … "4. … "5. … "6. … "7. … "8. … "9. … "10.… "Response…
## 2 "{\"ImportId… "{\"… "{\"… "{\"… "{\"… "{\"… "{\"… "{\"… "{\"… "{\"… "{\"Impor…
## 3 "No" "No" "Yes" "No" "No" "No" "No" "Yes" "No" "No" "R_9xJhbq…
## 4 "No" "No" "Yes" "Yes" "No" "No" "Yes" "Yes" "No" "No" "R_9WJs98…
## 5 "No" "Yes" "Yes" "Yes" "No" "Yes" "Yes" "No" "No" "Yes" "R_9PofNG…
## 6 "No" "Yes" "No" "No" "No" "No" "Yes" "No" "No" "Yes" "R_42DMxN…
## 7 "Yes" "No" "No" "Yes" "No" "No" "Yes" "Yes" "Yes" "Yes" "R_4n15nY…
## 8 "No" "No" "Yes" "No" "No" "No" "Yes" "No" "Yes" "Yes" "R_923cPE…
# Drop the two leading non-response rows: row 1 holds the question text and
# row 2 the ImportId metadata. What remains is one row per participant.
vertical_clean <- slice(vertical_cb, -seq_len(2))
print(vertical_clean)
## # A tibble: 6 × 11
## Q1_1 Q2_1 Q3_1 Q4_1 Q5_1 Q6_1 Q7_1 Q8_1 Q9_1 Q10_1 ResponseId
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 No No Yes No No No No Yes No No R_9xJhbqYWbPnFya6
## 2 No No Yes Yes No No Yes Yes No No R_9WJs98ahDCRZEy0
## 3 No Yes Yes Yes No Yes Yes No No Yes R_9PofNGNuUJ8ZNM5
## 4 No Yes No No No No Yes No No Yes R_42DMxN1YupUm0jj
## 5 Yes No No Yes No No Yes Yes Yes Yes R_4n15nYXDmk4Bz2s
## 6 No No Yes No No No Yes No Yes Yes R_923cPErlCcqjxL3
# Converting Yes / No to numeric 1 / 0
# The item columns are addressed by name, and the recode is an explicit
# mapping: "Yes" -> 1, "No" -> 0. Anything else (a missing answer or an
# unexpected label such as "yes" or "Maybe") becomes NA, so it is visible
# in the data instead of being silently scored as 0.
vertical_CB <- vertical_clean %>%
  mutate(across(
    all_of(paste0("Q", 1:10, "_1")),
    ~ case_when(
      .x == "Yes" ~ 1,
      .x == "No" ~ 0
    )
  ))
print(vertical_CB)
## # A tibble: 6 × 11
## Q1_1 Q2_1 Q3_1 Q4_1 Q5_1 Q6_1 Q7_1 Q8_1 Q9_1 Q10_1 ResponseId
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0 0 1 0 0 0 0 1 0 0 R_9xJhbqYWbPnFya6
## 2 0 0 1 1 0 0 1 1 0 0 R_9WJs98ahDCRZEy0
## 3 0 1 1 1 0 1 1 0 0 1 R_9PofNGNuUJ8ZNM5
## 4 0 1 0 0 0 0 1 0 0 1 R_42DMxN1YupUm0jj
## 5 1 0 0 1 0 0 1 1 1 1 R_4n15nYXDmk4Bz2s
## 6 0 0 1 0 0 0 1 0 1 1 R_923cPErlCcqjxL3
# Calculating average participant scores
# avg_score is the mean of the ten 0/1 item scores for each participant,
# i.e. the proportion of items answered "Yes".
# rowMeans(..., na.rm = TRUE) divides by the number of items the participant
# actually answered. The previous sum(..., na.rm = TRUE) / 10 kept a fixed
# denominator, which counted every missing answer as a "No".
# A participant with no answered items gets NaN rather than 0.
# The computation is vectorized, so no rowwise()/ungroup() pair is needed.
vertical_avg <- vertical_CB %>%
  mutate(
    avg_score = rowMeans(
      pick(all_of(paste0("Q", 1:10, "_1"))),
      na.rm = TRUE
    )
  )
print(vertical_avg)
## # A tibble: 6 × 12
## Q1_1 Q2_1 Q3_1 Q4_1 Q5_1 Q6_1 Q7_1 Q8_1 Q9_1 Q10_1 ResponseId
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 0 0 1 0 0 0 0 1 0 0 R_9xJhbqYWbPnFya6
## 2 0 0 1 1 0 0 1 1 0 0 R_9WJs98ahDCRZEy0
## 3 0 1 1 1 0 1 1 0 0 1 R_9PofNGNuUJ8ZNM5
## 4 0 1 0 0 0 0 1 0 0 1 R_42DMxN1YupUm0jj
## 5 1 0 0 1 0 0 1 1 1 1 R_4n15nYXDmk4Bz2s
## 6 0 0 1 0 0 0 1 0 1 1 R_923cPErlCcqjxL3
## # ℹ 1 more variable: avg_score <dbl>
# Final table: participant identifier and average score only.
# NOTE: `Vertical_avg` (capital V) is a different object from the
# intermediate `vertical_avg` above; R names are case-sensitive.
Vertical_avg <- select(vertical_avg, ResponseId, avg_score)
print(Vertical_avg)
## # A tibble: 6 × 2
## ResponseId avg_score
## <chr> <dbl>
## 1 R_9xJhbqYWbPnFya6 0.2
## 2 R_9WJs98ahDCRZEy0 0.4
## 3 R_9PofNGNuUJ8ZNM5 0.6
## 4 R_42DMxN1YupUm0jj 0.3
## 5 R_4n15nYXDmk4Bz2s 0.6
## 6 R_923cPErlCcqjxL3 0.4