#{r packages} #install.packages(c("psych", "irr","pwr","writexl")) #
data_dir <- "/Users/haisaosmanli/Desktop/CORE/SDA/DATA"
excel_files <- list.files(path = data_dir, pattern = "\\.xlsx$", full.names = TRUE)
excel_files <- excel_files[!grepl("~\\$", excel_files)]
excel_files
## [1] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/AD.xlsx"
## [2] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/AK.xlsx"
## [3] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/CD.xlsx"
## [4] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/EM.xlsx"
## [5] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/RL.xlsx"
## [6] "/Users/haisaosmanli/Desktop/CORE/SDA/DATA/VG.xlsx"
definitions <- read_excel(excel_files[1], sheet = 1)
## New names:
## • `` -> `...1`
## • `` -> `...3`
glimpse(definitions)
## Rows: 81
## Columns: 3
## $ ...1 <chr> …
## $ `Present = 1 point; absent=0 points: Place X in each box to indicate the presence of each indicator (total “X”s at the end in yellow box).` <chr> …
## $ ...3 <chr> …
read_researcher_data <- function(file_path) {
# extract researcher initials (eg. "AD.xlsx" → "AD")
researcher_id <- toupper(str_extract(basename(file_path), "^[A-Z]+"))
# get all sheet names
sheets <- excel_sheets(file_path)
# decide which sheets to read
if (researcher_id == "EM") {
target_sheets <- sheets[2:4] # EM → sheets 2,3 and 4
} else if (researcher_id == "VG") {
target_sheets <- sheets[2:4] # VG → sheets 2, 3, and 4
} else {
target_sheets <- sheets[2] # others → only sheet 2
}
# read and combine data from selected sheets
df_list <- lapply(target_sheets, function(sheet_name) {
df <- read_excel(file_path, sheet = sheet_name) %>%
clean_names() %>%
mutate(
researcher = researcher_id,
source_file = basename(file_path),
# clean sheet name (remove "(Pilot)" and convert to date)
date_collected = suppressWarnings(
ymd(str_trim(str_remove(sheet_name, "\\(.*\\)")))
)
)
return(df)
})
# combine all sheets vertically
bind_rows(df_list)
}
# read all files and merge vertically
metadata_all <- map_dfr(excel_files, read_researcher_data)
# preview the merged data
glimpse(metadata_all)
## Rows: 1,650
## Columns: 16
## $ researcher_initials <chr> "AD", "N/A", "N/A", "N/A", "N/A", "N/A", …
## $ weather_brief_description <chr> "Slightly Rainy, Cloudy", "N/A", "N/A", "…
## $ temperature_celsius <chr> "7", "N/A", "N/A", "N/A", "N/A", "N/A", "…
## $ number_of_photos_taken <chr> "4", "N/A", "N/A", "N/A", "N/A", "N/A", "…
## $ time_start_00_00am_pm <chr> "0.375", "N/A", "N/A", "N/A", "N/A", "N/A…
## $ time_end_00_00am_pm <chr> "0.41666666666666669", "N/A", "N/A", "N/A…
## $ location <chr> "Sunalta", "Sunalta", "Sunalta", "Sunalta…
## $ indicator_type <chr> "N/A", "Physical disorder", "Physical dis…
## $ code <chr> "N/A", "001", "002", "003a", "003b", "004…
## $ indicator_description <chr> "N/A", "Pile of discarded cigarette butts…
## $ present_yes_1 <chr> "N/A", "1", "1", "1", NA, "1", "1", NA, N…
## $ frequency_tally_total <chr> "N/A", "7", "2", "14", NA, "4", "1", NA, …
## $ field_notes_where_applicable <chr> "N/A", NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ researcher <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD",…
## $ source_file <chr> "AD.xlsx", "AD.xlsx", "AD.xlsx", "AD.xlsx…
## $ date_collected <date> 2025-10-19, 2025-10-19, 2025-10-19, 2025…
metadata_all$present_yes_1[
is.na(metadata_all$present_yes_1) |
metadata_all$present_yes_1 == "" |
metadata_all$present_yes_1 == "N/A"
] <- 0
# Rows where present_yes_1 == 1
present_1 <- metadata_all[metadata_all$present_yes_1 == 1, ]
# Rows where present_yes_1 == 0
present_0 <- metadata_all[metadata_all$present_yes_1 == 0, ]
allowed_codes <- c("013", "015a", "016", "017", "030", "034",
"044", "045", "047", "048", "064", "065a", "067")
present_1 <- metadata_all %>%
filter(present_yes_1 == 1, code %in% allowed_codes) %>%
select(location, code, present_yes_1)
present_0 <- metadata_all %>%
filter(present_yes_1 == 0, code %in% allowed_codes) %>%
select(location, code, present_yes_1) %>%
mutate(location = ifelse(location == "" | is.na(location), "Sunalta", location))
make_code_location_table <- function(df) {
df %>%
filter(code %in% allowed_codes) %>% # keep only allowed codes
group_by(code, location) %>% # group by code & location
summarise(count = n(), .groups = "drop") %>% # count rows
pivot_wider(names_from = location, values_from = count, values_fill = 0) %>% # pivot to 13x3
arrange(code)
}
# For present_1
table_present_1 <- make_code_location_table(present_1)
# For present_0
table_present_0 <- make_code_location_table(present_0)
physical_disorder <- c("013", "015a", "016", "017", "030")
social_disorder <- c("034", "044", "045", "047", "048")
social_order <- c("064", "065a", "067")
library(dplyr)
collapse_to_groups <- function(df) {
df %>%
mutate(group = case_when(
code %in% physical_disorder ~ "physical_disorder",
code %in% social_disorder ~ "social_disorder",
code %in% social_order ~ "social_order",
TRUE ~ "other" # optional, can remove this if not needed
)) %>%
select(-code) %>% # remove old code column
group_by(group) %>% # group by new group
summarise(across(everything(), sum), .groups = "drop") # sum all locations
}
# For present_1
table_present_1_grouped <- collapse_to_groups(table_present_1)
# For present_0
table_present_0_grouped <- collapse_to_groups(table_present_0)
# 2x2 matrix: Yes/No × Location
physical_matrix <- matrix(
c(
table_present_1_grouped$`Beltline Sheldon Chumir`[table_present_1_grouped$group == "physical_disorder"], # Yes Chumir
table_present_1_grouped$Sunalta[table_present_1_grouped$group == "physical_disorder"], # Yes Sunalta
table_present_0_grouped$`Beltline Sheldon Chumir`[table_present_0_grouped$group == "physical_disorder"], # No Chumir
table_present_0_grouped$Sunalta[table_present_0_grouped$group == "physical_disorder"] # No Sunalta
),
nrow = 2,
byrow = TRUE
)
rownames(physical_matrix) <- c("Yes", "No")
colnames(physical_matrix) <- c("Chumir", "Sunalta")
# Chi-square test
chisq.test(physical_matrix)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: physical_matrix
## X-squared = 1.9726, df = 1, p-value = 0.1602
# 2x2 matrix: Yes/No × Location
social_disorder_matrix <- matrix(
c(
table_present_1_grouped$`Beltline Sheldon Chumir`[table_present_1_grouped$group == "social_disorder"], # Yes Chumir
table_present_1_grouped$Sunalta[table_present_1_grouped$group == "social_disorder"], # Yes Sunalta
table_present_0_grouped$`Beltline Sheldon Chumir`[table_present_0_grouped$group == "social_disorder"], # No Chumir
table_present_0_grouped$Sunalta[table_present_0_grouped$group == "social_disorder"] # No Sunalta
),
nrow = 2,
byrow = TRUE
)
rownames(social_disorder_matrix) <- c("Yes", "No")
colnames(social_disorder_matrix) <- c("Chumir", "Sunalta")
# Chi-square test
chisq.test(social_disorder_matrix)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: social_disorder_matrix
## X-squared = 13.281, df = 1, p-value = 0.0002681
# 2x2 matrix: Yes/No × Location
social_order_matrix <- matrix(
c(
table_present_1_grouped$`Beltline Sheldon Chumir`[table_present_1_grouped$group == "social_order"], # Yes Chumir
table_present_1_grouped$Sunalta[table_present_1_grouped$group == "social_order"], # Yes Sunalta
table_present_0_grouped$`Beltline Sheldon Chumir`[table_present_0_grouped$group == "social_order"], # No Chumir
table_present_0_grouped$Sunalta[table_present_0_grouped$group == "social_order"] # No Sunalta
),
nrow = 2,
byrow = TRUE
)
rownames(social_order_matrix) <- c("Yes", "No")
colnames(social_order_matrix) <- c("Chumir", "Sunalta")
# Chi-square test
chisq.test(social_order_matrix)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: social_order_matrix
## X-squared = 0.28202, df = 1, p-value = 0.5954
physical_matrix_double <- matrix(
c(
62, 46, # Yes Chumir, Yes Sunalta (doubled counts)
38, 54 # No Chumir, No Sunalta (doubled counts)
),
nrow = 2,
byrow = TRUE
)
rownames(physical_matrix_double) <- c("Yes", "No")
colnames(physical_matrix_double) <- c("Chumir", "Sunalta")
chisq.test(physical_matrix_double)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: physical_matrix_double
## X-squared = 4.529, df = 1, p-value = 0.03333
# Multiply the counts by 3
yes_chumir <- 31 * 3
yes_sunalta <- 23 * 3
no_chumir <- 19 * 3
no_sunalta <- 27 * 3
# Create the 2x2 matrix
physical_matrix_triple <- matrix(
c(yes_chumir, yes_sunalta,
no_chumir, no_sunalta),
nrow = 2,
byrow = TRUE
)
rownames(physical_matrix_triple) <- c("Yes", "No")
colnames(physical_matrix_triple) <- c("Chumir", "Sunalta")
chisq.test(physical_matrix_triple)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: physical_matrix_triple
## X-squared = 7.0988, df = 1, p-value = 0.007714