# sample code
# version 1
# 4/7/2021
# data: CC data
# authors
# heather perkins
###############################################################################
# notes -------------------------------------------------------------------
# useful code from office hours
# ctrl-shift-c - comment out / uncomment the selected line(s) (produces lines like this one)
# ctrl-shift-r - create section header
# ctrl-alt-b - run all code before this point
# code to create new csv file
# write.csv(data, file="cleaned_data1.csv", row.names = F)
# code to create new ids
# data_pre$new_id <- 1:nrow(data_pre)
# data_post$new_id <- 91:170
# create a new column for each dataframe before merging
# data_pre$timepoint <- "pre"
###############################################################################
# 1. basic starting tasks -------------------------------------------------
# 1.1 load libraries ------------------------------------------------------
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# 1.2 import data -------------------------------------------------------------
# Read the pre and post data files. Cells that are empty, contain a single
# space, or hold the literal text "NA" are imported as missing values (NA).
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
data_pre <- read.csv(
  file = "Data/final_pre.csv",
  header = TRUE,
  na.strings = c("NA", "", " ")
)
data_post <- read.csv(
  file = "Data/final_post.csv",
  header = TRUE,
  na.strings = c("NA", "", " ")
)
###############################################################################
###############################################################################
# 2. create factor scores -------------------------------------------------
# Inspect the column layout of the pre data (output kept below for reference).
names(data_pre)
## [1] "salg1" "salg2" "salg3" "salg4" "salg5"
## [6] "salg6" "salg7" "salg8" "vbs1" "vbs2"
## [11] "vbs3" "vbs4" "vbs5" "vbs6" "vbs7"
## [16] "vbs8" "vbs9" "vbs10" "vbs11" "vbs12"
## [21] "vbs13" "vbs14" "vbs15" "write_gender" "id"
# Item columns belonging to each scale. Selecting by name instead of by
# position (1:8, 9:23) keeps the scores correct if the column order of the
# input files ever changes, and fails loudly if an item column is missing.
salg_items <- paste0("salg", 1:8)
vbs_items <- paste0("vbs", 1:15)
# Scale score = row mean of the scale's items. With na.rm = FALSE a row with
# any missing item gets NA for that scale.
data_pre$salg <- rowMeans(data_pre[salg_items], na.rm = FALSE)
data_pre$vbs <- rowMeans(data_pre[vbs_items], na.rm = FALSE)
# The post data has the same layout, so the same item names are reused.
names(data_post)
## [1] "salg1" "salg2" "salg3" "salg4" "salg5"
## [6] "salg6" "salg7" "salg8" "vbs1" "vbs2"
## [11] "vbs3" "vbs4" "vbs5" "vbs6" "vbs7"
## [16] "vbs8" "vbs9" "vbs10" "vbs11" "vbs12"
## [21] "vbs13" "vbs14" "vbs15" "write_gender" "id"
data_post$salg <- rowMeans(data_post[salg_items], na.rm = FALSE)
data_post$vbs <- rowMeans(data_post[vbs_items], na.rm = FALSE)
# Keep only the id, the written-in gender and the two scale scores, in that
# order (the item-level columns are no longer needed).
score_cols <- c("id", "write_gender", "salg", "vbs")
data_pre2 <- data_pre[score_cols]
data_post2 <- data_post[score_cols]
###############################################################################
###############################################################################
# 3. merging pre and post -------------------------------------------------
# 3.1 append column names -------------------------------------------------
names(data_pre2)
## [1] "id" "write_gender" "salg" "vbs"
# Tag every column except the join key "id" with its timepoint so the pre and
# post versions stay distinguishable after merging. The columns are picked by
# name (everything that is not "id") rather than by position 2:4.
pre_value_cols <- names(data_pre2) != "id"
pre_names <- paste0(names(data_pre2)[pre_value_cols], "_pre")
names(data_pre2)[pre_value_cols] <- pre_names
post_value_cols <- names(data_post2) != "id"
post_names <- paste0(names(data_post2)[post_value_cols], "_post")
names(data_post2)[post_value_cols] <- post_names
# 3.2 merge dataframes ----------------------------------------------------
# only matched pairs: ids present in both pre and post
data_final_matched <- merge(data_pre2, data_post2, by = "id")
# all observations, even those without matches; the missing side is NA
data_final_all <- merge(data_pre2, data_post2, by = "id", all = TRUE)
###############################################################################
###############################################################################
# 4. create non-repeating variable ----------------------------------------
# uses post-test gender data if available, otherwise uses gender data from pre-test or leaves NA
# Start from the post gender entry and fall back to the pre entry wherever the
# post entry is missing; rows missing both stay NA.
post_missing <- is.na(data_final_all$write_gender_post)
gender <- data_final_all$write_gender_post
gender[post_missing] <- data_final_all$write_gender_pre[post_missing]
# normalise the free-text entries: lower-case, strip surrounding whitespace,
# then list the distinct entries in order of first appearance
gender2 <- tolower(gender)
gender3 <- trimws(gender2)
gender_write <- unique(gender3)
# Code assigned to each position of gender_write. Position 3 and any position
# past 7 receive no code (NA).
# NOTE(review): this mapping depends on the order in which entries first appear
# in the data - re-check print(gender_write) whenever the input files change.
code_by_position <- c("F", "M", NA, "NB", "F", "F", "F")
# incomparables = NA keeps missing entries unmatched, so they are never given
# a code even if NA itself occupies one of the coded positions.
entry_position <- match(gender3, gender_write, incomparables = NA)
data_final_all$gender_rc <- code_by_position[entry_position]
# subset to remove extra columns
head(data_final_all)
## id write_gender_pre salg_pre vbs_pre write_gender_post salg_post vbs_post
## 1 1 Female 2.750 3.866667 Female 2.625 3.800000
## 2 2 Female 2.375 3.533333 female 2.250 3.133333
## 3 3 Female 2.250 3.866667 Female 3.625 3.933333
## 4 4 female 4.250 4.066667 Female 4.250 3.933333
## 5 5 Female 3.750 3.600000 female 3.500 3.933333
## 6 6 Female 4.250 3.266667 Female 5.000 4.333333
## gender_rc
## 1 F
## 2 F
## 3 F
## 4 F
## 5 F
## 6 F
# the raw write-in columns are replaced by gender_rc, so drop them
raw_gender_cols <- c("write_gender_pre", "write_gender_post")
data_final_all2 <- data_final_all[!(names(data_final_all) %in% raw_gender_cols)]
###############################################################################
###############################################################################
# 5. switching between long/wide formats ----------------------------------
# 5.1 switching from wide to long -----------------------------------------
head(data_final_all2)
## id salg_pre vbs_pre salg_post vbs_post gender_rc
## 1 1 2.750 3.866667 2.625 3.800000 F
## 2 2 2.375 3.533333 2.250 3.133333 F
## 3 3 2.250 3.866667 3.625 3.933333 F
## 4 4 4.250 4.066667 4.250 3.933333 F
## 5 5 3.750 3.600000 3.500 3.933333 F
## 6 6 4.250 3.266667 5.000 4.333333 F
# Stack the four score columns (salg_pre through vbs_post) into two columns:
# "variable" holds the former column name and "value" holds the score.
# factor_key = TRUE stores "variable" as a factor whose levels follow the
# original column order.
# NOTE: gather()/spread() are superseded in tidyr by pivot_longer()/
# pivot_wider(); they are kept here so the output shown below is unchanged.
data_final_all_long <- gather(
  data_final_all2,
  key = "variable",
  value = "value",
  salg_pre:vbs_post,
  factor_key = TRUE
)
head(data_final_all_long)
## id gender_rc variable value
## 1 1 F salg_pre 2.750
## 2 2 F salg_pre 2.375
## 3 3 F salg_pre 2.250
## 4 4 F salg_pre 4.250
## 5 5 F salg_pre 3.750
## 6 6 F salg_pre 4.250
# 5.2 switching from long to wide -----------------------------------------
# Reverse of the step above: each level of "variable" becomes its own column
# again, filled with the matching "value".
data_final_all_wide <- spread(
  data_final_all_long,
  key = variable,
  value = value
)
head(data_final_all_wide)
## id gender_rc salg_pre vbs_pre salg_post vbs_post
## 1 1 F 2.750 3.866667 2.625 3.800000
## 2 2 F 2.375 3.533333 2.250 3.133333
## 3 3 F 2.250 3.866667 3.625 3.933333
## 4 4 F 4.250 4.066667 4.250 3.933333
## 5 5 F 3.750 3.600000 3.500 3.933333
## 6 6 F 4.250 3.266667 5.000 4.333333