Overview (Steps)

##################
# Clean the data #
##################

# Import data, subset and name the variables in the dataframes, remove blanks (by language)
pptmraw.df <- read.csv("Participant Pre  Post Test Survey MPC.csv", stringsAsFactors = TRUE)
# OR
pptiraw.df <- read.csv("Participant Pre  Post Test Survey IN-clued.csv", stringsAsFactors = TRUE)

#English Pre/Post-Tests: subsetted dataframe for IN-clued
#pptengli.df <- rawppti.df[2:nrow(rawppti.df),c(3,11:69,77:80,86,87:93)] #72 variables for IN-clued
pptieng.df <- pptiraw.df[2:nrow(pptiraw.df),c(3,11:69,77:80,86,87,89:93)] #71 variables for IN-clued + MPC
names(pptieng.df) <- c("Start Date","Initials","Birthdate",
                   "Group Name", "Participant", "Time",
                   "Alamance County",
                   "Other County (text)",
                   "Grade",
                   "Race not reported",
                   "White",
                   "Black",
                   "Asian",
                   "Pacific Islander",
                   "Native American",
                   "Other Race (text)",
                   "Hispanic",
                   "Language",
                   "Other Language (text)",
                   "Gender not reported",
                   "Woman",
                   "Man",
                   "Transgender",
                   "Nonbinary/Does not identify",
                   "Other Gender (text)",
                   "Sexual Identity",
                   "Other Sexual Identity (text)",
                   "Oral Sex",
                   "Vaginal Sex",
                   "Anal Sex",
                   "Pregnancy",
                   "Sex using alcohol",
                   "Sex using drugs",
                   "Never had sex",
                   "(Duplicate hidden category)",
                   "Sex with no barrier method",
                   "Sex with no birth control",
                   "EC",
                   "IUD",
                   "Implant",
                   "Shot",
                   "Ring",
                   "Patch",
                   "Pill",
                   "Barrier Method",
                   "Not sure",
                   "Prefer not to answer",
                   "Other Method (text)",
                   "Group leader understood me",
                   "Group mates respect each other",
                   "I felt judged*",
                   "Group leader knew the material",
                   "I understood the material",
                   "Group leader made the activities fun",
                   "I felt free to speak",
                   "I felt uncomfortable participating*",
                   "Topics I will remember",
                   "Group did this well",
                   "Improvements",
                   "First name",
                   "Email address",
                   "Phone number",
                   "Future",
                   "Consent",
                   "EC up to 5 days",
                   "EC without prescription",
#                   "Be You", #col 88
                   "Prepared to say no if not ready",
                   "Prepared to use a barrier method",
                   "Prepared to say no if no barrier method",
                   "Prepared to use birth control",
                   "Prepared to go to a doctor for birth control")
pptieng.df[pptieng.df==""] <- NA

#English Pre/Post-Tests: subsetted dataframe for MPC
#pptenglm.df <- rawpptm.df[2:nrow(rawpptm.df),c(3,12:70,78:116)] #99 variables for MPC
pptmeng.df <- pptmraw.df[2:nrow(pptmraw.df),c(3,12:70,78:80,83,110:116)] #71 variables for MPC + IN-clued
names(pptmeng.df) <- c("Start Date","Initials","Birthdate",
                   "Group Name", "Participant", "Time",
                   "Alamance County",
                   "Other County (text)",
                   "Grade",
                   "Race not reported",
                   "White",
                   "Black",
                   "Asian",
                   "Pacific Islander",
                   "Native American",
                   "Other Race (text)",
                   "Hispanic",
                   "Language",
                   "Other Language (text)",
                   "Gender not reported",
                   "Woman",
                   "Man",
                   "Transgender",
                   "Nonbinary/Does not identify",
                   "Other Gender (text)",
                   "Sexual Identity",
                   "Other Sexual Identity (text)",
                   "Oral Sex",
                   "Vaginal Sex",
                   "Anal Sex",
                   "Pregnancy",
                   "Sex using alcohol",
                   "Sex using drugs",
                   "Never had sex",
                   "(Duplicate hidden category)",
                   "Sex with no barrier method",
                   "Sex with no birth control",
                   "EC",
                   "IUD",
                   "Implant",
                   "Shot",
                   "Ring",
                   "Patch",
                   "Pill",
                   "Barrier Method",
                   "Not sure",
                   "Prefer not to answer",
                   "Other Method (text)",
                   "Group leader understood me",
                   "Group mates respect each other",
                   "I felt judged*",
                   "Group leader knew the material",
                   "I understood the material",
                   "Group leader made the activities fun",
                   "I felt free to speak",
                   "I felt uncomfortable participating*",
                   "Topics I will remember",
                   "Group did this well",
                   "Improvements",
                   "First name",
                   "Email address",
                   "Phone number",
                   "Future",
#                   "Healthy Relationship",      #col 81
#                   "Dating Abuse",              #col 82
                   "Consent",
#                   "PrEP",                      #col 84
#                   "abstinence-neither",        #col 85
#                   "abstinence-pregnancy",      #col 86
#                   "abstinence-STI",            #col 87
#                   "birth control-neither",     #col 88
#                   "birth control-pregnancy",   #col 89
#                   "birth control-STI",         #col 90
#                   "external condom-neither",   #col 91
#                   "external condom-pregnancy", #col 92
#                   "external condom-STI",       #col 93
#                   "lambskin condom-neither",   #col 94
#                   "lambskin condom-pregnancy", #col 95
#                   "lambskin condom-STI",       #col 96
#                   "internal condom-neither",   #col 97
#                   "internal condom-pregnancy", #col 98
#                   "internal condom-STI",       #col 99
#                   "withdrawal-neither",        #col 100
#                   "withdrawal-pregnancy",      #col 101
#                   "withdrawal-STI",            #col 102
#                   "Store the condom",          #col 103
#                   "Open the package",          #col 104
#                   "Pinch the tip",             #col 105
#                   "Unroll the condom",         #col 106
#                   "Hold the base",             #col 107
#                   "STI infection",             #col 108
#                   "Water-based lubricants",    #col 109
                   "EC up to 5 days",
                   "EC without prescription",
                   "Prepared to say no if not ready",
                   "Prepared to use a barrier method",
                   "Prepared to say no if no barrier method",
                   "Prepared to use birth control",
                   "Prepared to go to a doctor for birth control")
pptmeng.df[pptmeng.df==""] <- NA

#Spanish Pre/Post-Tests: Subsetted dataframe for MPC
#pptspanm.df <- rawpptm.df[2:nrow(rawpptm.df),c(3,118:176,184:222) ] #99 variables for MPC
pptmspa.df <- pptmraw.df[2:nrow(pptmraw.df),c(3,118:176,184:186,189,216:222) ] #99 variables
names(pptmspa.df) <- c("Start Date","Initials","Birthdate",
                   "Group Name", "Participant", "Time",
                   "Alamance County",
                   "Other County (text)",
                   "Grade",
                   "Race not reported",
                   "White",
                   "Black",
                   "Asian",
                   "Pacific Islander",
                   "Native American",
                   "Other Race (text)",
                   "Hispanic",
                   "Language",
                   "Other Language (text)",
                   "Gender not reported",
                   "Woman",
                   "Man",
                   "Transgender",
                   "Nonbinary/Does not identify",
                   "Other Gender (text)",
                   "Sexual Identity",
                   "Other Sexual Identity (text)",
                   "Oral Sex",
                   "Vaginal Sex",
                   "Anal Sex",
                   "Pregnancy",
                   "Sex using alcohol",
                   "Sex using drugs",
                   "Never had sex",
                   "(Duplicate hidden category)",
                   "Sex with no barrier method",
                   "Sex with no birth control",
                   "EC",
                   "IUD",
                   "Implant",
                   "Shot",
                   "Ring",
                   "Patch",
                   "Pill",
                   "Barrier Method",
                   "Not sure",
                   "Prefer not to answer",
                   "Other Method (text)",
                   "Group leader understood me",
                   "Group mates respect each other",
                   "I felt judged*",
                   "Group leader knew the material",
                   "I understood the material",
                   "Group leader made the activities fun",
                   "I felt free to speak",
                   "I felt uncomfortable participating*",
                   "Topics I will remember",
                   "Group did this well",
                   "Improvements",
                   "First name",
                   "Email address",
                   "Phone number",
                   "Future",
#                   "Healthy Relationship",      #col 187
#                   "Dating Abuse",              #col 188
                   "Consent",
#                   "PrEP",                      #col 190
#                   "abstinence-neither",        #col 191
#                   "abstinence-pregnancy",      #col 192
#                   "abstinence-STI",            #col 193
#                   "birth control-neither",     #col 194
#                   "birth control-pregnancy",   #col 195
#                   "birth control-STI",         #col 196
#                   "external condom-neither",   #col 197
#                   "external condom-pregnancy", #col 198
#                   "external condom-STI",       #col 199
#                   "lambskin condom-neither",   #col 200
#                   "lambskin condom-pregnancy", #col 201
#                   "lambskin condom-STI",       #col 202
#                   "internal condom-neither",   #col 203
#                   "internal condom-pregnancy", #col 204
#                   "internal condom-STI",       #col 205
#                   "withdrawal-neither",        #col 206
#                   "withdrawal-pregnancy",      #col 207
#                   "withdrawal-STI",            #col 208
#                   "Store the condom",          #col 209
#                   "Open the package",          #col 210
#                   "Pinch the tip",             #col 211
#                   "Unroll the condom",         #col 212
#                   "Hold the base",             #col 213
#                   "STI infection",             #col 214
#                   "Water-based lubricants",    #col 215
                   "EC up to 5 days",
                   "EC without prescription",
                   "Prepared to say no if not ready",
                   "Prepared to use a barrier method",
                   "Prepared to say no if no barrier method",
                   "Prepared to use birth control",
                   "Prepared to go to a doctor for birth control")
pptmspa.df[pptmspa.df==""] <- NA

#ppt.df <- rbind(pptieng.df,pptmeng.df,pptmspa.df) #MPC + IN-clued
ppt.df <- rbind(pptmeng.df,pptmspa.df) #MPC only

# Lowercase the strings and format the dates
library(data.table)

#IN-clued + MPC-E + MPC-S Pre/Post-Tests: Identifiers 
ppt.df$`Group Name` <- tolower(ppt.df$`Group Name`) 
ppt.df$Initials <- tolower(ppt.df$Initials)

ppt.df$`Start Date` <- as.POSIXct(as.character(ppt.df$`Start Date`), format = "%m/%d/%Y %H:%M:%S %p", tz="America/New_York")
ppt.df$Birthdate <- as.Date(ppt.df$Birthdate, "%m/%d/%Y") 
ppt.df$PID <- paste(ppt.df$Initials, ppt.df$Birthdate)

# Dummy the CATA variables
cleanIt <- function(vec){
  chars <- as.character(vec)
  chars[chars!=""] <- "1" #if not blank (including zeroes), replace with "one"
  chars[is.na(chars)] <- "0" #if missing, replace with zero
  chars[chars==""] <- "0" #if blank, replace with zero
  return(as.numeric(chars)) #convert values to numbers
}

#IN-clued + MPC-E + MPC-S Pre/Post-Tests: Multiple Response Dummy Variables 
ppt.df[10:15] <- lapply(ppt.df[10:15], cleanIt) #pre: racial identity options 
ppt.df[20:24] <- lapply(ppt.df[20:24], cleanIt) #pre: gender identity options 
#ppt.df[34:47] <- lapply(ppt.df[34:47], cleanIt) #pre: last time sex options   
#pptmengl.df[68:85] <- lapply(pptmengl.df[68:85], cleanIt) #both: if used correctly Qs  (MPC only)

# Factor categorical variables

#IN-clued + MPC-E + MPC-S Pre-Tests: Categorical Variables - Demographics 
ppt.df$Time <- factor(ppt.df$Time,
                    levels = c(1,2),
                    labels = c("Before", "After"))
ppt.df$`Alamance County` <- factor(ppt.df$`Alamance County`,
                                   levels = c(1,0),
                                   labels = c("Yes","No/Not Reported"))
ppt.df$Grade <- factor(ppt.df$Grade,
                   levels = c(1,2,3,4,5,6,7,9,10,11,12),
                   labels = c("6th Grade",
                              "7th Grade",
                              "8th Grade",
                              "9th Grade",
                              "10th Grade",
                              "11th Grade",
                              "12th Grade",
                              "GED Program",
                              "Technical Training or College",
                              "Not in school",
                              "Prefer not to answer"))
ppt.df$Hispanic <- factor(ppt.df$Hispanic,
                          levels = c(1,2,0),
                          labels = c("Hispanic/Latinx",
                                     "Non-Hispanic/Latinx",
                                     "Prefer not to answer"))
ppt.df$Language <- factor(ppt.df$Language,
                         levels = c(1,2,3,0),
                         labels = c("English",
                                    "Spanish",
                                    "Both",
                                    "Prefer not to answer/Other"))
ppt.df$`Sexual Identity` <- factor(ppt.df$`Sexual Identity`,
                         levels = c(1,2,3,4,0),
                         labels = c("LGBQ",
                                    "Straight",
                                    "Pansexual",
                                    "Not sure yet",
                                    "Prefer not to answer/Other"))
#table(ppt.df$`Sexual Identity`) #Optional: check against Survey Monkey's Question Summary

#Pre-Tests: Experiences codebook - The labels and levels are re-ordered to how I want them to appear in the legend with "prefer not to answer" on one end or the other of the Likert scale, as opposed to the sequence in the paper survey/codebook: 1=last 30 days; 2=more than 30 days ago; 3=not sure; 4=never; 5=prefer not to answer 

labelfreq <- c("Never",
               "Not sure",
               "Prefer not to answer",
               "Within the last 30 days",
               "Ever (more than 30 days ago")

#IN-clued + MPC-E + MPC-S Pre-Tests: Categorical Variables - Experiences
ppt.df$`Oral Sex` <- factor(ppt.df$`Oral Sex`,
                         levels = c(4,3,5,1,2),
                         labels = labelfreq,
                         ordered = FALSE)
ppt.df$`Vaginal Sex` <- factor(ppt.df$`Vaginal Sex`,
                         levels = c(4,3,5,1,2),
                         labels = labelfreq,
                         ordered = FALSE)
ppt.df$`Anal Sex` <- factor(ppt.df$`Anal Sex`,
                         levels = c(4,3,5,1,2),
                         labels = labelfreq,
                         ordered = FALSE)
ppt.df$Pregnancy <- factor(ppt.df$Pregnancy,
                         levels = c(4,3,5,1,2),
                         labels = labelfreq,
                         ordered = FALSE)
ppt.df$`Sex using alcohol` <- factor(ppt.df$`Sex using alcohol`,
                         levels = c(4,3,5,1,2),
                         labels = labelfreq,
                         ordered = FALSE)
ppt.df$`Sex using drugs` <- factor(ppt.df$`Sex using drugs`,
                                   levels = c(4,3,5,1,2),
                                   labels = labelfreq)

#Check all that apply:
ppt.df$`Never had sex` <- factor(ppt.df$`Never had sex`,
                                 levels = c(0),
                                 labels = c("Never had sex"))
ppt.df$`Sex with no barrier method` <- factor(ppt.df$`Sex with no barrier method`,
                                 levels = c(2),
                                 labels = c("No barrier method"))
ppt.df$`Sex with no birth control` <- factor(ppt.df$`Sex with no birth control`,
                                 levels = c(3),
                                 labels = c("No birth control"))
ppt.df$EC <- factor(ppt.df$EC,
                                 levels = c(4),
                                 labels = c("EC"))
ppt.df$IUD <- factor(ppt.df$IUD,
                                 levels = c(5),
                                 labels = c("IUD"))
ppt.df$Implant <- factor(ppt.df$Implant,
                                 levels = c(6),
                                 labels = c("Implant"))
ppt.df$Shot <- factor(ppt.df$Shot,
                                 levels = c(7),
                                 labels = c("Shot"))
ppt.df$Ring <- factor(ppt.df$Ring,
                                 levels = c(8),
                                 labels = c("Ring"))
ppt.df$Patch <- factor(ppt.df$Patch,
                                 levels = c(9),
                                 labels = c("Patch"))
ppt.df$Pill <- factor(ppt.df$Pill,
                                 levels = c(10),
                                 labels = c("Pill"))
ppt.df$`Barrier Method` <- factor(ppt.df$`Barrier Method`,
                                 levels = c(11),
                                 labels = c("Barrier Method"))
ppt.df$`Not sure` <- factor(ppt.df$`Not sure`,
                                 levels = c(12),
                                 labels = c("Not Sure"))
ppt.df$`Prefer not to answer` <- factor(ppt.df$`Prefer not to answer`,
                                 levels = c(13),
                                 labels = c("Prefer not to answer"))

#Pre/Post-Tests: Thoughts codebook - 1=disagree; 2=not sure; 3=agree; 4=not applicable to me (replaced with NA); 5=prefer not to answer; seemed to work by just omitting "prefer not to answer"

labellikert <- c("Disagree",
                 "Not Sure",
                 "Agree")

#IN-clued + MPC-E + MPC-S Pre/Post Tests: Categorical Variables - Thoughts
ppt.df$`Prepared to say no if not ready` <- factor(ppt.df$`Prepared to say no if not ready`,
                      levels = c(1,2,3),
                      labels = labellikert,
                      ordered = FALSE)
ppt.df$`Prepared to use a barrier method` <- factor(ppt.df$`Prepared to use a barrier method`,
                      levels = c(1,2,3),
                      labels = labellikert,
                      ordered = FALSE)
ppt.df$`Prepared to say no if no barrier method` <- factor(ppt.df$`Prepared to say no if no barrier method`,
                      levels = c(1,2,3),
                      labels = labellikert,
                      ordered = FALSE)
ppt.df$`Prepared to use birth control` <- factor(ppt.df$`Prepared to use birth control`,
                      levels = c(1,2,3),
                      labels = labellikert,
                      ordered = FALSE)
ppt.df$`Prepared to go to a doctor for birth control` <- factor(ppt.df$`Prepared to go to a doctor for birth control`,
                      levels = c(1,2,3),
                      labels = labellikert,
                      ordered = FALSE)

#Post-Tests: Group satisfaction codebook - 1=disagree; 2=not sure; 3=agree; 4=prefer not to answer 

#IN-clued + MPC-E + MPC-S Post-Tests: Categorical Variable - Satisfaction 
ppt.df$`Group leader understood me` <- factor(ppt.df$`Group leader understood me`,
                                              levels = c(1,2,3),
                                              labels = labellikert,
                                              ordered = FALSE)
ppt.df$`Group mates respect each other` <- factor(ppt.df$`Group mates respect each other`,
                                              levels = c(1,2,3),
                                              labels = labellikert,
                                              ordered = FALSE)
ppt.df$`I felt judged*` <- factor(ppt.df$`I felt judged`,
                                 levels = c(3,2,1), # reverse-coded
                                 labels = labellikert,
                                 ordered = FALSE)
ppt.df$`Group leader knew the material` <- factor(ppt.df$`Group leader knew the material`,
                                                  levels = c(1,2,3),
                                                  labels = labellikert,
                                                  ordered = FALSE)
ppt.df$`I understood the material` <- factor(ppt.df$`I understood the material`,
                                             levels = c(1,2,3),
                                             labels = labellikert,
                                             ordered = FALSE )
ppt.df$`Group leader made the activities fun` <- factor(ppt.df$`Group leader made the activities fun`,
                                                        levels = c(1,2,3),
                                                        labels = labellikert,
                                                        ordered = FALSE)
ppt.df$`I felt free to speak` <- factor(ppt.df$`I felt free to speak`,
                                        levels = c(1,2,3),
                                        labels = labellikert,
                                        ordered = FALSE)
ppt.df$`I felt uncomfortable participating*` <- factor(ppt.df$`I felt uncomfortable participating`,
                                                      levels = c(3,2,1), # reverse-coded
                                                      labels = labellikert,
                                                      ordered = FALSE)
#Pre/Post-Test by group if group name is exact 
#Update thisgroup name and ggsave file names below
thisgroup <- "achd110821d"

########################
# Pre-Test: Experience #
########################

library(likert)
## Loading required package: ggplot2
## Loading required package: xtable
gpsexperience.df <- ppt.df[ppt.df$`Group Name` %like% thisgroup & ppt.df$Time=="Before",c(72,4,6,28:33)] #6 Experience variables (likert)
gpsexperience.df <- na.omit(gpsexperience.df) 

gplktexperience <- likert(gpsexperience.df[,4:9])
plot(gplktexperience, positive.order = TRUE) + ggtitle("Participants' Past Sexual Experiences")

ggsave("PAYC120621b - Experiences - Pre.png", width = 9, height = 2.5)

########################

#Optional: subset the types of past sexual experiences

gpsexpermrd.df <- ppt.df[ppt.df$`Group Name` %like% thisgroup & ppt.df$Time=="Before",c(72,4,6,34,36:47)] #13 Experience options (MR dummy excluding duplicate column and "Other method" (text))

mrdnever   <- data.frame(table(gpsexpermrd.df$`Never had sex`))
mrdnobm    <- data.frame(table(gpsexpermrd.df$`Sex with no barrier method`))
mrdnobc    <- data.frame(table(gpsexpermrd.df$`Sex with no birth control`))
mrdec      <- data.frame(table(gpsexpermrd.df$EC))
mrdiud     <- data.frame(table(gpsexpermrd.df$IUD))
mrdimplant <- data.frame(table(gpsexpermrd.df$Implant))
mrdshot    <- data.frame(table(gpsexpermrd.df$Shot))
mrdring    <- data.frame(table(gpsexpermrd.df$Ring))
mrdpatch   <- data.frame(table(gpsexpermrd.df$Patch))
mrdpill    <- data.frame(table(gpsexpermrd.df$Pill))
mrdbm      <- data.frame(table(gpsexpermrd.df$`Barrier Method`))
mrdns      <- data.frame(table(gpsexpermrd.df$`Not sure`))
mrdpnta    <- data.frame(table(gpsexpermrd.df$`Prefer not to answer`))

mrdall <- rbind.data.frame(mrdnever,mrdnobm,mrdnobc,mrdec,mrdiud,mrdimplant,mrdshot,mrdring,mrdpatch,mrdpill,mrdbm,mrdns,mrdpnta)
names(mrdall) <- c("At Last Sex","Count")

###########################
# Pre/Post-Test: Thoughts #
###########################
library(likert)

gpsthoughts.df <- ppt.df[ppt.df$`Group Name` %like% thisgroup,c(72,4,6,67:71)] #5 Thought variables (likert) for IN-clued + MPC Pre AND Post
gpsthoughts.df <- na.omit(gpsthoughts.df) 

gplktthought <- likert(gpsthoughts.df[,4:8], grouping = gpsthoughts.df$Time)
plot(gplktthought, group.order = c("Before","After")) + ggtitle("Participants' Thoughts about Future Sexual Activity")

ggsave("PAYC120621b - Thoughts over time.png", width = 9, height = 4.5)
#may not plot if too many listwise deletions

table(gpsthoughts.df$Time)
## 
## Before  After 
##     10      1
#Optional: just the pre-test thoughts
gplktthought <- likert(gpsthoughts.df[,4:8])
plot(gplktthought) + ggtitle("Participants' Thoughts about Future Sexual Activity")

ggsave("PAYC120621b - Thoughts.png", width = 9, height = 2.5)
#may not plot if too many listwise deletions

###########################
# Post-Test: Satisfaction #
###########################

gpssatisfaction.df <- ppt.df[ppt.df$`Group Name`%like% thisgroup & ppt.df$Time=="After",c(72,4,49:59)] #8 Group variables on IN-clued + MPC Post, excluding comments
gpssatisfaction.df <- na.omit(gpssatisfaction.df)

gplktsatisfaction <- likert(gpssatisfaction.df[,3:10])
plot(gplktsatisfaction) + ggtitle("Participants' Satisfaction with the Group")

ggsave("DreamCenter102621 - Satisfaction.png", width = 9, height = 3.25)

######################
# SurveyMonkey Stats #
######################

# Optional subsets:
#post.df <- ppt.df[ppt.df$Time=="After", c(1:3,47:60,61:97)]# optional subset of identifiers, satisfaction, contact info to post-test
#pre.df  <- ppt.df[ppt.df$Time=="Before", c(1,3,4,6:13,15,16,18:22,24,26:45)] #optional subset of demographics, experiences
#pre.df  <- na.omit(pre.df)
#ptksa.df <- pptmengl.df[pptmengl.df$`Group Name`%like% thisgroup,c(100,4,63:94)] #16 KSA variables with 32 options for MPC

################
# OAH 1: Reach #
################

gpdemographics.df <- ppt.df[ppt.df$`Group Name` %like% thisgroup & ppt.df$Time=="Before",c(72,4,3,9:15,17,18,20:24,26)] #8 Demographic variables for IN-clued + MPC
gpdemographics.df <- na.omit(gpdemographics.df)

# install.packages("eeptools")
library(eeptools)
gpdemographics.df$Age <- floor(age_calc(gpdemographics.df$Birthdate, units = "years")) #make sure your date variable is formatted; does not like NAs

gpdemographics.df$gender_cat <- apply(gpdemographics.df[13:17], 1, function(x) {ifelse(sum(x) > 1, "Multigender", names(x[x != 0]))})
gpdgender <- data.frame(table(gpdemographics.df$gender_cat))
names(gpdgender) <- c("Demographics","Count")

gpdemographics.df$race_cat <- apply(gpdemographics.df[5:10], 1, function(x) {ifelse(sum(x) > 1, "Multiracial", names(x[x != 0]))})
gpdrace <- data.frame(table(gpdemographics.df$race_cat))
names(gpdrace) <- c("Demographics","Count")

gpdage <- data.frame(table(gpdemographics.df$Age))
names(gpdage) <- c("Demographics","Count")

gpdgrade <- data.frame(table(gpdemographics.df$Grade))
names(gpdgrade) <- c("Demographics","Count")

gpdreach <- rbind.data.frame(gpdgender,gpdrace,gpdage,gpdgrade)

library(sjPlot)
tab_df(gpdreach,
       title = "Reach for: ACHD110821d",
       file = "ACHD110821d - Demographics Table.doc")
Reach for: ACHD110821d
Demographics Count
Nonbinary/Does not identify 1
Woman 14
Black 3
Multiracial 4
Race not reported 7
White 1
12 8
13 7
6th Grade 0
7th Grade 15
8th Grade 0
9th Grade 0
10th Grade 0
11th Grade 0
12th Grade 0
GED Program 0
Technical Training or College 0
Not in school 0
Prefer not to answer 0
#gpdoahrace <- tab_xtab(var.row = gpdemographics.df$race_cat, var.col = gpdemographics.df$Hispanic,
#         title = "Race by Ethnicity for: PAYC120621a",
#         file = "PAYC120621b - OAH Race by Ethnicity Table.doc") #may not print if no Hispanic YP; need 2 or more non-zero column marginals
# OR

table(gpdemographics.df$race_cat,gpdemographics.df$Hispanic)
##                    
##                     Hispanic/Latinx Non-Hispanic/Latinx Prefer not to answer
##   Black                           0                   3                    0
##   Multiracial                     0                   4                    0
##   Race not reported               7                   0                    0
##   White                           0                   1                    0
#Knitr can print double tables: https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#multiple-tables-side-by-side
#knitr::kables(
#  list(
#    knitr::kable(
#      reach),
#    knitr::kable(oahrace)),)
#Subsets by time frame instead of group name
#Update begin/finish dates & ggsave file names below
begin  <- "2021-07-01" #options: "2020-07-01", "2021-01-01", "2021-07-01", "2022-01-01", "2022-07-01", "2023-01-01"
finish <- "2022-06-30" #options: "2020-12-31", "2021-06-30", "2021-12-31", "2022-06-30", "2022-12-31", "2023-06-30"

###################
# All Experiences #
###################

library(likert)
yrsexperience.df <- ppt.df[ppt.df$`Start Date` >=begin & ppt.df$`Start Date` <=finish & ppt.df$Time=="Before",c(72,4,6,28:33)] #6 Experience variables (likert)
yrsexperience.df <- na.omit(yrsexperience.df) 

yrlktexperience <- likert(yrsexperience.df[,4:9])
plot(yrlktexperience, positive.order = TRUE) + ggtitle("Participants' Past Sexual Experiences")

ggsave("Year 2 - Experiences - Pre.png", width = 9, height = 2.5)

################
# All Thoughts #
################
       
yrsthoughts.df <- ppt.df[ppt.df$`Start Date`>=begin & ppt.df$`Start Date`<=finish,c(72,4,6,67:71)] #5 Thought variables (likert) for Pre AND Post
yrsthoughts.df <- na.omit(yrsthoughts.df) 

yrlktthought <- likert(yrsthoughts.df[,4:8], grouping = yrsthoughts.df$Time)
plot(yrlktthought, group.order = c("Before","After")) + ggtitle("Participants' Thoughts about Future Sexual Activity")

ggsave("Year 2 - Thoughts over time.png", width = 9, height = 4.5)
#may not plot if too many listwise deletions

table(yrsthoughts.df$Time) #fewer post < pre
## 
## Before  After 
##     84     13
chisq.test(yrsthoughts.df$`Prepared to say no if not ready`,yrsthoughts.df$Time)
## Warning in chisq.test(yrsthoughts.df$`Prepared to say no if not ready`, : Chi-
## squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  yrsthoughts.df$`Prepared to say no if not ready` and yrsthoughts.df$Time
## X-squared = 1.3494, df = 2, p-value = 0.5093
chisq.test(yrsthoughts.df$`Prepared to use a barrier method`,yrsthoughts.df$Time)
## Warning in chisq.test(yrsthoughts.df$`Prepared to use a barrier method`, : Chi-
## squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  yrsthoughts.df$`Prepared to use a barrier method` and yrsthoughts.df$Time
## X-squared = 0.55536, df = 2, p-value = 0.7575
chisq.test(yrsthoughts.df$`Prepared to say no if no barrier method`,yrsthoughts.df$Time)
## Warning in chisq.test(yrsthoughts.df$`Prepared to say no if no barrier method`, :
## Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  yrsthoughts.df$`Prepared to say no if no barrier method` and yrsthoughts.df$Time
## X-squared = 3.2934, df = 2, p-value = 0.1927
chisq.test(yrsthoughts.df$`Prepared to use birth control`,yrsthoughts.df$Time)
## Warning in chisq.test(yrsthoughts.df$`Prepared to use birth control`,
## yrsthoughts.df$Time): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  yrsthoughts.df$`Prepared to use birth control` and yrsthoughts.df$Time
## X-squared = 3.8992, df = 2, p-value = 0.1423
chisq.test(yrsthoughts.df$`Prepared to go to a doctor for birth control`,yrsthoughts.df$Time)
## Warning in chisq.test(yrsthoughts.df$`Prepared to go to a doctor for birth
## control`, : Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  yrsthoughts.df$`Prepared to go to a doctor for birth control` and yrsthoughts.df$Time
## X-squared = 2.631, df = 2, p-value = 0.2683
#p-value < 0.05: small likelihood that the pre/post difference is due to chance

####################
# All Satisfaction #
####################

yrssatisfaction.df <- ppt.df[ppt.df$`Start Date`>=begin & ppt.df$`Start Date`<=finish & ppt.df$Time=="After",c(72,4,6,49:59)] #8 Group variables on Post
yrssatisfaction.df <- na.omit(yrssatisfaction.df)

yrlktsatisfaction <- likert(yrssatisfaction.df[,4:11])
plot(yrlktsatisfaction) + ggtitle("Participants' Satisfaction with the Groups")

ggsave("Year 2 - Satisfaction.png", width = 9, height = 3.25)

####################
# All Demographics #
####################

yrdemographics.df <- ppt.df[ppt.df$`Start Date` >=begin & ppt.df$`Start Date`<=finish & ppt.df$Time=="Before",c(72,4,3,9:15,17,18,20:24,26)] #8 Demographic variables 
yrdemographics.df <- na.omit(yrdemographics.df)

# install.packages("eeptools")
library(eeptools)
yrdemographics.df$Age <- floor(age_calc(yrdemographics.df$Birthdate, units = "years")) #make sure your date variable is formatted; does not like NAs

yrdemographics.df$gender_cat <- apply(yrdemographics.df[13:17], 1, function(x) {ifelse(sum(x) > 1, "Multigender", names(x[x != 0]))})
yrdgender <- data.frame(table(yrdemographics.df$gender_cat))
names(yrdgender) <- c("Demographics","Count")

yrdemographics.df$race_cat <- apply(yrdemographics.df[5:10], 1, function(x) {ifelse(sum(x) > 1, "Multiracial", names(x[x != 0]))})
yrdrace <- data.frame(table(yrdemographics.df$race_cat))
names(yrdrace) <- c("Demographics","Count")

yrdage <- data.frame(table(yrdemographics.df$Age))
names(yrdage) <- c("Demographics","Count")

yrdgrade <- data.frame(table(yrdemographics.df$Grade))
names(yrdgrade) <- c("Demographics","Count")

yrdreach <- rbind.data.frame(yrdgender,yrdrace,yrdage,yrdgrade)

library(sjPlot)
tab_df(yrdreach,
       title = "Reach for Year 2",
       file = "Year 2 - Demographics Table.doc")
Reach for Year 2
Demographics Count
Gender not reported 2
Man 12
Multigender 3
Nonbinary/Does not identify 6
Transgender 2
Woman 103
Asian 9
Black 37
Multiracial 13
Native American 2
Pacific Islander 1
Race not reported 50
White 16
12 28
13 59
14 21
15 4
16 2
17 4
18 3
19 5
20 1
21 1
6th Grade 0
7th Grade 48
8th Grade 59
9th Grade 3
10th Grade 3
11th Grade 2
12th Grade 7
GED Program 0
Technical Training or College 6
Not in school 0
Prefer not to answer 0
#yrdoahrace <- tab_xtab(var.row = yrdemographics.df$race_cat, var.col = yrdemographics.df$Hispanic,
#         title = "Race by Ethnicity for Year 2",
#         file = "Year 2 - Race by Ethnicity Table.doc") #may not print if no Hispanic YP; need 2 or more non-zero column marginals
# OR

table(yrdemographics.df$race_cat,yrdemographics.df$Hispanic)
##                    
##                     Hispanic/Latinx Non-Hispanic/Latinx Prefer not to answer
##   Asian                           2                   7                    0
##   Black                           1                  35                    1
##   Multiracial                     2                  11                    0
##   Native American                 2                   0                    0
##   Pacific Islander                1                   0                    0
##   Race not reported              50                   0                    0
##   White                           1                  13                    2
#############
# Knowledge #
#############

#compute the average pre-test score to use as a threshold against the average post-test score in a one-sample t test

rawpre.df <- read.csv("QuizSummaryYR1pre.csv")
yrpre.df <- rawpre.df[2:nrow(rawpre.df),c(2)] 

meanpre <- mean(as.numeric(sub("%","",yrpre.df))) 

rawpost.df <- read.csv("QuizSummaryYR1post.csv")
yrpost.df <- rawpost.df[2:nrow(rawpost.df),c(2)] 
yrpost.df <- as.numeric(sub("%","",yrpost.df))

t.test(yrpost.df, mu = meanpre, alternative = "greater")
## 
##  One Sample t-test
## 
## data:  yrpost.df
## t = 3.1085, df = 29, p-value = 0.002094
## alternative hypothesis: true mean is greater than 58.76271
## 95 percent confidence interval:
##  63.90297      Inf
## sample estimates:
## mean of x 
##      70.1
library(effectsize)
cohens_d(yrpost.df, mu = meanpre, alternative = "greater")
length(yrpost.df)
## [1] 30
length(yrpre.df)
## [1] 59
#compute difference scores for each respondent to use in a paired t-test

scores <- read.csv("QuizSummary.csv")
merged.df <- merge(scores,pptmraw.df, by=c('Respondent.ID'))
merged.df <- merged.df[2:nrow(merged.df),c(1:3,12,21:23,25) ] #99 variables, just MPC+
names(merged.df) <- c("Respondent.ID","Score (Percent)","Score (Points)","Start Date","Initials","Birthdate",
                   "Group Name", "Time")
                  
merged.df$`Group Name` <- tolower(merged.df$`Group Name`) 
merged.df$Initials <- tolower(merged.df$Initials)

merged.df$`Start Date` <- as.POSIXct(as.character(merged.df$`Start Date`), format = "%m/%d/%Y %H:%M:%S %p", tz="America/New_York")
merged.df$Birthdate <- as.Date(merged.df$Birthdate, "%m/%d/%Y") 
merged.df$PID <- paste(merged.df$Initials, merged.df$Birthdate)

wide.df <- reshape(merged.df, idvar = "PID", timevar = "Time", direction = "wide")
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for Time=2: first taken
wide.df <- wide.df[,c(5,8,1,3,10)]
#wide.df <- as.numeric(sub("%","",wide.df)) #convert to numbers without NAing data
#wide.df$diff <- wide.df$`Score (Percent).2` - wide.df$`Score (Percent).1
#run t-test on difference scores

Fidelity Logs & Quarterly Program Reports

rawflqpr.df <- read.csv("Facilitators Evaluation Tools.csv", stringsAsFactors = TRUE)
flqpr.df <- rawflqpr.df[2:nrow(rawflqpr.df),c(10:30,32:36,38:47,49:53,55:64,66:70,72:81,83:87,89:93,95:99,101:105,107,115:121,141:153,155:167,169:181,183:195,196)] #145 variables
names(flqpr.df) <- c("tool",
                    "FL io",
                    "FL grp",
                    "FL fac1", "FL fac2", "FL fac3", "FL TA topic",
                    "FL success",
                    "FL ref srh","FL ref mh","FL ref sa","FL ref pcp","FL ref edu","FL ref voc","FL ref viol",
                    "FL ebp",
                    "LN1 name","LN1 start","LN1 how","LN1 change","LN1 incomp",
                    "LN2 name","LN2 start","LN2 how","LN2 change","LN2 incomp",
                    "LN3 name","LN3 start","LN3 how","LN3 change","LN3 incomp",
                    "MPC1 name","MPC1 start","MPC1 how","MPC1 change","MPC1 incomp",
                    "MPC2 name","MPC2 start","MPC2 how","MPC2 change","MPC2 incomp",
                    "MPC3 name","MPC3 start","MPC3 how","MPC3 change","MPC3 incomp",
                    "IN1 name","IN1 start","IN1 how","IN1 change","IN1 incomp",
                    "IN2 name","IN2 start","IN2 how","IN2 change","IN2 incomp",
                    "IN3 name","IN3 start","IN3 how","IN3 change","IN3 incomp",
                    "SL1 name","SL1 start","SL1 how","SL1 change","SL1 incomp",
                    "SL2 name","SL2 start","SL2 how","SL2 change","SL2 incomp",
                    "SL3 name","SL3 start","SL3 how","SL3 change","SL3 incomp",
                    "SL4 name","SL4 start","SL4 how","SL4 change","SL4 incomp",
                    "SL5 name","SL5 start","SL5 how","SL5 change","SL5 incomp",
                    "QPR fac", "QPR email", "QPR phone", "QPR io", "QPR setting", "QPR prog spec", "QPR period", "QPR ebp",
                    "QPR1 name","# meetings","QPR1 start","QPR1 end","QPR1 change","QPR1 reach","QPR1 attend","QPR1 retent","QPR1 eval-none","QPR1 eval-pre","QPR1 eval-FL","QPR1 eval-obs","QPR1 eval-post",
                    "QPR2 name","# meetings","QPR2 start","QPR2 end","QPR2 change","QPR2 reach","QPR2 attend","QPR2 retent","QPR2 eval-none","QPR2 eval-pre","QPR2 eval-FL","QPR2 eval-obs","QPR2 eval-post",
                    "QPR3 name","# meetings","QPR3 start","QPR3 end","QPR3 change","QPR3 reach","QPR3 attend","QPR3 retent","QPR3 eval-none","QPR3 eval-pre","QPR3 eval-FL","QPR3 eval-obs","QPR3 eval-post",
                    "QPR4 name","# meetings","QPR4 start","QPR4 end","QPR4 change","QPR4 reach","QPR4 attend","QPR4 retent","QPR4 eval-none","QPR4 eval-pre","QPR4 eval-FL","QPR4 eval-obs","QPR4 eval-post",
                    "add'l info")
flqpr.df[flqpr.df==""] <- NA

library(data.table)
flqpr.df$`FL grp`<- tolower(flqpr.df$`FL grp`)
flqpr.df$`QPR1 name` <- tolower(flqpr.df$`QPR1 name`)
flqpr.df$`QPR2 name` <- tolower(flqpr.df$`QPR2 name`)
flqpr.df$`QPR3 name` <- tolower(flqpr.df$`QPR3 name`)
flqpr.df$`QPR4 name` <- tolower(flqpr.df$`QPR4 name`)

flqpr.df$`MPC1 start` <- as.Date(flqpr.df$`MPC1 start`,"%m/%d/%Y")
flqpr.df$`MPC2 start` <- as.Date(flqpr.df$`MPC2 start`,"%m/%d/%Y")
flqpr.df$`MPC3 start` <- as.Date(flqpr.df$`MPC3 start`,"%m/%d/%Y")

flqpr.df$`IN1 start` <- as.Date(flqpr.df$`IN1 start`,"%m/%d/%Y")
flqpr.df$`IN2 start` <- as.Date(flqpr.df$`IN2 start`,"%m/%d/%Y")
flqpr.df$`IN3 start` <- as.Date(flqpr.df$`IN3 start`,"%m/%d/%Y")

flqpr.df$`SL1 start` <- as.Date(flqpr.df$`SL1 start`,"%m/%d/%Y")
flqpr.df$`SL2 start` <- as.Date(flqpr.df$`SL2 start`,"%m/%d/%Y")
flqpr.df$`SL3 start` <- as.Date(flqpr.df$`SL3 start`,"%m/%d/%Y")
flqpr.df$`SL4 start` <- as.Date(flqpr.df$`SL4 start`,"%m/%d/%Y")
flqpr.df$`SL5 start` <- as.Date(flqpr.df$`SL5 start`,"%m/%d/%Y")

flqpr.df$`QPR1 start` <- as.Date(flqpr.df$`QPR1 start`,"%m/%d/%Y")
flqpr.df$`QPR2 start` <- as.Date(flqpr.df$`QPR2 start`,"%m/%d/%Y")
flqpr.df$`QPR3 start` <- as.Date(flqpr.df$`QPR3 start`,"%m/%d/%Y")
flqpr.df$`QPR4 start` <- as.Date(flqpr.df$`QPR4 start`,"%m/%d/%Y")

flqpr.df$`QPR1 end` <- as.Date(flqpr.df$`QPR1 end`,"%m/%d/%Y")
flqpr.df$`QPR2 end` <- as.Date(flqpr.df$`QPR2 end`,"%m/%d/%Y")
flqpr.df$`QPR3 end` <- as.Date(flqpr.df$`QPR3 end`,"%m/%d/%Y")
flqpr.df$`QPR4 end` <- as.Date(flqpr.df$`QPR4 end`,"%m/%d/%Y")

cleanIt <- function(vec){
  chars <- as.character(vec)
  chars[chars!=""] <- "1" #if not blank (including zeroes), replace with "one"
  chars[is.na(chars)] <- "0" #if missing, replace with zero
  chars[chars==""] <- "0" #if blank, replace with zero
  return(as.numeric(chars)) #convert values to numbers
}

flqpr.df[103:107] <- lapply(flqpr.df[103:107], cleanIt) #QPR1 eval
flqpr.df[116:120] <- lapply(flqpr.df[116:120], cleanIt) #QPR2 eval
flqpr.df[129:133] <- lapply(flqpr.df[129:133], cleanIt) #QPR3 eval
flqpr.df[142:146] <- lapply(flqpr.df[142:146], cleanIt) #QPR4 eval

labelio <- c("ACDSS","ACHD","CHS","DreamCenter","CrossRoads","Elon","PAYC","SA")
labelepb <- c("HF-NC","IN-clued","Love Notes","MPC","PPP","SSI","Supplementals")
labelln <- c("N&B","1","2","3","4","5","6","7","8","9","10","11","12","13","KTA")
labelmpc <- c("N&B","1","2","3","EC","TMHYLI","4","5","6","7","8","9","10","KTA")
labelinc <- c("N&B1-2","1","N&B3","2","3","KTA")
labelsl <- c("KTA1","KTA2","KTA3","TMHYLI1","TMHYLI2","TMHYLI3","TMHYLI4","TMHYLI5")
labelhow <- c("as written","with changes","incomplete")
labelset <- c("school","other CBO","out-of-home","homeless","juvenile justice","clinic","faith-based")
labelperiod <- c("Y1-SAPR1","Y1-SAPR1","Y1-SAPR2","Y1-SAPR2","Y2-SAPR1","Y2-SAPR1","Y2-SAPR2","Y2-SAPR2","Y3-SAPR1","Y3-SAPR1","Y3-SAPR2","Y3-SAPR2")

flqpr.df$tool <- factor(flqpr.df$tool,
                          levels = c(1,2,3),
                          labels = c("P/PT","FL","QPR"))
flqpr.df$`FL io` <- factor(flqpr.df$`FL io`,
                          levels = c(1,2,3,4,5,6,7,8),
                          labels = labelio)
flqpr.df$`FL ebp` <- factor(flqpr.df$`FL ebp`,
                           levels = c(1,2,3,4,5,6,7),
                           labels = labelepb)

#Fidelity Logs: lesson names for MPC & IN-clued only
flqpr.df$`MPC1 name` <- factor(flqpr.df$`MPC1 name`,
                              levels = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
                              labels = labelmpc)
flqpr.df$`MPC2 name` <- factor(flqpr.df$`MPC2 name`,
                              levels = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
                              labels = labelmpc)
flqpr.df$`MPC3 name` <- factor(flqpr.df$`MPC3 name`,
                              levels = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
                              labels = labelmpc)

flqpr.df$`IN1 name` <- factor(flqpr.df$`IN1 name`,
                             levels = c(1,2,3,4,5,6),
                             labels = labelinc)
flqpr.df$`IN2 name` <- factor(flqpr.df$`IN2 name`,
                             levels = c(1,2,3,4,5,6),
                             labels = labelinc)
flqpr.df$`IN3 name` <- factor(flqpr.df$`IN3 name`,
                             levels = c(1,2,3,4,5,6),
                             labels = labelinc)

#Fidelity Logs: lesson implementation for all EBPs with Fidelity Logs
flqpr.df$`LN1 how` <- factor(flqpr.df$`LN1 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`LN2 how` <- factor(flqpr.df$`LN2 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`LN3 how` <- factor(flqpr.df$`LN3 how`,
                            levels = c(1,2,3),
                            labels = labelhow)

flqpr.df$`MPC1 how` <- factor(flqpr.df$`MPC1 how`,
                             levels = c(1,2,3),
                             labels = labelhow)
flqpr.df$`MPC2 how` <- factor(flqpr.df$`MPC2 how`,
                             levels = c(1,2,3),
                             labels = labelhow)
flqpr.df$`MPC3 how` <- factor(flqpr.df$`MPC3 how`,
                             levels = c(1,2,3),
                             labels = labelhow)

flqpr.df$`IN1 how` <- factor(flqpr.df$`IN1 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`IN2 how` <- factor(flqpr.df$`IN2 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`IN3 how` <- factor(flqpr.df$`IN3 how`,
                            levels = c(1,2,3),
                            labels = labelhow)

flqpr.df$`SL1 how` <- factor(flqpr.df$`SL1 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`SL2 how` <- factor(flqpr.df$`SL2 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`SL3 how` <- factor(flqpr.df$`SL3 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`SL4 how` <- factor(flqpr.df$`SL4 how`,
                            levels = c(1,2,3),
                            labels = labelhow)
flqpr.df$`SL5 how` <- factor(flqpr.df$`SL5 how`,
                            levels = c(1,2,3),
                            labels = labelhow)

#Quarterly Program Reports
flqpr.df$`QPR io` <- factor(flqpr.df$`QPR io`,
                                   levels = c(1,2,3,4,5,6,7,8),
                                   labels = labelio)
flqpr.df$`QPR setting` <- factor(flqpr.df$`QPR setting`,
                                   levels = c(1,2,3,4,5,6,7),
                                   labels = labelset)
flqpr.df$`QPR prog spec` <- factor(flqpr.df$`QPR prog spec`,
                                   levels = c(1,2),
                                   labels = c("Lisa","Caro"))
flqpr.df$`QPR period` <- factor(flqpr.df$`QPR period`,
                                   levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
                                   labels = labelperiod)
flqpr.df$`QPR ebp` <- factor(flqpr.df$`QPR ebp`,
                                   levels = c(1,2,3,4,5,6,7),
                                   labels = labelepb)

#######################
# OAH 2: Dosage (all) #
#######################
#double check column numbers
#Reach & Retention by group reported in the QPR; convert to wide form to long form data
period <- "Y1-SAPR2"  #options: "Y1-SAPR1", "Y1-SAPR2", "Y2-SAPR1", "Y2-SAPR2", "Y3-SAPR1", "Y3-SAPR2",
curr <- "MPC" #options:" "MPC", "IN-clued"

qpr1 <- flqpr.df[flqpr.df$`QPR period` %like% period & flqpr.df$`QPR ebp`==curr, c(95,96,100:102)]
qpr2 <- flqpr.df[flqpr.df$`QPR period` %like% period & flqpr.df$`QPR ebp`==curr, c(108,109,113:115)]
qpr3 <- flqpr.df[flqpr.df$`QPR period` %like% period & flqpr.df$`QPR ebp`==curr, c(121,122,126:128)]
qpr4 <- flqpr.df[flqpr.df$`QPR period` %like% period & flqpr.df$`QPR ebp`==curr, c(134,135,139:141)]

names(qpr1) <- c("Group Name","# Sessions/Meetings","Reach","Average Attendance","Retention")
names(qpr2) <- c("Group Name","# Sessions/Meetings","Reach","Average Attendance","Retention")
names(qpr3) <- c("Group Name","# Sessions/Meetings","Reach","Average Attendance","Retention")
names(qpr4) <- c("Group Name","# Sessions/Meetings","Reach","Average Attendance","Retention")

dosage <- rbind.data.frame(qpr1,qpr2,qpr3,qpr4)
dosage <- na.omit(dosage)

library(sjPlot)
tab_df(dosage,
       title = "Dosage for MPC: 2020-2021",
       file = "MPC 2020-2021 - Dosage Table.doc")
Dosage for MPC: 2020-2021
Group.Name X..Sessions.Meetings Reach Average.Attendance Retention
payc051821 10 5 30 3
dreamcenter032421 8 8 48 3
dreamcenter062121 6 8 83 6
#repeat for IN-clued

################################################
# OAH 3: Fidelity & Quality: Lessons Completed #
################################################
#Subsets by time frame: Update begin & finish dates below
begin  <- "2021-01-01" #options: "2020-07-01", "2021-01-01", "2021-07-01", "2022-01-01", "2022-07-01", "2023-01-01"
finish <- "2021-06-30" #options: "2020-12-31", "2021-06-30", "2021-12-31", "2022-06-30", "2022-12-31", "2023-06-30"

saprfl.df <- flqpr.df[flqpr.df$`MPC1 start` >=begin & flqpr.df$`MPC1 start` <=finish, c(1:147)]

table(saprfl.df$`FL grp`,saprfl.df$`FL ebp`) 
##                    
##                     HF-NC IN-clued Love Notes MPC PPP SSI Supplementals
##   alamance04062021      0        0          0   5   0   0             0
##   dreamcenter032421     0        0          0  10   0   0             0
##   dreamcenter062121     0        0          0   6   0   0             0
##   payc021821            0        0          0  11   0   0             0
##   payc022321            0        0          0   1   0   0             0
##   payc051821            0        0          0   7   0   0             0
##   sabgc041921           0        0          0   1   0   0             0
##   sabgc050121           0        0          0   5   0   0             0
#How many sections/groups have fidelity logs? How many logs per section/group? For which EBP? Technically, this is the lessons completed. 
#Doesn't seem to match this dataframe of all FLs for the year:

mpc1 <- flqpr.df[flqpr.df$`MPC1 start` >=begin & flqpr.df$`MPC1 start` <=finish,c(3,32:36)]
mpc2 <- flqpr.df[flqpr.df$`MPC2 start` >=begin & flqpr.df$`MPC2 start` <=finish,c(3,37:41)]
mpc3 <- flqpr.df[flqpr.df$`MPC3 start` >=begin & flqpr.df$`MPC3 start` <=finish,c(3,42:46)]

names(mpc1) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")
names(mpc2) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")
names(mpc3) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")

mpcflall <- rbind.data.frame(mpc1,mpc2,mpc3)
mpcflall <- na.omit(mpcflall)

#Optional: Double check by group, using the appended logs below by EBP

#Append the MPC lesson logs into a long form fidelity log, by group/section:
mpcgrp <- "dreamcenter032421"

mpc1 <- flqpr.df[flqpr.df$`FL grp` %like% mpcgrp,c(3,32:36)]
mpc2 <- flqpr.df[flqpr.df$`FL grp` %like% mpcgrp,c(3,37:41)]
mpc3 <- flqpr.df[flqpr.df$`FL grp` %like% mpcgrp,c(3,42:46)]

names(mpc1) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")
names(mpc2) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")
names(mpc3) <- c("FL Group", "Lesson Name", "Start Date", "MPC Implementation", "Changes Made","Incomplete")

mpcfl <- rbind.data.frame(mpc1,mpc2,mpc3)
mpcfl <- na.omit(mpcfl)

#Append the IN-clued lesson logs into a long form fidelity log, by group/section:
incgrp <- "dreamcenter062121"

inc1 <- flqpr.df[flqpr.df$`FL grp`==incgrp,c(3,47:51)]
inc2 <- flqpr.df[flqpr.df$`FL grp`==incgrp,c(3,52:56)]
inc3 <- flqpr.df[flqpr.df$`FL grp`==incgrp,c(3,57:61)]

names(inc1) <- c("FL Group", "Lesson Name", "Start Date", "IN-clued Implementation", "Changes Made","Incomplete")
names(inc2) <- c("FL Group", "Lesson Name", "Start Date", "IN-clued Implementation", "Changes Made","Incomplete")
names(inc3) <- c("FL Group", "Lesson Name", "Start Date", "IN-clued Implementation", "Changes Made","Incomplete")

incfl <- rbind.data.frame(inc1,inc2,inc3)
incfl <- na.omit(incfl)

###########################################
# OAH 3: Fidelity & Quality: Observations #
###########################################

rawobs.df <- read.csv("OAH Program Observation Form for TPP Grantees.csv", stringsAsFactors = TRUE)
obs.df <- rawobs.df[2:nrow(rawobs.df),c(11,20,23,22,21,34)]
names(obs.df) <- c("Group observed",
                   "Date observed",
                   "observed_activities",
                   "total_activities",
                   "# Lessons observed",
                   "Observation Quality")
obs.df[obs.df==""] <- NA

library(data.table)
obs.df$`Group observed`<- tolower(obs.df$`Group observed`)

obs.df$`Date observed` <- as.Date(obs.df$`Date observed`,"%m/%d/%Y")

cleanCares <- function(vec){
  chars <- as.character(vec)
  chars[is.na(chars)] <- "0" #if missing, replace with zero
  chars[chars==""] <- "0" #if blank, replace with zero
  return(as.numeric(chars)) #convert values to numbers
}

obs.df[3:6] <- lapply(obs.df[3:6], cleanCares)

obs.df$adherence <- obs.df$observed_activities / obs.df$total_activities *100

obs.df <- obs.df[obs.df$`Date observed` >=begin & obs.df$`Date observed` <=finish, c(1:7)]

library(knitr)
kable(obs.df)
Group observed Date observed observed_activities total_activities # Lessons observed Observation Quality adherence
2 cgdc062121 2021-06-24 2 2 2 5 100
3 payc051821 2021-06-01 4 4 1 5 100
4 sabgc042021 2021-05-20 2 4 1 3 50
5 cgdc032421 2021-04-28 2 2 1 4 100
6 payc021821 2021-03-25 4 4 1 4 100
7 payc021821 2021-03-23 4 4 1 4 100
#need to figure out table

##############################
# OAH 4: Supportive Services #
##############################

#subset by period:
referralsall.df <- flqpr.df[flqpr.df$`MPC1 start` >=begin & flqpr.df$`MPC1 start` <=finish,c(3,9:15)] 
referralsall.df <- na.omit(referralsall.df)

#subset by service:
referralsall.df$`FL ref edu` <- as.numeric(as.character(referralsall.df$`FL ref edu`))
referralsall.df$`FL ref mh` <- as.numeric(as.character(referralsall.df$`FL ref mh`))
referralsall.df$`FL ref pcp` <- as.numeric(as.character(referralsall.df$`FL ref pcp`))
referralsall.df$`FL ref sa` <- as.numeric(as.character(referralsall.df$`FL ref sa`))
referralsall.df$`FL ref srh` <- as.numeric(as.character(referralsall.df$`FL ref srh`))
referralsall.df$`FL ref viol` <- as.numeric(as.character(referralsall.df$`FL ref viol`))
referralsall.df$`FL ref voc` <- as.numeric(as.character(referralsall.df$`FL ref voc`))

#sum each service
edu <- sum(referralsall.df$`FL ref edu`)
mh <- sum(referralsall.df$`FL ref mh`)
pcp <- sum(referralsall.df$`FL ref pcp`)
sa <- sum(referralsall.df$`FL ref sa`)
srh <- sum(referralsall.df$`FL ref srh`)
viol <- sum(referralsall.df$`FL ref viol`)
voc <- sum(referralsall.df$`FL ref voc`)

#create a new dataframe with the sums
suppsrvs <- data.frame(referral=c("SRH","MH","SA","PCP","EDU","VOC","VIOL"),count=c(srh,mh,sa,pcp,edu,voc,viol))

#create a table with all referrals total (for SAPR)
tab_df(suppsrvs,
       title = "Y1 Referrals for all IOs",
       file = "Y1 Referrals Table - all IOs.doc")
Y1 Referrals for all IOs
referral count
SRH 1
MH 0
SA 0
PCP 0
EDU 1
VOC 1
VIOL 0
#subset by IO & period:
org <- "SA" #options: "ACDSS", "CHS", "DreamCenter", PAYC", "SA"
referrals.df <- flqpr.df[flqpr.df$`FL io` %like% org & flqpr.df$`MPC1 start` >=begin & flqpr.df$`MPC1 start` <=finish,c(3,9:15)] 
referrals.df <- na.omit(referrals.df)

#subset by service:
referrals.df$`FL ref edu` <- as.numeric(as.character(referrals.df$`FL ref edu`))
referrals.df$`FL ref mh` <- as.numeric(as.character(referrals.df$`FL ref mh`))
referrals.df$`FL ref pcp` <- as.numeric(as.character(referrals.df$`FL ref pcp`))
referrals.df$`FL ref sa` <- as.numeric(as.character(referrals.df$`FL ref sa`))
referrals.df$`FL ref srh` <- as.numeric(as.character(referrals.df$`FL ref srh`))
referrals.df$`FL ref viol` <- as.numeric(as.character(referrals.df$`FL ref viol`))
referrals.df$`FL ref voc` <- as.numeric(as.character(referrals.df$`FL ref voc`))

#sum each service
edu <- sum(referrals.df$`FL ref edu`)
mh <- sum(referrals.df$`FL ref mh`)
pcp <- sum(referrals.df$`FL ref pcp`)
sa <- sum(referrals.df$`FL ref sa`)
srh <- sum(referrals.df$`FL ref srh`)
viol <- sum(referrals.df$`FL ref viol`)
voc <- sum(referrals.df$`FL ref voc`)

#create a new dataframe with the sums
suppsrvs <- data.frame(referral=c("SRH","MH","SA","PCP","EDU","VOC","VIOL"),count=c(srh,mh,sa,pcp,edu,voc,viol))

#create a table with all referrals for this IO (for OAHPMdata); change titles and file names
tab_df(suppsrvs,
       title = "Y1 Referrals for: SA",
       file = "SA - Y1 Referrals Table.doc")
Y1 Referrals for: SA
referral count
SRH 0
MH 0
SA 0
PCP 0
EDU 0
VOC 0
VIOL 0

Alamance Cares

# Alamance Cares: Supportive Services
rawcares.df <- read.csv("Alamance Cares Testing Data.csv", stringsAsFactors = TRUE)
rawcares.df <- rawcares.df[2:nrow(rawcares.df),c(10:54)]

cleanCares <- function(vec){
  chars <- as.character(vec)
  chars[is.na(chars)] <- "0" #if missing, replace with zero
  chars[chars==""] <- "0" #if blank, replace with zero
  return(as.numeric(chars)) #convert values to numbers
}

rawcares.df[2:45] <- lapply(rawcares.df[2:45], cleanCares)

rawcares.df$Month <- factor(rawcares.df$Month,
                       levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
                       labels = c("2020-07-01","2020-08-01","2020-09-01","2020-10-01","2020-11-01","2020-12-01",
                                  "2021-01-01","2021-02-01","2021-03-01","2021-04-01","2021-05-01","2021-06-01"))

rawcares.df$Date <- as.Date(rawcares.df$Month, format = "%Y-%m-%d")

younger.df <- rawcares.df[rawcares.df$Date >=begin & rawcares.df$Date <=finish, c(2:23,46)]
older.df   <- rawcares.df[rawcares.df$Date >=begin & rawcares.df$Date <=finish, c(24:46)]

younger.df$Age <- "14-17"
older.df$Age <- "18-19"

names(younger.df) <- c("Men","Women","F2M", "M2F","Other gender","White","Black","Native","Asian", "Pacific Islander","Multiracial","Race not specified","Race unknown","Hispanic","Non Hispanic","Ethnicity Unknown","Alamance Cares","ACC","ACHD","Elon","Maplebrook","SA","Date","Age")
names(older.df) <- c("Men","Women","F2M", "M2F","Other gender","White","Black","Native","Asian", "Pacific Islander","Multiracial","Race not specified","Race unknown","Hispanic","Non Hispanic","Ethnicity Unknown","Alamance Cares","ACC","ACHD","Elon","Maplebrook","SA","Date","Age")

cares.df <- rbind(younger.df,older.df) #6 months x 2 age groups = 12 obs

#haven't figured out how to output a pub-ready table since this is already aggregated data; ended up doing counts & percents in excel
women <- aggregate(x=cares.df$Women,
                   by= list(cares.df$Age),
                   FUN=sum)
men <- aggregate(x=cares.df$Men,
          by= list(cares.df$Age),
          FUN=sum)
other <- aggregate(x=cares.df$`Other gender`,
                   by= list(cares.df$Age),
                   FUN=sum)

table1 <- rbind(women, men, other)