##Parsing Data
##Parse the Data
#Which column number is
which(colnames(my_data)== "Info")
## integer(0)
which(colnames(my_data)== "Accompany")
## integer(0)
# Expand comma-delimited values in each column into multiple rows
# Adjust column names based on the actual names printed out
health_data_expanded <- my_data %>%
mutate(row = row_number()) %>%
pivot_longer(
cols = 61:67, #columns reflecting services referred to.
names_to = "column_name",
values_to = "responses"
) %>%
separate_rows(responses, sep = ",\\s*") %>%
filter(responses != "") # Removes any empty responses
# View the first few entries of the expanded data
head(health_data_expanded)
## # A tibble: 6 × 76
## `Participant ID` Timestamp `Email Address` `Caseworker Name` Year Month
## <chr> <list> <chr> <chr> <dbl> <ord>
## 1 A0405P <dttm [1]> <NA> Darcell (Taleed Re-en… 2023 Sep
## 2 A0412H <dttm [1]> <NA> Darcell Walker 2024 Feb
## 3 A0412H <dttm [1]> <NA> Darcell Walker 2024 Feb
## 4 A0412H <dttm [1]> <NA> Darcell Walker 2024 Mar
## 5 A0412H <dttm [1]> <NA> Darcell Walker 2024 Mar
## 6 A0412H <dttm [1]> <NA> Darcell Walker 2024 Mar
## # ℹ 70 more variables: `Date of Meeting` <dttm>, `Summary of Visit` <chr>,
## # `[OPTIONAL] (Use this to write in any services that were not listed in the previous question or to expand or take notes.)` <chr>,
## # `What goals were you trying to accomplish in this meeting?` <chr>,
## # `Duration of Meeting` <list>, `Where did the meeting take place?` <chr>,
## # `CTI phase` <chr>,
## # `Did participant report any changes in health status? (HIV, HEP C etc)` <chr>,
## # `If unhoused, the number of night unsheltered in the previous week` <dbl>, …
# Optional: Count each type of response TOTAL for ALL TIME PERIOD
response_counts_total <- health_data_expanded %>%
group_by(responses) %>%
summarize(total_count = n(), .groups = 'drop')
#Month and Response Count
response_counts_total <- health_data_expanded %>%
group_by(Year, Month, responses) %>%
summarize(total_count = n(), .groups = 'drop')
****need healthcare variable that is then counted by month & Year*** ##Healthcare Data
For McKinsey Report need total medical care meetings only
##Social Services Data
##Decrease in substance use
First set of code looks at percentage decrease in substance use including all participants initial reported use (# of days reported using in the last week, data source= participant intake form) and compared to their most recent data that we have (# of days reported using in the last week on the most recent participant meeting form for each participant, data soiurce= participant meeting form)
# Install and load googlesheets4 if needed
if (!require("googlesheets4")) install.packages("googlesheets4")
library(googlesheets4)
# Reading directly from Google Sheets
baseline_data <- read_sheet("https://docs.google.com/spreadsheets/d/1_QduRLU4RJGH0oe1PxG_m84ew3DyIQtL6ygK8dolgT4/edit?usp=sharing", sheet = "Form Responses 1")
## ✔ Reading from "Intake Form (Responses)".
## ✔ Range ''Form Responses 1''.
## New names:
## • `` -> `...22`
recent_data <- read_sheet("https://docs.google.com/spreadsheets/d/1O0dRuk3Wb8EH2OyiPXE6hCO5-L-BBVg4MkthP2gRn4c/edit?usp=sharing", sheet = "Form Responses 1")
## ✔ Reading from "Participant Meeting Form (Responses)".
## ✔ Range ''Form Responses 1''.
## New names:
#Merge the Data
baseline_data$Member_ID <- as.character(baseline_data$"Member ID Number")
recent_data$Member_ID <- as.character(recent_data$"Participant ID")
# Rename columns if necessary
colnames(baseline_data) <- sub("Member_ID_Number", "Member_ID", colnames(baseline_data)) #(I just channged the names in the data sheets themselves, if this doesn't work it is because they reverted back to the original names)#
colnames(recent_data) <- sub("Member_ID_Number", "Member_ID", colnames(recent_data))
# Ensure that Member ID columns are of the same type
baseline_data$Member_ID <- as.character(baseline_data$"Member_ID")
merged_data <- left_join(baseline_data, recent_data, by = c("Member_ID" = "Member_ID"))
# Convert "N/A" to NA in your data frames
merged_data$Baseline_Substance_Use <- as.character(merged_data$'In the last week, how many days would you say you used a substance?')
merged_data$Recent_Substance_Use <- as.character(merged_data$'In the last week, how many days has the participant used substances?')
merged_data$Baseline_Substance_Use[merged_data$Baseline_Substance_Use == "N/A"] <- NA
merged_data$Recent_Substance_Use[merged_data$Recent_Substance_Use == "N/A"] <- NA
# Convert columns back to numeric, safely handling NA values
merged_data$Baseline_Substance_Use <- as.numeric(merged_data$Baseline_Substance_Use)
merged_data$Recent_Substance_Use <- as.numeric(merged_data$Recent_Substance_Use)
# Using mutate to selects most recent, excluding NA values in the comparison
merged_data <- merged_data %>%
group_by(Member_ID) %>%
# Filter out rows where the specific columns of interest are NA
filter(!is.na(Baseline_Substance_Use) & !is.na(Recent_Substance_Use)) %>%
# Sort by date in descending order so the most recent dates come first
arrange(desc(Date)) %>%
# Slice the first entry, which is the most recent non-NA data
slice(1) %>%
ungroup()
summary(merged_data$Recent_Substance_Use)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 3.000 3.516 7.000 7.000
# Using mutate to create 'Decrease' column
merged_data <- merged_data %>%
mutate(Decrease = if_else(!is.na(Baseline_Substance_Use) & !is.na(Recent_Substance_Use) &
Baseline_Substance_Use > Recent_Substance_Use,
TRUE, FALSE, missing = FALSE))
summary(merged_data$Decrease)
## Mode FALSE TRUE
## logical 44 18
# Calculate the percentage of participants showing a decrease
percent_decrease <- mean(merged_data$Decrease, na.rm = TRUE) * 100
# Display the percentage
print(paste("Percentage of participants with a decrease in substance use:", percent_decrease, "%"))
## [1] "Percentage of participants with a decrease in substance use: 29.0322580645161 %"
Rather thank calculate the percent with decrease over time, I am going to do it based only on meetings that month, which will skew the data in our favor… because people who are actually meeting are active in the program… so this will be less participants total (n will be smaller, because there are less participants that have meetings monthly than total participants)….
# Filter entries from Dec 2024 and non-NA Recent_Substance_Use
#####NEEED TO CHNGE THIS CODE EVERY MONTH#######
library(dplyr)
library(lubridate)
# Assuming 'Date' is in a proper date format
# If 'Date' is not already a Date object, convert it:
recent_data$Date <- as.Date(recent_data$"Date of Meeting", format = "%Y-%m-%d")
# Filter entries for Dec 2024 and non-NA Recent_Substance_Use
mon_2024_data <- recent_data %>%
filter(month(Date) == 12 & year(Date) == 2024 & !is.na(recent_data$'In the last week, how many days has the participant used substances?'))
# Group by Member_ID and get the most recent entry
recent_mon_data <- mon_2024_data %>%
group_by(Member_ID) %>%
arrange(desc(Date)) %>%
slice(1) %>%
ungroup()
# Assuming baseline data is part of the same dataset or merging with baseline if separate
final_data <- recent_mon_data %>%
left_join(select(baseline_data, Member_ID, 'In the last week, how many days would you say you used a substance?'), by = "Member_ID")
# Replace "N/A" with NA and NULL with NA in Baseline_Substance_Use
final_data$Baseline_Substance_Use <- sapply(final_data$'In the last week, how many days would you say you used a substance?',
function(x) {
if (is.null(x) || is.na(x) || x == "N/A") {
return(NA) # Convert NULL, NA, or "N/A" to NA
} else {
return(as.numeric(x)) # Convert other values to numeric
}
}
)# Ensure numeric conversion, it will automatically handle other non-numeric cases
# Replace "N/A" with NA and NULL with NA in recent_may_data
final_data$Recent_Substance_Use <- sapply(final_data$'In the last week, how many days has the participant used substances?', function(x) {
if (is.null(x)) {
NA # Convert NULL to NA
} else if (x == "N/A") {
NA # Convert "N/A" to NA
} else {
as.numeric(x) # Ensure numeric conversion, it will automatically handle other non-numeric cases
}
})
# Calculate decrease
final_data <- final_data %>%
mutate(Decrease = case_when(
!is.na(Baseline_Substance_Use) & !is.na(Recent_Substance_Use) &
Baseline_Substance_Use > Recent_Substance_Use ~ TRUE,
!is.na(Baseline_Substance_Use) & !is.na(Recent_Substance_Use) ~ FALSE,
TRUE ~ NA) # Retain NA where input is NA
)
final_data <- final_data %>%
mutate(Decrease = if_else(!is.na(Baseline_Substance_Use) & !is.na(Recent_Substance_Use) &
Baseline_Substance_Use > Recent_Substance_Use,
TRUE, FALSE,))
summary (final_data$Decrease)
## Mode FALSE TRUE
## logical 10 11
### Note to self, if intake baseline was missing, I went to the first measure of substance use we had for the participant and added that date into the intake spreadsheet manually
#Do percent calculation by hand
n = 9+9
percent_decrease_this_month=(9/n)*100
print(percent_decrease_this_month)
## [1] 50