Satyavrath Taimaka Test Task

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(knitr)
# Load the two pre-cleaned input tables. In both CSV files the first row
# holds the column names.
clinic_cleaned <- read.csv(file = "cleaned_clinic.csv", header = TRUE)
household_cleaned <- read.csv(file = "cleaned_household.csv", header = TRUE)

# Question 1a Total number of male and female patients by clinic
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Count patients per clinic and sex.
# The `sex` column mixes spellings ("F", "FEMALE", "Female", "f", "M", "m",
# ...). Grouping on the raw values tallies each spelling as its own category,
# so the labels are collapsed to "female" / "male" first. Values that match
# neither set are kept (trimmed and lower-cased) so that unexpected codes stay
# visible in the table instead of being dropped silently.
# NOTE(review): the rendered table must be regenerated after this change.
gender_summary <- clinic_cleaned %>%
  mutate(
    sex = tolower(trimws(as.character(sex))),
    sex = case_when(
      sex %in% c("m", "male") ~ "male",
      sex %in% c("f", "female") ~ "female",
      TRUE ~ sex
    )
  ) %>%
  group_by(site, sex) %>%
  summarize(total_patients = n()) %>%
  ungroup()

## `summarise()` has grouped output by 'site'. You can override using the
## `.groups` argument.

# Replace the internal column names with reader-facing headings.
colnames(gender_summary) <- c("Clinic", "Sex", "Number of Patients")
# Create gender summary table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
kable(gender_summary) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Clinic	Sex	Number of Patients
akko	male	2
bani_gaye	F	16
bani_gaye	FEMALE	24
bani_gaye	Female	21
bani_gaye	M	19
bani_gaye	MALE	15
bani_gaye	Male	23
bani_gaye	f	23
bani_gaye	female	478
bani_gaye	m	19
bani_gaye	male	466
birin_fulani	F	38
birin_fulani	FEMALE	38
birin_fulani	Female	43
birin_fulani	M	31
birin_fulani	MALE	28
birin_fulani	Male	29
birin_fulani	f	37
birin_fulani	female	830
birin_fulani	m	41
birin_fulani	male	907
dadin_kowa	female	1
dadin_kowa	male	1
dukku	F	33
dukku	FEMALE	28
dukku	Female	29
dukku	M	36
dukku	MALE	32
dukku	Male	27
dukku	f	16
dukku	female	754
dukku	m	24
dukku	male	697
galdamari	F	6
galdamari	FEMALE	14
galdamari	Female	8
galdamari	M	7
galdamari	MALE	8
galdamari	Male	8
galdamari	f	10
galdamari	female	186
galdamari	m	10
galdamari	male	208
kwami	F	29
kwami	FEMALE	21
kwami	Female	30
kwami	M	32
kwami	MALE	44
kwami	Male	31
kwami	f	23
kwami	female	717
kwami	m	40
kwami	male	645
lergo_abba	FEMALE	1
lergo_abba	female	13
lergo_abba	male	22
wuro_modibbo	F	1
wuro_modibbo	Female	2
wuro_modibbo	MALE	1
wuro_modibbo	female	12
wuro_modibbo	male	17

# Question 1b Average absolute weight gain by clinic
library(kableExtra)
# Absolute weight gain per patient is discharge weight minus enrollment
# weight; the per-clinic figure is the mean of those differences.
# Patients with a missing weight at either end produce NA and are excluded
# from the mean by na.rm = TRUE.
weight_gain_summary <- clinic_cleaned %>%
  mutate(weight_gain = discharge_weight - enrollment_weight) %>%
  group_by(site) %>%
  summarize(average_weight_gain = mean(weight_gain, na.rm = TRUE))
colnames(weight_gain_summary) <- c("Clinic", "Average Weight Gain (kg)")
# Create average weight gain summary table.
# `FALSE` is spelled out because `F` can be reassigned.
kable(weight_gain_summary, caption = "Average Weight Gain by Clinic") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Average Weight Gain by Clinic
Clinic	Average Weight Gain (kg)
akko	1.350000
bani_gaye	1.525190
birin_fulani	1.936156
dadin_kowa	1.400000
dukku	1.673471
galdamari	1.751534
kwami	1.890961
lergo_abba	2.058935
wuro_modibbo	2.630303

# Question 1c Average relative weight gain, measured in grams per kilogram per day (g/kg/day) by clinic
library(dplyr)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(kableExtra)

# Parse the date columns (month/day/year text) so that the length of stay can
# be computed as a difference of dates.
clinic_cleaned$discharge_date <- mdy(clinic_cleaned$discharge_date)
clinic_cleaned$enrollment_date <- mdy(clinic_cleaned$enrollment_date)

# Relative weight gain in g/kg/day:
#   (weight gained, in grams) / (enrollment weight, in kg) / (days enrolled)
# NOTE(review): assumes both weight columns are recorded in kilograms, as the
# "Average Weight Gain (kg)" heading used for Question 1b implies - confirm.
relative_weight_gain_summary <- clinic_cleaned %>%
  mutate(
    weight_gain = discharge_weight - enrollment_weight,
    date_diff = as.numeric(
      difftime(discharge_date, enrollment_date, units = "days")
    ),
    # A stay of zero days would divide by zero and produce Inf, which
    # na.rm = TRUE does not remove from the mean; a negative stay is a data
    # error. Both are set to NA so they are excluded from the average.
    date_diff = ifelse(date_diff > 0, date_diff, NA_real_),
    # The factor of 1000 converts the gain from kilograms to grams.
    relative_weight_gain = (weight_gain * 1000 / enrollment_weight) / date_diff
  ) %>%
  group_by(site) %>%
  summarize(
    average_relative_weight_gain = mean(relative_weight_gain, na.rm = TRUE)
  )

colnames(relative_weight_gain_summary) <- c(
  "Clinic",
  "Average Relative Weight Gain (g/kg/day)"
)
# Create relative weight gain summary table.
# NOTE(review): the rendered table must be regenerated after this change.
kable(
  relative_weight_gain_summary,
  caption = "Average Relative Weight Gain by Clinic"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Average Relative Weight Gain by Clinic
Clinic	Average Relative Weight Gain (kg)
akko	0.0034930
bani_gaye	0.0053314
birin_fulani	0.0064693
dadin_kowa	0.0054414
dukku	0.0069695
galdamari	0.0064157
kwami	0.0077702
lergo_abba	0.0067350
wuro_modibbo	0.0083560

# Question 2a
library(dplyr)
library(knitr)
library(kableExtra)

# Flag each patient: "No" when discharge_age > 6 and discharge_weight < 4,
# otherwise "Yes". When the test cannot be evaluated because of a missing
# value, ifelse() returns NA.
clinic_discharge_check <- clinic_cleaned %>%
  mutate(
    discharge_criteria_met = ifelse(
      discharge_age > 6 & discharge_weight < 4,
      "No",
      "Yes"
    )
  )

# Keep only the children who were erroneously discharged and count them per
# clinic. filter() also drops rows whose flag is NA, so patients with missing
# discharge data are not counted. Clinics with no flagged patients do not
# appear in the result.
children_not_met_criteria_by_clinic <- clinic_discharge_check %>%
  filter(discharge_criteria_met == "No") %>%
  group_by(site) %>%
  summarize(num_not_met_criteria = n()) %>%
  ungroup()
colnames(children_not_met_criteria_by_clinic) <- c(
  "Clinic",
  "Number of Patients Not Meeting Discharge Criteria"
)
# Create the table of the number of children who were erroneously discharged,
# one row per clinic. `FALSE` is spelled out because `F` can be reassigned.
kable(
  children_not_met_criteria_by_clinic,
  caption = "Number of Children Erroneously Discharged by Clinic"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Number of Children Erroneously Discharged by Clinic
Clinic	Number of Patients Not Meeting Discharge Criteria
bani_gaye	14
birin_fulani	29
dukku	25
galdamari	3
kwami	26
lergo_abba	1

#Question 2a: At each clinic, how many patients were erroneously discharged (i.e., they were discharged even though they did not meet the criterion specified above)?
#At Bani Gaye, 14 patients were erroneously discharged. At Birin Fulani, 29 patients were erroneously discharged. At Dukku, 25 patients were erroneously discharged. At Galdamari, 3 patients were erroneously discharged. At Kwami, 26 patients were erroneously discharged. At Lergo Abba, 1 patient was erroneously discharged.

#Question 2b:
#-Conduct focus group discussions with local healthcare workers/hospital staff to understand why erroneous discharges are occurring in the first place
#-Host trainings with local healthcare workers about the importance of appropriate discharge criteria
#-Create info cards with verbal and visual representations of the discharge criteria and distribute to hospital staff, particularly physicians who are in charge of discharge orders
#-Establish monthly discharge monitoring plan to track erroneous discharges and provide additional trainings or guidance to hospital staff when they occur
#-Create a reward system that provides a financial incentive to staff when one month passes with no erroneous discharges

#Question 3a
library(dplyr)
library(lubridate)
library(knitr)
library(kableExtra)

# Re-read the raw exports for Question 3.
clinic <- read.csv("clinic.csv", header = TRUE)
household <- read.csv("household.csv", header = TRUE)

# Clinic records: keep the analysis columns, parse the discharge date, and
# retain only the first row encountered for each patient ID.
clinic_cleaned <- clinic %>%
  select(
    pid, sex, site,
    enrollment_age, enrollment_weight, enrollment_date,
    discharge_age, discharge_weight, discharge_date,
    staffmember
  ) %>%
  mutate(discharge_date = mdy(discharge_date)) %>%
  distinct(pid, .keep_all = TRUE)

# Household records: same treatment, using the visit date `todate`.
household_cleaned <- household %>%
  select(pid, c_sex, todate, weight) %>%
  mutate(todate = mdy(todate)) %>%
  distinct(pid, .keep_all = TRUE)

# Merge the clinic and household datasets by patient ID. The inner join keeps
# only patients present in both tables.
merged_data <- clinic_cleaned %>%
  inner_join(household_cleaned, by = "pid") %>%
  mutate(
    # Days from clinic discharge to the household visit (negative when the
    # visit precedes the discharge).
    date_difference = as.numeric(
      difftime(todate, discharge_date, units = "days")
    ),
    # Absolute gap between the clinic discharge weight and the household weight.
    weight_difference = abs(discharge_weight - weight)
  )

# Keep records with a discrepancy: weights more than 1 apart and a household
# visit no later than 7 days after discharge.
# NOTE(review): `date_difference <= 7` has no lower bound, so a household
# visit any number of days BEFORE discharge also qualifies. If the intended
# window is "within 7 days of discharge", this should probably be
# `abs(date_difference) <= 7` or `date_difference >= 0` - confirm against the
# task definition before changing, as it alters the reported counts.
discrepancies <- merged_data %>%
  filter(weight_difference > 1, date_difference <= 7) %>%
  mutate(month = month(todate, label = TRUE, abbr = TRUE))

# Count discrepancies by clinic and month for August-October.
# The filter uses the numeric month rather than the text label: the labels
# from month(label = TRUE) follow the session locale, so matching on
# "Aug"/"Sep"/"Oct" would select nothing under a non-English locale.
discrepancies_summary <- discrepancies %>%
  filter(lubridate::month(todate) %in% 8:10) %>%
  group_by(site, month) %>%
  summarize(discrepancies_count = n()) %>%
  ungroup() %>%
  arrange(site)

## `summarise()` has grouped output by 'site'. You can override using the
## `.groups` argument.

# Replace the internal column names with reader-facing headings.
colnames(discrepancies_summary) <- c(
  "Clinic",
  "Month",
  "Number of Discrepancies"
)
# Create the discrepancy summary table.
# `FALSE` is spelled out because `F` can be reassigned.
kable(
  discrepancies_summary,
  caption = "Number of Discrepancies by Clinic, August-October"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Number of Discrepancies by Clinic, August-October
Clinic	Month	Number of Discrepancies
bani_gaye	Aug	1
bani_gaye	Sep	5
bani_gaye	Oct	1
birin_fulani	Aug	16
birin_fulani	Sep	13
birin_fulani	Oct	6
dukku	Aug	8
dukku	Sep	4
dukku	Oct	2
galdamari	Aug	1
galdamari	Oct	2
kwami	Aug	11
kwami	Sep	5
kwami	Oct	1
wuro_modibbo	Sep	1

#Question 3a: At Bani Gaye, 1, 5, and 1 discrepancies occurred in August, September, and October, respectively. At Birin Fulani, 16, 13, and 6 discrepancies occurred in August, September, and October, respectively. At Dukku, 8, 4, and 2 discrepancies occurred in August, September, and October, respectively. At Galdamari, 1 discrepancy occurred in August and 2 discrepancies occurred in October. At Kwami, 11, 5, and 1 discrepancies occurred in August, September, and October, respectively. At Wuro Modibbo, 1 discrepancy occurred in September.

# Count August-October discrepancies by staff member, largest count first.
# Ties are broken by staff ID in text order (so "staff14" sorts before
# "staff6").
# The filter uses the numeric month rather than the text label: the labels
# from month(label = TRUE) follow the session locale, so matching on
# "Aug"/"Sep"/"Oct" would select nothing under a non-English locale.
discrepancies_staff <- discrepancies %>%
  filter(lubridate::month(todate) %in% 8:10) %>%
  group_by(staffmember) %>%
  summarize(discrepancies_count = n()) %>%
  ungroup() %>%
  arrange(desc(discrepancies_count), staffmember)
colnames(discrepancies_staff) <- c("Staff Member", "Number of Discrepancies")
# Create the discrepancy summary table.
# `FALSE` is spelled out because `F` can be reassigned.
kable(
  discrepancies_staff,
  caption = "Total Number of Discrepancies by Staff Member between August-October"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Total Number of Discrepancies by Staff Member between August-October
Staff Member	Number of Discrepancies
staff5	19
staff14	9
staff6	9
staff1	6
staff2	5
staff20	5
staff23	5
staff7	5
staff10	4
staff17	4
staff9	3
staff24	1
staff25	1
staff4	1

#Question 3b: Staff 5 (greatest # of discrepancies) > Staff 14 > Staff 6 > Staff 1 > Staff 2 > Staff 20 > Staff 23 > Staff 7 > Staff 10 > Staff 17 > Staff 9 > Staff 24 > Staff 25 > Staff 4 (fewest discrepancies)

#Question 3c
#-Check records of Staff 5 as this staff member contributed a markedly high number of discrepancies to see if there are any additional patterns
#-Provide retraining to all staff on accurate weight measurement, emphasizing the importance of standardized weight data collection
#-Send an anonymous survey link to all staff so that they can submit questions/points of confusion ahead of the training without judgment
#-Conduct random spot checks of clinic and household weight measurements to ensure quality
#-Observe whether discrepancy count remains high in other months of the year – if not, conduct focus groups with staff to understand why August-October is higher in discrepancy count than other months

Satyavrath Taimaka Test Task

2025-02-19