#install.packages("googlesheets4")
#install.packages("kableExtra")
library(googlesheets4)
## Warning: package 'googlesheets4' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
clinic_url <- "https://docs.google.com/spreadsheets/d/1vN_8CfBYlrpf7hV76g3q9rrd26q8z95WPTg6yYJCRMc/edit?gid=1596887924#gid=1596887924"
clinic <- read_sheet(clinic_url)
## ! Using an auto-discovered, cached token.
##   To suppress this message, modify your code or options to clearly consent to
##   the use of a cached token.
##   See gargle's "Non-interactive auth" vignette for more details:
##   <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googlesheets4 package is using a cached token for
##   'ngwayuclaude1@gmail.com'.
## Auto-refreshing stale OAuth token.
## ✔ Reading from "clinic".
## ✔ Range 'clinic'.
## New names:
## • `` -> `...1`
unique(clinic$sex)
##  [1] "male"   "female" "m"      "MALE"   "FEMALE" "M"      "f"      "F"     
##  [9] "Male"   "Female"
clinic = clinic%>%
  mutate(sex = case_when(sex =='male'|sex =='m'|sex =='MALE'|sex =='M'|sex =='Male'~ 'Male',TRUE~'Female'))
unique(clinic$sex)
## [1] "Male"   "Female"

1a. Total number of male and female patients, by clinic

gender_summary <- clinic %>%
  group_by(site,sex) %>%
  summarise(total_patients = n(), .groups = "drop") %>%
  pivot_wider(names_from = 'sex',values_from = 'total_patients')
kable(gender_summary, caption = "Total Number of Male and Female Patients by Clinic") %>%
  kable_styling(full_width = FALSE, position = "left")
Total Number of Male and Female Patients by Clinic
site Male Female
akko 2 NA
bani_gaye 543 565
birin_fulani 1044 991
dadin_kowa 1 1
dukku 820 861
galdamari 244 226
kwami 798 827
lergo_abba 22 14
wuro_modibbo 18 15

1b. Average absolute weight gain during the treatment program, by clinic

weight_gain_summary <- clinic %>%
  mutate(abs_weight_gain = abs(discharge_weight - enrollment_weight))%>%
  group_by(site) %>%
  summarise(average_absolute_weight_gain = mean(abs_weight_gain), .groups = "drop")
kable(weight_gain_summary, caption = "Average absolute weight gain during the treatment program") %>%
  kable_styling(full_width = FALSE, position = "left")
Average absolute weight gain during the treatment program
site average_absolute_weight_gain
akko 1.350000
bani_gaye 1.712929
birin_fulani 2.221774
dadin_kowa 1.400000
dukku 1.748064
galdamari 2.449293
kwami 617.588856
lergo_abba 2.170509
wuro_modibbo 2.630303

1c. Average relative weight gain, measured in grams per kilogram per day , by clinic

relative_weight_gain_summary <- clinic %>%
  mutate(relative_weight_gain = abs(discharge_weight - enrollment_weight)/enrollment_weight)%>%
  group_by(site) %>%
  summarise(average_relative_weight_gain = mean(relative_weight_gain), .groups = "drop")
kable(relative_weight_gain_summary, caption = "Average relative weight gain") %>%
  kable_styling(full_width = FALSE, position = "left")
Average relative weight gain
site average_relative_weight_gain
akko 0.1462651
bani_gaye 0.2961946
birin_fulani 0.3879823
dadin_kowa 0.1951890
dukku 0.3176802
galdamari 0.4314371
kwami 0.3800805
lergo_abba 0.3848489
wuro_modibbo 0.5165295

Discharge criteria

erroneously_discharge_summary <- clinic %>%
  mutate(not_eligible_for_discharge = case_when(discharge_age>=6 & discharge_weight< 4 ~ 1,TRUE~0)) %>%
  group_by(site) %>%
  summarise(number_erroneously_discharged = sum(not_eligible_for_discharge), .groups = "drop")
kable(erroneously_discharge_summary, caption = "Number of patients erroneously discharged") %>%
  kable_styling(full_width = FALSE, position = "left")
Number of patients erroneously discharged
site number_erroneously_discharged
akko 0
bani_gaye 15
birin_fulani 36
dadin_kowa 0
dukku 30
galdamari 3
kwami 34
lergo_abba 1
wuro_modibbo 0
households_url <- "https://docs.google.com/spreadsheets/d/1MqMLmK3i7aUgSbyWbkLjyKvLzTrEaO4klGNJOchJKQM/edit?gid=994307358#gid=994307358"
households <- read_sheet(households_url)
## ✔ Reading from "household".
## ✔ Range 'household'.
clinic_data = select(clinic,staffmember,pid,discharge_weight)
households_data = select(households,pid,site,weight,todate)
merge_data = merge(clinic_data,households_data,by = "pid")

# Convert to numeric if needed
merge_data <- merge_data %>%
  mutate(
    weight = as.numeric(weight),
    discharge_weight = as.numeric(discharge_weight),
    diffweight = abs(weight - discharge_weight),
    month_num = month(todate),
    discrepancy = case_when(diffweight>=1~1,TRUE~0) 
      
  )
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `weight = as.numeric(weight)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
 merge_data=filter(merge_data,month_num>=8 & month_num<=10)

3a. How many discrepancies occurred in August, September, and October at each clinic?

discrepancies_summary <- merge_data %>%
  group_by(site,month_num) %>%
  summarise(total_discrepancies = sum(discrepancy), .groups = "drop") %>%
  pivot_wider(names_from = 'month_num',values_from = 'total_discrepancies')
kable(discrepancies_summary, caption = "Total discrepancies per month by clinic") %>%
  kable_styling(full_width = FALSE, position = "left")
Total discrepancies per month by clinic
site 8 9 10
bani_gaye 3 6 4
birin_fulani 19 13 11
dukku 9 5 3
galdamari 1 0 0
kwami 11 6 1

3b. Rank the clinic staff members by number of discrepancies.

discrepancies_staffmember<- merge_data %>%
  group_by(staffmember) %>%
  summarise(total_discrepancies = sum(discrepancy), .groups = "drop") %>%
 arrange(desc(total_discrepancies))
kable(discrepancies_staffmember, caption = "Total discrepancies per staff member")%>%
  kable_styling(full_width = FALSE, position = "left")
Total discrepancies per staff member
staffmember total_discrepancies
staff5 23
staff14 10
staff6 10
staff1 9
staff7 7
staff2 6
staff17 5
staff20 5
staff23 5
staff9 5
staff10 4
staff24 1
staff25 1
staff4 1
staff3 0