setwd("C:\\Users\\Change\\Documents\\Bohemia Data\\medicalassesment")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(writexl)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(dplyr)
library(cloudbrewr)

DATA_STAGING_BUCKET_NAME <- 'databrew.org'
DATA_LAKE_BUCKET_NAME <- 'bohemia-lake-db'
PROJECT_SOURCE <- 'kwale'
SE_FOLDER_TARGET <- glue::glue('{PROJECT_SOURCE}/sanitized-form')

tryCatch({
  logger::log_info('Attempt AWS login')
  # login to AWS - this will be bypassed if executed in CI/CD environment
  cloudbrewr::aws_login(
    role_name = 'bohemia-anomalies-team-s3-role',
    profile_name =  'bohemia-anomalies-team-s3-role',
    pipeline_stage = 'production'
    
  )
  
}, error = function(e){
  logger::log_error('AWS Login Failed')
  stop(e$message)
})

medical_assessmentb <- cloudbrewr::aws_s3_get_table(
  bucket = 'databrew.org',
  key = 'kwale/clean-form/medical_assessmentb/medical_assessmentb.csv'
)

write.csv(medical_assessmentb,"medical_assessmentb.csv")

dataset <- medical_assessmentb


# Filter and select for SAEs_Initial
saes_initial <- dataset %>%
  filter(sae == 'sae_initial') %>%
  select(ext_id, dob, weight...14, height, gender, symptoms...88, sae_start_date...90, sae_stop_date...92, 
         medical_evaluation_sae, causality...97, todays_date, date...86, severity...96, outcome...118) %>%
  rename(weight = weight...14,
         symptoms =  symptoms...88,
         sae_start_date =  sae_start_date...90,
         sae_stop_date =  sae_stop_date...92,
         causality = causality...97,
         sae_Initial_date = date...86,
         severity = severity...96,
         outcome = outcome...118
  )

# Filter and select for 'SAEs followup'
sae_followup <- dataset %>%
  filter(sae == 'sae_follow-up') %>%
  select(ext_id, dob, weight...14, height, gender,diagnosis, sae_start_date...105, sae_stop_date...107, 
         outcome...108, causality...110, todays_date, severity...109, likely_cause_of_the_event) %>%
  rename(weight = weight...14,
         sae_start_date = sae_start_date...105,
         sae_stop_date = sae_stop_date...107,
         outcome = outcome...108,
         causality = causality...110,
         severity = severity...109
  )

# Filter and select for 'PFU_SAEs'
pfu_Saes <- dataset %>%
  filter(sae == 'sae_in_pregnancy') %>%
  select(ext_id, dob, weight...14, height, gender, todays_date, serious_assessment, symptoms...69, outcome_saes_in_pregnancy, 
         symptom_start_date, symptom_stop_date, severity...78, causality...79, event_cause_spec, systems_affected, 
         systems_affected_spec, outcome...118) %>%
  rename(weight = weight...14,
         symptoms = symptoms...69,
         severity = severity...78, 
         causality =  causality...79,
         outcome = outcome...118
  )

#Now repeat the process for in order to generate PFU report;

# Filter and select for 'PFU Initial'
pfu_initial <- dataset %>%
  filter(pregnancy == 'pregnancy_initial_report') %>%
  select(ext_id, gender, reporting_date,  dob, height, weight...14,ip_start_date, ip_stop_date, visit_number...27, 
         drug_withdrawn,preg_discovery_date, todays_date 
  ) %>%
  rename(visit_number = visit_number...27,
         weight = weight...14
  )

# Filter and select for 'PFU Followup'
pfu_followup <- dataset %>%
  filter(pregnancy == 'pregnancy_Follow-up_report') %>%
  select(ext_id, height, weight...14, todays_date, gender, dob, ip_start_date, ip_stop_date,date...34, 
         main_symptoms, medical_evaluation, evaluation_specify, outcome...43, live_birth ) %>%
  rename(weight = weight...14,
         pfu_followup_date = date...34,
         outcome = outcome...43
  )

# Store each dataset in a list with a named entry for each sheet
sae_dataset <- list("SAE_Initial" = saes_initial, "SAE_Followup" = sae_followup, "PFU_SAEs" = pfu_Saes)
pfu_dataset <- list("PFU_initial" = pfu_Saes, "PFU_Followup" = pfu_followup)

# Write each dataset to a different sheet in an Excel file changing ythe dates to reflect the current date of reporting
write_xlsx(sae_dataset, path = "SAEs_Safety,Efficacy&PFU_20241121.xlsx")
write_xlsx(pfu_dataset, path = "PFU_Reports_20241121.xlsx")