RG Final R Explore

options(repos = c(CRAN = "https://cloud.r-project.org"))

#install explore package
install.packages("explore")

## package 'explore' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Roxana\AppData\Local\Temp\RtmpGCsLLd\downloaded_packages

#load the packages
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(tidyr)
library(explore)

#load the data
#im using the code below to make it easier for R to look for my files to work with

setwd("C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 6")
getwd()

## [1] "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 6"

#import and load the data from Excel "StateStrata"

StateStrata <- read_excel("StateStrata.xlsx")


#start using the explore package
explore(StateStrata)

## Warning in explore_shiny(data, ...): This function can only be used in an
## interactive R session

head(StateStrata)

## # A tibble: 6 × 11
##   state   age   race  gender sex   transmission  year indicator metric statistic
##   <chr>   <chr> <chr> <lgl>  <chr> <chr>        <dbl> <chr>     <chr>  <chr>    
## 1 Alabama 13-24 Amer… NA     Fema… Heterosexua…  2017 Diagnoses New d… count    
## 2 Alabama 13-24 Amer… NA     Fema… Injection d…  2017 Diagnoses New d… count    
## 3 Alabama 13-24 Amer… NA     Fema… Other Trans…  2017 Diagnoses New d… count    
## 4 Alabama 13-24 Amer… NA     Fema… <NA>          2017 Diagnoses New d… count    
## 5 Alabama 13-24 Amer… NA     Male… Heterosexua…  2017 Diagnoses New d… count    
## 6 Alabama 13-24 Amer… NA     Male… Injection d…  2017 Diagnoses New d… count    
## # ℹ 1 more variable: value <dbl>

#filter data for the year 2022 and for Indicator = Viral suppression (however i think this code is missing filtering for Viral suppression)
StateStrata_2022 <- StateStrata %>%
  filter(year == 2022) %>%
  select(Age = age, Race = race, Sex = sex, indicator, value) #i kept the Indicator variable to see what kind of values we are looking at
View(StateStrata_2022) #if you keep scrolling, you still have other data from the variable Indicator (you still have diagnoses, linkage to care...)


StateStrata_2022_clean <- StateStrata_2022 %>%
  filter(complete.cases(.)) #once you filter for complete cases for each variable (age, sex, race) you end up looking at only Diagnoses, however the HW asked to look at only Viral Suppression data. Am i missing something?
View(StateStrata_2022_clean)


#below is another version
StateStrata_2022_test <- StateStrata %>%
  filter(year == 2022 & indicator == "Viral suppression") %>% 
  select(age, race, sex, indicator, value) #the indicator column helps me see we are looking at viral suppression data
View(StateStrata_2022_test)


StateStrata_2022_test_clean <- StateStrata_2022_test %>%
  filter(complete.cases(.)) #because each variable (age, sex, race) has N/A data, filtering for complete cases ends up deleting those variables' values (i.e. i end up with no observations/ rows of data) This could be because of the way the original dataset "StateStrata" was set up. 
#View(StateStrata_2022_test_clean) #no data if i filter for complete cases


#generate a summary report using the explore paackage
#i decided to use the dataset not filtered for Viral Suppression but filtered for complete cases
StateStrata_2022_clean %>% report(output_file = "Report_2022_clean.html", output_dir = "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 6")

## ℹ Processing template: "template_report_variable.Rmd"

## ✔ Report created at 'file://C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 6/Report_2022_clean.html'

#export to an Excel file

#install if not already
if (!require(openxlsx)) {
  install.packages("openxlsx")
}

## Loading required package: openxlsx

#load package
library(openxlsx)

#define the file path where the Excel file will be saved
file_path <- "C:/Users/Roxana/OneDrive - Latino Commission on AIDS/Documents/UTSA Adolph Delgado/Week 6/RG_StateStrata_2022_clean.xlsx"

#write the dataframe to an Excel file
write.xlsx(StateStrata_2022_clean, file = file_path, sheetName = "Data", append = FALSE)

cat("HTML report and Excel file have been saved to your Desktop.\n")

## HTML report and Excel file have been saved to your Desktop.

RG Final R Explore

Roxana Guzman

2024-12-20