library(readxl)
# Read the state-level PrEP workbook into a data frame.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where this exact file location exists -- consider a relative path.
StateLevel_PrEP <- read_excel("C:/Users/12108/OneDrive - Significant Results/PostDoc/Rhonda/R studio/Week 1/StateLevel_PrEP.xlsx")

# View() opens a GUI data viewer, so only call it in an interactive session;
# in batch runs (Rscript, knitting) there may be no viewer/display available.
if (interactive()) {
  View(StateLevel_PrEP)
}

# Per-column overview of the imported data.
summary(StateLevel_PrEP)
##     state                year       indicator             value      
##  Length:51          Min.   :2021   Length:51          Min.   :13.40  
##  Class :character   1st Qu.:2021   Class :character   1st Qu.:21.15  
##  Mode  :character   Median :2021   Mode  :character   Median :25.20  
##                     Mean   :2021                      Mean   :28.01  
##                     3rd Qu.:2021                      3rd Qu.:32.55  
##                     Max.   :2021                      Max.   :54.20
# Five-number summary plus mean for the `value` column only.
summary(StateLevel_PrEP[["value"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.40   21.15   25.20   28.01   32.55   54.20
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Mapping of region name -> the states (and the District of Columbia) assigned
# to that region.
regions <- list(
  Northeast = c("Connecticut", "Maine", "Massachusetts", "New Hampshire", "New Jersey", 
                "New York", "Pennsylvania", "Rhode Island", "Vermont"),
  Midwest = c("Illinois", "Indiana", "Iowa", "Kansas", "Michigan", "Minnesota", 
              "Missouri", "Nebraska", "North Dakota", "Ohio", "South Dakota", "Wisconsin"),
  South = c("Alabama", "Arkansas", "Delaware", "District of Columbia", "Florida", "Georgia", 
            "Kentucky", "Louisiana", "Maryland", "Mississippi", "North Carolina", "Oklahoma", 
            "South Carolina", "Tennessee", "Texas", "Virginia", "West Virginia"),
  West = c("Alaska", "Arizona", "California", "Colorado", "Hawaii", "Idaho", "Montana", 
           "Nevada", "New Mexico", "Oregon", "Utah", "Washington", "Wyoming")
)

# stack() flattens the named list into a two-column data frame
# (`values` = state name, `ind` = region as a factor); rename the columns so
# the lookup can be joined on `state`.
regions_df <- stack(regions) %>% 
  rename(state = values, Region = ind)

# Add a `Region` column to StateLevel_PrEP by matching on `state`.
# Any existing `Region` column is dropped first so that re-running this step
# does not produce `Region.x` / `Region.y` instead of a single `Region`.
StateLevel_PrEP <- StateLevel_PrEP %>%
  select(-any_of("Region")) %>%
  left_join(regions_df, by = "state")

# left_join() leaves Region as NA for any state name missing from the lookup
# (e.g. a spelling difference); report those rows instead of letting them go
# unnoticed in the analyses that use Region.
unmatched_states <- StateLevel_PrEP$state[is.na(StateLevel_PrEP$Region)]
if (length(unmatched_states) > 0) {
  warning(
    "No region found for state(s): ",
    paste(unmatched_states, collapse = ", "),
    call. = FALSE
  )
}

# StateLevel_PrEP now carries a `Region` column (one region per state).
# One-way ANOVA: `value` is the dependent variable, `Region` the factor.
anova_result <- aov(
  formula = value ~ Region,
  data = StateLevel_PrEP
)

# Show the ANOVA table (Df, Sum Sq, Mean Sq, F value, Pr(>F)).
anova_result %>% summary()
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Region       3    728  242.82   2.727 0.0545 .
## Residuals   47   4185   89.03                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Install ggplot2 only if it is missing, then attach it.
# requireNamespace() checks availability without attaching the package, so
# the single library() call below does the attaching and fails loudly if the
# package still cannot be loaded.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Boxplot of `value` for each region, with the group mean overlaid.
ggplot(
  data = StateLevel_PrEP,
  mapping = aes(x = Region, y = value, fill = Region)
) +
  # One box per region, filled by region.
  geom_boxplot() +
  # Group means drawn as white diamonds (shape 23) with a black outline.
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 23,
    size = 3,
    colour = "black",
    fill = "white"
  ) +
  theme_minimal() +
  # The x axis already names each region, so the fill legend is redundant.
  theme(legend.position = "none") +
  labs(title = "Boxplot of Value by Region", x = "Region", y = "Value")

# Packages needed for the summary table: install any that are missing, then
# attach them. requireNamespace() only checks availability; the library()
# calls do the attaching and error out if a package still cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("knitr", quietly = TRUE)) {
  install.packages("knitr")
}
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra")
}
library(dplyr)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Per-region descriptive statistics of `value`: mean, median, min, max and
# the number of rows in each region. Missing values are ignored in the
# statistics (na.rm = TRUE) but still counted in `n`.
summary_table <-
  group_by(StateLevel_PrEP, Region) %>%
  summarise(
    Mean_Value = mean(value, na.rm = TRUE),
    Median_Value = median(value, na.rm = TRUE),
    Min_Value = min(value, na.rm = TRUE),
    Max_Value = max(value, na.rm = TRUE),
    n = n()
  ) %>%
  # Keep rows ordered by Region.
  arrange(Region)

# Render the table as styled HTML: two decimals, centred columns.
summary_table %>%
  kable(
    format = "html",
    caption = "Summary Statistics of Values by Region",
    digits = 2,
    align = "c"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
## Summary Statistics of Values by Region
## Region Mean_Value Median_Value Min_Value Max_Value n
## Northeast 36.17 35.80 22.9 54.2 9
## Midwest 26.08 23.25 16.1 45.5 12
## South 26.44 25.00 13.4 52.8 17
## West 26.20 24.70 15.2 47.6 13