library(readxl)
# Read the state-level PrEP workbook into StateLevel_PrEP.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact location exists -- consider a project-relative path.
StateLevel_PrEP <- read_excel("C:/Users/12108/OneDrive - Significant Results/PostDoc/Rhonda/R studio/Week 1/StateLevel_PrEP.xlsx")
# View() opens a spreadsheet-style data viewer; that is only useful in an
# interactive session, so skip it when the script is sourced or knitted.
if (interactive()) {
  View(StateLevel_PrEP)
}
# Per-column summary of the whole data set.
summary(StateLevel_PrEP)
## state year indicator value
## Length:51 Min. :2021 Length:51 Min. :13.40
## Class :character 1st Qu.:2021 Class :character 1st Qu.:21.15
## Mode :character Median :2021 Mode :character Median :25.20
## Mean :2021 Mean :28.01
## 3rd Qu.:2021 3rd Qu.:32.55
## Max. :2021 Max. :54.20
# Six-number summary (min, quartiles, mean, max) of the `value` column alone.
summary(StateLevel_PrEP[["value"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.40 21.15 25.20 28.01 32.55 54.20
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Lookup of region name -> state names belonging to that region.
# The element order (Northeast, Midwest, South, West) is kept as-is because it
# determines the factor level order produced later by stack().
regions <- list(
  Northeast = c(
    "Connecticut", "Maine", "Massachusetts", "New Hampshire", "New Jersey",
    "New York", "Pennsylvania", "Rhode Island", "Vermont"
  ),
  Midwest = c(
    "Illinois", "Indiana", "Iowa", "Kansas", "Michigan", "Minnesota",
    "Missouri", "Nebraska", "North Dakota", "Ohio", "South Dakota",
    "Wisconsin"
  ),
  South = c(
    "Alabama", "Arkansas", "Delaware", "District of Columbia", "Florida",
    "Georgia", "Kentucky", "Louisiana", "Maryland", "Mississippi",
    "North Carolina", "Oklahoma", "South Carolina", "Tennessee", "Texas",
    "Virginia", "West Virginia"
  ),
  West = c(
    "Alaska", "Arizona", "California", "Colorado", "Hawaii", "Idaho",
    "Montana", "Nevada", "New Mexico", "Oregon", "Utah", "Washington",
    "Wyoming"
  )
)
# Flatten the region list into a two-column lookup table. stack() yields
# `values` (the state names) and `ind` (the list element each name came from);
# they are renamed to `state` and `Region` so the table can be joined on state.
regions_df <- rename(stack(regions), state = values, Region = ind)
# Attach a `Region` column to StateLevel_PrEP by matching on its `state` column.
# left_join() keeps every row of StateLevel_PrEP; a state with no entry in
# regions_df ends up with Region = NA.
StateLevel_PrEP <- StateLevel_PrEP %>%
  left_join(regions_df, by = "state")
# Surface state names that did not match the lookup (e.g. a spelling or
# whitespace difference) instead of letting them pass through silently as NA.
if (anyNA(StateLevel_PrEP$Region)) {
  warning(
    "No region found for: ",
    paste(
      unique(StateLevel_PrEP$state[is.na(StateLevel_PrEP$Region)]),
      collapse = ", "
    ),
    call. = FALSE
  )
}
# StateLevel_PrEP now has an additional column 'Region' indicating the region for each state
# One-way ANOVA: `value` is the response (DV) and `Region` is the grouping
# factor (IV).
# NOTE(review): rows whose Region is NA are presumably dropped by the default
# na.action -- confirm every state matched a region before relying on this.
anova_result <- aov(value ~ Region, data = StateLevel_PrEP)
# Print the ANOVA table (Df, Sum Sq, Mean Sq, F value, Pr(>F)).
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Region 3 728 242.82 2.727 0.0545 .
## Residuals 47 4185 89.03
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Install ggplot2 only when it is missing, then attach it.
# requireNamespace() tests availability without attaching the package, so the
# single library() call below is what attaches it -- and it fails loudly if
# the installation did not succeed.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Boxplot of `value` per region, with each region's mean overlaid.
ggplot(StateLevel_PrEP, aes(x = Region, y = value, fill = Region)) +
  # One box per region, filled by region.
  geom_boxplot() +
  # Mark each region's mean with a white, black-outlined point (shape 23).
  stat_summary(
    geom = "point",
    fun = mean,
    shape = 23,
    size = 3,
    color = "black",
    fill = "white"
  ) +
  labs(
    title = "Boxplot of Value by Region",
    x = "Region",
    y = "Value"
  ) +
  # Minimal theme first, so the legend tweak below is not overridden by it.
  theme_minimal() +
  # Hide the fill legend: the x-axis already labels each region.
  theme(legend.position = "none")

# Install any missing table/reporting packages, then attach them.
# requireNamespace() only tests availability (it does not attach), so each
# package is attached exactly once by the library() calls below, which also
# fail loudly if an installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("knitr", quietly = TRUE)) {
  install.packages("knitr")
}
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra")
}
library(dplyr)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Per-region descriptive statistics of `value`: mean, median, minimum and
# maximum (each computed with NAs removed) plus the number of rows per region.
summary_table <- StateLevel_PrEP %>%
  group_by(Region) %>%
  summarise(
    Mean_Value   = mean(value, na.rm = TRUE),
    Median_Value = median(value, na.rm = TRUE),
    Min_Value    = min(value, na.rm = TRUE),
    Max_Value    = max(value, na.rm = TRUE),
    n            = n()
  ) %>%
  # Sort rows by Region. When Region is a factor this follows its level order
  # (Northeast, Midwest, South, West here), not alphabetical order.
  arrange(Region)
# Render the summary table as a centered, two-decimal HTML table and apply
# Bootstrap styling to it.
kable(
  summary_table,
  format = "html",
  caption = "Summary Statistics of Values by Region",
  align = "c",
  digits = 2
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
## Summary Statistics of Values by Region
##
## | Region    | Mean_Value | Median_Value | Min_Value | Max_Value | n  |
## |-----------|------------|--------------|-----------|-----------|----|
## | Northeast | 36.17      | 35.80        | 22.9      | 54.2      | 9  |
## | Midwest   | 26.08      | 23.25        | 16.1      | 45.5      | 12 |
## | South     | 26.44      | 25.00        | 13.4      | 52.8      | 17 |
## | West      | 26.20      | 24.70        | 15.2      | 47.6      | 13 |