#############PROBLEM SET 3#######################
####LOAD LIBRARY
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 4.4.3
##Load in the new file##
Arsenic=read.csv("C:/Users/david/OneDrive/bgsclassfolder/GBE/Arsenic in Well-Water.csv")
#create df with mean values of arsenic data by cities in new hampshire clean
#data that has no town assigned to it.
arsenic_avg <- Arsenic %>%
group_by(City) %>%
summarize(mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE))
#Cleaning
arsenic_avg[arsenic_avg == ""] <- NA
arsenic_avg[arsenic_avg== "null"]<- NA
arsenic_avg=na.omit(arsenic_avg)
#Extract the 20 towns with the highest arsenic levels to use for plotting
top20 <- arsenic_avg %>%
arrange(desc(mean_arsenic)) %>%
slice_head(n = 20)
You can also embed plots, for example:
#Create Bar Chart to display findings
ggplot(top20, aes(x = reorder(City, mean_arsenic), y = mean_arsenic)) +
geom_col(fill = "seagreen") +
xlab("City") +
ylab("Average Arsenic (µg/L)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Filter Application Question and prepare to show significance on graph
Filter_avg <- Arsenic %>%
group_by(Type.of.filtration.system) %>%
summarize(
mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE),
sd = sd(Arsenic..ug.l., na.rm = TRUE),
n = n(),
se = sd / sqrt(n)
)
###Create bar chart to display findings
ggplot(Filter_avg, aes(x = Type.of.filtration.system, y = mean_arsenic)) +
geom_col(fill = "seagreen") +
geom_errorbar(aes(ymin = mean_arsenic - se,
ymax = mean_arsenic + se),
width = 0.2) +
xlab("") +
ylab("Average Arsenic (µg/L)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
####Public versus Well water?
Public_avg<-Arsenic %>%
group_by(Well.type) %>%
summarize(
mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE),
sd = sd(Arsenic..ug.l., na.rm = TRUE),
n = n(),
se = sd / sqrt(n)
)
ggplot(Public_avg, aes(x = Well.type, y = mean_arsenic)) +
geom_col(fill = "seagreen") +
geom_errorbar(aes(ymin = mean_arsenic - se,
ymax = mean_arsenic + se),
width = 0.2) +
xlab("") +
ylab("Average Arsenic (µg/L)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
annotate(
"text",
x = Inf,
y = Inf,
label = "ANOVA, p = 0.045",
hjust = 1.1,
vjust = 1.5,
size = 5
) +
coord_cartesian(
ylim = c(0, max(Public_avg$mean_arsenic + Public_avg$se, na.rm = TRUE) * 1.2),
clip = "off"
)