#############PROBLEM SET 3#######################
####LOAD LIBRARY
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 4.4.3
##Load in the new file##

Arsenic=read.csv("C:/Users/david/OneDrive/bgsclassfolder/GBE/Arsenic in Well-Water.csv")

#create df with mean values of arsenic data by cities in new hampshire clean
#data that has no town assigned to it. 
arsenic_avg <- Arsenic %>%
  group_by(City) %>%
  summarize(mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE))
#Cleaning
arsenic_avg[arsenic_avg == ""] <- NA
arsenic_avg[arsenic_avg== "null"]<- NA
arsenic_avg=na.omit(arsenic_avg)
#Extract the 20 towns with the highest arsenic levels to use for plotting
top20 <- arsenic_avg %>%
  arrange(desc(mean_arsenic)) %>%
  slice_head(n = 20)

Including Plots

You can also embed plots, for example:

#Create Bar Chart to display findings
ggplot(top20, aes(x = reorder(City, mean_arsenic), y = mean_arsenic)) +
  geom_col(fill = "seagreen") +
  xlab("City") +
  ylab("Average Arsenic (µg/L)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Filter Application Question and prepare to show significance on graph
Filter_avg <- Arsenic %>%
  group_by(Type.of.filtration.system) %>%
  summarize(
    mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE),
    sd = sd(Arsenic..ug.l., na.rm = TRUE),
    n = n(),
    se = sd / sqrt(n)
  )
###Create bar chart to display findings
ggplot(Filter_avg, aes(x = Type.of.filtration.system, y = mean_arsenic)) +
  geom_col(fill = "seagreen") +
  geom_errorbar(aes(ymin = mean_arsenic - se,
                    ymax = mean_arsenic + se),
                width = 0.2) +
  xlab("") +
  ylab("Average Arsenic (µg/L)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

####Public versus Well water? 
Public_avg<-Arsenic %>%
  group_by(Well.type) %>%
  summarize(
    mean_arsenic = mean(Arsenic..ug.l., na.rm = TRUE),
    sd = sd(Arsenic..ug.l., na.rm = TRUE),
    n = n(),
    se = sd / sqrt(n)
  )
ggplot(Public_avg, aes(x = Well.type, y = mean_arsenic)) +
  geom_col(fill = "seagreen") +
  geom_errorbar(aes(ymin = mean_arsenic - se,
                    ymax = mean_arsenic + se),
                width = 0.2) +
  xlab("") +
  ylab("Average Arsenic (µg/L)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  
  annotate(
    "text",
    x = Inf,
    y = Inf,
    label = "ANOVA, p = 0.045",
    hjust = 1.1,
    vjust = 1.5,
    size = 5
  ) +
  
  coord_cartesian(
    ylim = c(0, max(Public_avg$mean_arsenic + Public_avg$se, na.rm = TRUE) * 1.2),
    clip = "off"
  )