I started by reading in the fluoride and arsenic files and loading packages into the library
library(knitr)
library(ggvis)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(shiny)
fluoride_raw_data <- read.csv("fluoride.csv", header = TRUE, stringsAsFactors = FALSE)
arsenic_raw_data <- read.csv("arsenic.csv", header = TRUE, stringsAsFactors = FALSE)
I renamed the n_wells_tested, percent_wells_above_guideline and maximum column names
fluoride_raw_data <- fluoride_raw_data %>% rename(wells_fluoride = n_wells_tested) %>% rename(percent_above = percent_wells_above_guideline) %>% rename(fluoride_max = maximum)
arsenic_raw_data <- arsenic_raw_data %>% rename(wells_arsenic = n_wells_tested) %>% rename(percent_above = percent_wells_above_guideline) %>% rename(arsenic_max = maximum)
I selected wells and maximum data from raw data
fluoride_data <- fluoride_raw_data %>% arrange(location) %>% select(location, wells_fluoride, fluoride_max)
arsenic_data <- arsenic_raw_data %>% arrange(location) %>% select(location, wells_arsenic, arsenic_max)
I decided to inner join fluoride and arsenic data here. I found this made it easier to keep the data frames I created organized and keep the number of observations consistent.
fluoride_arsenic <- fluoride_data %>% inner_join(arsenic_data) %>% select(location, wells_fluoride, fluoride_max, wells_arsenic, arsenic_max)
## Joining, by = "location"
After joining the data, then I started cleaning up the information. I removed data where wells tested are 0
fluoride_arsenic <- fluoride_arsenic %>% filter(fluoride_arsenic$wells_fluoride > 0)
fluoride_arsenic <- fluoride_arsenic %>% filter(fluoride_arsenic$wells_arsenic > 0)
I was interested in what the maximum arsenic levels are for fluoride data < 0.7. I found that it is healthy to have a fluoride level of above 0.7 for dental health on Maine.gov Maine.gov
lo_fluoride <- fluoride_arsenic[fluoride_arsenic$fluoride_max < 0.7, ] %>% arrange(fluoride_max) %>% na.omit
I then found the top 10 arsenic levels for fluoride < 0.7
lo_fluoride_hi_arsenic <- lo_fluoride[lo_fluoride$arsenic_max >= 10, ] %>% arrange(desc(arsenic_max)) %>% top_n(10) %>% select(location, fluoride_max, arsenic_max)
## Selecting by arsenic_max
lo_fluoride_hi_arsenic
## location fluoride_max arsenic_max
## 1 Seboeis Plt 0.50 400
## 2 Atkinson 0.23 290
## 3 Beaver Cove 0.20 140
## 4 Pembroke 0.50 120
## 5 Unity 0.62 120
## 6 Eustis 0.50 110
## 7 Dennysville 0.53 110
## 8 T1 R9 WELS 0.20 89
## 9 Bradford 0.46 84
## 10 Levant 0.30 76
## 11 Alton 0.48 76
I also wanted to visualize fluoride versus arsenic maximum data.
#plot of fluoride and arsenic maximums
fluoride_arsenic %>% ggvis(~fluoride_max, ~arsenic_max) %>% layer_points()
#plot of low fluoride and arsenic maximums
lo_fluoride %>% ggvis(~fluoride_max, ~arsenic_max) %>% layer_points()
# Create columns for exposure > 10 and exposure > 50. I also omitted NA values here.
arsenic_data <- arsenic_data %>% mutate(exp_10 = arsenic_max >= 10) %>% mutate(exp_50 = arsenic_max >= 50) %>% arrange(desc(arsenic_max)) %>% na.omit
# Create a data frame with the count of towns with exposure > 10 ug/L and > 50 ug/L
arsenic_exp <- arsenic_data %>% count(exp_10, exp_50)
# Rename columns
colnames(arsenic_exp) <- c("exp_10", "exp_50", "Locations")
# Rename rows
rownames(arsenic_exp) <- c("< 10", "Difference", ">= 50")
## Warning: Setting row names on a tibble is deprecated.
arsenic_exp
## Source: local data frame [3 x 3]
## Groups: exp_10 [?]
##
## exp_10 exp_50 Locations
## * <lgl> <lgl> <int>
## 1 FALSE FALSE 183
## 2 TRUE FALSE 208
## 3 TRUE TRUE 162
There are 181 observations less than the exposure limit of 10 and 162 observations at or above the old exposure limit of 50 ug/L. If the well testing results were the same in 2001, when the exposure limit changed, an additional 208 locations would have had well maximums above the exposure limit of 10 ug/L