The files fluoride.csv and arsenic.csv contain the data collected by HETL and were downloaded from the Maine Tracking Network. The files contain fluoride and arsenic levels, by town, for private well water samples tested by HETL between the years 1999 and 2013.
# Download the by-town fluoride and arsenic well-water datasets.
# Character columns are kept as plain strings rather than factors.
fluoride <- read.csv(
  file = url("http://jamessuleiman.com/teaching/datasets/fluoride.csv"),
  stringsAsFactors = FALSE
)
arsenic <- read.csv(
  file = url("http://jamessuleiman.com/teaching/datasets/arsenic.csv"),
  stringsAsFactors = FALSE
)
Loaded Packages
# Load packages
# Packages are resolved through the default library search path (.libPaths())
# instead of a hard-coded, machine-specific lib.loc, so the script also runs
# on other machines and other R versions.
library("forcats")
library("ggplot2")
library("tidyverse")
library("yaml")
library("rmarkdown")
library(knitr)
Here are the data manipulations and calculations made during this examination.
# Keep only towns with at least 20 wells tested (n_wells_tested > 19)
arsenic1 <- filter(arsenic, n_wells_tested > 19)
fluoride1 <- filter(fluoride, n_wells_tested > 19)

# Give the arsenic columns contaminant-specific names so they stay
# distinguishable after the two tables are joined
arsenic1 <- rename(
  arsenic1,
  max_arse = maximum,
  tests_arse = n_wells_tested,
  pct_wells_above_arse_gl = percent_wells_above_guideline,
  med_arse = median,
  pct_95_arse = percentile_95
)

# Same for the fluoride columns
fluoride1 <- rename(
  fluoride1,
  tests_fluo = n_wells_tested,
  pct_wells_above_fluo_gl = percent_wells_above_guideline,
  med_fluo = median,
  pct_95_fluo = percentile_95,
  max_fluo = maximum
)

# Combine arsenic and fluoride by town; every arsenic row is kept and
# fluoride columns are attached where the location matches
arsenic1_fluoride1 <- left_join(arsenic1, fluoride1, by = "location")
# Ten towns with the highest percentage of wells above the arsenic
# guideline, sorted from highest to lowest
arse_top_10 <- arsenic %>%
  top_n(n = 10, wt = percent_wells_above_guideline) %>%
  arrange(desc(percent_wells_above_guideline))

# Ten towns with the highest percentage of wells above the fluoride
# guideline, sorted from highest to lowest
fluo_top_10 <- fluoride %>%
  top_n(n = 10, wt = percent_wells_above_guideline) %>%
  arrange(desc(percent_wells_above_guideline))
# Various calculations

# Total number of wells tested across all towns, per contaminant
sum_tested_arsenic <- sum(arsenic[["n_wells_tested"]])
sum_tested_fluoride <- sum(fluoride[["n_wells_tested"]])

# Restrict to towns with at least one tested well before summing the maxima.
# NOTE(review): presumably towns with no tested wells carry NA maxima, which
# would make an unfiltered sum NA - confirm against the data.
arsenic_test_1 <- arsenic %>% filter(n_wells_tested > 0)
fluoride_test_1 <- fluoride %>% filter(n_wells_tested > 0)

# Sum of the per-town maximum levels
sum_max_arsenic <- sum(arsenic_test_1[["maximum"]])
sum_max_fluoride <- sum(fluoride_test_1[["maximum"]])

# Summed per-town maxima divided by total wells tested, rounded to 3 decimals.
# NOTE(review): this ratio is what the report text presents as the "average
# level"; confirm that this is the intended statistic.
mean_arsenic_level <- round(sum_max_arsenic / sum_tested_arsenic, 3)
mean_fluoride_level <- round(sum_max_fluoride / sum_tested_fluoride, 3)
# Estimate the number of wells with arsenic levels above the guideline:
# wells tested * (percent above guideline / 100), rounded to whole wells
# per town and then summed over all towns with at least 20 wells tested.
# Columns are selected by name so the result does not depend on column order.
# NOTE(review): assumes tests_arse and pct_wells_above_arse_gl are the 2nd and
# 3rd columns of arsenic1 - TODO confirm.
# Single-bracket indexing keeps one-column data frames, so round() and sum()
# receive data frames.
n_wells_above_guideline_arsenic <-
  arsenic1["tests_arse"] * (arsenic1["pct_wells_above_arse_gl"] / 100)
n_wells_above_guideline_arsenic <- round(n_wells_above_guideline_arsenic, 0)
n_wells_above_arse_me <- sum(n_wells_above_guideline_arsenic)

# Same estimate for wells with fluoride levels above the guideline.
# NOTE(review): assumes tests_fluo and pct_wells_above_fluo_gl are the 2nd and
# 3rd columns of fluoride1 - TODO confirm.
n_wells_above_guideline_fluo <-
  fluoride1["tests_fluo"] * (fluoride1["pct_wells_above_fluo_gl"] / 100)
n_wells_above_guideline_fluo <- round(n_wells_above_guideline_fluo, 0)
n_wells_above_fluo_me <- sum(n_wells_above_guideline_fluo)
# Towns where more than 10% of tested wells exceed the guideline for
# arsenic and more than 10% exceed the guideline for fluoride
both_high <- filter(
  arsenic1_fluoride1,
  pct_wells_above_arse_gl > 10,
  pct_wells_above_fluo_gl > 10
)
Having water that is safe to drink is critical to the health and safety of communities and households. There are many potential sources of contamination, including microorganisms, nitrates/nitrites, heavy metals, organic chemicals, radionuclides, and fluoride. The State of Maine's Health and Environmental Testing Laboratory (HETL) compiled testing data on two such contaminants, fluoride and arsenic.
The data show that of the wells tested the average arsenic level was 1.195mg/L and the average fluoride level was 0.03ug/L. The total number of wells above the arsenic limit is 4868 for towns with a minimum of 20 wells tested. The total number of wells above the fluoride limit is 854 for towns with a minimum of 20 wells tested. The following is an examination of that data to determine which towns have the highest percentage of wells that exceed the guideline for either arsenic or fluoride and which have the highest percentages of both.
# Render the arsenic top-10 table: town, wells tested, percent above guideline
arse_top_10 %>%
  select(location, n_wells_tested, percent_wells_above_guideline) %>%
  kable(caption = "Top 10 Towns with Highest Arsenic Contamination Percentage")
| location | n_wells_tested | percent_wells_above_guideline |
|---|---|---|
| Manchester | 275 | 58.9 |
| Gorham | 467 | 50.1 |
| Columbia | 42 | 50.0 |
| Monmouth | 277 | 49.5 |
| Eliot | 73 | 49.3 |
| Columbia Falls | 25 | 48.0 |
| Winthrop | 424 | 44.8 |
| Hallowell | 65 | 44.6 |
| Buxton | 334 | 43.4 |
| Blue Hill | 241 | 42.7 |
# Render the fluoride top-10 table: town, wells tested, percent above guideline
fluo_top_10 %>%
  select(location, n_wells_tested, percent_wells_above_guideline) %>%
  kable(caption = "Top 10 Towns with Highest Fluoride Contamination Percentages")
| location | n_wells_tested | percent_wells_above_guideline |
|---|---|---|
| Otis | 60 | 30.0 |
| Dedham | 102 | 22.5 |
| Denmark | 46 | 19.6 |
| Surry | 175 | 18.3 |
| Prospect | 57 | 17.5 |
| Eastbrook | 31 | 16.1 |
| Mercer | 32 | 15.6 |
| Fryeburg | 52 | 15.4 |
| Brownfield | 33 | 15.2 |
| Stockton Springs | 56 | 14.3 |
# Render the towns where both contaminants exceed the 10% threshold
both_high %>%
  select(location, pct_wells_above_arse_gl, pct_wells_above_fluo_gl) %>%
  kable(caption = "All towns with the percentage of contaminated wells above 10% for both Arsenic and Fluoride")
| location | pct_wells_above_arse_gl | pct_wells_above_fluo_gl |
|---|---|---|
| Surry | 40.3 | 18.3 |
| Otis | 39.6 | 30.0 |
| Sedgwick | 37.3 | 11.2 |
| Mercer | 36.4 | 15.6 |
| Starks | 28.6 | 13.6 |
| Clifton | 19.4 | 14.0 |
| Franklin | 17.6 | 10.3 |
| Dedham | 17.5 | 22.5 |
| Stockton Springs | 15.9 | 14.3 |
| Smithfield | 14.6 | 10.1 |
| Kennebunk | 11.7 | 12.7 |
| Eastbrook | 10.7 | 16.1 |