R packages required listed below.

library(maps)
library(mapdata)
library(plyr)
library(graphics)
library(ggplot2)
library(ggmap)
library(plotGoogleMaps)
## Loading required package: sp
## Loading required package: spacetime
library(stats)
library(reshape2)
library(maptools)
## Checking rgeos availability: TRUE
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(scales)

Setting the directory location.

dir<-"C:/Homework [DC]/Groundwater_Proj"
setwd(dir)

Data was downloaded from http://www.bgs.ac.uk/research/groundwater/health/arsenic/Bangladesh/data.html as “DPHE/BGS National Hydrochemical Survey”, describing various groundwater quality parameters for several thousand wells measured by the British Geological Survey throughout Bangladesh in 1998-1999. Subsets of existing .csv data created for the administrative division and district (zila) of Dhaka, most populous in the country.

gwbang <- read.csv("NationalSurveyData (1).csv",header=TRUE,skip=4,sep=",")
dhaka <- subset(gwbang,subset=(DIVISION == "Dhaka"))
dhakadis <- subset(dhaka,subset=(DISTRICT == "Dhaka"))

Simple statistics for well data as a whole - mean, median, range obtained through the summary() function.

gwbang$As <- as.numeric(gwbang$As)

mean(gwbang$As)
## [1] 229.1186
summary(gwbang$As)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     1.0    88.0   229.1   413.0   908.0

I will firstly create a simple histogram of the frequency with which various levels of groundwater arsenic occur throughout Bangladesh. The national water quality standard of 50 ug/L As is indicated by the red line; this threshold has in 2010 been confirmed by JECFA (FAO/WHO) to produce adverse health effects if exceeded.

gwbang$As <- as.numeric(gwbang$As) #factor gwbang$As must be converted to class numeric
hist(gwbang$As,breaks=35,main="Arsenic Contamination of Groundwater in Bangladesh",xlab="Arsenic Con. (ug/L)",ylab="Number Wells",col="lightblue")
abline(v=50,col="red",lty=2)

While a majority of wells have “safe” levels of arsenic, there is a substantial amount and wide range of contaminated wells, with several extreme sites nearing 1 mg/L As.

To provide an overview of geographic distribution, I colorimetrically plot all measured sites (>850 ug/L arbitrarily used to indicate extreme values) by their GPS coordinates on a map:

extremeAs <- subset(gwbang,subset=(As >= 850))
map <- get_map(location='Bangladesh', zoom=7)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Bangladesh&zoom=7&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Bangladesh&sensor=false
mapPoints <- ggmap(map) + geom_point(aes(x=gwbang$LONG_DEG,y=gwbang$LAT_DEG,size=2,col=(gwbang$As)),data=gwbang) + geom_point(aes(x=extremeAs$LONG_DEG,y=extremeAs$LAT_DEG),size=4,colour="#990000",data=extremeAs)

mapPoints

Points (wells) of high arsenic contamination seem to roughly correlate to paths of major rivers, but more rigorous/direct analyses are required.

I now attempt to create a choropleth using averaged levels for each of 64 districts in Bangladesh. Code based on tutorial found in http://bl.ocks.org/prabhasp/raw/5030005/; map shapefile originally obtained from http://gadm.org/download.

download.file("http://biogeo.ucdavis.edu/data/gadm2/shp/BGD_adm.zip",destfile="BGD_adm.zip")  
unzip("BGD_adm.zip",exdir=getwd())  

bang_map <- readShapeSpatial("BGD_adm3.shp")
bang_map <- fortify(bang_map,region="NAME_3")
gwbang$As <- as.numeric(gwbang$As)

district_means <- ddply(gwbang,.(DISTRICT),summarize,arsmean=mean(As)) # ddply splits a data frame, applies a function (here averaging), and returns data frames - how convenient

bang_map$id <- as.factor(bang_map$id)

ggplot() + geom_map(data = district_means, aes(map_id = DISTRICT, fill = arsmean),map = bang_map) + expand_limits(x = bang_map$long, y = bang_map$lat)+ scale_fill_gradient2(low = muted("red"),mid = "white",midpoint = 300,high = muted("blue"),limits=c(0,600),na.value="grey50")

Names for certain districts from the map shapefile do not match those from gwbang (e.g. “Rongpur” instead of “Rangpur”) - hence the empty spaces.

namesinData <- levels(gwbang$DISTRICT)
namesinMap <- levels(bang_map$id)
namesinData[which(!namesinData %in% namesinMap)] #gives values of namesinData that do not exist in namesinMap
##  [1] "Barguna"       "Brahamanbaria" "Chuadanga"     "Gaibandha"    
##  [5] "Gopalganj"     "Habiganj"      "Kushtia"       "Manikganj"    
##  [9] "Maulvibazar"   "Munshiganj"    "Mymensingh"    "Narayanganj"  
## [13] "Narsingdi"     "Netrokona"     "Rangpur"       "Satkhira"     
## [17] "Sirajganj"     "Sunamganj"
namesinMap[which(!namesinMap %in% namesinData)]
##  [1] "Bandarbon"            "Borgona"              "Brahmanbaria"        
##  [4] "Choua Danga"          "Gaibanda"             "Gopalgonj"           
##  [7] "Hobiganj"             "Khagrachari"          "Kustia"              
## [10] "Manikgonj"            "Moulvibazar"          "Munshigonj"          
## [13] "Naray Angonj"         "Narshingdi"           "Nasirabad"           
## [16] "Netrakona"            "Parbattya Chattagram" "Rongpur"             
## [19] "Shatkhira"            "Sirajgonj"            "Sun Amgonj"

Unfortunately, the map and data files also have different numbers of distinct districts. Probably not an effective use of R, but I will manually replace each name in the map file.

bang_map$id <- as.character(bang_map$id)
bang_map$id[bang_map$id == "Borgona"] <- "Barguna"
bang_map$id[bang_map$id == "Brahmanbaria"] <- "Brahamanbaria"
bang_map$id[bang_map$id == "Choua Danga"] <- "Chuadanga"
bang_map$id[bang_map$id == "Gaibanda"] <- "Gaibandha"
bang_map$id[bang_map$id == "Gopalgonj"] <- "Gopalganj"
bang_map$id[bang_map$id == "Hobiganj"] <- "Habiganj"
bang_map$id[bang_map$id == "Moulvibazar"] <- "Kushtia"
bang_map$id[bang_map$id == "Manikgonj"] <- "Maulvibazar"
bang_map$id[bang_map$id == "Munshigonj"] <- "Munshiganj"
bang_map$id[bang_map$id == "Naray Angonj"] <- "Narayanganj"
bang_map$id[bang_map$id == "Narshingdi"] <- "Narsingdi"
bang_map$id[bang_map$id == "Netrakona"] <- "Netrokona"
bang_map$id[bang_map$id == "Rongpur"] <- "Rangpur"
bang_map$id[bang_map$id == "Shatkhira"] <- "Satkhira"
bang_map$id[bang_map$id == "Sirajgonj"] <- "Sirajganj"
bang_map$id[bang_map$id == "Sun Amgonj"] <- "Sunamganj"

Now - simply rerunning previous code for the chloropleth.

bang_map$id <- as.factor(bang_map$id)

ggplot() + geom_map(data = district_means, aes(map_id = DISTRICT, fill = arsmean),map = bang_map) + expand_limits(x = bang_map$long, y = bang_map$lat) + scale_fill_gradient2(low = "lightblue",mid = "purple",midpoint = 250,high = "red",limits=c(0,500),na.value="grey50") 

bang_map2 <- readShapeSpatial("BGD_adm3.shp")

geom_polygon(data=)
## geom_polygon:  
## stat_identity:  
## position_identity: (width = NULL, height = NULL)

Multiple districts are still missing due to lack of data in gwbang, but the map is now reasonably complete.