R packages required listed below.
library(maps)
library(mapdata)
library(plyr)
library(graphics)
library(ggplot2)
library(ggmap)
library(plotGoogleMaps)
## Loading required package: sp
## Loading required package: spacetime
library(stats)
library(reshape2)
library(maptools)
## Checking rgeos availability: TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(scales)
Setting the directory location.
dir<-"C:/Homework [DC]/Groundwater_Proj"
setwd(dir)
Data was downloaded from http://www.bgs.ac.uk/research/groundwater/health/arsenic/Bangladesh/data.html as “DPHE/BGS National Hydrochemical Survey”, describing various groundwater quality parameters for several thousand wells measured by the British Geological Survey throughout Bangladesh in 1998-1999. Subsets of existing .csv data created for the administrative division and district (zila) of Dhaka, most populous in the country.
gwbang <- read.csv("NationalSurveyData (1).csv",header=TRUE,skip=4,sep=",")
dhaka <- subset(gwbang,subset=(DIVISION == "Dhaka"))
dhakadis <- subset(dhaka,subset=(DISTRICT == "Dhaka"))
Simple statistics for well data as a whole - mean, median, range obtained through the summary() function.
gwbang$As <- as.numeric(gwbang$As)
mean(gwbang$As)
## [1] 229.1186
summary(gwbang$As)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 1.0 88.0 229.1 413.0 908.0
I will firstly create a simple histogram of the frequency with which various levels of groundwater arsenic occur throughout Bangladesh. The national water quality standard of 50 ug/L As is indicated by the red line; this threshold has in 2010 been confirmed by JECFA (FAO/WHO) to produce adverse health effects if exceeded.
gwbang$As <- as.numeric(gwbang$As) #factor gwbang$As must be converted to class numeric
hist(gwbang$As,breaks=35,main="Arsenic Contamination of Groundwater in Bangladesh",xlab="Arsenic Con. (ug/L)",ylab="Number Wells",col="lightblue")
abline(v=50,col="red",lty=2)
While a majority of wells have “safe” levels of arsenic, there is a substantial amount and wide range of contaminated wells, with several extreme sites nearing 1 mg/L As.
To provide an overview of geographic distribution, I colorimetrically plot all measured sites (>850 ug/L arbitrarily used to indicate extreme values) by their GPS coordinates on a map:
extremeAs <- subset(gwbang,subset=(As >= 850))
map <- get_map(location='Bangladesh', zoom=7)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Bangladesh&zoom=7&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Bangladesh&sensor=false
mapPoints <- ggmap(map) + geom_point(aes(x=gwbang$LONG_DEG,y=gwbang$LAT_DEG,size=2,col=(gwbang$As)),data=gwbang) + geom_point(aes(x=extremeAs$LONG_DEG,y=extremeAs$LAT_DEG),size=4,colour="#990000",data=extremeAs)
mapPoints
Points (wells) of high arsenic contamination seem to roughly correlate to paths of major rivers, but more rigorous/direct analyses are required.
I now attempt to create a choropleth using averaged levels for each of 64 districts in Bangladesh. Code based on tutorial found in http://bl.ocks.org/prabhasp/raw/5030005/; map shapefile originally obtained from http://gadm.org/download.
download.file("http://biogeo.ucdavis.edu/data/gadm2/shp/BGD_adm.zip",destfile="BGD_adm.zip")
unzip("BGD_adm.zip",exdir=getwd())
bang_map <- readShapeSpatial("BGD_adm3.shp")
bang_map <- fortify(bang_map,region="NAME_3")
gwbang$As <- as.numeric(gwbang$As)
district_means <- ddply(gwbang,.(DISTRICT),summarize,arsmean=mean(As)) # ddply splits a data frame, applies a function (here averaging), and returns data frames - how convenient
bang_map$id <- as.factor(bang_map$id)
ggplot() + geom_map(data = district_means, aes(map_id = DISTRICT, fill = arsmean),map = bang_map) + expand_limits(x = bang_map$long, y = bang_map$lat)+ scale_fill_gradient2(low = muted("red"),mid = "white",midpoint = 300,high = muted("blue"),limits=c(0,600),na.value="grey50")
Names for certain districts from the map shapefile do not match those from gwbang (e.g. “Rongpur” instead of “Rangpur”) - hence the empty spaces.
namesinData <- levels(gwbang$DISTRICT)
namesinMap <- levels(bang_map$id)
namesinData[which(!namesinData %in% namesinMap)] #gives values of namesinData that do not exist in namesinMap
## [1] "Barguna" "Brahamanbaria" "Chuadanga" "Gaibandha"
## [5] "Gopalganj" "Habiganj" "Kushtia" "Manikganj"
## [9] "Maulvibazar" "Munshiganj" "Mymensingh" "Narayanganj"
## [13] "Narsingdi" "Netrokona" "Rangpur" "Satkhira"
## [17] "Sirajganj" "Sunamganj"
namesinMap[which(!namesinMap %in% namesinData)]
## [1] "Bandarbon" "Borgona" "Brahmanbaria"
## [4] "Choua Danga" "Gaibanda" "Gopalgonj"
## [7] "Hobiganj" "Khagrachari" "Kustia"
## [10] "Manikgonj" "Moulvibazar" "Munshigonj"
## [13] "Naray Angonj" "Narshingdi" "Nasirabad"
## [16] "Netrakona" "Parbattya Chattagram" "Rongpur"
## [19] "Shatkhira" "Sirajgonj" "Sun Amgonj"
Unfortunately, the map and data files also have different numbers of distinct districts. Probably not an effective use of R, but I will manually replace each name in the map file.
bang_map$id <- as.character(bang_map$id)
bang_map$id[bang_map$id == "Borgona"] <- "Barguna"
bang_map$id[bang_map$id == "Brahmanbaria"] <- "Brahamanbaria"
bang_map$id[bang_map$id == "Choua Danga"] <- "Chuadanga"
bang_map$id[bang_map$id == "Gaibanda"] <- "Gaibandha"
bang_map$id[bang_map$id == "Gopalgonj"] <- "Gopalganj"
bang_map$id[bang_map$id == "Hobiganj"] <- "Habiganj"
bang_map$id[bang_map$id == "Moulvibazar"] <- "Kushtia"
bang_map$id[bang_map$id == "Manikgonj"] <- "Maulvibazar"
bang_map$id[bang_map$id == "Munshigonj"] <- "Munshiganj"
bang_map$id[bang_map$id == "Naray Angonj"] <- "Narayanganj"
bang_map$id[bang_map$id == "Narshingdi"] <- "Narsingdi"
bang_map$id[bang_map$id == "Netrakona"] <- "Netrokona"
bang_map$id[bang_map$id == "Rongpur"] <- "Rangpur"
bang_map$id[bang_map$id == "Shatkhira"] <- "Satkhira"
bang_map$id[bang_map$id == "Sirajgonj"] <- "Sirajganj"
bang_map$id[bang_map$id == "Sun Amgonj"] <- "Sunamganj"
Now - simply rerunning previous code for the chloropleth.
bang_map$id <- as.factor(bang_map$id)
ggplot() + geom_map(data = district_means, aes(map_id = DISTRICT, fill = arsmean),map = bang_map) + expand_limits(x = bang_map$long, y = bang_map$lat) + scale_fill_gradient2(low = "lightblue",mid = "purple",midpoint = 250,high = "red",limits=c(0,500),na.value="grey50")
bang_map2 <- readShapeSpatial("BGD_adm3.shp")
geom_polygon(data=)
## geom_polygon:
## stat_identity:
## position_identity: (width = NULL, height = NULL)
Multiple districts are still missing due to lack of data in gwbang, but the map is now reasonably complete.