library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(tmap)
library(tmaptools)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/sylviawen/Desktop/GIS coursework
library(tmap)
library(sp)
library(dplyr)
# Specify the path to your desired folder
setwd("~/Desktop/GIS Coursework/")
# import the shapefile of England Local Authority Districts
shp<-st_read(here::here('data',
'Local_Authority_Districts__May_2020__Boundaries_UK_BFE-shp',
'Local_Authority_Districts__May_2020__Boundaries_UK_BFE.shp'))
## Reading layer `Local_Authority_Districts__May_2020__Boundaries_UK_BFE' from data source `/Users/sylviawen/Desktop/GIS coursework/data/Local_Authority_Districts__May_2020__Boundaries_UK_BFE-shp/Local_Authority_Districts__May_2020__Boundaries_UK_BFE.shp' using driver `ESRI Shapefile'
## Simple feature collection with 379 features and 10 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -116.1928 ymin: 5333.81 xmax: 655989 ymax: 1220302
## projected CRS: OSGB 1936 / British National Grid
# import dataset
all_in_data<-read_csv(here::here('data','all-in-data(2).csv'))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Area Code` = col_character(),
## AreaName = col_character(),
## `Prevalence of Diabetes (%)` = col_double(),
## `Prevalence of Overweight and Obesity(%)` = col_double(),
## `Inactive Rate(%)` = col_double()
## )
#Convert the prevalence to numeric number
as.numeric(all_in_data[3,4,5])
## [1] 68.77986
# Present cleansed data set
all_in_data
## # A tibble: 314 x 5
## `Area Code` AreaName `Prevalence of Di… `Prevalence of Ov… `Inactive Rate(…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 E06000001 Hartlepool 6.7 67.5 31.8
## 2 E06000002 Middlesbr… 6.6 67.7 27.3
## 3 E06000003 Redcar an… 7.4 68.8 26.0
## 4 E06000004 Stockton-… 6.6 65.3 32.8
## 5 E06000005 Darlington 7.4 71.7 26.1
## 6 E06000006 Halton 8.1 70.5 28.2
## 7 E06000007 Warrington 6.8 64.6 26.1
## 8 E06000008 Blackburn… 8.1 61.4 28.8
## 9 E06000009 Blackpool 7.7 68.6 29.4
## 10 E06000010 Kingston … 7.1 65.8 28.7
## # … with 304 more rows
In this step I import a shapefile of Local Authority Districts of England. The shapefile has the region divided into 314 areas in England. All-in-data is a cleansed data set contains Diabetes prevalence, Overweight & Obesity rate and low level of Physical Activity rate. The original data sets are provided in github, please refer to the reference in my assignment for the data source.
Data sets could be found in my Github link: https://github.com/wensylvia/CASA0005-GIS
Note:It could take quite a long while to plot the following maps. #### When Rmarkdown>knit to HTML executes code for map output, there may be a slight deviation between the position of title and legend. If you need to access the image, please copy the code to RStudio for execution, and then use the zoom function to zoom in or out. Saved maps and correlation diagrams are also available on my Github > ‘Maps and Graphs’ folder.
new_shp_diabetes<-merge(shp,all_in_data,by.x='lad20cd',by.y='Area Code')
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(new_shp_diabetes) +
tm_polygons("Prevalence of Diabetes (%)", #select column
style='fixed',
breaks = c(0,2.5,5,7.5,10), #Set up suitable breaks for this data set to better observe the result
palette="Purples")+ #with your preferred colour plate
tm_legend(show=TRUE, position=c("left", "top"))+
tm_layout(frame=FALSE)+
tm_credits("Diabetes Rate", position=c("right","top"), size=1)
new_shp_owob<-merge(shp,all_in_data,by.x='lad20cd',by.y='Area Code')
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(new_shp_owob) +
tm_polygons("Prevalence of Overweight and Obesity(%)", #select column
style='fixed',
breaks = c(40,45,50,55,60,65,70,75),
palette="GnBu")+
tm_legend(show=TRUE, position=c("left", "top"))+
tm_layout(frame=FALSE)+
tm_credits("Overweight&Obesity Rate", position=c("right","top"), size=1)
inactivate<-read_csv(here::here('data','all-in-data(2).csv'))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Area Code` = col_character(),
## AreaName = col_character(),
## `Prevalence of Diabetes (%)` = col_double(),
## `Prevalence of Overweight and Obesity(%)` = col_double(),
## `Inactive Rate(%)` = col_double()
## )
inactivate
## # A tibble: 314 x 5
## `Area Code` AreaName `Prevalence of Di… `Prevalence of Ov… `Inactive Rate(…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 E06000001 Hartlepool 6.7 67.5 31.8
## 2 E06000002 Middlesbr… 6.6 67.7 27.3
## 3 E06000003 Redcar an… 7.4 68.8 26.0
## 4 E06000004 Stockton-… 6.6 65.3 32.8
## 5 E06000005 Darlington 7.4 71.7 26.1
## 6 E06000006 Halton 8.1 70.5 28.2
## 7 E06000007 Warrington 6.8 64.6 26.1
## 8 E06000008 Blackburn… 8.1 61.4 28.8
## 9 E06000009 Blackpool 7.7 68.6 29.4
## 10 E06000010 Kingston … 7.1 65.8 28.7
## # … with 304 more rows
new_shp_inactive<-merge(shp,inactivate,by.x='lad20cd',by.y='Area Code')
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(new_shp_inactive) +
tm_polygons("Inactive Rate(%)", #select column
style='fixed',
breaks = c(0,10,15,20,25,30,35,40),
palette="-RdYlBu")+
tm_legend(show=TRUE, position=c("right", "top"))+
tm_layout(frame=FALSE)+
tm_credits("Inactive Rate", position=c(0,0.9), size=1)
Now we are going to merge all variables in one map. #### Note: Its a patient warning!! It could take a very long while to plot this Choropleth + Bubble Map.
#Prevalence of Inactive mapping(this is our base map)
inactivate<-read_csv(here::here('data','all-in-data(2).csv'))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Area Code` = col_character(),
## AreaName = col_character(),
## `Prevalence of Diabetes (%)` = col_double(),
## `Prevalence of Overweight and Obesity(%)` = col_double(),
## `Inactive Rate(%)` = col_double()
## )
new_shp_inactive<-merge(shp,inactivate,by.x='lad20cd',by.y='Area Code')
#this is for bubble map
all<-read_csv(here::here('data','all-in-data(2).csv'))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Area Code` = col_character(),
## AreaName = col_character(),
## `Prevalence of Diabetes (%)` = col_double(),
## `Prevalence of Overweight and Obesity(%)` = col_double(),
## `Inactive Rate(%)` = col_double()
## )
all
## # A tibble: 314 x 5
## `Area Code` AreaName `Prevalence of Di… `Prevalence of Ov… `Inactive Rate(…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 E06000001 Hartlepool 6.7 67.5 31.8
## 2 E06000002 Middlesbr… 6.6 67.7 27.3
## 3 E06000003 Redcar an… 7.4 68.8 26.0
## 4 E06000004 Stockton-… 6.6 65.3 32.8
## 5 E06000005 Darlington 7.4 71.7 26.1
## 6 E06000006 Halton 8.1 70.5 28.2
## 7 E06000007 Warrington 6.8 64.6 26.1
## 8 E06000008 Blackburn… 8.1 61.4 28.8
## 9 E06000009 Blackpool 7.7 68.6 29.4
## 10 E06000010 Kingston … 7.1 65.8 28.7
## # … with 304 more rows
new_shp_ggg<-merge(shp,all,by.x='lad20cd',by.y='Area Code')
#ready to plot multi-layer map
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(new_shp_inactive) +
tm_polygons("Inactive Rate(%)", #select column
style='fixed',
breaks = c(0,10,15,20,25,30,35,40),
palette="PuBu")+
tm_legend(show=TRUE, title.size = 1.2,position=c("left", "top"))+
tm_layout(frame=FALSE)+
tm_shape(new_shp_ggg) + #adding bubble map layer here
tm_bubbles(size="Prevalence of Diabetes (%)",
col="Prevalence of Overweight and Obesity(%)",
style='fixed',
breaks= c(40,45,50,55,60,65,70,75),
size.lim=c(0,10),
palette="-RdYlBu")+
tm_legend(show=TRUE,title.size = 1.2,position=c("left","top"))+
tm_layout(frame=FALSE)+
tm_credits("Choropleth+Bubble Map ", position=c("right","bottom"), size=1.5)
#### Please note that the layer is added in a “cumulative manner”. Therefore, we first import a base map(here I used PA maps), and then create a new bubble map(with two diseases info) on top of it. #### The first part ends here, we now have a multi-layer visual map~
To see if our variables are related let’s run some correlation.
We need some more libraries in order to run the correlation and plot the result.
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(htmlwidgets)
Read csv and assign each column to a corresponding variable.
all_data<-read_csv(here::here('data','all-in-data(1).csv'))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Area Code` = col_character(),
## AreaName = col_character(),
## `Prevalence of Diabetes` = col_double(),
## `Prevalence of Overweight and Obesity` = col_double(),
## `Active Rate` = col_double(),
## `Fairly Active Rate` = col_double(),
## `Inactive Rate` = col_double()
## )
diabetes_1=all_data$`Prevalence of Diabetes`
ow_obesity=all_data$`Prevalence of Overweight and Obesity`
inactivate_rate=all_data$`Inactive Rate`
The reason why we import another ‘all-in-data(1)’ is because the all-in-data (2) which have applied earlier, has been multiplies the original prevalence (%) by 100 for the sake of the aesthetics of the maps. This all-in-data (1) is the original percentage of data.
dia_owob <- ggplot(data=NULL,
aes(x=diabetes_1, y=ow_obesity)) +
geom_hex(bins=25, na.rm=TRUE) +# This parameter can be adjusted
scale_fill_gradient(low="lightsalmon", high="navy")+
labs(fill = "Count per bin")+
ggtitle("Relationship between Diabetes and Overweight & Obesity")+
xlab("Diabetes rate") + ylab("Overweight and Obesity rate")+
geom_smooth(method='lm', se=FALSE, size=0.6)+
theme_bw()
ggplotly(dia_owob)
## `geom_smooth()` using formula 'y ~ x'
dia_inactive <- ggplot(data=NULL,
aes(x=diabetes_1, y=inactivate_rate)) +
geom_hex(bins=25, na.rm=TRUE) +
scale_fill_gradient(low="#edae49", high="navy")+
labs(fill = "Count per bin")+
ggtitle("Relationship between Diabetes and low level of PA")+
xlab("Diadetes rate") + ylab("Inactive Rate")+
geom_smooth(method='lm', se=FALSE, size=0.6)+
theme_bw()
ggplotly(dia_inactive)
## `geom_smooth()` using formula 'y ~ x'
owob_inactive <- ggplot(data=NULL,
aes(x=ow_obesity, y=inactivate_rate)) +
geom_hex(bins=25, na.rm=TRUE) +
scale_fill_gradient(low="#66a182", high="navy")+
labs(fill = "Count per bin")+
ggtitle("Relationship between Overweight & Obesity and low level of PA")+
xlab("Overweight and Obesity rate") + ylab("Inactive Rate")+
geom_smooth(method='lm', se=FALSE, size=0.6)+
theme_bw()
ggplotly(owob_inactive)
## `geom_smooth()` using formula 'y ~ x'
Now let’s perform Pearson’s correlation to see if the P-value match with confidence level(95%) applied in the article. If the p-value is low (in this article will be <0.05), then the correlation is “statistically significant”.
test_dowob <- cor.test(diabetes_1, ow_obesity)
test_dowob
##
## Pearson's product-moment correlation
##
## data: diabetes_1 and ow_obesity
## t = 10.971, df = 312, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4428031 0.6030910
## sample estimates:
## cor
## 0.5276275
test_dinactive <- cor.test(diabetes_1, inactivate_rate)
test_dinactive
##
## Pearson's product-moment correlation
##
## data: diabetes_1 and inactivate_rate
## t = 14.274, df = 312, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5565576 0.6911273
## sample estimates:
## cor
## 0.6285233
test_owobinactive <- cor.test(ow_obesity, inactivate_rate)
test_owobinactive
##
## Pearson's product-moment correlation
##
## data: ow_obesity and inactivate_rate
## t = 8.304, df = 312, p-value = 3.099e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3303215 0.5120230
## sample estimates:
## cor
## 0.4254505