get the obesity data from the data folder
rawData <- read_csv("data/EnglandObesity.csv",
na = c("", "NA", "n/a"),
col_names = TRUE,
locale = locale(encoding = 'Latin1'))
## Parsed with column specification:
## cols(
## `UTLA code` = col_character(),
## `Local Authority` = col_character(),
## EMPLOY = col_double(),
## UNEMPLOY = col_double(),
## ECO_INACT = col_double(),
## SMOK = col_double(),
## ALCOHOL = col_double(),
## PHY_ACT = col_double(),
## DAY5 = col_double(),
## OBESE = col_double()
## )
# rawData <- rawData %>%clean_names()
draw the pair plots to have a look at the distribution and roughly relationships of the data
ggpairs(select(rawData,c(3:10)))
draw the box plot to see the range, average and other information of the data
par(mfrow=c(1,2))
boxplot(select(rawData,c(3,7:9)))
boxplot(select(rawData,c(4:6,10)))
For the skewed-distribution attribute, use Tukey’s ladder to see how to transform them to be more like normal distributions
symbox(~ALCOHOL,
rawData,
na.rm=T,
powers=seq(-3,3,by=.5))
symbox(~EMPLOY,
rawData,
na.rm=T,
powers=seq(-3,3,by=.5))
symbox(~UNEMPLOY,
rawData,
na.rm=T,
powers=seq(-3,3,by=.5))
symbox(~ECO_INACT,
rawData,
na.rm=T,
powers=seq(-3,3,by=.5))
Take ALCOHOL for example to check the distribution of the transformed data by histogram
ggplot(rawData, aes(x=log(ALCOHOL))) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
use the transformed data to build the linear model. Get the information of the model coefficients and the model itself
tukeyData <- rawData %>%
mutate(ALCOHOL = log(ALCOHOL),
OBESE = OBESE,
EMPLOY = EMPLOY^2,
UNEMPLOY = log(UNEMPLOY),
DAY5 = DAY5,
SMOK = SMOK,
PHY_ACT = PHY_ACT,
ECO_INACT = log(ECO_INACT))
OLSModel <- lm(OBESE ~ SMOK +
ALCOHOL +
PHY_ACT +
EMPLOY +
UNEMPLOY +
ECO_INACT +
DAY5,
data = tukeyData)
tidy(OLSModel)
## # A tibble: 8 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 21.8 73.6 0.296 7.68e- 1
## 2 SMOK 0.178 0.107 1.67 9.72e- 2
## 3 ALCOHOL 3.32 1.51 2.20 2.96e- 2
## 4 PHY_ACT -0.513 0.0687 -7.46 7.97e-12
## 5 EMPLOY 0.00180 0.00455 0.396 6.93e- 1
## 6 UNEMPLOY 0.125 2.87 0.0435 9.65e- 1
## 7 ECO_INACT 1.43 14.3 0.0999 9.21e- 1
## 8 DAY5 0.00773 0.0618 0.125 9.01e- 1
glance(OLSModel)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.504 0.480 2.94 20.7 5.70e-19 7 -370. 759. 786.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
#summary(OLSModel)
Use VIF and correlation matrix to see whether there is multicolinearity among the attributes. Firstly, remove the attribute whose VIF is the highest and larger than 10.
# Variance Inflation Factor
vif(OLSModel)
## SMOK ALCOHOL PHY_ACT EMPLOY UNEMPLOY ECO_INACT DAY5
## 1.939512 1.783955 2.224730 182.836535 19.575199 113.816333 2.058593
OLSModel <- lm(OBESE ~ SMOK +
ALCOHOL +
PHY_ACT +
# EMPLOY +
UNEMPLOY +
ECO_INACT +
DAY5,
data = tukeyData)
# residual error
s2 = sum(summary(OLSModel)$residuals^2)/143
# correlation matrix
Correlation_all<- rawData %>%
dplyr::select(c(3:10))%>%
correlate()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
rplot(Correlation_all)
## Don't know how to automatically pick scale for object of type noquote. Defaulting to continuous.
Correlation_all
## # A tibble: 8 x 9
## term EMPLOY UNEMPLOY ECO_INACT SMOK ALCOHOL PHY_ACT DAY5 OBESE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EMPLOY NA -0.791 -0.967 -0.555 -0.460 0.584 0.612 -0.286
## 2 UNEMPLOY -0.791 NA 0.611 0.544 0.421 -0.581 -0.586 0.337
## 3 ECO_INACT -0.967 0.611 NA 0.493 0.422 -0.515 -0.553 0.233
## 4 SMOK -0.555 0.544 0.493 NA 0.576 -0.485 -0.498 0.411
## 5 ALCOHOL -0.460 0.421 0.422 0.576 NA -0.551 -0.351 0.486
## 6 PHY_ACT 0.584 -0.581 -0.515 -0.485 -0.551 NA 0.623 -0.666
## 7 DAY5 0.612 -0.586 -0.553 -0.498 -0.351 0.623 NA -0.374
## 8 OBESE -0.286 0.337 0.233 0.411 0.486 -0.666 -0.374 NA
Repeat the step of deleting the highest VIF until all the value is within 10. Then calculate the AIC and BIC of the model without the removed attributes, and search for the most suitable model
OLSModel <- lm(OBESE ~ SMOK +
ALCOHOL +
PHY_ACT +
# EMPLOY +
UNEMPLOY +
ECO_INACT +
DAY5,
data = tukeyData)
tidy(OLSModel)
## # A tibble: 7 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 50.7 9.78 5.18 7.38e- 7
## 2 SMOK 0.180 0.107 1.69 9.36e- 2
## 3 ALCOHOL 3.31 1.51 2.20 2.93e- 2
## 4 PHY_ACT -0.514 0.0684 -7.52 5.66e-12
## 5 UNEMPLOY -0.947 0.937 -1.01 3.14e- 1
## 6 ECO_INACT -4.20 1.81 -2.32 2.19e- 2
## 7 DAY5 0.00931 0.0615 0.151 8.80e- 1
glance(OLSModel)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.504 0.483 2.93 24.2 1.19e-19 6 -371. 757. 781.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
vif(OLSModel)
## SMOK ALCOHOL PHY_ACT UNEMPLOY ECO_INACT DAY5
## 1.937520 1.783879 2.217240 2.100975 1.826710 2.050030
# BIC
step(OLSModel, direction = "both", k=log(nrow(tukeyData)))
## Start: AIC=350.55
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + UNEMPLOY + ECO_INACT + DAY5
##
## Df Sum of Sq RSS AIC
## - DAY5 1 0.20 1229.0 345.56
## - UNEMPLOY 1 8.79 1237.6 346.60
## - SMOK 1 24.49 1253.3 348.49
## - ALCOHOL 1 41.66 1270.4 350.54
## <none> 1228.8 350.55
## - ECO_INACT 1 46.12 1274.9 351.06
## - PHY_ACT 1 485.37 1714.2 395.47
##
## Step: AIC=345.56
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + UNEMPLOY + ECO_INACT
##
## Df Sum of Sq RSS AIC
## - UNEMPLOY 1 9.78 1238.8 341.74
## - SMOK 1 24.51 1253.5 343.51
## <none> 1229.0 345.56
## - ALCOHOL 1 43.57 1272.5 345.77
## - ECO_INACT 1 49.18 1278.2 346.43
## + DAY5 1 0.20 1228.8 350.55
## - PHY_ACT 1 559.11 1788.1 396.79
##
## Step: AIC=341.74
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + ECO_INACT
##
## Df Sum of Sq RSS AIC
## - SMOK 1 17.68 1256.4 338.85
## <none> 1238.8 341.74
## - ALCOHOL 1 46.51 1285.3 342.26
## - ECO_INACT 1 74.18 1312.9 345.45
## + UNEMPLOY 1 9.78 1229.0 345.56
## + DAY5 1 1.18 1237.6 346.60
## - PHY_ACT 1 568.47 1807.2 393.38
##
## Step: AIC=338.85
## OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT
##
## Df Sum of Sq RSS AIC
## <none> 1256.4 338.85
## - ECO_INACT 1 60.31 1316.7 340.87
## + SMOK 1 17.68 1238.8 341.74
## - ALCOHOL 1 82.76 1339.2 343.41
## + UNEMPLOY 1 2.95 1253.5 343.51
## + DAY5 1 0.00 1256.4 343.86
## - PHY_ACT 1 608.54 1865.0 393.09
##
## Call:
## lm(formula = OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT, data = tukeyData)
##
## Coefficients:
## (Intercept) ALCOHOL PHY_ACT ECO_INACT
## 48.2297 4.2609 -0.5015 -4.2547
# AIC
step(OLSModel, direction = "both")
## Start: AIC=329.47
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + UNEMPLOY + ECO_INACT + DAY5
##
## Df Sum of Sq RSS AIC
## - DAY5 1 0.20 1229.0 327.50
## - UNEMPLOY 1 8.79 1237.6 328.54
## <none> 1228.8 329.47
## - SMOK 1 24.49 1253.3 330.43
## - ALCOHOL 1 41.66 1270.4 332.47
## - ECO_INACT 1 46.12 1274.9 333.00
## - PHY_ACT 1 485.37 1714.2 377.41
##
## Step: AIC=327.5
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + UNEMPLOY + ECO_INACT
##
## Df Sum of Sq RSS AIC
## - UNEMPLOY 1 9.78 1238.8 326.68
## <none> 1229.0 327.50
## - SMOK 1 24.51 1253.5 328.46
## + DAY5 1 0.20 1228.8 329.47
## - ALCOHOL 1 43.57 1272.5 330.72
## - ECO_INACT 1 49.18 1278.2 331.38
## - PHY_ACT 1 559.11 1788.1 381.74
##
## Step: AIC=326.68
## OBESE ~ SMOK + ALCOHOL + PHY_ACT + ECO_INACT
##
## Df Sum of Sq RSS AIC
## <none> 1238.8 326.68
## - SMOK 1 17.68 1256.4 326.81
## + UNEMPLOY 1 9.78 1229.0 327.50
## + DAY5 1 1.18 1237.6 328.54
## - ALCOHOL 1 46.51 1285.3 330.21
## - ECO_INACT 1 74.18 1312.9 333.41
## - PHY_ACT 1 568.47 1807.2 381.34
##
## Call:
## lm(formula = OBESE ~ SMOK + ALCOHOL + PHY_ACT + ECO_INACT, data = tukeyData)
##
## Coefficients:
## (Intercept) SMOK ALCOHOL PHY_ACT ECO_INACT
## 50.4179 0.1433 3.4528 -0.4894 -4.9004
Consider the result using Mallow’s Cp statistic and each regressor’s p-value in two model to find the final model
OLSModel <- lm(OBESE ~
SMOK +
ALCOHOL +
PHY_ACT +
# EMPLOY +
# UNEMPLOY +
ECO_INACT,
# DAY5,
data = tukeyData)
summary(OLSModel)
##
## Call:
## lm(formula = OBESE ~ SMOK + ALCOHOL + PHY_ACT + ECO_INACT, data = tukeyData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8082 -1.7321 0.3107 2.1128 6.1905
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.4179 9.4169 5.354 3.29e-07 ***
## SMOK 0.1433 0.0996 1.438 0.15245
## ALCOHOL 3.4528 1.4798 2.333 0.02100 *
## PHY_ACT -0.4894 0.0600 -8.157 1.49e-13 ***
## ECO_INACT -4.9004 1.6631 -2.947 0.00374 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.923 on 145 degrees of freedom
## Multiple R-squared: 0.4999, Adjusted R-squared: 0.4861
## F-statistic: 36.23 on 4 and 145 DF, p-value: < 2.2e-16
SSE1 = sum(summary(OLSModel)$residuals^2)
cp1 = SSE1/s2 - (150-2*5)
print(paste("cp1:", cp1))
## [1] "cp1: 4.1605096775167"
OLSModel <- lm(OBESE ~
# SMOK +
ALCOHOL +
PHY_ACT +
# EMPLOY +
# UNEMPLOY +
ECO_INACT,
# DAY5,
data = tukeyData)
summary(OLSModel)
##
## Call:
## lm(formula = OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT, data = tukeyData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.317 -1.764 0.160 1.843 6.141
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.22972 9.32717 5.171 7.55e-07 ***
## ALCOHOL 4.26090 1.37399 3.101 0.00231 **
## PHY_ACT -0.50145 0.05963 -8.409 3.42e-14 ***
## ECO_INACT -4.25475 1.60720 -2.647 0.00900 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.934 on 146 degrees of freedom
## Multiple R-squared: 0.4927, Adjusted R-squared: 0.4823
## F-statistic: 47.28 on 3 and 146 DF, p-value: < 2.2e-16
SSE2 = sum(summary(OLSModel)$residuals^2)
cp2 = SSE2/s2 - (150-2*4)
print(paste("cp2:", cp2))
## [1] "cp2: 4.217752987106"
# choose the model that its Cp is closer to p than other models. As there is no lack of fit when the value of cp is approximately p.
check whether the residuals in the model is normally distributed
# add residuals to the data
modelData <- tukeyData %>%
select(c(1,5,7:8,10)) %>%
mutate(OLSModelresids = residuals(OLSModel))
model_data <- OLSModel %>% augment(., modelData)
#plot residuals
model_data%>%
dplyr::select(.resid)%>%
pull()%>%
qplot()+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
check for homo/heteroscedasticity
par(mfrow=c(2,2))
plot(OLSModel)
Read the map data and merge it with the data we get above. plot the obesity rate and other attributes on the map
EnglandWales <- st_read(here::here("data","Counties_and_Unitary_Authorities__December_2016__Boundaries-shp","Counties_and_Unitary_Authorities__December_2016__Boundaries.shp"))
## Reading layer `Counties_and_Unitary_Authorities__December_2016__Boundaries' from data source `/cloud/project/data/Counties_and_Unitary_Authorities__December_2016__Boundaries-shp/Counties_and_Unitary_Authorities__December_2016__Boundaries.shp' using driver `ESRI Shapefile'
## Simple feature collection with 174 features and 10 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 83757.91 ymin: 5556.599 xmax: 655510.8 ymax: 657536.3
## proj4string: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs
England <- EnglandWales %>%
dplyr::filter(str_detect(ctyua16cd, "^E"))%>%
st_transform(., 27700) %>%
select(c(ctyua16cd,geometry))
geometryData <- England%>%
left_join(.,
modelData,
by = c("ctyua16cd" = "UTLA code")) %>%
na.omit()
# qtm(geometryData)
# drqw the distribution of obese on the map
tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(geometryData) +
tm_polygons("OBESE",palette="RdBu",breaks=c(10,22,25,28,35))+
tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent")
# tm_credits("(a)", position=c(0.1,0.85), size=1.5)
legend <- tm_shape(geometryData) +
tm_polygons("OBESE",palette="RdBu",breaks=c(10,22,25,28,35))+
tm_scale_bar(position=c(0,0.1), text.size=0.6)+
tm_compass(north=0, position=c(0,0.8))+
tm_layout(legend.only = TRUE, legend.position=c(0,0.4),asp=0.1)
t=tmap_arrange(tm1,legend)
t
# draw the distribution of other attributes on the map
tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(geometryData) +
tm_polygons("ALCOHOL",palette="RdBu", breaks=c(3.6, 3.9, 4.1, 4.3, 5.0))+
# tm_legend()+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.4)) +
tm_credits("(a)", position=c(0.65,0.85), size=1.5)
tm2 <- tm_shape(geometryData) +
tm_polygons("PHY_ACT",palette="RdBu",breaks=c(40,55,58,61,70))+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.45)) +
tm_credits("(b)", position=c(0.65,0.85), size=1.5)
tm3 <- tm_shape(geometryData) +
tm_polygons("ECO_INACT",palette="RdBu",breaks=c(2.6,2.9,3.0,3.1,3.6))+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.5)) +
tm_credits("(c)", position=c(0.65,0.85), size=1.5)
legend <- tm_shape(geometryData) +
tm_polygons("ALCOHOL",palette="RdBu")+
tm_scale_bar(position=c(0.3,0.2), text.size=3)+
tm_compass(north=0, position=c(0.4,0.5),size = 2)+
tm_layout(legend.only = TRUE, legend.position=c(3,0.4),asp=0.1)
t=tmap_arrange(tm1,tm2,tm3,legend, nrow=2)
t
Calculate the Moran’s I statistic for both Queen’s case neighbours and k-nearest neighbours of 4
# spatial autocorrelation Moran’s I
#calculate the centroids
coordsW <- geometryData%>%
st_centroid()%>%
st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
# plot(coordsW)
#generate a spatial weights matrix
#queen's case neighbours
LWard_nb <- geometryData %>%
poly2nb(., queen=T)
#or nearest neighbours
knn_wards <-coordsW %>%
knearneigh(., k=4)
LWard_knn <- knn_wards %>%
knn2nb()
# plot(LWard_nb, st_geometry(coordsW), col="red")
# plot(LWard_knn, st_geometry(coordsW), col="blue")
# plot(geometryData)
#create a spatial weights matrix object from these weights
Lward.queens_weight <- LWard_nb %>%
nb2listw(., style="C",zero.policy = T)
Lward.knn_4_weight <- LWard_knn %>%
nb2listw(., style="C")
Queen <- geometryData %>%
st_drop_geometry()%>%
dplyr::select(OLSModelresids)%>%
pull()%>%
moran.test(., Lward.queens_weight,zero.policy = T)%>%
tidy()
Queen
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0.415 -0.00676 0.00269 8.12 2.31e-16 Moran I test und… greater
Nearest_neighbour <- geometryData %>%
st_drop_geometry()%>%
dplyr::select(OLSModelresids)%>%
pull()%>%
moran.test(., Lward.knn_4_weight)%>%
tidy()
Nearest_neighbour
## # A tibble: 1 x 7
## estimate1 estimate2 estimate3 statistic p.value method alternative
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0.474 -0.00671 0.00278 9.11 4.05e-20 Moran I test und… greater
#Remembering that Moran’s I ranges from between -1 and +1 (0 indicating no spatial autocorrelation) we can conclude that there is some moderate spatial autocorrelation in our residuals.
construct the GWR model use the attributes we select by MLR
st_crs(geometryData) = 27700
geometryDataSP <- geometryData %>%
as(., "Spatial")
st_crs(coordsW) = 27700
coordsWSP <- coordsW %>%
as(., "Spatial")
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(OBESE ~ ALCOHOL +
# SMOK +
PHY_ACT +
# EMPLOY +
# UNEMPLOY +
ECO_INACT,
# DAY5,
data = geometryDataSP,
coords=coordsWSP,
adapt=T,
gweight = gwr.bisquare,
verbose = FALSE,
method = "AIC")
## Warning in gwr.sel(OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT, data =
## geometryDataSP, : data is Spatial* object, ignoring coords argument
gwr.model = gwr(OBESE ~ ALCOHOL +
# SMOK +
PHY_ACT +
# EMPLOY +
# UNEMPLOY +
ECO_INACT,
# DAY5,
data=geometryDataSP,
coords=coordsWSP,
adapt = GWRbandwidth,
gweight = gwr.bisquare,
hatmatrix=TRUE,
se.fit=TRUE)
## Warning in gwr(OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT, data = geometryDataSP, :
## data is Spatial* object, ignoring coords argument
gwr.model
## Call:
## gwr(formula = OBESE ~ ALCOHOL + PHY_ACT + ECO_INACT, data = geometryDataSP,
## coords = coordsWSP, gweight = gwr.bisquare, adapt = GWRbandwidth,
## hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.bisquare
## Adaptive quantile: 0.5031398 (about 75 of 150 data points)
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max. Global
## X.Intercept. 29.900495 57.043361 62.441404 86.995783 89.205814 48.2297
## ALCOHOL -2.931730 -1.998932 -0.441513 1.291099 8.216442 4.2609
## PHY_ACT -0.633899 -0.592649 -0.469005 -0.431624 -0.357972 -0.5015
## ECO_INACT -8.963359 -7.654702 -4.273400 -3.178335 0.070912 -4.2547
## Number of data points: 150
## Effective number of parameters (residual: 2traceS - traceS'S): 20.9273
## Effective degrees of freedom (residual: 2traceS - traceS'S): 129.0727
## Sigma (residual: 2traceS - traceS'S): 2.622095
## Effective number of parameters (model: traceS): 16.30293
## Effective degrees of freedom (model: traceS): 133.6971
## Sigma (model: traceS): 2.576349
## Sigma (ML): 2.432316
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 731.75
## AIC (GWR p. 96, eq. 4.22): 708.6376
## Residual sum of squares: 887.4241
## Quasi-global R2: 0.641725
gwrResults <- as.data.frame(gwr.model$SDF)
names(gwrResults)
## [1] "sum.w" "X.Intercept." "ALCOHOL"
## [4] "PHY_ACT" "ECO_INACT" "X.Intercept._se"
## [7] "ALCOHOL_se" "PHY_ACT_se" "ECO_INACT_se"
## [10] "gwr.e" "pred" "pred.se"
## [13] "localR2" "X.Intercept._se_EDF" "ALCOHOL_se_EDF"
## [16] "PHY_ACT_se_EDF" "ECO_INACT_se_EDF" "pred.se.1"
show the distribution of the coefficients on the map
#attach coefficients to original SF
geometryDataSP2 <- geometryData %>%
mutate(coef_ALCOHOL = gwrResults$ALCOHOL,
localR2 = gwrResults$localR2,
coef_ECO_INACT = gwrResults$ECO_INACT,
coef_PHY_ACT = gwrResults$PHY_ACT
)
# coef_physically_active_adults = gwrResults$PHY_ACT,
# coef_alcohol_related_admissions = gwrResults$ALCOHOL,
# coef_ilo_economically_inactive = gwrResults$ECO_INACT)
# the coefficients of three attributes
tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(geometryDataSP2) +
tm_polygons("coef_ALCOHOL",palette="RdBu",midpoint = 0)+
# tm_legend()+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.52)) +
tm_credits("(a)", position=c(0.65,0.85), size=1.5)
tm2 <- tm_shape(geometryDataSP2) +
tm_polygons("coef_PHY_ACT",palette="RdBu",midpoint = 0)+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.45)) +
tm_credits("(b)", position=c(0.65,0.85), size=1.5)
tm3 <- tm_shape(geometryDataSP2) +
tm_polygons("coef_ECO_INACT",palette="RdBu",midpoint = 0)+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.32)) +
tm_credits("(c)", position=c(0.65,0.85), size=1.5)
legend <- tm_shape(geometryDataSP2) +
tm_polygons("coef_ECO_INACT",palette="RdBu",midpoint = 0)+
tm_scale_bar(position=c(0.3,0.2), text.size=3)+
tm_compass(north=0, position=c(0.4,0.5),size = 2)+
tm_layout(legend.only = TRUE, legend.position=c(3,0.4),asp=0.1)
t=tmap_arrange(tm1,tm2,tm3,legend, nrow=2)
t
# tm_shape(geometryDataSP2) +
# tm_polygons(col = "localR2",
# palette = "RdBu",
# alpha = 0.5,
# midpoint = NA)
to see whether the coefficient estimate is more than 2 standard errors away from zero
#run the significance test
PHY_ACTSigTest = abs(gwrResults$PHY_ACT)-2 * gwrResults$PHY_ACT_se
# smokingSigTest1 = 2*gwrResults$UNEMPLOY / gwrResults$EMPLOY_se
#store significance results
ALCOHOLSigTest = abs(gwrResults$ALCOHOL)-2 * gwrResults$ALCOHOL_se
ECO_INACTSigTest = abs(gwrResults$ECO_INACT)-2 * gwrResults$ECO_INACT_se
geometryDataSP2 <- geometryDataSP2 %>%
mutate(PHY_ACTSigTest = PHY_ACTSigTest,
ALCOHOLSigTest = ALCOHOLSigTest,
ECO_INACTSigTest = ECO_INACTSigTest)
# the value bigger than zero is statistically significiant
tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(geometryDataSP2) +
tm_polygons("ALCOHOLSigTest",palette="RdBu",midpoint = 0)+
# tm_legend()+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.4)) +
tm_credits("(a)", position=c(0.65,0.85), size=1.5)
tm2 <- tm_shape(geometryDataSP2) +
tm_polygons("PHY_ACTSigTest",palette="RdBu",midpoint = 0)+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.45)) +
tm_credits("(b)", position=c(0.65,0.85), size=1.5)
tm3 <- tm_shape(geometryDataSP2) +
tm_polygons("ECO_INACTSigTest",palette="RdBu",midpoint = 0)+
# tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent",legend.position=c(0.1,0.5)) +
tm_credits("(c)", position=c(0.65,0.85), size=1.5)
legend <- tm_shape(geometryDataSP2) +
tm_polygons("ECO_INACTSigTest",palette="RdBu",midpoint = 0)+
tm_scale_bar(position=c(0.3,0.2), text.size=3)+
tm_compass(north=0, position=c(0.4,0.5),size = 2)+
tm_layout(legend.only = TRUE, legend.position=c(3,0.4),asp=0.1)
t=tmap_arrange(tm1,tm2,tm3,legend, nrow=2)
t
have a look at the local R2 of the model
tmap_mode("plot")
## tmap mode set to plotting
tm1 <- tm_shape(geometryDataSP2) +
tm_polygons("localR2",palette="RdBu")+
tm_legend(show=FALSE)+
tm_layout(frame=FALSE,bg.color = "transparent")
# tm_credits("(a)", position=c(0.1,0.85), size=1.5)
legend <- tm_shape(geometryDataSP2) +
tm_polygons("localR2",palette="RdBu")+
tm_scale_bar(position=c(0,0.1), text.size=0.6)+
tm_compass(north=0, position=c(0,0.8))+
tm_layout(legend.only = TRUE, legend.position=c(0,0.4),asp=0.1)
t=tmap_arrange(tm1,legend)
t
Some codes refer from : https://andrewmaclachlan.github.io/CASA0005repo/gwr-and-spatially-lagged-regression.html