packages <- c('sf','tmap','tidyverse','ggpubr','corrplot','GWmodel','spdep','sp')
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}## Parsed with column specification:
## cols(
## .default = col_double(),
## area_id = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 56 parsing failures.
## row col expected actual file
## 108 prevalence_obese_reception a double na 'data/child_obesity_london_ward_2013-2014.csv'
## 108 prevalence_obese_y6 a double na 'data/child_obesity_london_ward_2013-2014.csv'
## 110 prevalence_obese_reception a double na 'data/child_obesity_london_ward_2013-2014.csv'
## 110 prevalence_obese_y6 a double na 'data/child_obesity_london_ward_2013-2014.csv'
## 397 prevalence_obese_reception a double na 'data/child_obesity_london_ward_2013-2014.csv'
## ... .......................... ........ ...... ..............................................
## See problems(...) for more details.
## Parsed with column specification:
## cols(
## area_id = col_character(),
## gp_patients = col_double(),
## gp_patients_diabetes = col_double(),
## estimated_diabetes_prevalence = col_double()
## )
wardpcd <- inner_join(ward, obesity, by = c("area_id" = "area_id"))
wardpcd <- inner_join(wardpcd, diabetes, by = c("area_id" = "area_id"))## Reading layer `wardmap' from data source `C:\TEMP\Training\MITB\ISSS608-VA\Project2\Project\data\wardmap.shp' using driver `ESRI Shapefile'
## Simple feature collection with 638 features and 6 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## proj4string: +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +datum=OSGB36 +units=m +no_defs
## tmap mode set to plotting
tm_shape(map,
name="Ward") +
tm_fill("bor_nm",
alpha = 0.5,
colorNA = "black",
n = 5,
style = "equal",
palette = "Pastel1",
title = "Borough",
legend.show = FALSE,
legend.z = 0.1,
legend.is.portrait = FALSE,
legend.hist = TRUE,
legend.hist.z = 0.1) +
tm_borders(lwd=1.0, alpha = 0.5)## Warning: Number of levels of the variable "bor_nm" is 33, which is
## larger than max.categories (which is 30), so levels are combined. Set
## tmap_options(max.categories = 33) in the layer function to show all levels.
## Warning: Column `ward_id`/`area_id` joining factor and character vector,
## coercing into character vector
tm_shape(mapward,
name="Ward") +
tm_fill("bor_nm",
alpha = 0.5,
colorNA = "black",
n = 5,
style = "equal",
palette = "Pastel1",
title = "Borough",
legend.show = FALSE,
legend.z = 0.1,
legend.is.portrait = FALSE,
legend.hist = TRUE,
legend.hist.z = 0.1) +
tm_borders(lwd=1.0, alpha = 0.5)## Warning: Number of levels of the variable "bor_nm" is 33, which is
## larger than max.categories (which is 30), so levels are combined. Set
## tmap_options(max.categories = 33) in the layer function to show all levels.
How should we have missing Y variable values in regression?
## Warning: Column `ward_id`/`area_id` joining factor and character vector,
## coercing into character vector
tm_shape(mapward1,
name="Ward") +
tm_fill("population",
alpha = 0.5,
colorNA = "black",
n = 5,
style = "equal",
palette = "Pastel1",
title = "Population",
legend.show = TRUE,
legend.z = 0.1,
legend.is.portrait = FALSE,
legend.hist = TRUE,
legend.hist.z = 0.1) +
tm_borders(lwd=1, alpha = 0.5)## Legend labels were too wide. Therefore, legend.text.size has been set to 0.23. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
wardplot_wide_sf <- mapward %>%
select(ward_id,ward_nm,bor_id,bor_nm,long,lat,
weight,
volume,
fat,
saturate,
salt,
sugar,
protein,
carb,
fibre,
alcohol,
energy_fat,
energy_saturate,
energy_sugar,
energy_protein,
energy_carb,
energy_fibre,
energy_alcohol,
energy_tot,
energy_density,
h_nutrients_weight,
h_nutrients_weight_norm,
h_nutrients_calories,
h_nutrients_calories_norm,
h_items,
h_items_norm,
h_items_weight,
h_items_weight_norm,
prevalence_overweight_reception,
prevalence_overweight_y6,
prevalence_obese_reception,
prevalence_obese_y6,
estimated_diabetes_prevalence)## tmap mode set to plotting
tm_shape(mapward,
name="Ward") +
tm_fill("bor_nm",
alpha = 0.5,
colorNA = "black",
n = 5,
style = "equal",
palette = "Pastel1",
title = "Borough",
legend.show = FALSE,
legend.z = 0.1,
legend.is.portrait = FALSE,
legend.hist = TRUE,
legend.hist.z = 0.1) +
tm_borders(lwd=1, alpha = 0.5) +
tm_bubbles(size=0.1,col="red")## Warning: Number of levels of the variable "bor_nm" is 33, which is
## larger than max.categories (which is 30), so levels are combined. Set
## tmap_options(max.categories = 33) in the layer function to show all levels.
Should we use the points from the data or the centroid from the polygons?
tm_shape(wardplot_wide_sf) +
tm_fill("bor_nm",
alpha = 0.5,
colorNA = "black",
n = 5,
style = "equal",
palette = "Pastel1",
title = "Borough",
legend.show = FALSE,
legend.z = 0.1,
legend.is.portrait = FALSE,
legend.hist = TRUE,
legend.hist.z = 0.1) +
tm_borders(lwd=1, alpha = 0.5) +
tm_shape(wardpoint_wide_sf) +
tm_bubbles(size=0.1, col="red", alpha=0.5)## Warning: Number of levels of the variable "bor_nm" is 33, which is
## larger than max.categories (which is 30), so levels are combined. Set
## tmap_options(max.categories = 33) in the layer function to show all levels.
wardplot_long_sf <- wardplot_wide_sf %>%
# st_drop_geometry() %>%
pivot_longer(col=weight:estimated_diabetes_prevalence, names_to="variable", values_to="value") %>%
select(ward_id,ward_nm,bor_id,bor_nm,variable,value,long,lat,geometry)
wardplot_long_sf = st_as_sf(wardplot_long_sf)wardplot_long = as_tibble(wardplot_long_sf)
wardplot_long_sp = as(wardplot_long_sf, "Spatial")
# st_drop_geometry() %>%
# pivot_longer(col=weight:estimated_diabetes_prevalence, names_to="variable", values_to="value")ggplot(wardplot_wide, aes(x = weight, y = prevalence_obese_reception)) +
geom_point() +
geom_smooth(method = "lm", col = "red", span = 0.15) +
facet_wrap(~bor_id) +
theme_minimal()ggplot(wardplot_long, aes(value, variable, fill = variable)) +
geom_col() +
# facet_wrap(~bor_id) +
theme_minimal()h1 <- ggplot(data=wardplot_wide, aes(x= `weight`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h2 <- ggplot(data=wardplot_wide, aes(x= `volume`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h3 <- ggplot(data=wardplot_wide, aes(x= `fat`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h4 <- ggplot(data=wardplot_wide, aes(x= `saturate`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h5 <- ggplot(data=wardplot_wide, aes(x= `salt`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h6 <- ggplot(data=wardplot_wide, aes(x= `sugar`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h7 <- ggplot(data=wardplot_wide, aes(x= `protein`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h8 <- ggplot(data=wardplot_wide, aes(x= `carb`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h9 <- ggplot(data=wardplot_wide, aes(x= `fibre`)) +
geom_histogram(bins=20, color="black", fill="light blue")
h10 <- ggplot(data=wardplot_wide, aes(x= `alcohol`))
# geom_histogram(bins=20, color="black", fill="light blue")
# h11 <- ggplot(data=wardplot_wide, aes(x= `energy_fat`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h12 <- ggplot(data=wardplot_wide, aes(x= `energy_saturate`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h13 <- ggplot(data=wardplot_wide, aes(x= `energy_sugar`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h14 <- ggplot(data=wardplot_wide, aes(x= `energy_protein`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h15 <- ggplot(data=wardplot_wide, aes(x= `energy_carb`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h16 <- ggplot(data=wardplot_wide, aes(x= `energy_fibre`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h17 <- ggplot(data=wardplot_wide, aes(x= `energy_alcohol`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h18 <- ggplot(data=wardplot_wide, aes(x= `energy_tot`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h19 <- ggplot(data=wardplot_wide, aes(x= `energy_density`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h20 <- ggplot(data=wardplot_wide, aes(x= `h_nutrients_weight`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h21 <- ggplot(data=wardplot_wide, aes(x= `h_nutrients_weight_norm`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h22 <- ggplot(data=wardplot_wide, aes(x= `h_nutrients_calories`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h23 <- ggplot(data=wardplot_wide, aes(x= `h_nutrients_calories_norm`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h24 <- ggplot(data=wardplot_wide, aes(x= `h_items`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h25 <- ggplot(data=wardplot_wide, aes(x= `h_items_norm`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h26 <- ggplot(data=wardplot_wide, aes(x= `h_items_weight`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h27 <- ggplot(data=wardplot_wide, aes(x= `h_items_weight_norm`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
# h28 <- ggplot(data=wardplot_wide, aes(x= `representativeness_norm`)) +
# geom_histogram(bins=20, color="black", fill="light blue")
#ggarrange(h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11,h12,h13,h14,h15,h16,h17,h18,h19,h20,h21,h22,h23,h24,h25,h26,h27,h28, ncol = 7, nrow = 4)
ggarrange(h1,h2,h3,h4,h5,h6,h7,h8,h9,h10, ncol = 5, nrow = 2)ggplot(data=wardplot_long, aes(x= value)) +
geom_histogram(bins=20, color="black", fill="light blue") +
facet_wrap( ~ variable, ncol = 3, scales = "free") +
theme_minimal()corrplot.mixed(ward.cor,
lower = "ellipse",
upper = "number",
p.mat = ward.sig$p,
sig.level = .05,
tl.pos = "lt",
bg = "white",
diag = "l",
order="AOE",
tl.col = "black")gwss.res <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1000)
gwss.rob <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1000, quantile = T)quick.map <- function(spdf,var,legend.title,main.title, dot.size = 0.2) {
if (class(spdf) == "SpatialPointsDataFrame"){
p = tm_shape(spdf)+
tm_dots(var, title = legend.title, size = dot.size)+
tm_layout(title = main.title, title.size =0.7, legend.title.size =0.6)
}
if (class(spdf) == "SpatialPolygonsDataFrame"){
p = tm_shape(spdf)+
tm_fill(var, title = legend.title)+
tm_layout(title = main.title, title.size =0.7, legend.title.size =0.6)
}
return(p)
}## tmap mode set to plotting
## Variable "sugar_LSKe" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable "carb_LSKe" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable "Cov_sugar.carb" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable "Corr_sugar.carb" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable "Spearman_rho_sugar.carb" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tm_shape(gwss.rob$SDF)+
tm_fill(c("sugar_LM", "sugar_Median"), title = c("Sugar\nMean", "Sugar\nMedian"))tm_shape(gwss.rob$SDF)+
tm_fill(c("sugar_IQR", "sugar_QI"), title = c("Sugar\nLocal IQR", "Sugar\nLocal Quantile Imbalance"))## Variable "sugar_QI" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tm_shape(gwss.rob$SDF)+
tm_fill(c("carb_LM", "carb_Median"), title = c("Carb\nMean", "Carb\nMedian"))tm_shape(gwss.rob$SDF)+
tm_fill(c("carb_IQR", "carb_QI"), title = c("Carb\nLocal IQR", "Carb\nLocal Quantile Imbalance"))## Variable "carb_QI" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
# Different bandwidth and kernel
# 1. the original GWSS
gwss.1 <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1000)
# 2. with a larger bandwidth
gwss.2 <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1500)
# 3. with gaussian kernel
gwss.3 <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1000, kernel = "gaussian")
# 4. with a larger bandwidth and a gaussian kernel
gwss.4 <- gwss(wardplot_wide_sp,vars=c("sugar", "carb"), bw=1500, kernel = "gaussian")p1 = quick.map(gwss.1$SDF,"sugar_LM","","bw=1000, \nbi-square")
p2 = quick.map(gwss.2$SDF,"sugar_LM","","bw=1500, \nbi-square")
p3 = quick.map(gwss.3$SDF,"sugar_LM","","bw=1000, \ngaussian")
p4 = quick.map(gwss.4$SDF,"sugar_LM","","bw=1500, \ngaussian")
tmap_arrange(p1,p2,p3,p4, ncol = 2)## Neighbour list object:
## Number of regions: 544
## Number of nonzero links: 2980
## Percentage nonzero weights: 1.006974
## Average number of links: 5.477941
## Neighbour list object:
## Number of regions: 544
## Number of nonzero links: 2868
## Percentage nonzero weights: 0.9691285
## Average number of links: 5.272059
zone.net.queen <- nb2lines(zone.nb.queen,coords=coordinates(wardplot_wide_sp))
# default projection is NA: change this as below
proj4string(zone.net.queen) = proj4string(wardplot_wide_sp)
# create the map
tm_shape(wardplot_wide_sp) + tm_borders(col='lightgrey') +
tm_shape(zone.net.queen) + tm_lines(col='red')zone.net.rook <- nb2lines(zone.nb.rook,coords=coordinates(wardplot_wide_sp))
# default projection is NA: change this as below
proj4string(zone.net.rook) = proj4string(wardplot_wide_sp)
# create the map
tm_shape(wardplot_wide_sp) + tm_borders(col='lightgrey') +
tm_shape(zone.net.rook) + tm_lines(col='red')tm_shape(wardplot_wide_sp[zone.nb.queen[[100]],]) + tm_polygons(borders = "black")+
tm_shape(wardplot_wide_sp[100,]) + tm_polygons("dodgerblue")+
tm_shape(zone.net.queen) + tm_lines(col='red')+
tm_shape(wardplot_wide_sp) + tm_dots(col='red', size = 1.5)+
tm_layout(frame = F)tm_shape(wardplot_wide_sp[zone.nb.rook[[100]],]) + tm_polygons(borders = "black")+
tm_shape(wardplot_wide_sp[100,]) + tm_polygons("dodgerblue")+
tm_shape(zone.net.rook) + tm_lines(col='red')+
tm_shape(wardplot_wide_sp) + tm_dots(col='red', size = 1.5)+
tm_layout(frame = F)# compute listw from nb object
zone.lw.queen = nb2listw(zone.nb.queen)
moran.test(wardplot_wide_sp$sugar,zone.lw.queen) ##
## Moran I test under randomisation
##
## data: wardplot_wide_sp$sugar
## weights: zone.lw.queen
##
## Moran I statistic standard deviate = 25.558, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6735339404 -0.0018416206 0.0006982794
##
## Moran I test under randomisation
##
## data: wardplot_wide_sp$sugar
## weights: zone.lw.rook
##
## Moran I statistic standard deviate = 25.265, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6772000818 -0.0018416206 0.0007223414
# derive range of Moran's I values
moran.range <- function(lw) {
wmat <- listw2mat(lw)
return(range(eigen((wmat + t(wmat))/ 2) $values))
}
moran.range(zone.lw.queen)## [1] -0.8793962 1.0220326
## [1] -0.8813781 1.0210687
wardplot_wide_sp$lI <- localmoran(wardplot_wide_sp$sugar,zone.lw.queen)[, 1]
tm_shape(wardplot_wide_sp) +
tm_polygons(col= 'lI',title= "Local Morans I",
legend.format=list (flag= "+"), midpoint = 0) +
tm_style('col_blind') +
tm_layout(legend.position = c("left", "top")) # Create the local p values
wardplot_wide_sp$pval <- localmoran(wardplot_wide_sp$sugar,zone.lw.queen)[, 5]
# Draw the map
tm_shape(wardplot_wide_sp) +
tm_polygons(col= 'pval',title= "p-value",breaks= c(0, 0.01, 0.05, 0.10, 1),
palette = "-Greens") +
tm_layout(legend.position = c("left", "top")) reg.mod1 = as.formula(prevalence_overweight_reception ~weight+volume+fat+saturate+salt+sugar+protein+carb+fibre+alcohol+energy_fat+energy_saturate+energy_sugar+energy_protein+energy_carb+energy_fibre+energy_alcohol+energy_tot+energy_density+h_nutrients_weight+h_nutrients_weight_norm+h_nutrients_calories+h_nutrients_calories_norm+h_items+h_items_norm+h_items_weight+h_items_weight_norm)
reg.mod2 = as.formula(prevalence_overweight_y6 ~weight+volume+fat+saturate+salt+sugar+protein+carb+fibre+alcohol+energy_fat+energy_saturate+energy_sugar+energy_protein+energy_carb+energy_fibre+energy_alcohol+energy_tot+energy_density+h_nutrients_weight+h_nutrients_weight_norm+h_nutrients_calories+h_nutrients_calories_norm+h_items+h_items_norm+h_items_weight+h_items_weight_norm)
reg.mod3 = as.formula(prevalence_obese_reception ~weight+volume+fat+saturate+salt+sugar+protein+carb+fibre+alcohol+energy_fat+energy_saturate+energy_sugar+energy_protein+energy_carb+energy_fibre+energy_alcohol+energy_tot+energy_density+h_nutrients_weight+h_nutrients_weight_norm+h_nutrients_calories+h_nutrients_calories_norm+h_items+h_items_norm+h_items_weight+h_items_weight_norm)
reg.mod4 = as.formula(prevalence_obese_y6 ~weight+volume+fat+saturate+salt+sugar+protein+carb+fibre+alcohol+energy_fat+energy_saturate+energy_sugar+energy_protein+energy_carb+energy_fibre+energy_alcohol+energy_tot+energy_density+h_nutrients_weight+h_nutrients_weight_norm+h_nutrients_calories+h_nutrients_calories_norm+h_items+h_items_norm+h_items_weight+h_items_weight_norm)
reg.mod5 = as.formula(estimated_diabetes_prevalence ~weight+volume+fat+saturate+salt+sugar+protein+carb+fibre+alcohol+energy_fat+energy_saturate+energy_sugar+energy_protein+energy_carb+energy_fibre+energy_alcohol+energy_tot+energy_density+h_nutrients_weight+h_nutrients_weight_norm+h_nutrients_calories+h_nutrients_calories_norm+h_items+h_items_norm+h_items_weight+h_items_weight_norm)
#coefficients(mod.OLS) # model coefficients
#confint(mod.OLS, level=0.95) # CIs for model parameters
#fitted(mod.OLS) # predicted values
#residuals(mod.OLS) # residuals
#vcov(mod.OLS) # covariance matrix for model parameters
#influence(mod.OLS) # regression diagnostics
#vif(mod.OLS)##
## Call:
## lm(formula = reg.mod1, data = wardplot_wide_sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.080951 -0.023481 0.000699 0.020772 0.109031
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.269e+00 2.743e+00 -1.556 0.12023
## weight -6.657e-04 2.282e-04 -2.917 0.00368 **
## volume -5.577e-06 2.604e-04 -0.021 0.98292
## fat 3.874e-02 5.079e-02 0.763 0.44592
## saturate -1.150e-01 2.788e-02 -4.125 4.32e-05 ***
## salt 1.218e-02 7.775e-02 0.157 0.87559
## sugar 7.776e-04 5.874e-03 0.132 0.89474
## protein -9.099e-02 5.415e-02 -1.680 0.09347 .
## carb 6.593e-02 3.684e-02 1.790 0.07410 .
## fibre -7.566e-01 2.499e-01 -3.027 0.00259 **
## alcohol -7.385e-01 2.982e-01 -2.477 0.01356 *
## energy_fat NA NA NA NA
## energy_saturate NA NA NA NA
## energy_sugar NA NA NA NA
## energy_protein NA NA NA NA
## energy_carb NA NA NA NA
## energy_fibre 1.353e-01 1.128e-01 1.199 0.23093
## energy_alcohol NA NA NA NA
## energy_tot NA NA NA NA
## energy_density -4.566e-01 1.644e-01 -2.777 0.00568 **
## h_nutrients_weight 2.442e+00 1.978e+00 1.235 0.21742
## h_nutrients_weight_norm NA NA NA NA
## h_nutrients_calories 4.810e-01 1.717e+00 0.280 0.77945
## h_nutrients_calories_norm NA NA NA NA
## h_items 1.471e-01 7.190e-02 2.045 0.04133 *
## h_items_norm NA NA NA NA
## h_items_weight -5.302e-03 6.093e-02 -0.087 0.93070
## h_items_weight_norm NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0324 on 521 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.4752, Adjusted R-squared: 0.4591
## F-statistic: 29.49 on 16 and 521 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: prevalence_overweight_reception
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.15284 0.152837 145.6288 < 2.2e-16 ***
## volume 1 0.00062 0.000619 0.5895 0.4429784
## fat 1 0.00046 0.000456 0.4346 0.5100114
## saturate 1 0.06119 0.061191 58.3047 1.081e-13 ***
## salt 1 0.01254 0.012537 11.9458 0.0005924 ***
## sugar 1 0.02649 0.026490 25.2406 6.965e-07 ***
## protein 1 0.00015 0.000154 0.1464 0.7021278
## carb 1 0.00661 0.006608 6.2961 0.0124029 *
## fibre 1 0.18205 0.182050 173.4638 < 2.2e-16 ***
## alcohol 1 0.02764 0.027643 26.3390 4.050e-07 ***
## energy_fibre 1 0.00109 0.001088 1.0368 0.3090289
## energy_density 1 0.01480 0.014802 14.1042 0.0001925 ***
## h_nutrients_weight 1 0.00094 0.000944 0.8999 0.3432437
## h_nutrients_calories 1 0.00142 0.001417 1.3500 0.2458146
## h_items 1 0.00628 0.006281 5.9845 0.0147617 *
## h_items_weight 1 0.00001 0.000008 0.0076 0.9306999
## Residuals 521 0.54679 0.001049
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = reg.mod2, data = wardplot_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.118177 -0.029203 0.000007 0.027633 0.193919
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.035e+01 3.655e+00 -2.832 0.004811 **
## weight -9.124e-04 3.042e-04 -3.000 0.002831 **
## volume 2.050e-04 3.478e-04 0.589 0.555869
## fat 5.870e-02 6.747e-02 0.870 0.384691
## saturate -1.799e-01 3.736e-02 -4.817 1.91e-06 ***
## salt -2.681e-01 1.032e-01 -2.597 0.009671 **
## sugar 1.655e-03 7.845e-03 0.211 0.833014
## protein -2.133e-01 7.217e-02 -2.955 0.003266 **
## carb 1.476e-01 4.884e-02 3.022 0.002631 **
## fibre -1.351e+00 3.307e-01 -4.086 5.09e-05 ***
## alcohol -1.477e+00 3.976e-01 -3.715 0.000226 ***
## energy_fat NA NA NA NA
## energy_saturate NA NA NA NA
## energy_sugar NA NA NA NA
## energy_protein NA NA NA NA
## energy_carb NA NA NA NA
## energy_fibre 2.702e-01 1.496e-01 1.806 0.071477 .
## energy_alcohol NA NA NA NA
## energy_tot NA NA NA NA
## energy_density -7.340e-01 2.192e-01 -3.349 0.000870 ***
## h_nutrients_weight 6.486e+00 2.620e+00 2.475 0.013627 *
## h_nutrients_weight_norm NA NA NA NA
## h_nutrients_calories -3.258e-02 2.281e+00 -0.014 0.988607
## h_nutrients_calories_norm NA NA NA NA
## h_items 3.617e-01 9.640e-02 3.752 0.000195 ***
## h_items_norm NA NA NA NA
## h_items_weight -4.170e-02 8.153e-02 -0.511 0.609259
## h_items_weight_norm NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0431 on 519 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.5473, Adjusted R-squared: 0.5334
## F-statistic: 39.22 on 16 and 519 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: prevalence_overweight_y6
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.48320 0.48320 260.0702 < 2.2e-16 ***
## volume 1 0.00248 0.00248 1.3340 0.2486184
## fat 1 0.02040 0.02040 10.9811 0.0009847 ***
## saturate 1 0.08153 0.08153 43.8812 8.743e-11 ***
## salt 1 0.00294 0.00294 1.5848 0.2086431
## sugar 1 0.01888 0.01888 10.1628 0.0015194 **
## protein 1 0.00112 0.00112 0.6013 0.4384483
## carb 1 0.03582 0.03582 19.2802 1.368e-05 ***
## fibre 1 0.36433 0.36433 196.0937 < 2.2e-16 ***
## alcohol 1 0.06310 0.06310 33.9623 9.875e-09 ***
## energy_fibre 1 0.00281 0.00281 1.5130 0.2192320
## energy_density 1 0.04646 0.04646 25.0038 7.840e-07 ***
## h_nutrients_weight 1 0.00501 0.00501 2.6984 0.1010542
## h_nutrients_calories 1 0.00411 0.00411 2.2119 0.1375615
## h_items 1 0.03329 0.03329 17.9193 2.726e-05 ***
## h_items_weight 1 0.00049 0.00049 0.2616 0.6092591
## Residuals 519 0.96428 0.00186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = reg.mod3, data = wardplot_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.058757 -0.015206 -0.000453 0.013419 0.079038
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.410e+00 1.882e+00 -2.875 0.004203 **
## weight -4.765e-04 1.552e-04 -3.070 0.002258 **
## volume -5.963e-05 1.770e-04 -0.337 0.736327
## fat 5.816e-02 3.669e-02 1.585 0.113541
## saturate -8.607e-02 1.899e-02 -4.532 7.28e-06 ***
## salt 5.439e-03 5.271e-02 0.103 0.917843
## sugar -1.922e-03 3.985e-03 -0.482 0.629798
## protein -1.256e-01 3.698e-02 -3.397 0.000735 ***
## carb 6.307e-02 2.627e-02 2.401 0.016728 *
## fibre -4.206e-01 1.748e-01 -2.407 0.016450 *
## alcohol -8.402e-01 2.034e-01 -4.131 4.22e-05 ***
## energy_fat NA NA NA NA
## energy_saturate NA NA NA NA
## energy_sugar NA NA NA NA
## energy_protein NA NA NA NA
## energy_carb NA NA NA NA
## energy_fibre -1.203e-02 7.771e-02 -0.155 0.877042
## energy_alcohol NA NA NA NA
## energy_tot NA NA NA NA
## energy_density -3.828e-01 1.119e-01 -3.422 0.000671 ***
## h_nutrients_weight 2.211e+00 1.431e+00 1.546 0.122815
## h_nutrients_weight_norm NA NA NA NA
## h_nutrients_calories 1.421e+00 1.232e+00 1.153 0.249357
## h_nutrients_calories_norm NA NA NA NA
## h_items 8.066e-02 4.962e-02 1.625 0.104701
## h_items_norm NA NA NA NA
## h_items_weight -2.011e-02 4.153e-02 -0.484 0.628386
## h_items_weight_norm NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02188 on 510 degrees of freedom
## (17 observations deleted due to missingness)
## Multiple R-squared: 0.5393, Adjusted R-squared: 0.5249
## F-statistic: 37.32 on 16 and 510 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: prevalence_obese_reception
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.122914 0.122914 256.6771 < 2.2e-16 ***
## volume 1 0.000245 0.000245 0.5118 0.474705
## fat 1 0.003291 0.003291 6.8731 0.009012 **
## saturate 1 0.026627 0.026627 55.6053 3.834e-13 ***
## salt 1 0.000115 0.000115 0.2406 0.623966
## sugar 1 0.012504 0.012504 26.1109 4.564e-07 ***
## protein 1 0.000901 0.000901 1.8810 0.170821
## carb 1 0.002804 0.002804 5.8562 0.015871 *
## fibre 1 0.078447 0.078447 163.8185 < 2.2e-16 ***
## alcohol 1 0.024409 0.024409 50.9723 3.243e-12 ***
## energy_fibre 1 0.000000 0.000000 0.0000 0.994974
## energy_density 1 0.007777 0.007777 16.2394 6.433e-05 ***
## h_nutrients_weight 1 0.002901 0.002901 6.0590 0.014165 *
## h_nutrients_calories 1 0.001553 0.001553 3.2427 0.072332 .
## h_items 1 0.001304 0.001304 2.7229 0.099534 .
## h_items_weight 1 0.000112 0.000112 0.2345 0.628386
## Residuals 510 0.244222 0.000479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = reg.mod4, data = wardplot_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.097398 -0.023144 -0.000532 0.022256 0.116113
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.066e+01 2.985e+00 -3.571 0.000389 ***
## weight -6.315e-04 2.477e-04 -2.549 0.011093 *
## volume -1.720e-04 2.834e-04 -0.607 0.544116
## fat 4.094e-02 5.621e-02 0.728 0.466748
## saturate -1.380e-01 3.043e-02 -4.536 7.15e-06 ***
## salt -2.635e-01 8.425e-02 -3.128 0.001861 **
## sugar -2.678e-03 6.391e-03 -0.419 0.675368
## protein -2.337e-01 5.892e-02 -3.967 8.31e-05 ***
## carb 1.493e-01 4.031e-02 3.703 0.000236 ***
## fibre -1.323e+00 2.736e-01 -4.834 1.77e-06 ***
## alcohol -1.404e+00 3.253e-01 -4.315 1.91e-05 ***
## energy_fat NA NA NA NA
## energy_saturate NA NA NA NA
## energy_sugar NA NA NA NA
## energy_protein NA NA NA NA
## energy_carb NA NA NA NA
## energy_fibre 2.736e-01 1.234e-01 2.217 0.027078 *
## energy_alcohol NA NA NA NA
## energy_tot NA NA NA NA
## energy_density -5.636e-01 1.785e-01 -3.157 0.001689 **
## h_nutrients_weight 6.527e+00 2.176e+00 3.000 0.002830 **
## h_nutrients_weight_norm NA NA NA NA
## h_nutrients_calories 5.495e-02 1.908e+00 0.029 0.977034
## h_nutrients_calories_norm NA NA NA NA
## h_items 2.722e-01 7.863e-02 3.462 0.000580 ***
## h_items_norm NA NA NA NA
## h_items_weight 1.409e-02 6.659e-02 0.212 0.832517
## h_items_weight_norm NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03509 on 515 degrees of freedom
## (12 observations deleted due to missingness)
## Multiple R-squared: 0.5632, Adjusted R-squared: 0.5497
## F-statistic: 41.51 on 16 and 515 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: prevalence_obese_y6
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.32962 0.32962 267.7185 < 2.2e-16 ***
## volume 1 0.00012 0.00012 0.0936 0.7597700
## fat 1 0.01383 0.01383 11.2352 0.0008616 ***
## saturate 1 0.03367 0.03367 27.3460 2.478e-07 ***
## salt 1 0.00792 0.00792 6.4295 0.0115188 *
## sugar 1 0.01078 0.01078 8.7562 0.0032275 **
## protein 1 0.00235 0.00235 1.9072 0.1678713
## carb 1 0.02456 0.02456 19.9510 9.773e-06 ***
## fibre 1 0.28486 0.28486 231.3599 < 2.2e-16 ***
## alcohol 1 0.04224 0.04224 34.3046 8.408e-09 ***
## energy_fibre 1 0.00394 0.00394 3.1989 0.0742775 .
## energy_density 1 0.02824 0.02824 22.9342 2.195e-06 ***
## h_nutrients_weight 1 0.00866 0.00866 7.0361 0.0082346 **
## h_nutrients_calories 1 0.00309 0.00309 2.5107 0.1136883
## h_items 1 0.02377 0.02377 19.3019 1.356e-05 ***
## h_items_weight 1 0.00006 0.00006 0.0448 0.8325168
## Residuals 515 0.63409 0.00123
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = reg.mod5, data = wardplot_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0688 -0.5674 -0.0276 0.5137 3.8819
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -82.587836 78.846701 -1.047 0.29537
## weight -0.018011 0.006647 -2.710 0.00695 **
## volume 0.046105 0.007562 6.097 2.10e-09 ***
## fat 1.661111 1.452838 1.143 0.25341
## saturate 2.571056 0.812513 3.164 0.00164 **
## salt 2.814148 2.257299 1.247 0.21307
## sugar -0.743700 0.170869 -4.352 1.62e-05 ***
## protein -4.313920 1.558560 -2.768 0.00584 **
## carb 0.757611 1.044027 0.726 0.46837
## fibre 28.114506 7.073269 3.975 8.03e-05 ***
## alcohol -16.325558 8.611982 -1.896 0.05855 .
## energy_fat NA NA NA NA
## energy_saturate NA NA NA NA
## energy_sugar NA NA NA NA
## energy_protein NA NA NA NA
## energy_carb NA NA NA NA
## energy_fibre -15.713635 3.234660 -4.858 1.57e-06 ***
## energy_alcohol NA NA NA NA
## energy_tot NA NA NA NA
## energy_density -13.086124 4.784177 -2.735 0.00644 **
## h_nutrients_weight -27.899436 56.014828 -0.498 0.61864
## h_nutrients_weight_norm NA NA NA NA
## h_nutrients_calories 83.906033 49.173132 1.706 0.08853 .
## h_nutrients_calories_norm NA NA NA NA
## h_items 1.596262 2.073717 0.770 0.44179
## h_items_norm NA NA NA NA
## h_items_weight 1.228896 1.746612 0.704 0.48200
## h_items_weight_norm NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9453 on 527 degrees of freedom
## Multiple R-squared: 0.7705, Adjusted R-squared: 0.7635
## F-statistic: 110.6 on 16 and 527 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Response: estimated_diabetes_prevalence
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 514.31 514.31 575.5323 < 2.2e-16 ***
## volume 1 621.79 621.79 695.8095 < 2.2e-16 ***
## fat 1 122.93 122.93 137.5587 < 2.2e-16 ***
## saturate 1 58.07 58.07 64.9796 5.110e-15 ***
## salt 1 0.26 0.26 0.2963 0.5864375
## sugar 1 112.78 112.78 126.2006 < 2.2e-16 ***
## protein 1 18.92 18.92 21.1711 5.268e-06 ***
## carb 1 80.16 80.16 89.7007 < 2.2e-16 ***
## fibre 1 3.95 3.95 4.4179 0.0360383 *
## alcohol 1 8.15 8.15 9.1243 0.0026447 **
## energy_fibre 1 22.56 22.56 25.2480 6.915e-07 ***
## energy_density 1 9.79 9.79 10.9509 0.0009996 ***
## h_nutrients_weight 1 0.01 0.01 0.0117 0.9139513
## h_nutrients_calories 1 5.01 5.01 5.6091 0.0182272 *
## h_items 1 1.84 1.84 2.0550 0.1522999
## h_items_weight 1 0.44 0.44 0.4950 0.4819999
## Residuals 527 470.94 0.89
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
n.min <- 200 # user-specified minimum bandwidth
n.max <- max(EUDM)+100 # user-specified maximum bandwidth
interval.size <- 100
fixed <- seq(n.min,n.max,by=interval.size)
b.func.gwr <- matrix(nrow=length(fixed),ncol=1)
for(i in 1:length(fixed)) {
g.gwr <- gwr.aic(fixed[i], Y = as.matrix(wardplot_wide_sp@data$prevalence_overweight_reception),
X = as.matrix(wardplot_wide_sp@data[, c("weight","volume","fat","saturate","salt","sugar","protein","carb","fibre","alcohol","energy_fat","energy_saturate","energy_sugar","energy_protein","energy_carb","energy_fibre","energy_alcohol","energy_tot","energy_density","h_nutrients_weight","h_nutrients_weight_norm","h_nutrients_calories","h_nutrients_calories_norm","h_items","h_items_norm","h_items_weight","h_items_weight_norm")]),
kernel="bisquare", adaptive=F,
dp.locat = coordinates(wardplot_wide_sp), dMat=EUDM)
b.func.gwr[i] <- g.gwr[1]
if(i%%10 ==0) cat(i, "\t")
}
fixed[which.min(b.func.gwr)]
xy <- data.frame(x = fixed,y = b.func.gwr)ggplot() +
geom_point(data = xy, aes(x=x, y=y), size = 0.7, alpha = 0.5) +
geom_line(data = xy, aes(x=x, y=y)) +
geom_vline(xintercept = gwr.bwd.f, colour = "red") +
scale_x_continuous(breaks = seq(100, max(EUDM), 200))+
labs(
subtitle = "GWR bandwidth function",
x = "Bandwidth size (m)",
y = "AIC")