Variable Names: Flow: Migration flow –> make gravity model of this variable, vi1_origpop: origin population size –> emission factor, wj3_destmedinc: destination median income –> attraction factor,
Load libraries
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(MASS)
library(reshape2)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stplanr)
## Warning: package 'stplanr' was built under R version 4.1.2
library(ggplot2)
Read in data
ausdata <- read.csv("aussie_flow.csv")
ausdata <- ausdata[ausdata$Orig_code != ausdata$Dest_code,]
head(ausdata)
## Origin Orig_code Destination Dest_code Flow vi1_origpop
## 2 Greater Sydney 1GSYD Rest of NSW 1RNSW 91031 4391673
## 3 Greater Sydney 1GSYD Greater Melbourne 2GMEL 22601 4391673
## 4 Greater Sydney 1GSYD Rest of Vic 2RVIC 4416 4391673
## 5 Greater Sydney 1GSYD Greater Brisbane 3GBRI 22888 4391673
## 6 Greater Sydney 1GSYD Rest of Qld 3RQLD 27445 4391673
## 7 Greater Sydney 1GSYD Greater Adelaide 4GADE 5817 4391673
## wj1_destpop vi2_origunemp wj2_destunemp vi3_origmedinc wj3_destmedinc
## 2 2512952 5.74 6.12 780.64 509.97
## 3 3999981 5.74 5.47 780.64 407.95
## 4 1345717 5.74 5.17 780.64 506.58
## 5 2065998 5.74 5.86 780.64 767.08
## 6 2253723 5.74 6.22 780.64 446.48
## 7 1225235 5.74 5.78 780.64 445.53
## vi4_origpctrent wj4_destpctrent FlowNoIntra offset dist
## 2 31.77 27.20 91031 1 392.1234
## 3 31.77 27.34 22601 1 645.8323
## 4 31.77 24.08 4416 1 572.5680
## 5 31.77 33.19 22888 1 1099.0980
## 6 31.77 32.57 27445 1 752.7641
## 7 31.77 28.27 5817 1 1080.6682
Plot shapefile
ausshp = st_read("aus_gccsa/aus_gccsa.shp")
## Reading layer `aus_gccsa' from data source
## `/Users/jessieeastburn/opt/anaconda3/envs/GEOG6160/Lab12/aus_gccsa/aus_gccsa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 15 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2083066 ymin: -4965775 xmax: 2345637 ymax: -1116309
## Projected CRS: GDA94 / Geoscience Australia Lambert
plot(st_geometry(ausshp))
Preprocessing
dist <- st_distance(st_centroid(ausshp))
## Warning in st_centroid.sf(ausshp): st_centroid assumes attributes are constant
## over geometries of x
dist <- units::drop_units(dist)
ausdata2 <- dplyr::select(ausdata, Orig_code, Dest_code, Flow, everything())
ausdata2$Origin <- as.factor(ausdata$Origin)
ausdata2$Destination <- as.factor(ausdata$Destination)
ausdata2 <- ausdata2[ausdata2$Orig_code!=ausdata2$Dest_code,]
travel_network <- od2line(flow = ausdata2, zones = ausshp)
## Creating centroids representing desire line start and end points.
w <- ausdata2$Flow / max(ausdata2$Flow) * 10
plot(st_geometry(ausshp))
plot(st_geometry(travel_network), lwd = w, add=TRUE)
ausdata2mat <- dcast(ausdata2, Origin ~ Destination, sum,
value.var = "Flow", margins=c("Origin", "Destination"))
ausdata2mat[1:10,1:10]
## Origin Australian Capital Territory Greater Adelaide
## 1 Australian Capital Territory 0 1359
## 2 Greater Adelaide 1993 0
## 3 Greater Brisbane 3134 3052
## 4 Greater Darwin 832 2105
## 5 Greater Hobart 565 533
## 6 Greater Melbourne 4724 6021
## 7 Greater Perth 1666 2631
## 8 Greater Sydney 10670 5817
## 9 Rest of NSW 15779 3617
## 10 Rest of NT 229 1296
## Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1 4331 617 369 5229
## 2 5447 1851 602 8810
## 3 0 1812 1386 13078
## 4 2769 0 243 1953
## 5 1307 190 0 3016
## 6 13057 2023 2135 0
## 7 5081 1300 1018 11729
## 8 22888 1985 1644 22601
## 9 21300 2248 970 12407
## 10 896 2684 96 700
## Greater Perth Greater Sydney Rest of NSW
## 1 1514 6662 15399
## 2 3829 5421 3518
## 3 4812 12343 16061
## 4 2152 1238 2178
## 5 899 1224 1000
## 6 10116 15560 11095
## 7 0 6516 4066
## 8 10574 0 91031
## 9 4990 53562 0
## 10 699 406 1432
Model 1
mu <- 1
vi1_mu <- ausdata2$vi1_origpop^mu
alpha <- 1
wj2_alpha <- ausdata2$wj3_destmedinc^alpha
beta <- -2
dist_beta <- ausdata2$dist^beta
k <- sum(ausdata2$Flow) / sum(vi1_mu * wj2_alpha * dist_beta)
k
## [1] 3.242849
The value of k is 3.242849
Use these coefficients to estimate interzone flows
ausdata2$unconstrainedEst1 <- round(k * vi1_mu * wj2_alpha * dist_beta, 0)
Estimate the observed total number of flows
sum(ausdata2$Flow)
## [1] 1313518
Predicted total
sum(ausdata2$unconstrainedEst1)
## [1] 1313515
Turn the set of interzone flows into a matrix
ausdata2mat1 <- dcast(ausdata2, Origin ~ Destination, sum,
value.var = "unconstrainedEst1", margins=c("Origin", "Destination"))
ausdata2mat1[1:10,1:10]
## Origin Australian Capital Territory Greater Adelaide
## 1 Australian Capital Territory 0 276
## 2 Greater Adelaide 1876 0
## 3 Greater Brisbane 2605 2089
## 4 Greater Darwin 63 119
## 5 Greater Hobart 866 98
## 6 Greater Melbourne 61593 3949
## 7 Greater Perth 529 706
## 8 Greater Sydney 73262 5433
## 9 Rest of NSW 109365 1703
## 10 Rest of NT 27 32
## Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1 391 190 733 2541
## 2 2133 2450 566 1108
## 3 0 4283 571 951
## 4 212 0 20 29
## 5 101 73 0 770
## 6 3464 2156 15794 0
## 7 489 1025 270 309
## 8 9043 3377 4425 13929
## 9 3254 1356 3184 7132
## 10 70 419 9 12
## Greater Perth Greater Sydney Rest of NSW
## 1 90 5268 8978
## 2 821 2656 950
## 3 557 4329 1779
## 4 58 80 37
## 5 55 376 309
## 6 1279 24277 14191
## 7 0 523 275
## 8 1244 0 47234
## 9 572 41373 0
## 10 30 31 16
Calculate the RMSE of your estimates
sqrt(mean((ausdata2$Flow-ausdata2$unconstrainedEst1)^2))
## [1] 25197.33
Model 2
Use the glm function to fit a Poisson model of the flow data as a function of origin population and destination income
uncosim <- glm(Flow ~ log(vi1_origpop)+log(wj3_destmedinc)+log(dist),
family = poisson(link = "log"), data = ausdata2)
Provide the output of the summary of the model
summary(uncosim)
##
## Call:
## glm(formula = Flow ~ log(vi1_origpop) + log(wj3_destmedinc) +
## log(dist), family = poisson(link = "log"), data = ausdata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -181.36 -57.51 -22.76 9.76 469.12
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.4735077 0.0248636 300.58 <2e-16 ***
## log(vi1_origpop) 0.5909474 0.0009153 645.60 <2e-16 ***
## log(wj3_destmedinc) -0.1919157 0.0033607 -57.11 <2e-16 ***
## log(dist) -0.8306163 0.0010186 -815.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2750417 on 209 degrees of freedom
## Residual deviance: 1479019 on 206 degrees of freedom
## AIC: 1481026
##
## Number of Fisher Scoring iterations: 5
List the model coefficients, give the matching model coefficient (μ, α, β and k):
Intercept (log(k)): 7.4735077,
log(vi1_origpop) (μ): 0.5909474,
log(wj3_destmedinc) (α): -0.1919157,
log(dist) (β): -0.8306163.
Estimate the predicted flows
ausdata2$unconstrainedEst2 = round(fitted(uncosim), 0)
head(ausdata2$unconstrainedEst2)
## [1] 31433 21677 22982 12347 18759 13898
ausdata2mat2 <- dcast(ausdata2, Origin ~ Destination, sum, value.var = "unconstrainedEst2", margins=c("Origin", "Destination"))
ausdata2mat2[1:10,1:10]
## Origin Australian Capital Territory Greater Adelaide
## 1 Australian Capital Territory 0 2594
## 2 Greater Adelaide 4719 0
## 3 Greater Brisbane 5928 8185
## 4 Greater Darwin 766 1512
## 5 Greater Hobart 2515 1542
## 6 Greater Melbourne 24764 11976
## 7 Greater Perth 2964 5056
## 8 Greater Sydney 27055 13898
## 9 Rest of NSW 28968 7782
## 10 Rest of NT 508 836
## Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1 2156 1447 3906 6880
## 2 5416 5197 4359 6053
## 3 0 7184 4792 6229
## 4 1383 0 728 892
## 5 1122 885 0 3823
## 6 8153 6066 21373 0
## 7 3121 3845 3405 3782
## 8 12347 7430 12808 21677
## 9 7322 4611 10128 14883
## 10 827 1576 500 587
## Greater Perth Greater Sydney Rest of NSW
## 1 1209 6280 10148
## 2 3751 5869 4960
## 3 3499 7881 7054
## 4 830 913 855
## 5 894 1914 2284
## 6 5551 18111 18766
## 7 0 3176 3147
## 8 5580 0 31433
## 9 3663 20827 0
## 10 600 584 571
sum(ausdata2$unconstrainedEst2) #calculate the total estimated flow
## [1] 1313517
sum(ausdata2$Flow) #total observed flow
## [1] 1313518
Calculate the RMSE
sqrt(mean((ausdata2$Flow-ausdata2$unconstrainedEst2)^2))
## [1] 10658.04
Is β above or below one? Why do you think it has that value? β is above 1. It indicates that the destination median income increase as distance increases.