Variable Names: Flow: Migration flow –> make gravity model of this variable, vi1_origpop: origin population size –> emission factor, wj3_destmedinc: destination median income –> attraction factor,

Load libraries

library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(MASS)
library(reshape2)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stplanr)
## Warning: package 'stplanr' was built under R version 4.1.2
library(ggplot2)

Read in data

ausdata <- read.csv("aussie_flow.csv")
ausdata <- ausdata[ausdata$Orig_code != ausdata$Dest_code,]
head(ausdata)
##           Origin Orig_code       Destination Dest_code  Flow vi1_origpop
## 2 Greater Sydney     1GSYD       Rest of NSW     1RNSW 91031     4391673
## 3 Greater Sydney     1GSYD Greater Melbourne     2GMEL 22601     4391673
## 4 Greater Sydney     1GSYD       Rest of Vic     2RVIC  4416     4391673
## 5 Greater Sydney     1GSYD  Greater Brisbane     3GBRI 22888     4391673
## 6 Greater Sydney     1GSYD       Rest of Qld     3RQLD 27445     4391673
## 7 Greater Sydney     1GSYD  Greater Adelaide     4GADE  5817     4391673
##   wj1_destpop vi2_origunemp wj2_destunemp vi3_origmedinc wj3_destmedinc
## 2     2512952          5.74          6.12         780.64         509.97
## 3     3999981          5.74          5.47         780.64         407.95
## 4     1345717          5.74          5.17         780.64         506.58
## 5     2065998          5.74          5.86         780.64         767.08
## 6     2253723          5.74          6.22         780.64         446.48
## 7     1225235          5.74          5.78         780.64         445.53
##   vi4_origpctrent wj4_destpctrent FlowNoIntra offset      dist
## 2           31.77           27.20       91031      1  392.1234
## 3           31.77           27.34       22601      1  645.8323
## 4           31.77           24.08        4416      1  572.5680
## 5           31.77           33.19       22888      1 1099.0980
## 6           31.77           32.57       27445      1  752.7641
## 7           31.77           28.27        5817      1 1080.6682

Plot shapefile

ausshp = st_read("aus_gccsa/aus_gccsa.shp")
## Reading layer `aus_gccsa' from data source 
##   `/Users/jessieeastburn/opt/anaconda3/envs/GEOG6160/Lab12/aus_gccsa/aus_gccsa.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 15 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2083066 ymin: -4965775 xmax: 2345637 ymax: -1116309
## Projected CRS: GDA94 / Geoscience Australia Lambert
plot(st_geometry(ausshp))

Preprocessing

dist <- st_distance(st_centroid(ausshp))
## Warning in st_centroid.sf(ausshp): st_centroid assumes attributes are constant
## over geometries of x
dist <- units::drop_units(dist)
ausdata2 <- dplyr::select(ausdata, Orig_code, Dest_code, Flow, everything())
ausdata2$Origin <- as.factor(ausdata$Origin)
ausdata2$Destination <- as.factor(ausdata$Destination)
ausdata2 <- ausdata2[ausdata2$Orig_code!=ausdata2$Dest_code,]


travel_network <- od2line(flow = ausdata2, zones = ausshp)
## Creating centroids representing desire line start and end points.
w <- ausdata2$Flow / max(ausdata2$Flow) * 10
plot(st_geometry(ausshp))
plot(st_geometry(travel_network), lwd = w, add=TRUE)

ausdata2mat <- dcast(ausdata2, Origin ~ Destination, sum, 
                   value.var = "Flow", margins=c("Origin", "Destination"))
ausdata2mat[1:10,1:10]
##                          Origin Australian Capital Territory Greater Adelaide
## 1  Australian Capital Territory                            0             1359
## 2              Greater Adelaide                         1993                0
## 3              Greater Brisbane                         3134             3052
## 4                Greater Darwin                          832             2105
## 5                Greater Hobart                          565              533
## 6             Greater Melbourne                         4724             6021
## 7                 Greater Perth                         1666             2631
## 8                Greater Sydney                        10670             5817
## 9                   Rest of NSW                        15779             3617
## 10                   Rest of NT                          229             1296
##    Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1              4331            617            369              5229
## 2              5447           1851            602              8810
## 3                 0           1812           1386             13078
## 4              2769              0            243              1953
## 5              1307            190              0              3016
## 6             13057           2023           2135                 0
## 7              5081           1300           1018             11729
## 8             22888           1985           1644             22601
## 9             21300           2248            970             12407
## 10              896           2684             96               700
##    Greater Perth Greater Sydney Rest of NSW
## 1           1514           6662       15399
## 2           3829           5421        3518
## 3           4812          12343       16061
## 4           2152           1238        2178
## 5            899           1224        1000
## 6          10116          15560       11095
## 7              0           6516        4066
## 8          10574              0       91031
## 9           4990          53562           0
## 10           699            406        1432

Model 1

mu <- 1
vi1_mu <- ausdata2$vi1_origpop^mu
alpha <- 1
wj2_alpha <- ausdata2$wj3_destmedinc^alpha
beta <- -2
dist_beta <- ausdata2$dist^beta
k <- sum(ausdata2$Flow) / sum(vi1_mu * wj2_alpha * dist_beta)
k
## [1] 3.242849

The value of k is 3.242849

Use these coefficients to estimate interzone flows

ausdata2$unconstrainedEst1 <- round(k * vi1_mu * wj2_alpha * dist_beta, 0)

Estimate the observed total number of flows

sum(ausdata2$Flow)
## [1] 1313518

Predicted total

sum(ausdata2$unconstrainedEst1)
## [1] 1313515

Turn the set of interzone flows into a matrix

ausdata2mat1 <- dcast(ausdata2, Origin ~ Destination, sum, 
                    value.var = "unconstrainedEst1", margins=c("Origin", "Destination"))

ausdata2mat1[1:10,1:10]
##                          Origin Australian Capital Territory Greater Adelaide
## 1  Australian Capital Territory                            0              276
## 2              Greater Adelaide                         1876                0
## 3              Greater Brisbane                         2605             2089
## 4                Greater Darwin                           63              119
## 5                Greater Hobart                          866               98
## 6             Greater Melbourne                        61593             3949
## 7                 Greater Perth                          529              706
## 8                Greater Sydney                        73262             5433
## 9                   Rest of NSW                       109365             1703
## 10                   Rest of NT                           27               32
##    Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1               391            190            733              2541
## 2              2133           2450            566              1108
## 3                 0           4283            571               951
## 4               212              0             20                29
## 5               101             73              0               770
## 6              3464           2156          15794                 0
## 7               489           1025            270               309
## 8              9043           3377           4425             13929
## 9              3254           1356           3184              7132
## 10               70            419              9                12
##    Greater Perth Greater Sydney Rest of NSW
## 1             90           5268        8978
## 2            821           2656         950
## 3            557           4329        1779
## 4             58             80          37
## 5             55            376         309
## 6           1279          24277       14191
## 7              0            523         275
## 8           1244              0       47234
## 9            572          41373           0
## 10            30             31          16

Calculate the RMSE of your estimates

sqrt(mean((ausdata2$Flow-ausdata2$unconstrainedEst1)^2))
## [1] 25197.33

Model 2

Use the glm function to fit a Poisson model of the flow data as a function of origin population and destination income

uncosim <- glm(Flow ~ log(vi1_origpop)+log(wj3_destmedinc)+log(dist), 
               family = poisson(link = "log"), data = ausdata2)

Provide the output of the summary of the model

summary(uncosim)
## 
## Call:
## glm(formula = Flow ~ log(vi1_origpop) + log(wj3_destmedinc) + 
##     log(dist), family = poisson(link = "log"), data = ausdata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -181.36   -57.51   -22.76     9.76   469.12  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          7.4735077  0.0248636  300.58   <2e-16 ***
## log(vi1_origpop)     0.5909474  0.0009153  645.60   <2e-16 ***
## log(wj3_destmedinc) -0.1919157  0.0033607  -57.11   <2e-16 ***
## log(dist)           -0.8306163  0.0010186 -815.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2750417  on 209  degrees of freedom
## Residual deviance: 1479019  on 206  degrees of freedom
## AIC: 1481026
## 
## Number of Fisher Scoring iterations: 5

List the model coefficients, give the matching model coefficient (μ, α, β and k):

Intercept (log(k)): 7.4735077,

log(vi1_origpop) (μ): 0.5909474,

log(wj3_destmedinc) (α): -0.1919157,

log(dist) (β): -0.8306163.

Estimate the predicted flows

ausdata2$unconstrainedEst2 = round(fitted(uncosim), 0)
head(ausdata2$unconstrainedEst2)
## [1] 31433 21677 22982 12347 18759 13898
ausdata2mat2 <- dcast(ausdata2, Origin ~ Destination, sum, value.var = "unconstrainedEst2", margins=c("Origin", "Destination"))
ausdata2mat2[1:10,1:10]
##                          Origin Australian Capital Territory Greater Adelaide
## 1  Australian Capital Territory                            0             2594
## 2              Greater Adelaide                         4719                0
## 3              Greater Brisbane                         5928             8185
## 4                Greater Darwin                          766             1512
## 5                Greater Hobart                         2515             1542
## 6             Greater Melbourne                        24764            11976
## 7                 Greater Perth                         2964             5056
## 8                Greater Sydney                        27055            13898
## 9                   Rest of NSW                        28968             7782
## 10                   Rest of NT                          508              836
##    Greater Brisbane Greater Darwin Greater Hobart Greater Melbourne
## 1              2156           1447           3906              6880
## 2              5416           5197           4359              6053
## 3                 0           7184           4792              6229
## 4              1383              0            728               892
## 5              1122            885              0              3823
## 6              8153           6066          21373                 0
## 7              3121           3845           3405              3782
## 8             12347           7430          12808             21677
## 9              7322           4611          10128             14883
## 10              827           1576            500               587
##    Greater Perth Greater Sydney Rest of NSW
## 1           1209           6280       10148
## 2           3751           5869        4960
## 3           3499           7881        7054
## 4            830            913         855
## 5            894           1914        2284
## 6           5551          18111       18766
## 7              0           3176        3147
## 8           5580              0       31433
## 9           3663          20827           0
## 10           600            584         571
sum(ausdata2$unconstrainedEst2) #calculate the total estimated flow
## [1] 1313517
sum(ausdata2$Flow) #total observed flow
## [1] 1313518

Calculate the RMSE

sqrt(mean((ausdata2$Flow-ausdata2$unconstrainedEst2)^2))
## [1] 10658.04

Is β above or below one? Why do you think it has that value? β is above 1. It indicates that the destination median income increase as distance increases.