knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
library(sp)
library(leaflet)
library(rgeos)
## rgeos version: 0.3-23, (SVN revision 546)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Linking to sp version: 1.2-4 
##  Polygon checking: TRUE
library(plyr)
library(readxl)
library(rgdal)
## rgdal: version: 1.2-8, (SVN revision 663)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.2, released 2016/10/24
##  Path to GDAL shared files: /Users/grant/Library/R/3.3/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Users/grant/Library/R/3.3/library/rgdal/proj
##  Linking to sp version: 1.2-5
library(PBSmapping)
## 
## -----------------------------------------------------------
## PBS Mapping 2.70.4 -- Copyright (C) 2003-2017 Fisheries and Oceans Canada
## 
## PBS Mapping comes with ABSOLUTELY NO WARRANTY;
## for details see the file COPYING.
## This is free software, and you are welcome to redistribute
## it under certain conditions, as outlined in the above file.
## 
## A complete user guide 'PBSmapping-UG.pdf' is located at 
## /Users/grant/Library/R/3.3/library/PBSmapping/doc/PBSmapping-UG.pdf
## 
## Packaged on 2017-06-28
## Pacific Biological Station, Nanaimo
## 
## All available PBS packages can be found at
## https://github.com/pbs-software
## 
## To see demos, type '.PBSfigs()'.
## -----------------------------------------------------------
library(png)
library(grid)
library(mapview)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:rgeos':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(utils)
library(tidyr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(RColorBrewer)
library(tmap)
library(ellipse)
require(lattice)
## Loading required package: lattice
library(ggplot2)

Summary

This documents the data, the code and the process behind the CEBRA app.

Data

The data on the boundaries of regions in Australia comes from Australian Bureau of Statistics. Dowload “Statistical Area Level 2 (SA2) ASGS Ed 2011 Digital Boundaires in ESRI Shapefile Format” into your home directory. Find more about statistical regions of Australia here.

Use a specific type of projection in order to calculate the area consistently proj4js in square meters. I validated that this projection method works with “gArea” using East Pilbara Shire which has an area of about 380,000 km².

        #Read the file with regional boundaries into a var "Aust"
        #Get the data 
        file<-"SA2_2011_AUST.shp"
        Aust<-readOGR(file, GDAL1_integer64_policy = TRUE)
## OGR data source with driver: ESRI Shapefile 
## Source: "SA2_2011_AUST.shp", layer: "SA2_2011_AUST"
## with 2214 features
## It has 12 fields
        #Transform the coord system to a specific type which will be useful later
        proj_Aust<-
        "+proj=aea +lat_1=-18 +lat_2=-36 +lat_0=0 +lon_0=134 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"

        #Generic projection
        proj<- "+proj=longlat +datum=WGS84"

        Aust <- spTransform(Aust, CRS(proj_Aust))
        #Check the plot
        #plot(Aust)

        #Add a data column with area calculated by gArea in m^2, divide by 10000 to get ha
        Aust@data<-mutate(Aust@data, Area_ha=gArea(Aust,byid=TRUE)/10000)

        #Look at the top 4 rows in order of area 
        head(Aust@data[order(-Aust@data$Area_ha),], 4)
##      SA2_MAIN11 SA2_5DIG11         SA2_NAME11 SA3_CODE11
## 1638  406021141      41141            Outback      40602
## 1870  508031203      51203 Leinster - Leonora      50803
## 1882  508051215      51215        Meekatharra      50805
## 1886  508061219      51219       East Pilbara      50806
##                    SA3_NAME11 SA4_CODE11                  SA4_NAME11
## 1638 Outback - North and East        406   South Australia - Outback
## 1870               Goldfields        508 Western Australia - Outback
## 1882                 Mid West        508 Western Australia - Outback
## 1886                  Pilbara        508 Western Australia - Outback
##      GCC_CODE11 GCC_NAME11 STE_CODE11        STE_NAME11   ALBERS_SQM
## 1638      4RSAU Rest of SA          4   South Australia 519519951490
## 1870      5RWAU Rest of WA          5 Western Australia 496740603473
## 1882      5RWAU Rest of WA          5 Western Australia 414505958216
## 1886      5RWAU Rest of WA          5 Western Australia 389540916479
##       Area_ha
## 1638 51951995
## 1870 49674060
## 1882 41450596
## 1886 38954092
        #Look at the names of the 10 largest by area regions 
        unique(Aust@data[order(-Aust@data$Area_ha),][1:10,][,"SA2_NAME11"])
##  [1] Outback                          Leinster - Leonora              
##  [3] Meekatharra                      East Pilbara                    
##  [5] Barkly                           Far Central West                
##  [7] Kambalda - Coolgardie - Norseman Tanami                          
##  [9] Far South West                   Petermann - Simpson             
## 2214 Levels: Abbotsford Aberfoyle Park ACT - East ACT - South West ... Zillmere

Populating the dataframe with real data

The first step is to populate regions with statistics on population. Dowload “Population Estimates by Age and Sex, Summary Statistics (ASGS 2011), 2010 and 2015” excel file from Australian Bureau of Statistics.

Extract populations numbers by region from this data file and merge it with “Aust” dataframe. Not every SA2 region has a population entry, but in total about 22 mil are accounted for, and according to spot check (East Pilbara) and the map seems to have been recorded in the correct regions in the dataframe.

        #Name of the file as downloaded 
        file<-"32350ds0009_sa2_summary_statistics_2010_2015.xls"
        #Read the file
        pop_regions <- read_excel(file, sheet="Table 1", skip=15)[1:3544,c(6,9)] %>% setNames(
                                                c("SA2_NAME11","Population"))
        #Get rid of rows without names
        pop_regions<-subset(pop_regions, !is.na(pop_regions$SA2_NAME11))

        #Check classes of columns
        class(pop_regions$Population)
## [1] "numeric"
        class(pop_regions$SA2_NAME11)
## [1] "character"
        #Check that all regions are distinct
        length(pop_regions$SA2_NAME11)==length(unique(pop_regions$SA2_NAME11))
## [1] TRUE
        #Merge population data with regions
        Aust<-merge(Aust, pop_regions, by="SA2_NAME11")

        #Check how many people are accounted for
        sum(Aust$Population, na.rm = TRUE)
## [1] 22015719

Make a map image with the population data.

        #Set eval to false because it takes a while
        #Plot
        pal_pop<- colorBin(c("white","darkslateblue"), domain= Aust$Population)

        map_pop<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
                addPolygons(fillColor = ~pal_pop(Population),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

        mapshot(map_pop, file = "Map_Population.png")

Show where the population is recorded in the “Aust” dataframe:

        img <- readPNG("Map_Population.png")
        grid.raster(img)

Subsetting the regional data

There are various ways to subset the regional data, for example by name, by area and by geographical location.The code below finds the names of the administrative regions that lie within four boxes defined by latitude and longitude.

We first define a matrix of coordinates for the box, then turn it into an R object of the “SpatialPolygonsDataFrame” class, with the the same projection method (CRS) as “Aust”, so that we can use the function “over” to create an index indicating the overlap between the box and administrative regions. Based on that index, we create a list of names for the regions near the coasts.

The slight complication is that we define the boxes in one global projection system, and then translate them into the Australia specific one, using “spTransform”.

Why are we doing this? In case we want to simulate random but plausible distribution of pests (i.e. near coasts).

        #Find the regions in the box
        boxSW<-cbind(c(114,114,120,120,114),c(-31,-36,-36,-31,-31))
        p = Polygon(boxSW)
        ps = Polygons(list(p), 1)
        sps = SpatialPolygons(list(ps))
        proj4string(sps) = CRS(proj)
        sps = spTransform(sps, CRS(proj_Aust))
        data = data.frame(f=99.9)
        spdf = SpatialPolygonsDataFrame(sps,data)
        indexSW<-over(Aust, spdf)
        regionsSW<-unique(Aust@data[!is.na(indexSW),]$SA2_NAME11)

        boxSE<-cbind(c(138,138,150,150,138),c(-36,-48,-48,-36,-36))
        p = Polygon(boxSE)
        ps = Polygons(list(p), 1)
        sps = SpatialPolygons(list(ps))
        proj4string(sps) = CRS(proj)
        sps = spTransform(sps, CRS(proj_Aust))
        data = data.frame(f=99.9)
        spdf = SpatialPolygonsDataFrame(sps,data)
        indexSE<-over(Aust, spdf)
        regionsSE<-unique(Aust@data[!is.na(indexSE),]$SA2_NAME11)

        boxEast<-cbind(c(146,146,155,155,146),c(-36,-18,-18,-36,-36))
        p = Polygon(boxEast)
        ps = Polygons(list(p), 1)
        sps = SpatialPolygons(list(ps))
        proj4string(sps) = CRS(proj)
        sps = spTransform(sps, CRS(proj_Aust))
        data = data.frame(f=99.9)
        spdf = SpatialPolygonsDataFrame(sps,data)
        indexEast<-over(Aust, spdf)
        regionsEast<-unique(Aust@data[!is.na(indexEast),]$SA2_NAME11)

        boxNorth<-cbind(c(132,132,155,155,132),c(-12,-18,-18,-12,-12))
        p = Polygon(boxNorth)
        ps = Polygons(list(p), 1)
        sps = SpatialPolygons(list(ps))
        proj4string(sps) = CRS(proj)
        sps = spTransform(sps, CRS(proj_Aust))
        data = data.frame(f=99.9)
        spdf = SpatialPolygonsDataFrame(sps,data)
        indexNorth<-over(Aust, spdf)
        regionsNorth<-unique(Aust@data[!is.na(indexNorth),]$SA2_NAME11)

Now that we have names of the regions from which we want to simulate pest invasion and industry data, we can subset the “Aust” object to focus on those regions.

        Aust_Coast <-subset(Aust,
                            (Aust@data[,"SA2_NAME11"] %in% regionsSW)|(
                                    Aust@data[,"SA2_NAME11"] %in% regionsSE)|
                                    (Aust@data[,"SA2_NAME11"] %in% regionsEast)|
                                    (Aust@data[,"SA2_NAME11"] %in% regionsNorth))

        #To see the map of the selected regions
        map<-leaflet(data = spTransform(Aust_Coast,CRS(proj))) %>% addPolygons()
        mapshot(map, file = "Map_Coast_Aust.png")

Show map of selected regions:

        img <- readPNG("Map_Coast_Aust.png")
        grid.raster(img)

Data on industries

Data for various agricultural industries is available on SA2 scale, download csv file “Agricultural commodities, Australia, state, territory and SA2 - (Data by estimated value of agricultural operations class)” from Australian Bureau of Statistics.

This data requires quite a bit of cleaning. It is very detailed containing 549 categories of observations on SA2 scale. Apparently, only aggregated data on $ values is available, Total Values, but there is very detailed data on the areas of cultivation, and yield in weight per ha, etc. For instance, if someone is interested in fennel… the data is there.

We will extract approximate information on distribution of several key commodities: Broadacre crops, Hay, Flowers, Fruit and Nuts and Grapes, Vegetables, and Livestock. Then we merge it with spatial dataframe Aust, and make a visual map of each. The units are ha.

This is not a precise operation and should be viewed as indicative of distribution because there are some unanswered questions about the dataset. For example, several values for the same region show up under the same category - I decided to add them, speculating that these are subregional data, but another explanation is possible. I did not check.

        #Read the file
        file<-"71210do042_201011.csv"
        agri_regions <- read.csv(file, skip=4)
        #Select only relevant columns
        agri_regions <-select(agri_regions, c(2,6,7))
        names(agri_regions)<- c("SA2_NAME11","Commodity","Value")
        #Make sure they are in the right format
        agri_regions$SA2_NAME11<-as.character(agri_regions$SA2_NAME11)
        agri_regions$Value<-as.numeric(agri_regions$Value)
        agri_regions$Commodity<-as.character(agri_regions$Commodity)
        #Only keep those regions which match our existing dataset
        agri_regions <-filter(agri_regions, agri_regions$SA2_NAME11 %in% unique(Aust@data[,"SA2_NAME11"]))
        #Get rid of NA values in areas of production
        agri_regions <-filter(agri_regions, !is.na(agri_regions$Value))
        #Get rid of small values
        agri_regions <-filter(agri_regions, agri_regions$Value > 100)
        #Get rid of duplicate rows if any
        agri_regions<-distinct(agri_regions)

        #This is how many categories there are
        length(unique(agri_regions$Commodity))
## [1] 553
        #If you want to see what they are, run this
        #sort(unique(agri_regions$Commodity))

        #Flowers
        flowers_regions <-filter(agri_regions, agri_regions$Commodity == 
                                         "Nurseries cut flowers and cultivated turf - Total area (ha)")
        flowers_regions<-select(flowers_regions, c(1,3))
        flowers_regions<-ddply(flowers_regions,.(SA2_NAME11),summarize,Total_Area_Flowers=sum(Value, 
                                                                        na.rm = TRUE))

        #merge
        Aust<-merge(Aust, flowers_regions, by="SA2_NAME11")

        #Broadacre
        broadacre_regions <-filter(agri_regions, agri_regions$Commodity == 
        "Broadacre crops - Cereal crops - Barley for grain - Area (ha)" | agri_regions$Commodity == 
        "Broadacre crops - Cereal crops - Wheat for grain - Area (ha)")
        broadacre_regions<-select(broadacre_regions, c(1,3))
        broadacre_regions<-ddply(broadacre_regions,.(SA2_NAME11),summarize,Total_Area_Crops=sum(Value, na.rm = TRUE))

        #merge
        Aust<-merge(Aust, broadacre_regions, by="SA2_NAME11")

        #Fruit and nuts
        a<-"Fruit and nuts - Berry fruit - Total area (ha)"
        b<-"Fruit and nuts - Orchard fruit and nut trees - Total area (ha)"
        c<-"Fruit and nuts - Other fruit - All other fruit - Total area (ha)" 
        d<-"Grapevines - Grapevines for wine production - Total Area (ha)"

        fruit_regions <-filter(agri_regions, agri_regions$Commodity == a | agri_regions$Commodity == 
                                b | agri_regions$Commodity == c | agri_regions$Commodity == d)
        fruit_regions<-select(fruit_regions, c(1,3))
        fruit_regions<-ddply(fruit_regions,.(SA2_NAME11),summarize,Total_Area_Fruit=sum(Value, na.rm = TRUE))

        #merge
        Aust<-merge(Aust, fruit_regions, by="SA2_NAME11")

        #Hay
        hay_regions <-filter(agri_regions, agri_regions$Commodity == "Hay and Silage - Hay - Total area (ha)")
        hay_regions <-select(hay_regions, c(1,3)) 
        hay_regions <-ddply(hay_regions,.(SA2_NAME11),summarize,Total_Area_Hay=sum(Value, na.rm = TRUE))

        #merge
        Aust<-merge(Aust, hay_regions, by="SA2_NAME11")

        #Livestock
        livestock_regions <-filter(agri_regions,agri_regions$Commodity == "Livestock - Cattle - Total (no.)")
        livestock_regions <-select(livestock_regions, c(1,3)) 
        livestock_regions <-ddply(livestock_regions,.(SA2_NAME11),summarize,Total_Area_Livestock=sum(Value, na.rm = TRUE))

        #merge
        Aust<-merge(Aust, livestock_regions, by="SA2_NAME11")

        #Vegetables
        veg_regions <- filter(agri_regions, agri_regions$Commodity == 
                                "Vegetables for human consumption - All other - Area (ha)" )
        veg_regions <- select(veg_regions,c(1,3)) 
        veg_regions <- ddply(veg_regions,.(SA2_NAME11),summarize,Total_Area_Veg=sum(Value, na.rm = TRUE)) 

        #merge
        Aust<-merge(Aust, veg_regions, by="SA2_NAME11")

        #replace NA with 0 
        Aust@data[is.na(Aust@data)]<-0
        #save(Aust, file = "Aust_Pop_Industry.Rdata")

Make maps showing various industries.

#plot flowers
pal_flowers<- colorBin(c("white","deeppink4"), domain= Aust$Total_Area_Flowers)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_flowers(Total_Area_Flowers),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Flowers.png")

#plot broadacre crops
pal_crops<- colorBin(c("white","peru"), domain= Aust$Total_Area_Crops)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_crops(Total_Area_Crops),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Crops.png")

#plot fruit
pal_fruit<- colorBin(c("white","peru"), domain= Aust$Total_Area_Fruit)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_fruit(Total_Area_Fruit),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Fruit.png")

#plot veg
pal_veg<- colorBin(c("white","green4"), domain= Aust$Total_Area_Veg)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_veg(Total_Area_Veg),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Veg.png")

#plot hay
pal_hay<- colorBin(c("white","gold3"), domain= Aust$Total_Area_Hay)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_hay(Total_Area_Hay),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Hay.png")

#plot livestock
pal_livestock<- colorBin(c("white","lavenderblush4"), domain= Aust$Total_Area_Livestock)
map_flowers<-leaflet(data = spTransform(Aust,CRS(proj))) %>% addProviderTiles("CartoDB.Positron") %>%
        addPolygons(fillColor = ~pal_livestock(Total_Area_Livestock),  # refers to pal defined above
                    fillOpacity = 0.8, 
                    color = "#BDBDC3", # colour between polygons
                    weight = 1,
                    highlight = highlightOptions(
                            weight = 5,
                            color = "pink",
                            dashArray = "",
                            fillOpacity = 0.7,
                            bringToFront = TRUE))

mapshot(map_flowers, file = "Map_Livestock.png")

Show map of flower cultivation and hay production areas, as examples.

        img <- readPNG("Map_Flowers.png")
        grid.raster(img)

While hay is produced here:

        img <- readPNG("Map_Hay.png")
        grid.raster(img)

Simulating maximum range of invasion for 20 pests

The proper way to do this is to use CLIMEX model or something similar, because CEBRA application is an exploratory tool minimally plausible distributions are sufficient. We will use the SA2 data to model these ranges. The species considered come from Australian Government, Department of Agriculture.

Looking at the industry data makes it unclear what the units are, it seems unlikely that these are indeed in “ha” as it would appear that more area is cultivated than is available. That’s not a problem though for this coarse excercise as we will just treat it as absence/presense in an SA2 unit information.

        species<-c("Xylella","Khapra beetle",
           "Exotic fruit fly","Karnal bunt","Huanglongbing","Gypsy moths",
           "Tramp ants","Bees mites","Giant African snail","Stink bug",
           "Zebra chip","Ug99","Russian wheat aphid","Citrus canker",
           "Guava rust","Airborne phytophthora","Exotic bees",
           "Panama disease tropical race 4","Potato cyst nematode","Leaf miner")
        
        #Select commodity key words to approximate the range of respective invasive species
        host<- c("Grapevines", "seed", "Fruit",  "Wheat", "Citrus", "Total", "and","Orchard",
         "Vegetables","flower","Tomatoes", "Barley","Broadacre", "Citrus","Trees","Trees","rchard","Bananas",
         "Potatoes","t/ha")
        
        
        species_host<-data.frame(Species=species, Selected_by= host)
        species_host
##                           Species Selected_by
## 1                         Xylella  Grapevines
## 2                   Khapra beetle        seed
## 3                Exotic fruit fly       Fruit
## 4                     Karnal bunt       Wheat
## 5                   Huanglongbing      Citrus
## 6                     Gypsy moths       Total
## 7                      Tramp ants         and
## 8                      Bees mites     Orchard
## 9             Giant African snail  Vegetables
## 10                      Stink bug      flower
## 11                     Zebra chip    Tomatoes
## 12                           Ug99      Barley
## 13            Russian wheat aphid   Broadacre
## 14                  Citrus canker      Citrus
## 15                     Guava rust       Trees
## 16          Airborne phytophthora       Trees
## 17                    Exotic bees      rchard
## 18 Panama disease tropical race 4     Bananas
## 19           Potato cyst nematode    Potatoes
## 20                     Leaf miner        t/ha
        write.csv(species_host, "Species_Host.csv") 
        
        #Simulate species distributions as absence/presence values by region
        #And merge with Aust
        #grepl function lets us choose all the regions that have to do with a specific host
        for (i in 1:length(species))
                {
                        temp<- filter(agri_regions, grepl(host[i], agri_regions$Commodity))
                        temp <- select(temp,c(1,3)) 
                        temp <- ddply(temp,.(SA2_NAME11),summarize, Indicator =sum(Value, na.rm = TRUE))
                        temp$Indicator[is.na(temp$Indicator)]<-"absent"
                        temp$Indicator[temp$Indicator>0]<-"present"
                        names(temp)<-c("SA2_NAME11", species[i])
                        Aust<-merge(Aust, temp, by="SA2_NAME11")
       
                }

        #replace NA with "absent"
        Aust@data[is.na(Aust@data)]<-"absent"
        #save(Aust, file = "Australia.Rdata")

Now we can make maps that show simulated maximum spread of these invasive pests.

        for (i in 1:length(species))
        {
                #file = name of the destinatin file
                file<-paste("www/",species[i],"_map",".png", sep="")
                png(file = file, bg = "white", width = 800, height=500)
                par(mar=c(0,0,0,0))
                print(qtm(shp = Aust, fill = species[i],borders = NULL))
                dev.off()
        }

An example of a pest ivasion map.

        img <- readPNG("www/Giant African snail_map.png")
        grid.raster(img)

Prepare inputs for the CEBRA app

Now we can make a table that will provide inputs for the app. We will record the population each pest affects, area of invasion, area of overlap with each each industry and main population centers (above certain density), proportion of value at risk for each industry, which is calculated by taking the [area where industry and pests overlap]/[total area of industry].

        #Probably should not be making extra copies of data without need
        x<-Aust@data
        #head(x)

        input_app<-matrix(NA,nrow = length(species), ncol = 7)
        
        for (i in 1:length(species))
                {
                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Flowers > 0)
                        input_app[i,1]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Crops > 0)
                        input_app[i,2]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Fruit > 0)
                        input_app[i,3]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Hay > 0)
                        input_app[i,4]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Livestock > 0)
                        input_app[i,5]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present" & x$Total_Area_Veg > 0)
                        input_app[i,6]<-sum(y$Area_ha)

                        y<-subset(x, x[,species[i]]=="present")
                        input_app[i,7]<-sum(y$Population)
                }
        #tidy the dataframe
        colnames(input_app)<-c("Overlap_Flowers","Overlap_Crops",
                               "Overlap_Fruit","Overlap_Hay","Overlap_Livestock","Overlap_Veg","Overlap_Population_Numbers")
        input_app<-data.frame(input_app)
        input_app<-mutate(input_app, Species=species)
        input_app<-input_app[,c(8,1:7)]
        
        y<-subset(x, x$Total_Area_Flowers > 0)
        Total_Flowers<-sum(y$Area_ha)

        y<-subset(x, x$Total_Area_Crops > 0)
        Total_Crops<-sum(y$Area_ha)

        y<-subset(x,  x$Total_Area_Fruit > 0)
        Total_Fruit<-sum(y$Area_ha)

        y<-subset(x,  x$Total_Area_Hay > 0)
        Total_Hay<-sum(y$Area_ha)

        y<-subset(x,  x$Total_Area_Livestock > 0)
        Total_Livestock<-sum(y$Area_ha)

        y<-subset(x,  x$Total_Area_Veg > 0)
        Total_Veg <-sum(y$Area_ha)

        Total_Population <-sum(x$Population)

        input_app<-mutate(input_app, Prop_Flowers = Overlap_Flowers/Total_Flowers)
        input_app<-mutate(input_app, Prop_Fruit = Overlap_Fruit/Total_Fruit)
        input_app<-mutate(input_app, Prop_Veg = Overlap_Veg/Total_Veg)
        input_app<-mutate(input_app, Prop_Hay = Overlap_Hay/Total_Hay)
        input_app<-mutate(input_app, Prop_Crops = Overlap_Crops/Total_Crops)
        input_app<-mutate(input_app, Prop_Livestock = Overlap_Livestock/Total_Livestock)
        input_app<-mutate(input_app, Prop_Population = Overlap_Population_Numbers/Total_Population)

Include economic data

The data on economic value comes from Australian Bureau of Statistics.

‘VALUE OF AGRICULTURAL COMMODITIES PRODUCED, Australia, year ended 30 June 2016’, in aggregate, are used and added to the inputs.

        #Values from the website linked above, in millions of Australian dollars
        Value_Flowers<-1296.3
        #Wheat, Oates,Barley, Sorghum,and Rice 
        Value_Crops<-6170.1+397.9+2276.6+491.6+114.8
        #Fruit and grapes
        Value_Fruit<-4224.6+1333.9
        #http://www.abs.gov.au/ausstats/abs@.nsf/Lookup/by%20Subject/1301.0~2012~Main%20Features~Agricultural%20production~260
        Value_Hay<-1600
        #Slaughtering and products
        Value_Livestock<-20622.5+8029.9
        
        Value_Veg <-3585.4
        
        input_app<-mutate(input_app, Econ_Flowers= Prop_Flowers*Value_Flowers)
        input_app<-mutate(input_app, Econ_Fruit =Prop_Fruit*Value_Fruit)
        input_app<-mutate(input_app, Econ_Veg = Prop_Veg*Value_Veg)
        input_app<-mutate(input_app, Econ_Hay =Prop_Hay*Value_Hay)
        input_app<-mutate(input_app, Econ_Crops =Prop_Crops*Value_Crops)
        input_app<-mutate(input_app, Econ_Livestock = Prop_Livestock*Value_Livestock)

Include default values that can be altered in the app

By looking at the proportion of overlap between species distribution and key industries, we have arrived at values at risk. However, it is unlikely that the pest will cause a 100% damage whenever it comes in contact with a commodity. In the app the extent of damage for each species and each industry can be modified.

Read default values from an ‘Impact_Species_Host’ file.

Store “Input_App.csv” in the ‘data’ directory for use by the app, this file now contains default values for monetary and non-monetary damages, management utility, time and unweighted impact scores that produce default ranking.

        file<-"Impact_Species_Host.xls"
        #Read the file
        impacts <- read_excel(file, sheet="Impacts", skip=0)
        
        input_app<-merge(input_app, impacts, by="Species")
        
        #Add a column with total monetary damage
        input_app<-mutate(input_app, Monetary_Damage= (Econ_Flowers*Impact_Flowers+
                                                Econ_Fruit*Impact_Fruit+
                                                Econ_Veg*Impact_Veg+
                                                Econ_Hay*Impact_Hay+
                                                Econ_Crops*Impact_Crops+
                                                Econ_Livestock*Impact_Livestock))
                                               
        input_app<-mutate(input_app, Man_Utility = 4.9)
        
        input_app<-mutate(input_app, Unweighted_Score = (Monetary_Damage/max(Monetary_Damage)+
                                                Amenity/max(Amenity)+
                                                Environment/max(Environment)+                 
                                                Man_Utility/max(Man_Utility)+
                                                (1-Time/max(Time))))                     
                                                
        #Generate random likelihood of entry, spread and establishment 
        set.seed(347)
        chance<-rnorm(20,0.6, 0.2)
        social<-round(rnorm(20,3, 1))+1
        
        input_app<-mutate(input_app, Likelihood = chance) 
        #Generate random community impact scores
        input_app<-mutate(input_app, Community = social)                                                                 
        
        write.csv(input_app, "data/Input_App.csv") 
        
        #Look at the names of the top 10 largest by utility
        input_app[order(-input_app$Unweighted_Score),][1:10,][,c("Species", "Monetary_Damage", "Unweighted_Score")]
##                Species Monetary_Damage Unweighted_Score
## 17          Tramp ants       7879.6000         4.700000
## 12          Leaf miner       4458.5432         4.065834
## 10         Karnal bunt       4247.5186         3.439053
## 11       Khapra beetle       1890.2000         3.339885
## 5     Exotic fruit fly       1659.3214         3.310584
## 4          Exotic bees       2513.0604         3.218932
## 2           Bees mites        551.3355         3.169970
## 8          Gypsy moths       5014.3600         3.136372
## 9        Huanglongbing       1434.0852         3.082000
## 15 Russian wheat aphid       6044.5495         3.067114
        #Look at the names of the top 10 largest by monetary damage
        input_app[order(-input_app$Monetary_Damage),][1:10,][,c("Species", "Monetary_Damage", "Unweighted_Score")]
##                Species Monetary_Damage Unweighted_Score
## 17          Tramp ants        7879.600         4.700000
## 15 Russian wheat aphid        6044.550         3.067114
## 8          Gypsy moths        5014.360         3.136372
## 12          Leaf miner        4458.543         4.065834
## 10         Karnal bunt        4247.519         3.439053
## 4          Exotic bees        2513.060         3.218932
## 11       Khapra beetle        1890.200         3.339885
## 18                Ug99        1872.849         2.537683
## 5     Exotic fruit fly        1659.321         3.310584
## 9        Huanglongbing        1434.085         3.082000

Alternative visualisations

In addition to ranking, it is possible to visualise relative risks in two dimensions: impact uncertainty (randomness comes from uncertainty in entry, spread, and establishment) and management uncertainty (uncertainty distribution is derived from expert beliefs about possibilities of eradication).

        theme_set(theme_classic())
        mon_damage<-input_app$Monetary_Damage[input_app$Species=="Xylella"]
        risk_dist<-mon_damage*rnorm(10000,mean=input_app$Likelihood[input_app$Species=="Xylella"], sd = 0.1)
        management_unc<-rnorm(10000, 0.6, 0.05)

        df<-data.frame(score=risk_dist,management=management_unc, species="Xylella")

        mon_damage<-input_app$Monetary_Damage[input_app$Species=="Ug99"]
        risk_dist<-mon_damage*rnorm(10000,mean=input_app$Likelihood[input_app$Species=="Ug99"], sd = 0.1)
        management_unc<-rnorm(10000, 0.4, 0.1)

        df <- rbind(df, data.frame(score=risk_dist,management=management_unc, species="Ug99"))

        p <- ggplot(data=df, aes(x=score, y=management,colour=species)) + geom_point(size=0.5, alpha=.6) +
                stat_ellipse(data=df, aes(x=score, y=management,colour=species, fill=factor(df$species)), size=1,
                     geom="polygon",level=0.95,alpha=0.4)+
                xlab("Monetary damage uncertainty")+ylab("Management uncertainty")+
                scale_fill_manual(values=c("orange","blue"), name="95% density", 
                          labels=unique(df$species))
        p