This task was extremely hard: I could identify the bottom 20 and top 20 postcodes, but because of duplicate postcode entries I could then only explore the bottom class of immunisation % coverage (<70%) and the top class (>95%).
#load libraries
library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(reshape2)
I used the merged file containing PHN, postcode, immunisation % coverage, SEIFA data and electoral data (i.e. there were no demographic or taxable-income data in this file).
# Import the merged dataset (PHN, postcode, immunisation coverage, SEIFA and
# electoral columns).
# stringsAsFactors is set explicitly: from R 4.0.0 the read.csv() default is
# FALSE, which would turn the factor columns reported by str() (state,
# pc_immun, PHN_code, ...) into character vectors.
immunisations_merged <- read.csv(
  "cleaned_data/immunization_with_everything.csv",
  stringsAsFactors = TRUE
)
# Inspect the column names and types of the imported data frame.
str(immunisations_merged)
## 'data.frame': 191163 obs. of 46 variables:
## $ X.1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ postcode : int 800 800 800 800 800 800 800 800 800 800 ...
## $ X : int 163657 132679 132628 132622 132677 132621 163658 132678 163662 163663 ...
## $ state : Factor w/ 12 levels "ACT","NSW","NSW/ACT",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ year : int 2016 2011 2014 2012 2011 2012 2016 2011 2016 2016 ...
## $ age : int 5 1 1 1 1 1 2 1 1 1 ...
## $ pc_immun : Factor w/ 9 levels "<70.0","70.0-74.9",..: 6 6 7 7 6 7 3 6 6 6 ...
## $ caution : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pc_immun_class : int 6 6 7 7 6 7 3 6 6 6 ...
## $ PHN_code : Factor w/ 31 levels "PHN101","PHN102",..: 30 30 30 30 30 30 30 30 30 30 ...
## $ PHN_number : int 701 701 701 701 701 701 701 701 701 701 ...
## $ Index.type : Factor w/ 4 levels "Index of Economic Resources",..: 4 4 2 3 1 1 3 3 3 2 ...
## $ Time : int 2016 2011 2011 2011 2011 2011 2016 2011 2016 2016 ...
## $ Maximum.score.for.SA1s.in.area : int NA 1127 1167 1144 986 986 1167 1144 1167 1163 ...
## $ Minimum.score.for.SA1s.in.area : int 842 1032 1066 1051 927 927 914 1051 914 1023 ...
## $ Rank.within.Australia : int 2243 2053 2089 2136 501 501 2398 2136 2398 2287 ...
## $ Rank.within.Australia...Decile : int NA 9 9 9 3 3 10 9 10 9 ...
## $ Rank.within.Australia...Percentile : int 86 83 85 87 21 21 92 87 92 87 ...
## $ Rank.within.State.or.Territory : int 28 25 28 26 15 15 33 26 33 33 ...
## $ Rank.within.State.or.Territory...Decile : int 8 9 10 9 6 6 10 9 10 10 ...
## $ Rank.within.State.or.Territory...Percentile: int NA NA NA NA NA NA 92 NA 92 92 ...
## $ Score : int 1066 1060 1077 1072 952 952 1096 1072 1096 1089 ...
## $ Usual.resident.population : int 6464 0 0 4564 0 0 6464 4564 6464 6464 ...
## $ Electoral.division : Factor w/ 150 levels "Adelaide","Aston",..: 136 136 136 136 136 136 136 136 136 136 ...
## $ Per.cent.postcode.in.electorate : num 100 100 100 100 100 100 100 100 100 100 ...
## $ DivisionID2016 : int 307 307 307 307 307 307 307 307 307 307 ...
## $ StateAB2016 : Factor w/ 8 levels "ACT","NSW","NT",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ CandidateID2016 : int 28737 28737 28737 28737 28737 28737 28737 28737 28737 28737 ...
## $ GivenNm2016 : Factor w/ 97 levels "Adam","Alan",..: 56 56 56 56 56 56 56 56 56 56 ...
## $ Surname2016 : Factor w/ 135 levels "ABBOTT","ALBANESE",..: 51 51 51 51 51 51 51 51 51 51 ...
## $ PartyNm2016 : Factor w/ 8 levels "Australian Labor Party",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PartyAb2016 : Factor w/ 8 levels "ALP","GRN","IND",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ DivisionID2010 : int 307 307 307 307 307 307 307 307 307 307 ...
## $ StateAb2010 : Factor w/ 8 levels "ACT","NSW","NT",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ CandidateID2010 : int 21477 21477 21477 21477 21477 21477 21477 21477 21477 21477 ...
## $ GivenNm2010 : Factor w/ 108 levels "Adam","Alan",..: 74 74 74 74 74 74 74 74 74 74 ...
## $ Surname2010 : Factor w/ 137 levels "ABBOTT","ADAMS",..: 52 52 52 52 52 52 52 52 52 52 ...
## $ PartyNm2010 : Factor w/ 7 levels "Australian Labor Party",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ PartyAb2010 : Factor w/ 7 levels "ALP","CLP","GRN",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ DivisionID2013 : int 307 307 307 307 307 307 307 307 307 307 ...
## $ StateAb2013 : Factor w/ 8 levels "ACT","NSW","NT",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ CandidateID2013 : int 23502 23502 23502 23502 23502 23502 23502 23502 23502 23502 ...
## $ GivenNm2013 : Factor w/ 106 levels "Adam","Alan",..: 76 76 76 76 76 76 76 76 76 76 ...
## $ Surname2013 : Factor w/ 140 levels "ABBOTT","ALBANESE",..: 48 48 48 48 48 48 48 48 48 48 ...
## $ PartyNm2013 : Factor w/ 9 levels "Australian Labor Party",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ PartyAb2013 : Factor w/ 9 levels "ALP","CLP","GRN",..: 2 2 2 2 2 2 2 2 2 2 ...
# Per-column summary to see how the data really looks: it is a mix of long
# and wide data.
summary(object = immunisations_merged)
## X.1 postcode X state
## Min. : 1 Min. : 800 Min. : 1 VIC :50376
## 1st Qu.: 47792 1st Qu.:2774 1st Qu.: 39310 NSW :49864
## Median : 95582 Median :3788 Median : 78666 QLD :33463
## Mean : 95582 Mean :4001 Mean : 80160 SA :22900
## 3rd Qu.:143372 3rd Qu.:5070 3rd Qu.:121512 WA :21948
## Max. :191163 Max. :7470 Max. :164021 TAS : 7856
## (Other): 4756
## year age pc_immun caution
## Min. :2011 Min. :1.000 NP :58819 Min. :0.0000
## 1st Qu.:2012 1st Qu.:1.000 92.5-94.9 :35652 1st Qu.:0.0000
## Median :2014 Median :2.000 90.0-92.4 :33580 Median :0.0000
## Mean :2014 Mean :2.671 85.0-89.9 :27468 Mean :0.2413
## 3rd Qu.:2015 3rd Qu.:5.000 95.0-100.0:27160 3rd Qu.:0.0000
## Max. :2016 Max. :5.000 80.0-84.9 : 5936 Max. :1.0000
## (Other) : 2548
## pc_immun_class PHN_code PHN_number
## Min. :0.000 PHN402 : 13028 Min. :101.0
## 1st Qu.:0.000 PHN503 : 11884 1st Qu.:110.0
## Median :6.000 PHN205 : 10888 Median :206.0
## Mean :4.372 PHN202 : 10092 Mean :280.8
## 3rd Qu.:7.000 PHN401 : 9872 3rd Qu.:401.0
## Max. :8.000 PHN206 : 9760 Max. :801.0
## (Other):125639
## Index.type
## Index of Economic Resources :47790
## Index of Education and Occupation :47790
## Index of Relative Socio-economic Advantage and Disadvantage:47790
## Index of Relative Socio-economic Disadvantage :47793
##
##
##
## Time Maximum.score.for.SA1s.in.area
## Min. :2011 Min. : 506
## 1st Qu.:2011 1st Qu.:1030
## Median :2011 Median :1080
## Mean :2012 Mean :1077
## 3rd Qu.:2011 3rd Qu.:1129
## Max. :2016 Max. :1375
## NA's :3 NA's :4826
## Minimum.score.for.SA1s.in.area Rank.within.Australia
## Min. : 121.0 Min. : 1
## 1st Qu.: 818.0 1st Qu.: 651
## Median : 897.0 Median :1288
## Mean : 882.9 Mean :1276
## 3rd Qu.: 969.0 3rd Qu.:1906
## Max. :1226.0 Max. :2630
## NA's :4978 NA's :4812
## Rank.within.Australia...Decile Rank.within.Australia...Percentile
## Min. : 1.000 Min. : 1.00
## 1st Qu.: 3.000 1st Qu.: 27.00
## Median : 6.000 Median : 52.00
## Mean : 5.592 Mean : 51.45
## 3rd Qu.: 8.000 3rd Qu.: 77.00
## Max. :10.000 Max. :100.00
## NA's :4777 NA's :4782
## Rank.within.State.or.Territory Rank.within.State.or.Territory...Decile
## Min. : 1.0 Min. : 1.000
## 1st Qu.:102.0 1st Qu.: 3.000
## Median :230.0 Median : 6.000
## Mean :250.7 Mean : 5.615
## 3rd Qu.:370.0 3rd Qu.: 8.000
## Max. :694.0 Max. :10.000
## NA's :4797 NA's :4838
## Rank.within.State.or.Territory...Percentile Score
## Min. : 1.00 Min. : 506.0
## 1st Qu.: 27.00 1st Qu.: 947.0
## Median : 52.00 Median : 993.0
## Mean : 51.65 Mean : 993.9
## 3rd Qu.: 77.00 3rd Qu.:1043.0
## Max. :100.00 Max. :1234.0
## NA's :8123 NA's :4718
## Usual.resident.population Electoral.division
## Min. : 0 O'Connor: 5108
## 1st Qu.: 0 Maranoa : 4959
## Median : 0 Grey : 4852
## Mean : 4140 Mallee : 3892
## 3rd Qu.: 2112 Wannon : 3692
## Max. :106745 Barker : 3656
## NA's :4829 (Other) :165004
## Per.cent.postcode.in.electorate DivisionID2016 StateAB2016
## Min. : 0.1 Min. :101.0 VIC :50316
## 1st Qu.: 91.9 1st Qu.:158.0 NSW :44164
## Median :100.0 Median :190.0 QLD :32499
## Mean : 84.8 Mean :190.7 SA :22972
## 3rd Qu.:100.0 3rd Qu.:226.0 WA :18596
## Max. :100.0 Max. :310.0 (Other):10940
## NA's :11676 NA's :11676
## CandidateID2016 GivenNm2016 Surname2016
## Min. :20881 Andrew : 9896 WILSON : 6580
## 1st Qu.:28317 Tony : 7744 MITCHELL : 6576
## Median :28777 David : 6919 LITTLEPROUD: 4959
## Mean :28620 Rick : 5108 RAMSEY : 4852
## 3rd Qu.:28997 Rowan : 4852 BROAD : 3892
## Max. :29576 (Other):144968 (Other) :152628
## NA's :11676 NA's : 11676 NA's : 11676
## PartyNm2016 PartyAb2016 DivisionID2010
## Australian Labor Party:65980 ALP :65980 Min. :101.0
## Liberal :61084 LP :61084 1st Qu.:156.0
## Liberal National Party:21935 LNP :21935 Median :189.0
## The Nationals :19584 NP :19584 Mean :189.5
## Independent : 3864 IND : 3864 3rd Qu.:226.0
## (Other) : 7040 (Other): 7040 Max. :310.0
## NA's :11676 NA's :11676 NA's :8908
## StateAb2010 CandidateID2010 GivenNm2010 Surname2010
## VIC :50316 Min. :13270 Tony : 11012 CROOK : 5108
## NSW :45740 1st Qu.:20876 John : 6780 SCOTT : 4959
## QLD :32499 Median :21088 Bruce : 5535 RAMSEY : 4852
## SA :22972 Mean :20988 Darren : 4940 FORREST: 3892
## WA :18596 3rd Qu.:21416 Andrew : 4908 TEHAN : 3692
## (Other):12132 Max. :21912 (Other):149080 (Other):159752
## NA's : 8908 NA's :8908 NA's : 8908 NA's : 8908
## PartyNm2010 PartyAb2010
## Australian Labor Party :73548 ALP :73548
## Liberal :61948 LP :61948
## Liberal National Party of Queensland:21135 LNQ :21135
## The Nationals :17868 NP :17868
## Independent : 5912 IND : 5912
## (Other) : 1844 (Other): 1844
## NA's : 8908 NA's : 8908
## DivisionID2013 StateAb2013 CandidateID2013 GivenNm2013
## Min. :101.0 VIC :50316 Min. :20881 Andrew : 10772
## 1st Qu.:156.0 NSW :45740 1st Qu.:23312 Tony : 7744
## Median :189.0 QLD :32499 Median :23656 Bruce : 5535
## Mean :189.5 SA :22972 Mean :23675 Rick : 5108
## 3rd Qu.:226.0 WA :18596 3rd Qu.:23903 Rowan : 4852
## Max. :310.0 (Other):12132 Max. :24739 (Other):148244
## NA's :8908 NA's : 8908 NA's :8908 NA's : 8908
## Surname2013 PartyNm2013 PartyAb2013
## SCOTT : 5671 Liberal :80548 LP :80548
## WILSON : 5108 Australian Labor Party:53352 ALP :53352
## RAMSEY : 4852 Liberal National Party:22667 LNP :22667
## BROAD : 3892 The Nationals :16568 NP :16568
## TEHAN : 3692 Independent : 3864 IND : 3864
## (Other):159040 (Other) : 5256 (Other): 5256
## NA's : 8908 NA's : 8908 NA's : 8908
# Build the working data frame `data` from immunisations_merged:
#   * keep only the 16 columns used in the rest of the analysis
#   * convert `age` to a factor so it is treated as a category
data <- immunisations_merged %>%
  select(
    state, postcode, year, age, pc_immun, caution, pc_immun_class,
    PHN_code, PHN_number, Usual.resident.population, Index.type, Score,
    Electoral.division, PartyNm2016, PartyNm2010, PartyNm2013
  ) %>%
  mutate(age = as.factor(age))
# Confirm the structure of the trimmed data frame.
str(data)
## 'data.frame': 191163 obs. of 16 variables:
## $ state : Factor w/ 12 levels "ACT","NSW","NSW/ACT",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ postcode : int 800 800 800 800 800 800 800 800 800 800 ...
## $ year : int 2016 2011 2014 2012 2011 2012 2016 2011 2016 2016 ...
## $ age : Factor w/ 3 levels "1","2","5": 3 1 1 1 1 1 2 1 1 1 ...
## $ pc_immun : Factor w/ 9 levels "<70.0","70.0-74.9",..: 6 6 7 7 6 7 3 6 6 6 ...
## $ caution : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pc_immun_class : int 6 6 7 7 6 7 3 6 6 6 ...
## $ PHN_code : Factor w/ 31 levels "PHN101","PHN102",..: 30 30 30 30 30 30 30 30 30 30 ...
## $ PHN_number : int 701 701 701 701 701 701 701 701 701 701 ...
## $ Usual.resident.population: int 6464 0 0 4564 0 0 6464 4564 6464 6464 ...
## $ Index.type : Factor w/ 4 levels "Index of Economic Resources",..: 4 4 2 3 1 1 3 3 3 2 ...
## $ Score : int 1066 1060 1077 1072 952 952 1096 1072 1096 1089 ...
## $ Electoral.division : Factor w/ 150 levels "Adelaide","Aston",..: 136 136 136 136 136 136 136 136 136 136 ...
## $ PartyNm2016 : Factor w/ 8 levels "Australian Labor Party",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PartyNm2010 : Factor w/ 7 levels "Australian Labor Party",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ PartyNm2013 : Factor w/ 9 levels "Australian Labor Party",..: 2 2 2 2 2 2 2 2 2 2 ...
# Lookup table of coverage bands: one row per (pc_immun label,
# pc_immun_class code) pair, with the number of rows in `data` for that pair.
pc_immun_groups <- summarise(
  group_by(data, pc_immun, pc_immun_class),
  count = n()
)
pc_immun_groups
## # A tibble: 9 x 3
## # Groups: pc_immun [?]
## pc_immun pc_immun_class count
## <fct> <int> <int>
## 1 <70.0 1 332
## 2 70.0-74.9 2 612
## 3 75.0-79.9 3 1604
## 4 80.0-84.9 4 5936
## 5 85.0-89.9 5 27468
## 6 90.0-92.4 6 33580
## 7 92.5-94.9 7 35652
## 8 95.0-100.0 8 27160
## 9 NP 0 58819
# Number of rows in `data` for each distinct age group.
age_groups <- summarise(
  group_by(data, age),
  count = n()
)
age_groups
## # A tibble: 3 x 2
## age count
## <fct> <int>
## 1 1 63549
## 2 2 63685
## 3 5 63929
# Lowest immunisation coverage in 2016.
# Keeps rows whose class code is below 3 and not 0 (class 0 is "NP"), using
# only the "Advantage and Disadvantage" index rows, then counts rows per
# postcode / PHN / state / coverage band / population combination.
# Sorted by class (lowest first), then by population (largest first).
low_pc_immun <- data %>%
  filter(
    year == 2016,
    Index.type == "Index of Relative Socio-economic Advantage and Disadvantage",
    pc_immun_class != 0 & pc_immun_class < 3
  ) %>%
  group_by(
    postcode, PHN_code, state, pc_immun, pc_immun_class,
    Usual.resident.population
  ) %>%
  summarise(count = n()) %>%
  arrange(pc_immun_class, desc(Usual.resident.population))
low_pc_immun
## # A tibble: 25 x 7
## # Groups: postcode, PHN_code, state, pc_immun, pc_immun_class [25]
## postcode PHN_code state pc_immun pc_immun_class Usual.resident.p… count
## <int> <fct> <fct> <fct> <int> <int> <int>
## 1 2000 PHN101 NSW <70.0 1 27411 1
## 2 2481 PHN109 NSW <70.0 1 11769 1
## 3 2483 PHN109 NSW <70.0 1 10609 1
## 4 4552 PHN306 QLD <70.0 1 8432 1
## 5 4102 PHN302 QLD <70.0 1 7660 1
## 6 2454 PHN109 NSW <70.0 1 7570 1
## 7 2482 PHN109 NSW <70.0 1 6280 3
## 8 6083 PHN501 WA <70.0 1 3512 1
## 9 6073 PHN501 WA <70.0 1 3041 1
## 10 2775 PHN104 NSW <70.0 1 1498 3
## # ... with 15 more rows
# Open the data viewer only in an interactive session: View() needs a GUI, so
# it is skipped when the script is knitted or run with Rscript.
if (interactive()) {
  View(low_pc_immun)
}
# First 20 rows of low_pc_immun, i.e. lowest class first and, within a class,
# the largest populations first.
bottom_20_postocdes <- head(low_pc_immun, 20)
bottom_20_postocdes
## # A tibble: 20 x 7
## # Groups: postcode, PHN_code, state, pc_immun, pc_immun_class [20]
## postcode PHN_code state pc_immun pc_immun_class Usual.resident.… count
## <int> <fct> <fct> <fct> <int> <int> <int>
## 1 2000 PHN101 NSW <70.0 1 27411 1
## 2 2481 PHN109 NSW <70.0 1 11769 1
## 3 2483 PHN109 NSW <70.0 1 10609 1
## 4 4552 PHN306 QLD <70.0 1 8432 1
## 5 4102 PHN302 QLD <70.0 1 7660 1
## 6 2454 PHN109 NSW <70.0 1 7570 1
## 7 2482 PHN109 NSW <70.0 1 6280 3
## 8 6083 PHN501 WA <70.0 1 3512 1
## 9 6073 PHN501 WA <70.0 1 3041 1
## 10 2775 PHN104 NSW <70.0 1 1498 3
## 11 3000 PHN201 VIC 70.0-74.9 2 37975 2
## 12 3006 PHN201 VIC 70.0-74.9 2 18808 1
## 13 5000 PHN401 SA 70.0-74.9 2 15115 1
## 14 2481 PHN109 NSW 70.0-74.9 2 11769 2
## 15 2483 PHN109 NSW 70.0-74.9 2 10609 2
## 16 4552 PHN306 QLD 70.0-74.9 2 8432 1
## 17 2454 PHN109 NSW 70.0-74.9 2 7570 1
## 18 6333 PHN503 WA 70.0-74.9 2 5845 1
## 19 6005 PHN501 WA 70.0-74.9 2 5624 2
## 20 2479 PHN109 NSW 70.0-74.9 2 4730 1
# 2016 results
# Rank every postcode / coverage-band combination published for 2016 (class 0,
# "NP", excluded; "Advantage and Disadvantage" index rows only).
# Sorted from the highest class to the lowest, and by population (largest
# first) within a class.
pc_immun_ordered <- data %>%
  filter(
    year == 2016,
    Index.type == "Index of Relative Socio-economic Advantage and Disadvantage",
    pc_immun_class != 0
  ) %>%
  group_by(
    postcode, PHN_code, state, pc_immun, pc_immun_class,
    Usual.resident.population
  ) %>%
  summarise(count = n()) %>%
  arrange(desc(pc_immun_class), desc(Usual.resident.population))
pc_immun_ordered
## # A tibble: 3,213 x 7
## # Groups: postcode, PHN_code, state, pc_immun, pc_immun_class [3,213]
## postcode PHN_code state pc_immun pc_immun_class Usual.resident.… count
## <int> <fct> <fct> <fct> <int> <int> <int>
## 1 4350 PHN304 QLD 95.0-100… 8 106745 1
## 2 3977 PHN203 VIC 95.0-100… 8 93326 8
## 3 4740 PHN307 QLD 95.0-100… 8 80089 2
## 4 4670 PHN306 QLD 95.0-100… 8 79606 2
## 5 4305 PHN304 QLD 95.0-100… 8 62549 4
## 6 6065 PHN501 WA 95.0-100… 8 61998 2
## 7 2259 PHN108 NSW 95.0-100… 8 59513 6
## 8 3350 PHN206 VIC 95.0-100… 8 59145 1
## 9 2650 PHN110 NSW 95.0-100… 8 56974 2
## 10 4551 PHN306 QLD 95.0-100… 8 56733 2
## # ... with 3,203 more rows
# Open the data viewer only in an interactive session: View() needs a GUI, so
# it is skipped when the script is knitted or run with Rscript.
if (interactive()) {
  View(pc_immun_ordered)
}
# First 20 rows of the ranking: the highest class, largest populations first.
top_20_postocdes <- head(pc_immun_ordered, 20)
top_20_postocdes
## # A tibble: 20 x 7
## # Groups: postcode, PHN_code, state, pc_immun, pc_immun_class [20]
## postcode PHN_code state pc_immun pc_immun_class Usual.resident.… count
## <int> <fct> <fct> <fct> <int> <int> <int>
## 1 4350 PHN304 QLD 95.0-10… 8 106745 1
## 2 3977 PHN203 VIC 95.0-10… 8 93326 8
## 3 4740 PHN307 QLD 95.0-10… 8 80089 2
## 4 4670 PHN306 QLD 95.0-10… 8 79606 2
## 5 4305 PHN304 QLD 95.0-10… 8 62549 4
## 6 6065 PHN501 WA 95.0-10… 8 61998 2
## 7 2259 PHN108 NSW 95.0-10… 8 59513 6
## 8 3350 PHN206 VIC 95.0-10… 8 59145 1
## 9 2650 PHN110 NSW 95.0-10… 8 56974 2
## 10 4551 PHN306 QLD 95.0-10… 8 56733 2
## 11 3216 PHN206 VIC 95.0-10… 8 56284 1
## 12 4300 PHN304 QLD 95.0-10… 8 55669 2
## 13 2155 PHN103 NSW 95.0-10… 8 55452 2
## 14 4680 PHN306 QLD 95.0-10… 8 54166 1
## 15 2261 PHN108 NSW 95.0-10… 8 52754 2
## 16 3037 PHN201 VIC 95.0-10… 8 51155 2
## 17 2340 PHN108 NSW 95.0-10… 8 49072 4
## 18 3810 PHN203 VIC 95.0-10… 8 47894 2
## 19 2444 PHN109 NSW 95.0-10… 8 46284 2
## 20 2540 PHN106 NSW/A… 95.0-10… 8 45121 4
# Last 20 rows of the 2016 ranking, re-sorted so the lowest class comes first
# and, within a class, the largest population first. The sorted result is
# stored so that the object matches the table that is printed.
# NOTE(review): this replaces the earlier bottom_20_postocdes built with
# head(low_pc_immun, 20). The two differ for class 2: tail() keeps the
# smallest-population class-2 rows because pc_immun_ordered is sorted by
# descending population - confirm which selection is wanted.
bottom_20_postocdes <- pc_immun_ordered %>%
  tail(20) %>%
  arrange(pc_immun_class, desc(Usual.resident.population))
bottom_20_postocdes
## # A tibble: 20 x 7
## # Groups: postcode, PHN_code, state, pc_immun, pc_immun_class [20]
## postcode PHN_code state pc_immun pc_immun_class Usual.resident.… count
## <int> <fct> <fct> <fct> <int> <int> <int>
## 1 2000 PHN101 NSW <70.0 1 27411 1
## 2 2481 PHN109 NSW <70.0 1 11769 1
## 3 2483 PHN109 NSW <70.0 1 10609 1
## 4 4552 PHN306 QLD <70.0 1 8432 1
## 5 4102 PHN302 QLD <70.0 1 7660 1
## 6 2454 PHN109 NSW <70.0 1 7570 1
## 7 2482 PHN109 NSW <70.0 1 6280 3
## 8 6083 PHN501 WA <70.0 1 3512 1
## 9 6073 PHN501 WA <70.0 1 3041 1
## 10 2775 PHN104 NSW <70.0 1 1498 3
## 11 4552 PHN306 QLD 70.0-74.9 2 8432 1
## 12 2454 PHN109 NSW 70.0-74.9 2 7570 1
## 13 6333 PHN503 WA 70.0-74.9 2 5845 1
## 14 6005 PHN501 WA 70.0-74.9 2 5624 2
## 15 2479 PHN109 NSW 70.0-74.9 2 4730 1
## 16 3461 PHN206 VIC 70.0-74.9 2 4149 1
## 17 2469 PHN109 NSW 70.0-74.9 2 4012 2
## 18 5172 PHN401 SA 70.0-74.9 2 3932 2
## 19 4677 PHN306 QLD 70.0-74.9 2 3549 1
## 20 2848 PHN107 NSW 70.0-74.9 2 1645 1
# 2011 rows that have a published coverage class (class 0, "NP", removed).
# Despite its name this object holds the filtered rows, not the mean itself.
mean_immun_rate_aust_2011 <- filter(data, year == 2011 & pc_immun_class != 0)
# National mean of the class code for 2011 (a class code, not a percentage).
mean(mean_immun_rate_aust_2011[["pc_immun_class"]])
## [1] 6.271792
# 2011 summary per state. For each state:
#   mean             - mean class code over rows with a non-zero class
#                      (0 when the state has no such rows)
#   rounded_mean     - that mean rounded to the nearest class code
#   pc_immun         - coverage-band label for rounded_mean, looked up in
#                      pc_immun_groups
#   total_rows       - number of rows for the state
#   num_of_non_NP    - rows with a non-zero class
#   num_of_NP        - rows with class 0 ("NP")
#   proportion_of_NP - num_of_NP divided by the number of rows
# NOTE(review): no Index.type filter is applied here, so the row counts
# presumably include one row per index type - confirm this is intended.
mean_immun_rate_state_2011 <- data %>%
  filter(year == 2011) %>%
  group_by(state) %>%
  summarise(
    mean = if (sum(pc_immun_class[pc_immun_class != 0]) > 0) {
      mean(pc_immun_class[pc_immun_class != 0])
    } else {
      0
    },
    rounded_mean = round(as.numeric(mean)),
    pc_immun = pc_immun_groups$pc_immun[pc_immun_groups$pc_immun_class == rounded_mean],
    total_rows = n(),
    num_of_non_NP = sum(pc_immun_class != 0),
    num_of_NP = sum(pc_immun_class == 0),
    proportion_of_NP = num_of_NP / n()
  )
mean_immun_rate_state_2011
## # A tibble: 8 x 8
## state mean rounded_mean pc_immun total_rows num_of_non_NP num_of_NP
## <fct> <dbl> <dbl> <fct> <int> <int> <int>
## 1 ACT 6.38 6 90.0-92… 316 316 0
## 2 NSW 6.28 6 90.0-92… 6920 6920 0
## 3 NT 6.71 7 92.5-94… 220 220 0
## 4 QLD 6.28 6 90.0-92… 4268 4268 0
## 5 SA 6.03 6 90.0-92… 2252 2252 0
## 6 TAS 6.53 7 92.5-94… 772 772 0
## 7 VIC 6.56 7 92.5-94… 5308 5308 0
## 8 WA 5.61 6 90.0-92… 2108 2108 0
## # ... with 1 more variable: proportion_of_NP <dbl>
# 2016 rows that have a published coverage class (class 0, "NP", removed).
# Despite its name this object holds the filtered rows, not the mean itself.
mean_immun_rate_aust_2016 <- filter(data, year == 2016 & pc_immun_class != 0)
# National mean of the class code for 2016 (a class code, not a percentage).
mean(mean_immun_rate_aust_2016[["pc_immun_class"]])
## [1] 6.63243
# 2016 summary per state. For each state:
#   mean             - mean class code over rows with a non-zero class
#                      (0 when the state has no such rows)
#   rounded_mean     - that mean rounded to the nearest class code
#   pc_immun         - coverage-band label for rounded_mean, looked up in
#                      pc_immun_groups
#   total_rows       - number of rows for the state
#   num_of_non_NP    - rows with a non-zero class
#   num_of_NP        - rows with class 0 ("NP")
#   proportion_of_NP - num_of_NP divided by the number of rows
# NOTE(review): no Index.type filter is applied here, so the row counts
# presumably include one row per index type - confirm this is intended.
mean_immun_rate_state_2016 <- data %>%
  filter(year == 2016) %>%
  group_by(state) %>%
  summarise(
    mean = if (sum(pc_immun_class[pc_immun_class != 0]) > 0) {
      mean(pc_immun_class[pc_immun_class != 0])
    } else {
      0
    },
    rounded_mean = round(as.numeric(mean)),
    pc_immun = pc_immun_groups$pc_immun[pc_immun_groups$pc_immun_class == rounded_mean],
    total_rows = n(),
    num_of_non_NP = sum(pc_immun_class != 0),
    num_of_NP = sum(pc_immun_class == 0),
    proportion_of_NP = num_of_NP / n()
  )
mean_immun_rate_state_2016
## # A tibble: 12 x 8
## state mean rounded_mean pc_immun total_rows num_of_non_NP num_of_NP
## <fct> <dbl> <dbl> <fct> <int> <int> <int>
## 1 ACT 6.64 7 92.5-94… 312 280 32
## 2 NSW 6.58 7 92.5-94… 8648 6800 1848
## 3 NSW/… 7.52 8 95.0-10… 108 84 24
## 4 NSW/… 0 0 NP 48 0 48
## 5 NSW/… 7 7 92.5-94… 24 24 0
## 6 NT 6.22 6 90.0-92… 324 180 144
## 7 QLD 6.82 7 92.5-94… 5919 4180 1739
## 8 SA 6.61 7 92.5-94… 4232 2236 1996
## 9 SA/W… 5.67 6 90.0-92… 24 24 0
## 10 TAS 6.82 7 92.5-94… 1440 704 736
## 11 VIC 6.76 7 92.5-94… 9176 5364 3812
## 12 WA 6.10 6 90.0-92… 4072 2128 1944
## # ... with 1 more variable: proportion_of_NP <dbl>
# Open the data viewer only in an interactive session: View() needs a GUI, so
# it is skipped when the script is knitted or run with Rscript.
if (interactive()) {
  View(mean_immun_rate_state_2016)
}
# Summary per electorate, 2016 party and year ("Advantage and Disadvantage"
# index rows only). All years are summarised here; the 2016 rows are picked
# out afterwards. For each group:
#   mean             - mean class code over rows with a non-zero class
#                      (0 when the group has no such rows)
#   rounded_mean     - that mean rounded to the nearest class code
#   pc_immun         - coverage-band label for rounded_mean, looked up in
#                      pc_immun_groups
#   total_rows / num_of_non_NP / num_of_NP / proportion_of_NP - row counts
#                      and the share of rows with class 0 ("NP")
# The result is sorted by ascending mean.
electorates <- data %>%
  filter(Index.type == "Index of Relative Socio-economic Advantage and Disadvantage") %>%
  group_by(Electoral.division, PartyNm2016, year) %>%
  summarise(
    mean = if (sum(pc_immun_class[pc_immun_class != 0]) > 0) {
      mean(pc_immun_class[pc_immun_class != 0])
    } else {
      0
    },
    rounded_mean = round(as.numeric(mean)),
    pc_immun = pc_immun_groups$pc_immun[pc_immun_groups$pc_immun_class == rounded_mean],
    total_rows = n(),
    num_of_non_NP = sum(pc_immun_class != 0),
    num_of_NP = sum(pc_immun_class == 0),
    proportion_of_NP = num_of_NP / n()
  ) %>%
  arrange(mean)
Get the 10 electorates with the lowest mean pc_immun_class scores for 2016
# electorates is sorted by ascending mean, so the first 10 rows for 2016 are
# the electorates with the lowest mean class code.
lowest_pc_immun <- electorates %>%
  filter(year == 2016) %>%
  head(10)
lowest_pc_immun
## # A tibble: 10 x 10
## # Groups: Electoral.division, PartyNm2016 [10]
## Electoral.divis… PartyNm2016 year mean rounded_mean pc_immun
## <fct> <fct> <int> <dbl> <dbl> <fct>
## 1 Richmond Australian… 2016 4.38 4 80.0-84…
## 2 Fairfax Liberal Na… 2016 5.26 5 85.0-89…
## 3 Wentworth Liberal 2016 5.36 5 85.0-89…
## 4 Curtin Liberal 2016 5.56 6 90.0-92…
## 5 Reid Liberal 2016 5.61 6 90.0-92…
## 6 Swan Liberal 2016 5.63 6 90.0-92…
## 7 Perth Australian… 2016 5.65 6 90.0-92…
## 8 Cowper The Nation… 2016 5.68 6 90.0-92…
## 9 Fisher Liberal Na… 2016 5.69 6 90.0-92…
## 10 Fremantle Australian… 2016 5.70 6 90.0-92…
## # ... with 4 more variables: total_rows <int>, num_of_non_NP <int>,
## # num_of_NP <int>, proportion_of_NP <dbl>
Get those electorate names
# Electorate names of the 10 lowest electorates as a plain character vector,
# for use with %in% when filtering. (A select() result assigned to the same
# name was overwritten immediately and has been dropped.)
lowest_pc_immun_names <- as.character(lowest_pc_immun$Electoral.division)
Let’s look at postcodes in the electorates with lowest mean pc_immun_class scores
# Postcode-level rows for the 10 electorates that had the lowest mean class
# in 2016. Rows from every year are kept so the plots can show the history of
# those electorates.
lowest_electorates_2016 <- data %>%
  filter(Electoral.division %in% lowest_pc_immun_names) %>%
  select(state, year, Index.type, Electoral.division, postcode, age, pc_immun, pc_immun_class, PHN_code, Score)
# Coverage band by age group, one panel per year, point size = index score
# ("Relative Socio-economic Disadvantage" index rows only).
# Only ncol is fixed, as in the other index plots; also forcing nrow = 5
# would reserve more rows than there are years and leave empty space.
ggplot(filter(lowest_electorates_2016, Index.type == "Index of Relative Socio-economic Disadvantage"), aes(age)) +
  geom_point(aes(y = pc_immun, size = Score, color = age), na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(title = "Relative Socio-economic Disadvantage Scores", subtitle = "Bottom 10 electorates in 2016: a retrospective")
# Bottom 10 electorates: coverage band by age group, one panel per year,
# point size = "Relative Socio-economic Advantage and Disadvantage" score.
lowest_electorates_2016 %>%
  filter(Index.type == "Index of Relative Socio-economic Advantage and Disadvantage") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Relative Socio-economic Advantage and Disadvantage Scores",
    subtitle = "Bottom 10 electorates in 2016: a retrospective"
  )
# Bottom 10 electorates: coverage band by age group, one panel per year,
# point size = "Economic Resources" score.
lowest_electorates_2016 %>%
  filter(Index.type == "Index of Economic Resources") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Economic Resources Scores",
    subtitle = "Bottom 10 electorates in 2016: a retrospective"
  )
# Bottom 10 electorates: coverage band by age group, one panel per year,
# point size = "Education and Occupation" score.
lowest_electorates_2016 %>%
  filter(Index.type == "Index of Education and Occupation") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Education and Occupation Scores",
    subtitle = "Bottom 10 electorates in 2016: a retrospective"
  )
# electorates is sorted by ascending mean, so the last 10 rows for 2016 are
# the electorates with the highest mean class code.
highest_pc_immun <- electorates %>%
  filter(year == 2016) %>%
  tail(10)
highest_pc_immun
## # A tibble: 10 x 10
## # Groups: Electoral.division, PartyNm2016 [10]
## Electoral.divis… PartyNm2016 year mean rounded_mean pc_immun
## <fct> <fct> <int> <dbl> <dbl> <fct>
## 1 Charlton <NA> 2016 7.28 7 92.5-94…
## 2 Herbert Australian… 2016 7.29 7 92.5-94…
## 3 Macarthur Australian… 2016 7.29 7 92.5-94…
## 4 Lilley Australian… 2016 7.30 7 92.5-94…
## 5 Dawson Liberal Na… 2016 7.34 7 92.5-94…
## 6 Riverina The Nation… 2016 7.38 7 92.5-94…
## 7 Wannon Liberal 2016 7.42 7 92.5-94…
## 8 Hunter Australian… 2016 7.42 7 92.5-94…
## 9 Hughes Liberal 2016 7.46 7 92.5-94…
## 10 Shortland Australian… 2016 7.78 8 95.0-10…
## # ... with 4 more variables: total_rows <int>, num_of_non_NP <int>,
## # num_of_NP <int>, proportion_of_NP <dbl>
# Electorate names of the 10 highest electorates as a plain character vector,
# for use with %in% when filtering. (A select() result assigned to the same
# name was overwritten immediately and has been dropped.)
highest_pc_immun_names <- as.character(highest_pc_immun$Electoral.division)
# Postcode-level rows for the 10 electorates that had the highest mean class
# in 2016. Rows from every year are kept so the plots can show the history of
# those electorates.
highest_electorates_2016 <- data %>%
  filter(Electoral.division %in% highest_pc_immun_names) %>%
  select(
    state, year, Electoral.division, Index.type, postcode, age, pc_immun,
    pc_immun_class, PHN_code, Score
  )
# Coverage band by age group, one panel per year, point size =
# "Relative Socio-economic Disadvantage" score.
highest_electorates_2016 %>%
  filter(Index.type == "Index of Relative Socio-economic Disadvantage") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Relative Socio-economic Disadvantage Score",
    subtitle = "Top 10 highest electorates in 2016: a retrospective"
  )
# Top 10 electorates: coverage band by age group, one panel per year,
# point size = "Relative Socio-economic Advantage and Disadvantage" score.
highest_electorates_2016 %>%
  filter(Index.type == "Index of Relative Socio-economic Advantage and Disadvantage") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Relative Socio-economic Advantage and Disadvantage Scores",
    subtitle = "Top 10 highest electorates in 2016: a retrospective"
  )
# Top 10 electorates: coverage band by age group, one panel per year,
# point size = "Economic Resources" score.
highest_electorates_2016 %>%
  filter(Index.type == "Index of Economic Resources") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Economic Resources Scores",
    subtitle = "Top 10 highest electorates in 2016: a retrospective"
  )
# Top 10 electorates: coverage band by age group, one panel per year,
# point size = "Education and Occupation" score.
highest_electorates_2016 %>%
  filter(Index.type == "Index of Education and Occupation") %>%
  ggplot(aes(x = age, y = pc_immun, size = Score, colour = age)) +
  geom_point(na.rm = TRUE) +
  facet_wrap(~ year, ncol = 3) +
  labs(
    title = "Education and Occupation Scores",
    subtitle = "Top 10 highest electorates in 2016: a retrospective"
  )
# Summary per party recorded in PartyNm2016. No year or Index.type filter is
# applied, so rows from every year and every index type are pooled.
# For each party:
#   mean             - mean class code over rows with a non-zero class
#                      (0 when the party has no such rows)
#   rounded_mean     - that mean rounded to the nearest class code
#   pc_immun         - coverage-band label for rounded_mean, looked up in
#                      pc_immun_groups
#   total_rows / num_of_non_NP / num_of_NP / proportion_of_NP - row counts
#                      and the share of rows with class 0 ("NP")
party_2016 <- data %>%
  group_by(PartyNm2016) %>%
  summarise(
    mean = if (sum(pc_immun_class[pc_immun_class != 0]) > 0) {
      mean(pc_immun_class[pc_immun_class != 0])
    } else {
      0
    },
    rounded_mean = round(as.numeric(mean)),
    pc_immun = pc_immun_groups$pc_immun[pc_immun_groups$pc_immun_class == rounded_mean],
    total_rows = n(),
    num_of_non_NP = sum(pc_immun_class != 0),
    num_of_NP = sum(pc_immun_class == 0),
    proportion_of_NP = num_of_NP / n()
  )
party_2016
## # A tibble: 9 x 8
## PartyNm2016 mean rounded_mean pc_immun total_rows num_of_non_NP
## <fct> <dbl> <dbl> <fct> <int> <int>
## 1 Australian… 6.27 6 90.0-92… 65980 54932
## 2 Independent 6.68 7 92.5-94… 3864 1636
## 3 Katter's A… 6.69 7 92.5-94… 2296 1528
## 4 Liberal 6.24 6 90.0-92… 61084 38884
## 5 Liberal Na… 6.37 6 90.0-92… 21935 15824
## 6 Nick Xenop… 5.74 6 90.0-92… 3464 1476
## 7 The Greens 5.85 6 90.0-92… 1280 1224
## 8 The Nation… 6.58 7 92.5-94… 19584 9024
## 9 <NA> 6.60 7 92.5-94… 11676 7816
## # ... with 2 more variables: num_of_NP <int>, proportion_of_NP <dbl>
#check for missing values
# Open the data viewer only in an interactive session: View() needs a GUI, so
# it is skipped when the script is knitted or run with Rscript.
if (interactive()) {
  View(data)
}
# Abandoned draft of a per-column NA count, kept for reference:
#na_data <- data %>%
#summarize(
#state_na = count(is.na(state)),
#state_total = count(state),
#)
#na_data
# Amelia's missingness map: shows missing versus observed cells of `data`.
missmap(data, main = "Missing values vs observed")
#read this to see if I should impute missing data https://www.linkedin.com/pulse/amelia-packager-missing-data-imputation-ramprakash-veluchamy/
# Scatter of coverage class against postcode, coloured by year.
# Not a very readable picture.
ggplot(data, aes(x = postcode, y = pc_immun_class)) +
  geom_point(aes(colour = year))
# Number of rows per year and coverage class.
counts_by_year_merged <- data %>%
  group_by(year, pc_immun_class) %>%
  summarise(count = n())
# Attach the coverage-band label that belongs to each class code, using the
# class-to-label pairs in pc_immun_groups. An all-NA pc_immun column would
# collapse any facet_wrap(~pc_immun) plot into a single "NA" panel.
counts_by_year_merged$pc_immun <- pc_immun_groups$pc_immun[
  match(counts_by_year_merged$pc_immun_class, pc_immun_groups$pc_immun_class)
]
# Open the data viewer only in an interactive session: View() needs a GUI, so
# it is skipped when the script is knitted or run with Rscript.
if (interactive()) {
  View(counts_by_year_merged)
}
Now try plots
# How the row counts per coverage class changed between 2011 and 2016:
# count on x, year on y, coloured by class, one panel per pc_immun value.
# TODO: the low counts are cluttered together on the count axis - find a
# less crowded layout.
ggplot(
  counts_by_year_merged,
  aes(x = count, y = year, colour = pc_immun_class)
) +
  geom_point() +
  facet_wrap(~ pc_immun, nrow = 2)