Description of the Dataset and Variables

2017 Australian Marraige Law

A postal survey was sent to Australians asking the question: Should the law be changed to allow same-sex couples to marry? The data was reported by the Australian Bureau of Statistics using Excel.

Variables

Divisions (Australian States/Territories)
Cities (within the Divisions)
Response_Clear_Yes: those who clearly selected Yes in support of the new marraige law
Response_Clear_No: those who clearly selected No to the new marraige law
Response_Not_Clear: those in which researchers could not determine which answer the respondent intended to select
Non_Response: those who did not return the survey or select any of the responses

Tidying and Cleaning the Data

Read the data into R

ausmarraige <- read_excel("../../DACSS601_R/Data/australian_marriage_law_postal_survey_2017_-_response_final.xls", "Table 2", skip=6)

## New names:
## * `` -> ...1
## * no. -> no....2
## * `%` -> `%...3`
## * no. -> no....4
## * `%` -> `%...5`
## * ...

head(ausmarraige, 10)

## # A tibble: 10 x 16
##    ...1   no....2 `%...3` no....4 `%...5` no....6 `%...7` ...8  no....9 `%...10`
##    <chr>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>   <dbl>    <dbl>
##  1 New S~      NA    NA        NA    NA        NA      NA NA         NA     NA  
##  2 Banks    37736    44.9   46343    55.1   84079     100 NA      84079     79.9
##  3 Barton   37153    43.6   47984    56.4   85137     100 NA      85137     77.8
##  4 Benne~   42943    49.8   43215    50.2   86158     100 NA      86158     81  
##  5 Berow~   48471    54.6   40369    45.4   88840     100 NA      88840     84.5
##  6 Blaxl~   20406    26.1   57926    73.9   78332     100 NA      78332     75  
##  7 Bradf~   53681    60.6   34927    39.4   88608     100 NA      88608     83.5
##  8 Calare   54091    60.2   35779    39.8   89870     100 NA      89870     77.8
##  9 Chifl~   32871    41.3   46702    58.7   79573     100 NA      79573     73.7
## 10 Cook     47505    55     38804    45     86309     100 NA      86309     82  
## # ... with 6 more variables: no....11 <dbl>, %...12 <dbl>, no....13 <dbl>,
## #   %...14 <dbl>, no....15 <dbl>, %...16 <dbl>

View column names

colnames(ausmarraige)

##  [1] "...1"     "no....2"  "%...3"    "no....4"  "%...5"    "no....6" 
##  [7] "%...7"    "...8"     "no....9"  "%...10"   "no....11" "%...12"  
## [13] "no....13" "%...14"   "no....15" "%...16"

Select the columns that we want to keep and rename them

ausmarraige1 <- select(ausmarraige, "...1", "no....2", "no....4", "no....11", "no....13")%>%
  rename(Cities=...1, Response_Clear_Yes=no....2, Response_Clear_No=no....4, Response_Not_Clear=no....11, Non_Response=no....13)%>%
  drop_na(Cities)

head(ausmarraige1)

## # A tibble: 6 x 5
##   Cities        Response_Clear_Y~ Response_Clear_~ Response_Not_Cl~ Non_Response
##   <chr>                     <dbl>            <dbl>            <dbl>        <dbl>
## 1 New South Wa~                NA               NA               NA           NA
## 2 Banks                     37736            46343              247        20928
## 3 Barton                    37153            47984              226        24008
## 4 Bennelong                 42943            43215              244        19973
## 5 Berowra                   48471            40369              212        16038
## 6 Blaxland                  20406            57926              220        25883

Remove division totals and footnotes

ausmarraige1 <- ausmarraige1 %>%
  filter(!str_detect(Cities, "Total"))%>%
  filter(!str_starts(Cities, "\\("))%>%
  filter(Cities != "Australia")%>%
  filter(!str_starts(Cities, "\\©"))

Create new column for the divisions

ausmarraige1 <- ausmarraige1 %>%
  mutate(Divisions = case_when(
    str_ends(Cities, "Divisions") ~ Cities
  ))
ausmarraige2 <- ausmarraige1[, c("Divisions", "Cities", "Response_Clear_Yes", "Response_Clear_No", "Response_Not_Clear", "Non_Response")]
head(ausmarraige2)

## # A tibble: 6 x 6
##   Divisions                 Cities Response_Clear_~ Response_Clear_~ Response_Not_Cl~
##   <chr>                     <chr>             <dbl>            <dbl>            <dbl>
## 1 New South Wales Divisions New S~               NA               NA               NA
## 2 <NA>                      Banks             37736            46343              247
## 3 <NA>                      Barton            37153            47984              226
## 4 <NA>                      Benne~            42943            43215              244
## 5 <NA>                      Berow~            48471            40369              212
## 6 <NA>                      Blaxl~            20406            57926              220
## # ... with 1 more variable: Non_Response <dbl>

Continue to fix the Divisions column using fill

ausmarraige2 <- fill(ausmarraige2, Divisions, .direction = c("down"))

Remove the Divisions from the Cities column using filter

ausmarraige2 <- filter(ausmarraige2, !str_detect(Cities, "Divisions"))
head(ausmarraige2)

## # A tibble: 6 x 6
##   Divisions                 Cities    Response_Clear_~ Response_Clear_~ Response_Not_Cl~
##   <chr>                     <chr>                <dbl>            <dbl>            <dbl>
## 1 New South Wales Divisions Banks                37736            46343              247
## 2 New South Wales Divisions Barton               37153            47984              226
## 3 New South Wales Divisions Bennelong            42943            43215              244
## 4 New South Wales Divisions Berowra              48471            40369              212
## 5 New South Wales Divisions Blaxland             20406            57926              220
## 6 New South Wales Divisions Bradfield            53681            34927              202
## # ... with 1 more variable: Non_Response <dbl>

write_csv(ausmarraige2, file = "../../DACSS601_R/Data/ausmar.csv", col_names = TRUE)

Create a column to show Totals for each City

austotals <- ausmarraige2 %>%
  mutate(Total=rowsum(Response_Clear_Yes, Response_Clear_No, Response_Not_Clear, Non_Response))

## Warning in if (reorder) ugroup <- sort(ugroup, na.last = TRUE, method =
## "quick"): the condition has length > 1 and only the first element will be used

head (austotals)

## # A tibble: 6 x 7
##   Divisions                 Cities    Response_Clear_~ Response_Clear_~ Response_Not_Cl~
##   <chr>                     <chr>                <dbl>            <dbl>            <dbl>
## 1 New South Wales Divisions Banks                37736            46343              247
## 2 New South Wales Divisions Barton               37153            47984              226
## 3 New South Wales Divisions Bennelong            42943            43215              244
## 4 New South Wales Divisions Berowra              48471            40369              212
## 5 New South Wales Divisions Blaxland             20406            57926              220
## 6 New South Wales Divisions Bradfield            53681            34927              202
## # ... with 2 more variables: Non_Response <dbl>, Total <dbl[,1]>

Summary Descriptives of the Variables

Divisions and Cities

Australia comprises of 6 states and 2 territories (thus, 8 Divisions). The number of Cities in each Division is displayed below:

select(ausmarraige2, Divisions)%>%
  table()

## .
## Australian Capital Territory Divisions              New South Wales Divisions 
##                                      2                                     47 
##           Northern Territory Divisions                   Queensland Divisions 
##                                      2                                     30 
##              South Australia Divisions                     Tasmania Divisions 
##                                     11                                      5 
##                     Victoria Divisions            Western Australia Divisions 
##                                     37                                     16

The total number of eligible participants, by division:

divtotals <- select(austotals, Divisions, Total) 
divtotals2 <- divtotals %>%
  pivot_wider(names_from = Divisions, values_from = Total, values_fn = sum)
head(divtotals2)

## # A tibble: 1 x 8
##   `New South Wales Divisions` `Victoria Divis~ `Queensland Div~ `South Australi~
##                         <dbl>            <dbl>            <dbl>            <dbl>
## 1                     2711012          2092928          1501247           527426
## # ... with 4 more variables: Western Australia Divisions <dbl>,
## #   Tasmania Divisions <dbl>, Northern Territory Divisions <dbl>,
## #   Australian Capital Territory Divisions <dbl>

A proportion table of the eligible participants based on Division:

prop.table(divtotals2)

##   New South Wales Divisions Victoria Divisions Queensland Divisions
## 1                 0.3467988          0.2677321            0.1920429
##   South Australia Divisions Western Australia Divisions Tasmania Divisions
## 1                0.06746953                   0.0914524         0.02110833
##   Northern Territory Divisions Australian Capital Territory Divisions
## 1                  0.007598327                            0.005797565

Participant Responses

Descriptive statistics for Response_Clear_Yes:

summarize(ausmarraige2, Mean = mean(Response_Clear_Yes), Median = median(Response_Clear_Yes), Min = min(Response_Clear_Yes), Max = max(Response_Clear_Yes), SD = sd(Response_Clear_Yes), Var = var(Response_Clear_Yes), IQR = IQR(Response_Clear_Yes))

## # A tibble: 1 x 7
##     Mean Median   Min   Max     SD        Var   IQR
##    <dbl>  <dbl> <dbl> <dbl>  <dbl>      <dbl> <dbl>
## 1 52115. 51782. 19026 89590 12315. 151661815. 15259

Descriptive statistics for Response_Clear_No:

summarize(ausmarraige2, Mean = mean(Response_Clear_No), Median = median(Response_Clear_No), Min = min(Response_Clear_No), Max = max(Response_Clear_No), SD = sd(Response_Clear_No), Var = var(Response_Clear_No), IQR = IQR(Response_Clear_No))

## # A tibble: 1 x 7
##     Mean Median   Min   Max    SD       Var   IQR
##    <dbl>  <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1 32493. 31654. 14860 57926 8263. 68273681. 8274.

Descriptive statistics for Response_Not_Clear:

summarize(ausmarraige2, Mean = mean(Response_Not_Clear), Median = median(Response_Not_Clear), Min = min(Response_Not_Clear), Max = max(Response_Not_Clear), SD = sd(Response_Not_Clear), Var = var(Response_Not_Clear), IQR = IQR(Response_Not_Clear))

## # A tibble: 1 x 7
##    Mean Median   Min   Max    SD   Var   IQR
##   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  245.    240   106   377  55.9 3124.  68.8

Descriptive statistics for Non_Response:

summarize(ausmarraige2, Mean = mean(Non_Response), Median = median(Non_Response), Min = min(Non_Response), Max = max(Non_Response), SD = sd(Non_Response), Var = var(Non_Response), IQR = IQR(Non_Response))

## # A tibble: 1 x 7
##     Mean Median   Min   Max    SD       Var   IQR
##    <dbl>  <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1 21855. 21416. 13092 35841 4197. 17618721. 5562.

Experimenting with a Visualization

Bar graph modeling the total number of each response:

responsetotals <- ausmarraige2 %>%
  select(Response_Clear_Yes, Response_Clear_No, Response_Not_Clear, Non_Response)%>%
  summarise(Yes=sum(Response_Clear_Yes), No=sum(Response_Clear_No), NotClear=sum(Response_Not_Clear), NonResponse=sum(Non_Response))
rownames(responsetotals) <- c("Total")

## Warning: Setting row names on a tibble is deprecated.

responsetotals <- pivot_longer(responsetotals, Yes:NonResponse, names_to = "Response", values_to = "Total")

ggplot(data = responsetotals) +
  geom_bar(mapping = aes(x = Response, y = Total), stat = "identity") +
  labs(title = "Should Australian Law Change to Allow Same-Sex Marraige?", subtitle = "2017 postal survey of Australians", x = "Type of Response", y = "Total # of Responses")

HW4: Univariate Statistics

Megan Georges

11/23/2021