library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(skimr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(knitr)
#Discrimination
library(readxl)
Discrimination <- read_excel("Discrimination.xlsx")
Discrimination %>%
group_by(Gender) %>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 1000 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Gender |
Variable type: numeric
| skim_variable | Gender | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | Female | 0 | 1 | 18129.61 | 20019.71 | 222 | 2872.5 | 6400 | 39487.5 | 75098 | ▇▁▂▂▁ |
| Expenditures | Male | 0 | 1 | 18001.20 | 19068.01 | 386 | 2954.0 | 7219 | 37201.0 | 68890 | ▇▁▂▂▁ |
Discrimination %>%
select(Gender, Ethnicity, Expenditures)%>%
filter(Ethnicity == "White not Hispanic"|Ethnicity == "Hispanic")%>%
group_by(Ethnicity)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 777 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Ethnicity |
Variable type: numeric
| skim_variable | Ethnicity | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | Hispanic | 0 | 1 | 11065.57 | 15629.85 | 222 | 2331.25 | 3952 | 10292.5 | 65581 | ▇▁▁▁▁ |
| Expenditures | White not Hispanic | 0 | 1 | 24697.55 | 20604.38 | 340 | 3977.00 | 15718 | 43134.0 | 68890 | ▇▁▃▃▁ |
Discrimination %>%
select(Age, Expenditures)%>%
filter(Age>21,Age<51)%>%
group_by(Age)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 226 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Age |
Variable type: numeric
| skim_variable | Age | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | 22 | 0 | 1 | 39088.28 | 7428.45 | 27058 | 33062.50 | 40444.0 | 43966.00 | 50795 | ▆▂▇▆▆ |
| Expenditures | 23 | 0 | 1 | 39657.91 | 7262.15 | 25348 | 36265.25 | 40276.5 | 43997.50 | 50542 | ▃▂▇▆▆ |
| Expenditures | 24 | 0 | 1 | 41715.00 | 6256.03 | 30803 | 37191.50 | 41586.0 | 46859.00 | 51392 | ▃▇▅▅▇ |
| Expenditures | 25 | 0 | 1 | 41253.63 | 5058.41 | 31943 | 38352.00 | 40999.0 | 45676.50 | 49142 | ▃▅▇▁▇ |
| Expenditures | 26 | 0 | 1 | 38700.58 | 4566.97 | 30504 | 36058.75 | 38595.5 | 41635.25 | 45507 | ▂▇▅▇▇ |
| Expenditures | 27 | 0 | 1 | 40239.52 | 6389.62 | 28581 | 35888.50 | 38331.0 | 45657.00 | 50353 | ▃▇▇▃▇ |
| Expenditures | 28 | 0 | 1 | 36582.56 | 7214.79 | 26178 | 31027.00 | 37247.0 | 41464.00 | 48968 | ▇▇▇▇▃ |
| Expenditures | 29 | 0 | 1 | 40469.70 | 3291.66 | 34708 | 38830.50 | 40884.0 | 41847.75 | 46356 | ▃▂▇▃▂ |
| Expenditures | 30 | 0 | 1 | 41699.59 | 4835.71 | 33721 | 40125.00 | 40927.0 | 44910.00 | 51486 | ▇▇▇▆▃ |
| Expenditures | 31 | 0 | 1 | 39863.29 | 5592.99 | 33051 | 36514.00 | 39172.0 | 43007.00 | 47778 | ▅▁▇▁▅ |
| Expenditures | 32 | 0 | 1 | 37234.38 | 5118.98 | 28205 | 33630.00 | 37637.0 | 40587.00 | 45017 | ▃▃▇▃▆ |
| Expenditures | 33 | 0 | 1 | 39533.58 | 5205.37 | 27090 | 38117.50 | 40649.0 | 42313.25 | 47204 | ▂▂▅▇▃ |
| Expenditures | 34 | 0 | 1 | 42266.67 | 2876.00 | 39521 | 40344.25 | 41034.0 | 44437.25 | 46325 | ▇▂▁▁▅ |
| Expenditures | 35 | 0 | 1 | 34390.40 | 5517.50 | 27417 | 32529.00 | 33070.0 | 36607.00 | 42329 | ▃▇▁▃▃ |
| Expenditures | 36 | 0 | 1 | 35263.33 | 5300.06 | 29144 | 33695.00 | 38246.0 | 38323.00 | 38400 | ▃▁▁▁▇ |
| Expenditures | 37 | 0 | 1 | 38900.80 | 4953.63 | 30627 | 37948.00 | 41550.0 | 41924.00 | 42455 | ▂▁▁▂▇ |
| Expenditures | 38 | 0 | 1 | 39091.50 | 10567.09 | 29709 | 33743.25 | 36233.0 | 41581.25 | 54191 | ▃▇▁▁▃ |
| Expenditures | 39 | 0 | 1 | 38849.60 | 10259.48 | 31076 | 33259.00 | 36576.0 | 36621.00 | 56716 | ▇▇▁▁▃ |
| Expenditures | 40 | 0 | 1 | 44693.00 | 2924.17 | 42840 | 43007.50 | 43175.0 | 45619.50 | 48064 | ▇▁▁▁▃ |
| Expenditures | 41 | 0 | 1 | 48919.75 | 4990.17 | 42139 | 47452.00 | 49712.5 | 51180.25 | 54115 | ▇▁▇▇▇ |
| Expenditures | 42 | 0 | 1 | 44500.00 | 9311.18 | 37916 | 41208.00 | 44500.0 | 47792.00 | 51084 | ▇▁▁▁▇ |
| Expenditures | 44 | 0 | 1 | 46144.00 | NA | 46144 | 46144.00 | 46144.0 | 46144.00 | 46144 | ▁▁▇▁▁ |
| Expenditures | 45 | 0 | 1 | 48218.00 | 2950.05 | 46132 | 47175.00 | 48218.0 | 49261.00 | 50304 | ▇▁▁▁▇ |
| Expenditures | 46 | 0 | 1 | 52222.00 | NA | 52222 | 52222.00 | 52222.0 | 52222.00 | 52222 | ▁▁▇▁▁ |
| Expenditures | 48 | 0 | 1 | 49424.00 | NA | 49424 | 49424.00 | 49424.0 | 49424.00 | 49424 | ▁▁▇▁▁ |
allmwnh <- Discrimination %>%
select(Gender, Ethnicity, Expenditures)%>%
filter(Gender == "Male", Ethnicity == "White not Hispanic")%>%
skim(Expenditures)
print(allmwnh)
## -- Data Summary ------------------------
## Values
## Name Piped data
## Number of rows 196
## Number of columns 3
## _______________________
## Column type frequency:
## numeric 1
## ________________________
## Group variables None
##
## -- Variable type: numeric ------------------------------------------------------
## # A tibble: 1 x 11
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75
## * <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Expenditures 0 1 24574. 19828. 650 4195. 27390. 40818.
## p100 hist
## * <dbl> <chr>
## 1 68890 <U+2587><U+2581><U+2585><U+2583><U+2581>
Discrimination %>%
select(Age, Ethnicity, Expenditures)%>%
filter(Age>21, Age<51, Ethnicity == "Asian")%>%
group_by(Age)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 29 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Age |
Variable type: numeric
| skim_variable | Age | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | 22 | 0 | 1 | 40240.00 | NA | 40240 | 40240.00 | 40240.0 | 40240.00 | 40240 | ▁▁▇▁▁ |
| Expenditures | 23 | 0 | 1 | 37698.50 | 11102.48 | 25348 | 30595.00 | 37452.0 | 44555.50 | 50542 | ▇▇▁▇▇ |
| Expenditures | 24 | 0 | 1 | 40359.25 | 9199.66 | 30803 | 33406.25 | 40592.5 | 47545.50 | 49449 | ▇▁▁▁▇ |
| Expenditures | 25 | 0 | 1 | 41491.50 | 696.50 | 40999 | 41245.25 | 41491.5 | 41737.75 | 41984 | ▇▁▁▁▇ |
| Expenditures | 26 | 0 | 1 | 36281.50 | 5057.14 | 30504 | 32851.50 | 36739.0 | 40169.00 | 41144 | ▃▃▁▁▇ |
| Expenditures | 27 | 0 | 1 | 36617.33 | 5418.93 | 28581 | 33842.50 | 37245.0 | 39035.00 | 44293 | ▃▃▇▃▃ |
| Expenditures | 28 | 0 | 1 | 41123.00 | 11094.51 | 33278 | 37200.50 | 41123.0 | 45045.50 | 48968 | ▇▁▁▁▇ |
| Expenditures | 29 | 0 | 1 | 40813.50 | 282.14 | 40614 | 40713.75 | 40813.5 | 40913.25 | 41013 | ▇▁▁▁▇ |
| Expenditures | 30 | 0 | 1 | 33721.00 | NA | 33721 | 33721.00 | 33721.0 | 33721.00 | 33721 | ▁▁▇▁▁ |
| Expenditures | 31 | 0 | 1 | 47778.00 | NA | 47778 | 47778.00 | 47778.0 | 47778.00 | 47778 | ▁▁▇▁▁ |
| Expenditures | 40 | 0 | 1 | 48064.00 | NA | 48064 | 48064.00 | 48064.0 | 48064.00 | 48064 | ▁▁▇▁▁ |
| Expenditures | 41 | 0 | 1 | 54115.00 | NA | 54115 | 54115.00 | 54115.0 | 54115.00 | 54115 | ▁▁▇▁▁ |
Discrimination %>%
group_by(Gender) %>%
summarise(Median_Expenditures = median(Expenditures))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Gender Median_Expenditures
## <chr> <dbl>
## 1 Female 6400
## 2 Male 7219
Discrimination %>%
filter(Ethnicity == "Hispanic"| Ethnicity =="White not Hispanic")%>%
group_by(Ethnicity)%>%
summarise(Median_Expenditures = median(Expenditures))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Ethnicity Median_Expenditures
## <chr> <dbl>
## 1 Hispanic 3952
## 2 White not Hispanic 15718
Discrimination%>%
select(Age, Expenditures)%>%
filter(Age>12,Age<18)%>%
group_by(Age)%>%
summarise(Median_Expenditures = median(Expenditures))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
## Age Median_Expenditures
## <dbl> <dbl>
## 1 13 4003
## 2 14 4058
## 3 15 3715
## 4 16 3818.
## 5 17 4218.
Discrimination%>%
filter(Gender == "Male", Ethnicity == "White not Hispanic")%>%
summarise(Median_Expenditures = median(Expenditures))
## # A tibble: 1 x 1
## Median_Expenditures
## <dbl>
## 1 27390.
Discrimination%>%
filter(Age>13,Age<18, Ethnicity == "Asian")%>%
summarise(Median_Expenditures = median(Expenditures))
## # A tibble: 1 x 1
## Median_Expenditures
## <dbl>
## 1 3460.
skim_variable Gender n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist *
IQR_FEM <- 39488 - 2872
IQR_MALE <- 37201 -2954
IQR_FEM
## [1] 36616
IQR_MALE
## [1] 34247
– Variable type: numeric ——————————————————————————————————– # A tibble: 2 x 12 skim_variable Ethnicity n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist *
IQR_HISP <- 10292 - 2331
IQR_WHITENH <- 43134 -3977
IQR_HISP
## [1] 7961
IQR_WHITENH
## [1] 39157
Discrimination%>%
select(Age, Expenditures)%>%
filter(Age>12,Age<18)%>%
group_by(Age)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 212 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Age |
Variable type: numeric
| skim_variable | Age | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | 13 | 0 | 1 | 3918.67 | 980.86 | 1195 | 3406.00 | 4003.0 | 4518.50 | 5872 | ▁▂▆▇▃ |
| Expenditures | 14 | 0 | 1 | 3974.82 | 1196.83 | 386 | 3223.00 | 4058.0 | 4808.00 | 5920 | ▁▂▇▇▆ |
| Expenditures | 15 | 0 | 1 | 3827.16 | 817.12 | 2029 | 3205.25 | 3715.0 | 4531.50 | 5304 | ▂▇▇▇▅ |
| Expenditures | 16 | 0 | 1 | 3762.86 | 1038.39 | 1432 | 3158.75 | 3817.5 | 4466.25 | 5718 | ▃▃▇▆▅ |
| Expenditures | 17 | 0 | 1 | 4134.20 | 989.45 | 2015 | 3640.00 | 4218.5 | 4886.25 | 6798 | ▃▇▇▆▁ |
IQR13 <- 4518 - 3406
IQR14 <- 4808 - 3223
IQR15 <- 4532 - 3205
IQR16 <- 4466 - 3159
IQR17 <- 4886 - 3640
IQR13
## [1] 1112
IQR14
## [1] 1585
IQR15
## [1] 1327
IQR16
## [1] 1307
IQR17
## [1] 1246
– Variable type: numeric ——————————————————————————————————————- # A tibble: 1 x 11 skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist *
IQR <- 40818-4195
IQR
## [1] 36623
Discrimination%>%
select(Age, Ethnicity, Expenditures)%>%
filter(Age>17, Age<22, Ethnicity == "Asian")%>%
group_by(Age)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 41 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Age |
Variable type: numeric
| skim_variable | Age | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | 18 | 0 | 1 | 9368.07 | 3799.71 | 3153 | 7374.75 | 9523.0 | 10977.50 | 18435 | ▃▇▇▂▂ |
| Expenditures | 19 | 0 | 1 | 9374.00 | 1485.70 | 7475 | 8657.50 | 8767.0 | 10117.00 | 12243 | ▂▇▂▂▂ |
| Expenditures | 20 | 0 | 1 | 10554.83 | 2538.50 | 6255 | 8848.75 | 11389.5 | 12296.75 | 13345 | ▅▁▂▅▇ |
| Expenditures | 21 | 0 | 1 | 8674.57 | 2526.45 | 3988 | 7627.00 | 9504.0 | 10154.00 | 11668 | ▃▁▇▇▇ |
IQR18 <- 10978 - 7375
IQR18
## [1] 3603
IQR19 <- 10117 - 8658
IQR19
## [1] 1459
IQR20 <- 12297 - 8849
IQR20
## [1] 3448
IQR21 <- 10154 - 7627
IQR21
## [1] 2527
Discrimination %>%
group_by(Gender)%>%
summarise(Expenditures_by_gender = mean(Expenditures))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Gender Expenditures_by_gender
## <chr> <dbl>
## 1 Female 18130.
## 2 Male 18001.
Discrimination %>%
group_by(Ethnicity)%>%
summarise(Expenditures_by_ethn = mean(Expenditures))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 8 x 2
## Ethnicity Expenditures_by_ethn
## <chr> <dbl>
## 1 American Indian 36438.
## 2 Asian 18392.
## 3 Black 20885.
## 4 Hispanic 11066.
## 5 Multi Race 4457.
## 6 Native Hawaiian 42782.
## 7 Other 3316.
## 8 White not Hispanic 24698.
White_not_Hispanic <- 24697.549
Hispanic <- 11065.569
x <- White_not_Hispanic - Hispanic
x
## [1] 13631.98
Discrimination %>%
select(Gender, Ethnicity, Expenditures)%>%
filter(Ethnicity == "White not Hispanic"|Ethnicity == "Hispanic")%>%
group_by(Ethnicity)%>%
skim(Expenditures)
| Name | Piped data |
| Number of rows | 777 |
| Number of columns | 3 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | Ethnicity |
Variable type: numeric
| skim_variable | Ethnicity | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Expenditures | Hispanic | 0 | 1 | 11065.57 | 15629.85 | 222 | 2331.25 | 3952 | 10292.5 | 65581 | ▇▁▁▁▁ |
| Expenditures | White not Hispanic | 0 | 1 | 24697.55 | 20604.38 | 340 | 3977.00 | 15718 | 43134.0 | 68890 | ▇▁▃▃▁ |
WNH <- Discrimination %>%
filter(Ethnicity %in% c("White not Hispanic", "Hispanic"))
ggplot(WNH) +
aes(x = Expenditures, y = Age.Cohort, fill = Ethnicity) +
geom_boxplot(adjust = 1L) +
scale_fill_hue() +
theme_minimal()
## Warning: Ignoring unknown parameters: adjust
# CLEAN UP #################################################
rm(list = ls())
p_unload(all) # Remove all add-ons
cat(“\014”) # ctrl+L