# Attach pacman with library() so a missing installation stops the script
# here; require() would only return FALSE and let execution continue.
library(pacman)
# Attach every package the analysis uses in a single call. Each package is
# listed once (readxl was previously given twice).
pacman::p_load(
  devtools, readxl, tidyverse, DataExplorer, skimr, ggthemes, gghighlight,
  gganimate, gifski, png, h2o, scales, forecast, zoo, xts, MARSS, tibbletime,
  vars, bvar, wrapr, survey, GGally
)
# Location of the case-study workbook, relative to the working directory.
path <- "Data Analyst Case Study.xlsx"
# Read each worksheet of the workbook into its own list element; the list
# has one entry per sheet name returned by excel_sheets(), in that order.
dat_list <- lapply(
  excel_sheets(path),
  function(sheet) read_excel(path, sheet = sheet)
)
## New names:
## * `` -> `..2`
## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
## Skim summary statistics
## n obs: 246
## n variables: 9
##
## ── Variable type:factor ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## Age 3 243 246 5
## Gender 4 242 246 2
## Q1 59 187 246 4
## Q2 59 187 246 4
## Q3 59 187 246 4
## Q4 59 187 246 5
## Q5 59 187 246 4
## Response ID 0 246 246 246
## Status 0 246 246 2
## top_counts ordered
## 30-: 127, 18-: 93, 50-: 19, 65+: 3 FALSE
## Mal: 157, Fem: 85, NA: 4 FALSE
## Abs: 80, Ver: 62, NA: 59, Of : 41 FALSE
## Ver: 84, Of : 70, NA: 59, Abs: 31 FALSE
## Str: 132, NA: 59, Agr: 40, Nei: 8 FALSE
## Mod: 66, NA: 59, Ver: 56, Ext: 42 FALSE
## Fre: 75, Som: 71, NA: 59, All: 30 FALSE
## 1: 1, 2: 1, 3: 1, 4: 1 FALSE
## Com: 187, Dis: 59, NA: 0 FALSE
## Observations: 246
## Variables: 9
## $ `Response ID` <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Status <fct> Disqualified, Complete, Complete, Complete, Disqua…
## $ Age <fct> 18-29, 30-49, 30-49, 18-29, 30-49, 18-29, 30-49, 3…
## $ Gender <fct> Female, Male, Male, Female, Male, Male, Male, Fema…
## $ Q1 <fct> NA, Of little importance, Absolutely essential, Of…
## $ Q2 <fct> NA, Of average importance, Of average importance, …
## $ Q3 <fct> NA, Strongly agree, Strongly agree, Neither agree …
## $ Q4 <fct> NA, Very much, Very much, Moderately, NA, NA, NA, …
## $ Q5 <fct> NA, Sometimes, Sometimes, Not very often, NA, NA, …
## [1] "Absolutely essential" "Of average importance" "Of little importance"
## [4] "Very important"
## [1] 3 1 3 1 4 3 3 3 3 3 3 3 4 4 1 4 4 1 4 1 1 1 1 1 1 4 3 3 4 1 1 3 4 1 4
## [36] 1 4 1 4 1 4 1 4 3 1 4 3 3 4 4 3 3 3 1 1 4 1 1 4 4 4 3 3 3 3 4 4 4 1 1
## [71] 1 4 1 1 4 4 2 1 4 1 4 4 4 4 1 4 3 3 3 3 3 3 3 1 1 1 1 1 4 1 4 2 4 1 1
## [106] 1 1 1 1 4 4 1 4 1 1 4 1 1 1 1 4 4 3 3 3 3 4 4 2 4 1 4 4 4 2 1 1 1 4 1
## [141] 1 4 4 1 4 1 4 4 3 3 3 3 4 4 1 1 1 1 1 1 3 3 1 3 1 4 1 1 1 1 1 1 1 1 1
## [176] 1 1 1 4 4 1 4 1 4 4 3 1
## [1] "Agree somewhat" "Disagree somewhat"
## [3] "Neither agree nor disagree" "Strongly agree"
## [1] "A little bit" "Extremely" "Moderately" "Not at all"
## [5] "Very much"
## [1] "All the time" "Frequently" "Not very often" "Sometimes"
## # A tibble: 187 x 9
## # Groups: Gender, Age [8]
## `Response ID` Status Age Gender Q1 Q2 Q3 Q4 Q5
## <fct> <fct> <fct> <fct> <ord> <ord> <ord> <ord> <ord>
## 1 2 Comple… 30-49 Male Of lit… Of aver… Strong… Very… Somet…
## 2 3 Comple… 30-49 Male Absolu… Of aver… Strong… Very… Somet…
## 3 4 Comple… 18-29 Female Of lit… Of aver… Neithe… Mode… Not v…
## 4 8 Comple… 30-49 Female Absolu… Very im… Strong… Very… Somet…
## 5 10 Comple… 30-49 Male Very i… Of aver… Disagr… Mode… Somet…
## 6 11 Comple… 18-29 Male Of lit… Of aver… Strong… Extr… Frequ…
## 7 12 Comple… 50-64 Male Of lit… Of aver… Strong… Extr… All t…
## 8 13 Comple… 30-49 Male Of lit… Of aver… Strong… Very… Somet…
## 9 14 Comple… 30-49 Male Of lit… Very im… Agree … A li… Somet…
## 10 15 Comple… 18-29 Male Of lit… Of aver… Agree … Mode… Somet…
## # … with 177 more rows
## # A tibble: 106 x 9
## # Groups: Gender, Age [7]
## `Response ID` Status Age Gender Q1 Q2 Q3 Q4 Q5
## <fct> <fct> <fct> <fct> <ord> <ord> <ord> <ord> <ord>
## 1 1 Comple… 18-29 Male Absolut… Very i… Stron… Very … Frequ…
## 2 4 Comple… 30-49 Female Absolut… Absolu… Stron… Very … Frequ…
## 3 6 Comple… 30-49 Male Absolut… Very i… Stron… Extre… Frequ…
## 4 7 Comple… 18-29 Female Very im… Very i… Stron… A lit… Somet…
## 5 8 Comple… 30-49 Male Absolut… Absolu… Agree… Extre… All t…
## 6 11 Comple… 50-64 Male Absolut… Absolu… Stron… Very … Frequ…
## 7 12 Comple… 30-49 Female Absolut… Absolu… Stron… Very … Frequ…
## 8 13 Comple… 30-49 Female Absolut… Absolu… Stron… Very … Frequ…
## 9 14 Comple… 30-49 Female Very im… Very i… Stron… Moder… Somet…
## 10 15 Comple… 30-49 Male Absolut… Very i… Stron… Moder… All t…
## # … with 96 more rows
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## # A tibble: 293 x 10
## # Groups: Gender, Age [8]
## `Response ID` Status Age Gender Q1 Q2 Q3 Q4 Q5 group
## <fct> <fct> <fct> <fct> <ord> <ord> <ord> <ord> <ord> <fct>
## 1 2 Comple… 30-49 Male Of li… Of av… Stro… Very… Some… cont…
## 2 3 Comple… 30-49 Male Absol… Of av… Stro… Very… Some… cont…
## 3 4 Comple… 18-29 Female Of li… Of av… Neit… Mode… Not … cont…
## 4 8 Comple… 30-49 Female Absol… Very … Stro… Very… Some… cont…
## 5 10 Comple… 30-49 Male Very … Of av… Disa… Mode… Some… cont…
## 6 11 Comple… 18-29 Male Of li… Of av… Stro… Extr… Freq… cont…
## 7 12 Comple… 50-64 Male Of li… Of av… Stro… Extr… All … cont…
## 8 13 Comple… 30-49 Male Of li… Of av… Stro… Very… Some… cont…
## 9 14 Comple… 30-49 Male Of li… Very … Agre… A li… Some… cont…
## 10 15 Comple… 18-29 Male Of li… Of av… Agre… Mode… Some… cont…
## # … with 283 more rows
## Warning in `==.default`(Q1, c("Very important", "Absolutely essential")):
## longer object length is not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
## # A tibble: 2 x 2
## group prop_4_5
## <fct> <dbl>
## 1 control 0.444
## 2 test 0.444
## Warning in `==.default`(Q1, c("Very important", "Absolutely essential")):
## longer object length is not a multiple of shorter object length
## Warning in `==.default`(Q1, c("Very important", "Absolutely essential")):
## longer object length is not a multiple of shorter object length
## # A tibble: 2 x 2
## group q1_prop
## <fct> <dbl>
## 1 control 0.263
## 2 test 0.157
## # A tibble: 2 x 2
## # Groups: group [2]
## group n
## <fct> <int>
## 1 control 187
## 2 test 106
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(77, 46) out of c(187, 106)
## X-squared = 0.13686, df = 1, p-value = 0.7114
## alternative hypothesis: two.sided
## 90 percent confidence interval:
## -0.12106153 0.07666641
## sample estimates:
## prop 1 prop 2
## 0.4117647 0.4339623
## Warning in `==.default`(Q2, c("Very important", "Absolutely essential")):
## longer object length is not a multiple of shorter object length
## Warning in `==.default`(Q2, c("Very important", "Absolutely essential")):
## longer object length is not a multiple of shorter object length
## # A tibble: 2 x 2
## group q1_prop
## <fct> <dbl>
## 1 control 0.201
## 2 test 0.147
## # A tibble: 2 x 2
## # Groups: group [2]
## group n
## <fct> <int>
## 1 control 187
## 2 test 106
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(59, 43) out of c(187, 106)
## X-squared = 2.4229, df = 1, p-value = 0.1196
## alternative hypothesis: two.sided
## 90 percent confidence interval:
## -0.18647689 0.00617218
## sample estimates:
## prop 1 prop 2
## 0.3155080 0.4056604
## Warning in `==.default`(Q3, c("Agree somewhat", "Strongly agree")): longer
## object length is not a multiple of shorter object length
## Warning in `==.default`(Q3, c("Agree somewhat", "Strongly agree")): longer
## object length is not a multiple of shorter object length
## # A tibble: 2 x 2
## group q1_prop
## <fct> <dbl>
## 1 control 0.287
## 2 test 0.174
## # A tibble: 2 x 2
## # Groups: group [2]
## group n
## <fct> <int>
## 1 control 187
## 2 test 106
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(84, 51) out of c(187, 106)
## X-squared = 0.27767, df = 1, p-value = 0.5982
## alternative hypothesis: two.sided
## 90 percent confidence interval:
## -0.13169196 0.06782354
## sample estimates:
## prop 1 prop 2
## 0.4491979 0.4811321
## Joining, by = c("group", "Age", "Gender")
## Warning in prop.test(x = c(3, 5), n = c(8, 9), conf.level = 0.9): Chi-
## squared approximation may be incorrect
##
## 2-sample test for equality of proportions with continuity
## correction
##
## data: c(3, 5) out of c(8, 9)
## X-squared = 0.066406, df = 1, p-value = 0.7966
## alternative hypothesis: two.sided
## 90 percent confidence interval:
## -0.6903893 0.3292782
## sample estimates:
## prop 1 prop 2
## 0.3750000 0.5555556
## Joining, by = c("group", "Age", "Gender")
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(13, 14) out of c(28, 49)
## X-squared = 2.4954, df = 1, p-value = 0.1142
## alternative hypothesis: two.sided
## 90 percent confidence interval:
## -0.009316284 0.366459141
## sample estimates:
## prop 1 prop 2
## 0.4642857 0.2857143
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(8, 12) out of c(37, 28)
## X-squared = 3.3741, df = 1, p-value = 0.03311
## alternative hypothesis: less
## 90 percent confidence interval:
## -1.0000000 -0.0644121
## sample estimates:
## prop 1 prop 2
## 0.2162162 0.4285714