Josh looks at the characteristics of the people in our survey (years teaching, level of preparedness) and examines how they affect the type of data respondents use in their classrooms (Q106 or other quantitative questions).
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.1 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Attach {targets}, load the `data_combined_filtered` target into the session,
# and give it a short alias that the analyses below refer to.
library(targets)
tar_load(data_combined_filtered)
d <- data_combined_filtered
# Frequency table of reported years worked.
# `years_worked` is coerced to numeric first; entries that cannot be parsed
# become NA (hence the coercion warning) and are counted in their own row.
d %>%
  mutate(years_worked = as.numeric(years_worked), .keep = "none") %>%
  count(years_worked) %>% # there are some outliers here
  knitr::kable()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
years_worked | n |
---|---|
2.00 | 4 |
3.00 | 3 |
4.00 | 5 |
5.00 | 11 |
6.00 | 6 |
7.00 | 10 |
8.00 | 7 |
9.00 | 10 |
10.00 | 10 |
11.00 | 8 |
12.00 | 12 |
13.00 | 8 |
14.00 | 11 |
15.00 | 15 |
16.00 | 14 |
17.00 | 12 |
18.00 | 16 |
19.00 | 8 |
20.00 | 22 |
21.00 | 16 |
21.75 | 1 |
22.00 | 22 |
23.00 | 13 |
24.00 | 9 |
25.00 | 13 |
26.00 | 3 |
26.50 | 1 |
27.00 | 8 |
28.00 | 6 |
29.00 | 7 |
30.00 | 6 |
31.00 | 4 |
32.00 | 2 |
33.00 | 1 |
34.00 | 4 |
35.00 | 2 |
36.00 | 6 |
37.00 | 1 |
38.00 | 3 |
39.00 | 1 |
40.00 | 1 |
46.00 | 1 |
120.00 | 1 |
500.00 | 1 |
NA | 13 |
# paper
# Mean years worked, split by whether the respondent's Q106 answer mentions
# collecting first-hand data using pen (and paper).
# - `str_detect()` returns NA when `q106` is missing, so those respondents
#   form their own NA group.
# - Non-numeric `years_worked` entries become NA on coercion and are dropped
#   from the mean by `na.rm = TRUE`.
# NOTE(review): the count table above shows implausible `years_worked` values
# (120 and 500); they are included in these means -- consider filtering them.
d %>%
  select(years_worked, q106) %>%
  mutate(
    years_worked = as.numeric(years_worked),
    first_hand_paper = str_detect(q106, "first-hand data using pen")
  ) %>%
  group_by(first_hand_paper) %>%
  summarize(mean_years_worked = mean(years_worked, na.rm = TRUE))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## # A tibble: 3 x 2
## first_hand_paper mean_years_worked
## <lgl> <dbl>
## 1 FALSE 18.3
## 2 TRUE 20.6
## 3 NA 20
# separate(q106, into = str_c("var_", 1:4), sep = ",")
# digital
# Mean years worked, split by whether the respondent's Q106 answer mentions
# collecting first-hand data using digital tools.
# - `str_detect()` returns NA when `q106` is missing, so those respondents
#   form their own NA group.
# - Non-numeric `years_worked` entries become NA on coercion and are dropped
#   from the mean by `na.rm = TRUE`.
# NOTE(review): the count table above shows implausible `years_worked` values
# (120 and 500); they are included in these means -- consider filtering them.
d %>%
  select(years_worked, q106) %>%
  mutate(
    years_worked = as.numeric(years_worked),
    first_hand_digital = str_detect(q106, "first-hand data using digital")
  ) %>%
  group_by(first_hand_digital) %>%
  summarize(mean_years_worked = mean(years_worked, na.rm = TRUE))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## # A tibble: 3 x 2
## first_hand_digital mean_years_worked
## <lgl> <dbl>
## 1 FALSE 17.9
## 2 TRUE 20.8
## 3 NA 20
# Reshape the multi-select Q98 answers to long format: one row per
# respondent per answer slot.
# - `q98` is split on commas into up to 10 slots (`x1` ... `x10`); respondents
#   with fewer selections get NA in the unused slots (tidyr warns about this).
# - `pivot_longer()` replaces the superseded `gather()`; the output columns
#   keep the same names (`key`, `val`).
# NOTE(review): row order within a `response_id` matches the previous
# `gather()` version only if `response_id` is unique per row -- confirm.
d %>%
  select(response_id, q98) %>%
  separate(q98, into = str_c("x", 1:10), sep = ",") %>%
  pivot_longer(-response_id, names_to = "key", values_to = "val") %>%
  arrange(response_id)
## Warning: Expected 10 pieces. Missing pieces filled with `NA` in 330 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## # A tibble: 3,380 x 3
## response_id key val
## <chr> <chr> <chr>
## 1 R_0ctoJq4gP39E5zP x1 High school course in science
## 2 R_0ctoJq4gP39E5zP x2 Undergraduate course in science
## 3 R_0ctoJq4gP39E5zP x3 <NA>
## 4 R_0ctoJq4gP39E5zP x4 <NA>
## 5 R_0ctoJq4gP39E5zP x5 <NA>
## 6 R_0ctoJq4gP39E5zP x6 <NA>
## 7 R_0ctoJq4gP39E5zP x7 <NA>
## 8 R_0ctoJq4gP39E5zP x8 <NA>
## 9 R_0ctoJq4gP39E5zP x9 <NA>
## 10 R_0ctoJq4gP39E5zP x10 <NA>
## # … with 3,370 more rows