Loading, setting up
library(tidyverse)
library(googlesheets4)
# d <- read_csv("data/fall-2020-analyzing-data-in-science-survey.csv")
meta <- as_sheets_id("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146")
meta
## Spreadsheet name: fall-2020-analyzing-data-in-science-survey
## ID: 1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g
## Locale: en_US
## Time zone: America/Los_Angeles
## # of sheets: 16
##
## (Sheet name): (Nominal extent in rows x columns)
## fall-2020-analyzing-data-in-science-survey: 333 x 81
## features-desired: 1000 x 27
## Q-80: Open coding : 1000 x 27
## Q 80 open coding new sheet: 1000 x 26
## Q 80 Interrater - Omiya: 1000 x 23
## Q 80 Interrater - Josh: 1000 x 23
## Q 80 AGREEMENT: 1000 x 23
## Q 80 - MASTER: 1000 x 22
## Q-82 open coding: 1000 x 25
## Q-80: code description: 1000 x 27
## Q 82 open coding new sheet: 1000 x 27
## Q 82 Interrater - Omiya: 1000 x 23
## Q 82 Interrater - Josh: 1000 x 23
## Q 82 Agreement: 1000 x 23
## Q 82 Master: 1000 x 23
## Q-82: code description: 1000 x 26
d <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146")
qual_q80 <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146", sheet = 8) %>% janitor::clean_names()
qual_q82 <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146", sheet = 15) %>% janitor::clean_names()
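Not shown above: googlesheets4 asks for Google authorization the first time read_sheet() runs in a session. A minimal sketch of the two usual options (the email below is a placeholder, and gs4_deauth() only applies if the sheet is shared with anyone who has the link):
# If the sheet is readable by anyone with the link, skip OAuth entirely:
# gs4_deauth()
# Otherwise authorize once per session; the cached token is reused afterwards:
# gs4_auth(email = "your-account@example.com")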
Prepping and exploring grade bands
d %>%
mutate(n_grades = str_count(grade_level_taught, ",")) %>%
count(n_grades)
## # A tibble: 12 × 2
## n_grades n
## <int> <int>
## 1 0 54
## 2 1 39
## 3 2 76
## 4 3 124
## 5 4 6
## 6 5 11
## 7 6 10
## 8 7 2
## 9 8 1
## 10 10 1
## 11 11 1
## 12 12 5
d_grades <- d %>%
select(response_id, grade_level_taught) %>%
mutate(grade_level_taught = str_replace(grade_level_taught, ":", "")) %>%
separate(col = grade_level_taught, into = c(str_c("grade", letters[1:13])), sep = ",") %>%
gather(key, val, -response_id) %>%
arrange(response_id) %>%
mutate(val = as.integer(val)) %>%
mutate(elem = if_else(val <= 5, 1, 0)) %>%
mutate(second = if_else(val >= 6, 1, 0)) %>%
mutate(middle = if_else(val >= 6 & val <= 8, 1, 0)) %>%
mutate(high = if_else(val >= 9, 1, 0)) %>%
select(response_id, elem, second, middle, high) %>%
group_by(response_id) %>%
summarize(elem = if_else(any(elem == 1), 1, 0),
second = if_else(any(second == 1), 1, 0),
middle = if_else(any(middle == 1), 1, 0),
high = if_else(any(high == 1), 1, 0)) %>%
mutate_if(is.double, replace_na, 0)
d_grades %>%
count(elem, middle, high) %>%
knitr::kable()
| elem | middle | high |   n |
|-----:|-------:|-----:|----:|
|    0 |      0 |    0 |   8 |
|    0 |      0 |    1 | 194 |
|    0 |      1 |    0 |  68 |
|    0 |      1 |    1 |  28 |
|    1 |      0 |    0 |  14 |
|    1 |      1 |    0 |   8 |
|    1 |      1 |    1 |  10 |
# 194 only teach high
# 68 only teach middle
# 14 only teach elem
# 28 teach middle and high
# 8 teach elem and middle
# 10 teach all
# 8 teach none (remove?)
d <- d %>%
left_join(d_grades)
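If the 8 respondents with no reported grade band were to be dropped (the "remove?" note above), one option after the join is a simple filter. This is only a sketch and is not applied in the analyses below:
# d <- d %>%
#   filter(elem == 1 | middle == 1 | high == 1)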
Reasons for not using a tool
Overall
d$q97 %>% is.na() %>% table()
## .
## FALSE
## 330
table(d$q97 == "NA")
##
## FALSE TRUE
## 327 3
overall_q97 <- d %>%
select(response_id, elem, middle, high, q97) %>%
mutate(q97 = str_replace(q97, ", ", "; ")) %>%
mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
filter(val!=" ") %>%
filter(!is.na(val)) %>%
count(val) %>%
mutate(percent = n / 330) %>%
arrange(desc(n)) %>%
mutate(percent = round(percent, 3))
# overall_q97 %>%
# ggplot(aes(x = reorder(val, percent), y = percent)) +
# geom_col() +
# coord_flip() +
# xlab(NULL)
Barriers by band - elem
elem <- d %>%
select(response_id, elem, middle, high, q97) %>%
mutate(q97 = str_replace(q97, ", ", "; ")) %>%
mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
filter(val!=" ") %>%
filter(!is.na(val)) %>%
filter(elem == 1) %>% # selecting elem
count(val) %>%
mutate(percent = n / 32) %>%
arrange(desc(n)) %>%
mutate(percent = round(percent, 3)) %>%
select(val, elem = percent)
Barriers by band - middle
midd <- d %>%
select(response_id, elem, middle, high, q97) %>%
mutate(q97 = str_replace(q97, ", ", "; ")) %>%
mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
filter(val!=" ") %>%
filter(!is.na(val)) %>%
filter(middle == 1) %>% # selecting middle
count(val) %>%
mutate(percent = n /114) %>%
arrange(desc(n)) %>%
mutate(percent = round(percent, 3)) %>%
select(val, midd = percent)
Barriers by band - high
high <- d %>%
select(response_id, elem, middle, high, q97) %>%
mutate(q97 = str_replace(q97, ", ", "; ")) %>%
mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
filter(val!=" ") %>%
filter(!is.na(val)) %>%
filter(high == 1) %>% # selecting high
count(val) %>%
mutate(percent = n / 232) %>%
arrange(desc(n)) %>%
mutate(percent = round(percent, 3)) %>%
select(val, high = percent)
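The three band-level pipelines above differ only in the band filter and the denominator. A hedged consolidation sketch: band_props() is hypothetical (not part of the original analysis), tidyr::separate_rows() is swapped in for the separate() + gather() pair, and the band sizes 32, 114, and 232 come from the grade-band counts earlier.
# Hypothetical helper: per-band selection rates for a comma-separated
# multiple-response item such as q97 (or q78 below).
band_props <- function(data, question, band, denom) {
  data %>%
    select(response_id, elem, middle, high, val = {{ question }}) %>%
    mutate(val = str_replace(val, ", ", "; "),
           val = str_replace(val, "etc\\.\\)", "")) %>%
    separate_rows(val, sep = ",") %>%
    filter(val != " ", !is.na(val)) %>%
    filter({{ band }} == 1) %>%
    count(val) %>%
    mutate(percent = round(n / denom, 3)) %>%
    select(val, percent)
}
# e.g. band_props(d, q97, elem, 32) %>% rename(elem = percent)
# should reproduce the `elem` table above.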
Final table
to_plot <- overall_q97 %>%
left_join(elem) %>%
left_join(midd) %>%
left_join(high) %>%
select(Barrier = val, n, overall = percent, elem, midd, high) %>%
mutate_all(replace_na, 0)
to_plot %>%
filter(!str_detect(Barrier, "Other")) %>%
knitr::kable()
| Barrier | n | overall | elem | midd | high |
|:--------|----:|--------:|-----:|-----:|-----:|
| Cost | 282 | 0.855 | 0.906 | 0.868 | 0.858 |
| Time to develop new lessons that I previously did using pencil-and-paper | 172 | 0.521 | 0.281 | 0.456 | 0.552 |
| Difficulty of learning a new tool | 153 | 0.464 | 0.438 | 0.456 | 0.474 |
| Student information security | 103 | 0.312 | 0.438 | 0.404 | 0.280 |
| Availability of computers (tablets; laptops | 94 | 0.285 | 0.219 | 0.228 | 0.306 |
| Student discomfort with computers | 37 | 0.112 | 0.000 | 0.096 | 0.129 |
| My own discomfort with computers | 25 | 0.076 | 0.031 | 0.088 | 0.065 |
| NA | 3 | 0.009 | 0.000 | 0.000 | 0.004 |
Tools used
Overall
table(is.na(d$q78))
##
## FALSE
## 330
table(d$q78 == "NA")
##
## FALSE TRUE
## 329 1
overall_q78 <- d %>%
select(response_id, elem, middle, high, q78) %>%
mutate(q78 = str_replace(q78, ", ", "; ")) %>%
mutate(q78 = str_replace(q78, "etc\\.\\)", "")) %>%
separate(col = q78, into = c(str_c("grade", letters[1:12])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
filter(val!=" ") %>%
filter(!is.na(val)) %>%
count(val) %>%
mutate(percent = n / 330) %>%
arrange(desc(n)) %>%
mutate(percent = round(percent, 3))
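The join in the next chunk reuses objects named elem, midd, and high; given the band columns shown in the tools table, these were presumably rebuilt from q78 first (those chunks are not reproduced here). With the hypothetical band_props() helper sketched earlier, that rebuild might look like:
# Assumed, not shown in the original: per-band q78 tables for the join below.
# elem <- band_props(d, q78, elem, 32) %>% rename(elem = percent)
# midd <- band_props(d, q78, middle, 114) %>% rename(midd = percent)
# high <- band_props(d, q78, high, 232) %>% rename(high = percent)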
Final table
to_plot <- overall_q78 %>%
left_join(elem) %>%
left_join(midd) %>%
left_join(high) %>%
select(Technology = val, n, overall = percent, elem, midd, high) %>%
mutate_all(replace_na, 0)
to_plot %>%
filter(!str_detect(Technology, "Other")) %>%
knitr::kable()
| Technology | n | overall | elem | midd | high |
|:-----------|----:|--------:|-----:|-----:|-----:|
| Google Sheets | 273 | 0.827 | 0.812 | 0.781 | 0.858 |
| Calculator (not for graphing) | 238 | 0.721 | 0.750 | 0.719 | 0.750 |
| Data Nuggets | 195 | 0.591 | 0.469 | 0.509 | 0.672 |
| Microsoft Excel | 141 | 0.427 | 0.344 | 0.307 | 0.500 |
| Graphing Calculator | 95 | 0.288 | 0.188 | 0.175 | 0.384 |
| Desmos | 52 | 0.158 | 0.250 | 0.219 | 0.155 |
| DataClassroom | 41 | 0.124 | 0.062 | 0.105 | 0.151 |
| Infogram (or similar online tool for creating an infographic) | 37 | 0.112 | 0.188 | 0.149 | 0.103 |
| Common Online Data Analysis Platform (CODAP) | 14 | 0.042 | 0.031 | 0.044 | 0.047 |
| TUVA | 11 | 0.033 | 0.031 | 0.061 | 0.026 |
| R | 6 | 0.018 | 0.031 | 0.009 | 0.017 |
| NA | 1 | 0.003 | 0.000 | 0.000 | 0.000 |
Qual - Q80
d %>% filter(q80 != "NA")
## # A tibble: 328 × 83
## start_date end_date status ip_address progress duration_in_sec… finished
## <list> <list> <chr> <chr> <dbl> <dbl> <lgl>
## 1 <chr [1]> <chr [1]> IP Address 104.225.1… 100 506 TRUE
## 2 <chr [1]> <chr [1]> IP Address 164.58.82… 100 779 TRUE
## 3 <chr [1]> <chr [1]> IP Address 206.15.24… 100 915 TRUE
## 4 <chr [1]> <chr [1]> IP Address 72.168.12… 100 904 TRUE
## 5 <chr [1]> <chr [1]> IP Address 162.218.1… 100 932 TRUE
## 6 <chr [1]> <chr [1]> IP Address 173.219.1… 100 1002 TRUE
## 7 <chr [1]> <chr [1]> IP Address 73.158.24… 100 869 TRUE
## 8 <chr [1]> <chr [1]> IP Address 209.6.95.… 100 1031 TRUE
## 9 <chr [1]> <chr [1]> IP Address 216.30.15… 100 517 TRUE
## 10 <chr [1]> <chr [1]> IP Address 67.185.48… 100 1243 TRUE
## # … with 318 more rows, and 76 more variables: recorded_date <list>,
## # response_id <chr>, recipient_last_name <chr>, recipient_first_name <chr>,
## # recipient_email <chr>, external_reference <chr>, location_latitude <dbl>,
## # location_longitude <dbl>, distribution_channel <chr>, user_language <chr>,
## # q78 <chr>, q78_29_text <chr>, q92_1 <chr>, q92_2 <chr>, q92_3 <chr>,
## # q92_4 <chr>, q92_5 <chr>, q92_6 <chr>, q92_7 <chr>, q92_8 <chr>,
## # q92_9 <chr>, q92_10 <chr>, q92_11 <chr>, q92_12 <chr>, q92_12_text <chr>, …
# overall
qual_q80 %>%
select(3:9) %>%
summarize_all(sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/328) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_visualization_graph_creation | 48 | 0.1463415 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 40 | 0.1219512 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 32 | 0.0975610 |
| data_application_answering_question_cer_explanations_comparing | 32 | 0.0975610 |
| data_summary_summary_table_averaging_math_tasks | 27 | 0.0823171 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 12 | 0.0365854 |
| other_those_that_dont_fit_into_other_categories_modeling | 10 | 0.0304878 |
qual_q80 %>%
left_join(d_grades) %>%
select(3:9, elem, middle, high) %>%
filter(elem == 1) %>%
summarize_at(vars(1:7), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/328) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_collection_their_own_or_finding_data_recording_first_hand_data | 5 | 0.0152439 |
| data_visualization_graph_creation | 5 | 0.0152439 |
| data_summary_summary_table_averaging_math_tasks | 4 | 0.0121951 |
| data_application_answering_question_cer_explanations_comparing | 3 | 0.0091463 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 2 | 0.0060976 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 2 | 0.0060976 |
| other_those_that_dont_fit_into_other_categories_modeling | 2 | 0.0060976 |
qual_q80 %>%
left_join(d_grades) %>%
select(3:9, elem, middle, high) %>%
filter(middle == 1) %>%
summarize_at(vars(1:7), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/328) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 16 | 0.0487805 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 15 | 0.0457317 |
| data_visualization_graph_creation | 15 | 0.0457317 |
| data_application_answering_question_cer_explanations_comparing | 13 | 0.0396341 |
| data_summary_summary_table_averaging_math_tasks | 8 | 0.0243902 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 5 | 0.0152439 |
| other_those_that_dont_fit_into_other_categories_modeling | 4 | 0.0121951 |
qual_q80 %>%
left_join(d_grades) %>%
select(3:9, elem, middle, high) %>%
filter(high == 1) %>%
summarize_at(vars(1:7), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/328) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_visualization_graph_creation | 37 | 0.1128049 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 30 | 0.0914634 |
| data_application_answering_question_cer_explanations_comparing | 23 | 0.0701220 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 22 | 0.0670732 |
| data_summary_summary_table_averaging_math_tasks | 20 | 0.0609756 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 9 | 0.0274390 |
| other_those_that_dont_fit_into_other_categories_modeling | 6 | 0.0182927 |
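The three per-band q80 summaries above repeat the same pipeline, and the q82 ones below follow the same pattern (with columns 3:10 and vars 1:8). A hedged consolidation sketch: q80_by_band() is hypothetical, and the denominator of 328 is kept from the original, i.e., proportions are out of all q80 responses rather than out of the band.
# Hypothetical helper collapsing the per-band q80 summaries above.
q80_by_band <- function(band) {
  qual_q80 %>%
    left_join(d_grades) %>%
    select(3:9, elem, middle, high) %>%
    filter(.data[[band]] == 1) %>%
    summarize(across(1:7, ~ sum(.x, na.rm = TRUE))) %>%
    pivot_longer(everything(), names_to = "key", values_to = "val") %>%
    mutate(prop = val / 328) %>%
    arrange(desc(val))
}
# e.g. q80_by_band("elem"); q80_by_band("middle"); q80_by_band("high")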
Qual - Q82
d %>% filter(q82 != "NA")
## # A tibble: 327 × 83
## start_date end_date status ip_address progress duration_in_sec… finished
## <list> <list> <chr> <chr> <dbl> <dbl> <lgl>
## 1 <chr [1]> <chr [1]> IP Address 104.225.1… 100 506 TRUE
## 2 <chr [1]> <chr [1]> IP Address 164.58.82… 100 779 TRUE
## 3 <chr [1]> <chr [1]> IP Address 206.15.24… 100 915 TRUE
## 4 <chr [1]> <chr [1]> IP Address 72.168.12… 100 904 TRUE
## 5 <chr [1]> <chr [1]> IP Address 162.218.1… 100 932 TRUE
## 6 <chr [1]> <chr [1]> IP Address 173.219.1… 100 1002 TRUE
## 7 <chr [1]> <chr [1]> IP Address 73.158.24… 100 869 TRUE
## 8 <chr [1]> <chr [1]> IP Address 209.6.95.… 100 1031 TRUE
## 9 <chr [1]> <chr [1]> IP Address 216.30.15… 100 517 TRUE
## 10 <chr [1]> <chr [1]> IP Address 67.185.48… 100 1243 TRUE
## # … with 317 more rows, and 76 more variables: recorded_date <list>,
## # response_id <chr>, recipient_last_name <chr>, recipient_first_name <chr>,
## # recipient_email <chr>, external_reference <chr>, location_latitude <dbl>,
## # location_longitude <dbl>, distribution_channel <chr>, user_language <chr>,
## # q78 <chr>, q78_29_text <chr>, q92_1 <chr>, q92_2 <chr>, q92_3 <chr>,
## # q92_4 <chr>, q92_5 <chr>, q92_6 <chr>, q92_7 <chr>, q92_8 <chr>,
## # q92_9 <chr>, q92_10 <chr>, q92_11 <chr>, q92_12 <chr>, q92_12_text <chr>, …
qual_q82 %>%
select(3:10) %>%
summarize_all(sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/330) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 265 | 0.8030303 |
| curated_data | 121 | 0.3666667 |
| other_data | 70 | 0.2121212 |
| textbook_curriculum_data | 58 | 0.1757576 |
| raw_data | 52 | 0.1575758 |
| primary_data | 31 | 0.0939394 |
| simulations | 22 | 0.0666667 |
| sensors | 7 | 0.0212121 |
qual_q82 %>%
left_join(d_grades) %>%
select(3:10, elem, middle, high) %>%
filter(elem == 1) %>%
summarize_at(vars(1:8), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/32) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 26 | 0.81250 |
| raw_data | 10 | 0.31250 |
| other_data | 7 | 0.21875 |
| curated_data | 5 | 0.15625 |
| textbook_curriculum_data | 4 | 0.12500 |
| sensors | 2 | 0.06250 |
| primary_data | 1 | 0.03125 |
| simulations | 0 | 0.00000 |
qual_q82 %>%
left_join(d_grades) %>%
select(3:10, elem, middle, high) %>%
filter(middle == 1) %>%
summarize_at(vars(1:8), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/114) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 85 | 0.7456140 |
| curated_data | 39 | 0.3421053 |
| other_data | 26 | 0.2280702 |
| raw_data | 22 | 0.1929825 |
| textbook_curriculum_data | 17 | 0.1491228 |
| simulations | 8 | 0.0701754 |
| primary_data | 6 | 0.0526316 |
| sensors | 2 | 0.0175439 |
qual_q82 %>%
left_join(d_grades) %>%
select(3:10, elem, middle, high) %>%
filter(high == 1) %>%
summarize_at(vars(1:8), sum, na.rm = T) %>%
gather(key, val) %>%
mutate(prop = val/234) %>%
arrange(desc(val)) %>%
knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 194 | 0.8290598 |
| curated_data | 93 | 0.3974359 |
| textbook_curriculum_data | 49 | 0.2094017 |
| other_data | 46 | 0.1965812 |
| raw_data | 38 | 0.1623932 |
| primary_data | 27 | 0.1153846 |
| simulations | 14 | 0.0598291 |
| sensors | 6 | 0.0256410 |
Qual Q82 - data size
qual_data_size <- qual_q82 %>%
left_join(d_grades) %>%
select(3:11, elem, middle, high) %>%
rename(size = data_size_level_2_s_10_rows_2_columns_m_20_rows_3_4_columns_l_100s_of_rows_xl_thousands_of_rows)
qual_data_size %>%
filter(!is.na(size)) %>%
filter(size != "")
## # A tibble: 96 × 12
## student_collected_data raw_data textbook_curriculu… curated_data primary_data
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NA NA 1 1
## 2 1 NA NA NA NA
## 3 NA NA NA NA NA
## 4 1 NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## 7 1 1 NA NA NA
## 8 1 NA NA NA NA
## 9 1 NA NA NA NA
## 10 1 NA NA 1 NA
## # … with 86 more rows, and 7 more variables: simulations <dbl>, sensors <dbl>,
## # other_data <dbl>, size <chr>, elem <dbl>, middle <dbl>, high <dbl>
qual_data_size %>%
mutate(size = str_replace_all(size, "-", ",")) %>%
mutate(size = str_replace_all(size, "<", ",")) %>%
mutate(size = str_replace_all(size, " ", "")) %>%
mutate(size = ifelse(size == "small", "s", size)) %>%
mutate(size = tolower(size)) %>%
mutate(size = str_split(size, ",")) %>%
unnest(size) %>%
filter(!is.na(size)) %>%
filter(size != "") %>%
count(size) %>%
mutate(prop = n / 96)
## # A tibble: 3 × 3
## size n prop
## <chr> <int> <dbl>
## 1 l 18 0.188
## 2 m 39 0.406
## 3 s 76 0.792
Satisfaction
# I am satisfied with the current data analysis tools that I use in my class.
d %>%
mutate(q101_1_rc = case_when(
q101_1 == "Strongly disagree" ~ 1,
q101_1 == "Somewhat disagree" ~ 2,
q101_1 == "Neither agree nor disagree" ~ 3,
q101_1 == "Somewhat agree" ~ 4,
q101_1 == "Strongly agree" ~ 5
)) %>%
select(q101_1_rc) %>%
mutate(q101_1_rc = as.integer(q101_1_rc)) %>%
summarize(mean_var = mean(q101_1_rc, na.rm = TRUE),
sd_var = sd(q101_1_rc, na.rm = T)) # 3.35 (1.04)
## # A tibble: 1 × 2
## mean_var sd_var
## <dbl> <dbl>
## 1 3.35 1.04
d %>%
mutate(q101_1_rc = case_when(
q101_1 == "Strongly disagree" ~ 1,
q101_1 == "Somewhat disagree" ~ 2,
q101_1 == "Neither agree nor disagree" ~ 3,
q101_1 == "Somewhat agree" ~ 4,
q101_1 == "Strongly agree" ~ 5
)) %>%
select(q101_1_rc, elem, middle, high) %>%
mutate(q101_1_rc = as.integer(q101_1_rc)) %>%
gather(key, val, -q101_1_rc) %>%
filter(val == 1) %>%
group_by(key) %>%
summarize(mean_var = mean(q101_1_rc, na.rm = TRUE),
sd_var = sd(q101_1_rc, na.rm = T)) %>%
arrange(desc(mean_var))
## # A tibble: 3 × 3
## key mean_var sd_var
## <chr> <dbl> <dbl>
## 1 high 3.44 1.00
## 2 middle 3.19 1.10
## 3 elem 3.19 1.18
# I use quantitative activities with my students regularly.
d %>%
mutate(q101_2_rc = case_when(
q101_2 == "Strongly disagree" ~ 1,
q101_2 == "Somewhat disagree" ~ 2,
q101_2 == "Neither agree nor disagree" ~ 3,
q101_2 == "Somewhat agree" ~ 4,
q101_2 == "Strongly agree" ~ 5
)) %>%
select(q101_2_rc) %>%
mutate(q101_2_rc = as.integer(q101_2_rc)) %>%
summarize(mean_var = mean(q101_2_rc, na.rm = TRUE),
sd_var = sd(q101_2_rc, na.rm = T)) # 3.73 (1.04)
## # A tibble: 1 × 2
## mean_var sd_var
## <dbl> <dbl>
## 1 3.73 1.04
d %>%
mutate(q101_2_rc = case_when(
q101_2 == "Strongly disagree" ~ 1,
q101_2 == "Somewhat disagree" ~ 2,
q101_2 == "Neither agree nor disagree" ~ 3,
q101_2 == "Somewhat agree" ~ 4,
q101_2 == "Strongly agree" ~ 5
)) %>%
select(q101_2_rc, elem, middle, high) %>%
mutate(q101_2_rc = as.integer(q101_2_rc)) %>%
gather(key, val, -q101_2_rc) %>%
filter(val == 1) %>%
group_by(key) %>%
summarize(mean_var = mean(q101_2_rc, na.rm = TRUE),
sd_var = sd(q101_2_rc, na.rm = T))%>%
arrange(desc(mean_var))
## # A tibble: 3 × 3
## key mean_var sd_var
## <chr> <dbl> <dbl>
## 1 high 3.80 1.04
## 2 middle 3.61 1.09
## 3 elem 3.56 1.13
# I am interested in adding more quantitative activities into my class(es).
d %>%
mutate(q101_3_rc = case_when(
q101_3 == "Strongly disagree" ~ 1,
q101_3 == "Somewhat disagree" ~ 2,
q101_3 == "Neither agree nor disagree" ~ 3,
q101_3 == "Somewhat agree" ~ 4,
q101_3 == "Strongly agree" ~ 5
)) %>%
select(q101_3_rc) %>%
mutate(q101_3_rc = as.integer(q101_3_rc)) %>%
summarize(mean_var = mean(q101_3_rc, na.rm = TRUE),
sd_var = sd(q101_3_rc, na.rm = T)) # 4.42 (0.76)
## # A tibble: 1 × 2
## mean_var sd_var
## <dbl> <dbl>
## 1 4.42 0.758
d %>%
mutate(q101_3_rc = case_when(
q101_3 == "Strongly disagree" ~ 1,
q101_3 == "Somewhat disagree" ~ 2,
q101_3 == "Neither agree nor disagree" ~ 3,
q101_3 == "Somewhat agree" ~ 4,
q101_3 == "Strongly agree" ~ 5
)) %>%
select(q101_3_rc, elem, middle, high) %>%
mutate(q101_3_rc = as.integer(q101_3_rc)) %>%
gather(key, val, -q101_3_rc) %>%
filter(val == 1) %>%
group_by(key) %>%
summarize(mean_var = mean(q101_3_rc, na.rm = TRUE),
sd_var = sd(q101_3_rc, na.rm = T))%>%
arrange(desc(mean_var))
## # A tibble: 3 × 3
## key mean_var sd_var
## <chr> <dbl> <dbl>
## 1 elem 4.62 0.609
## 2 middle 4.52 0.655
## 3 high 4.39 0.799
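The q101 Likert recode above is repeated verbatim for each item. A hedged sketch of a reusable version (likert_to_int() and the across() call are hypothetical, not part of the original analysis):
# Hypothetical: one recode function instead of three copies of case_when().
likert_to_int <- function(x) {
  case_when(
    x == "Strongly disagree" ~ 1L,
    x == "Somewhat disagree" ~ 2L,
    x == "Neither agree nor disagree" ~ 3L,
    x == "Somewhat agree" ~ 4L,
    x == "Strongly agree" ~ 5L
  )
}
# e.g.
# d %>%
#   mutate(across(c(q101_1, q101_2, q101_3), likert_to_int, .names = "{.col}_rc")) %>%
#   summarize(across(ends_with("_rc"),
#                    list(mean = ~ mean(.x, na.rm = TRUE), sd = ~ sd(.x, na.rm = TRUE))))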
PD
d %>%
select(q99) %>%
janitor::tabyl(q99)
## q99 n percent
## I'm not sure 66 0.200000000
## NA 2 0.006060606
## No 13 0.039393939
## Yes 249 0.754545455
d %>%
filter(elem == 1) %>%
janitor::tabyl(q99)
## q99 n percent
## I'm not sure 5 0.15625
## Yes 27 0.84375
d %>%
filter(middle == 1) %>%
janitor::tabyl(q99)
## q99 n percent
## I'm not sure 26 0.22807018
## No 3 0.02631579
## Yes 85 0.74561404
d %>%
filter(high == 1) %>%
janitor::tabyl(q99)
## q99 n percent
## I'm not sure 44 0.18965517
## No 10 0.04310345
## Yes 178 0.76724138
First- and second-hand data
d %>%
select(response_id, q106) %>%
separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>%
gather(key, val, -response_id) %>%
arrange(response_id) %>%
count(val) %>%
mutate(prop = n / 330) %>%
filter(!is.na(n)) %>%
filter(val != "NA") %>%
arrange(desc(n))
## # A tibble: 4 × 3
## val n prop
## <chr> <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper 264 0.8
## 2 Analyzing first-hand data using digital tools 260 0.788
## 3 Analyzing second-hand data using digital tools 257 0.779
## 4 Analyzing second-hand data using pen-and-paper 226 0.685
d %>%
select(response_id, elem, middle, high, q106) %>%
separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
arrange(response_id) %>%
filter(elem == 1) %>%
count(val) %>%
mutate(prop = n / 32) %>% # n elem
filter(!is.na(n)) %>%
filter(val != "NA") %>%
arrange(desc(n))
## # A tibble: 4 × 3
## val n prop
## <chr> <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper 29 0.906
## 2 Analyzing first-hand data using digital tools 23 0.719
## 3 Analyzing second-hand data using digital tools 21 0.656
## 4 Analyzing second-hand data using pen-and-paper 20 0.625
# band denominators below: middle = 114, high = 232
d %>%
select(response_id, elem, middle, high, q106) %>%
separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
arrange(response_id) %>%
filter(middle == 1) %>%
count(val) %>%
mutate(prop = n / 114) %>% # n middle
filter(!is.na(n)) %>%
filter(val != "NA") %>%
arrange(desc(n))
## # A tibble: 4 × 3
## val n prop
## <chr> <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper 93 0.816
## 2 Analyzing second-hand data using digital tools 86 0.754
## 3 Analyzing first-hand data using digital tools 80 0.702
## 4 Analyzing second-hand data using pen-and-paper 72 0.632
d %>%
select(response_id, elem, middle, high, q106) %>%
separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>%
gather(key, val, -response_id, -elem, -middle, -high) %>%
arrange(response_id) %>%
filter(high == 1) %>%
count(val) %>%
mutate(prop = n / 232) %>% # n high
filter(!is.na(n)) %>%
filter(val != "NA") %>%
arrange(desc(n))
## # A tibble: 4 × 3
## val n prop
## <chr> <int> <dbl>
## 1 Analyzing first-hand data using digital tools 193 0.832
## 2 Analyzing second-hand data using digital tools 189 0.815
## 3 Analyzing first-hand data using pen-and-paper 184 0.793
## 4 Analyzing second-hand data using pen-and-paper 163 0.703
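The three q106 blocks above also differ only in the band filter and denominator; a hedged consolidation sketch (q106_by_band is hypothetical, and the band sizes 32, 114, 232 repeat the grade-band counts earlier):
# Hypothetical: one pass over all three bands for q106.
q106_by_band <- purrr::imap_dfr(
  c(elem = 32, middle = 114, high = 232),
  function(n_band, band) {
    d %>%
      filter(.data[[band]] == 1) %>%
      separate_rows(q106, sep = ",") %>%
      filter(!is.na(q106), q106 != "NA") %>%
      count(band = band, val = q106) %>%
      mutate(prop = n / n_band) %>%
      arrange(desc(n))
  }
)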
Demographics
d %>%
count(gender) %>%
mutate(prop = n /330) # other is prefer not to say
## # A tibble: 5 × 3
## gender n prop
## <chr> <int> <dbl>
## 1 Female 260 0.788
## 2 Male 62 0.188
## 3 NA 2 0.00606
## 4 Other (please describe) 1 0.00303
## 5 Prefer not to say 5 0.0152
d %>%
mutate(years_worked = unlist(years_worked)) %>%
mutate(years_worked = as.numeric(years_worked)) %>%
select(years_worked, elem, middle, high) %>%
filter(years_worked < 100) %>% # 2 outliers here
summarize(mean_years_worked = mean(years_worked, na.rm = T),
sd_years_worked = sd(years_worked, na.rm = T))
## # A tibble: 1 × 2
## mean_years_worked sd_years_worked
## <dbl> <dbl>
## 1 18.4 8.51
d %>%
count(underrepresented) %>%
mutate(prop = n / 330)
## # A tibble: 3 × 3
## underrepresented n prop
## <chr> <int> <dbl>
## 1 NA 2 0.00606
## 2 No 292 0.885
## 3 Yes 36 0.109
d %>%
count(with_which_groups) %>%
arrange(desc(n)) %>%
filter(with_which_groups != "NA") %>%
knitr::kable() # 9 African American/black (2.7%), 4 hispanic (1.2%), 2 asian (0.60%), 2 american indian, 1 muslim (0.30%)
| with_which_groups | n |
|:------------------|---:|
| Hispanic | 3 |
| Black | 2 |
| African American | 1 |
| African American female | 1 |
| African american/arab/muslim/women/disabled | 1 |
| African American/black | 1 |
| African Merican/black | 1 |
| African-American | 1 |
| All | 1 |
| American Indian | 1 |
| Asian | 1 |
| Asian Indian | 1 |
| Black or African-American | 1 |
| Female, Latin/hispanic | 1 |
| Filipino | 1 |
| Indigenous | 1 |
| Latina | 1 |
| latino | 1 |
| Latino | 1 |
| Latinx | 1 |
| Latinx, Native American | 1 |
| LGBT+ | 1 |
| Over 50 white woman | 1 |
| Seniors | 1 |
NSSME questions
Organize and/or represent data using tables, charts, or graphs in order to facilitate analysis (4) - 1st - 34, 49, 58
# Science Classes in Which Teachers Report Students Engaging in Various Aspects of Science Practices at Least Once a Week, by Grade Range
d %>%
filter(elem == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 34
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 17 0.531
d %>%
filter(middle == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 49
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 51 0.447
d %>%
filter(high == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 58
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 119 0.513
# never
d %>%
filter(elem == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 6
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 0 0
d %>%
filter(middle == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 1
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 1 0.00877
d %>%
filter(high == 1) %>%
count(q98_3) %>%
filter(str_detect(q98_3, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 1
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 1 0.00431
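Each q98 item in this section repeats the same two tallies (at least weekly vs. never) for each band. A hedged helper sketch: freq_prop() and band_n are hypothetical, the pattern argument mirrors the str_detect() calls above (responses matching "Often" or "All"), and the band sizes repeat the grade-band counts earlier.
# Hypothetical helper: share of a band whose response to one q98 item matches
# the given pattern (at least weekly by default, or "Never").
band_n <- c(elem = 32, middle = 114, high = 232)
freq_prop <- function(data, item, band, pattern = "Often|All") {
  data %>%
    filter(.data[[band]] == 1) %>%
    summarize(n = sum(str_detect(.data[[item]], pattern), na.rm = TRUE)) %>%
    mutate(prop = n / band_n[[band]])
}
# e.g.
# freq_prop(d, "q98_3", "elem")                     # at least weekly
# freq_prop(d, "q98_3", "elem", pattern = "Never")  # never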
Analyze data using grade-appropriate methods in order to identify patterns, trends, or relationships (6) - 5th
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Often") | str_detect(q98_5, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 27
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 16 0.5
d %>%
filter(middle == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Often") | str_detect(q98_5, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 43
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 51 0.447
d %>%
filter(high == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Often") | str_detect(q98_5, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 47
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 126 0.543
# never
d %>%
filter(elem == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 12
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 1 0.0312
d %>%
filter(middle == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 3
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 2 0.0175
d %>%
filter(high == 1) %>%
count(q98_5) %>%
filter(str_detect(q98_5, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 3
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 3 0.0129
Determine which data would need to be collected in order to answer a scientific question (regardless of who generated the question) (1) - 5th
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 29
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 9 0.281
d %>%
filter(middle == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 39
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 24 0.211
d %>%
filter(high == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 39
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 47 0.203
# never
d %>%
filter(elem == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 8
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 1 0.0312
d %>%
filter(middle == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 2
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 2 0.0175
d %>%
filter(high == 1) %>%
count(q98_1) %>%
filter(str_detect(q98_1, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 3
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 5 0.0216
Identify potential sources of variability (e.g., measurement error, natural variation) in the data (5) - NA
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 7 0.219
d %>%
filter(middle == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 22 0.193
d %>%
filter(high == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 71 0.306
# never
d %>%
filter(elem == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 3 0.0938
d %>%
filter(middle == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 9 0.0789
d %>%
filter(high == 1) %>%
count(q98_4) %>%
filter(str_detect(q98_4, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 14 0.0603
Consider how missing data or measurement error can affect data interpretation (7)
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 4 0.125
d %>%
filter(middle == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 15 0.132
d %>%
filter(high == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 54 0.233
# never
d %>%
filter(elem == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 3 0.0938
d %>%
filter(middle == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 12 0.105
d %>%
filter(high == 1) %>%
count(q98_6) %>%
filter(str_detect(q98_6, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 18 0.0776
Select and use grade-appropriate mathematical and/or statistical techniques to analyze data (for example: determining the best measure of central tendency, examining variation in data, or developing a line of best fit) (9)
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 8 0.25
d %>%
filter(middle == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 16 0.140
d %>%
filter(high == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 51 0.220
# never
d %>%
filter(elem == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 8 0.25
d %>%
filter(middle == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 19 0.167
d %>%
filter(high == 1) %>%
count(q98_7) %>%
filter(str_detect(q98_7, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 20 0.0862
Use mathematical and/or computational models to generate data to support a scientific claim (10)
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 7 0.219
d %>%
filter(middle == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 19 0.167
d %>%
filter(high == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 57 0.246
# never
d %>%
filter(elem == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 9 0.281
d %>%
filter(middle == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 21 0.184
d %>%
filter(high == 1) %>%
count(q98_8) %>%
filter(str_detect(q98_8, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 32 0.138
Use data and reasoning to defend, verbally or in writing, a claim or refute alternative scientific claims about a real-world phenomenon (regardless of who made the claims) (11)
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 11 0.344
d %>%
filter(middle == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 42 0.368
d %>%
filter(high == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 117 0.504
# never
d %>%
filter(elem == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 1 0.0312
d %>%
filter(middle == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 3 0.0263
d %>%
filter(high == 1) %>%
count(q98_9) %>%
filter(str_detect(q98_9, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232)
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 8 0.0345
Determine which variables from a provided dataset are necessary to answer a scientific question (13)
# at least once a week
d %>%
filter(elem == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 29
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 6 0.188
d %>%
filter(middle == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 39
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 26 0.228
d %>%
filter(high == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 39
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 65 0.280
# never
d %>%
filter(elem == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 32) # 8
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 2 0.0625
d %>%
filter(middle == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 114) # 2
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 2 0.0175
d %>%
filter(high == 1) %>%
count(q98_2) %>%
filter(str_detect(q98_2, "Never")) %>%
summarize(n = sum(n)) %>%
mutate(prop = n / 232) # 3
## # A tibble: 1 × 2
## n prop
## <int> <dbl>
## 1 8 0.0345
State
d %>% count(state_work) %>% # PR, outside US, 2 NA
arrange(desc(n))
## # A tibble: 46 × 2
## state_work n
## <chr> <int>
## 1 California 29
## 2 Massachusetts 19
## 3 Wisconsin 19
## 4 Michigan 17
## 5 New York 16
## 6 Arkansas 13
## 7 New Jersey 13
## 8 Texas 13
## 9 I do not reside in the United States 12
## 10 Connecticut 11
## # … with 36 more rows