Loading, setting up

library(tidyverse)
library(googlesheets4)
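Reading directly from Google Sheets means googlesheets4 has to decide whether to authenticate. A minimal sketch, assuming the sheet is either shared publicly or readable from the analyst's own Google account:

# If the sheet is viewable by anyone with the link, skip OAuth entirely;
# otherwise authenticate once interactively (the token is cached for later knits)
gs4_deauth() # public sheet: read without signing in
# gs4_auth() # private sheet: sign in with a Google account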

# d <- read_csv("data/fall-2020-analyzing-data-in-science-survey.csv")

meta <- as_sheets_id("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146")

meta
## Spreadsheet name: fall-2020-analyzing-data-in-science-survey
##               ID: 1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g
##           Locale: en_US
##        Time zone: America/Los_Angeles
##      # of sheets: 16
## 
##                               (Sheet name): (Nominal extent in rows x columns)
## fall-2020-analyzing-data-in-science-survey: 333 x 81
##                           features-desired: 1000 x 27
##                         Q-80: Open coding : 1000 x 27
##                 Q 80 open coding new sheet: 1000 x 26
##                    Q 80 Interrater - Omiya: 1000 x 23
##                     Q 80 Interrater - Josh: 1000 x 23
##                             Q 80 AGREEMENT: 1000 x 23
##                              Q 80 - MASTER: 1000 x 22
##                           Q-82 open coding: 1000 x 25
##                     Q-80: code description: 1000 x 27
##                 Q 82 open coding new sheet: 1000 x 27
##                    Q 82 Interrater - Omiya: 1000 x 23
##                     Q 82 Interrater - Josh: 1000 x 23
##                             Q 82 Agreement: 1000 x 23
##                                Q 82 Master: 1000 x 23
##                     Q-82: code description: 1000 x 26
d <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146")

# sheet 8 is "Q 80 - MASTER" in the listing above
qual_q80 <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146", sheet = 8) %>% janitor::clean_names()

# sheet 15 is "Q 82 Master" in the listing above
qual_q82 <- read_sheet("https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=957320146", sheet = 15) %>% janitor::clean_names()

Prepping and exploring grade bands

# str_count() tallies commas, i.e., one fewer than the number of grade levels listed
d %>% 
  mutate(n_grades = str_count(grade_level_taught, ",")) %>% 
  count(n_grades)
## # A tibble: 12 × 2
##    n_grades     n
##       <int> <int>
##  1        0    54
##  2        1    39
##  3        2    76
##  4        3   124
##  5        4     6
##  6        5    11
##  7        6    10
##  8        7     2
##  9        8     1
## 10       10     1
## 11       11     1
## 12       12     5
d_grades <- d %>% 
  select(response_id, grade_level_taught) %>% 
  mutate(grade_level_taught = str_replace(grade_level_taught, ":", "")) %>% 
  separate(col = grade_level_taught, into = c(str_c("grade", letters[1:13])), sep = ",") %>% 
  gather(key, val, -response_id) %>% 
  arrange(response_id) %>% 
  mutate(val = as.integer(val)) %>% 
  mutate(elem = if_else(val <= 5, 1, 0)) %>% 
  mutate(second = if_else(val >= 6, 1, 0)) %>% 
  mutate(middle = if_else(val >= 6 & val <= 8, 1, 0)) %>% 
  mutate(high = if_else(val >= 9, 1, 0)) %>% 
  select(response_id, elem, second, middle, high) %>%
  group_by(response_id) %>% 
  summarize(elem = if_else(any(elem == 1), 1, 0),
            second = if_else(any(second == 1), 1, 0),
            middle = if_else(any(middle == 1), 1, 0),
            high = if_else(any(high == 1), 1, 0)) %>% 
  mutate_if(is.double, replace_na, 0)

d_grades %>% 
  count(elem, middle, high) %>% 
  knitr::kable()
| elem | middle | high |   n |
|-----:|-------:|-----:|----:|
|    0 |      0 |    0 |   8 |
|    0 |      0 |    1 | 194 |
|    0 |      1 |    0 |  68 |
|    0 |      1 |    1 |  28 |
|    1 |      0 |    0 |  14 |
|    1 |      1 |    0 |   8 |
|    1 |      1 |    1 |  10 |
# 194 only teach high
# 68 only teach middle
# 14 only teach elem

# 28 teach middle and high
# 8 teach elem and middle
# 10 teach all

# 8 teach none (remove?)
d <- d %>% 
  left_join(d_grades)
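As a quick sanity check (a sketch using the counts reported above), the column sums of the band indicators should equal the denominators used throughout the rest of the analysis: 32 elementary (14 + 8 + 10), 114 middle (68 + 28 + 8 + 10), and 232 high (194 + 28 + 10).

# Sketch: band sizes implied by the indicator columns
d_grades %>% 
  summarize(across(c(elem, middle, high), sum)) # expect 32, 114, 232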

Reasons for not using a tool

Overall

d$q97 %>% is.na() %>% table()
## .
## FALSE 
##   330
table(d$q97 == "NA")
## 
## FALSE  TRUE 
##   327     3
overall_q97 <- d %>% 
  select(response_id, elem, middle, high, q97) %>% 
  mutate(q97 = str_replace(q97, ", ", "; ")) %>%
  mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
  separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  count(val) %>% 
  mutate(percent = n / 330) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3))

# overall_q97 %>% 
#   ggplot(aes(x = reorder(val, percent), y = percent)) +
#   geom_col() +
#   coord_flip() +
#   xlab(NULL)

Barriers - elem

elem <- d %>% 
  select(response_id, elem, middle, high, q97) %>% 
  mutate(q97 = str_replace(q97, ", ", "; ")) %>%
  mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
  separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(elem == 1) %>% # selecting elem
  count(val) %>% 
  mutate(percent = n / 32) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, elem = percent)

Barriers - midd

midd <- d %>% 
  select(response_id, elem, middle, high, q97) %>% 
  mutate(q97 = str_replace(q97, ", ", "; ")) %>%
  mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
  separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(middle == 1) %>% # selecting middle
  count(val) %>% 
  mutate(percent = n / 114) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, midd = percent)

Barriers - high

high <- d %>% 
  select(response_id, elem, middle, high, q97) %>% 
  mutate(q97 = str_replace(q97, ", ", "; ")) %>%
  mutate(q97 = str_replace(q97, "etc\\.\\)", "")) %>%
  separate(col = q97, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(high == 1) %>% # selecting high
  count(val) %>% 
  mutate(percent = n / 232) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, high = percent)
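The elem/midd/high blocks above differ only in which band indicator is filtered on and which denominator is used. A possible refactor is sketched below; band_percent() is a hypothetical helper (it also swaps separate() + gather() for tidyr::separate_rows()), not the code used to build the tables in this document.

# Hypothetical helper: percent of a grade band selecting each option of a
# comma-separated multi-select item such as q97
band_percent <- function(data, item, band, denom) {
  data %>%
    select(response_id, elem, middle, high, val = {{ item }}) %>%
    mutate(val = str_replace(val, ", ", "; "),
           val = str_replace(val, "etc\\.\\)", "")) %>%
    tidyr::separate_rows(val, sep = ",") %>%
    filter(val != " ", !is.na(val)) %>%
    filter({{ band }} == 1) %>%
    count(val) %>%
    mutate(percent = round(n / denom, 3)) %>%
    arrange(desc(n))
}

# e.g., band_percent(d, q97, elem, 32) should reproduce the elem column in the final table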

Final table

to_plot <- overall_q97 %>% 
  left_join(elem) %>% 
  left_join(midd) %>% 
  left_join(high) %>% 
  select(Barrier = val, n, overall = percent, elem, midd, high) %>% 
  mutate_all(replace_na, 0)

to_plot %>% 
  filter(!str_detect(Barrier, "Other")) %>%
  knitr::kable()
| Barrier | n | overall | elem | midd | high |
|:--------|--:|--------:|-----:|-----:|-----:|
| Cost | 282 | 0.855 | 0.906 | 0.868 | 0.858 |
| Time to develop new lessons that I previously did using pencil-and-paper | 172 | 0.521 | 0.281 | 0.456 | 0.552 |
| Difficulty of learning a new tool | 153 | 0.464 | 0.438 | 0.456 | 0.474 |
| Student information security | 103 | 0.312 | 0.438 | 0.404 | 0.280 |
| Availability of computers (tablets; laptops | 94 | 0.285 | 0.219 | 0.228 | 0.306 |
| Student discomfort with computers | 37 | 0.112 | 0.000 | 0.096 | 0.129 |
| My own discomfort with computers | 25 | 0.076 | 0.031 | 0.088 | 0.065 |
| NA | 3 | 0.009 | 0.000 | 0.000 | 0.004 |

Tools used

Overall

table(is.na(d$q78))
## 
## FALSE 
##   330
table(d$q78 == "NA")
## 
## FALSE  TRUE 
##   329     1
overall_q78 <- d %>% 
  select(response_id, elem, middle, high, q78) %>% 
  mutate(q78 = str_replace(q78, ", ", "; ")) %>%
  mutate(q78 = str_replace(q78, "etc\\.\\)", "")) %>%
  separate(col = q78, into = c(str_c("grade", letters[1:12])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  count(val) %>% 
  mutate(percent = n / 330) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3))

Tools - elem

elem <- d %>% 
  select(response_id, elem, middle, high, q78) %>% 
  mutate(q78 = str_replace(q78, ", ", "; ")) %>%
  mutate(q78 = str_replace(q78, "etc\\.\\)", "")) %>%
  separate(col = q78, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(elem == 1) %>% # selecting elem
  count(val) %>% 
  mutate(percent = n / 32) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, elem = percent)

Tools - midd

midd <- d %>% 
  select(response_id, elem, middle, high, q78) %>% 
  mutate(q78 = str_replace(q78, ", ", "; ")) %>%
  mutate(q78 = str_replace(q78, "etc\\.\\)", "")) %>%
  separate(col = q78, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(middle == 1) %>% # selecting middle
  count(val) %>% 
  mutate(percent = n / 114) %>% 
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, midd = percent)

Tools - high

high <- d %>% 
  select(response_id, elem, middle, high, q78) %>% 
  mutate(q78 = str_replace(q78, ", ", "; ")) %>%
  mutate(q78 = str_replace(q78, "etc\\.\\)", "")) %>%
  separate(col = q78, into = c(str_c("grade", letters[1:9])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  filter(val!=" ") %>% 
  filter(!is.na(val)) %>% 
  filter(high == 1) %>% # selecting high
  count(val) %>% 
  mutate(percent = n / 232) %>%
  arrange(desc(n)) %>% 
  mutate(percent = round(percent, 3)) %>% 
  select(val, high = percent)
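The same hypothetical band_percent() sketched in the barriers section would cover q78 as well, e.g., band_percent(d, q78, high, 232) for the high column.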

Final table

to_plot <- overall_q78 %>% 
  left_join(elem) %>% 
  left_join(midd) %>% 
  left_join(high) %>% 
  select(Technology = val, n, overall = percent, elem, midd, high) %>% 
  mutate_all(replace_na, 0)

to_plot %>% 
  filter(!str_detect(Technology, "Other")) %>%
  knitr::kable()
| Technology | n | overall | elem | midd | high |
|:-----------|--:|--------:|-----:|-----:|-----:|
| Google Sheets | 273 | 0.827 | 0.812 | 0.781 | 0.858 |
| Calculator (not for graphing) | 238 | 0.721 | 0.750 | 0.719 | 0.750 |
| Data Nuggets | 195 | 0.591 | 0.469 | 0.509 | 0.672 |
| Microsoft Excel | 141 | 0.427 | 0.344 | 0.307 | 0.500 |
| Graphing Calculator | 95 | 0.288 | 0.188 | 0.175 | 0.384 |
| Desmos | 52 | 0.158 | 0.250 | 0.219 | 0.155 |
| DataClassroom | 41 | 0.124 | 0.062 | 0.105 | 0.151 |
| Infogram (or similar online tool for creating an infographic) | 37 | 0.112 | 0.188 | 0.149 | 0.103 |
| Common Online Data Analysis Platform (CODAP) | 14 | 0.042 | 0.031 | 0.044 | 0.047 |
| TUVA | 11 | 0.033 | 0.031 | 0.061 | 0.026 |
| R | 6 | 0.018 | 0.031 | 0.009 | 0.017 |
| NA | 1 | 0.003 | 0.000 | 0.000 | 0.000 |

Qual - q80

d %>% filter(q80 != "NA")
## # A tibble: 328 × 83
##    start_date end_date  status     ip_address progress duration_in_sec… finished
##    <list>     <list>    <chr>      <chr>         <dbl>            <dbl> <lgl>   
##  1 <chr [1]>  <chr [1]> IP Address 104.225.1…      100              506 TRUE    
##  2 <chr [1]>  <chr [1]> IP Address 164.58.82…      100              779 TRUE    
##  3 <chr [1]>  <chr [1]> IP Address 206.15.24…      100              915 TRUE    
##  4 <chr [1]>  <chr [1]> IP Address 72.168.12…      100              904 TRUE    
##  5 <chr [1]>  <chr [1]> IP Address 162.218.1…      100              932 TRUE    
##  6 <chr [1]>  <chr [1]> IP Address 173.219.1…      100             1002 TRUE    
##  7 <chr [1]>  <chr [1]> IP Address 73.158.24…      100              869 TRUE    
##  8 <chr [1]>  <chr [1]> IP Address 209.6.95.…      100             1031 TRUE    
##  9 <chr [1]>  <chr [1]> IP Address 216.30.15…      100              517 TRUE    
## 10 <chr [1]>  <chr [1]> IP Address 67.185.48…      100             1243 TRUE    
## # … with 318 more rows, and 76 more variables: recorded_date <list>,
## #   response_id <chr>, recipient_last_name <chr>, recipient_first_name <chr>,
## #   recipient_email <chr>, external_reference <chr>, location_latitude <dbl>,
## #   location_longitude <dbl>, distribution_channel <chr>, user_language <chr>,
## #   q78 <chr>, q78_29_text <chr>, q92_1 <chr>, q92_2 <chr>, q92_3 <chr>,
## #   q92_4 <chr>, q92_5 <chr>, q92_6 <chr>, q92_7 <chr>, q92_8 <chr>,
## #   q92_9 <chr>, q92_10 <chr>, q92_11 <chr>, q92_12 <chr>, q92_12_text <chr>, …
# overall
qual_q80 %>% 
  select(3:9) %>% 
  summarize_all(sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/328) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_visualization_graph_creation | 48 | 0.1463415 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 40 | 0.1219512 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 32 | 0.0975610 |
| data_application_answering_question_cer_explanations_comparing | 32 | 0.0975610 |
| data_summary_summary_table_averaging_math_tasks | 27 | 0.0823171 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 12 | 0.0365854 |
| other_those_that_dont_fit_into_other_categories_modeling | 10 | 0.0304878 |
qual_q80 %>% 
  left_join(d_grades) %>%
  select(3:9, elem, middle, high) %>%
  filter(elem == 1) %>% 
  summarize_at(vars(1:7), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/328) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_collection_their_own_or_finding_data_recording_first_hand_data | 5 | 0.0152439 |
| data_visualization_graph_creation | 5 | 0.0152439 |
| data_summary_summary_table_averaging_math_tasks | 4 | 0.0121951 |
| data_application_answering_question_cer_explanations_comparing | 3 | 0.0091463 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 2 | 0.0060976 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 2 | 0.0060976 |
| other_those_that_dont_fit_into_other_categories_modeling | 2 | 0.0060976 |
qual_q80 %>% 
  left_join(d_grades) %>%
  select(3:9, elem, middle, high) %>%
  filter(middle == 1) %>% 
  summarize_at(vars(1:7), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/328) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 16 | 0.0487805 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 15 | 0.0457317 |
| data_visualization_graph_creation | 15 | 0.0457317 |
| data_application_answering_question_cer_explanations_comparing | 13 | 0.0396341 |
| data_summary_summary_table_averaging_math_tasks | 8 | 0.0243902 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 5 | 0.0152439 |
| other_those_that_dont_fit_into_other_categories_modeling | 4 | 0.0121951 |
qual_q80 %>% 
  left_join(d_grades) %>%
  select(3:9, elem, middle, high) %>%
  filter(high == 1) %>% 
  summarize_at(vars(1:7), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/328) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| data_visualization_graph_creation | 37 | 0.1128049 |
| data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 30 | 0.0914634 |
| data_application_answering_question_cer_explanations_comparing | 23 | 0.0701220 |
| data_collection_their_own_or_finding_data_recording_first_hand_data | 22 | 0.0670732 |
| data_summary_summary_table_averaging_math_tasks | 20 | 0.0609756 |
| data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 9 | 0.0274390 |
| other_those_that_dont_fit_into_other_categories_modeling | 6 | 0.0182927 |
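The band summaries above (and the q82 ones below) follow one template: join the coded sheet to d_grades, filter to a band, sum the indicator columns, and divide by a denominator. A sketch of a hypothetical helper, assuming response_id is the common key relied on by the left_join() calls above:

# Hypothetical helper: sum the coded indicator columns for one grade band
# and express them as proportions of denom
qual_band_summary <- function(coded, cols, band, denom) {
  coded %>%
    left_join(d_grades, by = "response_id") %>%
    filter({{ band }} == 1) %>%
    summarize(across(all_of(cols), ~ sum(.x, na.rm = TRUE))) %>%
    pivot_longer(everything(), names_to = "key", values_to = "val") %>%
    mutate(prop = val / denom) %>%
    arrange(desc(val))
}

# e.g., qual_band_summary(qual_q80, names(qual_q80)[3:9], high, 328)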

Qual - q82

d %>% filter(q82 != "NA")
## # A tibble: 327 × 83
##    start_date end_date  status     ip_address progress duration_in_sec… finished
##    <list>     <list>    <chr>      <chr>         <dbl>            <dbl> <lgl>   
##  1 <chr [1]>  <chr [1]> IP Address 104.225.1…      100              506 TRUE    
##  2 <chr [1]>  <chr [1]> IP Address 164.58.82…      100              779 TRUE    
##  3 <chr [1]>  <chr [1]> IP Address 206.15.24…      100              915 TRUE    
##  4 <chr [1]>  <chr [1]> IP Address 72.168.12…      100              904 TRUE    
##  5 <chr [1]>  <chr [1]> IP Address 162.218.1…      100              932 TRUE    
##  6 <chr [1]>  <chr [1]> IP Address 173.219.1…      100             1002 TRUE    
##  7 <chr [1]>  <chr [1]> IP Address 73.158.24…      100              869 TRUE    
##  8 <chr [1]>  <chr [1]> IP Address 209.6.95.…      100             1031 TRUE    
##  9 <chr [1]>  <chr [1]> IP Address 216.30.15…      100              517 TRUE    
## 10 <chr [1]>  <chr [1]> IP Address 67.185.48…      100             1243 TRUE    
## # … with 317 more rows, and 76 more variables: recorded_date <list>,
## #   response_id <chr>, recipient_last_name <chr>, recipient_first_name <chr>,
## #   recipient_email <chr>, external_reference <chr>, location_latitude <dbl>,
## #   location_longitude <dbl>, distribution_channel <chr>, user_language <chr>,
## #   q78 <chr>, q78_29_text <chr>, q92_1 <chr>, q92_2 <chr>, q92_3 <chr>,
## #   q92_4 <chr>, q92_5 <chr>, q92_6 <chr>, q92_7 <chr>, q92_8 <chr>,
## #   q92_9 <chr>, q92_10 <chr>, q92_11 <chr>, q92_12 <chr>, q92_12_text <chr>, …
qual_q82 %>% 
  select(3:10) %>% 
  summarize_all(sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/330) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 265 | 0.8030303 |
| curated_data | 121 | 0.3666667 |
| other_data | 70 | 0.2121212 |
| textbook_curriculum_data | 58 | 0.1757576 |
| raw_data | 52 | 0.1575758 |
| primary_data | 31 | 0.0939394 |
| simulations | 22 | 0.0666667 |
| sensors | 7 | 0.0212121 |
qual_q82 %>% 
  left_join(d_grades) %>%
  select(3:10, elem, middle, high) %>%
  filter(elem == 1) %>% 
  summarize_at(vars(1:8), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/32) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 26 | 0.81250 |
| raw_data | 10 | 0.31250 |
| other_data | 7 | 0.21875 |
| curated_data | 5 | 0.15625 |
| textbook_curriculum_data | 4 | 0.12500 |
| sensors | 2 | 0.06250 |
| primary_data | 1 | 0.03125 |
| simulations | 0 | 0.00000 |
qual_q82 %>% 
  left_join(d_grades) %>%
  select(3:10, elem, middle, high) %>%
  filter(middle == 1) %>% 
  summarize_at(vars(1:8), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/114) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 85 | 0.7456140 |
| curated_data | 39 | 0.3421053 |
| other_data | 26 | 0.2280702 |
| raw_data | 22 | 0.1929825 |
| textbook_curriculum_data | 17 | 0.1491228 |
| simulations | 8 | 0.0701754 |
| primary_data | 6 | 0.0526316 |
| sensors | 2 | 0.0175439 |
qual_q82 %>% 
  left_join(d_grades) %>%
  select(3:10, elem, middle, high) %>%
  filter(high == 1) %>% 
  summarize_at(vars(1:8), sum, na.rm = T) %>% 
  gather(key, val) %>% 
  mutate(prop = val/234) %>% 
  arrange(desc(val)) %>% 
  knitr::kable()
| key | val | prop |
|:----|----:|-----:|
| student_collected_data | 194 | 0.8290598 |
| curated_data | 93 | 0.3974359 |
| textbook_curriculum_data | 49 | 0.2094017 |
| other_data | 46 | 0.1965812 |
| raw_data | 38 | 0.1623932 |
| primary_data | 27 | 0.1153846 |
| simulations | 14 | 0.0598291 |
| sensors | 6 | 0.0256410 |
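The same hypothetical qual_band_summary() sketch would cover these q82 band tables, e.g., qual_band_summary(qual_q82, names(qual_q82)[3:10], elem, 32).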

Qual - q82 data size

qual_data_size <- qual_q82 %>% 
  left_join(d_grades) %>%
  select(3:11, elem, middle, high) %>% 
  rename(size = data_size_level_2_s_10_rows_2_columns_m_20_rows_3_4_columns_l_100s_of_rows_xl_thousands_of_rows)

qual_data_size %>% 
  filter(!is.na(size)) %>% 
  filter(size != "")
## # A tibble: 96 × 12
##    student_collected_data raw_data textbook_curriculu… curated_data primary_data
##                     <dbl>    <dbl>               <dbl>        <dbl>        <dbl>
##  1                      1       NA                  NA            1            1
##  2                      1       NA                  NA           NA           NA
##  3                     NA       NA                  NA           NA           NA
##  4                      1       NA                  NA           NA           NA
##  5                     NA       NA                  NA           NA           NA
##  6                     NA       NA                  NA           NA           NA
##  7                      1        1                  NA           NA           NA
##  8                      1       NA                  NA           NA           NA
##  9                      1       NA                  NA           NA           NA
## 10                      1       NA                  NA            1           NA
## # … with 86 more rows, and 7 more variables: simulations <dbl>, sensors <dbl>,
## #   other_data <dbl>, size <chr>, elem <dbl>, middle <dbl>, high <dbl>
qual_data_size %>% 
  mutate(size = str_replace_all(size, "-", ",")) %>% 
  mutate(size = str_replace_all(size, "<", ",")) %>% 
  mutate(size = str_replace_all(size, " ", "")) %>% 
  mutate(size = ifelse(size == "small", "s", size)) %>% 
  mutate(size = tolower(size)) %>% 
  mutate(size = str_split(size, ",")) %>% 
  unnest(size) %>% 
  filter(!is.na(size)) %>% 
  filter(size != "") %>%
  count(size) %>% 
  mutate(prop = n / 96)
## # A tibble: 3 × 3
##   size      n  prop
##   <chr> <int> <dbl>
## 1 l        18 0.188
## 2 m        39 0.406
## 3 s        76 0.792
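Note that these proportions are shares of the 96 responses that reported a data size; they sum to more than one because a single response can list several sizes (hence the comma split above).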

Satisfaction

# I am satisfied with the current data analysis tools that I use in my class. 
d %>% 
  mutate(q101_1_rc = case_when(
    q101_1 == "Strongly disagree" ~ 1,
    q101_1 == "Somewhat disagree" ~ 2,
    q101_1 == "Neither agree nor disagree" ~ 3,
    q101_1 == "Somewhat agree" ~ 4,
    q101_1 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_1_rc) %>% 
  mutate(q101_1_rc = as.integer(q101_1_rc)) %>% 
  summarize(mean_var = mean(q101_1_rc, na.rm = TRUE),
            sd_var = sd(q101_1_rc, na.rm = T)) # 3.35 (1.04)
## # A tibble: 1 × 2
##   mean_var sd_var
##      <dbl>  <dbl>
## 1     3.35   1.04
d %>% 
  mutate(q101_1_rc = case_when(
    q101_1 == "Strongly disagree" ~ 1,
    q101_1 == "Somewhat disagree" ~ 2,
    q101_1 == "Neither agree nor disagree" ~ 3,
    q101_1 == "Somewhat agree" ~ 4,
    q101_1 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_1_rc, elem, middle, high) %>% 
  mutate(q101_1_rc = as.integer(q101_1_rc)) %>% 
  gather(key, val, -q101_1_rc) %>% 
  filter(val == 1) %>% 
  group_by(key) %>% 
  summarize(mean_var = mean(q101_1_rc, na.rm = TRUE),
            sd_var = sd(q101_1_rc, na.rm = T)) %>% 
  arrange(desc(mean_var))
## # A tibble: 3 × 3
##   key    mean_var sd_var
##   <chr>     <dbl>  <dbl>
## 1 high       3.44   1.00
## 2 middle     3.19   1.10
## 3 elem       3.19   1.18
# I use quantitative activities with my students regularly.
d %>% 
  mutate(q101_2_rc = case_when(
    q101_2 == "Strongly disagree" ~ 1,
    q101_2 == "Somewhat disagree" ~ 2,
    q101_2 == "Neither agree nor disagree" ~ 3,
    q101_2 == "Somewhat agree" ~ 4,
    q101_2 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_2_rc) %>% 
  mutate(q101_2_rc = as.integer(q101_2_rc)) %>% 
  summarize(mean_var = mean(q101_2_rc, na.rm = TRUE),
            sd_var = sd(q101_2_rc, na.rm = T)) # 3.73 (1.04)
## # A tibble: 1 × 2
##   mean_var sd_var
##      <dbl>  <dbl>
## 1     3.73   1.04
d %>% 
  mutate(q101_2_rc = case_when(
    q101_2 == "Strongly disagree" ~ 1,
    q101_2 == "Somewhat disagree" ~ 2,
    q101_2 == "Neither agree nor disagree" ~ 3,
    q101_2 == "Somewhat agree" ~ 4,
    q101_2 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_2_rc, elem, middle, high) %>% 
  mutate(q101_2_rc = as.integer(q101_2_rc)) %>% 
  gather(key, val, -q101_2_rc) %>% 
  filter(val == 1) %>% 
  group_by(key) %>% 
  summarize(mean_var = mean(q101_2_rc, na.rm = TRUE),
            sd_var = sd(q101_2_rc, na.rm = T))%>% 
  arrange(desc(mean_var))
## # A tibble: 3 × 3
##   key    mean_var sd_var
##   <chr>     <dbl>  <dbl>
## 1 high       3.80   1.04
## 2 middle     3.61   1.09
## 3 elem       3.56   1.13
# I am interested in adding more quantitative activities into my class(es).
d %>% 
  mutate(q101_3_rc = case_when(
    q101_3 == "Strongly disagree" ~ 1,
    q101_3 == "Somewhat disagree" ~ 2,
    q101_3 == "Neither agree nor disagree" ~ 3,
    q101_3 == "Somewhat agree" ~ 4,
    q101_3 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_3_rc) %>% 
  mutate(q101_3_rc = as.integer(q101_3_rc)) %>% 
  summarize(mean_var = mean(q101_3_rc, na.rm = TRUE),
            sd_var = sd(q101_3_rc, na.rm = T)) # 4.42 (0.76)
## # A tibble: 1 × 2
##   mean_var sd_var
##      <dbl>  <dbl>
## 1     4.42  0.758
d %>% 
  mutate(q101_3_rc = case_when(
    q101_3 == "Strongly disagree" ~ 1,
    q101_3 == "Somewhat disagree" ~ 2,
    q101_3 == "Neither agree nor disagree" ~ 3,
    q101_3 == "Somewhat agree" ~ 4,
    q101_3 == "Strongly agree" ~ 5
  )) %>% 
  select(q101_3_rc, elem, middle, high) %>% 
  mutate(q101_3_rc = as.integer(q101_3_rc)) %>% 
  gather(key, val, -q101_3_rc) %>% 
  filter(val == 1) %>% 
  group_by(key) %>% 
  summarize(mean_var = mean(q101_3_rc, na.rm = TRUE),
            sd_var = sd(q101_3_rc, na.rm = T))%>% 
  arrange(desc(mean_var))
## # A tibble: 3 × 3
##   key    mean_var sd_var
##   <chr>     <dbl>  <dbl>
## 1 elem       4.62  0.609
## 2 middle     4.52  0.655
## 3 high       4.39  0.799
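The same five-level case_when() is repeated for q101_1, q101_2, and q101_3. A hypothetical recode helper (a sketch, not the code used above) could map the agreement scale to integers once:

# Hypothetical helper: agreement scale -> 1..5, in the same order as the
# case_when() blocks above
likert_levels <- c("Strongly disagree", "Somewhat disagree",
                   "Neither agree nor disagree", "Somewhat agree",
                   "Strongly agree")
recode_likert <- function(x) as.integer(factor(x, levels = likert_levels))

# e.g., d %>% mutate(across(c(q101_1, q101_2, q101_3), recode_likert, .names = "{.col}_rc"))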

PD

d %>% 
  select(q99) %>% 
  janitor::tabyl(q99)
##           q99   n     percent
##  I'm not sure  66 0.200000000
##            NA   2 0.006060606
##            No  13 0.039393939
##           Yes 249 0.754545455
d %>% 
  filter(elem == 1) %>% 
  janitor::tabyl(q99)
##           q99  n percent
##  I'm not sure  5 0.15625
##           Yes 27 0.84375
d %>% 
  filter(middle == 1) %>% 
  janitor::tabyl(q99)
##           q99  n    percent
##  I'm not sure 26 0.22807018
##            No  3 0.02631579
##           Yes 85 0.74561404
d %>% 
  filter(high == 1) %>% 
  janitor::tabyl(q99)
##           q99   n    percent
##  I'm not sure  44 0.18965517
##            No  10 0.04310345
##           Yes 178 0.76724138

First- and second-hand data

d %>% 
  select(response_id, q106) %>% 
  separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>% 
  gather(key, val, -response_id) %>% 
  arrange(response_id) %>% 
  count(val) %>% 
  mutate(prop = n / 330) %>% 
  filter(!is.na(n)) %>% 
  filter(val != "NA") %>% 
  arrange(desc(n))
## # A tibble: 4 × 3
##   val                                                n  prop
##   <chr>                                          <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper    264 0.8  
## 2 Analyzing first-hand data using digital tools    260 0.788
## 3 Analyzing second-hand data using digital tools   257 0.779
## 4 Analyzing second-hand data using pen-and-paper   226 0.685
d %>% 
  select(response_id, elem, middle, high, q106) %>% 
  separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  arrange(response_id) %>% 
  filter(elem == 1) %>% 
  count(val) %>% 
  mutate(prop = n / 32) %>% # n elem 
  filter(!is.na(n)) %>% 
  filter(val != "NA") %>% 
  arrange(desc(n))
## # A tibble: 4 × 3
##   val                                                n  prop
##   <chr>                                          <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper     29 0.906
## 2 Analyzing first-hand data using digital tools     23 0.719
## 3 Analyzing second-hand data using digital tools    21 0.656
## 4 Analyzing second-hand data using pen-and-paper    20 0.625
# 114, 232
d %>% 
  select(response_id, elem, middle, high, q106) %>% 
  separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  arrange(response_id) %>% 
  filter(middle == 1) %>% 
  count(val) %>% 
  mutate(prop = n / 114) %>% # n middle
  filter(!is.na(n)) %>% 
  filter(val != "NA") %>% 
  arrange(desc(n))
## # A tibble: 4 × 3
##   val                                                n  prop
##   <chr>                                          <int> <dbl>
## 1 Analyzing first-hand data using pen-and-paper     93 0.816
## 2 Analyzing second-hand data using digital tools    86 0.754
## 3 Analyzing first-hand data using digital tools     80 0.702
## 4 Analyzing second-hand data using pen-and-paper    72 0.632
d %>% 
  select(response_id, elem, middle, high, q106) %>% 
  separate(col = q106, into = c(str_c("response_", letters[1:4])), sep = ",") %>% 
  gather(key, val, -response_id, -elem, -middle, -high) %>% 
  arrange(response_id) %>% 
  filter(high == 1) %>% 
  count(val) %>% 
  mutate(prop = n / 232) %>% # n high
  filter(!is.na(n)) %>% 
  filter(val != "NA") %>% 
  arrange(desc(n))
## # A tibble: 4 × 3
##   val                                                n  prop
##   <chr>                                          <int> <dbl>
## 1 Analyzing first-hand data using digital tools    193 0.832
## 2 Analyzing second-hand data using digital tools   189 0.815
## 3 Analyzing first-hand data using pen-and-paper    184 0.793
## 4 Analyzing second-hand data using pen-and-paper   163 0.703

Demographics

d %>% 
  count(gender) %>% 
  mutate(prop = n / 330) # the one 'Other (please describe)' response is effectively 'prefer not to say'
## # A tibble: 5 × 3
##   gender                      n    prop
##   <chr>                   <int>   <dbl>
## 1 Female                    260 0.788  
## 2 Male                       62 0.188  
## 3 NA                          2 0.00606
## 4 Other (please describe)     1 0.00303
## 5 Prefer not to say           5 0.0152
d %>% 
  mutate(years_worked = unlist(years_worked)) %>% 
  mutate(years_worked = as.numeric(years_worked)) %>% 
  select(years_worked, elem, middle, high) %>% 
  filter(years_worked < 100) %>% # 2 outliers here
  summarize(mean_years_worked = mean(years_worked, na.rm = T),
            sd_years_worked = sd(years_worked, na.rm = T))
## # A tibble: 1 × 2
##   mean_years_worked sd_years_worked
##               <dbl>           <dbl>
## 1              18.4            8.51
d %>% 
  count(underrepresented) %>% 
  mutate(prop = n / 330)
## # A tibble: 3 × 3
##   underrepresented     n    prop
##   <chr>            <int>   <dbl>
## 1 NA                   2 0.00606
## 2 No                 292 0.885  
## 3 Yes                 36 0.109
d %>% 
  count(with_which_groups) %>% 
  arrange(desc(n)) %>% 
  filter(with_which_groups != "NA") %>% 
  knitr::kable() # 9 African American/black (2.7%), 4 hispanic (1.2%), 2 asian (0.60%), 2 american indian, 1 muslim (0.30%)
| with_which_groups | n |
|:------------------|--:|
| Hispanic | 3 |
| Black | 2 |
| African American | 1 |
| African American female | 1 |
| African american/arab/muslim/women/disabled | 1 |
| African American/black | 1 |
| African Merican/black | 1 |
| African-American | 1 |
| All | 1 |
| American Indian | 1 |
| Asian | 1 |
| Asian Indian | 1 |
| Black or African-American | 1 |
| Female, Latin/hispanic | 1 |
| Filipino | 1 |
| Indigenous | 1 |
| Latina | 1 |
| latino | 1 |
| Latino | 1 |
| Latinx | 1 |
| Latinx, Native American | 1 |
| LGBT+ | 1 |
| Over 50 white woman | 1 |
| Seniors | 1 |

NSSME questions

Organize and/or represent data using tables, charts, or graphs in order to facilitate analysis (4) - 1st - 34, 49, 58

# Science Classes in Which Teachers Report Students Engaging in Various Aspects of Science Practices at Least Once a Week, by Grade Range

d %>% 
  filter(elem == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32) # 34
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    17 0.531
d %>% 
  filter(middle == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114) # 49
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    51 0.447
d %>% 
  filter(high == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Often") | str_detect(q98_3, "All")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232) # 58
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1   119 0.513
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32) # 6
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     0     0
d %>% 
  filter(middle == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114) # 1
## # A tibble: 1 × 2
##       n    prop
##   <int>   <dbl>
## 1     1 0.00877
d %>% 
  filter(high == 1) %>% 
  count(q98_3) %>% 
  filter(str_detect(q98_3, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232) # 1
## # A tibble: 1 × 2
##       n    prop
##   <int>   <dbl>
## 1     1 0.00431
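Every NSSME item in this section repeats the same block per band and response pattern; the remaining q98 items below follow it too. A sketch of a hypothetical wrapper:

# Hypothetical helper: proportion of a grade band whose response to a q98
# item matches a regular expression (e.g., "Often|All" or "Never")
q98_prop <- function(data, item, band, pattern, denom) {
  data %>%
    filter({{ band }} == 1) %>%
    count({{ item }}) %>%
    filter(str_detect({{ item }}, pattern)) %>%
    summarize(n = sum(n)) %>%
    mutate(prop = n / denom)
}

# e.g., q98_prop(d, q98_3, elem, "Often|All", 32) and q98_prop(d, q98_3, elem, "Never", 32)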

Determine which data would need to be collected in order to answer a scientific question (regardless of who generated the question) (1) - 5th

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_1) %>%
  filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32) # 29
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     9 0.281
d %>%
  filter(middle == 1) %>%
  count(q98_1) %>%
  filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114) # 39
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    24 0.211
d %>%
  filter(high == 1) %>%
  count(q98_1) %>%
  filter(str_detect(q98_1, "Often") | str_detect(q98_1, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232) # 39
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    47 0.203
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_1) %>% 
  filter(str_detect(q98_1, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32) # 8
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     1 0.0312
d %>% 
  filter(middle == 1) %>% 
  count(q98_1) %>% 
  filter(str_detect(q98_1, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114) # 2
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     2 0.0175
d %>% 
  filter(high == 1) %>% 
  count(q98_1) %>% 
  filter(str_detect(q98_1, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232) # 3
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     5 0.0216

Identify potential sources of variability (e.g., measurement error, natural variation) in the data (5) - NA

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_4) %>%
  filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     7 0.219
d %>%
  filter(middle == 1) %>%
  count(q98_4) %>%
  filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    22 0.193
d %>%
  filter(high == 1) %>%
  count(q98_4) %>%
  filter(str_detect(q98_4, "Often") | str_detect(q98_4, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    71 0.306
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_4) %>% 
  filter(str_detect(q98_4, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     3 0.0938
d %>% 
  filter(middle == 1) %>% 
  count(q98_4) %>% 
  filter(str_detect(q98_4, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     9 0.0789
d %>% 
  filter(high == 1) %>% 
  count(q98_4) %>% 
  filter(str_detect(q98_4, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1    14 0.0603

Consider how missing data or measurement error can affect data interpretation (7)

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_6) %>%
  filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     4 0.125
d %>%
  filter(middle == 1) %>%
  count(q98_6) %>%
  filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    15 0.132
d %>%
  filter(high == 1) %>%
  count(q98_6) %>%
  filter(str_detect(q98_6, "Often") | str_detect(q98_6, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    54 0.233
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_6) %>% 
  filter(str_detect(q98_6, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     3 0.0938
d %>% 
  filter(middle == 1) %>% 
  count(q98_6) %>% 
  filter(str_detect(q98_6, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    12 0.105
d %>% 
  filter(high == 1) %>% 
  count(q98_6) %>% 
  filter(str_detect(q98_6, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1    18 0.0776

Select and use grade-appropriate mathematical and/or statistical techniques to analyze data (for example: determining the best measure of central tendency, examining variation in data, or developing a line of best fit) (9)

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_7) %>%
  filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     8  0.25
d %>%
  filter(middle == 1) %>%
  count(q98_7) %>%
  filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    16 0.140
d %>%
  filter(high == 1) %>%
  count(q98_7) %>%
  filter(str_detect(q98_7, "Often") | str_detect(q98_7, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    51 0.220
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_7) %>% 
  filter(str_detect(q98_7, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     8  0.25
d %>% 
  filter(middle == 1) %>% 
  count(q98_7) %>% 
  filter(str_detect(q98_7, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    19 0.167
d %>% 
  filter(high == 1) %>% 
  count(q98_7) %>% 
  filter(str_detect(q98_7, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1    20 0.0862

Use mathematical and/or computational models to generate data to support a scientific claim (10)

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_8) %>%
  filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     7 0.219
d %>%
  filter(middle == 1) %>%
  count(q98_8) %>%
  filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    19 0.167
d %>%
  filter(high == 1) %>%
  count(q98_8) %>%
  filter(str_detect(q98_8, "Often") | str_detect(q98_8, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    57 0.246
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_8) %>% 
  filter(str_detect(q98_8, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     9 0.281
d %>% 
  filter(middle == 1) %>% 
  count(q98_8) %>% 
  filter(str_detect(q98_8, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    21 0.184
d %>% 
  filter(high == 1) %>% 
  count(q98_8) %>% 
  filter(str_detect(q98_8, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    32 0.138

Use data and reasoning to defend, verbally or in writing, a claim or refute alternative scientific claims about a real-world phenomenon (regardless of who made the claims) (11)

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_9) %>%
  filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    11 0.344
d %>%
  filter(middle == 1) %>%
  count(q98_9) %>%
  filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    42 0.368
d %>%
  filter(high == 1) %>%
  count(q98_9) %>%
  filter(str_detect(q98_9, "Often") | str_detect(q98_9, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1   117 0.504
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_9) %>% 
  filter(str_detect(q98_9, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     1 0.0312
d %>% 
  filter(middle == 1) %>% 
  count(q98_9) %>% 
  filter(str_detect(q98_9, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     3 0.0263
d %>% 
  filter(high == 1) %>% 
  count(q98_9) %>% 
  filter(str_detect(q98_9, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232)
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     8 0.0345

Determine which variables from a provided dataset are necessary to answer a scientific question (13)

# at least once a week
d %>%
  filter(elem == 1) %>%
  count(q98_2) %>%
  filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 32) # 29
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1     6 0.188
d %>%
  filter(middle == 1) %>%
  count(q98_2) %>%
  filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 114) # 39
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    26 0.228
d %>%
  filter(high == 1) %>%
  count(q98_2) %>%
  filter(str_detect(q98_2, "Often") | str_detect(q98_2, "All")) %>%
  summarize(n = sum(n)) %>%
  mutate(prop = n / 232) # 39
## # A tibble: 1 × 2
##       n  prop
##   <int> <dbl>
## 1    65 0.280
# never

d %>% 
  filter(elem == 1) %>% 
  count(q98_2) %>% 
  filter(str_detect(q98_2, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 32) # 8
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     2 0.0625
d %>% 
  filter(middle == 1) %>% 
  count(q98_2) %>% 
  filter(str_detect(q98_2, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 114) # 2
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     2 0.0175
d %>% 
  filter(high == 1) %>% 
  count(q98_2) %>% 
  filter(str_detect(q98_2, "Never")) %>% 
  summarize(n = sum(n)) %>% 
  mutate(prop = n / 232) # 3
## # A tibble: 1 × 2
##       n   prop
##   <int>  <dbl>
## 1     8 0.0345

State

d %>% count(state_work) %>%  # PR, outside US, 2 NA
  arrange(desc(n))
## # A tibble: 46 × 2
##    state_work                               n
##    <chr>                                <int>
##  1 California                              29
##  2 Massachusetts                           19
##  3 Wisconsin                               19
##  4 Michigan                                17
##  5 New York                                16
##  6 Arkansas                                13
##  7 New Jersey                              13
##  8 Texas                                   13
##  9 I do not reside in the United States    12
## 10 Connecticut                             11
## # … with 36 more rows