analyzing-data-in-science-analysis

Prepping, loading, setting up

library(tidyverse)
library(googlesheets4)

# Load the survey responses (first worksheet) and the qualitative Q80 codes
# ("Q 80 - MASTER" worksheet) from the same Google Sheet.
survey_sheet_url <- "https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=46458265"

d <- read_sheet(survey_sheet_url, sheet = 1)

# Prefix every column except the first with "qual_q80_" so the coded columns
# can be picked up later with contains("qual_q80"); the first column is the
# join key and is named response_id.
qual_q80 <- read_sheet(survey_sheet_url, sheet = "Q 80 - MASTER") %>%
  janitor::clean_names() %>%
  rename_with(~ str_c("qual_q80_", .x), .cols = -1) %>%
  rename(response_id = 1)

# Name the join key explicitly so the join cannot silently pick up other
# shared column names (e.g. when this chunk is re-run on an already-joined d).
d <- d %>%
  left_join(qual_q80, by = "response_id")

RQ 1

How do teachers support their students to analyze and interpret data?

open coding - Q80 overall

qualitatively coded based on open/written responses to Q80

# Count (val) and share (prop) of respondents whose Q80 response received
# each qualitative code, most frequent code first.
overall_freqs <- d %>%
  select(contains("qual_q80")) %>%
  # Drop the 1st and 9th qual_q80 columns.
  # NOTE(review): presumably these are non-code columns (e.g. raw text or
  # notes) -- confirm against the "Q 80 - MASTER" sheet.
  select(-c(1, 9)) %>%
  summarize(across(everything(), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(everything(), names_to = "key", values_to = "val") %>%
  # NOTE(review): 328 is a hard-coded denominator; confirm it is the intended
  # number of respondents.
  mutate(prop = val / 328) %>%
  arrange(desc(val))

# Print each code's share as a percentage rounded to three decimals.
overall_freqs %>%
  transmute(key, prop = round(prop * 100, 3)) %>%
  knitr::kable()
key prop
qual_q80_data_visualization_graph_creation 49.695
qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns 39.329
qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data 35.061
qual_q80_data_summary_summary_table_averaging_math_tasks 27.439
qual_q80_other_those_that_dont_fit_into_other_categories_modeling 21.341
qual_q80_data_application_answering_question_cer_explanations_comparing 20.732
qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data 15.244
# Horizontal bar chart of the code shares, bars sorted by share.
ggplot(overall_freqs, aes(x = reorder(key, prop), y = prop)) +
  geom_col() +
  coord_flip() +
  theme_minimal()

fixed response question– NSSME

4 overlapping

3 maybe overlapping/overlapping a little; 2 not overlapping

Q98: How often do you have students do each of the following in your class?

  1. Determine which data would need to be collected in order to answer a scientific question (regardless of who generated the question) (1) – YES
  2. Determine which variables from a provided dataset are necessary to answer a scientific question (13) – MAYBE
  3. Organize and/or represent data using tables, charts, or graphs in order to facilitate analysis (4) – YES
  4. Identify potential sources of variability (e.g., measurement error, natural variation) in the data (5) – MAYBE
  5. Analyze data using grade-appropriate methods in order to identify patterns, trends, or relationships (6) – YES
  6. Consider how missing data or measurement error can affect data interpretation (7) – NOT
  7. Select and use grade-appropriate mathematical and/or statistical techniques to analyze data (for example: determining the best measure of central tendency, examining variation in data, or developing a line of best fit) (9) – YES
  8. Use mathematical and/or computational models to generate data to support a scientific claim (10) – NO
  9. Use data and reasoning to defend, verbally or in writing, a claim or refute alternative scientific claims about a real-world phenomenon (regardless of who made the claims) (11) – MAYBE
# Percentage of respondents choosing each frequency option for Q98 items 1-9.
# One row per item; items are ordered by how many respondents chose
# "All or almost all science lessons" (most first).
d %>%
  select(q98_1:q98_9) %>%
  pivot_longer(everything(), names_to = "key", values_to = "val") %>%
  # Drop unanswered items: true missing values as well as literal "NA" strings.
  filter(!is.na(val), val != "NA") %>%
  count(key, val) %>%
  group_by(key) %>%
  mutate(
    # Share of this item's answers that fall in each response option
    percentage = (n / sum(n)) * 100,
    # Sort key: number of "All or almost all science lessons" answers
    order_var = sum(n[val == "All or almost all science lessons"])
  ) %>%
  ungroup() %>%
  mutate(
    key = fct_reorder(key, order_var, .desc = TRUE),
    val = factor(val, levels = c(
      "All or almost all science lessons",
      "Often (once or twice a week)",
      "Sometimes (once or twice a month)",
      "Rarely (a few times a year)",
      "Never"
    ))
  ) %>%
  select(key, val, percentage) %>%
  # Rows follow the reordered item factor; columns follow the option levels.
  arrange(key) %>%
  pivot_wider(names_from = val, values_from = percentage, names_sort = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  knitr::kable()
key All or almost all science lessons Often (once or twice a week) Sometimes (once or twice a month) Rarely (a few times a year) Never
q98_9 9.756 36.280 39.329 11.890 2.744
q98_5 7.927 42.378 39.939 8.841 0.915
q98_3 7.012 42.378 43.598 6.707 0.305
q98_1 4.573 16.463 44.817 32.317 1.829
q98_6 3.049 16.463 38.415 33.841 8.232
q98_8 3.049 19.512 37.805 25.000 14.634
q98_2 2.744 23.171 46.951 24.085 3.049
q98_4 2.439 24.085 42.073 24.695 6.707
q98_7 2.439 18.293 34.756 32.317 12.195

RQ2 - demographics

How does this support differ based on teachers’ grade, level of training and experience, and the subject they teach?

we use what teachers say they are doing in their own words for RQ2 – so we use Q80, not Q98

Grade Level

# One row per respondent with 0/1 indicators for the grade bands they teach:
#   elem   = any grade <= 5
#   second = any grade >= 6
#   middle = any grade from 6 to 8
#   high   = any grade >= 9
d_grades <- d %>%
  select(response_id, grade_level_taught) %>%
  mutate(grade_level_taught = str_replace(grade_level_taught, ":", "")) %>%
  # One row per listed grade; unlike a fixed-width separate(), this neither
  # warns about short lists nor drops values beyond a preset column count.
  separate_rows(grade_level_taught, sep = ",") %>%
  # NOTE(review): non-numeric grade labels become NA here and therefore count
  # toward no band -- confirm the sheet only contains numeric grades.
  mutate(grade = as.integer(grade_level_taught)) %>%
  group_by(response_id) %>%
  # na.rm = TRUE: a respondent with no usable grade gets 0 in every band.
  summarize(
    elem = as.numeric(any(grade <= 5, na.rm = TRUE)),
    second = as.numeric(any(grade >= 6, na.rm = TRUE)),
    middle = as.numeric(any(grade >= 6 & grade <= 8, na.rm = TRUE)),
    high = as.numeric(any(grade >= 9, na.rm = TRUE))
  )

# Attach the grade-band indicators to the main data. The key is named
# explicitly so that re-running this chunk cannot change which columns are
# used for matching.
d <- d %>%
  left_join(d_grades, by = "response_id")

# How many respondents have elem == 1 versus 0.
d %>%
  count(elem)
# A tibble: 2 × 2
   elem     n
  <dbl> <int>
1     0   298
2     1    32
# Among respondents with elem == 1: number (val) and proportion (prop, 0-1)
# whose Q80 response received each code, largest proportion first.
d %>%
  select(contains("qual_q80"), elem) %>%
  select(-c(1, 9)) %>%
  mutate(across(everything(), ~ replace_na(.x, 0))) %>%
  filter(elem == 1) %>%
  group_by(elem) %>%
  # n_teachers is the group size, so the denominator tracks the data instead
  # of being typed in by hand.
  summarize(
    across(starts_with("qual_q80"), ~ sum(.x, na.rm = TRUE)),
    n_teachers = n()
  ) %>%
  pivot_longer(starts_with("qual_q80"), names_to = "key", values_to = "val") %>%
  mutate(prop = val / n_teachers) %>%
  select(elem, key, val, prop) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
elem key val prop
1 qual_q80_data_visualization_graph_creation 16 0.50000
1 qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data 15 0.46875
1 qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns 10 0.31250
1 qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data 8 0.25000
1 qual_q80_data_summary_summary_table_averaging_math_tasks 8 0.25000
1 qual_q80_data_application_answering_question_cer_explanations_comparing 8 0.25000
1 qual_q80_other_those_that_dont_fit_into_other_categories_modeling 5 0.15625
# How many respondents have middle == 1 versus 0.
count(d, middle)
# A tibble: 2 × 2
  middle     n
   <dbl> <int>
1      0   216
2      1   114
# Among respondents with middle == 1: number (val) and percentage (prop,
# rounded to 2 decimals) whose Q80 response received each code.
d %>%
  select(contains("qual_q80"), middle) %>%
  select(-c(1, 9)) %>%
  mutate(across(everything(), ~ replace_na(.x, 0))) %>%
  filter(middle == 1) %>%
  group_by(middle) %>%
  # n_teachers is the group size, so the denominator tracks the data instead
  # of being typed in by hand.
  summarize(
    across(starts_with("qual_q80"), ~ sum(.x, na.rm = TRUE)),
    n_teachers = n()
  ) %>%
  pivot_longer(starts_with("qual_q80"), names_to = "key", values_to = "val") %>%
  mutate(prop = round(val / n_teachers * 100, 2)) %>%
  select(middle, key, val, prop) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
middle key val prop
1 qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data 48 42.11
1 qual_q80_data_visualization_graph_creation 48 42.11
1 qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns 46 40.35
1 qual_q80_data_application_answering_question_cer_explanations_comparing 31 27.19
1 qual_q80_other_those_that_dont_fit_into_other_categories_modeling 25 21.93
1 qual_q80_data_summary_summary_table_averaging_math_tasks 24 21.05
1 qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data 18 15.79
# How many respondents have high == 1 versus 0.
count(d, high)
# A tibble: 2 × 2
   high     n
  <dbl> <int>
1     0    98
2     1   232
# Among respondents with high == 1: number (val) and percentage (prop,
# rounded to 2 decimals) whose Q80 response received each code.
d %>%
  select(contains("qual_q80"), high) %>%
  select(-c(1, 9)) %>%
  mutate(across(everything(), ~ replace_na(.x, 0))) %>%
  filter(high == 1) %>%
  group_by(high) %>%
  # n_teachers is the group size, so the denominator tracks the data instead
  # of being typed in by hand.
  summarize(
    across(starts_with("qual_q80"), ~ sum(.x, na.rm = TRUE)),
    n_teachers = n()
  ) %>%
  pivot_longer(starts_with("qual_q80"), names_to = "key", values_to = "val") %>%
  mutate(prop = round(val / n_teachers * 100, 2)) %>%
  select(high, key, val, prop) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
high key val prop
1 qual_q80_data_visualization_graph_creation 125 53.88
1 qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns 95 40.95
1 qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data 78 33.62
1 qual_q80_data_summary_summary_table_averaging_math_tasks 71 30.60
1 qual_q80_data_application_answering_question_cer_explanations_comparing 48 20.69
1 qual_q80_other_those_that_dont_fit_into_other_categories_modeling 47 20.26
1 qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data 35 15.09

Subject area

# Long form: one row per (respondent, professional role). The comma-separated
# professional_role answers are split and trimmed, and professional_role_6_text
# is replaced by a TRUE/FALSE flag for whether it mentions "environmental".
d_long <- d %>%
  separate_rows(professional_role, sep = ",") %>%
  mutate(
    professional_role_6_text = str_detect(
      stringr::str_to_lower(professional_role_6_text),
      "environmental"
    ), # double-check this works
    professional_role = trimws(professional_role)
  )

# Wide form: one 0/1 indicator column per distinct professional role, kept
# alongside the Q80 code columns; everything is coerced to numeric and
# missing values are set to 0.
d_wide <- d_long %>%
  mutate(value = 1) %>%
  pivot_wider(names_from = professional_role, values_from = value, values_fill = 0) %>%
  select(contains("qual_q80"), Biology:Physics) %>%
  select(-c(1, 9)) %>%
  janitor::clean_names() %>%
  mutate(across(everything(), ~ replace_na(as.numeric(.x), 0)))

# Column names of the Q80 code indicators and of the subject indicators
wide_names <- names(d_wide)
task_columns <- wide_names[startsWith(wide_names, "qual_q80")]
subject_columns <- c(
  "biology",
  "other_please_describe",
  "general_science",
  "earth_science_or_earth_and_space_science",
  "chemistry",
  "physics"
)

# Percentage of respondents who selected one subject whose Q80 response
# received each qualitative code.
#
# Arguments:
#   subject_col  Name (string) of a 0/1 subject indicator column.
#   data         Data frame holding the subject and task columns
#                (defaults to the global d_wide).
#   task_cols    Names of the 0/1 task-code columns
#                (defaults to the global task_columns).
#
# Returns a one-row data frame with one percentage column per task code plus
# `subject` (the column name) and `total_responses` (rows where the subject
# indicator equals 1).
calculate_percentages_by_subject <- function(subject_col,
                                             data = d_wide,
                                             task_cols = task_columns) {
  if (!subject_col %in% names(data)) {
    stop("Column `", subject_col, "` not found in `data`.", call. = FALSE)
  }

  # Only respondents who selected this subject; rows with NA are dropped too
  subject_rows <- data %>%
    filter(.data[[subject_col]] == 1)

  subject_rows %>%
    summarise(across(all_of(task_cols), ~ mean(.x == 1, na.rm = TRUE) * 100)) %>%
    mutate(subject = subject_col, total_responses = nrow(subject_rows))
}

# One row of code percentages per subject, stacked into a single table
percentages_by_subject <- subject_columns %>%
  map(calculate_percentages_by_subject) %>%
  bind_rows()

# Show subject and group size first, percentages rounded to 2 decimals
percentages_by_subject %>%
  relocate(subject, total_responses) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  knitr::kable() # other is environmental
subject total_responses qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data qual_q80_data_summary_summary_table_averaging_math_tasks qual_q80_data_visualization_graph_creation qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns qual_q80_data_application_answering_question_cer_explanations_comparing qual_q80_other_those_that_dont_fit_into_other_categories_modeling
biology 191 33.51 17.28 28.27 54.97 39.27 22.51 20.94
other_please_describe 151 29.80 14.57 25.83 50.99 43.05 22.52 23.84
general_science 99 40.40 16.16 21.21 47.47 39.39 18.18 24.24
earth_science_or_earth_and_space_science 86 30.23 13.95 25.58 37.21 40.70 22.09 23.26
chemistry 73 36.99 13.70 26.03 52.05 35.62 24.66 17.81
physics 53 32.08 16.98 28.30 39.62 41.51 16.98 18.87

years worked

This tells us how these items correlate with years worked

# Correlation of each Q80 code indicator with years worked, highest first.
d %>%
  select(contains("qual_q80"), years_worked) %>%
  select(-c(1, 9)) %>%
  # years_worked is flattened with unlist() and then coerced to numeric
  mutate(years_worked = as.numeric(unlist(years_worked))) %>%
  # Keeps only rows with years_worked below 100 (missing values are dropped).
  # NOTE(review): presumably a guard against implausible entries -- confirm.
  filter(years_worked < 100) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  corrr::correlate() %>%
  corrr::focus(years_worked) %>%
  arrange(desc(years_worked))
# A tibble: 7 × 2
  term                                                              years_worked
  <chr>                                                                    <dbl>
1 qual_q80_data_interpretation_identify_and_interpret_a_graph_or_d…      0.0652 
2 qual_q80_data_collection_their_own_or_finding_data_recording_fir…      0.0517 
3 qual_q80_data_summary_summary_table_averaging_math_tasks               0.0472 
4 qual_q80_data_visualization_graph_creation                             0.0320 
5 qual_q80_data_curation_tables_organizing_entering_already_collec…      0.0122 
6 qual_q80_data_application_answering_question_cer_explanations_co…      0.00850
7 qual_q80_other_those_that_dont_fit_into_other_categories_modeling     -0.00808

prior experience

CP: not sure this is helpful

# Frequency of each distinct (comma-separated) q98 answer combination.
count(d, q98)
# A tibble: 117 × 2
   q98                                                                         n
   <chr>                                                                   <int>
 1 Graduate level coursework                                                  10
 2 Graduate level coursework,Research Experience for Teachers (NSF RET)        1
 3 Graduate level coursework,Research experience                              10
 4 Graduate level coursework,Research experience,Research Experience for …     1
 5 Graduate level coursework,Research experience,Teacher professional dev…     6
 6 Graduate level coursework,Taught myself                                     3
 7 Graduate level coursework,Taught myself,Research Experience for Teache…     1
 8 Graduate level coursework,Taught myself,Research experience                 3
 9 Graduate level coursework,Taught myself,Research experience,Research E…     1
10 Graduate level coursework,Taught myself,Research experience,Teacher pr…     3
# ℹ 107 more rows
# One row per (respondent, selected q98 option)
d_long_prior <- separate_rows(d, q98, sep = ",")

# How often each individual option was selected, most common first
d_long_prior %>%
  count(q98, sort = TRUE)
# A tibble: 11 × 2
   q98                                            n
   <chr>                                      <int>
 1 Undergraduate course in science              246
 2 Graduate level coursework                    209
 3 High school course in science                190
 4 Undergraduate course in math                 179
 5 High school course in math                   172
 6 Teacher professional development             172
 7 Taught myself                                154
 8 Research experience                          139
 9 Research Experience for Teachers (NSF RET)    30
10 NA                                             2
11 None                                           2
# For every prior-experience option: percentage of the respondents who
# selected it whose Q80 response received each code (missing codes count
# as 0). The result stays grouped by prior_exp.
p <- d_long_prior %>%
  select(contains("qual_q80"), prior_exp = q98) %>%
  select(-c(1, 9)) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  pivot_longer(-prior_exp, names_to = "key", values_to = "val") %>%
  group_by(prior_exp, key) %>%
  summarize(mean = mean(val) * 100)

# Wide table: one row per prior-experience option, one column per code,
# values rounded to 3 decimals; the "NA" (no answer) option is left out.
p %>%
  filter(prior_exp != "NA") %>%
  spread(key, mean) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  knitr::kable()
prior_exp qual_q80_data_application_answering_question_cer_explanations_comparing qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns qual_q80_data_summary_summary_table_averaging_math_tasks qual_q80_data_visualization_graph_creation qual_q80_other_those_that_dont_fit_into_other_categories_modeling
Graduate level coursework 23.923 33.971 16.746 41.148 30.144 49.761 21.531
High school course in math 23.256 37.209 19.186 44.767 29.651 56.395 15.116
High school course in science 23.684 35.789 17.368 43.684 26.842 55.789 16.316
None 50.000 0.000 0.000 0.000 0.000 0.000 50.000
Research experience 21.583 40.288 15.827 39.568 30.216 53.957 20.863
Research Experience for Teachers (NSF RET) 23.333 56.667 23.333 40.000 50.000 53.333 23.333
Taught myself 20.130 34.416 14.935 46.753 28.571 57.792 18.182
Teacher professional development 18.023 38.953 16.279 43.023 31.977 51.163 22.093
Undergraduate course in math 23.464 36.313 16.201 41.899 32.402 55.866 14.525
Undergraduate course in science 24.390 36.179 17.073 42.276 29.268 56.098 15.854

who does this - state (NGSS status)

# State-level standards status: keep only the State and Status columns,
# renamed to lower case.
state_data <- read_sheet("https://docs.google.com/spreadsheets/d/1wul5jRNZBVqZP53swygLD34sBXPu0xEyJyOqRXLG6Dc/edit?gid=1207018323#gid=1207018323") %>%
  rename(state = State, status = Status) %>%
  select(state, status)

state_data
# A tibble: 51 × 2
   state        status 
   <chr>        <chr>  
 1 Maine        NGSS   
 2 Minnesota    Adapted
 3 Kansas       NGSS   
 4 Rhode Island NGSS   
 5 Vermont      NGSS   
 6 Kentucky     NGSS   
 7 Maryland     NGSS   
 8 California   NGSS   
 9 Delaware     NGSS   
10 Washington   NGSS   
# ℹ 41 more rows
# Percentage of respondents in each standards-status group whose Q80 response
# received each code (missing codes count as 0), rounded to 2 decimals.
d %>%
  select(contains("qual_q80"), state_work) %>%
  select(-c(1, 9)) %>%
  rename(state = state_work) %>%
  left_join(state_data, by = "state") %>%
  # The state name is only needed for the join. Leaving it in would reshape
  # it together with the codes, coercing every value to character and adding
  # an all-NA `state` column to the table.
  select(-state) %>%
  # Respondents whose state has no match in state_data have no status
  filter(!is.na(status)) %>%
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  pivot_longer(-status, names_to = "key", values_to = "val") %>%
  group_by(status, key) %>%
  summarize(mean = mean(val) * 100, .groups = "drop") %>%
  pivot_wider(names_from = key, values_from = mean) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  knitr::kable()
status qual_q80_data_application_answering_question_cer_explanations_comparing qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns qual_q80_data_summary_summary_table_averaging_math_tasks qual_q80_data_visualization_graph_creation qual_q80_other_those_that_dont_fit_into_other_categories_modeling state
Adapted 17.21 40.16 16.39 32.79 27.87 45.08 23.77 NA
NGSS 26.45 29.75 14.05 44.63 28.10 52.07 14.88 NA
Not 16.95 32.20 15.25 40.68 20.34 57.63 28.81 NA
  • make sure our data is relevant as of 2021 - https://www.nsta.org/science-standards