library(tidyverse)
library(googlesheets4)
# Load the survey responses and the qualitative Q80 coding sheet from the same
# Google Sheets workbook, then merge the codes onto the responses.
survey_url <- "https://docs.google.com/spreadsheets/d/1gLyAAp6N_5g-Uo-eb61O-eW2apMzzqNdM8fLcBV7V3g/edit#gid=46458265"

# Survey responses: first worksheet of the workbook.
d <- read_sheet(survey_url, sheet = 1)

# Qualitative codes: worksheet named "Q 80 - MASTER", with cleaned column names.
qual_q80 <- read_sheet(survey_url, sheet = "Q 80 - MASTER") %>%
  janitor::clean_names()

# Prefix every column with "qual_q80_" so the coded columns can later be picked
# up with contains("qual_q80"); the first column is then renamed to the join key.
names(qual_q80) <- str_c("qual_q80_", names(qual_q80))
names(qual_q80)[1] <- "response_id"

# Join on an explicit key rather than relying on the natural-join guess.
# Assumes `d` has a `response_id` column (it is selected later in this file).
d <- d %>%
  left_join(qual_q80, by = "response_id")
analyzing-data-in-science-analysis
Prepping, loading, setting up
RQ 1
How do teachers support their students to analyze and interpret data?
open coding - Q80 overall
qualitatively coded based on open/written responses to Q80
# Count how many responses were coded into each Q80 category.
overall_freqs <- d %>%
  select(contains("qual_q80")) %>%
  # Drop the 1st and 9th matched columns.
  # NOTE(review): presumably these are non-code columns (e.g. raw text/notes) -
  # confirm against the coding sheet.
  select(-c(1, 9)) %>%
  summarize_all(sum, na.rm = TRUE) %>%
  gather(key, val) %>%
  # NOTE(review): 328 is a hard-coded denominator; the count() output elsewhere
  # in this file sums to 330 rows - confirm which respondent total is intended.
  mutate(prop = val / 328) %>%
  arrange(desc(val))

# Display the proportions as percentages, rounded to 3 decimals.
overall_freqs %>%
  mutate(prop = prop * 100) %>%
  mutate_if(is.numeric, round, 3) %>%
  select(-val) %>%
  knitr::kable()
key | prop |
---|---|
qual_q80_data_visualization_graph_creation | 49.695 |
qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 39.329 |
qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | 35.061 |
qual_q80_data_summary_summary_table_averaging_math_tasks | 27.439 |
qual_q80_other_those_that_dont_fit_into_other_categories_modeling | 21.341 |
qual_q80_data_application_answering_question_cer_explanations_comparing | 20.732 |
qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 15.244 |
# Horizontal bar chart of the overall code frequencies, ordered by `prop`.
overall_freqs %>%
  ggplot(aes(x = reorder(key, prop), y = prop)) +
  geom_col() +
  theme_minimal() +
  coord_flip()
fixed response question– NSSME
4 overlapping
3 maybe overlapping/overlapping a little; 2 not overlapping
Q98: How often do you have students do each of the following in your class?
- Determine which data would need to be collected in order to answer a scientific question (regardless of who generated the question) (1) – YES
- Determine which variables from a provided dataset are necessary to answer a scientific question (13) – MAYBE
- Organize and/or represent data using tables, charts, or graphs in order to facilitate analysis (4) – YES
- Identify potential sources of variability (e.g., measurement error, natural variation) in the data (5) – MAYBE
- Analyze data using grade-appropriate methods in order to identify patterns, trends, or relationships (6) – YES
- Consider how missing data or measurement error can affect data interpretation (7) – NOT
- Select and use grade-appropriate mathematical and/or statistical techniques to analyze data (for example: determining the best measure of central tendency, examining variation in data, or developing a line of best fit) (9) – YES
- Use mathematical and/or computational models to generate data to support a scientific claim (10) – NOT
- Use data and reasoning to defend, verbally or in writing, a claim or refute alternative scientific claims about a real-world phenomenon (regardless of who made the claims) (11) – MAYBE
# Percentage of responses at each frequency level for the Q98 items
# (columns q98_1 through q98_9), one row per item.
d %>%
  select(q98_1:q98_9) %>%
  gather(key, val) %>%
  count(key, val) %>%
  # Drop missing answers: real NA values as well as a literal "NA" string.
  filter(!is.na(val), val != "NA") %>%
  group_by(key) %>%
  mutate(
    # Total non-missing answers per item, used as the percentage denominator.
    total_n = sum(n),
    percentage = (n / total_n) * 100,
    # Sort key: number of "All or almost all science lessons" answers per item.
    order_var = sum(n[val == "All or almost all science lessons"], na.rm = TRUE)
  ) %>%
  ungroup() %>%
  arrange(desc(order_var)) %>%
  # Order items by order_var (descending) and fix the response-level order so
  # spread() lays out rows and columns in that order.
  mutate(key = fct_reorder(key, order_var, .desc = TRUE),
         val = factor(val, levels = c(
           "All or almost all science lessons",
           "Often (once or twice a week)",
           "Sometimes (once or twice a month)",
           "Rarely (a few times a year)",
           "Never"))) %>%
  select(key, val, percentage) %>%
  spread(val, percentage) %>%
  mutate_if(is.numeric, round, 3) %>%
  knitr::kable()
key | All or almost all science lessons | Often (once or twice a week) | Sometimes (once or twice a month) | Rarely (a few times a year) | Never |
---|---|---|---|---|---|
q98_9 | 9.756 | 36.280 | 39.329 | 11.890 | 2.744 |
q98_5 | 7.927 | 42.378 | 39.939 | 8.841 | 0.915 |
q98_3 | 7.012 | 42.378 | 43.598 | 6.707 | 0.305 |
q98_1 | 4.573 | 16.463 | 44.817 | 32.317 | 1.829 |
q98_6 | 3.049 | 16.463 | 38.415 | 33.841 | 8.232 |
q98_8 | 3.049 | 19.512 | 37.805 | 25.000 | 14.634 |
q98_2 | 2.744 | 23.171 | 46.951 | 24.085 | 3.049 |
q98_4 | 2.439 | 24.085 | 42.073 | 24.695 | 6.707 |
q98_7 | 2.439 | 18.293 | 34.756 | 32.317 | 12.195 |
RQ2 - demographics
How does this support differ based on teachers’ grade, level of training and experience, and the subject they teach?
we use what teachers say they are doing in their own words for RQ2 – so we use Q80, not Q98
Grade Level
# Build one row per respondent with 0/1 indicators for the grade bands taught:
#   elem   = any grade <= 5
#   second = any grade >= 6
#   middle = any grade in 6-8
#   high   = any grade >= 9
d_grades <- d %>%
  select(response_id, grade_level_taught) %>%
  # Remove the first ":" from the raw answer before splitting on commas.
  mutate(grade_level_taught = str_replace(grade_level_taught, ":", "")) %>%
  # Split into up to 13 grade columns; shorter answers are padded with NA.
  separate(col = grade_level_taught,
           into = str_c("grade", letters[1:13]),
           sep = ",",
           fill = "right") %>%
  gather(key, val, -response_id) %>%
  # Entries that are not integers become NA here and are ignored below.
  mutate(val = as.integer(val)) %>%
  group_by(response_id) %>%
  # na.rm = TRUE makes a respondent with no matching grade a 0 rather than NA.
  summarize(
    elem = as.numeric(any(val <= 5, na.rm = TRUE)),
    second = as.numeric(any(val >= 6, na.rm = TRUE)),
    middle = as.numeric(any(val >= 6 & val <= 8, na.rm = TRUE)),
    high = as.numeric(any(val >= 9, na.rm = TRUE))
  )

# Attach the grade-band indicators to the survey data.
d <- d %>%
  left_join(d_grades, by = "response_id")
# Number of respondents with and without the elementary indicator.
d %>%
  count(elem)
# A tibble: 2 × 2
elem n
<dbl> <int>
1 0 298
2 1 32
# Denominator: number of respondents flagged as elementary, taken from the
# data rather than hard-coded.
n_elem <- sum(d$elem == 1, na.rm = TRUE)

# Share of elementary teachers whose Q80 response received each code,
# reported as a percentage rounded to 2 decimals (same format as the
# middle and high school tables).
d %>%
  select(contains("qual_q80"), elem) %>%
  # Drop the 1st and 9th matched columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  mutate_all(replace_na, 0) %>%
  group_by(elem) %>%
  summarize_all(sum, na.rm = TRUE) %>%
  gather(key, val, -elem) %>%
  filter(elem == 1) %>%
  mutate(prop = round(val / n_elem * 100, 2)) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
elem | key | val | prop |
---|---|---|---|
1 | qual_q80_data_visualization_graph_creation | 16 | 0.50000 |
1 | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | 15 | 0.46875 |
1 | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 10 | 0.31250 |
1 | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 8 | 0.25000 |
1 | qual_q80_data_summary_summary_table_averaging_math_tasks | 8 | 0.25000 |
1 | qual_q80_data_application_answering_question_cer_explanations_comparing | 8 | 0.25000 |
1 | qual_q80_other_those_that_dont_fit_into_other_categories_modeling | 5 | 0.15625 |
# Number of respondents with and without the middle school indicator.
d %>%
  count(middle)
# A tibble: 2 × 2
middle n
<dbl> <int>
1 0 216
2 1 114
# Denominator: number of respondents flagged as middle school, taken from the
# data rather than hard-coded.
n_middle <- sum(d$middle == 1, na.rm = TRUE)

# Percentage of middle school teachers whose Q80 response received each code.
d %>%
  select(contains("qual_q80"), middle) %>%
  # Drop the 1st and 9th matched columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  mutate_all(replace_na, 0) %>%
  group_by(middle) %>%
  summarize_all(sum, na.rm = TRUE) %>%
  gather(key, val, -middle) %>%
  filter(middle == 1) %>%
  mutate(prop = round(val / n_middle * 100, 2)) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
middle | key | val | prop |
---|---|---|---|
1 | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | 48 | 42.11 |
1 | qual_q80_data_visualization_graph_creation | 48 | 42.11 |
1 | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 46 | 40.35 |
1 | qual_q80_data_application_answering_question_cer_explanations_comparing | 31 | 27.19 |
1 | qual_q80_other_those_that_dont_fit_into_other_categories_modeling | 25 | 21.93 |
1 | qual_q80_data_summary_summary_table_averaging_math_tasks | 24 | 21.05 |
1 | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 18 | 15.79 |
# Number of respondents with and without the high school indicator.
d %>%
  count(high)
# A tibble: 2 × 2
high n
<dbl> <int>
1 0 98
2 1 232
# Denominator: number of respondents flagged as high school, taken from the
# data rather than hard-coded.
n_high <- sum(d$high == 1, na.rm = TRUE)

# Percentage of high school teachers whose Q80 response received each code.
d %>%
  select(contains("qual_q80"), high) %>%
  # Drop the 1st and 9th matched columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  mutate_all(replace_na, 0) %>%
  group_by(high) %>%
  summarize_all(sum, na.rm = TRUE) %>%
  gather(key, val, -high) %>%
  filter(high == 1) %>%
  mutate(prop = round(val / n_high * 100, 2)) %>%
  arrange(desc(prop)) %>%
  knitr::kable()
high | key | val | prop |
---|---|---|---|
1 | qual_q80_data_visualization_graph_creation | 125 | 53.88 |
1 | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | 95 | 40.95 |
1 | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | 78 | 33.62 |
1 | qual_q80_data_summary_summary_table_averaging_math_tasks | 71 | 30.60 |
1 | qual_q80_data_application_answering_question_cer_explanations_comparing | 48 | 20.69 |
1 | qual_q80_other_those_that_dont_fit_into_other_categories_modeling | 47 | 20.26 |
1 | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | 35 | 15.09 |
Subject area
# Separate the values in the professional_role column into individual rows
d_long <- d %>%
  separate_rows(professional_role, sep = ",")

# Flag free-text "other" roles that mention "environmental".
# NOTE(review): this flag is not selected into d_wide below, so it does not
# currently affect the subject table - double-check this works as intended.
d_long <- d_long %>%
  mutate(
    professional_role_6_text = stringr::str_to_lower(professional_role_6_text),
    professional_role_6_text = str_detect(professional_role_6_text, "environmental")
  )

# Trim any leading/trailing whitespace
d_long$professional_role <- trimws(d_long$professional_role)

# Create dichotomous columns for each unique professional role
d_wide <- d_long %>%
  mutate(value = 1) %>%
  pivot_wider(names_from = professional_role, values_from = value, values_fill = 0)

# Keep the Q80 code columns plus the subject indicators (Biology through
# Physics), dropping the 1st and 9th selected columns.
# NOTE(review): presumably non-code columns - confirm against the coding sheet.
d_wide <- d_wide %>%
  select(contains("qual_q80"), Biology:Physics) %>%
  select(-c(1, 9)) %>%
  janitor::clean_names()

# Coerce every column to numeric and treat missing values as 0.
d_wide <- d_wide %>%
  mutate(across(everything(), ~ replace_na(as.numeric(.x), 0)))

# Identify task and subject columns
task_columns <- grep("^qual_q80", names(d_wide), value = TRUE)
subject_columns <- c(
  "biology", "other_please_describe", "general_science",
  "earth_science_or_earth_and_space_science", "chemistry", "physics"
)
# Function to calculate percentages for a single subject.
#
# Arguments:
#   subject_col  Name (string) of a 0/1 subject indicator column in `data`.
#   data         Data frame holding the indicator and task columns
#                (defaults to d_wide).
#   task_cols    Character vector of task column names (defaults to
#                task_columns).
#
# Returns a one-row tibble with, for each task column, the percentage of rows
# with the subject selected whose task value is 1, plus `subject` (the column
# name) and `total_responses` (number of rows with the subject selected).
calculate_percentages_by_subject <- function(subject_col,
                                             data = d_wide,
                                             task_cols = task_columns) {
  stopifnot(
    is.character(subject_col),
    length(subject_col) == 1,
    subject_col %in% names(data)
  )

  # Only include rows where the subject was selected
  subject_rows <- data %>%
    filter(.data[[subject_col]] == 1)

  subject_rows %>%
    summarise(across(all_of(task_cols), ~ mean(.x == 1, na.rm = TRUE) * 100)) %>%
    # Add subject name and total responses
    mutate(subject = subject_col, total_responses = nrow(subject_rows))
}
# Apply the function to all subject columns and bind the results
percentages_by_subject <- bind_rows(
  lapply(subject_columns, calculate_percentages_by_subject)
)

# One row per subject, with subject and respondent count first.
percentages_by_subject %>%
  select(subject, total_responses, everything()) %>%
  mutate_if(is.numeric, round, 2) %>%
  knitr::kable() # other is environmental
subject | total_responses | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | qual_q80_data_summary_summary_table_averaging_math_tasks | qual_q80_data_visualization_graph_creation | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | qual_q80_data_application_answering_question_cer_explanations_comparing | qual_q80_other_those_that_dont_fit_into_other_categories_modeling |
---|---|---|---|---|---|---|---|---|
biology | 191 | 33.51 | 17.28 | 28.27 | 54.97 | 39.27 | 22.51 | 20.94 |
other_please_describe | 151 | 29.80 | 14.57 | 25.83 | 50.99 | 43.05 | 22.52 | 23.84 |
general_science | 99 | 40.40 | 16.16 | 21.21 | 47.47 | 39.39 | 18.18 | 24.24 |
earth_science_or_earth_and_space_science | 86 | 30.23 | 13.95 | 25.58 | 37.21 | 40.70 | 22.09 | 23.26 |
chemistry | 73 | 36.99 | 13.70 | 26.03 | 52.05 | 35.62 | 24.66 | 17.81 |
physics | 53 | 32.08 | 16.98 | 28.30 | 39.62 | 41.51 | 16.98 | 18.87 |
years worked
This tells us how these items correlate with years worked
# Correlation between each Q80 code column and years worked.
d %>%
  select(contains("qual_q80"), years_worked) %>%
  # Drop the 1st and 9th selected columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  # years_worked is flattened and coerced to numeric before use.
  mutate(years_worked = as.numeric(unlist(years_worked))) %>%
  # Keep values below 100; rows with a missing years_worked are dropped too.
  # NOTE(review): presumably 100+ marks invalid entries - confirm.
  filter(years_worked < 100) %>%
  mutate_if(is.numeric, replace_na, 0) %>%
  corrr::correlate() %>%
  corrr::focus(years_worked) %>%
  arrange(desc(years_worked))
# A tibble: 7 × 2
term years_worked
<chr> <dbl>
1 qual_q80_data_interpretation_identify_and_interpret_a_graph_or_d… 0.0652
2 qual_q80_data_collection_their_own_or_finding_data_recording_fir… 0.0517
3 qual_q80_data_summary_summary_table_averaging_math_tasks 0.0472
4 qual_q80_data_visualization_graph_creation 0.0320
5 qual_q80_data_curation_tables_organizing_entering_already_collec… 0.0122
6 qual_q80_data_application_answering_question_cer_explanations_co… 0.00850
7 qual_q80_other_those_that_dont_fit_into_other_categories_modeling -0.00808
prior experience
CP: not sure this is helpful
# Frequency of each raw (comma-separated) prior-experience answer.
d %>%
  count(q98)
# A tibble: 117 × 2
q98 n
<chr> <int>
1 Graduate level coursework 10
2 Graduate level coursework,Research Experience for Teachers (NSF RET) 1
3 Graduate level coursework,Research experience 10
4 Graduate level coursework,Research experience,Research Experience for … 1
5 Graduate level coursework,Research experience,Teacher professional dev… 6
6 Graduate level coursework,Taught myself 3
7 Graduate level coursework,Taught myself,Research Experience for Teache… 1
8 Graduate level coursework,Taught myself,Research experience 3
9 Graduate level coursework,Taught myself,Research experience,Research E… 1
10 Graduate level coursework,Taught myself,Research experience,Teacher pr… 3
# ℹ 107 more rows
# One row per respondent per selected prior-experience option.
d_long_prior <- d %>%
  separate_rows(q98, sep = ",") %>%
  # Guard against stray spaces around the separator.
  mutate(q98 = str_trim(q98))

# Frequency of each individual option, most common first.
d_long_prior %>%
  count(q98) %>%
  arrange(desc(n))
# A tibble: 11 × 2
q98 n
<chr> <int>
1 Undergraduate course in science 246
2 Graduate level coursework 209
3 High school course in science 190
4 Undergraduate course in math 179
5 High school course in math 172
6 Teacher professional development 172
7 Taught myself 154
8 Research experience 139
9 Research Experience for Teachers (NSF RET) 30
10 NA 2
11 None 2
# Percentage of responses receiving each Q80 code, by prior-experience option.
p <- d_long_prior %>%
  select(contains("qual_q80"), prior_exp = q98) %>%
  # Drop the 1st and 9th selected columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  mutate_if(is.numeric, replace_na, 0) %>%
  gather(key, val, -prior_exp) %>%
  group_by(prior_exp, key) %>%
  # "drop_last" keeps the result grouped by prior_exp (the summarize default)
  # while silencing the regrouping message.
  summarize(mean = mean(val), .groups = "drop_last") %>%
  mutate(mean = mean * 100)
# Wide table: one row per prior-experience option, one column per Q80 code.
p %>%
  ungroup() %>%
  spread(key, mean) %>%
  mutate_if(is.numeric, round, 3) %>%
  # Drop missing options explicitly (real NA as well as a literal "NA" string)
  # instead of relying on NA comparisons being filtered out.
  filter(!is.na(prior_exp), prior_exp != "NA") %>%
  knitr::kable()
prior_exp | qual_q80_data_application_answering_question_cer_explanations_comparing | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | qual_q80_data_summary_summary_table_averaging_math_tasks | qual_q80_data_visualization_graph_creation | qual_q80_other_those_that_dont_fit_into_other_categories_modeling |
---|---|---|---|---|---|---|---|
Graduate level coursework | 23.923 | 33.971 | 16.746 | 41.148 | 30.144 | 49.761 | 21.531 |
High school course in math | 23.256 | 37.209 | 19.186 | 44.767 | 29.651 | 56.395 | 15.116 |
High school course in science | 23.684 | 35.789 | 17.368 | 43.684 | 26.842 | 55.789 | 16.316 |
None | 50.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 50.000 |
Research experience | 21.583 | 40.288 | 15.827 | 39.568 | 30.216 | 53.957 | 20.863 |
Research Experience for Teachers (NSF RET) | 23.333 | 56.667 | 23.333 | 40.000 | 50.000 | 53.333 | 23.333 |
Taught myself | 20.130 | 34.416 | 14.935 | 46.753 | 28.571 | 57.792 | 18.182 |
Teacher professional development | 18.023 | 38.953 | 16.279 | 43.023 | 31.977 | 51.163 | 22.093 |
Undergraduate course in math | 23.464 | 36.313 | 16.201 | 41.899 | 32.402 | 55.866 | 14.525 |
Undergraduate course in science | 24.390 | 36.179 | 17.073 | 42.276 | 29.268 | 56.098 | 15.854 |
who does this - state (NGSS status)
# Load the state-level standards sheet.
state_data <- read_sheet("https://docs.google.com/spreadsheets/d/1wul5jRNZBVqZP53swygLD34sBXPu0xEyJyOqRXLG6Dc/edit?gid=1207018323#gid=1207018323")

# Keep only the state name and its status, with lower-case column names.
state_data <- state_data %>%
  select(state = State, status = Status)

state_data
# A tibble: 51 × 2
state status
<chr> <chr>
1 Maine NGSS
2 Minnesota Adapted
3 Kansas NGSS
4 Rhode Island NGSS
5 Vermont NGSS
6 Kentucky NGSS
7 Maryland NGSS
8 California NGSS
9 Delaware NGSS
10 Washington NGSS
# ℹ 41 more rows
# Percentage of responses receiving each Q80 code, by the status of the state
# the teacher works in.
d %>%
  select(contains("qual_q80"), state_work) %>%
  # Drop the 1st and 9th selected columns.
  # NOTE(review): presumably non-code columns - confirm against the coding sheet.
  select(-c(1, 9)) %>%
  rename(state = state_work) %>%
  left_join(state_data, by = "state") %>%
  # Drop the state name once the status is attached; otherwise it is gathered
  # with the code columns and shows up as a spurious all-NA `state` column.
  select(-state) %>%
  mutate_if(is.numeric, replace_na, 0) %>%
  gather(key, val, -status) %>%
  mutate(val = as.integer(val)) %>%
  group_by(status, key) %>%
  summarize(mean = mean(val) * 100, .groups = "drop") %>%
  spread(key, mean) %>%
  # Respondents whose state did not match a row in state_data have no status.
  filter(!is.na(status)) %>%
  mutate_if(is.numeric, round, 2) %>%
  knitr::kable()
status | qual_q80_data_application_answering_question_cer_explanations_comparing | qual_q80_data_collection_their_own_or_finding_data_recording_first_hand_data | qual_q80_data_curation_tables_organizing_entering_already_collected_or_recorded_data_tidy_data | qual_q80_data_interpretation_identify_and_interpret_a_graph_or_data_looking_for_patterns | qual_q80_data_summary_summary_table_averaging_math_tasks | qual_q80_data_visualization_graph_creation | qual_q80_other_those_that_dont_fit_into_other_categories_modeling | state |
---|---|---|---|---|---|---|---|---|
Adapted | 17.21 | 40.16 | 16.39 | 32.79 | 27.87 | 45.08 | 23.77 | NA |
NGSS | 26.45 | 29.75 | 14.05 | 44.63 | 28.10 | 52.07 | 14.88 | NA |
Not | 16.95 | 32.20 | 15.25 | 40.68 | 20.34 | 57.63 | 28.81 | NA |
- make sure our data is relevant as of 2021 - https://www.nsta.org/science-standards