Loading, Setting Up

library(tidyverse)
library(janitor)

Finding out how many people responded to each survey

# Read each survey export, keep sufficiently complete responses, and clean the
# column names with janitor.

# Pre-survey: only fully completed responses (Progress of exactly 100).
pre <- read_csv("pre-survey.csv") %>%
  filter(Progress == 100) %>%
  clean_names() # pre n = 27

# Post-survey: anything above 25% progress, so partial responses are retained.
post <- read_csv("post-survey.csv") %>%
  filter(Progress > 25) %>%
  clean_names() # two who completed 69%; post n = 21; 19 complete responses

# Participant-entered IDs (q23) from each survey.
pre_id <- pull(pre, q23)

# Post-survey IDs are kept only when present and at least six characters long;
# this filters someone who typed 5 and this filters NA responses.
# NOTE(review): matching on these IDs is exact, and the printed IDs contain
# near-duplicates (e.g. "1auowe" vs "1AuOwe") -- confirm whether such entries
# should be treated as the same respondent.
post_id <- post %>%
  filter(!is.na(q23), nchar(q23) >= 6) %>%
  pull(q23)

# Show the pre-survey IDs.
print(pre_id)

##  [1] "2eajon"  "2ortro"  "0masha"  "2gryor"  "1spban"  "3orfor"  "1knweo" 
##  [8] "1grhal"  "2carea"  "2sutim"  "2mocum"  "1grpin"  "0206may" "3robr"  
## [15] "1kibev"  "9wiand"  "0chlon"  "1coyor"  "2aista"  "0miwyc"  "2kisha" 
## [22] "3anlan"  "1ausil"  "2basan"  "1auowe"  "17grbib" "2dafai"

# Show the retained post-survey IDs.
print(post_id)

##  [1] "0masha"  "1kibev"  "2gryor"  "0miawyc" "2aista"  "1grhal"  "1ausil" 
##  [8] "1spban"  "2basan"  "3carea"  "1knweo"  "9wiand"  "2eajon"  "2ortro" 
## [15] "1AuOwe"  "may0206" "1coyor"  "2kisha"

# Number of pre-survey IDs.
print(length(pre_id))

## [1] 27

# Number of retained post-survey IDs.
print(length(post_id))

## [1] 18

# Number of distinct IDs seen across the two surveys.
length(union(pre_id, post_id))

## [1] 31

# Count the pre-survey IDs that also appear among the post-survey IDs.
sum(pre_id %in% post_id) # 14 of the respondents took both

## [1] 14

# 13 pre-survey respondents didn't take post
# 4 post-survey respondents didn't take pre

# Flag, in each survey, the rows whose ID (q23) also occurs in the other survey.
# NOTE(review): the post-survey flag is named `took_post_surveys` although it
# records a match against the pre-survey IDs -- confirm the intended name.
pre[["took_both_surveys"]] <- is.element(pre[["q23"]], post[["q23"]])
post[["took_post_surveys"]] <- is.element(post[["q23"]], pre[["q23"]])

Motivation for Attending Carpentries Workshops

# Tabulate the reasons for participating (q10_* items, excluding the free-text
# field): how many respondents selected each reason and what percentage of all
# retained pre-survey respondents that represents.
pre %>%
  select(contains("q10_"), -q10_7_text) %>%
  pivot_longer(q10_1:q10_7, values_drop_na = TRUE) %>%
  count(name, value, sort = TRUE) %>%
  # Percentage is relative to every row of `pre`, not only those who answered.
  mutate(prop = n / nrow(pre) * 100) %>%
  mutate_if(is.numeric, round, 1) %>%
  select(`Reason for Participating` = value, n, `%` = prop)

## # A tibble: 7 x 3
##   `Reason for Participating`                                      n   `%`
##   <chr>                                                       <dbl> <dbl>
## 1 To learn new skills.                                           24  88.9
## 2 As a requirement for my degree program or current position.    23  85.2
## 3 To learn skills that I can apply to my current work.           22  81.5
## 4 To learn skills that I can apply to my work in the future.     20  74.1
## 5 To refresh or review my skills.                                 8  29.6
## 6 To learn skills that will help me get a job or a promotion.     8  29.6
## 7 Other (please describe)                                         1   3.7

# Count, for each kind of software, how many respondents chose each usage
# frequency; the result has one row per software type and one column per
# frequency label, with 0 where a combination never occurred.
freq_use <- pre %>%
  select(freq_use_1:freq_use_3) %>%
  set_names("Statistical", "Spreadsheet", "Programming") %>%
  pivot_longer(everything(), values_drop_na = TRUE) %>%
  count(name, value) %>%
  spread(key = value, value = n, fill = 0)

# Plot, for each software type, the percentage of responses at each usage
# frequency, then save the figure.
freq_use %>%
  mutate(
    name = factor(name, levels = c("Spreadsheet", "Statistical", "Programming"))
  ) %>%
  gather(key, val, -name) %>%
  group_by(name) %>%
  # Turn counts into percentages within each software type.
  mutate(val = val / sum(val) * 100) %>%
  ggplot(aes(x = key, y = val, color = val)) +
  geom_point() +
  facet_wrap(~name) +
  coord_flip() +
  scale_x_discrete(
    "Frequency of Use",
    limits = c(
      "Never",
      "Less than once per year",
      "Several times per year",
      "Monthly",
      "Weekly",
      "Daily"
    )
  ) +
  xlab(NULL) +
  ylab("% of Participants") +
  theme_bw() +
  theme(
    legend.position = "none",
    text = element_text(size = 14, family = "Times")
  )

# Saves the most recently displayed plot.
ggsave("use-of-software.png", width = 9, height = 5)

Prior experience (qualitative responses)

# Render the free-text answers to q18 as a table.
knitr::kable(pre$q18)

Concerns you have (qualitative responses)

# Render the free-text answers to q12 as a table.
knitr::kable(pre$q12)

Workshop Type and Perception of Workshop Environment/Experience

# Summarise post-survey perceptions of the workshop (q21_1:q21_4 and q11_4):
# one row per item with the mean score (`mean_val`) and the standard error of
# that mean (`sd_val`; the column name is kept for the plotting code).
#
# Agreement labels are scored by their position on the 7-point scale, so every
# label gets a distinct value from 1 ("Strongly disagree") to 7 ("Strongly
# agree"); any other value, including a missing answer, becomes NA.
experience <- post %>%
  select(q21_1:q21_4, q11_4) %>%
  set_names(c("Questions Were Answered",
              "Comfortable With Instructor",
              "Instructor Was Enthusiastic",
              "Instructor Was Knowledgable",
              "Comfortable Learning")) %>%
  pivot_longer(everything()) %>%
  mutate(value = match(value, c(
    "Strongly disagree",
    "Disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Agree",
    "Strongly agree"
  ))) %>%
  group_by(name) %>%
  # Standard error uses the number of non-missing answers for each item rather
  # than a fixed respondent count.
  summarize(mean_val = mean(value, na.rm = TRUE),
            sd_val = sd(value, na.rm = TRUE) / sqrt(sum(!is.na(value))))

# Horizontal bar chart of the mean response per item, ordered by mean, then
# saved to disk.
ggplot(experience, aes(x = reorder(name, mean_val), y = mean_val)) +
  geom_col() +
  # Error bars are currently commented out:
  # geom_errorbar(aes(ymin = mean_val - sd_val,
  #                   ymax = mean_val + sd_val))
  coord_flip() +
  xlab(NULL) +
  ylab("Mean Response") +
  theme_bw() +
  theme(
    legend.position = "none",
    text = element_text(size = 14, family = "Times")
  )

# Saves the most recently displayed plot.
ggsave("experience-in-workshop.png", width = 6, height = 4)

major strengths

# Render the free-text answers to q30 as a table.
knitr::kable(post$q30)

ways session could be improved

# Render the free-text answers to q31 as a table.
knitr::kable(post$q31)

effects of instructor

# Render the free-text answers to q32 as a table.
knitr::kable(post$q32)

Effect of Workshops on Learners’ Self-Reported Perspectives, Skills, and Confidence

# Summarise the three self-report items (q11_1:q11_3) of one survey.
#
# survey:     a cleaned survey data frame containing q11_1:q11_3 as agreement
#             labels.
# time_label: value stored in the `time` column of the result ("pre"/"post").
#
# Returns one row per item with `mean_val` (mean score), `sd_val` (standard
# error of the mean; name kept for the plotting code) and `time`.
#
# Agreement labels are scored by their position on the 7-point scale, so every
# label gets a distinct value from 1 ("Strongly disagree") to 7 ("Strongly
# agree"); any other value, including a missing answer, becomes NA.
summarise_effects <- function(survey, time_label) {
  agreement_levels <- c(
    "Strongly disagree",
    "Disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Agree",
    "Strongly agree"
  )

  survey %>%
    select(q11_1:q11_3) %>%
    set_names(c("Overcome Getting Stuck",
                "Search for Answers",
                "Confidently Work With Data")) %>%
    pivot_longer(everything()) %>%
    mutate(value = match(value, agreement_levels)) %>%
    group_by(name) %>%
    # Standard error uses the number of non-missing answers for each item
    # rather than a fixed respondent count.
    summarize(mean_val = mean(value, na.rm = TRUE),
              sd_val = sd(value, na.rm = TRUE) / sqrt(sum(!is.na(value)))) %>%
    mutate(time = time_label)
}

effects_pre <- summarise_effects(pre, "pre")

effects_post <- summarise_effects(post, "post")

# Dodged bar chart comparing pre and post means for each item, with error bars
# of +/- `sd_val`, then saved to disk.
list(effects_pre, effects_post) %>%
  bind_rows() %>%
  # Order the fill groups so "pre" comes before "post".
  mutate(time = factor(time, levels = c("pre", "post"))) %>%
  ggplot(aes(x = reorder(name, mean_val), y = mean_val, fill = time)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = mean_val - sd_val, ymax = mean_val + sd_val),
    position = position_dodge(width = 0.9)
  ) +
  coord_flip() +
  ylim(0, 7) +
  ylab("Mean Response") +
  xlab(NULL) +
  scale_x_discrete(
    "",
    limits = c(
      "Overcome Getting Stuck",
      "Search for Answers",
      "Confidently Work With Data"
    )
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    text = element_text(size = 14, family = "Times")
  )

# Saves the most recently displayed plot.
ggsave("change-in-effects.png", width = 6, height = 4)

t-test

# Reshape the three self-report items (q11_1:q11_3) of one survey to long form:
# one row per respondent ID (q23, renamed `id`) and item, with the numeric
# score in `value`.
#
# Agreement labels are scored by their position on the 7-point scale, so every
# label gets a distinct value from 1 ("Strongly disagree") to 7 ("Strongly
# agree"); any other value, including a missing answer, becomes NA.
effects_long <- function(survey) {
  agreement_levels <- c(
    "Strongly disagree",
    "Disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Agree",
    "Strongly agree"
  )

  survey %>%
    select(q11_1:q11_3, id = q23) %>%
    set_names(c("Overcome Getting Stuck",
                "Search for Answers",
                "Confidently Work With Data",
                "id")) %>%
    pivot_longer(-id) %>%
    mutate(value = match(value, agreement_levels))
}

pre_effects_to_join <- effects_long(pre)

post_effects_to_join <- effects_long(post) %>%
  rename(post_value = value)

# Attach each respondent's post score to their pre score for the same item.
# The join keys are spelled out instead of being inferred from shared columns.
# NOTE(review): this keeps every pre row but only the post rows whose ID
# matches a pre ID, and the tests that follow treat the two columns as
# independent groups -- confirm whether a paired comparison of matched
# respondents was intended.
# NOTE: results printed from an earlier run use a different scoring; re-knit
# to refresh them.
for_t_test <- pre_effects_to_join %>%
  left_join(post_effects_to_join, by = c("id", "name")) %>%
  select(-id) %>%
  pivot_longer(c(value, post_value), names_to = "key", values_to = "val")

# Compare pre (`value`) and post (`post_value`) scores for this item.
for_t_test %>%
  filter(name == "Overcome Getting Stuck") %>%
  t.test(val ~ key, data = .)

## 
##  Welch Two Sample t-test
## 
## data:  val by key
## t = 0.31031, df = 23.85, p-value = 0.759
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9870958  1.3363022
## sample estimates:
## mean in group post_value      mean in group value 
##                 5.285714                 5.111111

# Compare pre (`value`) and post (`post_value`) scores for this item.
for_t_test %>%
  filter(name == "Search for Answers") %>%
  t.test(val ~ key, data = .)

## 
##  Welch Two Sample t-test
## 
## data:  val by key
## t = 2.0097, df = 26.987, p-value = 0.05456
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01859124  1.79107802
## sample estimates:
## mean in group post_value      mean in group value 
##                 6.071429                 5.185185

# Compare pre (`value`) and post (`post_value`) scores for this item.
for_t_test %>%
  filter(name == "Confidently Work With Data") %>%
  t.test(val ~ key, data = .)

## 
##  Welch Two Sample t-test
## 
## data:  val by key
## t = 2.5518, df = 34.487, p-value = 0.01531
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2817322 2.4801725
## sample estimates:
## mean in group post_value      mean in group value 
##                 4.714286                 3.333333

Ability to Perform Computing Tasks

immediately apply what was learned

# Mean score and standard error for post-survey item q11_4.
#
# Agreement labels are scored by their position on the 7-point scale, so every
# label gets a distinct value from 1 ("Strongly disagree") to 7 ("Strongly
# agree"); any other value, including a missing answer, becomes NA.
# NOTE: results printed from an earlier run use a different scoring and a fixed
# respondent count; re-knit to refresh them.
post %>%
  select(score = q11_4) %>%
  mutate(score = match(score, c(
    "Strongly disagree",
    "Disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Agree",
    "Strongly agree"
  ))) %>%
  # `sd_val` holds the standard error, based on the non-missing answers only.
  summarize(mean_val = mean(score, na.rm = TRUE),
            sd_val = sd(score, na.rm = TRUE) / sqrt(sum(!is.na(score))))

## # A tibble: 1 x 2
##   mean_val sd_val
##      <dbl>  <dbl>
## 1     5.84  0.271

Accessibility requirements and issues

# Tally the answers to q27, including missing ones.
count(post, q27)

## # A tibble: 3 x 2
##   q27       n
##   <chr> <int>
## 1 No       13
## 2 Yes       7
## 3 <NA>      1

# Tally the answers to q28, including missing ones.
count(post, q28)

## # A tibble: 2 x 2
##   q28       n
##   <chr> <int>
## 1 No        7
## 2 <NA>     14

Other comments or feedback

# Render the free-text answers to q7 as a table.
knitr::kable(post$q7)

Analysis of Data Science Workshop Pre and Post Survey Responses