Homework 1

Data cleaning exercise

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.5     ✓ purrr   0.3.4
✓ tibble  3.1.5     ✓ dplyr   1.0.7
✓ tidyr   1.1.4     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(haven)

# Load the two SPSS files from the working directory: one for the control
# group and one for the intervention groups.
cntrl_group <- haven::read_sav(file = "cntrl_group.sav")
interv_group <- haven::read_sav(file = "interv_groups.sav")
  • Data import
# Stack the control-group rows on top of the intervention-group rows.
new <- cntrl_group %>% rbind(interv_group)

# Preview the first six rows of the combined data.
head(new, n = 6L)
# A tibble: 6 × 7
  Intervention Class Department Pre_Exercise Post_Exercise Pre_hap Post_hap
         <dbl> <dbl>      <dbl>        <dbl>         <dbl>   <dbl>    <dbl>
1            1     1          1            3             3       3        3
2            1     1          1            2             2     999      999
3            1     1          1            1             0       1        4
4            1     1          1            1             0       4        3
5            1     1          1            0             0       4        1
6            1     1          1            2             2       4        3
  • Merged control and intervention data sets
# Lookup table: numeric intervention code (as text) -> condition name.
intervention_names <- c(
  "1" = "Control",
  "2" = "Leaflet",
  "3" = "Leaflet + Quiz",
  "4" = "Leaflet + Plan"
)

# Overwrite each code in place, one code at a time. The first assignment
# turns the column into character; codes not listed above are left as text.
for (code in names(intervention_names)) {
  new$Intervention[new$Intervention == code] <- intervention_names[[code]]
}

head(new$Intervention)
[1] "Control" "Control" "Control" "Control" "Control" "Control"
  • Recoded value labels for intervention type.
# Lookup table: numeric department code (as text) -> department name.
department_names <- c(
  "1" = "Psychology",
  "2" = "Performing arts",
  "3" = "Accounting",
  "4" = "Finance",
  "5" = "Education",
  "6" = "Nursing",
  "7" = "Philosophy",
  "8" = "English",
  "9" = "Architecture"
)

# Overwrite each code in place, one code at a time. The first assignment
# turns the column into character; codes not listed above are left as text.
for (code in names(department_names)) {
  new$Department[new$Department == code] <- department_names[[code]]
}

head(new$Department)
[1] "Psychology" "Psychology" "Psychology" "Psychology" "Psychology"
[6] "Psychology"
  • Recoded value labels for department type.
# Treat the sentinel value 999 in the two happiness columns as missing.
for (hap_item in c("Pre_hap", "Post_hap")) {
  # %in% yields FALSE (not NA) for values that are already missing.
  is_sentinel <- new[[hap_item]] %in% 999
  new[[hap_item]][is_sentinel] <- NA
}
  • Converted “999” values in the happiness items to missing (NA).
# Add Missing_Data: the number of NA cells in each row, counted across
# every column of `new`. as.integer() keeps the count an integer vector.
new1 <- new %>%
  mutate(Missing_Data = as.integer(rowSums(is.na(new))))
  • Created a new variable that counts the missing values in each observation.
# Reverse-score the happiness columns by replacing each value x with 6 - x.
# The regex selects the same columns as matching "Pre_hap" and "Post_hap"
# separately; NA values stay NA.
final <- new1 %>%
  mutate(across(matches("Pre_hap|Post_hap"), ~ 6 - .x))
  • Recoded the reverse-scored happiness items (Pre_hap and Post_hap) as 6 minus the original score.
# Save the cleaned data as an SPSS file in the working directory.
haven::write_sav(data = final, path = "hw1.sav")
  • Wrote the cleaned data set to a new .sav file for hand-off.