Load Libraries

library(tidyr)
library(dplyr)

Load Data

# Read the overview table; the first row of the CSV holds column names.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
df <- read.csv(file = "new_data/overview.csv", header = TRUE)

# Split a copy of `term_time` on "_" into `term` and `time`, leaving the
# original `term_time` column in place for later grouping.
df <- df %>%
  mutate(term_time2 = term_time) %>%
  separate(term_time2, into = c("term", "time"), sep = "_")

# Label each term with a year bucket; any term not listed becomes NA.
# NOTE(review): the term_time summary further down lists f21_po / f21_pr,
# which match no branch here and therefore get an NA year -- confirm that
# excluding them from the by-year tables is intended.
df <- df %>%
  mutate(year = case_when(
    term %in% c("f18", "w19", "s19") ~ "18-19",
    term %in% c("f19", "w20", "s20") ~ "19-20",
    term %in% c("f20", "w21", "s21") ~ "20-21",
    TRUE ~ NA_character_
  ))

Unique Participants

Unique IDs: 2560

Min Count: 1

Max Count: 10

Avg Count: 1.98

# One row per participant id; `Freq` is the number of rows that id has in df.
ids <- as.data.frame(table(df[["id"]]))
# Quick checks on the tally (left disabled):
# nrow(ids)
# min(ids$Freq)
# max(ids$Freq)
# mean(ids$Freq)

# Distribution of the tallies: how many ids occur once, twice, and so on.
ids_counts <- as.data.frame(table(ids[["Freq"]]))
ids_counts
##   Var1 Freq
## 1    1  972
## 2    2 1096
## 3    3  222
## 4    4  183
## 5    5   40
## 6    6   35
## 7    7    8
## 8    8    3
## 9   10    1

Number of Responses by Term and Time

# Number of distinct ids within each term_time value
unique_id_counts <- summarise(
  group_by(df, term_time),
  unique_ids = n_distinct(id)
)

unique_id_counts
## # A tibble: 20 x 2
##    term_time unique_ids
##    <chr>          <int>
##  1 f18_po           282
##  2 f18_pr           317
##  3 f19_po           360
##  4 f19_pr           409
##  5 f20_po           199
##  6 f20_pr           221
##  7 f21_po           111
##  8 f21_pr           233
##  9 s19_po           166
## 10 s19_pr           209
## 11 s20_po           216
## 12 s20_pr           276
## 13 s21_po           180
## 14 s21_pr           321
## 15 w19_po           255
## 16 w19_pr           318
## 17 w20_po           235
## 18 w20_pr           323
## 19 w21_po           202
## 20 w21_pr           229

Number of Matched Responses by Term (and Course ID)

# For every term / id / cid combination, count the rows whose `time` is
# "pr" and the rows whose `time` is "po". `.groups = "drop"` returns an
# ungrouped tibble, so no grouping lingers into later steps and dplyr does
# not emit its "grouped output" message.
result <- df %>%
  group_by(term, id, cid) %>%
  summarise(
    pr_count = sum(time == "pr"),
    po_count = sum(time == "po"),
    .groups = "drop"
  )

# Keep only combinations that have at least one pr and at least one po row
result_filtered <- result %>%
  filter(pr_count > 0, po_count > 0)

# Number of such matched combinations per term
matched <- data.frame(table(result_filtered$term))
matched
##    Var1 Freq
## 1   f18  239
## 2   f19  292
## 3   f20  131
## 4   f21  102
## 5   s19  123
## 6   s20  138
## 7   s21  135
## 8   w19  228
## 9   w20  175
## 10  w21  138

Number of Matched Responses by Count and Year

# Count the rows each id has within each year. `.groups = "drop"` returns an
# ungrouped tibble and avoids dplyr's "grouped output" message.
year_matched <- df %>%
  group_by(id, year) %>%
  summarise(count = n(), .groups = "drop")

# Cross-tabulate year against the per-id row count. table() leaves out NA
# values by default, so ids whose year is NA do not appear here.
year_matched2 <- data.frame(table(year_matched$year, year_matched$count))
year_matched2
##     Var1 Var2 Freq
## 1  18-19    1  280
## 2  19-20    1  405
## 3  20-21    1  417
## 4  18-19    2  445
## 5  19-20    2  428
## 6  20-21    2  353
## 7  18-19    3   44
## 8  19-20    3   75
## 9  20-21    3   50
## 10 18-19    4   50
## 11 19-20    4   55
## 12 20-21    4   16
## 13 18-19    5    3
## 14 19-20    5   13
## 15 20-21    5    3
## 16 18-19    6    5
## 17 19-20    6    8
## 18 20-21    6    0

Number of Responses by Course ID

# Number of distinct ids recorded under each cid
cid_count <- summarise(
  group_by(df, cid),
  appearances = n_distinct(id)
)

cid_count
## # A tibble: 9 x 2
##   cid   appearances
##   <chr>       <int>
## 1 F001A          81
## 2 F008.           1
## 3 F010.         583
## 4 F014.           1
## 5 F040A         890
## 6 F040B         555
## 7 F040C         426
## 8 F041.         751
## 9 F058.           2

Columns in Data

# Read the long-format table (first row holds column names) and list its
# columns. `TRUE` is spelled out because `T` is a reassignable variable.
df2 <- read.csv(file = "new_data/df_long.csv", header = TRUE)
colnames(df2)
##  [1] "AnonymousID"                "FeltLikeSciencePerson"     
##  [3] "SeeMyselfSciencePerson"     "FamilySeeSciencePerson"    
##  [5] "InstructorSeeSciencePerson" "PeerSeeSciencePerson"      
##  [7] "EnjoyScience"               "InterestedScience"         
##  [9] "UnderstandPreviousScience"  "UnderstandNewScience"      
## [11] "OvercomeSetbacks"           "ConfidentOutsideClass"     
## [13] "ConfidentExams"             "OthersAskHelp"             
## [15] "OutsideClassInSubject1"     "OutsideClassInSubject2"    
## [17] "RealWorldIssues"            "FindArticles"              
## [19] "CriticallyRead"             "IdentifyPatterns"          
## [21] "RecognizeArgument"          "DevelopArgument"           
## [23] "WriteDocuments"             "WorkWithOthers"            
## [25] "OralPresentation"           "Enthusiastic"              
## [27] "DiscussWithFriends"         "PlanningAdditionalClasses" 
## [29] "PursuringCareer"            "UnderstandSubject"         
## [31] "SucceedSubject"             "ComplexIdeas"              
## [33] "AskingForHelp"              "ConnectIdeas"              
## [35] "ApplyingOutsideClass"       "SystematicReasoning"       
## [37] "AnalyzingData"              "ScienceCareer"             
## [39] "Career.Goal"                "OtherCareer"               
## [41] "PreviousCourses"            "CurrentlyEmployed"         
## [43] "CurrentJobTitle"            "Ethnicity"                 
## [45] "UnlistedEthnicity"          "ArmedForces"               
## [47] "Time"                       "Date"                      
## [49] "Duration"                   "Gender"                    
## [51] "BIOL_Crse"                  "BIOL_Grade"                
## [53] "Instructor"                 "term"