Load Libraries

library(tidyr)
library(dplyr)

Load Data

# Read the overview table that every summary below is computed from.
df <- read.csv(file = "new_data/overview.csv", header = TRUE)

# Split a copy of term_time (e.g. "f18_po") into `term` and `time`; working on
# a copy keeps the original term_time column available for later grouping.
df$term_time2 <- df$term_time
df <- df %>%
  separate(term_time2, into = c("term", "time"), sep = "_")

# Map each term onto an academic-year label; any term not listed gets NA.
# NOTE(review): the term_time output below includes "f21", which is not mapped
# here, so those rows end up with year = NA -- confirm this is intended.
df <- df %>%
  mutate(year = case_when(
    term %in% c("f18", "w19", "s19") ~ "18-19",
    term %in% c("f19", "w20", "s20") ~ "19-20",
    term %in% c("f20", "w21", "s21") ~ "20-21",
    TRUE ~ NA_character_
  ))

Unique Participants

Unique IDs: 2560

Min Count: 1

Max Count: 10

Avg Count: 1.98

# One row per distinct id: Var1 is the id, Freq is how many rows of df carry it.
ids <- as.data.frame(table(df$id))
# Summary figures (presumably the source of the numbers quoted above):
# nrow(ids)       # number of distinct ids
# min(ids$Freq)   # fewest rows for any id
# max(ids$Freq)   # most rows for any id
# mean(ids$Freq)  # average rows per id

# Distribution of the per-id counts: Var1 is a row count, Freq is the number
# of ids that have exactly that many rows.
ids_counts <- as.data.frame(table(ids$Freq))
ids_counts

##   Var1 Freq
## 1    1  972
## 2    2 1096
## 3    3  222
## 4    4  183
## 5    5   40
## 6    6   35
## 7    7    8
## 8    8    3
## 9   10    1

Number of Responses by Term and Time

# Number of distinct ids within each term_time value (term and time combined)
unique_id_counts <- summarise(
  group_by(df, term_time),
  unique_ids = n_distinct(id)
)

unique_id_counts

## # A tibble: 20 x 2
##    term_time unique_ids
##    <chr>          <int>
##  1 f18_po           282
##  2 f18_pr           317
##  3 f19_po           360
##  4 f19_pr           409
##  5 f20_po           199
##  6 f20_pr           221
##  7 f21_po           111
##  8 f21_pr           233
##  9 s19_po           166
## 10 s19_pr           209
## 11 s20_po           216
## 12 s20_pr           276
## 13 s21_po           180
## 14 s21_pr           321
## 15 w19_po           255
## 16 w19_pr           318
## 17 w20_po           235
## 18 w20_pr           323
## 19 w21_po           202
## 20 w21_pr           229

Number of Matched Responses by Term (and Course ID)

# For every term / id / course combination, count how many rows are "pr" and
# how many are "po". `.groups = "drop"` returns an ungrouped result so no
# leftover grouping leaks into later steps (and the summarise() grouping
# message is not emitted).
result <- df %>%
  group_by(term, id, cid) %>%
  summarise(
    pr_count = sum(time == "pr"),
    po_count = sum(time == "po"),
    .groups = "drop"
  )

# Keep only combinations that have at least one "pr" AND at least one "po" row
result_filtered <- result %>%
  filter(pr_count > 0, po_count > 0)

# Number of matched term / id / course combinations in each term
matched <- data.frame(table(result_filtered$term))
matched

##    Var1 Freq
## 1   f18  239
## 2   f19  292
## 3   f20  131
## 4   f21  102
## 5   s19  123
## 6   s20  138
## 7   s21  135
## 8   w19  228
## 9   w20  175
## 10  w21  138

Number of Matched Responses by Count and Year

# Number of rows each id contributes within each academic year.
# NOTE(review): this counts every row for the id/year pair; nothing in this
# block restricts to matched pr/po responses -- confirm against the section
# title.
# `.groups = "drop"` returns an ungrouped result instead of one still grouped
# by id.
year_matched <- df %>%
  group_by(id, year) %>%
  summarise(count = n(), .groups = "drop")

# Cross-tabulate year (Var1) by per-id row count (Var2). table() omits NA
# values by default, so id/year pairs whose year is NA do not appear here.
year_matched2 <- data.frame(table(year_matched$year, year_matched$count))
year_matched2

##     Var1 Var2 Freq
## 1  18-19    1  280
## 2  19-20    1  405
## 3  20-21    1  417
## 4  18-19    2  445
## 5  19-20    2  428
## 6  20-21    2  353
## 7  18-19    3   44
## 8  19-20    3   75
## 9  20-21    3   50
## 10 18-19    4   50
## 11 19-20    4   55
## 12 20-21    4   16
## 13 18-19    5    3
## 14 19-20    5   13
## 15 20-21    5    3
## 16 18-19    6    5
## 17 19-20    6    8
## 18 20-21    6    0

Number of Responses by Course ID

# Number of distinct ids seen under each course id (cid). Note this counts
# distinct ids, not rows, so an id with several rows in a course counts once.
cid_count <- summarise(
  group_by(df, cid),
  appearances = n_distinct(id)
)

cid_count

## # A tibble: 9 x 2
##   cid   appearances
##   <chr>       <int>
## 1 F001A          81
## 2 F008.           1
## 3 F010.         583
## 4 F014.           1
## 5 F040A         890
## 6 F040B         555
## 7 F040C         426
## 8 F041.         751
## 9 F058.           2

Columns in Data

# Read df_long.csv and list its column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df2 <- read.csv(file = "new_data/df_long.csv", header = TRUE)
colnames(df2)

##  [1] "AnonymousID"                "FeltLikeSciencePerson"     
##  [3] "SeeMyselfSciencePerson"     "FamilySeeSciencePerson"    
##  [5] "InstructorSeeSciencePerson" "PeerSeeSciencePerson"      
##  [7] "EnjoyScience"               "InterestedScience"         
##  [9] "UnderstandPreviousScience"  "UnderstandNewScience"      
## [11] "OvercomeSetbacks"           "ConfidentOutsideClass"     
## [13] "ConfidentExams"             "OthersAskHelp"             
## [15] "OutsideClassInSubject1"     "OutsideClassInSubject2"    
## [17] "RealWorldIssues"            "FindArticles"              
## [19] "CriticallyRead"             "IdentifyPatterns"          
## [21] "RecognizeArgument"          "DevelopArgument"           
## [23] "WriteDocuments"             "WorkWithOthers"            
## [25] "OralPresentation"           "Enthusiastic"              
## [27] "DiscussWithFriends"         "PlanningAdditionalClasses" 
## [29] "PursuringCareer"            "UnderstandSubject"         
## [31] "SucceedSubject"             "ComplexIdeas"              
## [33] "AskingForHelp"              "ConnectIdeas"              
## [35] "ApplyingOutsideClass"       "SystematicReasoning"       
## [37] "AnalyzingData"              "ScienceCareer"             
## [39] "Career.Goal"                "OtherCareer"               
## [41] "PreviousCourses"            "CurrentlyEmployed"         
## [43] "CurrentJobTitle"            "Ethnicity"                 
## [45] "UnlistedEthnicity"          "ArmedForces"               
## [47] "Time"                       "Date"                      
## [49] "Duration"                   "Gender"                    
## [51] "BIOL_Crse"                  "BIOL_Grade"                
## [53] "Instructor"                 "term"

Data Overview

Heather Perkins

2024-05-21

Load Libraries

Load Data

Unique Participants

Number of Responses by Term and Time

Number of Matched Responses by Term (and Course ID)

Number of Matched Responses by Count and Year

Number of Responses by Course ID

Columns in Data