# Load the survey export (looked up relative to the working directory).
dat <- read.csv(file = "IA.SMCM.csv")

library(dplyr)
library(Rmisc)

# Demographics table: de-duplicate respondents, then keep only the
# demographic columns.
#
# De-duplication runs in two passes, each keeping the row(s) whose
# RecordedDate equals the group's maximum -- per the original author, the
# post-course survey when one exists, otherwise the pre-course survey.
# NOTE(review): rows that tie on the maximum RecordedDate are all kept, and
# max() compares RecordedDate in whatever type read.csv() produced; if that is
# character, confirm the format sorts chronologically.
# NOTE(review): no ungroup() follows, so `demo` stays grouped by Name.
demo <- dat %>%
  # Pass 1: collapse repeated submissions that share an Email.
  group_by(Email) %>%
  filter(RecordedDate == max(RecordedDate)) %>%
  # Pass 2: collapse people who used different emails but the same Name.
  group_by(Name) %>%
  filter(RecordedDate == max(RecordedDate)) %>%
  # Retain only the demographic fields.
  select(
    Name, Email, Age, Ethnicity, Credits, Major, Gender, FirstGen,
    BirthPlace, HS.GPA, Career.bio, Politics
  ) %>%
  # Discard responses submitted without a name.
  filter(Name != "")

library(summarytools)

# Summary statistics (min, quartiles, mean, max, NA count) for the numeric
# demographic columns.
#
# `demo` is still grouped by Name here, and dplyr always keeps grouping
# columns when selecting, so the grouping is dropped first; otherwise the
# character Name column is carried into the "numeric" summary.
# summary() is called with no extra argument: the pipe already supplies the
# data, and the former `summary(demo)` passed `demo` as a stray second argument.
# NOTE(review): the echoed output below was edited by hand to drop the Name
# column (values unchanged); re-run to confirm the exact layout.
demo %>%
  ungroup() %>%
  select_if(is.numeric) %>%
  summary()
##       Age            HS.GPA        Career.bio       Politics    
##  Min.   :18.00   Min.   :2.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:18.25   1st Qu.:3.225   1st Qu.:4.000   1st Qu.:1.000  
##  Median :19.00   Median :3.565   Median :6.000   Median :2.000  
##  Mean   :19.45   Mean   :3.494   Mean   :5.241   Mean   :2.633  
##  3rd Qu.:20.00   3rd Qu.:3.750   3rd Qu.:7.000   3rd Qu.:4.000  
##  Max.   :23.00   Max.   :4.850   Max.   :7.000   Max.   :7.000  
##  NA's   :8                       NA's   :1                      
# Frequency table of the Ethnicity column, rendered as an rmarkdown table.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
freq(demo$Ethnicity, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$Ethnicity  
## **Type:** Character  
## 
## |                          | Freq |      % | % Cum. |
## |------------------------------:|-----:|-------:|-------:|
## |    **Asian/Pacific Islander** |    2 |   6.67 |   6.67 |
## | **Black or African American** |    7 |  23.33 |  30.00 |
## |        **Hispanic or Latino** |    4 |  13.33 |  43.33 |
## |                     **Other** |    2 |   6.67 |  50.00 |
## |                     **White** |   15 |  50.00 | 100.00 |
## |                     **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the Credits column (credit-hour bands), rendered as an
# rmarkdown table. FALSE is spelled out because `F` can be reassigned.
freq(demo$Credits, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$Credits  
## **Type:** Character  
## 
## |                  | Freq |      % | % Cum. |
## |----------------------:|-----:|-------:|-------:|
## |   **0-30 (Freshman)** |   17 |  56.67 |  56.67 |
## | **31-60 (Sophomore)** |   10 |  33.33 |  90.00 |
## |    **61-90 (Junior)** |    3 |  10.00 | 100.00 |
## |             **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the Major column, rendered as an rmarkdown table.
# FALSE is spelled out because `F` can be reassigned.
freq(demo$Major, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$Major  
## **Type:** Character  
## 
## |                         | Freq |      % | % Cum. |
## |-----------------------------:|-----:|-------:|-------:|
## |                  **Biology** |    1 |   3.33 |   3.33 |
## |        **Non-science field** |   13 |  43.33 |  46.67 |
## | **Science, but not biology** |   12 |  40.00 |  86.67 |
## |                **Undecided** |    4 |  13.33 | 100.00 |
## |                    **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the Gender column, rendered as an rmarkdown table.
# FALSE is spelled out because `F` can be reassigned.
freq(demo$Gender, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$Gender  
## **Type:** Character  
## 
## |               | Freq |      % | % Cum. |
## |-------------------:|-----:|-------:|-------:|
## |         **Female** |   16 |  53.33 |  53.33 |
## | **Gender neutral** |    1 |   3.33 |  56.67 |
## |           **Male** |   13 |  43.33 | 100.00 |
## |          **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the FirstGen column, rendered as an rmarkdown table.
# FALSE is spelled out because `F` can be reassigned.
# NOTE(review): one response is an empty string and is tabulated as its own
# level (see the table below); recode "" to NA upstream if it should be
# treated as missing.
freq(demo$FirstGen, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$FirstGen  
## **Type:** Character  
## 
## |               | Freq |      % | % Cum. |
## |-------------------:|-----:|-------:|-------:|
## | **(Empty string)** |    1 |   3.33 |   3.33 |
## |             **No** |   21 |  70.00 |  73.33 |
## |       **Not sure** |    1 |   3.33 |  76.67 |
## |            **Yes** |    7 |  23.33 | 100.00 |
## |          **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the BirthPlace column, rendered as an rmarkdown table.
# FALSE is spelled out because `F` can be reassigned.
freq(demo$BirthPlace, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$BirthPlace  
## **Type:** Character  
## 
## |                | Freq |      % | % Cum. |
## |--------------------:|-----:|-------:|-------:|
## |            **Asia** |    1 |   3.33 |   3.33 |
## | **Central America** |    2 |   6.67 |  10.00 |
## |          **Europe** |    2 |   6.67 |  16.67 |
## |   **North America** |   25 |  83.33 | 100.00 |
## |           **Total** |   30 | 100.00 | 100.00 |
# Frequency table of the integer-coded Politics column (values 0-7 appear in
# the table below), rendered as an rmarkdown table.
# FALSE is spelled out because `F` can be reassigned.
freq(demo$Politics, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies  
## #### demo$Politics  
## **Type:** Integer  
## 
## |      | Freq |      % | % Cum. |
## |----------:|-----:|-------:|-------:|
## |     **0** |    5 |  16.67 |  16.67 |
## |     **1** |    7 |  23.33 |  40.00 |
## |     **2** |    5 |  16.67 |  56.67 |
## |     **3** |    3 |  10.00 |  66.67 |
## |     **4** |    4 |  13.33 |  80.00 |
## |     **5** |    2 |   6.67 |  86.67 |
## |     **6** |    1 |   3.33 |  90.00 |
## |     **7** |    3 |  10.00 | 100.00 |
## | **Total** |   30 | 100.00 | 100.00 |