# Read the survey responses from the CSV export in the working directory.
dat <- read.csv(file = "IA.SMCM.csv")
library(dplyr)
library(Rmisc)
# Build `demo`: one row of demographic answers per respondent.
# A respondent can appear more than once in `dat`, so only the record with the
# greatest RecordedDate is kept.
# NOTE(review): read.csv() does not parse dates, so RecordedDate is compared
# as text here; max() picks the latest record only if the timestamps sort
# chronologically as strings (e.g. "YYYY-MM-DD HH:MM:SS") -- confirm.
# NOTE(review): rows with a blank Email all fall into one group and collapse
# to a single record in the first pass -- confirm that is intended.
demo <- dat %>%
  # First pass: one record per email address. This keeps the post-course
  # survey when there is one, and the pre-course survey otherwise.
  group_by(Email) %>%
  filter(RecordedDate == max(RecordedDate)) %>%
  # Second pass: catch people who used two different emails but the same Name.
  # group_by() replaces the Email grouping with Name.
  group_by(Name) %>%
  filter(RecordedDate == max(RecordedDate)) %>%
  # Drop the grouping so later verbs see a plain table; a tibble left grouped
  # by Name carries Name into every column selection and makes summaries run
  # once per person instead of over the whole sample.
  ungroup() %>%
  # Keep only the relevant columns.
  select(
    Name, Email, Age, Ethnicity, Credits, Major, Gender, FirstGen,
    BirthPlace, HS.GPA, Career.bio, Politics
  ) %>%
  # Discard responses that have no name.
  filter(Name != "")
library(summarytools)
# Summary statistics for the numeric demographic columns only.
# ungroup() is a no-op on an ungrouped table; on a table still grouped by Name
# it stops the grouping column from being carried into the selection (where it
# would show up as a character column in the summary).
# summary() takes no extra argument: the piped data is already its first
# argument, and a second positional argument would be taken as `maxsum`.
demo %>%
  ungroup() %>%
  select(where(is.numeric)) %>%
  summary()
## Name Age HS.GPA Career.bio
## Length:30 Min. :18.00 Min. :2.000 Min. :0.000
## Class :character 1st Qu.:18.25 1st Qu.:3.225 1st Qu.:4.000
## Mode :character Median :19.00 Median :3.565 Median :6.000
## Mean :19.45 Mean :3.494 Mean :5.241
## 3rd Qu.:20.00 3rd Qu.:3.750 3rd Qu.:7.000
## Max. :23.00 Max. :4.850 Max. :7.000
## NA's :8 NA's :1
## Politics
## Min. :0.000
## 1st Qu.:1.000
## Median :2.000
## Mean :2.633
## 3rd Qu.:4.000
## Max. :7.000
##
# Frequency table of Ethnicity in rmarkdown style, without the NA rows/columns.
freq(demo$Ethnicity, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$Ethnicity
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |------------------------------:|-----:|-------:|-------:|
## | **Asian/Pacific Islander** | 2 | 6.67 | 6.67 |
## | **Black or African American** | 7 | 23.33 | 30.00 |
## | **Hispanic or Latino** | 4 | 13.33 | 43.33 |
## | **Other** | 2 | 6.67 | 50.00 |
## | **White** | 15 | 50.00 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of Credits in rmarkdown style, without the NA rows/columns.
freq(demo$Credits, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$Credits
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |----------------------:|-----:|-------:|-------:|
## | **0-30 (Freshman)** | 17 | 56.67 | 56.67 |
## | **31-60 (Sophomore)** | 10 | 33.33 | 90.00 |
## | **61-90 (Junior)** | 3 | 10.00 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of Major in rmarkdown style, without the NA rows/columns.
freq(demo$Major, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$Major
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |-----------------------------:|-----:|-------:|-------:|
## | **Biology** | 1 | 3.33 | 3.33 |
## | **Non-science field** | 13 | 43.33 | 46.67 |
## | **Science, but not biology** | 12 | 40.00 | 86.67 |
## | **Undecided** | 4 | 13.33 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of Gender in rmarkdown style, without the NA rows/columns.
freq(demo$Gender, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$Gender
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |-------------------:|-----:|-------:|-------:|
## | **Female** | 16 | 53.33 | 53.33 |
## | **Gender neutral** | 1 | 3.33 | 56.67 |
## | **Male** | 13 | 43.33 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of FirstGen in rmarkdown style, without the NA rows/columns.
# Blank answers are empty strings rather than NA, so report.nas does not hide
# them: they are tabulated as their own "(Empty string)" category.
freq(demo$FirstGen, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$FirstGen
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |-------------------:|-----:|-------:|-------:|
## | **(Empty string)** | 1 | 3.33 | 3.33 |
## | **No** | 21 | 70.00 | 73.33 |
## | **Not sure** | 1 | 3.33 | 76.67 |
## | **Yes** | 7 | 23.33 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of BirthPlace in rmarkdown style, without the NA rows/columns.
freq(demo$BirthPlace, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$BirthPlace
## **Type:** Character
##
## | | Freq | % | % Cum. |
## |--------------------:|-----:|-------:|-------:|
## | **Asia** | 1 | 3.33 | 3.33 |
## | **Central America** | 2 | 6.67 | 10.00 |
## | **Europe** | 2 | 6.67 | 16.67 |
## | **North America** | 25 | 83.33 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |
# Frequency table of Politics in rmarkdown style, without the NA rows/columns.
# Politics is stored as an integer score, so each distinct value gets a row.
freq(demo$Politics, report.nas = FALSE, plain.ascii = FALSE,
     style = "rmarkdown")
## ### Frequencies
## #### demo$Politics
## **Type:** Integer
##
## | | Freq | % | % Cum. |
## |----------:|-----:|-------:|-------:|
## | **0** | 5 | 16.67 | 16.67 |
## | **1** | 7 | 23.33 | 40.00 |
## | **2** | 5 | 16.67 | 56.67 |
## | **3** | 3 | 10.00 | 66.67 |
## | **4** | 4 | 13.33 | 80.00 |
## | **5** | 2 | 6.67 | 86.67 |
## | **6** | 1 | 3.33 | 90.00 |
## | **7** | 3 | 10.00 | 100.00 |
## | **Total** | 30 | 100.00 | 100.00 |