Installing and Launching R Packages

This code chunk installs the required packages (tidyverse and ggridges) if they are not already available, and then loads them into the R session.

# Packages this document depends on.
packages <- c("tidyverse", "ggridges")

# For each package: install it if it is not available, then attach it.
# requireNamespace() only checks availability without attaching, so the
# package is attached exactly once by library(), which stops with an error
# if the installation did not succeed.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Importing Data

# Read the exam results CSV (path relative to the working directory)
# into a data frame used by every plot below.
exam_data <- read.csv(file = "data/Exam_data.csv")

Ethnicity Distribution in class

# Bar chart: one semi-transparent purple bar per RACE value, with a black
# outline; bar height is the number of rows in that category.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar(
    fill = "purple",
    colour = "black",
    alpha = 0.69
  )

Math scores

# Dot plot of MATHS using bins 2.5 units wide and half-size dots.
# The y scale is given no title and no breaks, so no y-axis title,
# ticks or labels are drawn.
ggplot(exam_data, aes(x = MATHS)) +
  geom_dotplot(binwidth = 2.5, dotsize = 0.5) +
  scale_y_continuous(name = NULL, breaks = NULL)

Math scores by Gender

# Box plot of MATHS for each GENDER, with the group mean overlaid as a
# red point. `fun` is used because `fun.y` is deprecated in stat_summary().
ggplot(data = exam_data, aes(x = GENDER, y = MATHS)) +
  geom_boxplot() +
  stat_summary(
    geom = "point",
    fun = mean,
    color = "red",
    size = 3
  )

Uncertainty in Math scores by Ethnicity in class

# Mean MATHS per RACE drawn as purple bars, with error bars computed by
# mean_se() layered on top.
# `fun` is used because `fun.y` is deprecated in stat_summary().
ggplot(data = exam_data, aes(x = RACE, y = MATHS)) +
  stat_summary(
    geom = "bar",
    fun = mean,
    position = "dodge",
    fill = "purple",
    width = 0.5
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = "dodge",
    width = 0.2
  )

Histogram of Math scores by class sections

# Histogram of MATHS (20 bins), drawn as one panel per CLASS value.
ggplot(exam_data, aes(x = MATHS)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ CLASS)

How are students' Math scores correlated with their English scores?

# Scatter plot of ENGLISH against MATHS with a fitted linear-model (lm)
# smooth line; both axes are displayed over the 0-100 range.
# NOTE(review): on ggplot2 >= 3.4.0 `size` for lines is deprecated in favour
# of `linewidth` -- confirm the installed version before switching.
ggplot(exam_data, aes(x = MATHS, y = ENGLISH)) +
  geom_point() +
  geom_smooth(method = lm, size = 0.5) +
  coord_cartesian(
    xlim = c(0, 100),
    ylim = c(0, 100)
  )
## `geom_smooth()` using formula 'y ~ x'

ggridges - a new way to visualize continuous distributions

# Ridgeline plot: one MATHS density ridge per CLASS. Each ridge is split at
# its 2.5% and 97.5% quantiles and the three resulting regions are filled
# with different colours (lower tail red, middle grey, upper tail blue).
# after_stat() replaces the superseded stat() for referring to the
# `quantile` variable computed by stat_density_ridges().
ggplot(
  data = exam_data,
  aes(x = MATHS, y = CLASS, fill = factor(after_stat(quantile)))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  scale_fill_manual(
    name = "Probability",
    values = c("#FF0000A0", "#A0A0A0A0", "#0000FFA0"),
    # Legend labels written as properly closed half-open intervals.
    labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")
  )
## Picking joint bandwidth of 3.63