R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for the built-in `cars` data set.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Packages this document depends on; extend the vector to load more.
packages <- c("tidyverse")

# For each package: try to attach it, install it if that attempt fails, and
# then attach it with library(). require() is used here only for its
# TRUE/FALSE return value; the final library() call is what raises an error
# if the package still cannot be loaded after installation.
for (p in packages) {
  # TRUE is spelled out because `T` is an ordinary, reassignable variable.
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
## Loading required package: tidyverse
## -- Attaching packages -------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.3.2     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## -- Conflicts ----------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the exam results into `exam_data`.
# The column types are declared explicitly (they are exactly the types readr
# guessed for this file) so the schema is pinned in code and no longer depends
# on type guessing; this also silences the "Parsed with column specification"
# message.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine -- consider a project-relative path.
exam_data <- read_csv(
  "C:/SMU Term 3/Visual Analytics/R Files/Data/Exam_data.csv",
  col_types = cols(
    ID = col_character(),
    CLASS = col_character(),
    GENDER = col_character(),
    RACE = col_character(),
    ENGLISH = col_double(),
    MATHS = col_double(),
    SCIENCE = col_double()
  )
)
# Bar chart: number of rows in each RACE category.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar(fill = "light blue", color = "black")

# Histogram of MATHS scores using 20 bins.
ggplot(exam_data, aes(x = MATHS)) +
  geom_histogram(fill = "light blue", color = "black", bins = 20)

# Same histogram with the bars filled by GENDER (default fill palette).
ggplot(exam_data, aes(fill = GENDER, x = MATHS)) +
  geom_histogram(color = "grey30", bins = 20)

# Same again, with a manually chosen grey / orange fill palette.
ggplot(exam_data, aes(fill = GENDER, x = MATHS)) +
  geom_histogram(color = "black", bins = 20) +
  scale_fill_manual(values = c("grey", "orange"))

# Dot plot of MATHS scores drawn with half-size dots.
ggplot(exam_data, aes(x = MATHS)) +
  geom_dotplot(dotsize = 0.5)
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Same dot plot with the y-axis title and breaks switched off.
ggplot(exam_data, aes(x = MATHS)) +
  geom_dotplot(dotsize = 0.5) +
  scale_y_continuous(name = NULL, breaks = NULL)
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Density curve of MATHS scores.
ggplot(exam_data, aes(x = MATHS)) +
  geom_density()

# Box plot of MATHS for each GENDER.
ggplot(exam_data, aes(x = GENDER, y = MATHS)) +
  geom_boxplot()

# Box plot with the individual observations drawn on top as small jittered
# points.
ggplot(exam_data, aes(x = GENDER, y = MATHS)) +
  geom_boxplot() +
  geom_point(size = 0.5, position = "jitter")

# Violin plot of MATHS for each GENDER.
ggplot(exam_data, aes(x = GENDER, y = MATHS)) +
  geom_violin()

# Scatter plot of ENGLISH (y) against MATHS (x).
ggplot(exam_data, aes(x = MATHS, y = ENGLISH)) +
  geom_point()

# Box plot of MATHS by GENDER with each group's mean overlaid as a large red
# point. `fun` is the replacement for the `fun.y` argument that ggplot2 3.3.0
# deprecated, so the deprecation warning is no longer raised.
ggplot(data = exam_data, aes(y = MATHS, x = GENDER)) +
  geom_boxplot() +
  stat_summary(geom = "point",
               fun = "mean",
               colour = "red",
               size = 4)

# Same plot built from the geom side: geom_point() with the "summary" stat.
# The summary function is named explicitly instead of relying on the stat's
# fallback to mean_se(); the point is still drawn at the group mean, and the
# "No summary function supplied" message is no longer emitted.
ggplot(data = exam_data, aes(y = MATHS, x = GENDER)) +
  geom_boxplot() +
  geom_point(stat = "summary",
             fun = "mean",
             color = "red",
             size = 4)

# Scatter plot of ENGLISH against MATHS with a thin fitted linear-model line.
ggplot(exam_data, aes(y = ENGLISH, x = MATHS)) +
  geom_point() +
  geom_smooth(method = "lm", size = 0.5)
## `geom_smooth()` using formula 'y ~ x'

# Mean MATHS score per RACE drawn as bars, with error bars on top.
# - `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
# - `fun.data = mean_se` names the function the error-bar layer previously
#   fell back to implicitly (mean plus/minus one standard error), so neither
#   the deprecation warning nor the "No summary function supplied" message
#   is emitted any more.
ggplot(data = exam_data, aes(x = RACE, y = MATHS)) +
  stat_summary(geom = "bar",
               fun = "mean",
               fill = "lightblue") +
  stat_summary(geom = "errorbar",
               fun.data = mean_se,
               color = "black",
               width = 0.7)

# MATHS histograms, one panel per CLASS, panels wrapped into rows and columns.
ggplot(exam_data, aes(x = MATHS)) +
  geom_histogram(fill = "lightblue", color = "black", bins = 20) +
  facet_wrap(~CLASS)

# Same histograms, with all CLASS panels laid out side by side as columns.
ggplot(exam_data, aes(x = MATHS)) +
  geom_histogram(fill = "lightblue", color = "black", bins = 20) +
  facet_grid(. ~ CLASS)

# Box plots of MATHS per CLASS, one column of panels per GENDER.
ggplot(exam_data, aes(x = CLASS, y = MATHS)) +
  geom_boxplot() +
  facet_grid(. ~ GENDER)

# Box plots of MATHS per CLASS, one row of panels per GENDER.
# Equivalent spelling: facet_grid(rows = vars(GENDER))
ggplot(exam_data, aes(x = CLASS, y = MATHS)) +
  geom_boxplot() +
  facet_grid(GENDER ~ .)

# Full grid: GENDER in rows, CLASS in columns.
ggplot(exam_data, aes(x = GENDER, y = MATHS)) +
  geom_boxplot() +
  facet_grid(GENDER ~ CLASS)

# Horizontal bar chart: counts per RACE with the axes flipped.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar() +
  coord_flip()

# Scatter plot with a thin linear fit; both axes are fixed to the 0-100 range
# through the coordinate system.
ggplot(exam_data, aes(y = ENGLISH, x = MATHS)) +
  geom_point() +
  geom_smooth(size = 0.5, method = "lm") +
  coord_cartesian(ylim = c(0, 100), xlim = c(0, 100))
## `geom_smooth()` using formula 'y ~ x'

# Horizontal RACE bar chart using the built-in classic theme.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar() +
  coord_flip() +
  theme_classic()

# Same chart using the built-in minimal theme.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar() +
  coord_flip() +
  theme_minimal()

# Same chart with hand-tuned theme elements: beige panel, solid white major
# grid lines, slightly smaller red axis text and dark grey titles.
ggplot(exam_data, aes(x = RACE)) +
  geom_bar() +
  coord_flip() +
  theme(
    panel.background = element_rect(fill = "beige"),
    panel.grid.major = element_line(
      colour = "white",
      linetype = "solid",
      size = 0.5
    ),
    axis.text = element_text(size = rel(0.8), colour = "red"),
    title = element_text(size = rel(1), colour = "grey20")
  )

# Mean MATHS score per RACE as bars, with the RACE categories ordered by
# reorder(RACE, MATHS), i.e. by the (default: mean) MATHS value of each group.
# `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0, so the
# deprecation warning is no longer raised.
ggplot(data = exam_data, aes(x = reorder(RACE, MATHS), y = MATHS)) +
  stat_summary(geom = "bar",
               fun = "mean",
               fill = "lightblue")

# To sort the bars of the reorder() chart above in descending order, use reorder(RACE, -MATHS).
# Histogram of ENGLISH scores (20 bins), one column of panels per GENDER.
ggplot(exam_data, aes(x = ENGLISH)) +
  geom_histogram(bins = 20) +
  facet_grid(. ~ GENDER)

# Copy of the ENGLISH scores (plus ID) without the GENDER column. Because this
# data frame lacks the faceting variable, the layer drawn from it is repeated
# in every facet panel and acts as a faint whole-dataset reference behind the
# per-gender bars.
# Columns are selected by name rather than by position, so the result does not
# depend on the column order of the CSV file.
data_background <- data.frame(ENGLISH = exam_data$ENGLISH, ID = exam_data$ID)
ggplot() +
  # Background layer: all rows, semi-transparent, grey outline.
  geom_histogram(data = data_background, aes(x = ENGLISH), bins = 20,
                 alpha = 0.3, colour = "grey") +
  # Foreground layer: rows of the panel's own GENDER, filled by GENDER.
  geom_histogram(data = exam_data, aes(x = ENGLISH, fill = GENDER), bins = 20,
                 colour = "black") +
  facet_grid(~GENDER)

# Scatter plot of ENGLISH against MATHS with both axes fixed to the 0-100
# range through the coordinate system.
ggplot(exam_data, aes(x = MATHS, y = ENGLISH)) +
  geom_point() +
  coord_cartesian(ylim = c(0, 100), xlim = c(0, 100))