# qplot - quick plot
#

# data input
# `header = TRUE` is spelled out: `h=T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
# `stringsAsFactors = TRUE` keeps the categorical columns as factors, matching
# the str() output recorded below; since R 4.0.0 read.table() leaves strings
# as character vectors unless asked otherwise.
dta <- read.table("hs0.txt", header = TRUE, stringsAsFactors = TRUE)

# examine data
str(dta)
## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ female : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
##  $ ses    : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 NA ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...
# install ggplot2
#install.packages("ggplot2")

# load it
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
### scatter plot

# scatter plot of math against read, passing the two columns as vectors
qplot(x = dta$read, y = dta$math)

# same scatter plot, naming the columns and letting qplot find them in dta
qplot(x = read, y = math, data = dta)

# turn ses into an ordered factor running low < middle < high
dta$ses <- factor(
  dta$ses,
  levels = c("low", "middle", "high"),
  ordered = TRUE
)

## conditioning by changing aesthetic attributes

# math against read, ses shown by point color
qplot(x = read, y = math, data = dta, color = ses)

# math against read, ses shown by point size
qplot(x = read, y = math, data = dta, size = ses)
## Warning: Using size for a discrete variable is not advised.

# math against read, ses shown by point shape
qplot(x = read, y = math, data = dta, shape = ses)

# jittered points at half opacity
# alpha sets transparency; it is wrapped in I() so it is used as a constant
qplot(x = read, y = math, data = dta, geom = "jitter", alpha = I(1 / 2))

# ses by color again, this time with descriptive axis labels
qplot(
  x = read, y = math, data = dta, color = ses,
  xlab = "Reading score", ylab = "Math score"
)

## facets

# one panel per ses level, arranged in columns
qplot(x = read, y = math, data = dta, facets = . ~ ses)

# one panel per race, arranged in rows
qplot(x = read, y = math, data = dta, facets = race ~ .)

# grid of panels: ses in rows, race in columns
qplot(x = read, y = math, data = dta, facets = ses ~ race)

## varying geometric objects

# scatter plot with a smoother layered on the points
qplot(x = read, y = math, data = dta, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess'

# adjust smoothing parameter
# qplot() hands `span` to every geom it is given, so geom_point() received it
# too and warned "Ignoring unknown parameters: span". Building the layers
# explicitly passes span to the smoother only.
ggplot(dta, aes(x = read, y = math)) +
  geom_point() +
  geom_smooth(span = 0.4)
## `geom_smooth()` using method = 'loess'

# change method of smoothing to linear regression
# The layers are built explicitly so that `method` reaches geom_smooth() only;
# through qplot() it was also passed to geom_point(), which warned
# "Ignoring unknown parameters: method".
ggplot(dta, aes(x = read, y = math)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(race ~ ses) +
  labs(x = "Reading score", y = "Math score")

### histograms

# continuous data: histogram with the default binning
qplot(x = math, data = dta, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# set the bin width explicitly instead of using the default 30 bins
qplot(x = math, data = dta, geom = "histogram", binwidth = 3)

# density plot
qplot(x = math, data = dta, geom = "density")

# density plot with the smoothing window scaled by adjust
qplot(x = math, data = dta, geom = "density", adjust = 0.3)

# histogram with bars filled by ses
qplot(x = math, data = dta, geom = "histogram", binwidth = 3, fill = ses)

# one density curve per ses level, told apart by line color
qplot(x = math, data = dta, geom = "density", adjust = 0.3, color = ses)

# math histogram with one panel row per race and bars filled by sex
qplot(
  x = math, data = dta, geom = "histogram", facets = race ~ .,
  binwidth = 2, fill = female
)

### boxplots

# continuous y against discrete x: dotplot of math by program
qplot(x = prog, y = math, data = dta)

# same dotplot with programs ordered by their mean math score
qplot(x = reorder(prog, math), y = math, data = dta)

# jittered dotplot of math by race, races ordered by mean math score
qplot(x = reorder(race, math), y = math, data = dta, geom = "jitter")

# boxplot of math by race
qplot(x = reorder(race, math), y = math, data = dta, geom = "boxplot")

# jittered points and boxplots in the same plot
qplot(
  x = reorder(race, math), y = math, data = dta,
  geom = c("jitter", "boxplot")
)

####