#dataset mtcars

library(ggplot2)
# Load the built-in mtcars data set into the workspace.
data("mtcars")

# Print a compact summary of every column: its type and first few values.
str(object = mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

# Mapping variables onto aesthetics ----

# Aesthetic mappings are declared inside aes(), e.g. aes(x = _, y = _, ___).
# Adding geom_text(aes(label = variable)) draws the value of `variable` as
# text at each (x, y) position.
# Here every car is drawn as its cylinder count.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_text(mapping = aes(label = cyl))

# Setting attributes of the plot ----

# Fixed (non-data) attributes are passed as arguments of the geom itself,
# i.e. geom_point(___), outside of aes(). Colour names are quoted strings.
# No colour mapping is declared in aes(): a constant colour set on the layer
# overrides any mapped colour, and mtcars has no `fcyl` column (it would have
# to be created first, e.g. with factor(cyl)).
# Draws every point as a triangle (shape 24) outlined in yellow.
ggplot(mtcars, aes(wt, mpg)) +
  geom_point(shape = 24, color = "yellow")

# Positions go inside the geom call, e.g. geom_point(position = "___"):
##   identity    - regular form for scatter plots
##   dodge       - bar graphs and box plots (side-by-side bars)
##   stack       - bar graphs (stacked bars)
##   fill        - makes the bars reach all the way to the top
##   jitter      - adds noise around each point (no two points coincide)
##   jitterdodge
##   nudge

##functions using iris data set

# Load the built-in iris data set and print a summary of its columns.
data("iris")

str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# "identity" is the default position for scatter plots; geom_bar() uses
# "stack" unless told otherwise. The two calls below compare them.
# Reload the packaged copy of mtcars first.
data(mtcars)

# Bar chart of mpg counts with the position set explicitly to "identity".
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_bar(position = "identity")

# The same bar chart with geom_bar()'s default position.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_bar()

# Position arguments and transparency ----
# Setting these explicitly keeps plots and layers consistent. In scatter
# plots they are used to stop points from hiding one another.

# alpha written inside aes() is treated as a mapped aesthetic.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl, alpha = 0.5)) +
  geom_point()

# alpha passed to the geom is a fixed attribute applied to every point.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(alpha = 0.5)

# position = "jitter" adds a little random noise to each point.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(position = "jitter")

# Add a jitter layer ----
# geom_jitter() is a shortcut for geom_point(position = "jitter"); use it
# when over-plotting hides points.

# Reference example on the Vocab data set. It is commented out in full
# because Vocab is not loaded in this script (presumably carData::Vocab --
# confirm before enabling):
# ggplot(Vocab, aes(education, vocabulary)) +
#   geom_jitter(alpha = 0.2, shape = 1)

# Runnable equivalent on mtcars: jittered, semi-transparent open circles.
ggplot(mtcars, aes(mpg, cyl)) +
  geom_jitter(alpha = 0.2, shape = 1)
# Baseline scatter plot with semi-transparent points, for comparison.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(alpha = 0.5)

# ylim(lower, upper) fixes the range of the y-axis.
# Every point is placed at y = 0 and jittered, then shown on a y-axis
# limited to [-2, 2].
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_point(position = "jitter") +
  ylim(-2, 2)

# Bar graphs using mtcars ----

# The x-axis is labelled "Number of Cylinders", so the bars count cars per
# cylinder class (cyl) rather than per displacement value (disp).
# cyl is stored as a number; wrapping it in factor() makes it discrete so
# the fill aesthetic can give each cylinder class its own colour.
# labs() sets the axis and legend titles.
ggplot(mtcars, aes(factor(cyl), fill = factor(cyl))) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count", fill = "Cylinders")

# Over-plotting in large data sets ----

# Watch out for over-plotting with:
## - large data sets
## - values aligned on a single axis
## - low-precision data
## - integer data
# `shape` selects the point symbol by number (it does not control size);
# `alpha` makes the points semi-transparent.
data("diamonds")

# Price against carat, coloured by clarity, drawn with point symbol 9.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point(shape = 9, alpha = 0.5)

# The same plot drawn with point symbol 18.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
) +
  geom_point(shape = 18, alpha = 0.5)

# Load the mpg data set.
data("mpg")

# Highway mileage against engine displacement, one panel per vehicle class,
# laid out in two columns.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~class, ncol = 2)

# Linear-model fit without a confidence band, with the raw points on top.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'

# Default smoother with its confidence band, with the raw points on top.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(se = TRUE) +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Histograms ----

# A histogram takes a single aesthetic (x): a continuous variable that is
# cut up into discrete bins.

# Default binning.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# binwidth = 2 makes the bars wider.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 2)

# fill = "___" sets the colour inside the bars.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(fill = "purple")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Positions for histograms:
##   stack    - default
##   dodge    - bars for different groups side by side
##   fill     - bars for different groups shown as proportions
##   identity - plot values as they appear in the data set

# position = "fill" stretches the bars up to the top of the y-axis,
# but some bars of lower values are removed (see the warning below).

# Reference histogram with one-unit bins.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 1)

# The same bins drawn with position = "fill".
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(position = "fill", binwidth = 1)
## Warning: Removed 8 rows containing missing values (geom_bar).