#dataset mtcars
library(ggplot2)
# Load the built-in mtcars data set and print a compact summary of its columns.
data("mtcars")
str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Mapping variables onto aesthetics ----
# Aesthetic mappings are written inside aes(): aes(x = _, y = _, ...).
# Adding geom_text(aes(label = <variable>)) draws each observation as the
# value of that variable, so every car is shown as its cyl value.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_text(mapping = aes(label = cyl))
# Setting attributes of the plot ----
# Fixed attributes are passed as arguments to the geom itself, i.e.
# geom_point(...), outside aes(); a constant colour is given as a quoted string.
# mtcars has no `fcyl` column, so the colour mapping uses factor(cyl) instead.
# The fixed color = "yellow" on the layer applies to every point, so all
# points are drawn in yellow regardless of the mapped colour.
ggplot(mtcars, aes(wt, mpg, color = factor(cyl))) +
  geom_point(shape = 24, color = "yellow")
# Positions go inside the geom call, e.g. geom_point(position = "..."):
#   identity    - regular form for scatter plots
#   dodge       - bar graphs and box plots (side-by-side bars)
#   stack       - bar graphs (stacked bars)
#   fill        - stretch the bars all the way to the top
#   jitter      - add noise around each point (so no two points coincide)
#   jitterdodge
#   nudge
# Load the built-in iris data set and print a compact summary of its columns.
data("iris")
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# "identity" is the default position for scatter plots; here it is applied to
# a bar chart of mpg, followed by the same chart with geom_bar()'s own
# default position for comparison.
data(list = "mtcars")
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_bar(position = "identity")
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_bar()
# Setting arguments for the position ----
# Transparency and jitter are applied consistently across plots and layers of
# scatter plots so that overlapping points stay distinguishable.
# alpha written inside aes() is handled as a mapped aesthetic ...
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl, alpha = 0.5)) +
  geom_point()
# ... whereas alpha passed to the geom is a fixed attribute of that layer.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(alpha = 0.5)
# Jitter adds random noise to the position of each point.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(position = "jitter")
# Jitter layer for over-plotted data ----
# NOTE(review): the ggplot() call this layer belongs to is commented out, and
# `Vocab` is not loaded anywhere in the visible script, so the statement below
# only builds and prints the layer description instead of drawing a plot.
#ggplot(Vocab, aes(education, vocabulary)) +
geom_jitter(shape = 1, alpha = 0.2)
## geom_point: na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_jitter
# Restricting the y axis with ylim(lower, upper) ----
# Reference scatter plot of cyl against mpg with half-transparent points.
ggplot(data = mtcars, mapping = aes(x = mpg, y = cyl)) +
  geom_point(alpha = 0.5)
# Every car is placed at y = 0 and jittered; ylim() sets the axis to -2..2.
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_point(position = "jitter") +
  ylim(-2, 2)
# Bar graph of mtcars with axis labels set through labs() ----
# The x axis is labelled "Number of Cylinders", so cyl (not disp) is counted.
# cyl is stored as a number; factor() makes it discrete so that both the bars
# and the fill colours get one group per cylinder count.
ggplot(mtcars, aes(factor(cyl), fill = factor(cyl))) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count", fill = "Cylinders")
# Over-plotting in large data sets ----
# Be aware of over-plotting with:
#   - large data sets
#   - values aligned on a single axis
#   - low-precision data
#   - integer data
# The two plots below differ only in the point shape code (9, then 18).
data("diamonds")
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = clarity)
) +
  geom_point(shape = 9, alpha = 0.5)
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = clarity)
) +
  geom_point(shape = 18, alpha = 0.5)
# Facets and smoothers on the mpg data set ----
data("mpg")
# hwy against displ, one panel per class, panels laid out in two columns.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(facets = vars(class), ncol = 2)
# Linear-model fit drawn without its confidence band, points on top.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'
# Default smoothing method, confidence band switched on explicitly.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(se = TRUE) +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Histograms ----
# A histogram takes a single aesthetic (x): a continuous variable that is cut
# up into discrete bins.
# Default binning.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Wider bars: bins of width 2.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 2)
# Bar colour is set with fill = "...".
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(fill = "purple")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Positions for histograms:
#   stack    - default
#   dodge    - bars for different groups side by side
#   fill     - bars for different groups shown as proportions
#   identity - plot values as they appear in the data set
# position = "fill" stretches the bars up to the top of the y axis ----
# Plain histogram with bins of width 1, for reference.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 1)
# Same bins with position = "fill"; some of the lower-valued bars are removed,
# as the warning below reports.
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(position = "fill", binwidth = 1)
## Warning: Removed 8 rows containing missing values (geom_bar).