# J.Korn
# January 22, 2019
library(gridExtra)
library(grid)
library(ggplot2)
library(datasets)
library(dplyr)
library(nycflights13)
library(reshape2)

# Convert the cylinder count to a factor so ggplot2 treats it as a discrete
# variable.
mtcars$cyl <- as.factor(mtcars$cyl)

library(printr)
library(knitr)

# NOTE(review): str() prints its summary as a side effect and returns NULL
# invisibly, so kable() receives NULL here; the listing below is presumably
# str()'s own output -- confirm whether a plain str(mtcars) was intended.
knitr::kable(str(mtcars), digits = 2, align = "l")
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
library(ggplot2)

# Basic scatter plot
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()

# Change the point size, and shape
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(size = 2, shape = 23)

# Change the point size: map it to the qsec column
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(size = qsec))

# Label every point with its row name
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_text(label = rownames(mtcars))

# The functions below can be used to add regression lines to a scatter plot:
# Overlay a linear-model fit on the scatter plot
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = lm)

# Restyle the points (shape and colour) and the fitted line (type and
# colour); se = FALSE drops the confidence interval
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    se = FALSE,
    color = "darkred",
    linetype = "dashed"
  )

# Same styling, but keep the confidence interval and fill it in blue
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    color = "darkred",
    fill = "blue",
    linetype = "dashed"
  )

# Change point shapes by the levels of cyl
ggplot(mtcars, aes(x = wt, y = mpg, shape = cyl)) +
  geom_point()

# Change point shapes and colors
ggplot(mtcars, aes(x = wt, y = mpg, shape = cyl, color = cyl)) +
  geom_point()

# Change point shapes, colors and sizes
ggplot(mtcars, aes(x = wt, y = mpg, shape = cyl, color = cyl, size = cyl)) +
  geom_point()

# Add regression lines (one per cyl level, since color and shape are mapped
# to cyl)
ggplot(mtcars, aes(x = wt, y = mpg, color = cyl, shape = cyl)) +
  geom_point() +
  geom_smooth(method = lm)

# Remove confidence intervals
# Extend the regression lines
ggplot(mtcars, aes(x = wt, y = mpg, color = cyl, shape = cyl)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE)

# Map the confidence interval fill to cyl as well
ggplot(mtcars, aes(x = wt, y = mpg, color = cyl, shape = cyl)) +
  geom_point() +
  geom_smooth(method = lm, aes(fill = cyl))

# Filter the weather data to the first 15 days of January (the filter keeps
# every origin; it does not restrict the rows to Newark)
df <- weather %>%
  filter(month == 1 & day <= 15)

library(printr)
library(knitr)

# NOTE(review): str() prints its summary as a side effect and returns NULL
# invisibly, so kable() receives NULL here; the listing below is presumably
# str()'s own output -- confirm whether a plain str(df) was intended.
knitr::kable(str(df), digits = 2, align = "l")
## Classes 'tbl_df', 'tbl' and 'data.frame': 1074 obs. of 15 variables:
## $ origin : chr "EWR" "EWR" "EWR" "EWR" ...
## $ year : num 2013 2013 2013 2013 2013 ...
## $ month : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hour : int 1 2 3 4 5 6 7 8 9 10 ...
## $ temp : num 39 39 39 39.9 39 ...
## $ dewp : num 26.1 27 28 28 28 ...
## $ humid : num 59.4 61.6 64.4 62.2 64.4 ...
## $ wind_dir : num 270 250 240 250 260 240 240 250 260 260 ...
## $ wind_speed: num 10.36 8.06 11.51 12.66 12.66 ...
## $ wind_gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ precip : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pressure : num 1012 1012 1012 1012 1012 ...
## $ visib : num 10 10 10 10 10 10 10 10 10 10 ...
## $ time_hour : POSIXct, format: "2013-01-01 01:00:00" "2013-01-01 02:00:00" ...
# Line graph of hourly temperature using geom_line()
ggplot(data = df, mapping = aes(x = time_hour, y = temp)) +
  geom_line()

# Single group: dashed line with points
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = 1)) +
  geom_line(linetype = "dashed") +
  geom_point()

# Single group: red line with points
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = 1)) +
  geom_line(color = "red") +
  geom_point()

# Single group: line drawn with an arrow head
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = 1)) +
  geom_line(arrow = arrow()) +
  geom_point()

# One line per origin
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line() +
  geom_point()

# One line per origin with fixed line and point styling
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(linetype = "dashed", color = "blue", size = 1.2) +
  geom_point(color = "red", size = 3)

# Line type varies by origin
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(aes(linetype = origin)) +
  geom_point()

# Line type and point shape vary by origin
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(aes(linetype = origin)) +
  geom_point(aes(shape = origin))

# Line and point colour vary by origin
ggplot(data = df, mapping = aes(x = time_hour, y = temp, group = origin)) +
  geom_line(aes(color = origin)) +
  geom_point(aes(color = origin))

# NOTE(review): tips is presumably the data set shipped with reshape2 -- confirm.
df <- tips
# Two-row example table: one total_bill value per meal time. The explicit
# levels keep "Lunch" ordered before "Dinner".
df <- data.frame(
  time = factor(c("Lunch", "Dinner"), levels = c("Lunch", "Dinner")),
  total_bill = c(14.89, 17.23)
)
## 'data.frame': 2 obs. of 2 variables:
## $ time : Factor w/ 2 levels "Lunch","Dinner": 1 2
## $ total_bill: num 14.9 17.2

# Bar heights are taken directly from total_bill (stat = "identity")
ggplot(data = df, aes(x = time, y = total_bill)) +
  geom_bar(stat = "identity")

# Map time to the fill colour as well
ggplot(data = df, aes(x = time, y = total_bill, fill = time)) +
  geom_bar(stat = "identity")

# Add a black outline to each bar
ggplot(data = df, aes(x = time, y = total_bill, fill = time)) +
  geom_bar(colour = "black", stat = "identity")

# To get a bar graph of counts, don't map a variable to y, and use
# stat = "count" (which is the default) instead of stat = "identity":
ggplot(data = tips, aes(x = day)) +
  geom_bar(stat = "count")

# Four-row example table: one total_bill value per sex and meal time
dat1 <- data.frame(
  sex = factor(c("Female", "Female", "Male", "Male")),
  time = factor(
    c("Lunch", "Dinner", "Lunch", "Dinner"),
    levels = c("Lunch", "Dinner")
  ),
  total_bill = c(13.53, 16.81, 16.24, 17.42)
)
## 'data.frame': 4 obs. of 3 variables:
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 2 2
## $ time : Factor w/ 2 levels "Lunch","Dinner": 1 2 1 2
## $ total_bill: num 13.5 16.8 16.2 17.4
# Bars filled by sex, using geom_bar()'s default position
ggplot(data = dat1, aes(x = time, y = total_bill, fill = sex)) +
  geom_bar(stat = "identity")

# Same data with the bars for each sex placed side by side
ggplot(data = dat1, aes(x = time, y = total_bill, fill = sex)) +
  geom_bar(stat = "identity", position = position_dodge())

# Make sure cyl is a factor before the plots that follow
mtcars$cyl <- as.factor(mtcars$cyl)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Histogram of mpg with qplot()
# NOTE(review): qplot() is reported as deprecated in recent ggplot2 releases
# -- confirm against the installed version before relying on it.
qplot(mtcars$mpg, geom = "histogram")

# The same histogram with ggplot(); the column is referenced by name inside
# aes() rather than through mtcars$mpg
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram()

# Histogram with a density curve layered on top
# NOTE(review): the histogram's y axis is a count while geom_density() draws
# a density, so the two layers presumably sit on different scales -- confirm
# this is the intended picture.
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram() +
  geom_density(col = 3)

# Histogram with an explicit bin width of 1
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram(stat = "bin", binwidth = 1)

# Place a scatter plot and a histogram side by side
p1 <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(size = 2, shape = 23)
p2 <- qplot(mtcars$mpg, geom = "histogram")
grid.arrange(p1, p2, nrow = 1)

# Place a plain scatter plot next to a version faceted by cyl (free scales,
# colour legend suppressed)
p1 <- ggplot(mtcars, aes(mpg, wt, colour = factor(cyl))) +
  geom_point()
p2 <- ggplot(mtcars, aes(mpg, wt, colour = factor(cyl))) +
  geom_point() +
  facet_wrap(~ cyl, ncol = 2, scales = "free") +
  guides(colour = "none") +
  theme()
grid.arrange(p1, p2, nrow = 1)