# Default figure size for every chunk of the knitted document.
knitr::opts_chunk$set(fig.width = 4, fig.height = 2)
library(ggplot2) # plotting functions used throughout this script
# hwy against displ from the mpg data, colored by drv: a loess trend whose
# line type also varies by drv, with the raw points drawn on top.
ggplot(mpg, aes(displ, hwy, color = drv)) +
  geom_smooth(aes(linetype = drv), method = "loess") +
  geom_point() +
  theme_classic()
## Plotting systems in R: the base plotting system, the lattice package, and the ggplot2 package. There are others as well (plotly, heatmap.2, igraph).
# Load the gapminder data set and inspect its size and structure.
library(gapminder)
dim(gapminder)
## [1] 1704 6
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# View() opens a spreadsheet-style GUI viewer, so it is only useful (and only
# safe to call) in an interactive session; skip it when the script is run
# non-interactively, e.g. while knitting.
if (interactive()) {
  View(gapminder)
}
# Quick looks with the base plotting system: the whole data frame, life
# expectancy against year, life expectancy per year as box plots, and the
# distribution of life expectancy.
plot(gapminder)
plot(gapminder$year, gapminder$lifeExp)
boxplot(lifeExp ~ year, data = gapminder)
hist(gapminder$lifeExp)
library(ggplot2)
# Data only: no aesthetics and no layers have been supplied yet.
ggplot(data = gapminder)
# Data plus a mapping given inside ggplot(): these are the global
# options/values that every layer added later inherits.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
)
# The same global mapping with a point layer added. Options placed inside
# geom_point() would be applied to this ("geom") layer only.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()
# The mapping now lives in the geom layer instead of in ggplot(), so it is
# local to that layer.
ggplot(data = gapminder) +
  geom_point(mapping = aes(x = gdpPercap, y = lifeExp))
# Argument names are optional: data, mapping, x and y can all be passed by
# position, which gives the same plot with less typing.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()
# List the available columns, then plot lifeExp against year as points.
names(gapminder)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
ggplot(data = gapminder, mapping = aes(x = year, y = lifeExp)) +
  geom_point()
## Challenge: modify the code from the previous challenge to color the points by the “continent” column. What trends do you see in the data? Are they what you expected?
# Same scatter plot, with the color aesthetic mapped globally to continent.
ggplot(
  data = gapminder,
  mapping = aes(year, lifeExp, color = continent)
) +
  geom_point()
## That is too hard to read. Change it to a line plot.
# Line plot of lifeExp over year, colored by continent.
# `group` is the ggplot2 aesthetic that splits the data into separate lines;
# the previous `by` is not a ggplot2 aesthetic and only produced per-country
# lines as a side effect of the default grouping on discrete variables.
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line() ## see here, was geom_point()
## "group" draws a line for every country
## ggplot layers
## Layers can keep being added; they are drawn in the order they are added.
# `group = country` (the ggplot2 grouping aesthetic) replaces the
# non-aesthetic `by = country`, so one line per country is requested
# explicitly.
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, group = country, color = continent)
) +
  geom_line() + ### this layer gets drawn first
  geom_point() ### this one is drawn on top
# Color is mapped only inside the line layer, so the point layer has no color
# mapping and falls back to its default.
# `group = country` (the ggplot2 grouping aesthetic) replaces the
# non-aesthetic `by = country`.
ggplot(gapminder, aes(x = year, y = lifeExp, group = country)) +
  geom_line(aes(color = continent)) + ### global "continent" option moved to local
  geom_point() ## no aes means default, which is black
## These black dots seem sort of pointless to me. Here is an example that might actually be needed.
# lifeExp against gdpPercap as points, colored by continent (global mapping).
ggplot(
  data = gapminder,
  mapping = aes(gdpPercap, lifeExp, color = continent)
) +
  geom_point()
# Disabled on purpose: y is mapped globally here, and histograms don't want
# y-variables.
# ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
#   geom_point() +
#   geom_histogram()
# With the mappings moved into the layers, the histogram only receives x and
# can share the plot with the scatter layer.
ggplot(data = gapminder) +
  geom_point(mapping = aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(mapping = aes(gdpPercap))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same two layers, with the histogram styled by hand. Histograms don't want
# y-variables, so only x is mapped for that layer. Its bin count, outline
# color and fill are fixed values, which is why they sit outside aes().
ggplot(data = gapminder) +
  geom_point(mapping = aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(
    mapping = aes(gdpPercap),
    bins = 400,
    color = NA,
    fill = "gray66"
  ) +
  theme_classic()
## Challenge: modify the code so that you can see the dots.
# First option: add the histogram layer before the point layer, so the points
# are drawn on top of the bars.
ggplot(data = gapminder) +
  geom_histogram(
    mapping = aes(gdpPercap),
    bins = 400,
    color = NA,
    fill = "gray66"
  ) +
  geom_point(mapping = aes(gdpPercap, lifeExp, color = continent)) +
  theme_classic()
# Second option: keep the histogram as the top layer but make it
# semi-transparent (alpha = 0.5).
ggplot(data = gapminder) +
  geom_point(mapping = aes(gdpPercap, lifeExp, color = continent)) +
  geom_histogram(
    mapping = aes(gdpPercap),
    bins = 400,
    color = NA,
    fill = "gray66",
    alpha = 0.5
  ) +
  theme_classic()
# lifeExp against gdpPercap on the default linear x axis.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  theme_classic()
### The large-GDP countries make it hard to see the rest of the points, but
### there seems to be some relationship between GDP and life expectancy.
# Same data with semi-transparent points and a log10-scaled x axis.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()
# Log10-scaled scatter with a smoothing layer added on top.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm") ## lm = linear model
# Log10-scaled scatter with a thicker black linear-model line and the
# classic theme.
# `se = FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()
# Statistics for the annotation must come from the model the plot actually
# draws. scale_x_log10() transforms x before geom_smooth() fits its linear
# model, so the matching fit is lifeExp ~ log10(gdpPercap), not the
# untransformed lifeExp ~ gdpPercap. The model is fitted once and reused.
fit_summary <- summary(lm(lifeExp ~ log10(gdpPercap), data = gapminder))
# Adjusted R-squared, rounded to two digits.
r2 <- round(fit_summary$adj.r.squared, digits = 2)
# P-value of the slope (second coefficient), in scientific notation.
# NOTE(review): for a very strong fit this value may underflow and print as
# 0.00e+00 -- confirm, and consider format.pval() if that happens.
p <- formatC(fit_summary$coefficients[2, 4], format = "e", digits = 2)
# Scatter plus trend line, annotated with the statistics at x = 1000, y = 84.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  annotate("text", 1000, 84, label = paste0("P=", p, ", ", "R2=", r2)) +
  theme_classic()
# Same plot with the points drawn in a fixed hex color.
# `se = FALSE` replaces `se = F`, since `F` can be reassigned.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  ### hex colors make it easy to keep illustrator/R consistent
  geom_point(alpha = 0.5, color = "#756bb1") +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()
# Color is mapped globally to continent; the smoothing layer still sets a
# fixed black color of its own.
# `se = FALSE` replaces `se = F`, since `F` can be reassigned.
ggplot(gapminder, aes(gdpPercap, lifeExp, color = continent)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  theme_classic()
## what happened!!??
# Both color and group are mapped globally to continent, and geom_smooth() no
# longer sets a fixed color, so the smoothing layer inherits the continent
# mapping as well.
# `se = FALSE` replaces `se = F`, since `F` can be reassigned.
ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, color = continent, group = continent)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  theme_classic()
## use cowplot (Claus O. Wilke plot) https://serialmentor.com/dataviz/
# Install cowplot only when it is missing, so re-running the script does not
# reinstall the package (or need network access) every time.
if (!requireNamespace("cowplot", quietly = TRUE)) {
  install.packages("cowplot")
}
## Installing package into '/home/jt/R/x86_64-pc-linux-gnu-library/3.6'
## (as 'lib' is unspecified)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
# First panel for the combined figure: log10-scaled scatter, black
# linear-model line, and the P / R2 annotation. `p` and `r2` must already
# exist in the session (they are computed earlier in this script).
# `se = FALSE` replaces the reassignable `F`; paste0() replaces
# paste(..., sep = "").
p1 <- ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5, color = "black") +
  annotate("text", 1000, 84, label = paste0("P=", p, ", ", "R2=", r2)) +
  theme_classic()
# Second panel: color and group are mapped globally to continent, and the
# smoothing layer sets no color of its own, so it inherits that mapping.
# `se = FALSE` replaces `se = F`, since `F` can be reassigned.
p2 <- ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, color = continent, group = continent)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  theme_classic()
# Put both panels side by side in a single row; `p1` is defined earlier in
# this script.
plot_grid(p1, p2, nrow = 1)
## use facet_wrap (part of ggplot2)
# lifeExp against gdpPercap with one line per country, split into panels.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, group = country)
) +
  geom_line() +
  # the ~ tells facet_wrap() how to break up the data;
  # scales = "free" allows the axes to differ between panels
  facet_wrap(~continent, scales = "free") +
  theme(axis.text.x = element_text(angle = 90))
# Faceted line plot with axis labels and a title.
# The y label is corrected from "Life expectance" to "Life expectancy".
ggplot(gapminder, aes(gdpPercap, lifeExp, group = country)) +
  geom_line() +
  facet_wrap(~continent, scales = "free") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "GDP (per capita)",
    y = "Life expectancy (years)",
    title = "for Science"
  )
# Store the labelled, faceted plot so it can be saved and reused.
# The y label is corrected from "Life expectance" to "Life expectancy".
bestplot <- ggplot(gapminder, aes(gdpPercap, lifeExp, group = country)) +
  geom_line() +
  facet_wrap(~continent, scales = "free") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "GDP (per capita)",
    y = "Life expectancy (years)",
    title = "for Science"
  )
# Write the plot to a PNG file, 12 x 10 cm at 300 dpi.
# NOTE(review): the output path is hard-coded to the user's Desktop.
ggsave(
  filename = "~/Desktop/awesome.png",
  plot = bestplot,
  width = 12,
  height = 10,
  dpi = 300,
  units = "cm"
)
# Show the annotated scatter (`p1`, defined earlier in this script) next to
# the faceted plot in a single row.
plot_grid(p1, bestplot, nrow = 1)