library(ggplot2)
# Draw 100 uniform random x values and plot y = x^2 + 0.2 * x as a line.
x <- runif(n = 100)
y <- 0.2 * x + x^2
ggplot(data = data.frame(x = x, y = y), mapping = aes(x = x, y = y)) +
  geom_line()
# Load the tab-separated customer records; the first row holds column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
customer_data <- read.table("custdata.tsv", header = TRUE, sep = "\t")

# Keep only rows whose age is strictly between 0 and 100 and whose income is
# strictly positive. subset() evaluates the condition inside the data frame,
# so columns are referenced by bare name; rows where the condition is NA are
# dropped.
customer_data2 <- subset(
  customer_data,
  age > 0 & age < 100 & income > 0
)
# Correlation between age and income in the filtered data.
cor(customer_data2[["age"]], customer_data2[["income"]])
## [1] -0.02240845
# Scatter plot of income against age with the y axis limited to 0-200000.
# Rows outside that range are dropped, which produces the warning below.
ggplot(data = customer_data2, mapping = aes(x = age, y = income)) +
  ylim(0, 200000) +
  geom_point()
## Warning: Removed 32 rows containing missing values (geom_point).
Notice the stat_smooth(method="lm") here.
# Same scatter plot with a linear-model ("lm") smoothing layer drawn on top.
ggplot(data = customer_data2, mapping = aes(x = age, y = income)) +
  ylim(0, 200000) +
  geom_point() +
  stat_smooth(method = "lm")
## Warning: Removed 32 rows containing missing values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).
Notice that we’re using geom_smooth() here.
# Scatter plot with geom_smooth() left at its default method; the recorded
# output below shows that loess was selected for this data.
ggplot(data = customer_data2, mapping = aes(x = age, y = income)) +
  ylim(0, 200000) +
  geom_point() +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 32 rows containing missing values (stat_smooth).
## Warning: Removed 32 rows containing missing values (geom_point).
# Plot health-insurance status (coerced to numeric) against age. The points
# are jittered slightly in both directions and a smoothing curve is added.
# The jitter arguments are named in full rather than relying on partial
# matching of `w`/`h`.
ggplot(customer_data2, aes(x = age, y = as.numeric(health.ins))) +
  geom_point(position = position_jitter(width = 0.05, height = 0.05)) +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Above we didn’t have too many points to plot, so it’s still legible. But what if we had so many that they overlapped? That’s where hexbin plots come in.
library(hexbin)
# Hexbin plot of income against age: observations are counted in hexagonal
# bins 5 wide in age and 10000 wide in income, with a white smoothing curve
# drawn over them. The confidence band is switched off with se = FALSE
# (spelled out, since F is an ordinary variable that can be reassigned).
ggplot(customer_data2, aes(x = age, y = income)) +
  geom_hex(binwidth = c(5, 10000)) +
  geom_smooth(color = "white", se = FALSE) +
  ylim(0, 200000)
## Warning: Removed 32 rows containing missing values (stat_hexbin).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 32 rows containing missing values (stat_smooth).
Stacked bar chart
# One bar per marital status, with segments filled by health-insurance value
# (geom_bar's default position stacks the segments).
ggplot(
  data = customer_data,
  mapping = aes(x = marital.stat, fill = health.ins)
) +
  geom_bar()
Side-by-side bar chart
# Same counts, but the health-insurance groups are placed next to each other
# within each marital status instead of being stacked.
ggplot(
  data = customer_data,
  mapping = aes(x = marital.stat, fill = health.ins)
) +
  geom_bar(position = "dodge")
Filled bar chart
# Same bars with position "fill": every bar is scaled to the same height, so
# the segments show proportions within each marital status.
ggplot(
  data = customer_data,
  mapping = aes(x = marital.stat, fill = health.ins)
) +
  geom_bar(position = "fill")
Adding a rug
# Filled bar chart with a "rug" of jittered, semi-transparent points drawn
# just below the bars (at y = -0.05), one point per row of the data.
ggplot(customer_data, aes(x = marital.stat)) +
  geom_bar(aes(fill = health.ins), position = "fill") +
  geom_point(
    aes(y = -0.05),
    size = 0.75,
    alpha = 0.3,
    # Only the vertical jitter is set; horizontal jitter keeps its default.
    # `height` is named in full rather than relying on partial matching of `h`.
    position = position_jitter(height = 0.01)
  )
# Bars per housing type, with marital-status groups placed side by side.
# The x-axis labels are rotated 45 degrees and right-justified.
ggplot(
  data = customer_data2,
  mapping = aes(x = housing.type, fill = marital.stat)
) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Faceted bar chart
# One panel per housing type, each showing dark gray bars of marital-status
# counts. Every panel gets its own y scale, and the x-axis labels are rotated
# 45 degrees and right-justified.
ggplot(data = customer_data2, mapping = aes(x = marital.stat)) +
  geom_bar(position = "dodge", fill = "darkgray") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~housing.type, scales = "free_y")