library(ggplot2)
# Base scatter plot: highway mileage (hwy) against cylinder count (cyl).
p <- ggplot(mpg, aes(x = cyl, y = hwy))

# Overplotting: semi-transparent points let stacked observations show darker.
p + geom_point(alpha = 0.3)

# Same plot plus one large red point at (mean(cyl), mean(hwy)).
p +
  geom_point(alpha = 0.3) +
  geom_point(
    data = with(mpg, data.frame(cyl = mean(cyl), hwy = mean(hwy))),
    color = "red",
    size = 5
  )
# cyl is stored as a number rather than a discrete category, so it is wrapped
# in factor() to get a discrete colour scale. Alternative: convert it up front.
# mpg <- transform(mpg, cyl = as.factor(cyl))
# ggplot(data = mpg, mapping = aes(x = cyl, y = hwy)) + geom_jitter(width = 0.6, aes(color = cyl))
p +
  geom_jitter(width = 0.6, aes(color = factor(cyl))) +
  # NOTE(review): these labels use the un-jittered (cyl, hwy) positions, so
  # they will not sit on top of the jittered points -- confirm this is intended.
  geom_text(aes(label = hwy), size = 3, color = "blue") +
  # annotate() draws the caption exactly once. geom_text() with constant
  # x / y / label would draw one overlapping copy per row of mpg.
  annotate("text", x = 6, y = 40, label = "Jitters...", colour = "red")
# Conditional distribution of hwy within each cylinder count, as violins.
ggplot(mpg, aes(x = factor(cyl), y = hwy)) +
  geom_violin()
# Read the dataset "foodData.csv" from Fronter and complete the following tasks. (Data source: https://www.kaggle.com/openfoodfacts/world-food-facts)
# subset(), %in%
# Load the food data, then keep only the rows whose top-level category
# (pnns_groups_1) is one of the four groups listed below.
dt <- read.csv("foodData.csv")
dt1 <- subset(
  dt,
  pnns_groups_1 %in% c(
    "Beverages", "Fruits and vegetables", "Salty snacks", "Sugary snacks"
  )
)
# geom_point(), stat_smooth()
library(ggplot2)
# Box plot of hwy per manufacturer. The title is a plotmath expression
# (alpha, the text " vs. ", and beta squared); all text is set to size 10 in
# the "Comic Sans MS" family.
# NOTE(review): the font family must be available to the graphics device.
ggplot(data = mpg, mapping = aes(x = manufacturer, y = hwy)) +
  geom_boxplot() +
  ggtitle(expression(paste(alpha, " vs. ", beta^2))) +
  theme(text = element_text(size = 10, family = "Comic Sans MS"))
# geom_histogram(), geom_density()
# Produce the bar plot for the food category (pnns_groups_1) where each bar is filled by its sub-category (pnns_groups_2).
# geom_bar(); position = "dodge", "fill", or "stack"
# For a bar plot, the stat argument has two commonly used values:
# - stat = "identity" is used when the frequencies are already provided in the data;
# - stat = "count" (the default for geom_bar()) is used when the frequencies are not provided.
# Calculate the frequency before plotting
# One-way frequency table of the top-level category; table() converted to a
# data frame yields the columns Var1 (level) and Freq (count).
tableData <- data.frame(table(dt1$pnns_groups_1))
ggplot(tableData, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity")

# Two-way table (category x sub-category): Var1 is the category, Var2 the
# sub-category, and each bar is filled by Var2.
tableData2 <- data.frame(table(dt1$pnns_groups_1, dt1$pnns_groups_2))
ggplot(tableData2, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity")
# Use the dataset "economics" to plot a time series
# Line plot of unemploy against date with two reference lines.
ggplot(data = economics, aes(x = date, y = unemploy)) +
  geom_line() +
  # The x axis is a Date, so the vertical line must be positioned by date.
  # A bare 1990 is read as 1990 days after 1970-01-01 (mid-1975), not the
  # year 1990. as.numeric() keeps this working where a Date is not accepted.
  geom_vline(
    xintercept = as.numeric(as.Date("1990-01-01")),
    linetype = "dashed",
    color = "red"
  ) +
  # Dotted (linetype 3) horizontal reference line at unemploy = 8000.
  geom_hline(yintercept = 8000, linetype = 3, color = "blue")
# Lay out panels in a grid
# Base scatter plot: city mileage (cty) against engine displacement (displ).
p.base2 <- ggplot(mpg, aes(x = displ, y = cty)) +
  geom_point()

# Panels laid out in columns, one per cyl value.
p.base2 + facet_grid(. ~ cyl)

# Panels laid out in rows, one per cyl value.
p.base2 + facet_grid(cyl ~ .)

# Full grid: drv in rows, cyl in columns.
p.base2 + facet_grid(drv ~ cyl)

# Same grid with an extra red point at (displ = 4, cty = 20). The extra data
# frame has no drv / cyl columns.
p.base2 +
  facet_grid(drv ~ cyl) +
  geom_point(
    data = data.frame(displ = 4, cty = 20),
    color = "red",
    size = 3
  )
# Use facet_grid() to create the following layout, and add blue points to represent the mean values.
# Create an annotation layer
# Base scatter plot reused by the annotation examples below.
p.base2 <- ggplot(mpg, aes(x = displ, y = cty)) +
  geom_point()

# Text annotation at (4, 25).
p.base2 +
  annotate("text", x = 4, y = 25, label = "Some text")

# Translucent rectangle spanning x in [3, 5] and y in [10, 20].
p.base2 +
  annotate(
    "rect",
    xmin = 3, xmax = 5,
    ymin = 10, ymax = 20,
    alpha = .2
  )

# Blue line segment from (3, 10) to (5, 20).
p.base2 +
  annotate(
    geom = "segment",
    x = 3, xend = 5,
    y = 10, yend = 20,
    colour = "blue"
  )
# geom_text()
# Add a manufacturer label at every observation, coloured by manufacturer.
p.base2 +
  geom_text(aes(label = manufacturer, color = manufacturer), size = 2)
# Themes for background
# The same base plot under two complete themes, each with base font size 12
# and the "Times New Roman" family.
p.base2 +
  theme_classic(base_size = 12, base_family = "Times New Roman")
p.base2 +
  theme_gray(base_size = 12, base_family = "Times New Roman")
# 5. Interactive plot
# install.packages("plotly")
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2

# Turn an existing ggplot object into an interactive plotly figure.
ggplotly(p.base2)

# Histogram of sugars_100g (15 bins, black outlines), filled by top-level
# food category, then converted with ggplotly().
plot.gg <- ggplot(dt1, aes(x = sugars_100g)) +
  geom_histogram(
    aes(fill = pnns_groups_1),
    bins = 15,
    color = "black"
  )
ggplotly(plot.gg)

# Figure built directly with plot_ly(): box plots of percollege, coloured by
# state. Note that this reassigns `p`.
p <- plot_ly(
  midwest,
  x = ~percollege,
  color = ~state,
  type = "box"
)
p