# Load ggplot2, which provides every plotting function used in this script.
# NOTE: the script used to start with rm(list = ls()). That call was dropped:
# it deletes every object in the caller's global environment as a side effect
# and still does not give a clean session (attached packages and options
# persist).
library(ggplot2)

# Layer 1: base plot object mapping car weight (x) to fuel economy (y).
# Printing it on its own draws only the axes, because no geom has been added.
m <- ggplot(data = mtcars, aes(x = wt, y = mpg))
m

# Layer 2: add points to obtain a scatter plot, then a title and axis labels.
# In mtcars, `wt` is recorded in units of 1000 lbs and `mpg` in miles per
# gallon, so the labels state those units.
n <- m + geom_point()
n +
  ggtitle("Scatter plot of Weight by mpg") +
  ylab("Miles per gallon") +
  xlab("Weight (1000 lbs)")
# Scatter layer again, this time with red points and its own title and labels.
# The colour is a fixed value, so it is given outside aes().
m +
  geom_point(mapping = aes(x = wt, y = mpg), color = "red") +
  labs(
    title = "Spread of weight and mpg",
    x = "weight of cars",
    y = "milage per gallon"
  )
# Overlay a fitted trend on the points.
# First variant: straight line of best fit from lm().
m +
  geom_point() +
  geom_smooth(method = lm)

# Second variant: geom_smooth() with its default smoother (loess, as reported
# in the message below).
m +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Save the graphic output to a PDF file. No plot is passed, so ggsave() falls
# back to the most recent plot (the loess version above).
ggsave(filename = "myscatter1.pdf")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Gradient scatter plot: point colour follows the (continuous) mpg value.
sp3 <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = mpg)) +
  geom_point()
sp3

# Swap the default gradient for one interpolated across five rainbow colours.
sp3 +
  scale_color_gradientn(colours = rainbow(n = 5))
# One-dimensional continuous variable: simulated weights for two groups.
# Builds 200 rows: 100 labelled "F" followed by 100 labelled "M". Weights are
# drawn from normal distributions (mean 55 and mean 65, both sd 5) and rounded
# to whole numbers. The fixed seed makes the draw reproducible.
set.seed(1234)
df <- data.frame(
  sex = factor(rep(c("F", "M"), each = 100)),
  weight = round(c(
    rnorm(100, mean = 55, sd = 5),
    rnorm(100, mean = 65, sd = 5)
  ))
)
head(df)
## sex weight
## 1 F 49
## 2 F 56
## 3 F 60
## 4 F 43
## 5 F 57
## 6 F 58

# Count the rows in each group. This line used to be a bare `table`, which
# only printed the source code of base::table() and tabulated nothing.
table(df$sex)
##
##   F   M
## 100 100
# Base plot of the weight variable; the geoms below are added to it.
# NOTE(review): `c` is also the name of base::c(); the variable name is kept
# because code elsewhere may refer to it.
c <- ggplot(data = df, mapping = aes(x = weight))

# Binned counts drawn as a filled area.
c +
  geom_area(stat = "bin", fill = "red", color = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density curve of weight.
j <- c + geom_density()
j

# Same density with a custom outline colour and fill colour.
ggplot(data = df, mapping = aes(x = weight)) +
  geom_density(color = "darkblue", fill = "lightblue")

# Mark the overall mean weight with a dashed vertical line.
j +
  geom_vline(
    mapping = aes(xintercept = mean(weight)),
    color = "blue",
    linetype = "dashed",
    size = 1
  )

# Density drawn with a dashed outline.
ggplot(data = df, mapping = aes(x = weight)) +
  geom_density(linetype = "dashed")
# Mean weight per group, computed with ddply() from plyr.
library(plyr)
mu <- ddply(df, "sex", summarise, grp.mean = mean(weight))
head(mu)
## sex grp.mean
## 1 F 54.2
## 2 M 65.2

# One density curve per group, distinguished by line colour.
ggplot(data = df, mapping = aes(x = weight, color = sex)) +
  geom_density()

# Same plot with a dashed vertical line at each group's mean, read from `mu`.
p <- ggplot(data = df, mapping = aes(x = weight, color = sex)) +
  geom_density() +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )
p

# Legend placement: above the panel, below the panel, or removed entirely.
p + theme(legend.position = "top")
p + theme(legend.position = "bottom")
p + theme(legend.position = "none")
# Histogram and density curve overlaid on one plot.
# The histogram's y axis is mapped to density rather than count so that both
# layers share a scale. after_stat(density) replaces the deprecated
# `..density..` notation (needs ggplot2 >= 3.3.0). bins = 30 spells out the
# value stat_bin() previously picked by default, so the plots are unchanged
# and the "Pick better value with `binwidth`" message is no longer emitted.
h <- ggplot(df, aes(x = weight)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30)
h

# Add a semi-transparent filled density curve on top of the histogram.
h + geom_density(alpha = 0.3, fill = "#FF6666")

# Same overlay with white bars outlined in black.
ggplot(df, aes(x = weight)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    colour = "black",
    fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666")

# Colour by group: position = "identity" draws the two groups' bars over each
# other instead of stacking them, and alpha keeps both visible.
ggplot(df, aes(x = weight, color = sex, fill = sex)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    alpha = 0.5,
    position = "identity"
  ) +
  geom_density(alpha = 0.2)
# Multiple panels using facets: one density panel per level of `sex`,
# stacked in rows.
k <- ggplot(df, aes(x = weight)) +
  geom_density() +
  facet_grid(sex ~ .)
k

# Add a dashed red line at each group's mean (taken from `mu`).
# The colour is a constant, so it is set outside aes(). Inside aes() the
# string "red" was treated as data: the line got the default palette colour
# and a legend entry labelled "red" appeared.
k +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean),
    color = "red",
    linetype = "dashed"
  )
# Bar plots of the `diamonds` data: number of rows per `cut` category.
# Plain bars.
ggplot(diamonds) +
  geom_bar(aes(x = cut))

# Bar outlines coloured by cut.
ggplot(diamonds) +
  geom_bar(aes(x = cut, color = cut))

# Bar interiors filled by cut.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))
# Time series plot: `unemploy` against `date` from the `economics` data,
# drawn as a line.
# NOTE(review): `t` is also the name of base::t(); the variable name is kept
# because code elsewhere may refer to it.
t <- ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()
t

# Overlay a loess smoother; span = 1 is passed through to the loess fit.
t1 <- t +
  geom_smooth(method = loess, span = 1)
t1
# Load the `singer` data set shipped with the lattice package.
data(singer, package = "lattice")

# Histogram of singer heights.
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Height by voice part, one box per part.
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_boxplot()

# Bubble chart: weight vs mpg with point size mapped to `disp`.
# Shape 21 is a circle that takes a separate outline colour and fill.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point(shape = 21, color = "black", fill = "cornsilk")
# Base-graphics boxplot of mpg for every gear-by-carb combination in mtcars.
boxplot(formula = mpg ~ gear * carb, data = mtcars)

# Put the mtcars columns on the search path so they can be used by bare name.
# NOTE(review): attach() is generally discouraged; as the message below shows,
# it masks ggplot2's `mpg` data set. Kept because code that follows may rely
# on the attached columns -- confirm before removing.
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg

# First six rows of mtcars.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1