# Load the cancer dataset from the local Rdatasets checkout and preview
# its first rows.
cancer <- read.csv(file = "Rdatasets-master/csv/survival/cancer.csv")
head(cancer, n = 6L)
## X inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 1 3 306 2 74 1 1 90 100 1175 NA
## 2 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 3 1010 1 56 1 0 90 90 NA 15
## 4 4 5 210 2 57 1 1 90 60 1150 11
## 5 5 1 883 2 60 1 0 100 90 NA 0
## 6 6 12 1022 1 74 1 1 50 80 513 0
# Recode the numeric age column as a factor, then preview again; the
# printed rows look the same because head() shows factor labels.
cancer[["age"]] <- as.factor(cancer[["age"]])
head(cancer, n = 6L)
## X inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 1 3 306 2 74 1 1 90 100 1175 NA
## 2 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 3 1010 1 56 1 0 90 90 NA 15
## 4 4 5 210 2 57 1 1 90 60 1150 11
## 5 5 1 883 2 60 1 0 100 90 NA 0
## 6 6 12 1022 1 74 1 1 50 80 513 0
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(mosaic)
## Warning: package 'mosaic' was built under R version 3.4.3
## Loading required package: lattice
## Loading required package: ggformula
## Warning: package 'ggformula' was built under R version 3.4.2
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Warning: package 'mosaicData' was built under R version 3.4.2
## Loading required package: Matrix
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
# Read the CSV afresh, replacing the in-memory copy. This discards the
# earlier factor conversion of age, so the plots below see the column
# exactly as read.csv() delivers it.
cancer <- read.csv(file = "Rdatasets-master/csv/survival/cancer.csv")
head(cancer, n = 6L)
## X inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 1 3 306 2 74 1 1 90 100 1175 NA
## 2 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 3 1010 1 56 1 0 90 90 NA 15
## 4 4 5 210 2 57 1 1 90 60 1150 11
## 5 5 1 883 2 60 1 0 100 90 NA 0
## 6 6 12 1022 1 74 1 1 50 80 513 0
library(ggplot2)
# Plain scatter plot of age against time
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point()

# Same plot with size-2 points drawn using shape 23
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point(shape = 23, size = 2)

# Label every point with the row name of its observation
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_text(label = rownames(cancer))

# Print a standalone smoothing layer to show the settings it carries
geom_smooth(method = "auto", se = TRUE, fullrange = FALSE, level = 0.95)
## geom_smooth: na.rm = FALSE
## stat_smooth: na.rm = FALSE, fullrange = FALSE, level = 0.95, method = auto, formula = y ~ x, se = TRUE
## position_identity
# Scatter plot with a fitted lm line
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_smooth(method = lm)

# The same fit drawn without its confidence interval
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# Default smoother (reported as loess in the output below)
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'

# Blue shape-18 points with a dashed dark-red lm line and no
# confidence interval
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    se = FALSE,
    color = "darkred",
    linetype = "dashed"
  )

# Same styling, keeping the confidence interval and filling it blue
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point(color = "blue", shape = 18) +
  geom_smooth(
    method = lm,
    color = "darkred",
    fill = "blue",
    linetype = "dashed"
  )

# Print a standalone rug layer configured with sides = "bl"
geom_rug(sides = "bl")
## geom_rug: sides = bl, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
# Scatter plot of the cancer data with marginal rugs
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_rug()

# Colour both the points and the rugs by age
ggplot(data = cancer, mapping = aes(x = time, y = age, color = age)) +
  geom_point() +
  geom_rug()

# Marginal rugs on the faithful data
ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting)) +
  geom_point() +
  geom_rug()

# Grouped scatter plot with one lm line per group, extended over the full
# x range. Colour and shape need a discrete variable: mapping the numeric
# age column to shape makes ggplot2 refuse to draw the plot.
# NOTE(review): sex is used as the grouping variable as an assumption --
# swap in whichever categorical column was intended.
p <- ggplot(
  data = cancer,
  mapping = aes(x = time, y = age, color = factor(sex), shape = factor(sex))
) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE, fullrange = TRUE) +
  theme_classic()
# Use brewer color palettes
p + scale_color_brewer(palette = "Dark2")

# Print a standalone rug layer configured with sides = "bl" (the original
# script printed this identical layer twice in a row; once is enough)
geom_rug(sides = "bl")
## geom_rug: sides = bl, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
# Add marginal rugs
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_rug()

# Change colors: a fixed colour is set on the layers themselves, outside
# aes(). Inside aes() the string would be treated as a one-level variable
# (default palette colour plus a legend) instead of as a colour name.
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point(color = "chartreuse") +
  geom_rug(color = "chartreuse")

# Marginal rugs on the cancer data again (default colours)
ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point() +
  geom_rug()

# Base scatter plot reused by the 2d density examples below
sp <- ggplot(data = cancer, mapping = aes(x = time, y = age)) +
  geom_point()

# Overlay 2d density contours
sp + geom_density_2d()

# Density drawn as polygons filled by contour level
sp + stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon")

# Same polygons with a blue-to-red fill gradient
sp +
  stat_density_2d(mapping = aes(fill = ..level..), geom = "polygon") +
  scale_fill_gradient(low = "blue", high = "red")

# One ellipse around all points of the faithful data
ggplot(data = faithful, mapping = aes(x = waiting, y = eruptions)) +
  geom_point() +
  stat_ellipse()

# One ellipse per group, grouping on whether eruptions exceeds 3
p <- ggplot(
  data = faithful,
  mapping = aes(x = waiting, y = eruptions, color = eruptions > 3)
) +
  geom_point()
p + stat_ellipse()

# Ellipse type may be "t", "norm" or "euclid"; "norm" is used here
p + stat_ellipse(type = "norm")

# Preview the data once more before the binned plot
head(cancer, n = 6L)
## X inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 1 3 306 2 74 1 1 90 100 1175 NA
## 2 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 3 1010 1 56 1 0 90 90 NA 15
## 4 4 5 210 2 57 1 1 90 60 1150 11
## 5 5 1 883 2 60 1 0 100 90 NA 0
## 6 6 12 1022 1 74 1 1 50 80 513 0
# 2d binned plot of age against time
p <- ggplot(data = cancer, mapping = aes(x = time, y = age))
p + geom_bin2d()

# Reproducible two-group sample: 500 draws per group for each of x and y,
# with the groups differing in their means.
set.seed(1234)
x <- c(
  rnorm(n = 500, mean = -1),
  rnorm(n = 500, mean = 1.5)
)
y <- c(
  rnorm(n = 500, mean = 1),
  rnorm(n = 500, mean = 1.7)
)
# Group label: 1 for the first 500 rows, 2 for the last 500
group <- factor(rep(c(1, 2), each = 500))
df <- data.frame(x = x, y = y, group = group)
head(df, n = 6L)
## x y group
## 1 -2.20706575 -0.2053334 1
## 2 -0.72257076 1.3014667 1
## 3 0.08444118 -0.5391452 1
## 4 -3.34569770 1.6353707 1
## 5 -0.57087531 1.7029518 1
## 6 -0.49394411 -0.9058829 1
# Preview the first six rows of the cancer data
head(cancer, n = 6L)
## X inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 1 3 306 2 74 1 1 90 100 1175 NA
## 2 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 3 1010 1 56 1 0 90 90 NA 15
## 4 4 5 210 2 57 1 1 90 60 1150 11
## 5 5 1 883 2 60 1 0 100 90 NA 0
## 6 6 12 1022 1 74 1 1 50 80 513 0