# Attach MASS, which supplies the whiteside data frame
library(MASS)
# Hand the whole data frame to plot()
plot(whiteside)
# Scatterplot of Gas against Temp with descriptive axis labels
plot(
  x = whiteside$Temp,
  y = whiteside$Gas,
  xlab = "Outside temperature",
  ylab = "Heating gas consumption"
)
# Hand only the Insul column to plot()
plot(whiteside$Insul)
# Use points() and lines() to add to existing plots. You can control the point
# shape, color, line type and things like that.
# Avoid pie charts. Use a bar chart instead.
# Attach MASS (source of Cars93) and preview the first six rows
library(MASS)
head(Cars93, n = 6L)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city
## 1 Acura Integra Small 12.9 15.9 18.8 25
## 2 Acura Legend Midsize 29.2 33.9 38.7 18
## 3 Audi 90 Compact 25.9 29.1 32.3 20
## 4 Audi 100 Midsize 30.8 37.7 44.6 19
## 5 BMW 535i Midsize 23.7 30.0 36.2 22
## 6 Buick Century Midsize 14.2 15.7 17.3 22
## MPG.highway AirBags DriveTrain Cylinders EngineSize
## 1 31 None Front 4 1.8
## 2 25 Driver & Passenger Front 6 3.2
## 3 26 Driver only Front 6 2.8
## 4 26 Driver & Passenger Front 6 2.8
## 5 30 Driver only Rear 4 3.5
## 6 31 Driver only Front 4 2.2
## Horsepower RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity
## 1 140 6300 2890 Yes 13.2
## 2 200 5500 2335 Yes 18.0
## 3 172 5500 2280 Yes 16.9
## 4 172 5500 2535 Yes 21.1
## 5 208 5700 2545 Yes 21.1
## 6 110 5200 2565 No 16.4
## Passengers Length Wheelbase Width Turn.circle Rear.seat.room
## 1 5 177 102 68 37 26.5
## 2 5 195 115 71 38 30.0
## 3 5 180 102 67 37 28.0
## 4 6 193 106 70 37 31.0
## 5 4 186 109 69 39 27.0
## 6 6 189 105 69 41 28.0
## Luggage.room Weight Origin Make
## 1 11 2705 non-USA Acura Integra
## 2 15 3560 non-USA Acura Legend
## 3 14 3375 non-USA Audi 90
## 4 17 3405 non-USA Audi 100
## 5 13 3640 non-USA BMW 535i
## 6 16 2880 USA Buick Century
# Max.Price against Price, drawn as red filled triangles (pch = 17)
plot(x = Cars93$Price, y = Cars93$Max.Price, col = "red", pch = 17)
# Overlay Min.Price against Price as blue filled circles (pch = 16)
points(x = Cars93$Price, y = Cars93$Min.Price, col = "blue", pch = 16)
# Dashed reference line with intercept 0 and slope 1 (y = x)
abline(a = 0, b = 1, lty = 2)
# Attach robustbase, which supplies the Animals2 data
library(robustbase)
# One row, two panels
par(mfrow = c(1, 2))
# Left panel: brain against body on the original scale
plot(x = Animals2$body, y = Animals2$brain)
title(main = "Original representation")
# Right panel: the same data with both axes logarithmic
plot(x = Animals2$body, y = Animals2$brain, log = "xy")
title(main = "Log-log plot")
# Attach insuranceData, load its dataCar data frame, and preview six rows
library(insuranceData)
data("dataCar")
head(dataCar, n = 6L)
## veh_value exposure clm numclaims claimcst0 veh_body veh_age gender area
## 1 1.06 0.3039014 0 0 0 HBACK 3 F C
## 2 1.03 0.6488706 0 0 0 HBACK 2 F A
## 3 3.26 0.5694730 0 0 0 UTE 2 F E
## 4 4.14 0.3175907 0 0 0 STNWG 2 F D
## 5 0.72 0.6488706 0 0 0 HBACK 4 F C
## 6 2.01 0.8542094 0 0 0 HDTOP 3 M C
## agecat X_OBSTAT_
## 1 2 01101 0 0 0
## 2 4 01101 0 0 0
## 3 2 01101 0 0 0
## 4 2 01101 0 0 0
## 5 2 01101 0 0 0
## 6 4 01101 0 0 0
# One row, two panels: pie chart on the left, bar chart on the right
par(mfrow = c(1, 2))
# Count the records per veh_body value and order the counts from largest to
# smallest. TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
tbl <- sort(table(dataCar$veh_body), decreasing = TRUE)
tbl
##
## SEDAN HBACK STNWG UTE TRUCK HDTOP COUPE PANVN MIBUS MCARA CONVT BUS
## 22233 18915 16261 4586 1750 1579 780 752 717 127 81 48
## RDSTR
## 27
# Left panel: pie chart of the sorted counts
pie(tbl)
title("Pie chart")
# Right panel: bar chart of the same counts, with labels perpendicular to the
# axis (las = 2) and drawn at half size (cex.names = 0.5)
barplot(tbl, las = 2, cex.names = 0.5)
title("Bar chart")
# hist() is part of base R; by default it yields a histogram based on the
# number of times a record falls into each of the histogram's bins.
# truehist() is from the MASS package and scales these counts to give an
# estimate of the probability density.
# Set up a side-by-side plot array
# One row, two panels for comparing the two histogram functions
par(mfrow = c(1, 2))
# Left panel: hist() applied to Horsepower
hist(x = Cars93$Horsepower, main = "hist() plot")
# Right panel: truehist() applied to the same variable
truehist(Cars93$Horsepower, main = "truehist() plot")
# Preview ChickWeight before subsetting it
head(ChickWeight, n = 6L)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
# Row positions of the measurements taken at Time == 16
index16 <- which(ChickWeight[["Time"]] == 16)
index16
## [1] 9 21 33 45 57 69 81 93 104 116 128 140 152 164 191 205 217
## [18] 229 241 253 265 277 289 301 313 325 337 349 361 373 385 397 409 421
## [35] 433 445 457 469 481 493 505 515 527 539 551 563 575
# Weights recorded at those rows
weights <- ChickWeight[["weight"]][index16]
weights
## [1] 149 162 163 136 197 155 218 126 93 101 182 162 71 227 113 106 98
## [18] 275 131 145 72 197 169 144 184 187 143 170 221 151 235 287 198 135
## [35] 192 170 215 175 204 197 145 147 173 168 222 184 205
# Histogram of the selected weights drawn with truehist()
truehist(weights)
# Overlay the density() estimate of the same weights as a curve
lines(density(weights))
# Attach car so that qqPlot() is available
library(car)
# Rebuild the Time == 16 subset of chick weights
index16 <- which(ChickWeight[["Time"]] == 16)
weights <- ChickWeight[["weight"]][index16]
# QQ-plot of those chick weights
qqPlot(weights)
# QQ-plot of the tax column of Boston
qqPlot(Boston$tax)
# One row, two panels showing rad against zn in two different ways
par(mfrow = c(1, 2))
# Left panel: ordinary scatterplot through the formula interface
plot(rad ~ zn, data = Boston)
title(main = "Standard scatterplot")
# Right panel: sunflower plot of the same formula
sunflowerplot(rad ~ zn, data = Boston)
title(main = "Sunflower plot")
# Boxplots of crim for each rad value, with variable box widths (varwidth),
# a logarithmic y-axis (log = "y") and horizontal axis labels (las = 1).
# TRUE is spelled out because T is an ordinary, reassignable variable.
boxplot(
  crim ~ rad,
  data = Boston,
  varwidth = TRUE,
  log = "y",
  las = 1
)
# Add a title
title("Crime rate vs. radial highway index")
# Mosaic plot of carb against cyl from mtcars, using the formula interface.
# color = TRUE turns on shading; TRUE is spelled out because T is an
# ordinary, reassignable variable.
mosaicplot(
  carb ~ cyl,
  data = mtcars,
  color = TRUE
)
# Attach aplpack so that bagplot() is available
library(aplpack)
# One row, two panels
par(mfrow = c(1, 2))
# Left panel: one boxplot each for Min.Price and Max.Price
boxplot(Cars93$Min.Price, Cars93$Max.Price)
# Right panel: bagplot of the same pair of variables, with cex = 1.20
bagplot(Cars93$Min.Price, Cars93$Max.Price, cex = 1.20)
# Dashed reference line with intercept 0 and slope 1 (y = x)
abline(a = 0, b = 1, lty = 2)
# Attach corrplot so that corrplot() is available
library(corrplot)
# Inspect the column types of UScereal before selecting the numeric ones
str(object = UScereal)
## 'data.frame': 65 obs. of 11 variables:
## $ mfr : Factor w/ 6 levels "G","K","N","P",..: 3 2 2 1 2 1 6 4 5 1 ...
## $ calories : num 212 212 100 147 110 ...
## $ protein : num 12.12 12.12 8 2.67 2 ...
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ sodium : num 394 788 280 240 125 ...
## $ fibre : num 30.3 27.3 28 2 1 ...
## $ carbo : num 15.2 21.2 16 14 11 ...
## $ sugars : num 18.2 15.2 0 13.3 14 ...
## $ shelf : int 3 3 3 1 2 3 1 3 2 1 ...
## $ potassium: num 848.5 969.7 660 93.3 30 ...
## $ vitamins : Factor w/ 3 levels "100%","enriched",..: 2 2 2 2 2 2 2 2 2 2 ...
# Flag the numeric columns of UScereal. vapply() is used instead of sapply()
# so the result is always exactly one logical value per column, whatever the
# input looks like.
col_numeric <- vapply(UScereal, is.numeric, logical(1))
# Keep only the flagged columns; drop = FALSE keeps the result a data frame
# even if only a single column were flagged.
numericalVars <- UScereal[, col_numeric, drop = FALSE]
str(numericalVars)
## 'data.frame': 65 obs. of 9 variables:
## $ calories : num 212 212 100 147 110 ...
## $ protein : num 12.12 12.12 8 2.67 2 ...
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ sodium : num 394 788 280 240 125 ...
## $ fibre : num 30.3 27.3 28 2 1 ...
## $ carbo : num 15.2 21.2 16 14 11 ...
## $ sugars : num 18.2 15.2 0 13.3 14 ...
## $ shelf : int 3 3 3 1 2 3 1 3 2 1 ...
## $ potassium: num 848.5 969.7 660 93.3 30 ...
# Pairwise correlations between the columns of numericalVars
corrMat <- cor(x = numericalVars)
corrMat
## calories protein fat sodium fibre carbo
## calories 1.0000000 0.7060105 0.5901757 0.5286552 0.3882179 0.78872268
## protein 0.7060105 1.0000000 0.4112661 0.5727222 0.8096397 0.54709029
## fat 0.5901757 0.4112661 1.0000000 0.2595606 0.2260715 0.18285220
## sodium 0.5286552 0.5727222 0.2595606 1.0000000 0.4954831 0.42356172
## fibre 0.3882179 0.8096397 0.2260715 0.4954831 1.0000000 0.20307489
## carbo 0.7887227 0.5470903 0.1828522 0.4235617 0.2030749 1.00000000
## sugars 0.4952942 0.1848484 0.4156740 0.2112437 0.1489158 -0.04082599
## shelf 0.4263400 0.3963311 0.3256975 0.2341275 0.3578429 0.26045989
## potassium 0.4765955 0.8417540 0.3232754 0.5566426 0.9638662 0.24204848
## sugars shelf potassium
## calories 0.49529421 0.4263400 0.4765955
## protein 0.18484845 0.3963311 0.8417540
## fat 0.41567397 0.3256975 0.3232754
## sodium 0.21124365 0.2341275 0.5566426
## fibre 0.14891577 0.3578429 0.9638662
## carbo -0.04082599 0.2604599 0.2420485
## sugars 1.00000000 0.2900511 0.2718335
## shelf 0.29005112 1.0000000 0.4262529
## potassium 0.27183347 0.4262529 1.0000000
# Display the correlation matrix using the "ellipse" method of corrplot()
corrplot(
  corrMat,
  method = "ellipse"
)
# Attach rpart so that rpart() is available
library(rpart)
# Model medv using every other Boston column as a predictor
tree_model <- rpart(formula = medv ~ ., data = Boston)
# Draw the tree structure, then label it with text at 70% size
plot(tree_model)
text(tree_model, cex = 0.7)
# par() called without arguments returns the current graphics parameters;
# no.readonly = FALSE is its default and is only spelled out here
plot_pars <- par(no.readonly = FALSE)
# Names of the returned parameters
names(plot_pars)
## [1] "xlog" "ylog" "adj" "ann" "ask"
## [6] "bg" "bty" "cex" "cex.axis" "cex.lab"
## [11] "cex.main" "cex.sub" "cin" "col" "col.axis"
## [16] "col.lab" "col.main" "col.sub" "cra" "crt"
## [21] "csi" "cxy" "din" "err" "family"
## [26] "fg" "fig" "fin" "font" "font.axis"
## [31] "font.lab" "font.main" "font.sub" "lab" "las"
## [36] "lend" "lheight" "ljoin" "lmitre" "lty"
## [41] "lwd" "mai" "mar" "mex" "mfcol"
## [46] "mfg" "mfrow" "mgp" "mkh" "new"
## [51] "oma" "omd" "omi" "page" "pch"
## [56] "pin" "plt" "ps" "pty" "smo"
## [61] "srt" "tck" "tcl" "usr" "xaxp"
## [66] "xaxs" "xaxt" "xpd" "yaxp" "yaxs"
## [71] "yaxt" "ylbias"
# How many parameters were returned
length(plot_pars)
## [1] 72
# 2-by-2 grid: one panel per plot type
par(mfrow = c(2, 2))
# Preview the Animals2 data
head(Animals2, n = 6L)
## body brain
## Lesser short-tailed shrew 0.005 0.14
## Little brown bat 0.010 0.25
## Big brown bat 0.023 0.30
## Mouse 0.023 0.40
## Musk shrew 0.048 0.33
## Star-nosed mole 0.060 1.00
# Panel title -> value of plot()'s type argument, in drawing order
panel_types <- c(points = "p", lines = "l", overlaid = "o", steps = "s")
# Draw the brain weights once per type and title each panel with its name
for (panel_name in names(panel_types)) {
  plot(Animals2$brain, type = panel_types[[panel_name]])
  title(panel_name)
}
# Upper axis limits large enough for both Cars93 and mtcars
max_hp <- max(c(Cars93$Horsepower, mtcars$hp))
max_mpg <- max(c(Cars93$MPG.city, Cars93$MPG.highway, mtcars$mpg))
# Empty frame (type = "n") that only fixes the axes and their labels
plot(
  Cars93$Horsepower, Cars93$MPG.city,
  type = "n",
  xlim = c(0, max_hp), ylim = c(0, max_mpg),
  xlab = "Horsepower", ylab = "Miles per gallon"
)
# mtcars mpg against hp as open circles (pch = 1)
points(x = mtcars$hp, y = mtcars$mpg, pch = 1)
# Cars93 city mileage as solid squares (pch = 15)
points(x = Cars93$Horsepower, y = Cars93$MPG.city, pch = 15)
# Cars93 highway mileage as open triangles (pch = 6)
points(x = Cars93$Horsepower, y = Cars93$MPG.highway, pch = 6)
# 200 evenly spaced values from 0 to 10. The argument is written out as
# length.out rather than relying on partial matching of `length`.
x <- seq(0, 10, length.out = 200)
# Gaussian density on x with mean 2 and standard deviation 0.2
gauss1 <- dnorm(x, mean = 2, sd = 0.2)
# Gaussian density on x with mean 4 and standard deviation 0.5
gauss2 <- dnorm(x, mean = 4, sd = 0.5)
# Draw the first density as a line
plot(x, gauss1,
     type = "l",
     ylab = "Gaussian probability density")
# Add the second density as a dashed (lty = 2), heavier (lwd = 3) line
lines(x, gauss2, lty = 2, lwd = 3)
# First frame: empty (type = "n") mpg-against-hp axes with labels
plot(
  x = mtcars$hp, y = mtcars$mpg,
  type = "n",
  xlab = "Horsepower", ylab = "Gas mileage"
)
# Points whose plotting symbol number is the cyl value itself
points(x = mtcars$hp, y = mtcars$mpg, pch = mtcars$cyl)
# Second frame: the same empty axes again
plot(
  x = mtcars$hp, y = mtcars$mpg,
  type = "n",
  xlab = "Horsepower", ylab = "Gas mileage"
)
# Points drawn as the cyl value converted to a character
points(x = mtcars$hp, y = mtcars$mpg, pch = as.character(mtcars$cyl))
# MASS supplies the whiteside data
library(MASS)
# Linear regression of Gas on Temp
linear_model <- lm(formula = Gas ~ Temp, data = whiteside)
# Scatterplot of Gas against Temp
plot(x = whiteside$Temp, y = whiteside$Gas)
# Overlay the fitted regression line, dashed (lty = 2)
abline(reg = linear_model, lty = 2)
# MPG.city against Horsepower as solid squares (pch = 15)
plot(x = Cars93$Horsepower, y = Cars93$MPG.city, pch = 15)
# Preview Cars93 before picking out the 3-cylinder cars
head(Cars93, n = 6L)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city
## 1 Acura Integra Small 12.9 15.9 18.8 25
## 2 Acura Legend Midsize 29.2 33.9 38.7 18
## 3 Audi 90 Compact 25.9 29.1 32.3 20
## 4 Audi 100 Midsize 30.8 37.7 44.6 19
## 5 BMW 535i Midsize 23.7 30.0 36.2 22
## 6 Buick Century Midsize 14.2 15.7 17.3 22
## MPG.highway AirBags DriveTrain Cylinders EngineSize
## 1 31 None Front 4 1.8
## 2 25 Driver & Passenger Front 6 3.2
## 3 26 Driver only Front 6 2.8
## 4 26 Driver & Passenger Front 6 2.8
## 5 30 Driver only Rear 4 3.5
## 6 31 Driver only Front 4 2.2
## Horsepower RPM Rev.per.mile Man.trans.avail Fuel.tank.capacity
## 1 140 6300 2890 Yes 13.2
## 2 200 5500 2335 Yes 18.0
## 3 172 5500 2280 Yes 16.9
## 4 172 5500 2535 Yes 21.1
## 5 208 5700 2545 Yes 21.1
## 6 110 5200 2565 No 16.4
## Passengers Length Wheelbase Width Turn.circle Rear.seat.room
## 1 5 177 102 68 37 26.5
## 2 5 195 115 71 38 30.0
## 3 5 180 102 67 37 28.0
## 4 6 193 106 70 37 31.0
## 5 4 186 109 69 39 27.0
## 6 6 189 105 69 41 28.0
## Luggage.room Weight Origin Make
## 1 11 2705 non-USA Acura Integra
## 2 15 3560 non-USA Acura Legend
## 3 14 3375 non-USA Audi 90
## 4 17 3405 non-USA Audi 100
## 5 13 3640 non-USA BMW 535i
## 6 16 2880 USA Buick Century
# Row positions of the cars whose Cylinders value equals 3
index3 <- which(Cars93[["Cylinders"]] == 3)
index3
## [1] 39 80 83
# Write each selected car's Make at its data point (adj = 0)
text(
  x = Cars93$Horsepower[index3],
  y = Cars93$MPG.city[index3],
  labels = Cars93$Make[index3],
  adj = 0
)
# MPG.city against Horsepower as open circles (pch = 1)
plot(x = Cars93$Horsepower, y = Cars93$MPG.city, pch = 1)
# Row positions of the cars whose Cylinders value equals 3
index3 <- which(Cars93[["Cylinders"]] == 3)
# Redraw those cars as solid circles (pch = 16) so they stand out
points(
  x = Cars93$Horsepower[index3],
  y = Cars93$MPG.city[index3],
  pch = 16
)
# Label them with Make, offset from the point (adj = -0.2), enlarged
# (cex = 1.2) and in font 4 (bold italic in base graphics)
text(
  x = Cars93$Horsepower[index3],
  y = Cars93$MPG.city[index3],
  labels = Cars93$Make[index3],
  adj = -0.2, cex = 1.2, font = 4
)
# Gas against Temp as solid triangles (pch = 17)
plot(x = whiteside$Temp, y = whiteside$Gas, pch = 17)
# Row positions of the "Before" and "After" observations
indexB <- which(whiteside$Insul == "Before")
indexA <- which(whiteside$Insul == "After")
# "Before" labels: blue, rotated 30 degrees, 80% size
text(
  x = whiteside$Temp[indexB], y = whiteside$Gas[indexB],
  labels = "Before", col = "blue", srt = 30, cex = 0.8
)
# "After" labels: red, rotated -20 degrees, 80% size
text(
  x = whiteside$Temp[indexA], y = whiteside$Gas[indexA],
  labels = "After", col = "red", srt = -20, cex = 0.8
)
# Empty (type = "n"), labelled frame for Gas against Temp
plot(
  x = whiteside$Temp, y = whiteside$Gas,
  type = "n",
  xlab = "Outside temperature",
  ylab = "Heating gas consumption"
)
# Row positions of the "Before" and "After" observations
indexB <- which(whiteside$Insul == "Before")
indexA <- which(whiteside$Insul == "After")
# "Before" observations as solid triangles (pch = 17)
points(x = whiteside$Temp[indexB], y = whiteside$Gas[indexB], pch = 17)
# "After" observations as open circles (pch = 1)
points(x = whiteside$Temp[indexA], y = whiteside$Gas[indexA], pch = 1)
# Legend in the top-right corner pairing each symbol with its group
legend(
  "topright",
  legend = c("Before", "After"),
  pch = c(17, 1)
)
# Preview the first ten rows of the two columns used below
head(UScereal[, c("sugars", "shelf")], n = 10)
## sugars shelf
## 100% Bran 18.181818 3
## All-Bran 15.151515 3
## All-Bran with Extra Fiber 0.000000 3
## Apple Cinnamon Cheerios 13.333333 1
## Apple Jacks 14.000000 2
## Basic 4 10.666667 3
## Bran Chex 8.955224 1
## Bran Flakes 7.462687 3
## Cap'n'Crunch 16.000000 2
## Cheerios 0.800000 1
# Boxplot of sugars for each shelf value with axis drawing switched off, so
# the axes can be added by hand below. FALSE is spelled out because F is an
# ordinary, reassignable variable.
boxplot(sugars ~ shelf, data = UScereal, axes = FALSE)
# Default y-axis on the left (side = 2)
axis(side = 2)
# x-axis below the plot (side = 1) with ticks at 1, 2 and 3
axis(side = 1, at = c(1, 2, 3))
# Second x-axis above the plot (side = 3) with text labels at the same ticks
axis(side = 3, at = c(1, 2, 3),
     labels = c("floor", "middle", "top"))
# Scatterplot of MPG.city against Horsepower
plot(x = Cars93$Horsepower, y = Cars93$MPG.city)
# supsmu() trend curve with the default bass, drawn with default line settings
trend1 <- supsmu(x = Cars93$Horsepower, y = Cars93$MPG.city)
lines(trend1)
# Second supsmu() trend curve with bass = 10, drawn dotted (lty = 3) and
# heavier (lwd = 2)
trend2 <- supsmu(x = Cars93$Horsepower, y = Cars93$MPG.city, bass = 10)
lines(trend2, lty = 3, lwd = 2)
# Square of the number of Cars93 columns: the size of the plot array below
ncol(Cars93)^2
## [1] 729
# Hand the whole data frame to plot() to get the array of scatterplots
plot(x = Cars93)
# Names of the UScereal columns to keep
keep_vars <- c(
  "calories", "protein", "fat",
  "fibre", "carbo", "sugars"
)
# Select those columns by name (list-style indexing keeps a data frame)
df <- UScereal[keep_vars]
str(df)
## 'data.frame': 65 obs. of 6 variables:
## $ calories: num 212 212 100 147 110 ...
## $ protein : num 12.12 12.12 8 2.67 2 ...
## $ fat : num 3.03 3.03 0 2.67 0 ...
## $ fibre : num 30.3 27.3 28 2 1 ...
## $ carbo : num 15.2 21.2 16 14 11 ...
## $ sugars : num 18.2 15.2 0 13.3 14 ...
# 2-by-2 grid: each panel adds one more variable to the matplot() display
par(mfrow = c(2, 2))
# y-variables in the order they are added, and the title of each panel
y_vars <- c("protein", "fat", "fibre", "carbo", "sugars")
panel_titles <- c(
  "Two scatterplots",
  "Three scatterplots",
  "Four scatterplots",
  "Five scatterplots"
)
# Panel i plots the first i + 1 y-variables against calories
for (i in seq_along(panel_titles)) {
  matplot(
    UScereal$calories,
    UScereal[, y_vars[seq_len(i + 1)]],
    xlab = "calories",
    ylab = ""
  )
  title(panel_titles[i])
}
# Attach wordcloud so that wordcloud() is available
library(wordcloud)
# Frequency of each Manufacturer value in Cars93
mfr_table <- table(Cars93$Manufacturer)
# Word cloud of manufacturers with the default minimum frequency
wordcloud(
  words = names(mfr_table), freq = as.numeric(mfr_table),
  scale = c(2, 0.25)
)
# Same cloud with the minimum word frequency set to 1
wordcloud(
  words = names(mfr_table), freq = as.numeric(mfr_table),
  scale = c(2, 0.25), min.freq = 1
)
# Frequency of each Model value in Cars93
model_table <- table(Cars93$Model)
# Word cloud of model names with smaller scaling and min.freq = 1
wordcloud(
  words = names(model_table), freq = as.numeric(model_table),
  scale = c(0.75, 0.25), min.freq = 1
)
# 2-by-2 grid: one panel per anscombe x/y pair, all with default settings
par(mfrow = c(2, 2))
plot(x = anscombe$x1, y = anscombe$y1)  # first pair
plot(x = anscombe$x2, y = anscombe$y2)  # second pair
plot(x = anscombe$x3, y = anscombe$y3)  # third pair
plot(x = anscombe$x4, y = anscombe$y4)  # fourth pair
# Axis limits shared by all four panels
xmin <- 4
xmax <- 19
ymin <- 3
ymax <- 13
# 2-by-2 grid: one panel per anscombe x/y pair
par(mfrow = c(2, 2))
# Panel titles in drawing order; panel k plots yk against xk
dataset_titles <- c(
  "First dataset",
  "Second dataset",
  "Third dataset",
  "Fourth dataset"
)
# Every panel uses the same limits and axis labels; only the data and the
# title change
for (k in seq_along(dataset_titles)) {
  plot(
    anscombe[[paste0("x", k)]], anscombe[[paste0("y", k)]],
    xlim = c(xmin, xmax),
    ylim = c(ymin, ymax),
    xlab = "x value", ylab = "y value",
    main = dataset_titles[k]
  )
}
# 2-by-2 grid: four views of geyser$duration
par(mfrow = c(2, 2))
# Top left: the raw values in row order
plot(geyser$duration, main = "Raw data")
# Top right: histogram drawn with truehist()
truehist(geyser$duration, main = "Histogram")
# Bottom left: the density() estimate
plot(density(geyser$duration), main = "Density")
# Bottom right: QQ-plot drawn with qqPlot()
qqPlot(geyser$duration, main = "QQ-plot")
# Three-row, two-column layout matrix, filled row by row. Non-zero entries
# number the plots and 0 marks an unused cell. TRUE is spelled out because T
# is an ordinary, reassignable variable.
layoutMatrix <- matrix(
  c(
    0, 1,
    2, 0,
    0, 3
  ),
  byrow = TRUE,
  nrow = 3
)
# Set up the plot array described by the matrix
layout(layoutMatrix)
# Outline where the three plots will be drawn
layout.show(n = 3)
# Activate the plot array described by layoutMatrix
layout(mat = layoutMatrix)
# Row positions of the "Before" and "After" observations
indexB <- which(whiteside$Insul == "Before")
indexA <- which(whiteside$Insul == "After")
# Plot 1: "Before" observations only; all three plots share ylim = c(0, 8)
plot(whiteside$Temp[indexB], whiteside$Gas[indexB], ylim = c(0, 8))
title(main = "Before data only")
# Plot 2: every observation
plot(whiteside$Temp, whiteside$Gas, ylim = c(0, 8))
title(main = "Complete dataset")
# Plot 3: "After" observations only
plot(whiteside$Temp[indexA], whiteside$Gas[indexA], ylim = c(0, 8))
title(main = "After data only")
# Rows of the layout: plot 1 occupies the first cell of the top row, and
# plot 2 occupies the last two cells of each of the two lower rows
row1 <- c(1, 0, 0)
row2 <- c(0, 2, 2)
layoutVector <- c(row1, row2, row2)
# Reshape into a three-row matrix, filled row by row. TRUE is spelled out
# because T is an ordinary, reassignable variable.
layoutMatrix <- matrix(layoutVector, byrow = TRUE, nrow = 3)
# Set up the plot array described by the matrix
layout(layoutMatrix)
# First plot in the array: scatterplot of zn against rad
plot(x = Boston$rad, y = Boston$zn)
# Second plot in the array: sunflower plot of the same two variables
sunflowerplot(x = Boston$rad, y = Boston$zn)
# Frequency of each Cylinders value in Cars93
tbl <- table(Cars93$Cylinders)
# Horizontal barplot with transparent bars and blank bar names. The value
# returned by barplot() is kept in mids and used as the y position of the
# labels below. TRUE is spelled out because T is an ordinary, reassignable
# variable.
mids <- barplot(tbl,
                horiz = TRUE,
                col = "transparent",
                names.arg = "")
# Write the level names at x = 20, one per bar
text(20, mids, names(tbl))
# Write the counts at x = 35, one per bar
text(35, mids, as.numeric(tbl))
# Bubble plot: circles at (Horsepower, MPG.city) with radii taken from
# sqrt(Price), using the default inches setting
symbols(
  x = Cars93$Horsepower, y = Cars93$MPG.city,
  circles = sqrt(Cars93$Price)
)
# Same plot with the inches argument set to 0.1
symbols(
  x = Cars93$Horsepower, y = Cars93$MPG.city,
  circles = sqrt(Cars93$Price),
  inches = 0.1
)
# Open a png device that writes to bubbleplot.png
png(filename = 'bubbleplot.png')
# Draw the bubble plot (inches = 0.1) on that device
symbols(
  x = Cars93$Horsepower, y = Cars93$MPG.city,
  circles = sqrt(Cars93$Price),
  inches = 0.1
)
# Close the device, which finishes the file and returns to the previous device
dev.off()
## quartz_off_screen
## 2
# List files whose names match "png" (a regular expression) to confirm the
# file exists
list.files(pattern = "png")
## [1] "bubbleplot.png"
# Iliinsky and Steele color name vector
IScolors <- c("red", "green", "yellow", "blue",
              "black", "white", "pink", "cyan",
              "gray", "orange", "brown", "purple")
# Bar lengths: 2 for the first six colors, 1 for the last six
barWidths <- c(rep(2, 6), rep(1, 6))
# Horizontal barplot with one bar per color, each filled with and labelled by
# its own color name; numeric axes are suppressed and labels are horizontal
# (las = 1). All three vectors are reversed together so they stay aligned;
# presumably the reversal puts the first color at the top - confirm.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
barplot(
  rev(barWidths),
  horiz = TRUE,
  col = rev(IScolors),
  axes = FALSE,
  names.arg = rev(IScolors),
  las = 1)
# Iliinsky and Steele color name vector
IScolors <- c(
  "red", "green", "yellow", "blue",
  "black", "white", "pink", "cyan",
  "gray", "orange", "brown", "purple"
)
# Numeric version of the Cylinders column
cylinderLevel <- as.numeric(Cars93$Cylinders)
# Bubble plot in which cylinderLevel drives both the circle radius and, as an
# index into IScolors, the fill color
symbols(
  x = Cars93$Horsepower, y = Cars93$MPG.city,
  circles = cylinderLevel,
  inches = 0.2,
  bg = IScolors[cylinderLevel]
)
# Cross-tabulate Cylinders (rows) against Origin (columns)
tbl <- table(Cars93$Cylinders, Cars93$Origin)
# Stacked barplot with default settings
barplot(height = tbl)
# Same barplot with the segments colored from IScolors
barplot(height = tbl, col = IScolors)
# Attach insuranceData, load its dataCar data frame, and preview six rows
library(insuranceData)
data("dataCar")
head(dataCar, n = 6L)
## veh_value exposure clm numclaims claimcst0 veh_body veh_age gender area
## 1 1.06 0.3039014 0 0 0 HBACK 3 F C
## 2 1.03 0.6488706 0 0 0 HBACK 2 F A
## 3 3.26 0.5694730 0 0 0 UTE 2 F E
## 4 4.14 0.3175907 0 0 0 STNWG 2 F D
## 5 0.72 0.6488706 0 0 0 HBACK 4 F C
## 6 2.01 0.8542094 0 0 0 HDTOP 3 M C
## agecat X_OBSTAT_
## 1 2 01101 0 0 0
## 2 4 01101 0 0 0
## 3 2 01101 0 0 0
## 4 2 01101 0 0 0
## 5 2 01101 0 0 0
## 6 4 01101 0 0 0
# Attach tabplot without printing its startup messages
suppressPackageStartupMessages(
  library(tabplot)
)
# tableplot() display of dataCar with default settings
tableplot(dataCar)
# Attach lattice so that xyplot() is available
library(lattice)
# Scatterplot of calories against sugars, conditioned on shelf
xyplot(
  calories ~ sugars | shelf,
  data = UScereal
)
# Attach ggplot2
library(ggplot2)
# Base plot object mapping Horsepower to x and MPG.city to y; assigning it
# does not display anything
basePlot <- ggplot(data = Cars93, mapping = aes(x = Horsepower, y = MPG.city))
# Display the plain scatterplot
basePlot + geom_point()
# Same scatterplot with each point colored by indexing IScolors with the
# Cylinders column
basePlot + geom_point(colour = IScolors[Cars93$Cylinders])
# Additionally set each point's size to the numeric version of Cylinders
basePlot + geom_point(
  colour = IScolors[Cars93$Cylinders],
  size = as.numeric(Cars93$Cylinders)
)