# Anscombe's quartet: load the built-in dataset and print it.
data("anscombe")
anscombe

# Summary statistics for each (x_i, y_i) pair: one column per dataset, one row
# per statistic. vapply() is used instead of sapply() so the result is
# guaranteed to be a 5-row numeric matrix regardless of input.
stats <- vapply(seq_len(4), function(i) {
  x <- anscombe[[paste0("x", i)]]
  y <- anscombe[[paste0("y", i)]]
  c(
    mean_x = mean(x),
    mean_y = mean(y),
    sd_x = sd(x),
    sd_y = sd(y),
    cor_xy = cor(x, y)
  )
}, FUN.VALUE = numeric(5))
colnames(stats) <- paste("Dataset", 1:4)
round(stats, 4)
#>        Dataset 1 Dataset 2 Dataset 3 Dataset 4
#> mean_x    9.0000    9.0000    9.0000    9.0000
#> mean_y    7.5009    7.5009    7.5000    7.5009
#> sd_x      3.3166    3.3166    3.3166    3.3166
#> sd_y      2.0316    2.0317    2.0304    2.0306
#> cor_xy    0.8164    0.8162    0.8163    0.8165
invisible({
  # Draw one panel of the quartet: a scatter plot of y against x on fixed
  # axes, with the least-squares regression line overlaid in red.
  #   x, y        - numeric vectors to plot
  #   dataset_num - index used in the panel title and axis labels
  plot_anscombe <- function(x, y, dataset_num) {
    plot(x, y,
      pch = 19, col = "steelblue",
      main = paste("Dataset", dataset_num),
      xlab = paste0("x", dataset_num),
      ylab = paste0("y", dataset_num),
      xlim = c(3, 15), ylim = c(3, 13)
    )
    abline(lm(y ~ x), col = "red", lwd = 2)
  }

  # par() returns the previous values of the parameters it sets; keep them so
  # that mar, oma and cex are restored afterwards, not just mfrow.
  old_par <- par(
    mfrow = c(2, 2),
    mar = c(4, 4, 2, 1),
    oma = c(0, 0, 2, 0), cex = 1
  )
  plot_anscombe(anscombe$x1, anscombe$y1, 1)
  plot_anscombe(anscombe$x2, anscombe$y2, 2)
  plot_anscombe(anscombe$x3, anscombe$y3, 3)
  plot_anscombe(anscombe$x4, anscombe$y4, 4)
  # Shared title drawn in the outer margin reserved by oma above.
  mtext("Anscombe's Quartet", outer = TRUE, cex = 1.5, font = 2)
  par(old_par)
})
# Scatter plot of mpg against weight with all defaults
plot(mtcars$wt, mtcars$mpg)

# The same scatter plot with a title, axis labels and styled points
plot(
  mtcars$wt, mtcars$mpg,
  pch = 19, cex = 1.5, col = "steelblue",
  xlab = "Weight (1000 lbs)",
  ylab = "Miles per Gallon",
  main = "MPG vs Weight"
)

# Overlay the least-squares fit of mpg on wt
abline(
  lm(mpg ~ wt, data = mtcars),
  lwd = 2, col = "red"
)
# Histogram of mpg with all defaults
hist(mtcars$mpg)

# Density-scaled histogram (freq = FALSE) so a density curve can share the axis
hist(
  mtcars$mpg,
  freq = FALSE,
  breaks = 15,
  border = "white", col = "steelblue",
  xlab = "Miles per Gallon",
  main = "Distribution of MPG"
)

# Overlay the kernel density estimate
lines(
  density(mtcars$mpg),
  lwd = 2, col = "red"
)
# Number of cars per cylinder count, shown as a bar chart
counts <- table(mtcars$cyl)
barplot(
  counts,
  beside = TRUE,
  col = c("#2196F3", "#F5A623", "#4CAF50"),
  main = "Cars by Cylinders"
)

# Distribution of mpg within each cylinder group
boxplot(
  mpg ~ cyl,
  data = mtcars,
  col = c("#2196F3", "#F5A623", "#4CAF50"),
  xlab = "Cylinders",
  ylab = "MPG",
  main = "MPG by Cylinders"
)
# Pie chart of five hand-entered values, one labelled wedge per country code
slices <- c(10, 12, 4, 16, 8)
labels <- c("US", "UK", "AU", "DE", "FR")
pie(
  x = slices,
  labels = labels,
  main = "Country Distribution"
)
# ggplot2: one minimal example per geom.
# (The geom names below were labels fused onto the code lines; they are now
# comments so that each statement parses.)
library(tidyverse)

# geom_point()
ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = "steelblue", size = 3)

# geom_line()
ggplot(economics, aes(date, unemploy)) +
  geom_line(color = "darkred")

# geom_bar()
ggplot(mtcars, aes(factor(cyl))) +
  geom_bar(fill = "steelblue")

# geom_histogram()
ggplot(mtcars, aes(mpg)) +
  geom_histogram(bins = 15, fill = "coral")

# geom_boxplot()
ggplot(mtcars, aes(factor(cyl), mpg)) +
  geom_boxplot(fill = "lightblue")

# geom_smooth()
ggplot(mtcars, aes(wt, mpg)) +
  geom_smooth(method = "lm")
# ggplot2 example: mpg against weight, coloured by cylinder count, with one
# linear fit per group (se = FALSE turns off the confidence band).
ggplot(data = mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Fuel Efficiency vs Vehicle Weight",
    subtitle = "Grouped by number of cylinders",
    x = "Weight (1000 lbs)",
    y = "Miles per Gallon",
    color = "Cylinders"
  ) +
  # One manual colour per level of factor(cyl)
  scale_color_manual(values = c("#2196F3", "#F5A623", "#E91E63")) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))
# Overlapping, semi-transparent mpg density curves, one per cylinder count
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, fill = factor(cyl))
) +
  geom_density(alpha = 0.5) +
  labs(
    title = "MPG Density by Cylinders",
    fill = "Cylinders"
  ) +
  theme_minimal()
library(corrplot)

# Pairwise correlations of the first seven mtcars columns
cor_matrix <- cor(mtcars[, 1:7])

# Upper-triangle colour map with the coefficients printed in black
corrplot(
  cor_matrix,
  type = "upper",
  method = "color",
  addCoef.col = "black"
)
# Normal Q-Q plot of mpg in base graphics, with a red reference line
qqnorm(mtcars$mpg)
qqline(mtcars$mpg, col = "red")

# The same Q-Q plot built with ggplot2
ggplot(data = mtcars, mapping = aes(sample = mpg)) +
  stat_qq() +
  stat_qq_line()
# Scatter-plot matrix of the first four mtcars columns (base graphics)
pairs(mtcars[, 1:4])

# GGally version of the same matrix, coloured by cylinder count
library(GGally)
# NOTE(review): the colour aesthetic references mtcars$cyl directly rather
# than a column of the data passed in - confirm this is intended.
ggpairs(
  mtcars[, 1:4],
  mapping = aes(color = factor(mtcars$cyl))
)
# Create a ts object
data <- AirPassengers
class(data)
#> [1] "ts"

# Basic time series plot
plot.ts(data,
  main = "Monthly Air Passengers",
  ylab = "Passengers (thousands)",
  xlab = "Year",
  col = "steelblue", lwd = 2
)
# ggplot2
# Convert ts to data.frame: one row per monthly observation, dated from
# January 1949. The number of dates is taken from the series itself so the
# two columns cannot get out of step.
df <- data.frame(
  date = seq(as.Date("1949-01-01"),
    by = "month", length.out = length(AirPassengers)
  ),
  value = as.numeric(AirPassengers)
)
ggplot(df, aes(date, value)) +
  geom_line(color = "steelblue") +
  ylab("Passengers (thousands)") + xlab("") +
  # Label the x axis every two years as abbreviated month plus year
  scale_x_date(date_labels = "%b %Y", date_breaks = "2 year")
# Autocorrelation and partial autocorrelation side by side.
# par() returns the previous layout; restore it afterwards so later base
# plots are not squeezed into a two-panel layout.
old_par <- par(mfrow = c(1, 2))
acf(AirPassengers, main = "ACF")
pacf(AirPassengers, main = "PACF")
par(old_par)
# Classical decomposition of the series, plotted component by component
decomp <- decompose(x = AirPassengers)
plot(decomp)
# Elements of the result:
#   $trend    - long-term direction
#   $seasonal - repeating pattern
#   $random   - residual noise

# STL decomposition (more robust), using a periodic seasonal window
stl_result <- stl(
  x = AirPassengers,
  s.window = "periodic"
)
plot(stl_result)
library(forecast)

# Fit an automatically selected ARIMA model to the series
fit <- auto.arima(y = AirPassengers)

# Plot the forecast for the next 24 periods
autoplot(
  forecast(fit, h = 24)
)
library(tsutils)

# Each seasplot() call below prints the same test summary; it is kept here as
# commented output so the script parses.

# Seasonal diagram (default)
seasplot(AirPassengers)
#> Results of statistical testing
#> Evidence of trend: TRUE (pval: 0)
#> Evidence of seasonality: TRUE (pval: 0)

# Seasonal boxplots
seasplot(AirPassengers, outplot = 2)
#> Results of statistical testing
#> Evidence of trend: TRUE (pval: 0)
#> Evidence of seasonality: TRUE (pval: 0)

# Seasonal subseries
seasplot(AirPassengers, outplot = 3)
#> Results of statistical testing
#> Evidence of trend: TRUE (pval: 0)
#> Evidence of seasonality: TRUE (pval: 0)

# Seasonal density
seasplot(AirPassengers, outplot = 5)
#> Results of statistical testing
#> Evidence of trend: TRUE (pval: 0)
#> Evidence of seasonality: TRUE (pval: 0)

# With custom decomposition
seasplot(AirPassengers, decomposition = "multiplicative")
#> Results of statistical testing
#> Evidence of trend: TRUE (pval: 0)
#> Evidence of seasonality: TRUE (pval: 0)