This report analyzes the `diamonds` dataset from the ggplot2 package.
The dataset includes variables such as carat, cut, color, clarity, and
price. We begin by loading the required libraries and exploring the
dataset.
# Distinct carat values in order of first appearance, then their count
unique(diamonds[["carat"]])
length(unique(diamonds[["carat"]]))
## [1] 0.23 0.21 0.29 0.31 0.24 0.26 0.22 0.30 0.20 0.32 0.33 0.25 0.35 0.42 0.28
## [16] 0.38 0.70 0.86 0.71 0.78 0.96 0.73 0.80 0.75 0.74 0.81 0.59 0.90 0.91 0.61
## [31] 0.77 0.63 0.76 0.64 0.72 0.79 0.58 1.17 0.60 0.83 0.54 0.98 0.52 1.01 0.53
## [46] 0.84 0.51 1.05 0.55 0.87 1.00 0.57 0.82 1.04 0.93 1.20 0.99 0.34 0.43 0.36
## [61] 0.95 0.89 1.02 0.97 0.56 0.85 0.92 1.27 0.66 1.12 0.68 1.03 0.62 1.22 1.08
## [76] 0.88 0.50 1.19 0.39 0.65 1.24 1.50 0.27 0.41 1.13 1.06 0.69 0.40 1.14 0.94
## [91] 1.29 1.52 1.16 1.21 1.23 1.09 0.67 1.11 1.10 1.18 1.15 1.25 1.07 1.28 1.51
## [106] 0.37 1.31 1.26 1.39 1.44 1.35 1.30 1.32 1.41 1.36 1.45 1.34 1.58 1.54 1.38
## [121] 1.33 1.74 1.64 1.47 1.40 1.55 1.95 2.00 1.37 1.83 1.62 1.57 1.69 2.06 1.72
## [136] 1.66 2.14 1.49 1.46 2.15 1.96 2.22 1.70 1.53 1.85 2.01 2.27 1.68 1.56 1.81
## [151] 1.65 1.82 2.03 1.73 1.59 1.42 1.43 2.08 1.48 1.60 2.49 1.71 2.02 2.07 3.00
## [166] 2.21 2.10 1.91 2.25 2.17 2.32 2.72 1.61 2.23 2.11 2.05 1.63 2.30 2.31 1.75
## [181] 2.04 2.12 1.77 2.50 1.80 1.67 1.84 2.20 3.01 1.88 2.33 2.68 2.34 1.90 2.16
## [196] 2.74 1.78 1.76 2.28 1.79 1.94 2.43 1.86 3.11 1.87 2.09 1.89 2.52 2.19 2.18
## [211] 2.77 2.63 3.05 2.46 3.02 2.38 2.24 2.26 2.36 1.99 2.29 3.65 2.45 2.40 2.54
## [226] 3.24 2.13 2.58 3.22 3.50 2.48 1.98 2.44 2.75 1.93 2.41 2.61 2.35 2.51 2.70
## [241] 2.55 1.97 2.53 2.37 2.47 2.80 4.01 2.56 3.04 1.92 2.39 3.40 4.00 3.67 2.42
## [256] 2.66 2.65 2.59 2.60 2.57 2.71 4.13 2.64 5.01 4.50 2.67 3.51 0.44 0.45 0.47
## [271] 0.46 0.48 0.49
## [1] 273
# Distinct color grades in order of first appearance, then their count
unique(diamonds[["color"]])
length(unique(diamonds[["color"]]))
## [1] E I J H F G D
## Levels: D < E < F < G < H < I < J
## [1] 7
# Recode the x and y columns of diamonds as factors (categorical).
# Every distinct value becomes its own level, as the str() summaries show.
diamonds[c("x", "y")] <- lapply(diamonds[c("x", "y")], as.factor)
str(diamonds$x)
## Factor w/ 554 levels "0","3.73","3.74",..: 21 15 31 46 60 20 21 33 13 26 ...
str(diamonds$y)
## Factor w/ 552 levels "0","3.68","3.71",..: 27 13 36 52 64 25 27 40 8 34 ...
# Number of rows (observations) in the dataset.
n <- nrow(diamonds)
# Row index 1..n. seq_len() is used instead of seq(1, n) or 1:n because it
# returns an empty vector, rather than c(1, 0), when n is 0.
# NOTE(review): the ggplot() calls that follow pass `diamonds` as data, so
# aes(x = carat) resolves to the diamonds$carat column and not to this
# vector; confirm whether this index is still needed.
carat <- seq_len(n)
# Layer 1: set up the two-dimensional plotting space (axes only, no geometry)
ggplot(data = diamonds, mapping = aes(x = carat, y = color))

# Layer 2: choose a geometry to draw -- here, points
ggplot(data = diamonds, mapping = aes(x = carat, y = color)) +
  geom_point()

# Layer 3: point aesthetics. Three options are shown in turn:
# size, then size with shape, then color.
ggplot(data = diamonds, mapping = aes(x = carat, y = color)) +
  geom_point(size = 5)

ggplot(data = diamonds, mapping = aes(x = carat, y = color)) +
  geom_point(shape = 4, size = 2)

ggplot(data = diamonds, mapping = aes(x = carat, y = color)) +
  geom_point(color = "red")

# All three aesthetics combined in a single plot, stored for later layers
single_num_plot1 <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = color)
) +
  geom_point(color = "red", size = 1.75, shape = 25)
single_num_plot1
# Layer 4: axis labels and plot titles.
# The labels are added on top of the existing plot object.
single_num_plot2 <- single_num_plot1 +
  labs(
    x = "carat",
    y = "color",
    title = "diamonds dataset",
    subtitle = "data",
    caption = "diamonds"
  )
single_num_plot2
# Layer 5: text styling (color, size, face) for the titles, axis labels
# and caption added in the previous layer
single_num_plot3 <- single_num_plot2 +
  theme(
    plot.title = element_text(
      face = "bold.italic", size = 14, color = "orange"
    ),
    plot.subtitle = element_text(
      face = "italic", size = 12, color = "green4"
    ),
    axis.title.x = element_text(
      face = "bold", size = 14, color = "blue"
    ),
    axis.title.y = element_text(
      face = "bold", size = 14, color = "black"
    ),
    plot.caption = element_text(
      face = "italic", size = 14, color = "steelblue"
    )
  )
single_num_plot3
# All plots in a grid
# Requires the gridExtra package, which provides grid.arrange()
suppressWarnings(suppressMessages(library(gridExtra)))

# Create the individual plots
p1 <- ggplot(diamonds, aes(x = carat)) +
  geom_density(fill = "skyblue") +
  ggtitle("Density Plot")

p2 <- ggplot(diamonds, aes(x = cut)) +
  geom_bar(fill = "pink") +
  ggtitle("Bar Plot")

p3 <- ggplot(diamonds, aes(x = cut, y = price)) +
  geom_boxplot(fill = "lightgreen") +
  ggtitle("Boxplot")

p4 <- ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.1, fill = "orange") +
  ggtitle("Histogram")

# A loess smoother cannot be fitted on the full dataset: predLoess() fails
# with "workspace required ... is too large" and no curve is drawn. A GAM
# smoother is used instead, which is the method ggplot2 itself selects for
# 1,000 or more observations. The formula is given explicitly so no
# "using formula" message is emitted.
# The title now describes what is drawn: a scatter plot with a smoother.
p5 <- ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
  ggtitle("Scatter Plot with Smoother")

# A single stacked bar (constant x) wrapped into polar coordinates gives a
# pie chart of the cut counts
p6 <- ggplot(diamonds, aes(x = 2, fill = cut)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  ggtitle("Pie Chart") +
  theme_void()

# Arrange the six plots in a grid with three columns
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 3)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Failed to fit group -1.
## Caused by error in `predLoess()`:
## ! workspace required (4364865305) is too large probably because of setting 'se = TRUE'.