library(tidyverse)
library(UsingR)
library(ggplot2)
library(cowplot)
library(dplyr)
data("galton")
library(data.table)
library(tidyr)

1. Histogram

A. Simple and Combined Histograms with Color

# Simple default histogram of parent heights (default fill, default bins)
h <- ggplot(galton, aes(parent)) +
  geom_histogram()

# Same histogram with a fill colour, an outline colour and an x-axis label
h1 <- ggplot(galton, aes(parent)) +
  geom_histogram(fill = "salmon", colour = "darkblue") +
  xlab("H1")

# Same construction with different colours
h2 <- ggplot(galton, aes(parent)) +
  geom_histogram(fill = "red", colour = "black") +
  xlab("H2")

# Two histograms (parent and child heights) on the same axes. ggplot() is
# called empty, so every layer supplies its own data and mapping.
h3 <- ggplot() +
  geom_histogram(
    data = galton, aes(parent),
    fill = "blue", color = "darkblue"
  ) +
  geom_histogram(
    data = galton, aes(child),
    fill = "red", color = "darkred"
  ) +
  xlab("H3")

# Same overlay with the y axis switched from counts to density.
# after_stat(density) replaces the deprecated ..density.. notation.
h4 <- ggplot() +
  geom_histogram(
    data = galton, aes(parent, after_stat(density)),
    fill = "blue", color = "darkblue"
  ) +
  geom_histogram(
    data = galton, aes(child, after_stat(density)),
    fill = "red", color = "darkred"
  ) +
  xlab("H4")

# Frequency polygons instead of bars. A frequency polygon is drawn as a
# line, so only `color` is set; the former `fill` argument was ignored
# (with a warning) and has been removed.
h5 <- ggplot() +
  geom_freqpoly(
    data = galton, aes(parent, after_stat(density)),
    color = "darkblue"
  ) +
  geom_freqpoly(
    data = galton, aes(child, after_stat(density)),
    color = "darkred"
  ) +
  xlab("H5")

# Display all six plots together in one grid
plot_grid(h, h1, h2, h3, h4, h5)

B. Playing with Binwidth

# Four histograms of child heights that differ only in bin width (1 to 4),
# each labelled on the x axis with its plot name and bin width.

# Bin width = 1
g <- ggplot(galton, aes(child)) +
  geom_histogram(fill = "maroon", color = "coral", binwidth = 1) +
  xlab("G, Binwidth = 1")

# Bin width = 2
g1 <- ggplot(galton, aes(child)) +
  geom_histogram(fill = "maroon", color = "coral", binwidth = 2) +
  xlab("G1, Binwidth = 2")

# Bin width = 3
g2 <- ggplot(galton, aes(child)) +
  geom_histogram(fill = "maroon", color = "coral", binwidth = 3) +
  xlab("G2, Binwidth = 3")

# Bin width = 4
g3 <- ggplot(galton, aes(child)) +
  geom_histogram(fill = "maroon", color = "coral", binwidth = 4) +
  xlab("G3, Binwidth = 4")

# Arrange the four plots in one grid; a single label string is supplied and
# nudged with hjust / vjust.
plot_grid(
  g, g1, g2, g3,
  labels = "Childs' Heights",
  hjust = -0.4,
  vjust = 0.1
)

C. Alternative Ways to Draw Histograms

Plotting All Variables at Once

library(reshape)

# data() only loads the dataset and returns its *name* ("iris"), so the
# data frame itself is assigned here rather than the return value of data().
data("iris")
newdata <- iris

# Reshape to long format: Species is kept as the id column and every
# measurement column is stacked into `variable` / `value`. The id column is
# named explicitly instead of letting melt() guess it, and the call is
# namespaced because data.table (attached earlier) also exports melt().
n_newdata <- reshape::melt(newdata, id.vars = "Species")

# One histogram panel per measurement, filled by measurement name
k <- ggplot(n_newdata, aes(x = value, fill = variable)) +
  geom_histogram(colour = "black", binwidth = 1) +
  facet_grid(. ~ variable)
k

We can use the manipulate() function to make a ggplot diagram interactive. However, rendered R Markdown files do not support this interactivity. Please refer to the ggplot_plotly post in my portfolio.

2. Adding Kernel Density Plot on Histograms

# Each plot overlays a Gaussian kernel density estimate on a density-scaled
# histogram of child heights; only the kernel bandwidth (bw) differs.
# after_stat(density) replaces the deprecated ..density.. notation, and the
# layers inherit aes(child) from ggplot() instead of repeating it.

# Bandwidth = Default
g4 <- ggplot(galton, aes(child)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "white", color = "darkred"
  ) +
  geom_density(kernel = "gaussian") +
  xlab("G4; Bandwidth = Default")

# Bandwidth = 1
g5 <- ggplot(galton, aes(child)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "white", color = "darkred"
  ) +
  geom_density(kernel = "gaussian", bw = 1) +
  xlab("G5;  Bandwidth = 1")

# Bandwidth = 5
g6 <- ggplot(galton, aes(child)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "white", color = "darkred"
  ) +
  geom_density(kernel = "gaussian", bw = 5) +
  xlab("G6;  Bandwidth = 5")

# Bandwidth = 10
g7 <- ggplot(galton, aes(child)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "white", color = "darkred"
  ) +
  geom_density(kernel = "gaussian", bw = 10) +
  xlab("G7;  Bandwidth = 10")

# Display the four plots together
plot_grid(g4, g5, g6, g7, labels = "Childs' Heights")

Plotting Kernel Density and Cumulative Distribution Function (CDF)

# Kernel density of child heights (dark blue) and parent heights (dark red).
# The colour is set on each layer: a `color` argument given to ggplot()
# itself is silently ignored, which left the child curve in the default
# colour.
i <- ggplot(galton) +
  geom_density(aes(child), color = "darkblue") +
  geom_density(aes(parent), color = "darkred") +
  xlab("Kernel Density Plot")

# Empirical cumulative distribution function of the same two variables,
# using the same colours
i1 <- ggplot(galton) +
  stat_ecdf(aes(child), color = "darkblue") +
  stat_ecdf(aes(parent), color = "darkred") +
  xlab("CDF Plot")

# Display the two plots side by side
plot_grid(i, i1)

Line Plot, Scatter Plot, Bar Plot, and Pie Chart

# Line plot of parent height against child height
LP <- ggplot(galton, aes(x = child, y = parent)) +
  geom_line()

# Scatter plot of the same two variables
SP <- ggplot(galton, aes(x = child, y = parent)) +
  geom_point()

# Zoom in on the scatter plot: x limited to 65-70 and y to 66-70.
# coord_cartesian() changes the visible window only.
Zoom <- SP +
  coord_cartesian(
    xlim = c(65, 70),
    ylim = c(66, 70)
  )

# Pie chart of species counts: a single full-width bar filled by Species,
# drawn in polar coordinates with the angle taken from y
my_pie <- ggplot(iris, aes(x = factor(1), fill = Species)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Bar diagram of Petal.Length by Species, using the values as given
# (stat = "identity") rather than counting rows
my_bar <- ggplot(
  iris,
  aes(x = Species, y = Petal.Length, fill = Species)
) +
  geom_bar(stat = "identity")

# Same bar diagram with the axes flipped (horizontal bars)
H_bar <- my_bar +
  coord_flip()

# Display all six plots together in a two-column grid with automatic labels
plot_grid(
  LP, SP, Zoom, my_pie, my_bar, H_bar,
  labels = "AUTO",
  align = "h",
  label_size = 12,
  ncol = 2,
  label_colour = "darkblue",
  label_fontfamily = "serif"
)

Q-Plot and Box Plots

# NOTE(review): qplot() is deprecated in recent ggplot2 releases; it is kept
# here because this section demonstrates it. Confirm against the installed
# ggplot2 version before reusing this code.

# Default quick plot: Petal.Width against Sepal.Width
qp <- qplot(Petal.Width, Sepal.Width, data = iris)

# Same plot with points coloured by species
qp1 <- qplot(Petal.Width, Sepal.Width, data = iris, colour = Species)

# Coloured plot with a fitted linear-model line added; built on top of qp1,
# which is the same base plot
qp2 <- qp1 +
  geom_smooth(method = "lm", formula = y ~ x)

# Box plot with default styling: Petal.Width against Species
bp1 <- qplot(Petal.Width, Species, data = iris, geom = "boxplot")

# Box plot filled by species
bp2 <- qplot(
  Petal.Width, Sepal.Width,
  data = iris,
  fill = Species,
  geom = "boxplot"
)

# Box plot filled by species with the jittered points drawn on top
bp3 <- qplot(
  Petal.Width, Sepal.Width,
  data = iris,
  fill = Species,
  geom = c("boxplot", "jitter")
)

# Display the six plots together in two columns
library(gridExtra)
grid.arrange(qp, qp1, qp2, bp1, bp2, bp3, ncol = 2)

Easy Q-Plot (with Density) and Pairwise Correlation

# Quick density plot of Petal.Width, one coloured curve per species
qplot(Petal.Width, colour = Species, data = iris, geom = "density")

# Pairwise scatter plot with base graphics.
# library() is used instead of require(): require() only returns FALSE when
# a package is missing, so the failure would surface later and less clearly.
library(stats)
library(graphics)
pairs(
  iris,
  panel = panel.smooth,
  main = "Pairwise Scatter Plot",
  # colour index 5 where Petal.Length > 2, otherwise 4
  col = 4 + (iris$Petal.Length > 2)
)

# Pairwise analysis using GGally; the lower-triangle panels for continuous
# pairs use the "smooth" panel with a linear-model fit
library(GGally)
gp <- ggpairs(
  iris,
  lower = list(continuous = wrap("smooth", method = "lm"))
)
gp

Scatter Plots of Varying Features

# Simple two-dimensional scatter plot of sepal length against sepal width
l <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()

# Same scatter plot with points coloured by species
l1 <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()

# Bubble plot of mean-centred heights:
# x holds the centred parent heights, y the centred child heights.
x <- galton$parent - mean(galton$parent)
y <- galton$child - mean(galton$child)

# Count how often each (parent, child) pair occurs. table(x, y) puts the
# parent values in the first column and the child values in the second, so
# the columns are named in that order (they were previously swapped, which
# mislabelled both axes).
freqdata <- as.data.frame(table(x, y))
names(freqdata) <- c("parent", "child", "freq")
# table() turns the values into factor levels; convert them back to numbers
freqdata$parent <- as.numeric(as.character(freqdata$parent))
freqdata$child <- as.numeric(as.character(freqdata$child))

# Draw only the pairs that occur. The axes show the same values as before,
# now under the correct names.
l2 <- ggplot(dplyr::filter(freqdata, freq > 0), aes(x = parent, y = child)) +
  scale_size(range = c(2, 20), guide = "none") +
  # Grey backdrop slightly larger than each bubble. show.legend is a layer
  # argument; the old `show_guide` inside aes() was an unknown aesthetic.
  geom_point(aes(size = freq + 20), colour = "grey50", show.legend = FALSE) +
  # Bubble sized and coloured by frequency
  geom_point(aes(colour = freq, size = freq)) +
  scale_colour_gradient(low = "lightblue", high = "white")
l2

Histogram with Mean Value Added

# Read the raw records and keep column 3 plus columns 6 to 11
student_data <- read.csv("Fall 2019.csv")
final_data <- student_data[, c(3, 6:11)]
colnames(final_data) <- c(
  "video", "views_download", "minutes_watched", "AMD",
  "%_complete", "view_time", "last_position"
)

# Parse the timestamp column. strptime() returns POSIXlt, which is a poor
# fit for a data-frame column, so the result is stored as POSIXct.
# NOTE(review): the printed output below shows every view_time as NA, so the
# format string presumably does not match the file - confirm the real
# timestamp layout in the CSV.
parsed_view_time <- as.POSIXct(
  strptime(final_data$view_time, "%Y-%m-%d %H:%M:%S")
)
if (all(is.na(parsed_view_time)) && !all(is.na(final_data$view_time))) {
  warning(
    "No view_time value matched the format '%Y-%m-%d %H:%M:%S'; ",
    "the column is all NA.",
    call. = FALSE
  )
}
final_data$view_time <- parsed_view_time
head(final_data)
##                               video views_download minutes_watched        AMD
## 1              MELTS SKILL 7 PART 1              1       0.7722142 0.77221418
## 2              MELTS SKILL 7 PART 2              1       0.8200626 0.82006258
## 3 v1_micro_teach_coaching_1920x1080              4       0.2109331 0.05273327
## 4 v1_micro_teach_coaching_1920x1080              1       0.3079116 0.30791165
## 5 v1_micro_teach_coaching_1920x1080              4       0.1667102 0.04167754
## 6                        V2 Badging              1       5.1299646 5.12996465
##   %_complete view_time last_position
## 1         15      <NA>    480.339262
## 2         17      <NA>    200.175393
## 3          3      <NA>      2.897424
## 4          5      <NA>     18.474699
## 5          3      <NA>      9.684445
## 6        100      <NA>    308.712287
# Pairwise plot of all columns; cardinality_threshold is raised to 30 and
# the lower-triangle continuous panels use a linear-model smooth
ggpairs(
  final_data,
  cardinality_threshold = 30,
  lower = list(continuous = wrap("smooth", method = "lm"))
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.