library(tidyverse)
library(UsingR)
library(ggplot2)
library(cowplot)
library(dplyr)
data("galton")
library(data.table)
library(tidyr)
# Histogram of parent heights using ggplot2's default binning and colours.
h <- ggplot(data = galton, mapping = aes(x = parent)) +
  geom_histogram()

# Same histogram with a custom fill, outline colour and x-axis label.
h1 <- ggplot(data = galton, mapping = aes(x = parent)) +
  geom_histogram(colour = "darkblue", fill = "salmon") +
  labs(x = "H1")

# Equivalent construction, this time adding one layer per statement.
h2 <- ggplot(data = galton, mapping = aes(x = parent))
h2 <- h2 + geom_histogram(colour = "black", fill = "red")
h2 <- h2 + labs(x = "H2")
# Overlay the parent and child height histograms (counts) in a single panel.
# Each layer carries its own data and mapping because ggplot() is left empty.
h3 <- ggplot()
h3 <- h3 + geom_histogram(
  mapping = aes(x = parent), data = galton,
  colour = "darkblue", fill = "blue"
)
h3 <- h3 + geom_histogram(
  mapping = aes(x = child), data = galton,
  colour = "darkred", fill = "red"
)
h3 <- h3 + labs(x = "H3")

# Same overlay, but with the y axis on the density scale instead of counts.
h4 <- ggplot()
h4 <- h4 + geom_histogram(
  mapping = aes(x = parent, y = ..density..), data = galton,
  colour = "darkblue", fill = "blue"
)
h4 <- h4 + geom_histogram(
  mapping = aes(x = child, y = ..density..), data = galton,
  colour = "darkred", fill = "red"
)
h4 <- h4 + labs(x = "H4")
# Frequency polygons (density scale) of parent and child heights.
# geom_freqpoly() draws lines only and has no fill aesthetic, so only the
# line colour is set; passing `fill` was ignored with a warning.
h5 <- ggplot() +
  geom_freqpoly(data = galton, aes(parent, ..density..), color = "darkblue") +
  geom_freqpoly(data = galton, aes(child, ..density..), color = "darkred") +
  xlab("H5")

# Show the six histogram variants side by side in one grid.
plot_grid(h, h1, h2, h3, h4, h5)
# Effect of the bin width on the histogram of child heights.

# Build one maroon/coral histogram of `galton$child`.
#   binwidth: value forwarded to geom_histogram(binwidth = ).
#   label:    text used as the x-axis label of the panel.
# Returns the ggplot object.
child_binwidth_hist <- function(binwidth, label) {
  ggplot(galton, aes(child)) +
    geom_histogram(fill = "maroon", color = "coral", binwidth = binwidth) +
    xlab(label)
}

g <- child_binwidth_hist(1, "G, Binwidth = 1")
g1 <- child_binwidth_hist(2, "G1, Binwidth = 2")
g2 <- child_binwidth_hist(3, "G2, Binwidth = 3")
g3 <- child_binwidth_hist(4, "G3, Binwidth = 4")

# Put the four panels together in one grid.
plot_grid(
  g, g1, g2, g3,
  labels = "Childs' Heights",
  hjust = -0.4, vjust = 0.1
)
# Faceted histograms of the four iris measurements.
library(reshape)
# data() returns the dataset's name, not the data itself, so its result is
# not assigned; the call just makes sure `iris` is available.
data(iris)
# Long format: one row per (Species, variable, value). The id column is named
# explicitly instead of being guessed, and melt() is namespaced because
# data.table (attached earlier in this script) also exports a melt().
n_newdata <- reshape::melt(iris, id.vars = "Species")
k <- ggplot(n_newdata, aes(x = value, fill = variable)) +
  geom_histogram(colour = "black", binwidth = 1) +
  facet_grid(. ~ variable)
k
# We can use the manipulate function to make our ggplot diagrams interactive.
# R Markdown files do not support this kind of interactivity, so please refer
# to the ggplot_plotly post in my portfolio instead.
# Effect of the kernel bandwidth on the density estimate of child heights.

# Build a density-scaled histogram of `galton$child` with a Gaussian kernel
# density curve drawn on top.
#   label: text used as the x-axis label of the panel.
#   bw:    bandwidth forwarded to geom_density(); "nrd0" is ggplot2's own
#          default, so omitting it reproduces the default curve.
# Returns the ggplot object.
child_density_hist <- function(label, bw = "nrd0") {
  ggplot(galton, aes(child)) +
    geom_histogram(aes(child, ..density..), fill = "white", color = "darkred") +
    geom_density(kernel = "gaussian", aes(child), bw = bw) +
    xlab(label)
}

g4 <- child_density_hist("G4; Bandwidth = Default")
g5 <- child_density_hist("G5; Bandwidth = 1", bw = 1)
g6 <- child_density_hist("G6; Bandwidth = 5", bw = 5)
g7 <- child_density_hist("G7; Bandwidth = 10", bw = 10)

# Displaying together
plot_grid(
  g4, g5, g6, g7,
  labels = "Childs' Heights"
)
# Kernel density estimates of child (dark blue) and parent (dark red) heights.
# The colour has to be set on the layer: an extra `color` argument given to
# ggplot() is not a layer parameter and leaves the curve in the default colour.
i <- ggplot(galton, aes(child)) +
  geom_density(colour = "darkblue") +
  geom_density(data = galton, mapping = aes(parent), colour = "darkred") +
  xlab("Kernel Density Plot")

# Empirical cumulative distribution functions of the same two variables,
# using the same colour coding.
i1 <- ggplot(galton, aes(child)) +
  stat_ecdf(colour = "darkblue") +
  stat_ecdf(data = galton, mapping = aes(parent), colour = "darkred") +
  xlab("CDF Plot")

# Displaying together
plot_grid(i, i1)
# Line plot: parent height (y) against child height (x).
LP <- ggplot(data = galton, mapping = aes(x = child, y = parent))
LP <- LP + geom_line()

# Scatter plot of the same two variables.
SP <- ggplot(data = galton, mapping = aes(x = child, y = parent))
SP <- SP + geom_point()

# Zoom the scatter plot to x in [65, 70] and y in [66, 70].
Zoom <- SP + coord_cartesian(ylim = c(66, 70), xlim = c(65, 70))
# Pie chart: one full-width stacked bar of species counts, drawn in polar
# coordinates with the bar height mapped to the angle.
my_pie <- ggplot(data = iris, mapping = aes(x = factor(1), fill = Species))
my_pie <- my_pie + geom_bar(width = 1)
my_pie <- my_pie + coord_polar(theta = "y")

# Bar diagram of Petal.Length per species, filled by species.
my_bar <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Length, fill = Species)
)
my_bar <- my_bar + geom_bar(stat = "identity")

# Same bar diagram with the axes flipped.
H_bar <- my_bar + coord_flip()

# Arrange the six plots in a two-column grid with automatic panel labels.
plot_grid(
  LP, SP, Zoom, my_pie, my_bar, H_bar,
  ncol = 2,
  align = "h",
  labels = "AUTO",
  label_size = 12,
  label_colour = "darkblue",
  label_fontfamily = "serif"
)
# Quick scatter plot of sepal width against petal width.
qp <- qplot(x = Petal.Width, y = Sepal.Width, data = iris)

# Same scatter plot, coloured by species.
qp1 <- qplot(x = Petal.Width, y = Sepal.Width, data = iris, colour = Species)

# Coloured scatter plot with a linear fit added on top.
qp2 <- qp1 + geom_smooth(formula = y ~ x, method = "lm")

# Box plot of petal width for each species.
bp1 <- qplot(x = Petal.Width, y = Species, data = iris, geom = "boxplot")

# Box plot of sepal width against petal width, filled by species.
bp2 <- qplot(
  x = Petal.Width, y = Sepal.Width,
  data = iris, geom = "boxplot", fill = Species
)

# Same box plot with the jittered observations drawn over it.
bp3 <- qplot(
  x = Petal.Width, y = Sepal.Width,
  data = iris, geom = c("boxplot", "jitter"), fill = Species
)

# Lay the six quick plots out in two columns.
library(gridExtra)
grid.arrange(qp, qp1, qp2, bp1, bp2, bp3, ncol = 2)

# Quick density plot of petal width, one curve per species.
qplot(x = Petal.Width, data = iris, geom = "density", colour = Species)
# Pairwise scatter plot matrix with base graphics.
# library() is used instead of require() so that a missing package stops the
# script immediately rather than returning FALSE and failing later.
library(stats)
library(graphics)
pairs(
  iris,
  panel = panel.smooth,
  main = "Pairwise Scatter Plot",
  # Colour index 4 or 5, depending on whether Petal.Length exceeds 2.
  col = 4 + (iris$Petal.Length > 2)
)

# Pairwise analysis with GGally: the lower-triangle panels for continuous
# pairs use the "smooth" plot with a linear-model fit.
library(GGally)
gp <- ggpairs(iris, lower = list(continuous = wrap("smooth", method = "lm")))
gp
# Plain scatter plot of sepal width against sepal length.
l <- ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width))
l <- l + geom_point()

# Same scatter plot with the points coloured by species.
l1 <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
)
l1 <- l1 + geom_point()
# Advanced scatter plot: point size and colour show how often each
# (parent, child) pair of mean-centred heights occurs.
x <- galton$parent - mean(galton$parent)
y <- galton$child - mean(galton$child)

# Frequency of every (x, y) combination. table(x, y) puts x (the parent
# heights) in the first column, so the names must follow that order; naming
# them child/parent swapped the axis labels of the plot.
freqdata <- as.data.frame(table(x, y))
names(freqdata) <- c("parent", "child", "freq")
# table() stores the values as factor levels; go through character to
# recover the original numbers rather than the level indices.
freqdata$parent <- as.numeric(as.character(freqdata$parent))
freqdata$child <- as.numeric(as.character(freqdata$child))

# Only combinations that actually occur are drawn. dplyr::filter is
# namespaced so that stats::filter can never be picked up by accident.
l2 <- ggplot(dplyr::filter(freqdata, freq > 0), aes(x = parent, y = child))
l2 <- l2 + scale_size(range = c(2, 20), guide = "none")
# Grey, slightly larger points drawn underneath each coloured point.
# show.legend is a layer argument, not an aesthetic, so it sits outside aes().
l2 <- l2 + geom_point(
  aes(size = freq + 20),
  colour = "grey50", show.legend = FALSE
)
l2 <- l2 + geom_point(aes(colour = freq, size = freq))
l2 <- l2 + scale_colour_gradient(low = "lightblue", high = "white")
l2
# Load the video-viewing export and keep column 3 plus columns 6 to 11,
# renamed to short analysis-friendly names.
student_data <- read.csv("Fall 2019.csv")
final_data <- student_data[, c(3, 6:11)]
colnames(final_data) <- c(
  "video", "views_download", "minutes_watched", "AMD",
  "%_complete", "view_time", "last_position"
)
# Parse the timestamp and store it as POSIXct: strptime() returns POSIXlt,
# a list-based class that is a poor fit for a data-frame column.
# NOTE(review): the output recorded below shows view_time as all NA, so the
# format string presumably does not match the file -- confirm against the CSV.
final_data$view_time <- as.POSIXct(
  strptime(final_data$view_time, "%Y-%m-%d %H:%M:%S")
)
head(final_data)
## video views_download minutes_watched AMD
## 1 MELTS SKILL 7 PART 1 1 0.7722142 0.77221418
## 2 MELTS SKILL 7 PART 2 1 0.8200626 0.82006258
## 3 v1_micro_teach_coaching_1920x1080 4 0.2109331 0.05273327
## 4 v1_micro_teach_coaching_1920x1080 1 0.3079116 0.30791165
## 5 v1_micro_teach_coaching_1920x1080 4 0.1667102 0.04167754
## 6 V2 Badging 1 5.1299646 5.12996465
## %_complete view_time last_position
## 1 15 <NA> 480.339262
## 2 17 <NA> 200.175393
## 3 3 <NA> 2.897424
## 4 5 <NA> 18.474699
## 5 3 <NA> 9.684445
## 6 100 <NA> 308.712287
# Pairwise plot matrix of the viewing data; continuous pairs in the lower
# triangle get a linear-model smooth, and the cardinality threshold for
# discrete columns is set to 30.
ggpairs(
  data = final_data,
  lower = list(continuous = wrap("smooth", method = "lm")),
  cardinality_threshold = 30
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.