ggplot2 Extensions v1.01

ggplot2 Extensions
- ggpmisc
- lindia
- sugrrants

ggplot2 Extensions

ggpmisc

library(ggpmisc)
library(data.table)
library(dplyr)
library(DT)
library(corrplot)
library(Hmisc)
library(rpart)
library(nlme)
library(ggpmisc)
library(ggrepel)
library(xts)
library(lubridate)
library(tibble)
library(dplyr)

library(ggpmisc)
library(data.table)
library(dplyr)
library(DT)
library(corrplot)
library(Hmisc)
library(rpart)
library(nlme)
library(ggpmisc)
library(ggrepel)
library(xts)
library(lubridate)
library(tibble)
library(dplyr)
# Make theme_bw() the session default; the value returned by theme_set() is
# kept in `old_theme` so the earlier default can be restored later.
old_theme <- theme_set(theme_bw())

# Median `hwy` and `cty` for each value of `cyl`.
tb <- summarise(
  group_by(mpg, cyl),
  hwy = median(hwy),
  cty = median(cty)
)

# One-row tibble: the x/y position plus the summary table in a list column.
data.tb <- tibble(x = 7, y = 44, tb = list(tb))

ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_table(data = data.tb, mapping = aes(x = x, y = y, label = tb)) +
  geom_point()

# Table whose cells are written as plotmath expressions; `parse = TRUE` is
# passed to geom_table() below so the strings are parsed rather than shown
# verbatim.
tb.pm <- tibble(
  Parameter = c("frac(beta[1], a^2)", "frac(beta[2], a^3)"),
  Value = c("10^2.4", "10^3.532")
)
data.tb <- tibble(x = 7, y = 44, tb = list(tb.pm))

ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point() +
  geom_table(
    data = data.tb,
    mapping = aes(x = x, y = y, label = tb),
    parse = TRUE
  ) +
  theme_bw()

# Artificial data: y is x plus Gaussian noise whose standard deviation is
# abs(x). The seed is fixed so the example is reproducible.
set.seed(4321)
x <- seq(-99L, 100L)
y <- x + rnorm(n = length(x), mean = 0, sd = abs(x))

# `group` has two values and is recycled along the 200 rows (A, B, A, B, ...).
my.data <- data.frame(x = x, y = y, group = c("A", "B"))

# Show the simulated data with datatable().
datatable(my.data)

# One panel per group: quadrant lines, quadrant-count labels and the points.
# expand_limits() makes sure the y axis includes -260 and 260.
ggplot(data = my.data, mapping = aes(x = x, y = y, colour = group)) +
  geom_quadrant_lines() +
  stat_quadrant_counts(geom = "label_npc") +
  geom_point() +
  expand_limits(y = c(-260, 260)) +
  facet_wrap(facets = ~group)

# The `mpg` data used by the next example.
datatable(mpg)

# Bar chart of the mean of `hwy` for each value of `cyl`; it is stored in `p`
# so that it can be embedded as an inset in a later plot.
#
# Two deprecated ggplot2 spellings are replaced here:
#   * stat_summary(fun.y = ) -> stat_summary(fun = )
#   * scale_*(guide = FALSE) -> scale_*(guide = "none")
# Both old forms emit deprecation warnings on current ggplot2 releases.
p <- ggplot(mpg, aes(factor(cyl), hwy, fill = factor(cyl))) +
  stat_summary(geom = "col", fun = mean, width = 2 / 3) +
  labs(x = "Number of cylinders", y = NULL, title = "Means") +
  # The fill legend would only repeat the x axis, so it is switched off.
  scale_fill_discrete(guide = "none")

# One-row tibble: the x/y position plus the bar chart `p` (with a base font
# size of 8) stored in a list column for geom_plot().
data.tb <- tibble(x = 7, y = 44, plot = list(p + theme_bw(base_size = 8)))

ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_plot(data = data.tb, mapping = aes(x = x, y = y, label = plot)) +
  geom_point() +
  labs(
    x = "Engine displacement (l)",
    y = "Fuel use efficiency (MPG)",
    colour = "Engine cylinders\n(number)"
  ) +
  theme_bw()

# Artificial data: a cubic polynomial in x plus Gaussian noise with constant
# standard deviation mean(x^3) / 4. The seed is fixed for reproducibility.
set.seed(4321)
x <- seq_len(100L)
y <- (x + x^2 + x^3) + rnorm(n = length(x), mean = 0, sd = mean(x^3) / 4)

# `group`, the y2 multipliers and `block` are shorter than x and are recycled
# along the 100 rows.
my.data <- data.frame(
  x = x,
  y = y,
  group = c("A", "B"),
  y2 = y * c(0.5, 2),
  block = c("a", "a", "b", "b"),
  wt = sqrt(x)
)
# Show the simulated data with datatable().
datatable(my.data)

# Third-degree raw polynomial; the same formula object is handed to both the
# smoother and stat_poly_eq() so the label describes the fitted line.
formula <- y ~ poly(x, 3, raw = TRUE)

ggplot(data = my.data, mapping = aes(x = x, y = y2, fill = block)) +
  geom_point(shape = 21, size = 3) +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(
    mapping = aes(label = ..rr.label..),
    size = 3,
    geom = "label_npc",
    alpha = 0.33,
    formula = formula,
    parse = TRUE
  ) +
  facet_wrap(facets = ~group, scales = "free_y")

# Cubic model written out term by term.
formula <- y ~ x + I(x^2) + I(x^3)

# `tb.vars` renames the columns of the table added by stat_fit_tb(); the new
# names are plotmath strings, hence `parse = TRUE`.
fit_columns <- c(
  Parameter = "term",
  Estimate = "estimate",
  "s.e." = "std.error",
  "italic(t)" = "statistic",
  "italic(P)" = "p.value"
)

ggplot(data = my.data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_fit_tb(
    method = "lm",
    method.args = list(formula = formula),
    tb.vars = fit_columns,
    label.y = "top",
    label.x = "left",
    parse = TRUE
  )

# Show the `chickwts` data with datatable().
datatable(chickwts)

# `weight` summarised per `feed` with "mean_se", plus a "fit.anova" table
# placed at the bottom centre; expand_limits() keeps 0 on the y axis.
ggplot(data = chickwts, mapping = aes(x = factor(feed), y = weight)) +
  stat_summary(fun.data = "mean_se") +
  stat_fit_tb(
    tb.type = "fit.anova",
    label.x = "center",
    label.y = "bottom"
  ) +
  expand_limits(y = 0)

# Build a random lowercase string.
#
# len: number of characters to draw (default 6).
# Returns a single string made of `len` elements of `letters`, sampled with
# replacement and pasted together without a separator.
random_string <- function(len = 6) {
  drawn <- sample(letters, size = len, replace = TRUE)
  paste0(drawn, collapse = "")
}

# Random example data: 100 points in two groups of 50, each point carrying a
# random string label. The seed is fixed so the labels are reproducible.
set.seed(1001)
d <- tibble::tibble(
  x = rnorm(100),
  y = rnorm(100),
  group = rep(c("A", "B"), each = 50),
  lab = replicate(100, random_string())
)

datatable(d)

# Two versions of the same labelled scatter plot; only `keep.fraction` and
# `alpha` of stat_dens2d_labels() differ between them.

# keep.fraction = 0.35
ggplot(data = d, mapping = aes(x = x, y = y, label = lab, color = group)) +
  geom_point() +
  stat_dens2d_labels(
    geom = "label_repel",
    keep.fraction = 0.35,
    alpha = 0.8,
    label.fill = NA
  )

# keep.fraction = 0.6
ggplot(data = d, mapping = aes(x = x, y = y, label = lab, color = group)) +
  geom_point() +
  stat_dens2d_labels(
    geom = "label_repel",
    keep.fraction = 0.6,
    alpha = 0.9,
    label.fill = NA
  )

lindia

library(lindia)
library(MASS)
# Load the Cars93 data and show it with datatable().
data("Cars93")
datatable(Cars93)

# Linear model of Price on three predictors.
cars_lm <- lm(Price ~ Passengers + Length + RPM, data = Cars93)

# With plot.all = FALSE the result of gg_diagnose() is stored instead of being
# drawn; its names are listed so individual plots can be selected.
plots <- gg_diagnose(cars_lm, plot.all = FALSE)
names(plots)

## [1] "residual_hist" "Passengers"    "Length"        "RPM"          
## [5] "res_fitted"    "qqplot"        "scalelocation" "resleverage"  
## [9] "cooksd"

#  [1] "residual_hist" "Passengers"    "Length"        "RPM"          
#  [5] "res_fitted"    "qqplot"        "scalelocation" "resleverage"  
#  [9] "cooksd"
# Split `plots` by position: elements 1 and 3 go into `include_plots`,
# everything else into `exclude_plots`. Only the latter is drawn here.
include_plots <- plots[c(1, 3)]
exclude_plots <- plots[-c(1, 3)]
plot_all(exclude_plots)

# Same model with `Origin` added as a predictor.
cars_lm_2 <- lm(Price ~ Passengers + Length + RPM + Origin, data = Cars93)

# Diagnostics for the extended model, first with default settings and then
# with theme_bw() supplied as the theme.
gg_diagnose(cars_lm_2)

gg_diagnose(cars_lm_2, theme = theme_bw())

sugrrants

library(tidyr)
library(dplyr)
library(viridis)
library(sugrrants)
##library(tsibble)
library(ggplot2)

library(sugrrants)
# Show the `pedestrian` data with datatable() and list its column names.
datatable(pedestrian)

colnames(pedestrian)

##  [1] "Date_Time"     "Date"          "Year"          "Month"        
##  [5] "Mdate"         "Day"           "Time"          "Sensor_ID"    
##  [9] "Sensor_Name"   "Hourly_Counts"

# Work on a copy so that `pedestrian` itself is left unmodified.
pedestrian17 <- pedestrian
pedestrian17

## # A tibble: 78,755 x 10
##    Date_Time           Date        Year Month Mdate Day    Time Sensor_ID
##    <dttm>              <date>     <int> <ord> <int> <ord> <int>     <int>
##  1 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0        18
##  2 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0        13
##  3 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0         3
##  4 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0         9
##  5 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0         6
##  6 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0        25
##  7 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0        30
##  8 2016-01-01 01:00:00 2016-01-01  2016 Janu~     1 Frid~     1        18
##  9 2016-01-01 01:00:00 2016-01-01  2016 Janu~     1 Frid~     1        13
## 10 2016-01-01 01:00:00 2016-01-01  2016 Janu~     1 Frid~     1         3
## # ... with 78,745 more rows, and 2 more variables: Sensor_Name <chr>,
## #   Hourly_Counts <int>

# Store the sensor id as a factor.
pedestrian17$Sensor_ID <- as.factor(pedestrian17$Sensor_ID)

# Keep the rows of the "Melbourne Central" sensor and compute a monthly
# calendar layout for them with frame_calendar().
centre <- subset(pedestrian17, Sensor_Name == "Melbourne Central")
centre_calendar <- frame_calendar(
  centre,
  x = Time,
  y = Hourly_Counts,
  date = Date,
  calendar = "monthly"
)
centre_calendar

## # A tibble: 8,880 x 12
##    Date_Time           Date        Year Month Mdate Day    Time Sensor_ID
##    <dttm>              <date>     <int> <ord> <int> <ord> <int> <fct>    
##  1 2016-01-01 00:00:00 2016-01-01  2016 Janu~     1 Frid~     0 3        
##  2 2016-01-01 01:00:00 2016-01-01  2016 Janu~     1 Frid~     1 3        
##  3 2016-01-01 02:00:00 2016-01-01  2016 Janu~     1 Frid~     2 3        
##  4 2016-01-01 03:00:00 2016-01-01  2016 Janu~     1 Frid~     3 3        
##  5 2016-01-01 04:00:00 2016-01-01  2016 Janu~     1 Frid~     4 3        
##  6 2016-01-01 05:00:00 2016-01-01  2016 Janu~     1 Frid~     5 3        
##  7 2016-01-01 06:00:00 2016-01-01  2016 Janu~     1 Frid~     6 3        
##  8 2016-01-01 07:00:00 2016-01-01  2016 Janu~     1 Frid~     7 3        
##  9 2016-01-01 08:00:00 2016-01-01  2016 Janu~     1 Frid~     8 3        
## 10 2016-01-01 09:00:00 2016-01-01  2016 Janu~     1 Frid~     9 3        
## # ... with 8,870 more rows, and 4 more variables: Sensor_Name <chr>,
## #   Hourly_Counts <int>, .Time <dbl>, .Hourly_Counts <dbl>

# One line per date, drawn on the `.Time` / `.Hourly_Counts` columns of
# `centre_calendar`.
p1 <- ggplot(
  data = centre_calendar,
  mapping = aes(x = .Time, y = .Hourly_Counts, group = Date)
) +
  geom_line()
p1

# Same plot passed through prettify(), with theme_bw() added.
prettify(p1) + theme_bw()

# Names of the two sensors compared below. Every filter on `Sensor_Name` in
# this section goes through this vector, so each name is written only once.
two_sensors <- c(
  "Lonsdale St (South)",
  "Melbourne Convention Exhibition Centre"
)
two_sensors_df <- subset(pedestrian17, Sensor_Name %in% two_sensors)

# Calendar layout with four columns, computed on both sensors together.
two_sensors_calendar <- two_sensors_df %>%
  frame_calendar(x = Time, y = Hourly_Counts, date = Date, ncol = 4)

# One geom_line() layer per sensor, each with its own fixed colour.
p4 <- ggplot(two_sensors_calendar) +
  geom_line(
    data = subset(two_sensors_calendar, Sensor_Name == two_sensors[1]),
    aes(.Time, .Hourly_Counts, group = Date), colour = "#1b9e77"
  ) +
  geom_line(
    data = subset(two_sensors_calendar, Sensor_Name == two_sensors[2]),
    aes(.Time, .Hourly_Counts, group = Date), colour = "#d95f02"
  )
prettify(p4)

library(dplyr, warn.conflicts = FALSE)
# Calendar layout (four rows) for the 2016 records of sensor 13.
sensor_13_2016 <- filter(pedestrian, Sensor_ID == 13, Year == 2016)
calendar_df <- frame_calendar(
  sensor_13_2016,
  x = Time,
  y = Hourly_Counts,
  date = Date,
  nrow = 4
)

# One line per date on the `.Time` / `.Hourly_Counts` columns, then
# prettify() with a custom label size and padding.
p1 <- ggplot(
  data = calendar_df,
  mapping = aes(x = .Time, y = .Hourly_Counts, group = Date)
) +
  geom_line() +
  theme_bw()
prettify(p1, size = 3, label.padding = unit(0.15, "lines"))