library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v forcats   1.0.0     v stringr   1.5.0
## v lubridate 1.9.2     v tibble    3.2.1
## v purrr     1.0.1     v tidyr     1.3.0
## v readr     2.1.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#summary(select(diamonds, x,y,z))
# Distributions of x, y, z and price in `diamonds` ----

# Distribution of x
ggplot(data = diamonds, mapping = aes(x = x)) +
  geom_histogram(binwidth = 0.1)

# Distribution of y
ggplot(data = diamonds, mapping = aes(x = y)) +
  geom_histogram(binwidth = 0.1)

# Distribution of z
ggplot(data = diamonds, mapping = aes(x = z)) +
  geom_histogram(binwidth = 0.1)

# Distribution of price (bins of width 500)
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_histogram(binwidth = 500)

#summary stats
#summary (select(diamonds, price))
# 0.99 carat vs 1 carat
# Count the diamonds at each carat value in the closed range [0.99, 1]
diamonds %>%
  filter(between(carat, 0.99, 1)) %>%
  count(carat)
## # A tibble: 2 x 2
##   carat     n
##   <dbl> <int>
## 1  0.99    23
## 2  1     1558
# Price histogram, zoomed with coord_cartesian(): only the visible window
# changes, no rows are dropped before binning.
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_histogram() +
  coord_cartesian(xlim = c(100, 5000), ylim = c(0, 1500))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram, but with scale limits: values outside the limits are
# removed before the statistics are computed (see the warnings below).
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_histogram() +
  lims(x = c(100, 5000), y = c(0, 1500))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 14714 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 10 rows containing missing values (`geom_bar()`).

# Keep z only when it lies in [3, 5]; anything else becomes NA so the rows
# themselves are preserved.
diamond2 <- diamonds %>%
  mutate(z = if_else(between(z, 3, 5), z, NA_real_))

# Histogram of the remaining z values (NA rows are dropped with a warning)
ggplot(data = diamond2) +
  geom_histogram(mapping = aes(x = z))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 18041 rows containing non-finite values (`stat_bin()`).

# Replace cut with NA where a uniform draw falls below 0.1, then show the
# counts per cut (including the NA bar).
diamond2 %>%
  mutate(
    cut = if_else(runif(n()) < 0.1, NA_character_, as.character(cut))
  ) %>%
  ggplot(mapping = aes(x = cut)) +
  geom_bar()

# Aggregates over a vector containing NA: na.rm = TRUE drops the missing
# value before the mean / sum is computed.
mean(x = c(1, 2, 3, NA), na.rm = TRUE)
## [1] 2
sum(c(1, 2, 3, NA), na.rm = TRUE)
## [1] 6
# Scatterplot of price against carat
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price))

# Price distribution by clarity
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot()

# Price distribution by cut
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_boxplot()

# Price distribution by color
ggplot(diamonds, aes(x = color, y = price)) +
  geom_boxplot()

library(ggstance)
## Warning: package 'ggstance' was built under R version 4.2.3
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
# hwy by vehicle class, with classes ordered by their median hwy.
# Variant 1: class on x, then the axes are swapped with coord_flip().
ggplot(mpg, aes(x = reorder(class, hwy, FUN = median), y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Variant 2: class mapped straight to y, with the boxplot orientation set
# explicitly instead of flipping the coordinates.
ggplot(mpg, aes(y = reorder(class, hwy, FUN = median), x = hwy)) +
  geom_boxplot(orientation = "y")

library(lvplot)
## Warning: package 'lvplot' was built under R version 4.2.3
# Letter-value plot of carat by cut, filled by the LV variable computed by
# the stat. after_stat(LV) replaces the `..LV..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0 and triggered a lifecycle warning.
p <- ggplot(diamonds, aes(cut, carat, fill = after_stat(LV)))
p + geom_lv()

# Violin plot of price for each cut
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  geom_violin()

# Price histogram in one panel per cut, all panels on a single row.
# `scales` is spelled out in full: the previous `scale =` only worked through
# R's partial argument matching. Each panel gets its own y axis ("free_y").
diamonds %>%
  ggplot(aes(price)) +
  geom_histogram() +
  facet_wrap(~ cut, scales = "free_y", nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of price, one coloured line per cut
ggplot(data = diamonds, mapping = aes(x = price, colour = cut)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# There are two methods: geom_quasirandom() and geom_beeswarm().
# The first produces a plot that is a mix of a violin plot and a jitter plot,
# using a random variable to set the point locations; the second produces a
# plot similar to a violin plot, but offsets the points.
# Heatmap of color x cut, filled with each cut's share within its color
# (n divided by the total n of that color).
diamonds %>%
  count(color, cut) %>%
  group_by(color) %>%
  mutate(perc = n / sum(n)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = color, y = cut)) +
  geom_tile(mapping = aes(fill = perc))

# Heatmap of color x cut, filled with each color's share within its cut
# (n divided by the total n of that cut).
diamonds %>%
  count(color, cut) %>%
  group_by(cut) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(color, cut, fill = prop)) +
  geom_tile()

# Mean departure delay for every month/destination pair, drawn as a heatmap.
flights <- nycflights13::flights

flights %>%
  group_by(month, dest) %>%
  summarise(
    # na.rm = TRUE: ignore missing dep_delay values when averaging
    dep_delay = mean(dep_delay, na.rm = TRUE),
    # Return an ungrouped result; without this summarise() keeps the data
    # grouped by month and prints a regrouping message.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = factor(month), y = dest, fill = dep_delay)) +
  geom_tile() +
  labs(x = "Month", y = "Destination", fill = "Departure Delay")

# Frequency polygons of price, coloured by carat split into 8 bins with
# cut_number().
ggplot(diamonds, aes(x = price, colour = cut_number(carat, 8))) +
  geom_freqpoly() +
  labs(x = "Price", y = "Count", colour = "Carat")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Method 1: restrict carat to [0, 2.5], bin it into intervals of width 1,
# then draw price-by-cut boxplots (log10 price axis) in one facet per bin.
diamonds %>%
  filter(carat >= 0, carat <= 2.5) %>%
  mutate(carat = cut_width(carat, width = 1)) %>%
  ggplot(mapping = aes(x = cut, y = price)) +
  geom_boxplot() +
  scale_y_log10() +
  facet_wrap(~ carat)

# Method 2: a single panel with carat split into 8 bins on the x axis and
# one price boxplot per cut (by colour) inside each bin.
ggplot(data = diamonds) +
  geom_boxplot(
    mapping = aes(x = cut_number(carat, 8), y = price, colour = cut)
  )

# Scatterplot of y against x, with both axes zoomed to the window [4, 11]
ggplot(diamonds, aes(x = x, y = y)) +
  geom_point() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))