library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v forcats 1.0.0 v stringr 1.5.0
## v lubridate 1.9.2 v tibble 3.2.1
## v purrr 1.0.1 v tidyr 1.3.0
## v readr 2.1.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#summary(select(diamonds, x,y,z))
# Univariate distributions of the diamond dimensions (x, y, z) and of price.

# x, in bins of width 0.1
ggplot(diamonds, aes(x = x)) +
  geom_histogram(binwidth = 0.1)

# y, in bins of width 0.1
ggplot(diamonds, aes(x = y)) +
  geom_histogram(binwidth = 0.1)

# z, in bins of width 0.1
ggplot(diamonds, aes(x = z)) +
  geom_histogram(binwidth = 0.1)

# price, in bins of width 500
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500)

#summary stats
#summary (select(diamonds, price))
# Count how many diamonds are 0.99 carat versus exactly 1 carat.
diamonds %>%
  filter(between(carat, 0.99, 1)) %>%
  count(carat)
## # A tibble: 2 x 2
## carat n
## <dbl> <int>
## 1 0.99 23
## 2 1 1558
# Price histogram viewed through coord_cartesian(): only the visible window
# is restricted to the given x and y ranges.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  coord_cartesian(xlim = c(100, 5000), ylim = c(0, 1500))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same ranges applied as scale limits instead. As the warnings below show,
# rows and bars that fall outside the limits are removed.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  lims(x = c(100, 5000), y = c(0, 1500))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 14714 rows containing non-finite values (`stat_bin()`).
## Warning: Removed 10 rows containing missing values (`geom_bar()`).

# Keep z only when it lies in [3, 5]; every other value becomes NA.
diamond2 <- mutate(diamonds, z = if_else(between(z, 3, 5), z, NA_real_))

# Histogram of the remaining z values (the NA rows are dropped with a warning).
ggplot(diamond2) +
  geom_histogram(mapping = aes(x = z))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 18041 rows containing non-finite values (`stat_bin()`).
# Replace roughly 10% of the cut labels with NA at random (no seed is set, so
# the result differs between runs) and chart the count of each resulting value.
diamond2 %>%
  mutate(
    # as.character() turns cut into plain strings. The typed NA_character_
    # keeps both if_else() branches the same type, which older dplyr
    # releases require (a bare logical NA is rejected there).
    cut = if_else(runif(n()) < 0.1, NA_character_, as.character(cut))
  ) %>%
  ggplot() +
  geom_bar(mapping = aes(x = cut))

# With na.rm = TRUE the missing element is ignored by both aggregates.
mean(c(1, 2, 3, NA), na.rm = TRUE)
## [1] 2
sum(c(1, 2, 3, NA), na.rm = TRUE)
## [1] 6
# Price against carat, one point per diamond.
diamonds %>%
  ggplot(mapping = aes(x = carat, y = price)) +
  geom_point()

# Price distribution within each level of the three categorical variables.

# by clarity
ggplot(diamonds, aes(x = clarity, y = price)) +
  geom_boxplot()

# by cut
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_boxplot()

# by color
ggplot(diamonds, aes(x = color, y = price)) +
  geom_boxplot()

library(ggstance)
## Warning: package 'ggstance' was built under R version 4.2.3
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
# Highway mileage (hwy) per vehicle class, classes ordered by their median hwy.

# Variant 1: class on x, then the axes are swapped with coord_flip().
ggplot(mpg, aes(x = reorder(class, hwy, FUN = median), y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Variant 2: class mapped to y directly, with the orientation stated
# explicitly.
ggplot(mpg, aes(x = hwy, y = reorder(class, hwy, FUN = median))) +
  geom_boxplot(orientation = "y")

library(lvplot)
## Warning: package 'lvplot' was built under R version 4.2.3
# Letter-value plot of carat by cut, filled by the LV variable computed by the
# stat. after_stat(LV) replaces the `..LV..` dot-dot notation, which ggplot2
# deprecated in 3.4.0 and warned about on every fresh session.
p <- ggplot(diamonds, aes(cut, carat, fill = after_stat(LV)))
p + geom_lv()

# Price distribution per cut, drawn as violins.
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_violin()

# Price histogram per cut: one panel per cut in a single row, each panel with
# its own y scale. The argument is spelled `scales` in full; the previous
# `scale =` only worked through partial argument matching.
diamonds %>%
  ggplot(aes(price)) +
  geom_histogram() +
  facet_wrap(~ cut, scales = "free_y", nrow = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Price frequency polygons, one coloured line per cut.
ggplot(diamonds, aes(x = price, colour = cut)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# There are two methods, geom_quasirandom() and geom_beeswarm(). The first produces a plot that is a mix of a violin plot and a jitter plot, using a random variable to set the point locations; the second produces a plot similar to a violin plot, but built by offsetting the points.
# Heatmap of the share of each cut within a color: counts are divided by the
# per-color total, so perc sums to 1 inside every color.
diamonds %>%
  count(color, cut) %>%
  group_by(color) %>%
  mutate(perc = n / sum(n)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = color, y = cut)) +
  geom_tile(mapping = aes(fill = perc))

# Heatmap of the share of each color within a cut: counts are divided by the
# per-cut total, so prop sums to 1 inside every cut.
diamonds %>%
  count(color, cut) %>%
  group_by(cut) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(color, cut, fill = prop)) +
  geom_tile()

# Heatmap of the mean departure delay for every month/destination pair.
flights <- nycflights13::flights
flights %>%
  group_by(month, dest) %>%
  # na.rm = TRUE skips missing delays. .groups = "drop" states the output
  # grouping explicitly, so summarise() no longer prints its
  # "has grouped output by 'month'" message.
  summarise(dep_delay = mean(dep_delay, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = factor(month), y = dest, fill = dep_delay)) +
  geom_tile() +
  labs(x = "Month", y = "Destination", fill = "Departure Delay")

# Price frequency polygons, one coloured line per carat group; the groups
# come from cut_number(carat, 8).
diamonds %>%
  ggplot(aes(x = price, color = cut_number(carat, 8))) +
  geom_freqpoly() +
  labs(x = "Price", y = "Count", color = "Carat")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Method 1: keep carat in [0, 2.5], bin it into intervals of width 1, and
# draw price-by-cut boxplots (log10 price axis) in one facet per carat bin.
diamonds %>%
  filter(carat >= 0, carat <= 2.5) %>%
  mutate(carat = cut_width(carat, 1)) %>%
  ggplot(mapping = aes(x = cut, y = price)) +
  geom_boxplot() +
  scale_y_log10() +
  facet_wrap(~ carat)

# Method 2: carat groups from cut_number(carat, 8) on the x axis, with one
# price boxplot per cut (by colour) inside each group.
diamonds %>%
  ggplot(aes(x = cut_number(carat, 8), y = price, colour = cut)) +
  geom_boxplot()

# y against x, with the view restricted to 4-11 on both axes.
ggplot(diamonds, aes(x = x, y = y)) +
  geom_point() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))
