Hola guys!
In this tutorial, you’ll learn a new skill: plotting with ggplot2. ggplot2 is a powerful tool for visualizing your data from a new perspective.
library(readxl)
# Read Data.xlsx into a tibble and preview its first rows.
data <- read_excel(path = "Data.xlsx")
head(data)
## # A tibble: 6 × 20
## Station Hours period abundance fiber fragment Film Pellet Transparent Black
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 PTB 1 SB 6.67 6.67 0 0 0 5 1.67
## 2 PTB 2 SB 5 5 0 0 0 0 3.33
## 3 PTB 3 SB 3.33 3.33 0 0 0 1.67 1.67
## 4 PTB 4 SB 5 5 0 0 0 1.67 0
## 5 PTB 5 SB 8.33 8.33 0 0 0 3.33 3.33
## 6 PTB 6 SB 0 0 0 0 0 0 0
## # ℹ 10 more variables: Red <dbl>, Blue <dbl>, Green <dbl>, Orange <dbl>,
## # Yellow <dbl>, Purple <dbl>, `<0.3` <dbl>, `0.3 - 0.5` <dbl>,
## # `0.5 - 1.0` <dbl>, `1.0 - 5.0` <dbl>
# Per-column summary: quartiles and mean for the numeric columns, and
# length / unique-count information for the character columns.
summary(data)
## Station Hours period abundance
## Length :96 Min. : 1.00 Length :96 Min. : 0.000
## N.unique : 4 1st Qu.: 6.75 N.unique : 2 1st Qu.: 3.330
## N.blank : 0 Median :12.50 N.blank : 0 Median : 6.670
## Min.nchar: 3 Mean :12.50 Min.nchar: 2 Mean : 7.587
## Max.nchar: 3 3rd Qu.:18.25 Max.nchar: 2 3rd Qu.:10.000
## Max. :24.00 Max. :25.000
## fiber fragment Film Pellet
## Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 1.670 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 4.165 Median : 0.000 Median :0.0000 Median :0.0000
## Mean : 4.410 Mean : 2.518 Mean :0.6427 Mean :0.0174
## 3rd Qu.: 6.670 3rd Qu.: 1.670 3rd Qu.:1.6700 3rd Qu.:0.0000
## Max. :15.000 Max. :20.000 Max. :6.6700 Max. :1.6700
## Transparent Black Red Blue
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.: 1.252 1st Qu.:0.000 1st Qu.:0.0000
## Median : 1.670 Median : 1.670 Median :0.000 Median :0.0000
## Mean : 3.247 Mean : 2.830 Mean :0.608 Mean :0.5734
## 3rd Qu.: 5.000 3rd Qu.: 3.748 3rd Qu.:1.670 3rd Qu.:1.6700
## Max. :13.330 Max. :11.670 Max. :3.330 Max. :3.3300
## Green Orange Yellow Purple
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.0174 Mean :0.0174 Mean :0.2609 Mean :0.03479
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.6700 Max. :1.6700 Max. :1.6700 Max. :1.67000
## <0.3 0.3 - 0.5 0.5 - 1.0 1.0 - 5.0
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.670 Median : 1.670
## Mean : 1.337 Mean : 1.719 Mean : 2.153 Mean : 2.379
## 3rd Qu.: 1.670 3rd Qu.: 3.330 3rd Qu.: 3.330 3rd Qu.: 3.330
## Max. :13.330 Max. :10.000 Max. :10.000 Max. :13.330
# Compact structure overview: each column's type and its first few values.
str(data)
## tibble [96 × 20] (S3: tbl_df/tbl/data.frame)
## $ Station : chr [1:96] "PTB" "PTB" "PTB" "PTB" ...
## $ Hours : num [1:96] 1 2 3 4 5 6 13 14 15 16 ...
## $ period : chr [1:96] "SB" "SB" "SB" "SB" ...
## $ abundance : num [1:96] 6.67 5 3.33 5 8.33 0 10 8.33 5 8.33 ...
## $ fiber : num [1:96] 6.67 5 3.33 5 8.33 0 8.33 8.33 5 5 ...
## $ fragment : num [1:96] 0 0 0 0 0 0 0 0 0 1.67 ...
## $ Film : num [1:96] 0 0 0 0 0 0 1.67 0 0 1.67 ...
## $ Pellet : num [1:96] 0 0 0 0 0 0 0 0 0 0 ...
## $ Transparent: num [1:96] 5 0 1.67 1.67 3.33 0 0 1.67 3.33 1.67 ...
## $ Black : num [1:96] 1.67 3.33 1.67 0 3.33 0 5 3.33 1.67 5 ...
## $ Red : num [1:96] 0 0 0 1.67 0 0 1.67 0 0 0 ...
## $ Blue : num [1:96] 0 1.67 0 1.67 1.67 0 3.33 1.67 0 1.67 ...
## $ Green : num [1:96] 0 0 0 0 0 0 0 0 0 0 ...
## $ Orange : num [1:96] 0 0 0 0 0 0 0 0 0 0 ...
## $ Yellow : num [1:96] 0 0 0 0 0 0 0 1.67 0 0 ...
## $ Purple : num [1:96] 0 0 0 0 0 0 0 0 0 0 ...
## $ <0.3 : num [1:96] 0 0 0 0 0 0 1.67 0 0 1.67 ...
## $ 0.3 - 0.5 : num [1:96] 0 1.67 0 0 1.67 0 1.67 0 0 1.67 ...
## $ 0.5 - 1.0 : num [1:96] 3.33 1.67 0 1.67 5 0 5 3.33 0 1.67 ...
## $ 1.0 - 5.0 : num [1:96] 3.33 1.67 3.33 3.33 1.67 0 1.67 5 5 3.33 ...
# Number of rows and columns, returned together.
dim(data)
## [1] 96 20
# Number of rows (observations) only.
nrow(data)
## [1] 96
# Number of columns (variables) only.
ncol(data)
## [1] 20
# For a data frame, length() counts columns, so it matches ncol().
length(data)
## [1] 20
# Column names, needed below to choose which columns to reshape.
names(data)
## [1] "Station" "Hours" "period" "abundance" "fiber"
## [6] "fragment" "Film" "Pellet" "Transparent" "Black"
## [11] "Red" "Blue" "Green" "Orange" "Yellow"
## [16] "Purple" "<0.3" "0.3 - 0.5" "0.5 - 1.0" "1.0 - 5.0"
Did you know that ggplot2 works best with long-format data? Most of the time we store data in wide format, where every variable is filled in under its own column, but ggplot2 can’t easily plot several variables at once that way. So we need to reshape the data into long format first. To do that, we use the ‘tidyverse’ package.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Reshape from wide to long: each measurement column listed in `cols` becomes
# a (Parameter, Concentration) pair, while Station, Hours and period are kept
# as identifier columns.
data_long <- pivot_longer(
  data,
  cols = c(
    abundance, fiber, fragment, Film, Pellet,
    Transparent, Black, Red, Blue, Green, Orange, Yellow, Purple,
    "<0.3", "0.3 - 0.5", "0.5 - 1.0", "1.0 - 5.0"
  ),
  names_to = "Parameter",
  values_to = "Concentration"
)
names(data_long)
## [1] "Station" "Hours" "period" "Parameter"
## [5] "Concentration"
# Shared canvas for every plot below: Station on the x axis (and as the fill
# colour), Concentration on the y axis. Defining it once avoids repeating the
# same ggplot() call for each variation.
base_plot <- ggplot(
  data_long,
  aes(x = Station, y = Concentration, fill = Station)
)

# Without a geom layer only the empty axes are drawn.
base_plot

# The same mapping shown with three different geoms.
base_plot + geom_boxplot()
base_plot + geom_violin()
# geom_col() is the shorthand for geom_bar(stat = "identity").
base_plot + geom_col(position = "stack")

# Outlier styling, built up one argument at a time.
# Shape 21 is a circle with a separate fill colour; shape 15 is a solid square.
base_plot + geom_boxplot(outlier.shape = 21)
base_plot + geom_boxplot(outlier.shape = 15)
base_plot + geom_boxplot(outlier.shape = 21, outlier.fill = "purple")
base_plot +
  geom_boxplot(
    outlier.shape = 21,
    outlier.fill = "purple",
    outlier.size = 5
  )
base_plot +
  geom_boxplot(
    outlier.shape = 21,
    outlier.fill = "purple",
    outlier.size = 5,
    outlier.alpha = 0.3
  )

# One panel per Parameter, all panels sharing the same axes.
base_plot +
  geom_boxplot(
    outlier.shape = 21,
    outlier.fill = "purple",
    outlier.size = 5,
    outlier.alpha = 0.3
  ) +
  facet_wrap(~Parameter)

# Same faceted plot with smaller outlier points.
base_plot +
  geom_boxplot(
    outlier.shape = 21,
    outlier.fill = "purple",
    outlier.size = 3,
    outlier.alpha = 0.3
  ) +
  facet_wrap(~Parameter)

# scales = "free_y" gives each panel its own y-axis range.
base_plot +
  geom_boxplot(
    outlier.shape = 21,
    outlier.fill = "purple",
    outlier.size = 3,
    outlier.alpha = 0.3
  ) +
  facet_wrap(~Parameter, scales = "free_y")