library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the `mpg` fuel-economy dataset and preview its first six rows.
data("mpg")
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
# Per-column summary: quantiles for numeric columns, length/class for text.
mpg %>%
  summary()
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
# Frequency of each distinct value of `cyl`.
table(mpg[["cyl"]])
##
## 4 5 6 8
## 81 4 79 70
# Drop the four 5-cylinder cars, then confirm only 4, 6 and 8 remain.
mpg <- mpg %>%
  filter(cyl != 5)
table(mpg[["cyl"]])
##
## 4 6 8
## 81 79 70
# Compact structure overview: dimensions plus the type of every column.
utils::str(mpg)
## tibble [230 x 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:230] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:230] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:230] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:230] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:230] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:230] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:230] "f" "f" "f" "f" ...
## $ cty : int [1:230] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:230] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:230] "p" "p" "p" "p" ...
## $ class : chr [1:230] "compact" "compact" "compact" "compact" ...
# Store `cyl` as a factor (categorical) instead of an integer, then verify
# the resulting class.
mpg <- mpg %>%
  mutate(cyl = as.factor(cyl))
class(mpg[["cyl"]])
## [1] "factor"
# Shared title for the highway-mileage-vs-displacement plots. `cyl` in `mpg`
# is the number of cylinders, so the title names it that way.
mileage_title <- paste(
  "Highway Mileage vs. Engine Displacement",
  "and Number of Cylinders"
)

# Base-graphics version. With the formula interface `col = cyl` is evaluated
# inside `mpg`; because `cyl` is a factor, its integer codes (1, 2, 3, ...)
# select colors from the current palette.
plot(
  hwy ~ displ,
  data = mpg,
  col = cyl,
  main = mileage_title
)
# Base graphics adds no legend on its own; map each factor level to the same
# palette index used for the points (default plotting symbol is pch = 1).
legend(
  "topright",
  legend = levels(mpg$cyl),
  col = seq_along(levels(mpg$cyl)),
  pch = 1,
  title = "Cylinders"
)

# ggplot2 version: cylinder count mapped to point color.
ggplot(data = mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = cyl), size = 2) +
  labs(title = mileage_title)

# Same data with cylinder count mapped to point shape; every point is drawn
# in a fixed green because `color` is set outside `aes()`.
ggplot(data = mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(shape = cyl), size = 3, color = "green") +
  labs(title = mileage_title)

# Highway vs. city mileage, colored by cylinder count.
ggplot(mpg) +
  geom_point(aes(x = cty, y = hwy, color = cyl))
# Reference: https://r-graph-gallery.com/220-basic-ggplot2-histogram.html
# In geom_histogram(), the `fill` argument sets the interior color of the bars
# (and `color` sets the color of their outline).
# Histogram of `cty` using bins one unit wide; bars are filled teal with a
# light-grey outline and are slightly transparent.
mpg %>%
  ggplot(mapping = aes(x = cty)) +
  geom_histogram(
    binwidth = 1,
    fill = "#69b3a2",
    color = "#e9ecef",
    alpha = 0.9
  )
# Load the `diamonds` dataset and preview its first six rows.
data("diamonds")
head(diamonds, n = 6L)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Price against carat, with each point colored by the diamond's `color` grade.
diamonds %>%
  ggplot(mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(color = color))
# Number of diamonds in every (color, cut) combination; the count lands in `n`.
count(diamonds, color, cut)
## # A tibble: 35 x 3
## color cut n
## <ord> <ord> <int>
## 1 D Fair 163
## 2 D Good 662
## 3 D Very Good 1513
## 4 D Premium 1603
## 5 D Ideal 2834
## 6 E Fair 224
## 7 E Good 933
## 8 E Very Good 2400
## 9 E Premium 2337
## 10 E Ideal 3903
## # ... with 25 more rows
# Heat map in ggplot2: number of diamonds for each combination of cut and color
# Tile plot of the (color, cut) counts: one tile per combination, filled by
# the count `n` and separated by thin grey borders.
count(diamonds, color, cut) %>%
  ggplot(mapping = aes(x = cut, y = color)) +
  geom_tile(
    mapping = aes(fill = n),
    color = "grey",
    lwd = 0.5
  ) +
  # Alternative fill scale, kept for reference:
  # scale_fill_gradient(low = "white", high = "blue")
  scale_fill_distiller(palette = "YlGnBu", direction = 1)
# Load the `economics` time-series dataset and preview its first six rows.
data("economics")
head(economics, n = 6L)
## # A tibble: 6 x 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
# Line chart of the `unemploy` series over `date`, drawn in blue.
economics %>%
  ggplot(mapping = aes(x = date, y = unemploy)) +
  geom_line(color = "blue")