library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

mpg data

# Load the mpg fuel-economy data set into the workspace.
data(mpg)

Format of the mpg data set: a data frame with 234 rows and 11 variables.

  1. manufacturer
  2. model ⇨ model name
  3. displ ⇨ engine displacement (engine size), in litres
  4. year ⇨ year of manufacture
  5. cyl ⇨ number of cylinders
  6. trans ⇨ type of transmission
  7. drv ⇨ f = front-wheel drive, r = rear-wheel drive, 4 = four-wheel drive
  8. cty ⇨ city miles per gallon
  9. hwy ⇨ highway miles per gallon or efficiency
  10. fl ⇨ fuel type
  11. class ⇨ “type” of car
# Preview the first six rows of mpg.
head(mpg, n = 6)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa~
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa~
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa~
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa~
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa~
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa~
# Per-column summary statistics for mpg.
mpg %>%
  summary()
##  manufacturer          model               displ            year     
##  Length:234         Length:234         Min.   :1.600   Min.   :1999  
##  Class :character   Class :character   1st Qu.:2.400   1st Qu.:1999  
##  Mode  :character   Mode  :character   Median :3.300   Median :2004  
##                                        Mean   :3.472   Mean   :2004  
##                                        3rd Qu.:4.600   3rd Qu.:2008  
##                                        Max.   :7.000   Max.   :2008  
##       cyl           trans               drv                 cty       
##  Min.   :4.000   Length:234         Length:234         Min.   : 9.00  
##  1st Qu.:4.000   Class :character   Class :character   1st Qu.:14.00  
##  Median :6.000   Mode  :character   Mode  :character   Median :17.00  
##  Mean   :5.889                                         Mean   :16.86  
##  3rd Qu.:8.000                                         3rd Qu.:19.00  
##  Max.   :8.000                                         Max.   :35.00  
##       hwy             fl               class          
##  Min.   :12.00   Length:234         Length:234        
##  1st Qu.:18.00   Class :character   Class :character  
##  Median :24.00   Mode  :character   Mode  :character  
##  Mean   :23.44                                        
##  3rd Qu.:27.00                                        
##  Max.   :44.00
# Frequency of each cylinder count.
table(mpg[["cyl"]])
## 
##  4  5  6  8 
## 81  4 79 70
# Drop the 5-cylinder cars, then re-tabulate the remaining cylinder counts.
mpg <- mpg %>%
  filter(cyl != 5)
table(mpg[["cyl"]])
## 
##  4  6  8 
## 81 79 70
# Inspect the column types of the filtered data.
mpg %>%
  str()
## tibble [230 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:230] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:230] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:230] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:230] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:230] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:230] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:230] "f" "f" "f" "f" ...
##  $ cty         : int [1:230] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:230] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:230] "p" "p" "p" "p" ...
##  $ class       : chr [1:230] "compact" "compact" "compact" "compact" ...
# Treat the cylinder count as a categorical variable, then confirm its class.
mpg <- mpg %>%
  mutate(cyl = as.factor(cyl))
class(mpg[["cyl"]])
## [1] "factor"
# Base-graphics scatter plot of highway mileage against engine displacement,
# with points coloured by the `cyl` factor (number of cylinders).
plot(
  hwy ~ displ,
  data = mpg,
  col = cyl,
  main = "Highway Mileage vs. Engine Displacement by Number of Cylinders"
)
# Base plot() draws no legend on its own; add one so the colours can be decoded.
legend(
  "topright",
  legend = levels(mpg$cyl),
  col = seq_along(levels(mpg$cyl)),
  pch = 1,
  title = "cyl"
)

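You can put the aes(x = x, y = y, color = color) mapping in either ggplot() or geom_point(). A mapping given in ggplot() is inherited by every layer, whereas a mapping given in geom_point() applies to that layer only.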

# Scatter plot of highway mileage vs. engine displacement; point colour is
# mapped to the number of cylinders (`cyl`).
ggplot(data = mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = cyl), size = 2) +
  labs(
    title = "Highway Mileage vs. Engine Displacement by Number of Cylinders"
  )

Color and shape should be placed inside aes() when they are meant to represent a third variable. Outside aes() they set a fixed appearance for all points (e.g. color = "green").

# Same scatter plot, but the number of cylinders (`cyl`) is mapped to point
# shape; colour is set outside aes(), so every point is drawn in green.
ggplot(data = mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(shape = cyl), size = 3, color = "green") +
  labs(
    title = "Highway Mileage vs. Engine Displacement by Number of Cylinders"
  )

# City vs. highway mileage, coloured by number of cylinders.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = cty, y = hwy, colour = cyl))

# Histogram of city mileage using bins one mpg wide.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram(
    binwidth = 1,
    fill = "#69b3a2",
    colour = "#e9ecef",
    alpha = 0.9
  )

diamonds data

# Load the diamonds data set and preview its first six rows.
data("diamonds")
head(diamonds, n = 6)
## # A tibble: 6 x 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Price against carat weight, with points coloured by diamond colour grade.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(colour = color))

# Number of diamonds in each colour/cut combination.
count(diamonds, color, cut)
## # A tibble: 35 x 3
##    color cut           n
##    <ord> <ord>     <int>
##  1 D     Fair        163
##  2 D     Good        662
##  3 D     Very Good  1513
##  4 D     Premium    1603
##  5 D     Ideal      2834
##  6 E     Fair        224
##  7 E     Good        933
##  8 E     Very Good  2400
##  9 E     Premium    2337
## 10 E     Ideal      3903
## # ... with 25 more rows

Heat map in ggplot2

# Heat map: one tile per colour/cut combination, filled by the count `n`.
diamonds %>%
  count(color, cut) %>%
  ggplot(mapping = aes(x = cut, y = color)) +
  geom_tile(mapping = aes(fill = n), colour = "grey", lwd = 0.5) +
  # Alternative fill scale:
  #   scale_fill_gradient(low = "white", high = "blue")
  scale_fill_distiller(palette = "YlGnBu", direction = 1)

economics data

# Load the economics time series and preview its first six rows.
data(economics)
head(economics, n = 6)
## # A tibble: 6 x 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018
# Line chart of the `unemploy` series over `date`.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line(colour = "blue")