Tutorial followed and modified from: https://brunomioto.com/posts/intro_ggplot2/en/#the-data

Packages required for this tutorial

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggpath)                     
## Warning: package 'ggpath' was built under R version 4.1.3
library(magick)
## Warning: package 'magick' was built under R version 4.1.3
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
library(pokemon)
## Warning: package 'pokemon' was built under R version 4.1.3

Pokemon data from the “pokemon” package

# Load the pokemon dataset from the "pokemon" package into the environment.
# Both '<-' and '=' work as assignment operators; '<-' is the conventional one.
pokemon.df <- pokemon::pokemon

Base R plot of pokemon weight and height

# Scatter plot with base R graphics: weight on x, height on y
plot(x = pokemon.df$weight, y = pokemon.df$height)

Label the axes.

# Same scatter plot, now with axis labels and a title
plot(
  x = pokemon.df$weight,
  y = pokemon.df$height,
  xlab = "kg",
  ylab = "m",
  main = "pokemon heights and weights"
)

?plot() # opens the documentation for plot; help(plot) is an alternative
## starting httpd help server ... done

Look at the data.

# Glimpse the data: notice the type of each column and any missing values
dplyr::glimpse(pokemon.df)
## Rows: 949
## Columns: 22
## $ id              <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,~
## $ pokemon         <chr> "bulbasaur", "ivysaur", "venusaur", "charmander", "cha~
## $ species_id      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,~
## $ height          <dbl> 0.7, 1.0, 2.0, 0.6, 1.1, 1.7, 0.5, 1.0, 1.6, 0.3, 0.7,~
## $ weight          <dbl> 6.9, 13.0, 100.0, 8.5, 19.0, 90.5, 9.0, 22.5, 85.5, 2.~
## $ base_experience <dbl> 64, 142, 236, 62, 142, 240, 63, 142, 239, 39, 72, 178,~
## $ type_1          <chr> "grass", "grass", "grass", "fire", "fire", "fire", "wa~
## $ type_2          <chr> "poison", "poison", "poison", NA, NA, "flying", NA, NA~
## $ hp              <dbl> 45, 60, 80, 39, 58, 78, 44, 59, 79, 45, 50, 60, 40, 45~
## $ attack          <dbl> 49, 62, 82, 52, 64, 84, 48, 63, 83, 30, 20, 45, 35, 25~
## $ defense         <dbl> 49, 63, 83, 43, 58, 78, 65, 80, 100, 35, 55, 50, 30, 5~
## $ special_attack  <dbl> 65, 80, 100, 60, 80, 109, 50, 65, 85, 20, 25, 90, 20, ~
## $ special_defense <dbl> 65, 80, 100, 50, 65, 85, 64, 80, 105, 20, 25, 80, 20, ~
## $ speed           <dbl> 45, 60, 80, 65, 80, 100, 43, 58, 78, 45, 30, 70, 50, 3~
## $ color_1         <chr> "#78C850", "#78C850", "#78C850", "#F08030", "#F08030",~
## $ color_2         <chr> "#A040A0", "#A040A0", "#A040A0", NA, NA, "#A890F0", NA~
## $ color_f         <chr> "#81A763", "#81A763", "#81A763", NA, NA, "#DE835E", NA~
## $ egg_group_1     <chr> "monster", "monster", "monster", "monster", "monster",~
## $ egg_group_2     <chr> "plant", "plant", "plant", "dragon", "dragon", "dragon~
## $ url_icon        <chr> "//archives.bulbagarden.net/media/upload/7/7b/001MS6.p~
## $ generation_id   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ url_image       <chr> "https://raw.githubusercontent.com/HybridShivam/Pokemo~
# First rows of the data
utils::head(pokemon.df)
## # A tibble: 6 x 22
##      id pokemon    species_id height weight base_experience type_1 type_2    hp
##   <dbl> <chr>           <dbl>  <dbl>  <dbl>           <dbl> <chr>  <chr>  <dbl>
## 1     1 bulbasaur           1    0.7    6.9              64 grass  poison    45
## 2     2 ivysaur             2    1     13               142 grass  poison    60
## 3     3 venusaur            3    2    100               236 grass  poison    80
## 4     4 charmander          4    0.6    8.5              62 fire   <NA>      39
## 5     5 charmeleon          5    1.1   19               142 fire   <NA>      58
## 6     6 charizard           6    1.7   90.5             240 fire   flying    78
## # i 13 more variables: attack <dbl>, defense <dbl>, special_attack <dbl>,
## #   special_defense <dbl>, speed <dbl>, color_1 <chr>, color_2 <chr>,
## #   color_f <chr>, egg_group_1 <chr>, egg_group_2 <chr>, url_icon <chr>,
## #   generation_id <dbl>, url_image <chr>
#tail of the data (last rows)
#note: 10,147 is the largest id, not the row count; glimpse() reported 949 rows
tail(pokemon.df) #the last id is 10,147
## # A tibble: 6 x 22
##      id pokemon     species_id height weight base_experience type_1 type_2    hp
##   <dbl> <chr>            <dbl>  <dbl>  <dbl>           <dbl> <chr>  <chr>  <dbl>
## 1 10142 minior-vio~        774    0.3    0.3             175 rock   flying    60
## 2 10143 mimikyu-bu~        778    0.2    0.7             167 ghost  fairy     55
## 3 10144 mimikyu-to~        778    0.4    2.8             167 ghost  fairy     55
## 4 10145 mimikyu-to~        778    0.4    2.8             167 ghost  fairy     55
## 5 10146 kommo-o-to~        784    2.4  208.              270 dragon fight~    75
## 6 10147 magearna-o~        801    1     80.5             120 steel  fairy     80
## # i 13 more variables: attack <dbl>, defense <dbl>, special_attack <dbl>,
## #   special_defense <dbl>, speed <dbl>, color_1 <chr>, color_2 <chr>,
## #   color_f <chr>, egg_group_1 <chr>, egg_group_2 <chr>, url_icon <chr>,
## #   generation_id <dbl>, url_image <chr>

Is there a relationship between height and weight?

?cor() # documentation for the correlation function
# Pearson correlation between weight and height
cor(
  x = pokemon.df$weight,
  y = pokemon.df$height,
  method = "pearson"
)
## [1] 0.6561909
# Add a linear regression line to the previous plot.
# weight is plotted on x and height on y, so the labels read
# weight (kg) on x and height (m) on y, matching lm(height ~ weight).
plot(pokemon.df$weight, pokemon.df$height,
     xlab = "weight (kg)", ylab = "height (m)",
     main = "pokemon heights and weights")
abline(lm(height ~ weight, data = pokemon.df),
       col = "red")

Is there a relationship between weight and speed?

# Correlation between weight and speed
cor(x = pokemon.df$weight, y = pokemon.df$speed) # no correlation
## [1] 0.03333363
# Assume that pokemon speeds are in m/s units
plot(
  x = pokemon.df$weight,
  y = pokemon.df$speed,
  xlab = "weight (kg)",
  ylab = "speed (m/s)",
  main = "relationship between pokemon speed and weight"
)
# Overlay the linear fit of speed on weight
abline(lm(speed ~ weight, data = pokemon.df), col = "blue")

Using the ggplot package

# ggplot2 is part of the tidyverse packages
# and can use different syntax to do the same things.
# The + sign means 'add this'.
ggplot() # 1 graphics layer

ggplot(pokemon.df, aes(weight, height)) # 2 graphics layers

ggplot(pokemon.df, aes(weight, height)) +
  geom_point() # 3 graphics layers

Now let’s pimp up the ggplot by adding color and shapes.

# Dress up the plot: make color relative to the type_1 data categories
ggplot(pokemon.df, aes(weight, height)) +
  geom_point(aes(color = type_1), shape = 25) +
  ggtitle("heights and weights of pokemon according to type_1") +
  theme_bw() +
  geom_smooth() # adds a smooth regression line using a default method
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

To summarize: there seems to be a maximum for height as weight increases. Most of the pokemon are clustered between 0 and 5 m in height and 0 and 500 kg in weight.

We can calculate the range, average, and median for the heights and weights

#descriptive statistics: range, mean, and median of height, then of weight
range(pokemon.df$height) #height range (minimum and maximum)
## [1]  0.1 14.5
mean(pokemon.df$height)  #average height
## [1] 1.22824
median(pokemon.df$height)#median height
## [1] 1
range(pokemon.df$weight) #weight range (minimum and maximum)
## [1]   0.1 999.9
mean(pokemon.df$weight)  #average weight
## [1] 66.21317
median(pokemon.df$weight)#median weight
## [1] 28.8
## [1] 28.8

Box plots

# Box plots of height, grouped and colored by each pokemon's FIRST type
ggplot(pokemon.df, aes(type_1, height)) +
  geom_boxplot(aes(color = type_1)) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  ggtitle("height distributions based on pokemon's first type")

# Box plots of weight, grouped and colored by each pokemon's FIRST type
ggplot(pokemon.df, aes(type_1, weight)) +
  geom_boxplot(aes(color = type_1)) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  ggtitle("weight distributions based on pokemon's first type")

# Box plots of HP, grouped and colored by each pokemon's FIRST type
ggplot(pokemon.df, aes(type_1, hp)) +
  geom_boxplot(aes(color = type_1)) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  ggtitle("health point distributions based on pokemon's first type")

# Box plots of speed, grouped and colored by each pokemon's FIRST type
ggplot(pokemon.df, aes(type_1, speed)) +
  geom_boxplot(aes(color = type_1)) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5)) +
  ggtitle("speed distributions based on pokemon's first type")

Column plots using ggplot

# Column plot of the defense score of every pokemon in the dataset
ggplot(pokemon.df, aes(pokemon, defense)) +
  geom_col() +
  ggtitle("defense score for all pokemon")

What is the difference between geom_col and geom_bar? Use ?geom_col and ?geom_bar.

THE END