Set up

The following packages are required for this exercise.

library(httr)
library(jsonlite)
library(tidyverse)

Also, let’s use the ggplot2 classic theme as the default for all plots.

# Make theme_classic() the default theme for the plots created from here on
theme_set(theme_classic())

We are going to plot some other visualizations.

Spanish Electric Network

Let’s consider this data again. Import using the API.

# Request - demand
# Daily demand evolution for 2018 (peninsular electric system).
myurl1 <- 'https://apidatos.ree.es/en/datos/demanda/evolucion'
res1 <- GET(myurl1, query=list(start_date='2018-01-01T00:00',
                             end_date='2018-12-31T23:59',
                             time_trunc='day',
                             geo_trunc='electric_system',
                             geo_limit='peninsular',
                             geo_ids=8741))
# Fail here, with the HTTP status in the message, instead of letting an
# error response reach the JSON parser and break in a confusing way later.
stop_for_status(res1)

# Decode the body explicitly as UTF-8 text before parsing the JSON
json_list1 <- fromJSON(httr::content(res1, as='text', encoding='UTF-8'))
names(json_list1)
att1 <- json_list1$included$attributes
# Take the first element of 'values'
# (presumably one data frame per returned series - verify against att1)
demand  <- att1$values[[1]]

# Request - generation
# Daily renewable / non-renewable generation evolution for 2018
# (peninsular electric system).
myurl2 <- 'https://apidatos.ree.es/en/datos/generacion/evolucion-renovable-no-renovable'
res2 <- GET(myurl2, query=list(start_date='2018-01-01T00:00',
                             end_date='2018-12-31T23:59',
                             time_trunc='day',
                             geo_trunc='electric_system',
                             geo_limit='peninsular',
                             geo_ids=8741))
# Fail here, with the HTTP status in the message, instead of letting an
# error response reach the JSON parser and break in a confusing way later.
stop_for_status(res2)

# Decode the body explicitly as UTF-8 text before parsing the JSON
json_list2 <- fromJSON(httr::content(res2, as='text', encoding='UTF-8'))
names(json_list2)
att2   <- json_list2$included$attributes
# NOTE(review): the series are picked by position - this assumes the API
# returns the renewable series first and the non-renewable one second.
# Confirm by inspecting att2 before relying on it.
genRenew <- att2$values[[1]]
gen_NO_Renew <- att2$values[[2]]

# Request - CO2 emissions
# Daily CO2 emissions of non-renewable generation for 2018
# (peninsular electric system).
myurl3 <- 'https://apidatos.ree.es/en/datos/generacion/no-renovables-detalle-emisiones-CO2'
res3 <- GET(myurl3, query=list(start_date='2018-01-01T00:00',
                              end_date='2018-12-31T23:59',
                              time_trunc='day',
                              geo_trunc='electric_system',
                              geo_limit='peninsular',
                              geo_ids=8741))
# Fail here, with the HTTP status in the message, instead of letting an
# error response reach the JSON parser and break in a confusing way later.
stop_for_status(res3)

# Decode the body explicitly as UTF-8 text before parsing the JSON
json_list3 <- fromJSON(httr::content(res3, as='text', encoding='UTF-8'))

names(json_list3)

att3   <- json_list3$included$attributes
# NOTE(review): picked by position - this assumes the first returned series
# is the coal one. Confirm by inspecting att3.
emis_coal <- att3$values[[1]]

Then, we piece together the data frame.

# Assemble one data frame with a single measurement column per series plus
# the timestamp. Columns are selected by name: slicing two columns from each
# series (as `[, 1:2]` does) yields more columns than the five names below,
# so the names would land on the wrong columns.
# NOTE(review): assumes every series data frame exposes 'value' and
# 'datetime' columns - confirm with names(demand). A missing column raises
# an "undefined columns selected" error rather than passing silently.

# The series are combined row by row, so they must cover the same days.
stopifnot(
  nrow(gen_NO_Renew) == nrow(demand),
  nrow(genRenew) == nrow(demand),
  nrow(emis_coal) == nrow(demand)
)

red_espana <- data.frame(
  demand         = demand[, 'value'],
  genNORenew     = gen_NO_Renew[, 'value'],
  genRenew       = genRenew[, 'value'],
  emissions_coal = emis_coal[, 'value'],
  datetime       = emis_coal[, 'datetime'],
  stringsAsFactors = FALSE
)
head(red_espana)
##     demand genNORenew genRenew emissions_coal                      datetime
## 1 539925.8   287922.9 316417.7       21770.98 2018-01-01T00:00:00.000+01:00
## 2 684962.0   321135.2 376182.3       26800.44 2018-01-02T00:00:00.000+01:00
## 3 713422.3   338870.2 367576.2       27789.60 2018-01-03T00:00:00.000+01:00
## 4 713027.3   324406.6 392756.2       25067.77 2018-01-04T00:00:00.000+01:00
## 5 685084.6   337284.2 327492.0       31557.17 2018-01-05T00:00:00.000+01:00
## 6 604379.4   344724.3 239958.3       40941.25 2018-01-06T00:00:00.000+01:00

And finally, let’s perform some pre-treatment.

# Split column 'datetime' at the 'T' into a 'date' part and a 'time' part
red_espana_sep <- separate(red_espana, col='datetime', into=c('date','time'),
                           sep='T')

# Convert the date strings into Date objects
red_espana_sep$date <- as.Date(red_espana_sep$date)
# Ask for English date/time names (affects later date formatting).
# NOTE(review): "English" looks like a Windows locale name (see the echoed
# result below); on other platforms this call may warn and change nothing.
Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
# Derive the English month name from the month number. Unlike months(), this
# does not depend on the active locale, so the names always match the
# month.name levels used below and never turn into NA.
red_espana_sep$month  <- month.name[as.integer(format(red_espana_sep$date,
                                                       "%m"))]

# Order the months chronologically (January < ... < December)
red_espana_ord <- red_espana_sep
red_espana_ord$month <- factor(red_espana_sep$month,
                               levels=month.name, ordered=TRUE)

MPG data

We can explore even more interesting graphics. However, some of them are better suited to non-temporal data. To explore these, let’s go back to the mpg data set from the ggplot2 package.

# Preview the first rows of the mpg data set
head(mpg)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa~
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa~
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa~
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa~
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa~
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa~

Visualizations

Cool time series plot

Previously, we used the geom_point() function to make scatter plots. We can also combine several types of plots to get a cool result. For example, let’s put together the time series of the variables demand and emissions_coal, and represent them as points.

We also can add a smoothed line with geom_smooth()

# Base plot: both daily series drawn as points over time. plt1 has to be
# created before any layer can be added to it. Each variable is rescaled
# (emissions / 1000, demand / 3000) so both are drawn against one y-axis,
# and the constant 'colour' labels produce one legend entry per series.
plt1 <- ggplot(data=red_espana_ord) +
  geom_point(aes(x=date, y=emissions_coal/1000, colour='CO2 Emissions')) +
  geom_point(aes(x=date, y=demand/3000, colour='Demand'))

# Overlay one smoothed line per series, using the same scaling and the same
# colour labels as the points so that they share the legend entries.
plt1 <- plt1 +
  geom_smooth(aes(x=date, y=emissions_coal/1000, colour='CO2 Emissions')) +
  geom_smooth(aes(x=date, y=demand/3000, colour='Demand'))
plt1

Finally, let’s polish the plot:

  1. Change colors.
# Replace the default colour scale with two manually chosen colours
plt1 <- plt1 +
  scale_colour_manual(values = c("red", "blue"))
plt1

# Draw the tick labels of both axes in black
plt1 <- plt1 +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )
plt1

  1. Change x-axis and left y-axis name.
# Set the axis names, legend title, title and subtitle. The "\n" in the axis
# names adds a line break between the name and the axis.
plt1 <- plt1 +
  labs(
    x = "\nDate",
    y = "CO2 emissions x 1000 (by coal)\n",
    colour = "Variable",
    title = "My time series plot",
    subtitle = "CO2 emissions and demand of energy over time"
  )
plt1

  1. Add a secondary axis.
# Add a right-hand y-axis. The transformation multiplies by 1, so it shows
# the same values as the left axis and only carries a different name.
plt1 <- plt1 +
  scale_y_continuous(
    sec.axis = sec_axis(~ . * 1, name = "Demand x 3000")
  )
plt1

Density plots

Sometimes we want to explore data distribution. Let’s make density plots using a grouping variable.

# Density of city mileage (cty), with one semi-transparent filled curve per
# number of cylinders; tick labels of both axes are drawn in black.
plt3 <- ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )
plt3

Also, let’s edit labels and put title

# Title, subtitle, x-axis name and legend title for the density plot
plt3 <- plt3 +
  labs(
    title = "Density plot",
    subtitle = "City Mileage by Number of cylinders",
    x = "City Mileage",
    fill = "# Cylinders"
  )
plt3

We can also plot the densities of a time series with a more innovative approach. Let’s use the ggridges package, which is based on the grammar of ggplot2.

# install.packages("ggridges")
library(ggridges)

This is possible by adding a geom_density_ridges() layer to the plot.

# One fill colour is needed per month level, but the ColorBrewer 'RdBu'
# palette provides at most 11 colours. Asking it for 12 leaves the last month
# without a fill and raises a warning. Interpolating the 11 palette colours
# gives the same red-to-blue look with one colour for every level.
month_fills <- colorRampPalette(RColorBrewer::brewer.pal(11, 'RdBu'))(
  nlevels(red_espana_ord$month)
)

# Ridgeline plot: one demand density per month, stacked along the y-axis.
# The legend is hidden because the y-axis already names every month.
ggplot(red_espana_ord, aes(y = month)) +
  geom_density_ridges(aes(x = demand, fill=month),
                      alpha = .8, color = 'black') +
  scale_fill_manual(values = month_fills) +
  labs(x='Demand', y='Month',
    title = 'Demand of energy',
    subtitle = 'Energy consumption distribution by month'
  ) + theme(legend.position = "none",
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"))

More information here

Heatmap for Correlation matrix

The correlation coefficient measures the strength of the linear relationship between two variables. This will be studied in Unit 2. For now, just keep in mind that values near -1 indicate a strong inverse relationship (one variable decreases as the other increases), values near 1 indicate a strong direct relationship, and values near 0 indicate little or no linear relationship. Let’s plot a heatmap in which the color represents the value of the correlation coefficient.

  1. Select only numeric features.
# Keep mpg plus the consecutive columns from disp through qsec
mycars <- select(mtcars, mpg, disp:qsec)
head(mycars)
##                    mpg disp  hp drat    wt  qsec
## Mazda RX4         21.0  160 110 3.90 2.620 16.46
## Mazda RX4 Wag     21.0  160 110 3.90 2.875 17.02
## Datsun 710        22.8  108  93 3.85 2.320 18.61
## Hornet 4 Drive    21.4  258 110 3.08 3.215 19.44
## Hornet Sportabout 18.7  360 175 3.15 3.440 17.02
## Valiant           18.1  225 105 2.76 3.460 20.22
  1. Let’s compute the correlation matrix. Each element of this matrix is the pairwise correlation coefficient between the row variable and the column variable.
# Correlation coefficient between every pair of columns of mycars
corrs <- cor(mycars)
# Print the matrix
corrs
##             mpg       disp         hp        drat         wt        qsec
## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000

Hence, the correlation between mpg and disp is -0.8475514.

  1. Transform the correlation matrix into a data frame. This is necessary in order to use tidyverse tools.
# Check the current type: the result of cor() is a matrix
class(corrs)
## [1] "matrix" "array"
## Transformation
# Recompute the correlations and convert the matrix into a data frame;
# the variable names are kept as column names and row names.
corrs <- as.data.frame(cor(mycars))
# Confirm the new type
class(corrs)
## [1] "data.frame"
corrs
##             mpg       disp         hp        drat         wt        qsec
## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000
  1. Add a new column with variable names in order to make the pivot.
# Store each row's variable name in a regular column. The column names are
# used for this: in this correlation table they appear in the same order as
# the row names, so every row is labelled with its own variable.
corrs$variable2 <- names(corrs)
corrs
##             mpg       disp         hp        drat         wt        qsec
## mpg   1.0000000 -0.8475514 -0.7761684  0.68117191 -0.8676594  0.41868403
## disp -0.8475514  1.0000000  0.7909486 -0.71021393  0.8879799 -0.43369788
## hp   -0.7761684  0.7909486  1.0000000 -0.44875912  0.6587479 -0.70822339
## drat  0.6811719 -0.7102139 -0.4487591  1.00000000 -0.7124406  0.09120476
## wt   -0.8676594  0.8879799  0.6587479 -0.71244065  1.0000000 -0.17471588
## qsec  0.4186840 -0.4336979 -0.7082234  0.09120476 -0.1747159  1.00000000
##      variable2
## mpg        mpg
## disp      disp
## hp          hp
## drat      drat
## wt          wt
## qsec      qsec
  1. Now, pivot (reshape) the data frame
# Reshape from wide to long: the columns mpg through qsec collapse into a
# name column ('variable1') and a value column ('corr'), giving one row per
# pair of variables.
corrs <- pivot_longer(
  corrs,
  cols = mpg:qsec,
  names_to = "variable1",
  values_to = "corr"
)
corrs
## # A tibble: 36 x 3
##    variable2 variable1   corr
##    <chr>     <chr>      <dbl>
##  1 mpg       mpg        1    
##  2 mpg       disp      -0.848
##  3 mpg       hp        -0.776
##  4 mpg       drat       0.681
##  5 mpg       wt        -0.868
##  6 mpg       qsec       0.419
##  7 disp      mpg       -0.848
##  8 disp      disp       1    
##  9 disp      hp         0.791
## 10 disp      drat      -0.710
## # ... with 26 more rows
  1. Finally, plot the heatmap
# Heatmap: one tile per pair of variables, filled by the correlation value
ggplot(corrs, aes(x = variable1, y = variable2)) +
  geom_tile(aes(fill = corr)) +
  labs(fill = "Correlation")