The following packages are required for this exercise.
library(httr)
library(jsonlite)
library(tidyverse)
Also, let’s use the ggplot2 classic theme.
# Make the classic theme the default for every ggplot drawn from here on
theme_set(new = theme_classic())
We are going to plot some other visualizations.
Let’s consider this data again. Import using the API.
# Request - demand
# Daily demand evolution for the peninsular electric system, 2018.
myurl1 <- 'https://apidatos.ree.es/en/datos/demanda/evolucion'
res1 <- GET(myurl1, query = list(start_date = '2018-01-01T00:00',
                                 end_date = '2018-12-31T23:59',
                                 time_trunc = 'day',
                                 geo_trunc = 'electric_system',
                                 geo_limit = 'peninsular',
                                 geo_ids = 8741))
# Stop right away on an HTTP error instead of parsing an error payload as data
stop_for_status(res1)
# Decode the response body explicitly as UTF-8 text before parsing the JSON
json_list1 <- fromJSON(content(res1, as = 'text', encoding = 'UTF-8'))
names(json_list1)
att1 <- json_list1$included$attributes
# Keep the first element of `values` (it is indexed as a table further down)
demand <- att1$values[[1]]
# Request - generation
# Daily renewable / non-renewable generation for the peninsular system, 2018.
myurl2 <- 'https://apidatos.ree.es/en/datos/generacion/evolucion-renovable-no-renovable'
res2 <- GET(myurl2, query = list(start_date = '2018-01-01T00:00',
                                 end_date = '2018-12-31T23:59',
                                 time_trunc = 'day',
                                 geo_trunc = 'electric_system',
                                 geo_limit = 'peninsular',
                                 geo_ids = 8741))
# Stop right away on an HTTP error instead of parsing an error payload as data
stop_for_status(res2)
# Decode the response body explicitly as UTF-8 text before parsing the JSON
json_list2 <- fromJSON(content(res2, as = 'text', encoding = 'UTF-8'))
names(json_list2)
att2 <- json_list2$included$attributes
# NOTE(review): assumes the first element of `values` is the renewable series
# and the second the non-renewable one - confirm against the API response
genRenew <- att2$values[[1]]
gen_NO_Renew <- att2$values[[2]]
# Request - CO2 emissions (non-renewable generation detail)
# Daily values for the peninsular electric system, 2018.
myurl3 <- 'https://apidatos.ree.es/en/datos/generacion/no-renovables-detalle-emisiones-CO2'
res3 <- GET(myurl3, query = list(start_date = '2018-01-01T00:00',
                                 end_date = '2018-12-31T23:59',
                                 time_trunc = 'day',
                                 geo_trunc = 'electric_system',
                                 geo_limit = 'peninsular',
                                 geo_ids = 8741))
# Stop right away on an HTTP error instead of parsing an error payload as data
stop_for_status(res3)
# Decode the response body explicitly as UTF-8 text before parsing the JSON
json_list3 <- fromJSON(content(res3, as = 'text', encoding = 'UTF-8'))
names(json_list3)
att3 <- json_list3$included$attributes
# NOTE(review): presumably the first element of `values` is the coal series -
# verify against the API response
emis_coal <- att3$values[[1]]
Then, we piece together the data frame.
# Assemble one row per day: a single measurement column from each series plus
# the timestamp. Binding two columns per table (`[, 1:2]`) together with the
# whole emissions table yields more columns than the five names assigned, so
# the labels would not line up with the data.
# NOTE(review): assumes every series table exposes `value` and `datetime`
# columns - confirm against the API response. `[, "name"]` raises
# "undefined columns selected" if that assumption is wrong, rather than
# silently mislabelling the data. Rows are matched by position, exactly as in
# the cbind() version.
red_espana <- data.frame(
  demand = demand[, "value"],
  genNORenew = gen_NO_Renew[, "value"],
  genRenew = genRenew[, "value"],
  emissions_coal = emis_coal[, "value"],
  datetime = emis_coal[, "datetime"]
)
head(red_espana)
## demand genNORenew genRenew emissions_coal datetime
## 1 539925.8 287922.9 316417.7 21770.98 2018-01-01T00:00:00.000+01:00
## 2 684962.0 321135.2 376182.3 26800.44 2018-01-02T00:00:00.000+01:00
## 3 713422.3 338870.2 367576.2 27789.60 2018-01-03T00:00:00.000+01:00
## 4 713027.3 324406.6 392756.2 25067.77 2018-01-04T00:00:00.000+01:00
## 5 685084.6 337284.2 327492.0 31557.17 2018-01-05T00:00:00.000+01:00
## 6 604379.4 344724.3 239958.3 40941.25 2018-01-06T00:00:00.000+01:00
And finally, let’s perform some pre-treatment.
# Split column 'datetime' into date and time (the text before / after the 'T')
red_espana_sep <- separate(red_espana, col = 'datetime',
                           into = c('date', 'time'), sep = 'T')
# Convert the date strings into Date format
red_espana_sep$date <- as.Date(red_espana_sep$date)
# "English" is a Windows locale name; on other systems this call only warns
# and leaves LC_TIME unchanged, so nothing below relies on it.
Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
# Extract the month name from the date. Going through the month number ("%m")
# keeps the names English on every platform, whereas months() follows the
# session locale and can yield names that are absent from month.name.
red_espana_sep$month <- month.name[as.integer(format(red_espana_sep$date, "%m"))]
# Order the months chronologically (January < ... < December)
red_espana_ord <- red_espana_sep
red_espana_ord$month <- factor(red_espana_sep$month,
                               levels = month.name, ordered = TRUE)
We can explore even more interesting graphics. However, some of them are better suited to non-temporal data. To explore these, let’s go back to the mpg data set from the ggplot2 package.
# Preview the first six rows of the mpg data set
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
Previously, we used the geom_point() function to make scatter plots. We can also combine several types of plots to get a cool result. For example, let’s put together the time series of the variables demand and emissions_coal, and represent them as points.
We also can add a smoothed line with geom_smooth()
# Base scatter plot: both series drawn as points over time. `plt1` is not
# created anywhere earlier in this script, so it is built here before the
# smoothing layers are added. The series are rescaled exactly as in the
# smoothing layers below so that points and curves overlap.
plt1 <- ggplot(red_espana_ord) +
  geom_point(aes(x = date, y = emissions_coal / 1000, colour = 'CO2 Emissions')) +
  geom_point(aes(x = date, y = demand / 3000, colour = 'Demand'))
# Overlay a smoothed trend line for each series
plt1 <- plt1 +
  geom_smooth(aes(x = date, y = emissions_coal / 1000, colour = 'CO2 Emissions')) +
  geom_smooth(aes(x = date, y = demand / 3000, colour = 'Demand'))
plt1
Finally, let’s make up the plot:
# Choose the two series colours by hand
plt1 <- plt1 +
  scale_colour_manual(values = c("red", "blue"))
plt1
# Draw the tick labels of both axes in black
plt1 <- plt1 +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )
plt1
# Plot title and subtitle, axis titles and legend title
plt1 <- plt1 +
  labs(
    title = "My time series plot",
    subtitle = "CO2 emissions and demand of energy over time",
    x = "\nDate",
    y = "CO2 emissions x 1000 (by coal)\n",
    colour = "Variable"
  )
plt1
# Add a secondary y axis that repeats the primary scale (multiplied by 1)
plt1 <- plt1 +
  scale_y_continuous(sec.axis = sec_axis(~ . * 1, name = "Demand x 3000"))
plt1
Sometimes we want to explore data distribution. Let’s make density plots using a grouping variable.
# Density of city mileage, one filled curve per cylinder count
plt3 <- ggplot(mpg, aes(x = cty))
plt3 <- plt3 +
  geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )
plt3
Also, let’s edit the labels and add a title.
# Title, subtitle, x-axis title and legend title for the density plot
plt3 <- plt3 +
  labs(
    x = "City Mileage",
    fill = "# Cylinders",
    title = "Density plot",
    subtitle = "City Mileage by Number of cylinders"
  )
plt3
Maybe we can plot the densities of a time series using a more innovative approach. Let’s use the ggridges package, which is based on the grammar of ggplot2.
# install.packages("ggridges")
library(ggridges)
This is possible by adding geom_density_ridges() as a layer of the plot.
# Ridgeline plot: one demand density per month, stacked along the y axis
ggplot(red_espana_ord, aes(y = month)) +
  geom_density_ridges(aes(x = demand, fill = month),
                      alpha = .8, color = 'black') +
  # `month` has 12 levels; the 'RdBu' Brewer palette has at most 11 colours,
  # which would leave one month without a fill (plus a warning). 'Paired'
  # provides 12.
  scale_fill_brewer(palette = 'Paired') +
  labs(x = 'Demand', y = 'Month',
       title = 'Demand of energy',
       subtitle = 'Energy consumption distribution by month') +
  # The y axis already names each month, so the fill legend is dropped
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "black"),
        axis.text.y = element_text(colour = "black"))
More information here
The correlation coefficient is a measure of the strength of the linear relationship between two variables. It will be studied in Unit 2. For now, just keep in mind that values near -1 indicate a strong inverse (decreasing) relationship, values near 1 indicate a strong direct (increasing) relationship, and values near 0 indicate little or no linear relationship. Let’s plot a heatmap, where the color represents the value of the correlation coefficient.
# Keep mpg together with the columns from disp through qsec
mycars <- select(mtcars, mpg, disp:qsec)
head(mycars)
## mpg disp hp drat wt qsec
## Mazda RX4 21.0 160 110 3.90 2.620 16.46
## Mazda RX4 Wag 21.0 160 110 3.90 2.875 17.02
## Datsun 710 22.8 108 93 3.85 2.320 18.61
## Hornet 4 Drive 21.4 258 110 3.08 3.215 19.44
## Hornet Sportabout 18.7 360 175 3.15 3.440 17.02
## Valiant 18.1 225 105 2.76 3.460 20.22
# Correlation of every selected column with every other one
corrs <- cor(mycars)
corrs
## mpg disp hp drat wt qsec
## mpg 1.0000000 -0.8475514 -0.7761684 0.68117191 -0.8676594 0.41868403
## disp -0.8475514 1.0000000 0.7909486 -0.71021393 0.8879799 -0.43369788
## hp -0.7761684 0.7909486 1.0000000 -0.44875912 0.6587479 -0.70822339
## drat 0.6811719 -0.7102139 -0.4487591 1.00000000 -0.7124406 0.09120476
## wt -0.8676594 0.8879799 0.6587479 -0.71244065 1.0000000 -0.17471588
## qsec 0.4186840 -0.4336979 -0.7082234 0.09120476 -0.1747159 1.00000000
Hence, the correlation between mpg and disp is -0.8475514.
# cor() returned a plain matrix
class(corrs)
## [1] "matrix" "array"
## Transformation
# Rebuild the correlations as a data frame so they can be reshaped
corrs <- as.data.frame(cor(mycars))
class(corrs)
## [1] "data.frame"
corrs
## mpg disp hp drat wt qsec
## mpg 1.0000000 -0.8475514 -0.7761684 0.68117191 -0.8676594 0.41868403
## disp -0.8475514 1.0000000 0.7909486 -0.71021393 0.8879799 -0.43369788
## hp -0.7761684 0.7909486 1.0000000 -0.44875912 0.6587479 -0.70822339
## drat 0.6811719 -0.7102139 -0.4487591 1.00000000 -0.7124406 0.09120476
## wt -0.8676594 0.8879799 0.6587479 -0.71244065 1.0000000 -0.17471588
## qsec 0.4186840 -0.4336979 -0.7082234 0.09120476 -0.1747159 1.00000000
# Copy the variable names into a regular column; rows and columns of the
# table share the same names in the same order
corrs[["variable2"]] <- colnames(corrs)
corrs
## mpg disp hp drat wt qsec
## mpg 1.0000000 -0.8475514 -0.7761684 0.68117191 -0.8676594 0.41868403
## disp -0.8475514 1.0000000 0.7909486 -0.71021393 0.8879799 -0.43369788
## hp -0.7761684 0.7909486 1.0000000 -0.44875912 0.6587479 -0.70822339
## drat 0.6811719 -0.7102139 -0.4487591 1.00000000 -0.7124406 0.09120476
## wt -0.8676594 0.8879799 0.6587479 -0.71244065 1.0000000 -0.17471588
## qsec 0.4186840 -0.4336979 -0.7082234 0.09120476 -0.1747159 1.00000000
## variable2
## mpg mpg
## disp disp
## hp hp
## drat drat
## wt wt
## qsec qsec
# Long format: one row per (variable2, variable1) pair with its correlation
corrs <- pivot_longer(
  corrs,
  cols = mpg:qsec,
  names_to = "variable1",
  values_to = "corr"
)
corrs
## # A tibble: 36 x 3
## variable2 variable1 corr
## <chr> <chr> <dbl>
## 1 mpg mpg 1
## 2 mpg disp -0.848
## 3 mpg hp -0.776
## 4 mpg drat 0.681
## 5 mpg wt -0.868
## 6 mpg qsec 0.419
## 7 disp mpg -0.848
## 8 disp disp 1
## 9 disp hp 0.791
## 10 disp drat -0.710
## # ... with 26 more rows
# Heatmap: one tile per variable pair, filled by the correlation value
ggplot(corrs, aes(x = variable1, y = variable2)) +
  geom_tile(aes(fill = corr)) +
  labs(fill = "Correlation")