INTRODUCTION:
The goal is to visualize how power consumption evolves over different periods of time.

Suppressing warnings and messages in the rendered output:

# Hide package messages and warnings in every knitted chunk.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

Calling the libraries

library(dplyr) # data-frame transformation verbs (mutate, group_by, summarise, ...)
library(ggplot2) # plotting
library(skimr) # per-column summaries of data frames (skim)
library(extrafont) # makes additional font families available to the plots
library(lubridate) # helpers for date-time variables (hour, wday, month, year)
# Displays the package information/index for lubridate; only useful interactively.
library(help=lubridate)
# Load the semicolon-separated measurements (the first row holds the column
# names) and print a per-column summary of what was read.
dades <- read.csv(
  file = "household_power_consumption.txt",
  sep = ";",
  header = TRUE
)
skim(dades)
Data summary
Name dades
Number of rows 2075259
Number of columns 9
_______________________
Column type frequency:
character 8
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Date 0 1 8 10 0 1442 0
Time 0 1 8 8 0 1440 0
Global_active_power 0 1 1 6 0 4187 0
Global_reactive_power 0 1 1 5 0 533 0
Voltage 0 1 1 7 0 2838 0
Global_intensity 0 1 1 6 0 222 0
Sub_metering_1 0 1 1 6 0 89 0
Sub_metering_2 0 1 1 6 0 82 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Sub_metering_3 25979 0.99 6.46 8.44 0 0 1 17 31 ▇▁▃▁▁
# Compact overview: one line per column with its type and first values.
glimpse(x = dades)
## Rows: 2,075,259
## Columns: 9
## $ Date                  <chr> "16/12/2006", "16/12/2006", "16/12/2006", "16...
## $ Time                  <chr> "17:24:00", "17:25:00", "17:26:00", "17:27:00...
## $ Global_active_power   <chr> "4.216", "5.360", "5.374", "5.388", "3.666", ...
## $ Global_reactive_power <chr> "0.418", "0.436", "0.498", "0.502", "0.528", ...
## $ Voltage               <chr> "234.840", "233.630", "233.290", "233.740", "...
## $ Global_intensity      <chr> "18.400", "23.000", "23.000", "23.000", "15.8...
## $ Sub_metering_1        <chr> "0.000", "0.000", "0.000", "0.000", "0.000", ...
## $ Sub_metering_2        <chr> "1.000", "1.000", "2.000", "1.000", "1.000", ...
## $ Sub_metering_3        <dbl> 17, 16, 17, 17, 17, 17, 17, 17, 17, 16, 17, 1...

Percentage of missing values in Sub_metering_3:

# Percentage of rows whose Sub_metering_3 is missing. The count is taken from
# the data itself instead of being copied by hand from the skim() printout,
# so it stays correct if the input file changes.
paste(round(sum(is.na(dades$Sub_metering_3)) * 100 / nrow(dades), 1), '%')
## [1] "1.3 %"
# Show the first six rows.
head(dades, n = 6L)

The columns to be taken into account are as follows:

# Discard the three measurements that are not used in the analysis,
# then preview the first ten remaining rows.
dades <- dades %>%
  select(-Global_reactive_power, -Global_intensity, -Voltage)
head(dades, n = 10)

Converting the text columns to numeric values:

# Parse the columns that were read as text into numbers; strings that are
# not valid numbers become NA.
dades <- dades %>%
  mutate(
    across(
      c(Global_active_power, Sub_metering_1, Sub_metering_2),
      as.numeric
    )
  )

The conversion produces missing values (NA) wherever a string does not represent a number. Replacing the missing values with the column mean in the following columns:

# Mean-impute the four measurement columns. Columns are addressed by name
# (not by position 3:6) so the step keeps working if columns are added,
# removed or reordered earlier in the script.
for (col in c("Global_active_power", "Sub_metering_1",
              "Sub_metering_2", "Sub_metering_3")) {
  dades[[col]][is.na(dades[[col]])] <- mean(dades[[col]], na.rm = TRUE)
}
skim(dades)
Data summary
Name dades
Number of rows 2075259
Number of columns 6
_______________________
Column type frequency:
character 2
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Date 0 1 8 10 0 1442 0
Time 0 1 8 8 0 1440 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Global_active_power 0 1 1.09 1.05 0.08 0.31 0.63 1.52 11.12 ▇▁▁▁▁
Sub_metering_1 0 1 1.12 6.11 0.00 0.00 0.00 0.00 88.00 ▇▁▁▁▁
Sub_metering_2 0 1 1.30 5.79 0.00 0.00 0.00 1.00 80.00 ▇▁▁▁▁
Sub_metering_3 0 1 6.46 8.38 0.00 0.00 1.00 17.00 31.00 ▇▁▃▁▁

Setting Datetime and multiplying Global active power by 100/6 :

# Build a POSIXct timestamp from the Date and Time text columns
# (day/month/year hour:minute:second) ...
dades$Datetime <- as.POSIXct(
  paste(dades$Date, dades$Time),
  format = '%d/%m/%Y %H:%M:%S'
)
# ... and rescale Global_active_power by 100/6.
dades$Global_active_power <- dades$Global_active_power * 100 / 6

Creating Dark_power as the difference between Global_active_power and the total of the sub metering ones

# Power not accounted for by any sub-meter: total minus the three sub-meters.
# Columns are referenced by name rather than by position (3 to 6) so that the
# result does not silently change if the column order does.
dades$Dark_power <- dades$Global_active_power -
  (dades$Sub_metering_1 + dades$Sub_metering_2 + dades$Sub_metering_3)

Consumption per minute

First, creating a helper function so that the plotting code does not have to be repeated for each series of graphs

# Line chart of the five consumption series against time.
#   df    : data frame holding Datetime, Global_active_power,
#           Sub_metering_1, Sub_metering_2, Sub_metering_3 and Dark_power.
#   title : text used as the plot title.
# Returns the ggplot object.
graph <- function(df, title) {
  # Legend entry -> line colour.
  series_colours <- c(
    'Total power' = 'lightgray',
    'Sub 1' = 'orange4',
    'Sub 2' = 'thistle4',
    'Sub 3' = 'paleturquoise4',
    'Unknown power' = 'lightsteelblue3'
  )
  # Both axes use the same tick-label appearance.
  tick_text <- element_text(colour = 'black', size = 9)

  ggplot(df, aes(x = Datetime)) +
    geom_line(aes(y = Global_active_power, colour = 'Total power')) +
    geom_line(aes(y = Sub_metering_1, colour = 'Sub 1')) +
    geom_line(aes(y = Sub_metering_2, colour = 'Sub 2')) +
    geom_line(aes(y = Sub_metering_3, colour = 'Sub 3')) +
    geom_line(aes(y = Dark_power, colour = 'Unknown power')) +
    labs(title = title, x = '', y = 'watt-h') +
    scale_colour_manual('Metering', values = series_colours) +
    theme(
      # Horizontal legend under the plot.
      legend.position = 'bottom',
      legend.direction = 'horizontal',
      legend.key = element_rect(fill = 'white', colour = 'white'),
      # Bare panel: axis lines only, no grid, border or background.
      axis.line = element_line(size = 0.5, color = 'black'),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_blank(),
      panel.background = element_blank(),
      # Fonts and centred title.
      plot.title = element_text(family = 'Comic Sans MS', hjust = 0.5),
      text = element_text(family = 'Comic Sans MS'),
      axis.text.x = tick_text,
      axis.text.y = tick_text
    )
}
graph(dades, 'Power Consumption per minute')

Hourly Consumption

Adding Hour as a new column

# Hour of the day extracted from the timestamp.
dades <- dades %>%
  mutate(Hour = hour(Datetime))

Grouping by Date and Hour

# Total of every series per calendar date and hour, stamped with the start of
# that hour ("<Date> <Hour>:00:00") so it can be plotted on a time axis.
hourly_dades <- dades %>%
  group_by(Date, Hour) %>%
  summarise(
    Sub_metering_1 = sum(Sub_metering_1),
    Sub_metering_2 = sum(Sub_metering_2),
    Sub_metering_3 = sum(Sub_metering_3),
    Dark_power = sum(Dark_power),
    Global_active_power = sum(Global_active_power)
  ) %>%
  mutate(
    Datetime = as.POSIXct(
      paste0(Date, ' ', Hour, ':00:00'),
      format = '%d/%m/%Y %H:%M:%S'
    )
  )
graph(hourly_dades, 'Hourly Power Consumption')

Daily Power Consumption

Grouping by Day

# Roll the hourly totals up to one row per date, stamped at midnight of that
# date so it can be plotted on a time axis.
daily_dades <- hourly_dades %>%
  group_by(Date) %>%
  summarise(
    Sub_metering_1 = sum(Sub_metering_1),
    Sub_metering_2 = sum(Sub_metering_2),
    Sub_metering_3 = sum(Sub_metering_3),
    Dark_power = sum(Dark_power),
    Global_active_power = sum(Global_active_power)
  ) %>%
  mutate(
    Datetime = as.POSIXct(
      paste0(Date, ' 00:00:00'),
      format = '%d/%m/%Y %H:%M:%S'
    )
  )
graph(daily_dades, 'Daily Power Consumption')

Monthly Power Consumption

Adding Year and Month as new columns

# Calendar year and month number of each daily row.
daily_dades$Year <- year(daily_dades$Datetime)
daily_dades$Month <- month(daily_dades$Datetime)

Grouping by Year and Month

# Roll the daily totals up to one row per year and month. Each row is stamped
# on the 15th of its month ("15/<Month>/<Year> 00:00:00") for plotting.
monthly_dades <- daily_dades %>%
  group_by(Year, Month) %>%
  summarise(
    Sub_metering_1 = sum(Sub_metering_1),
    Sub_metering_2 = sum(Sub_metering_2),
    Sub_metering_3 = sum(Sub_metering_3),
    Dark_power = sum(Dark_power),
    Global_active_power = sum(Global_active_power)
  ) %>%
  mutate(
    Datetime = as.POSIXct(
      paste0('15/', Month, '/', Year, ' 00:00:00'),
      format = '%d/%m/%Y %H:%M:%S'
    )
  )
graph(monthly_dades, 'Monthly Power Consumption')

Averaged Power Consumption in a day

# Kday: 'w' = Saturday or Sunday, 'h' = any other day of the week
# Season codes used below: 1:'Winter',2:'Spring',3:'Summer',4:'Autumn'
#
# Kday is derived from the numeric day of the week (Monday = 1 ... Sunday = 7)
# rather than from the day name: wday(label = TRUE) returns names in the
# session locale, so comparing against 'sábado'/'domingo' classified every
# row as 'h' on any machine that is not running a Spanish locale.
dades <- dades %>% mutate(Month = month(Datetime),
                          Wday = wday(Datetime, label = TRUE, abbr = FALSE),
                          Kday = ifelse(wday(Datetime, week_start = 1) >= 6, 'w', 'h'))

# Average of every series for each season, day type (Kday) and minute of the
# day. Season is the quarter of the year: 1 = months 1-3, 2 = months 4-6,
# 3 = months 7-9, 4 = months 10-12.
# Done as one grouped summary instead of filtering per season and growing the
# result with rbind() inside a loop. Rows without a month are dropped first,
# exactly as the per-season filter did.
dades_a_day <- dades %>%
  filter(!is.na(Month)) %>%
  mutate(Season = as.integer((Month - 1) %/% 3 + 1)) %>%
  group_by(Season, Kday, Time) %>%
  summarise(
    Sub_metering_1 = mean(Sub_metering_1, na.rm = TRUE),
    Sub_metering_2 = mean(Sub_metering_2, na.rm = TRUE),
    Sub_metering_3 = mean(Sub_metering_3, na.rm = TRUE),
    Dark_power = mean(Dark_power, na.rm = TRUE),
    Global_active_power = mean(Global_active_power, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Put every time of day on the same arbitrary date so that the values can
  # be drawn on a common date-time axis.
  mutate(Time = as.POSIXct(paste('01/01/2010', Time),
                           format = '%d/%m/%Y %H:%M:%S'))

Plotting

# Facet strip labels.
# Season 1..4 are the quarters starting in January, so season 1 is Winter
# (it was labelled "Spring", shifting every season by one). Kday 'w' is
# assigned to Saturday/Sunday and 'h' to the remaining days, so they are
# labelled "weekend" and "weekday" (they were "weekday" and "holiday").
names <- list('1' = "Winter", '2' = "Spring", '3' = "Summer", '4' = "Fall",
              'w' = "weekend", 'h' = "weekday")
# Lookup-table labeller; replaces the deprecated function(variable, value)
# labeller interface. Facet values are matched by name after conversion to
# character, which covers both the numeric Season and the character Kday.
labeller <- as_labeller(unlist(names))

# One panel per day type (rows) and season (columns); each row has its own
# y scale.
ggplot(dades_a_day, aes(Time)) +
  geom_line(aes(y = Global_active_power, colour = "Total power")) +
  geom_line(aes(y = Sub_metering_1, colour = "Sub 1")) +
  geom_line(aes(y = Sub_metering_2, colour = "Sub 2")) +
  geom_line(aes(y = Sub_metering_3, colour = "Sub 3")) +
  geom_line(aes(y = Dark_power, colour = "Unknown power")) +
  scale_colour_manual('Metering',
                      values = c('Total power' = 'lightgray',
                                 'Sub 1' = 'orange4',
                                 'Sub 2' = 'thistle4',
                                 'Sub 3' = 'paleturquoise4',
                                 'Unknown power' = 'lightsteelblue3')) +
  # `scales` spelled out in full (was the partial match `scale`).
  facet_grid(Kday ~ Season, labeller = labeller, scales = "free_y") +
  theme_bw() +
  theme(strip.background = element_rect(colour = "paleturquoise", fill = "paleturquoise")) +
  theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5), axis.line = element_line(size = 0.5, colour = "black")) +
  theme(panel.grid.minor = element_blank()) +
  theme(plot.title = element_text(family = "Comic Sans MS"), text = element_text(family = "Comic Sans MS")) +
  theme(axis.text.x = element_text(colour = "black", size = 7), axis.text.y = element_text(colour = "black", size = 10)) +
  theme(legend.text = element_text(colour = "black", size = 10)) +
  # Hour of the day on the x axis.
  scale_x_datetime(date_labels = "%H") +
  ggtitle("Averaged Power Consumption in a day") + ylab("watt-h") + xlab(" ")