Introduction

An analysis of how household energy consumption varies over a two-day period in February 2007 (1–2 February).

Dataset

This project uses data from the UC Irvine Machine Learning Repository, a popular repository for machine learning datasets. In particular, we will be using the “Individual household electric power consumption Data Set” which is available here: Electric power consumption [20MB]

Description: Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available. The following descriptions of the 9 variables in the dataset are taken from the UCI web site:

  • Date: Date in format dd/mm/yyyy
  • Time: time in format hh:mm:ss
  • Global_active_power: household global minute-averaged active power (in kilowatt)
  • Global_reactive_power: household global minute-averaged reactive power (in kilowatt)
  • Voltage: minute-averaged voltage (in volt)
  • Global_intensity: household global minute-averaged current intensity (in ampere)
  • Sub_metering_1: energy sub-metering No. 1 (in watt-hour of active energy). It corresponds to the kitchen, containing mainly a dishwasher, an oven and a microwave (hot plates are not electric but gas powered).
  • Sub_metering_2: energy sub-metering No. 2 (in watt-hour of active energy). It corresponds to the laundry room, containing a washing-machine, a tumble-drier, a refrigerator and a light.
  • Sub_metering_3: energy sub-metering No. 3 (in watt-hour of active energy). It corresponds to an electric water-heater and an air-conditioner.

Plots exploring the relationships between the variables

# Plot 1: histogram of Global Active Power for 1-2 February 2007.

# Download the zipped dataset only when it is not already present.
filename <- "exdata_data_household_power_consumption.zip"
if (!file.exists(filename)) {
  zip_url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
  download.file(zip_url, filename, method = "curl")
}

# Check for the extracted .txt file itself; checking the name without its
# extension never matches, which forced a re-extraction on every run.
if (!file.exists("household_power_consumption.txt")) {
  unzip(filename)
}

# Fields are separated by ";". Reading "?" as NA (the placeholder the UCI file
# uses for missing readings) keeps the measurement columns numeric instead of
# relying on a noisy character-to-numeric coercion later.
data <- read.csv(
  "household_power_consumption.txt",
  sep = ";",
  na.strings = "?",
  stringsAsFactors = FALSE
)

# Dates are stored as d/m/yyyy strings, so these are 1 and 2 February 2007.
subSetData <- data[data$Date %in% c("1/2/2007", "2/2/2007"), ]

hist(
  as.numeric(subSetData$Global_active_power),
  col = "salmon",
  main = "Global Active Power",
  xlab = "Global Active Power (kilowatts)"
)

# Plot 2: Global Active Power over time for 1-2 February 2007.

# Download the zipped dataset only when it is not already present.
filename <- "exdata_data_household_power_consumption.zip"
if (!file.exists(filename)) {
  zip_url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
  download.file(zip_url, filename, method = "curl")
}

# Check for the extracted .txt file itself; checking the name without its
# extension never matches, which forced a re-extraction on every run.
if (!file.exists("household_power_consumption.txt")) {
  unzip(filename)
}

# Fields are separated by ";". Reading "?" as NA (the placeholder the UCI file
# uses for missing readings) keeps the measurement columns numeric.
data <- read.csv(
  "household_power_consumption.txt",
  sep = ";",
  na.strings = "?",
  stringsAsFactors = FALSE
)

# Dates are stored as d/m/yyyy strings, so these are 1 and 2 February 2007.
subSetData <- data[data$Date %in% c("1/2/2007", "2/2/2007"), ]

# Combine the Date and Time columns into one timestamp (POSIXlt) per row.
datetime <- strptime(
  paste(subSetData$Date, subSetData$Time, sep = " "),
  "%d/%m/%Y %H:%M:%S"
)
globalActivePower <- as.numeric(subSetData$Global_active_power)

plot(
  datetime,
  globalActivePower,
  type = "l",
  xlab = "",
  ylab = "Global Active Power (kilowatts)"
)

# Plot 3: the three energy sub-metering series overlaid for 1-2 February 2007.

# Download the zipped dataset only when it is not already present.
filename <- "exdata_data_household_power_consumption.zip"
if (!file.exists(filename)) {
  zip_url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
  download.file(zip_url, filename, method = "curl")
}

# Check for the extracted .txt file itself; checking the name without its
# extension never matches, which forced a re-extraction on every run.
if (!file.exists("household_power_consumption.txt")) {
  unzip(filename)
}

# Fields are separated by ";". Reading "?" as NA (the placeholder the UCI file
# uses for missing readings) keeps the measurement columns numeric.
data <- read.csv(
  "household_power_consumption.txt",
  sep = ";",
  na.strings = "?",
  stringsAsFactors = FALSE
)

# Dates are stored as d/m/yyyy strings, so these are 1 and 2 February 2007.
subSetData <- data[data$Date %in% c("1/2/2007", "2/2/2007"), ]

# Combine the Date and Time columns into one timestamp (POSIXlt) per row.
datetime <- strptime(
  paste(subSetData$Date, subSetData$Time, sep = " "),
  "%d/%m/%Y %H:%M:%S"
)
subMetering1 <- as.numeric(subSetData$Sub_metering_1)
subMetering2 <- as.numeric(subSetData$Sub_metering_2)
subMetering3 <- as.numeric(subSetData$Sub_metering_3)

# The first series sets up the axes; the other two are drawn on top of it.
plot(datetime, subMetering1, type = "l", ylab = "Energy Submetering", xlab = "")
lines(datetime, subMetering2, type = "l", col = "red")
lines(datetime, subMetering3, type = "l", col = "blue")
legend(
  "topright",
  c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3"),
  lty = 1,
  lwd = 2.5,
  col = c("black", "red", "blue")
)

# Plot 4: 2x2 panel of active power, voltage, sub-metering and reactive power
# for 1-2 February 2007.

# Download the zipped dataset only when it is not already present.
filename <- "exdata_data_household_power_consumption.zip"
if (!file.exists(filename)) {
  zip_url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
  download.file(zip_url, filename, method = "curl")
}

# Check for the extracted .txt file itself; checking the name without its
# extension never matches, which forced a re-extraction on every run.
if (!file.exists("household_power_consumption.txt")) {
  unzip(filename)
}

# Fields are separated by ";". Reading "?" as NA (the placeholder the UCI file
# uses for missing readings) keeps the measurement columns numeric.
data <- read.csv(
  "household_power_consumption.txt",
  sep = ";",
  na.strings = "?",
  stringsAsFactors = FALSE
)

# Dates are stored as d/m/yyyy strings, so these are 1 and 2 February 2007.
subSetData <- data[data$Date %in% c("1/2/2007", "2/2/2007"), ]

# Combine the Date and Time columns into one timestamp (POSIXlt) per row.
datetime <- strptime(
  paste(subSetData$Date, subSetData$Time, sep = " "),
  "%d/%m/%Y %H:%M:%S"
)
globalActivePower <- as.numeric(subSetData$Global_active_power)
globalReactivePower <- as.numeric(subSetData$Global_reactive_power)
voltage <- as.numeric(subSetData$Voltage)
subMetering1 <- as.numeric(subSetData$Sub_metering_1)
subMetering2 <- as.numeric(subSetData$Sub_metering_2)
subMetering3 <- as.numeric(subSetData$Sub_metering_3)

# Switch to a 2x2 grid filled row by row, remembering the previous layout so
# it can be restored once the panel is complete.
old_par <- par(mfrow = c(2, 2))

# Top-left: global active power.
plot(datetime, globalActivePower, type = "l", xlab = "", ylab = "Global Active Power", cex = 0.2)

# Top-right: voltage.
plot(datetime, voltage, type = "l", xlab = "datetime", ylab = "Voltage")

# Bottom-left: the three sub-metering series overlaid on one set of axes.
plot(datetime, subMetering1, type = "l", ylab = "Energy Submetering", xlab = "")
lines(datetime, subMetering2, type = "l", col = "red")
lines(datetime, subMetering3, type = "l", col = "blue")
# lty is given explicitly; the original call left the argument empty.
legend(
  "topright",
  c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3"),
  lty = 1,
  lwd = 2.5,
  col = c("black", "red", "blue"),
  bty = "o"
)

# Bottom-right: global reactive power.
plot(datetime, globalReactivePower, type = "l", xlab = "datetime", ylab = "Global_reactive_power")

# Put the layout back so later plots are not squeezed into the 2x2 grid.
par(old_par)