This was the first project for the Exploratory Data Analysis course in Coursera’s Data Science specialization track. The objective was to practice creating plots by reproducing a series of four plots, and also to practice using GitHub by forking and cloning an existing repository.
The goal here was simply to examine how household energy usage varies over a 2-day period in February 2007 by reproducing a series of four plots. Because the new plots must meet specific image size requirements, they look slightly different from the originals.
This project used data from the UC Irvine Machine Learning Repository, a popular repository for machine learning datasets. The “Individual household electric power consumption Data Set” was used and made available on the course web site:
Description: Measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available.
The following descriptions of the 9 variables in the dataset are taken from the UCI web site
# ---- Load and prepare the household power consumption data ----
# Produces two top-level objects used by the plotting functions below:
#   power - the full data set as read from disk, with Date parsed to Date
#   df    - the rows for 2007-02-01 and 2007-02-02, with numeric measurement
#           columns and a POSIXct `timestamp` column

# Cache the archive under the name the existence check looks for, so the
# download really is skipped on later runs.
zip_path <- "exdata-data-household_power_consumption.zip"
if (!file.exists(zip_path)) {
  download.file(
    "http://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip",
    zip_path,
    mode = "wb" # a zip is binary; text mode corrupts it on Windows
  )
}

# unzip() returns the extracted path. It is assigned unconditionally so that
# `file` is always a path, never the base function file().
file <- unzip(zip_path)

power <- read.table(file, header = TRUE, sep = ";")
power$Date <- as.Date(power$Date, format = "%d/%m/%Y")

# %in% never yields NA, so rows with an unparseable date are dropped instead
# of coming through as all-NA rows (which `==` would produce).
target_days <- as.Date(c("2007-02-01", "2007-02-02"))
df <- power[power$Date %in% target_days, ]

# The measurement columns may have been read as character/factor; going
# through as.character() first converts the printed value, not factor codes.
numeric_cols <- c(
  "Global_active_power", "Global_reactive_power", "Voltage",
  "Sub_metering_1", "Sub_metering_2", "Sub_metering_3"
)
df[numeric_cols] <- lapply(
  df[numeric_cols],
  function(column) as.numeric(as.character(column))
)

# Date is already a Date here, so paste() renders it as YYYY-MM-DD. The format
# is passed to as.POSIXct() itself rather than to transform(), which would
# turn it into a stray constant column.
df$timestamp <- as.POSIXct(
  paste(df$Date, df$Time),
  format = "%Y-%m-%d %H:%M:%S"
)
# Plot 1: histogram of global active power.
# Reads the global data frame `df`, draws the histogram on the current
# graphics device, copies that device to plot1.png (480 x 480) and prints
# the working directory the file was written to.
plot1 <- function() {
  active_power <- df$Global_active_power
  hist(
    active_power,
    col = "red",
    main = "Global Active Power",
    xlab = "Global Active Power (kilowatts)"
  )

  # Copy what is on the current device into a PNG, then close the PNG device
  dev.copy(png, filename = "plot1.png", width = 480, height = 480)
  dev.off()

  cat("Plot1.png has been saved in", getwd())
}

plot1()
## Plot1.png has been saved in /Users/dmaurath/Documents/JHDS/datasciencecoursera
# Plot 2: global active power over time, as a line chart.
# Named plot2 (it writes plot2.png) so that it no longer overwrites the
# histogram function plot1 defined earlier in the file.
# Reads the global data frame `df`, draws on the current graphics device,
# copies that device to plot2.png (480 x 480) and prints the working
# directory the file was written to.
plot2 <- function() {
  plot(
    df$timestamp, df$Global_active_power,
    type = "l",
    xlab = "",
    ylab = "Global Active Power (kilowatts)"
  )

  # Copy what is on the current device into a PNG, then close the PNG device
  dev.copy(png, filename = "plot2.png", width = 480, height = 480)
  dev.off()

  cat("plot2.png has been saved in", getwd())
}

plot2()
## plot2.png has been saved in /Users/dmaurath/Documents/JHDS/datasciencecoursera
# Plot 3: the three energy sub-metering series over time on one chart.
# Reads the global data frame `df`, draws on the current graphics device,
# copies that device to plot3.png (480 x 480) and prints the working
# directory the file was written to.
plot3 <- function() {
  when <- df$timestamp
  series_colours <- c("black", "red", "blue")
  # Labels keep their trailing space exactly as in the legend text
  series_labels <- c("Sub_metering_1 ", "Sub_metering_2 ", "Sub_metering_3 ")

  # The first series sets up the axes (default colour is black); the other
  # two are overlaid on top of it.
  plot(when, df$Sub_metering_1, type = "l", xlab = "",
       ylab = "Energy sub metering")
  lines(when, df$Sub_metering_2, col = "red")
  lines(when, df$Sub_metering_3, col = "blue")

  legend(
    "topright",
    legend = series_labels,
    col = series_colours,
    lty = c(1, 1),
    lwd = c(1, 1)
  )

  # Copy what is on the current device into a PNG, then close the PNG device
  dev.copy(png, filename = "plot3.png", width = 480, height = 480)
  dev.off()

  cat("plot3.png has been saved in", getwd())
}

plot3()
## plot3.png has been saved in /Users/dmaurath/Documents/JHDS/datasciencecoursera
# Plot 4: a 2 x 2 panel of global active power, voltage, energy sub-metering
# and global reactive power, each plotted against time.
# Reads the global data frame `df`, draws on the current graphics device,
# copies that device to plot4.png (480 x 480) and prints the working
# directory the file was written to. The previous panel layout is restored
# on exit so later plots are not left in a 2 x 2 grid.
plot4 <- function() {
  # par() returns the old values of what it sets; restore them on exit
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)

  # Panel 1: global active power
  plot(df$timestamp, df$Global_active_power, type = "l", xlab = "",
       ylab = "Global Active Power")

  # Panel 2: voltage
  plot(df$timestamp, df$Voltage, type = "l", xlab = "datetime",
       ylab = "Voltage")

  # Panel 3: the three sub-metering series overlaid
  plot(df$timestamp, df$Sub_metering_1, type = "l", xlab = "",
       ylab = "Energy sub metering")
  lines(df$timestamp, df$Sub_metering_2, col = "red")
  lines(df$timestamp, df$Sub_metering_3, col = "blue")
  # bty = "n" removes the legend box and cex shrinks the text; the trailing
  # space in each label is there so the text renders correctly.
  legend(
    "topright",
    legend = c("Sub_metering_1 ", "Sub_metering_2 ", "Sub_metering_3 "),
    col = c("black", "red", "blue"),
    lty = c(1, 1),
    bty = "n",
    cex = .5
  )

  # Panel 4: global reactive power
  plot(df$timestamp, df$Global_reactive_power, type = "l", xlab = "datetime",
       ylab = "Global_reactive_power")

  # Copy what is on the current device into a PNG, then close the PNG device
  dev.copy(png, filename = "plot4.png", width = 480, height = 480)
  dev.off()

  cat("plot4.png has been saved in", getwd())
}

plot4()
## plot4.png has been saved in /Users/dmaurath/Documents/JHDS/datasciencecoursera