This project uses data from the UC Irvine Machine Learning Repository, in particular, the “Individual Household Electric Power Consumption” data set. The data set contains measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years. Different electrical quantities and some sub-metering values are available in the data set.
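The 9 variables available in the data set are (as described on the UCI website):
1. Date: date in format dd/mm/yyyy
2. Time: time in format hh:mm:ss
3. Global_active_power: household global minute-averaged active power (in kilowatt)
4. Global_reactive_power: household global minute-averaged reactive power (in kilowatt)
5. Voltage: minute-averaged voltage (in volt)
6. Global_intensity: household global minute-averaged current intensity (in ampere)
7. Sub_metering_1: energy sub-metering No. 1 (in watt-hour of active energy), corresponding to the kitchen
8. Sub_metering_2: energy sub-metering No. 2 (in watt-hour of active energy), corresponding to the laundry room
9. Sub_metering_3: energy sub-metering No. 3 (in watt-hour of active energy), corresponding to an electric water heater and an air conditioner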
We download the data to the working directory if a local copy does not already exist.
# Location of the data set. The URL points to a zip archive, not a plain
# text file.
fileurl <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"

# Fetch the data only when no local copy exists. The archive is downloaded in
# binary mode to a temporary file and its text member is extracted and stored
# as "Power.txt", so that "Power.txt" really is a delimited text file.
# NOTE: a "Power.txt" left over from an earlier run that saved the raw archive
# under that name must be deleted by hand before re-running.
if (!file.exists("Power.txt")) {
  zip_path <- tempfile(fileext = ".zip")
  # mode = "wb" prevents line-ending translation from corrupting the archive
  # on Windows.
  download.file(fileurl, zip_path, mode = "wb")

  # Locate the text file inside the archive instead of assuming its name.
  members <- unzip(zip_path, list = TRUE)$Name
  txt_member <- grep("\\.txt$", members, value = TRUE, ignore.case = TRUE)[1]
  if (is.na(txt_member)) {
    stop("No .txt file found in the downloaded archive.", call. = FALSE)
  }

  extract_dir <- tempfile("power_unzip_")
  extracted <- unzip(zip_path, files = txt_member, exdir = extract_dir)
  if (!file.copy(extracted[1], "Power.txt")) {
    stop("Could not write 'Power.txt' to the working directory.",
         call. = FALSE)
  }

  # Remove the temporary archive and extraction directory.
  unlink(c(zip_path, extract_dir), recursive = TRUE)
}
We then load the data from the .txt file into the R global environment.
# Read the semicolon-delimited measurements into a data frame. The data set
# codes missing measurements as "?"; declaring that marker as NA (alongside
# the default "NA") lets the measurement columns parse as numbers directly
# instead of as character strings that later trigger coercion warnings.
powerData <- read.csv(
  "Power.txt",
  sep = ";",
  header = TRUE,
  na.strings = c("NA", "?"),
  stringsAsFactors = FALSE
)
Now, we convert the “Date” column of the “powerData” data set to the Date class and convert the 3rd through 9th columns to numeric.
# Parse the day/month/year text in the Date column into Date objects.
powerData[["Date"]] <- as.Date(powerData[["Date"]], format = "%d/%m/%Y")

# Coerce the measurement columns (positions 3 through 9) to numeric in one
# step; values that cannot be parsed as numbers become NA.
powerData[3:9] <- lapply(powerData[3:9], as.numeric)
We now subset the data set to the two dates of interest, 2007-02-01 and 2007-02-02.
# Keep only the rows dated 2007-02-01 through 2007-02-02 (both inclusive).
# which() discards rows whose comparison is NA, matching subset().
powerSub <- powerData[
  which(
    powerData$Date >= as.Date("2007-02-01") &
      powerData$Date <= as.Date("2007-02-02")
  ), ,
  drop = FALSE
]
We add a new “DateTime” column to the data set that combines the “Date” and “Time” values of each row, and we then keep this new column along with the numeric columns.
# Join each row's date and time into a single "date time" string and parse it
# as POSIXct; no tz is given, so the session's time zone is used.
Date_Time <- with(powerSub, paste(Date, Time))
powerSub[["DateTime"]] <- as.POSIXct(Date_Time)

# Keep column 10 (the new DateTime column, assuming the nine original
# columns) followed by columns 3 through 9; Date and Time are dropped.
# Note that this overwrites powerData with the reduced data set.
powerData <- powerSub[c(10, 3:9)]
# Plot 1: histogram of global active power over the selected period.
hist(
  powerData[["Global_active_power"]],
  col = "red",
  main = "Global Active Power",
  xlab = "Global Active Power (kilowatts)"
)
# Plot 2: global active power against time, drawn as a line.
plot(
  Global_active_power ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "",
  ylab = "Global Active Power (kilowatts)"
)
# Plot 3: the three sub-metering series over time on shared axes.
# The first series sets up the axes; the other two are overlaid in colour.
plot(
  Sub_metering_1 ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "",
  ylab = "Energy sub metering"
)
lines(Sub_metering_2 ~ DateTime, data = powerData, col = "Red")
lines(Sub_metering_3 ~ DateTime, data = powerData, col = "Blue")
legend(
  "topright",
  legend = c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3"),
  col = c("black", "red", "blue"),
  lwd = 1
)
# Plot 4: four panels on a 2 x 2 grid, filled row by row.
# par() returns the previous settings; they are restored after the last panel
# so the 2 x 2 layout does not leak into later plots on the same device.
old_par <- par(mfrow = c(2, 2))

# Top-left: global active power over time.
plot(
  Global_active_power ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "",
  ylab = "Global Active Power"
)

# Top-right: voltage over time.
plot(
  Voltage ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "datetime",
  ylab = "Voltage"
)

# Bottom-left: the three sub-metering series overlaid on shared axes.
plot(
  Sub_metering_1 ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "",
  ylab = "Energy sub metering"
)
lines(Sub_metering_2 ~ DateTime, data = powerData, col = "Red")
lines(Sub_metering_3 ~ DateTime, data = powerData, col = "Blue")
legend(
  "topright",
  legend = c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3"),
  col = c("black", "red", "blue"),
  lwd = 1
)

# Bottom-right: global reactive power over time.
plot(
  Global_reactive_power ~ DateTime,
  data = powerData,
  type = "l",
  xlab = "datetime",
  ylab = "Global_reactive_power"
)

par(old_par)