Project Objectives
- Utilize R libraries within RStudio to practice cleaning, exploring, visualizing, and interpreting data.
- Develop a better understanding of solar energy generation technology.
- Practice communicating analysis findings by producing a comprehensive markdown report.
- Discover how solar technology performs in a real-world scenario, and which pros and cons are apparent from its performance.
Load in the libraries that might need to be used.
library(dplyr)
library(ggplot2)
library(tidyverse)
library(readr)
library(lubridate)
Read in all of the files, inspect them, and make necessary adjustments.
# Locations of the four source CSVs on the local disk. (The variables carry a
# `_url` suffix, but each one holds a local file path.)
plant1_gen_url     <- "~/R/personal/Plant_1_Generation_Data.csv"
plant1_weather_url <- "~/R/personal/Plant_1_Weather_Sensor_Data.csv"
plant2_gen_url     <- "~/R/personal/Plant_2_Generation_Data.csv"
plant2_weather_url <- "~/R/personal/Plant_2_Weather_Sensor_Data.csv"

# Parse every comma-delimited file into its own data frame: one generation
# table and one weather table per plant.
p1_gen_df     <- read_delim(file = plant1_gen_url, delim = ",")
p1_weather_df <- read_delim(file = plant1_weather_url, delim = ",")
p2_gen_df     <- read_delim(file = plant2_gen_url, delim = ",")
p2_weather_df <- read_delim(file = plant2_weather_url, delim = ",")
- I first notice that the DATE_TIME columns of the data frames do not all share the same data type. Each one needs to be converted to date-time (POSIXct) format.
- I will do this using the lubridate library.
# Give both generation tables a date-time `timestamp` column in place of the
# raw DATE_TIME column, then drop every row whose DC or AC power reading is
# missing.
#
# The NA removal is a row filter rather than `na.omit()` inside `mutate()`:
# `na.omit()` applied to a single column returns a shorter vector as soon as
# one NA is present, which `mutate()` rejects because the length no longer
# matches the number of rows; when no NA is present it returns the column
# unchanged. Either way no row was ever removed.
p1_gen_df <- p1_gen_df %>%
  # Plant 1 generation timestamps are parsed as day-month-year hour:minute.
  mutate(timestamp = dmy_hm(DATE_TIME),
         DATE_TIME = NULL) %>%
  filter(!is.na(DC_POWER), !is.na(AC_POWER))

p2_gen_df <- p2_gen_df %>%
  mutate(timestamp = as_datetime(DATE_TIME),
         DATE_TIME = NULL) %>%
  filter(!is.na(DC_POWER), !is.na(AC_POWER))
# Weather tables: convert DATE_TIME to a date-time `timestamp` column and
# drop the original DATE_TIME column.
p1_weather_df <- mutate(
  p1_weather_df,
  timestamp = as_datetime(DATE_TIME),
  DATE_TIME = NULL
)

p2_weather_df <- mutate(
  p2_weather_df,
  timestamp = as_datetime(DATE_TIME),
  DATE_TIME = NULL
)
Use rbind(a, b) to stack the two plants' power generation data frames into a single data frame, and likewise the two plants' weather sensor data frames.
# Stack plant 1 on top of plant 2, once for the generation readings and once
# for the weather readings (row-wise concatenation; plant 1 rows come first).
gen <- p1_gen_df %>%
  rbind(p2_gen_df)

weather <- p1_weather_df %>%
  rbind(p2_weather_df)
Since inverters are used to convert DC power to usable AC power, we should explore how efficient these inverters are in generating usable electricity.
Join Weather and Solar data on timestamp column
# Attach to every generation row the weather reading taken at the same plant
# at the same time; all generation rows are kept (right join onto `gen`).
#
# Matching on timestamp alone is not enough: `weather` holds both plants, so
# wherever the plants share a timestamp each generation row would be paired
# with the other plant's weather as well. PLANT_ID would also be split into
# PLANT_ID.x / PLANT_ID.y, while the plant-labelling step below reads a plain
# PLANT_ID column.
# NOTE(review): assumes the weather files carry a PLANT_ID column like the
# generation files do -- confirm against the CSV headers.
ws_join <- right_join(weather, gen, by = c("timestamp", "PLANT_ID"))
Clean data
# Add day-of-month, hour and labelled-weekday columns to the joined data, and
# keep only the generation side's SOURCE_KEY (the `.y` column of the join)
# under the name SOURCE_KEY_inv.
ws_join <- ws_join %>%
  mutate(day = day(timestamp),
         hour = hour(timestamp),
         wday = wday(timestamp, label = TRUE),
         SOURCE_KEY.x = NULL,
         # `=` is what names the new column. With `<-` the argument is an
         # unnamed expression, so mutate() names the column after the whole
         # expression text instead of SOURCE_KEY_inv.
         SOURCE_KEY_inv = SOURCE_KEY.y,
         SOURCE_KEY.y = NULL)

# Human-readable plant label; a PLANT_ID matching neither value yields NA.
ws_join$plant <- case_when(
  ws_join$PLANT_ID == 4135001 ~ "Plant 1",
  ws_join$PLANT_ID == 4136001 ~ "Plant 2"
)
Analyze relationships between Solar Panel and Weather Sensor data.
# Plant 1 only: scatter of DC power against irradiation (red points) with a
# smoothed trend line drawn on top.
ws1 <- filter(ws_join, plant == "Plant 1")

ggplot(data = ws1, mapping = aes(x = IRRADIATION, y = DC_POWER)) +
  geom_point(color = "red") +
  geom_smooth() +
  labs(
    x = "Irradiation",
    y = "DC Power Generation (kW)",
    title = "Plant 1 DC Power Generation by Irradiation Value"
  ) +
  # Tick marks every 0.25 along the irradiation axis.
  scale_x_continuous(breaks = seq(from = 0, to = 1.25, by = 0.25))

# Plant 2 only: scatter of DC power against irradiation (blue points) with a
# red smoothed trend line drawn on top.
ws2 <- filter(ws_join, plant == "Plant 2")

ggplot(data = ws2, mapping = aes(x = IRRADIATION, y = DC_POWER)) +
  geom_point(color = "blue") +
  geom_smooth(color = "red") +
  labs(
    x = "Irradiation",
    y = "DC Power Generation (kW)",
    title = "Plant 2 DC Power Generation by Irradiation Value"
  ) +
  # Tick marks every 0.25 along the irradiation axis.
  scale_x_continuous(breaks = seq(from = 0, to = 1.25, by = 0.25))
