Work by: Joey Congrove

Data Source: https://www.kaggle.com/anikannal/solar-power-generation-data

Data Overview

Project Objectives

Load the libraries that may be needed for the analysis.

library(dplyr)
library(ggplot2)
library(tidyverse)
library(readr)
library(lubridate)

Read in all of the files, inspect them, and make necessary adjustments.

# Locations of the locally stored CSV files: one generation file and one
# weather-sensor file per plant. Despite the `_url` suffix these are local
# file paths.
plant1_gen_url <- "~/R/personal/Plant_1_Generation_Data.csv"
plant2_gen_url <- "~/R/personal/Plant_2_Generation_Data.csv"

plant1_weather_url <- "~/R/personal/Plant_1_Weather_Sensor_Data.csv"
plant2_weather_url <- "~/R/personal/Plant_2_Weather_Sensor_Data.csv"

# Parse each comma-delimited file into its own data frame.
p1_gen_df <- read_delim(file = plant1_gen_url, delim = ",")
p1_weather_df <- read_delim(file = plant1_weather_url, delim = ",")
p2_gen_df <- read_delim(file = plant2_gen_url, delim = ",")
p2_weather_df <- read_delim(file = plant2_weather_url, delim = ",")
  • I first notice that the DATE_TIME columns of the data frames are not all of the same data type. They all need to be converted to date-time (POSIXct) format.
  • I will do this using the lubridate library.
# Give every data frame a `timestamp` column parsed from DATE_TIME (which is
# then dropped), and remove generation rows whose AC or DC power is missing.
#
# The missing readings are removed with filter() rather than na.omit():
# inside mutate(), na.omit() returns a shorter vector whenever an NA is
# present, which mutate() rejects instead of dropping the row.

# Plant 1 generation timestamps are parsed as day-month-year hour:minute.
p1_gen_df <- p1_gen_df %>%
  mutate(timestamp = dmy_hm(DATE_TIME),
         DATE_TIME = NULL) %>%
  filter(!is.na(DC_POWER), !is.na(AC_POWER))

p2_gen_df <- p2_gen_df %>%
  mutate(timestamp = as_datetime(DATE_TIME),
         DATE_TIME = NULL) %>%
  filter(!is.na(DC_POWER), !is.na(AC_POWER))

# The weather tables only need the timestamp conversion.
p1_weather_df <- p1_weather_df %>%
  mutate(timestamp = as_datetime(DATE_TIME),
         DATE_TIME = NULL)

p2_weather_df <- p2_weather_df %>%
  mutate(timestamp = as_datetime(DATE_TIME),
         DATE_TIME = NULL)

Use rbind(a, b) to stack the two plants' power generation data frames into one data frame, and the two plants' weather sensor data frames into another.

# Stack the rows of both plants: one combined generation table and one
# combined weather table.
gen <- do.call(rbind, list(p1_gen_df, p2_gen_df))
weather <- do.call(rbind, list(p1_weather_df, p2_weather_df))

Since inverters are used to convert DC power to usable AC power, we should explore how efficient these inverters are in generating usable electricity.

Visualize the performance of the inverters at each plant.

# Derive the analysis columns on the combined generation data:
#   dc_ac_ratio - DC_POWER divided by AC_POWER for each reading
#   day, hour   - day of the month and hour of the day from the timestamp
#   plant       - readable label that replaces the numeric PLANT_ID
#   wday        - labelled day of the week
gen <- gen %>%
  mutate(
    dc_ac_ratio = DC_POWER / AC_POWER,
    day = day(timestamp),
    hour = hour(timestamp),
    plant = case_when(
      PLANT_ID == 4135001 ~ "Plant 1",
      PLANT_ID == 4136001 ~ "Plant 2"
    ),
    PLANT_ID = NULL,
    wday = wday(timestamp, label = TRUE),
    # 0 / 0 produces NaN; record those readings as a ratio of 0.
    dc_ac_ratio = replace(dc_ac_ratio, is.nan(dc_ac_ratio), 0)
  )

Visualize the input:output ratios for each plant by hour.

# Mean DC-to-AC ratio for every inverter (SOURCE_KEY) at each hour of the day.
# `.groups = "drop"` returns an ungrouped result; without it summarize()
# leaves the data grouped by SOURCE_KEY and plant and prints a regrouping
# message.
avg_gen_inv <- gen %>%
  group_by(SOURCE_KEY, plant, hour) %>%
  summarize(mean_ratio = mean(dc_ac_ratio), .groups = "drop")

# One point per inverter and hour, with a smoothed trend line per plant.
ggplot(avg_gen_inv, aes(hour, mean_ratio, color = plant)) +
  geom_point() +
  geom_smooth() +
  xlab('Hour of the Day') +
  ylab('Avg. DC-to-AC Energy Ratio') +
  ggtitle('Mean Inverter DC-to-AC Ratio by Hour and Plant') +
  scale_x_continuous(breaks = seq(0, 23, by = 2)) +
  scale_y_continuous(breaks = seq(0, 12, by = 1))

# Plot the DC-to-AC ratio of every individual reading against the hour of
# the day, coloured by plant, with a smoothed trend line per plant.
gen_inv <- group_by(gen, SOURCE_KEY, plant, hour)

ggplot(data = gen_inv,
       mapping = aes(x = hour, y = dc_ac_ratio, color = plant)) +
  geom_point() +
  geom_smooth() +
  labs(x = "Hour of the Day",
       y = "DC-to-AC Energy Ratio",
       title = "Inverter DC-to-AC Ratio by Hour and Plant") +
  scale_x_continuous(breaks = seq(from = 0, to = 23, by = 2)) +
  scale_y_continuous(breaks = seq(from = 0, to = 12, by = 1))

Graph Explanations
  • Both plants generate nearly 0 power before 5am and after 7pm.
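  • Plant 1 appears to be far less efficient than Plant 2 at converting DC to AC. On average, Plant 2 appears to operate about 10 times more efficiently than Plant 1. A gap this large may instead reflect a difference in how the two plants record DC_POWER, so the units should be verified before drawing conclusions.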
  • Plant 2, however, appears far less consistent, with many instances where the ratio is zero, meaning DC power was 0 (either alone, or together with AC power in the 0/0 cases that were set to 0).

Join Weather and Solar data on timestamp column

# Label each weather reading with its plant so the join can match on plant as
# well as on time. `weather` and `gen` each hold rows for both plants, and
# `gen` identifies the plant only through its `plant` column (its PLANT_ID was
# removed). A timestamp-only join would pair an inverter reading with weather
# rows from either plant whenever both plants report at the same time.
weather_by_plant <- weather %>%
  mutate(plant = case_when(
    PLANT_ID == 4135001 ~ "Plant 1",
    PLANT_ID == 4136001 ~ "Plant 2"
  ))

# Right join: keep every generation row; rows without a matching weather
# reading get NA in the weather columns.
ws_join <- right_join(weather_by_plant, gen, by = c("timestamp", "plant"))

Clean data

# Tidy the joined table: recompute day, hour, and wday from the timestamp,
# drop the weather table's SOURCE_KEY (SOURCE_KEY.x), and keep the generation
# table's SOURCE_KEY (SOURCE_KEY.y) under the name SOURCE_KEY_inv.
#
# The rename must use `=`: writing `SOURCE_KEY_inv <- SOURCE_KEY.y` inside
# mutate() creates a column literally named "SOURCE_KEY_inv <- SOURCE_KEY.y"
# and SOURCE_KEY_inv is never created.
ws_join <- ws_join %>%
  mutate(day = day(timestamp),
         hour = hour(timestamp),
         wday = wday(timestamp, label = TRUE),
         SOURCE_KEY.x = NULL,
         SOURCE_KEY_inv = SOURCE_KEY.y,
         SOURCE_KEY.y = NULL
         )

# Readable plant label derived from PLANT_ID. `gen` had its PLANT_ID removed
# before the join, so the PLANT_ID used here is the weather table's.
ws_join$plant <- case_when(
  ws_join$PLANT_ID == 4135001 ~ "Plant 1",
  ws_join$PLANT_ID == 4136001 ~ "Plant 2"
)

Analyze relationships between Solar Panel and Weather Sensor data.

# Rows labelled "Plant 1": DC power against irradiation, with a smoothed
# trend line over the red points.
ws1 <- filter(ws_join, plant == "Plant 1")

ggplot(data = ws1, mapping = aes(x = IRRADIATION, y = DC_POWER)) +
  geom_point(color = "red") +
  geom_smooth() +
  labs(x = "Irradiation",
       y = "DC Power Generation (kW)",
       title = "Plant 1 DC Power Generation by Irradiation Value") +
  scale_x_continuous(breaks = seq(from = 0, to = 1.25, by = 0.25))

# Rows labelled "Plant 2": DC power against irradiation, with a red smoothed
# trend line over the blue points.
ws2 <- filter(ws_join, plant == "Plant 2")

ggplot(data = ws2, mapping = aes(x = IRRADIATION, y = DC_POWER)) +
  geom_point(color = "blue") +
  geom_smooth(color = "red") +
  labs(x = "Irradiation",
       y = "DC Power Generation (kW)",
       title = "Plant 2 DC Power Generation by Irradiation Value") +
  scale_x_continuous(breaks = seq(from = 0, to = 1.25, by = 0.25))

Graph Explanations

Find out what could be causing sub-optimal DC power generation.

# Mean DC power of each Plant 1 inverter (SOURCE_KEY), drawn as one bar per
# inverter with the axis labels rotated so the keys stay readable.
plant1_gen <- filter(gen, plant == "Plant 1")
inv1 <- plant1_gen %>%
  group_by(SOURCE_KEY) %>%
  summarise(dc_avg = mean(DC_POWER))

ggplot(data = inv1, mapping = aes(x = SOURCE_KEY, y = dc_avg)) +
  geom_col(fill = "red") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "Inverter Key",
       y = "Mean DC Generation (kW)",
       title = "Plant 1")

# Mean DC power of each Plant 2 inverter (SOURCE_KEY), drawn as one bar per
# inverter with the axis labels rotated so the keys stay readable.
plant2_gen <- filter(gen, plant == "Plant 2")
inv2 <- plant2_gen %>%
  group_by(SOURCE_KEY) %>%
  summarise(dc_avg = mean(DC_POWER))

ggplot(data = inv2, mapping = aes(x = SOURCE_KEY, y = dc_avg)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "Inverter Key",
       y = "Mean DC Generation (kW)",
       title = "Plant 2")

Graph Explanations