library(randomForest)
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data <- read.csv("C:/Users/Muraa/Downloads/FPV_Oakville_CA_Inverter1_data.csv")
View(data)
dim(data)
## [1] 242045 28
head(data)
## X DAY HOUR ACPWRT DCVOLT RESIST TEMPER VL1TO2 VL2TO3 VL3TO1 ACCUR1
## 1 0 2022080 00:05:00 32767 32767 32767 32767 32767 32767 32767 32767
## 2 1 2022080 00:10:00 32767 32767 32767 32767 32767 32767 32767 32767
## 3 2 2022080 00:15:00 32767 32767 32767 32767 32767 32767 32767 32767
## 4 3 2022080 00:20:00 32767 32767 32767 32767 32767 32767 32767 32767
## 5 4 2022080 00:25:00 32767 32767 32767 32767 32767 32767 32767 32767
## 6 5 2022080 00:30:00 32767 32767 32767 32767 32767 32767 32767 32767
## ACCUR2 ACCUR3 ACVLT1 ACVLT2 ACVLT3 ACFRQ1 ACFRQ2 ACFRQ3 APPWR1 APPWR2 APPWR3
## 1 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## 2 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## 3 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## 4 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## 5 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## 6 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767 32767
## ACPWR1 ACPWR2 ACPWR3 REPWR1 REPWR2 REPWR3
## 1 32767 32767 32767 32767 32767 32767
## 2 32767 32767 32767 32767 32767 32767
## 3 32767 32767 32767 32767 32767 32767
## 4 32767 32767 32767 32767 32767 32767
## 5 32767 32767 32767 32767 32767 32767
## 6 32767 32767 32767 32767 32767 32767
str(data)
## 'data.frame': 242045 obs. of 28 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ DAY : int 2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 ...
## $ HOUR : chr "00:05:00" "00:10:00" "00:15:00" "00:20:00" ...
## $ ACPWRT: num 32767 32767 32767 32767 32767 ...
## $ DCVOLT: num 32767 32767 32767 32767 32767 ...
## $ RESIST: num 32767 32767 32767 32767 32767 ...
## $ TEMPER: num 32767 32767 32767 32767 32767 ...
## $ VL1TO2: num 32767 32767 32767 32767 32767 ...
## $ VL2TO3: num 32767 32767 32767 32767 32767 ...
## $ VL3TO1: num 32767 32767 32767 32767 32767 ...
## $ ACCUR1: num 32767 32767 32767 32767 32767 ...
## $ ACCUR2: num 32767 32767 32767 32767 32767 ...
## $ ACCUR3: num 32767 32767 32767 32767 32767 ...
## $ ACVLT1: num 32767 32767 32767 32767 32767 ...
## $ ACVLT2: num 32767 32767 32767 32767 32767 ...
## $ ACVLT3: num 32767 32767 32767 32767 32767 ...
## $ ACFRQ1: num 32767 32767 32767 32767 32767 ...
## $ ACFRQ2: num 32767 32767 32767 32767 32767 ...
## $ ACFRQ3: num 32767 32767 32767 32767 32767 ...
## $ APPWR1: num 32767 32767 32767 32767 32767 ...
## $ APPWR2: num 32767 32767 32767 32767 32767 ...
## $ APPWR3: num 32767 32767 32767 32767 32767 ...
## $ ACPWR1: num 32767 32767 32767 32767 32767 ...
## $ ACPWR2: num 32767 32767 32767 32767 32767 ...
## $ ACPWR3: num 32767 32767 32767 32767 32767 ...
## $ REPWR1: num 32767 32767 32767 32767 32767 ...
## $ REPWR2: num 32767 32767 32767 32767 32767 ...
## $ REPWR3: num 32767 32767 32767 32767 32767 ...
colnames(data)
## [1] "X" "DAY" "HOUR" "ACPWRT" "DCVOLT" "RESIST" "TEMPER" "VL1TO2"
## [9] "VL2TO3" "VL3TO1" "ACCUR1" "ACCUR2" "ACCUR3" "ACVLT1" "ACVLT2" "ACVLT3"
## [17] "ACFRQ1" "ACFRQ2" "ACFRQ3" "APPWR1" "APPWR2" "APPWR3" "ACPWR1" "ACPWR2"
## [25] "ACPWR3" "REPWR1" "REPWR2" "REPWR3"
summary(data)
## X DAY HOUR ACPWRT
## Min. : 0 Min. :2022080 Length:242045 Min. : 0.00
## 1st Qu.:2086 1st Qu.:2022267 Class :character 1st Qu.: 0.00
## Median :4264 Median :2023075 Mode :character Median : 8.54
## Mean :4305 Mean :2022804 Mean :13010.52
## 3rd Qu.:6505 3rd Qu.:2023225 3rd Qu.:32767.00
## Max. :8927 Max. :2024010 Max. :32767.00
## DCVOLT RESIST TEMPER VL1TO2
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 458.3
## 1st Qu.: 846.7 1st Qu.:32767 1st Qu.: 0.0 1st Qu.: 484.7
## Median :32767.0 Median :32767 Median : 47.9 Median : 492.6
## Mean :19149.7 Mean :26718 Mean :13019.1 Mean :13301.6
## 3rd Qu.:32767.0 3rd Qu.:32767 3rd Qu.:32767.0 3rd Qu.:32767.0
## Max. :32767.0 Max. :32767 Max. :32767.0 Max. :32767.0
## VL2TO3 VL3TO1 ACCUR1 ACCUR2
## Min. : 454.9 Min. : 458.2 Min. : 0.0 Min. : 0.0
## 1st Qu.: 483.1 1st Qu.: 487.1 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 491.6 Median : 496.0 Median : 10.2 Median : 10.1
## Mean :13300.7 Mean :13303.1 Mean :13011.0 Mean :13011.0
## 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767.0
## Max. :32767.0 Max. :32767.0 Max. :32767.0 Max. :32767.0
## ACCUR3 ACVLT1 ACVLT2 ACVLT3
## Min. : 0.0 Min. : 265.4 Min. : 262.9 Min. : 263.7
## 1st Qu.: 0.0 1st Qu.: 281.1 1st Qu.: 278.7 1st Qu.: 280.1
## Median : 10.1 Median : 286.0 Median : 283.3 Median : 285.3
## Mean :13011.0 Mean :13178.3 Mean :13176.8 Mean :13177.7
## 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767.0
## Max. :32767.0 Max. :32767.0 Max. :32767.0 Max. :32767.0
## ACFRQ1 ACFRQ2 ACFRQ3 APPWR1
## Min. : 59.9 Min. : 59.9 Min. : 59.9 Min. : 0
## 1st Qu.: 60.0 1st Qu.: 60.0 1st Qu.: 60.0 1st Qu.: 0
## Median : 60.0 Median : 60.0 Median : 60.0 Median : 2877
## Mean :13044.2 Mean :13044.2 Mean :13044.2 Mean :13865
## 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767.0 3rd Qu.:32767
## Max. :32767.0 Max. :32767.0 Max. :32767.0 Max. :32767
## APPWR2 APPWR3 ACPWR1 ACPWR2
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
## Median : 2847 Median : 2840 Median : 2860 Median : 2860
## Mean :13860 Mean :13856 Mean :13859 Mean :13859
## 3rd Qu.:32767 3rd Qu.:32767 3rd Qu.:32767 3rd Qu.:32767
## Max. :32767 Max. :32767 Max. :32767 Max. :32767
## ACPWR3 REPWR1 REPWR2 REPWR3
## Min. : 0 Min. :-1880 Min. :-1929 Min. :-1871
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
## Median : 2829 Median : 0 Median : 0 Median : 0
## Mean :13854 Mean :12945 Mean :12944 Mean :12945
## 3rd Qu.:32767 3rd Qu.:32767 3rd Qu.:32767 3rd Qu.:32767
## Max. :32767 Max. :32767 Max. :32767 Max. :32767
colnames(data)
## [1] "X" "DAY" "HOUR" "ACPWRT" "DCVOLT" "RESIST" "TEMPER" "VL1TO2"
## [9] "VL2TO3" "VL3TO1" "ACCUR1" "ACCUR2" "ACCUR3" "ACVLT1" "ACVLT2" "ACVLT3"
## [17] "ACFRQ1" "ACFRQ2" "ACFRQ3" "APPWR1" "APPWR2" "APPWR3" "ACPWR1" "ACPWR2"
## [25] "ACPWR3" "REPWR1" "REPWR2" "REPWR3"
sum(is.na(data))
## [1] 0
data <- na.omit(data)
X <- data %>% select(-ACPWR1, -X)
y <- data$ACPWR1
set.seed(123)
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]
set.seed(123)
rf_model <- randomForest(X_train, y_train, ntree = 100, importance = TRUE)
print(rf_model)
##
## Call:
## randomForest(x = X_train, y = y_train, ntree = 100, importance = TRUE)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 8
##
## Mean of squared residuals: 56.42203
## % Var explained: 100
y_pred <- predict(rf_model, X_test)
if(length(y_pred) != length(y_test)) {
stop("Prediction and test labels have different lengths!")
}
mse <- mean((y_pred - y_test)^2)
rmse <- sqrt(mse)
cat("Mean Squared Error (MSE):", mse, "\n")
## Mean Squared Error (MSE): 23.83062
cat("Root Mean Squared Error (RMSE):", rmse, "\n")
## Root Mean Squared Error (RMSE): 4.881661
importance(rf_model)
## %IncMSE IncNodePurity
## DAY 1.120500 3.938312e+07
## HOUR 1.354064 6.957346e+06
## ACPWRT 5.193277 2.284701e+11
## DCVOLT 1.576844 3.129793e+06
## RESIST 1.367702 1.122247e+05
## TEMPER 2.232073 1.613279e+08
## VL1TO2 1.005004 4.563262e+11
## VL2TO3 1.430430 9.115166e+11
## VL3TO1 1.027148 2.205984e+05
## ACCUR1 1.317675 4.812932e+10
## ACCUR2 2.304290 5.283782e+10
## ACCUR3 1.367619 2.277405e+10
## ACVLT1 1.010674 2.464398e+05
## ACVLT2 -1.087618 1.020857e+05
## ACVLT3 2.743429 3.191180e+12
## ACFRQ1 2.526447 2.733739e+12
## ACFRQ2 2.526439 2.733279e+12
## ACFRQ3 2.526441 2.732267e+12
## APPWR1 5.014432 2.263258e+11
## APPWR2 1.506411 7.985387e+10
## APPWR3 2.355482 5.668801e+10
## ACPWR2 6.652380 3.428839e+11
## ACPWR3 2.748804 7.858268e+10
## REPWR1 4.844985 8.659507e+12
## REPWR2 7.193382 1.549213e+13
## REPWR3 4.855546 8.660417e+12
plot(y_test, y_pred, main = "Predicted vs Actual", xlab = "Actual Values", ylab = "Predicted Values", col = "blue", pch = 16)
abline(0, 1, col = "red")

residuals <- y_test - y_pred
plot(y_pred, residuals, main = "Residual Plot", xlab = "Predicted Values", ylab = "Residuals", col = "green", pch = 16)
abline(h = 0, col = "red")

hist(residuals, main = "Error Distribution", xlab = "Residuals", col = "lightblue", breaks = 30)

varImpPlot(rf_model)
