library(randomForest)
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Loading required package: lattice
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the inverter export.
# The file location can be overridden with the FPV_INVERTER_CSV environment
# variable so the script is not tied to one machine; the original path stays
# the default, so existing runs behave as before.
data_path <- Sys.getenv(
  "FPV_INVERTER_CSV",
  unset = "C:/Users/Muraa/Downloads/FPV_Oakville_CA_Inverter1_data.csv"
)
if (!file.exists(data_path)) {
  stop("Input CSV not found: ", data_path, call. = FALSE)
}
data <- read.csv(data_path)

# View() opens a spreadsheet-style viewer, which is only useful (and only
# reliably available) in an interactive session; skip it for batch runs.
if (interactive()) {
  View(data)
}
# Size of the loaded table, reported as c(rows, columns)
c(nrow(data), ncol(data))
## [1] 242045     28
# Peek at the first six records
head(data, n = 6L)
##   X     DAY     HOUR ACPWRT DCVOLT RESIST TEMPER VL1TO2 VL2TO3 VL3TO1 ACCUR1
## 1 0 2022080 00:05:00  32767  32767  32767  32767  32767  32767  32767  32767
## 2 1 2022080 00:10:00  32767  32767  32767  32767  32767  32767  32767  32767
## 3 2 2022080 00:15:00  32767  32767  32767  32767  32767  32767  32767  32767
## 4 3 2022080 00:20:00  32767  32767  32767  32767  32767  32767  32767  32767
## 5 4 2022080 00:25:00  32767  32767  32767  32767  32767  32767  32767  32767
## 6 5 2022080 00:30:00  32767  32767  32767  32767  32767  32767  32767  32767
##   ACCUR2 ACCUR3 ACVLT1 ACVLT2 ACVLT3 ACFRQ1 ACFRQ2 ACFRQ3 APPWR1 APPWR2 APPWR3
## 1  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
## 2  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
## 3  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
## 4  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
## 5  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
## 6  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767  32767
##   ACPWR1 ACPWR2 ACPWR3 REPWR1 REPWR2 REPWR3
## 1  32767  32767  32767  32767  32767  32767
## 2  32767  32767  32767  32767  32767  32767
## 3  32767  32767  32767  32767  32767  32767
## 4  32767  32767  32767  32767  32767  32767
## 5  32767  32767  32767  32767  32767  32767
## 6  32767  32767  32767  32767  32767  32767
# Compact overview of every column: its type and leading values
str(object = data)
## 'data.frame':    242045 obs. of  28 variables:
##  $ X     : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ DAY   : int  2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 2022080 ...
##  $ HOUR  : chr  "00:05:00" "00:10:00" "00:15:00" "00:20:00" ...
##  $ ACPWRT: num  32767 32767 32767 32767 32767 ...
##  $ DCVOLT: num  32767 32767 32767 32767 32767 ...
##  $ RESIST: num  32767 32767 32767 32767 32767 ...
##  $ TEMPER: num  32767 32767 32767 32767 32767 ...
##  $ VL1TO2: num  32767 32767 32767 32767 32767 ...
##  $ VL2TO3: num  32767 32767 32767 32767 32767 ...
##  $ VL3TO1: num  32767 32767 32767 32767 32767 ...
##  $ ACCUR1: num  32767 32767 32767 32767 32767 ...
##  $ ACCUR2: num  32767 32767 32767 32767 32767 ...
##  $ ACCUR3: num  32767 32767 32767 32767 32767 ...
##  $ ACVLT1: num  32767 32767 32767 32767 32767 ...
##  $ ACVLT2: num  32767 32767 32767 32767 32767 ...
##  $ ACVLT3: num  32767 32767 32767 32767 32767 ...
##  $ ACFRQ1: num  32767 32767 32767 32767 32767 ...
##  $ ACFRQ2: num  32767 32767 32767 32767 32767 ...
##  $ ACFRQ3: num  32767 32767 32767 32767 32767 ...
##  $ APPWR1: num  32767 32767 32767 32767 32767 ...
##  $ APPWR2: num  32767 32767 32767 32767 32767 ...
##  $ APPWR3: num  32767 32767 32767 32767 32767 ...
##  $ ACPWR1: num  32767 32767 32767 32767 32767 ...
##  $ ACPWR2: num  32767 32767 32767 32767 32767 ...
##  $ ACPWR3: num  32767 32767 32767 32767 32767 ...
##  $ REPWR1: num  32767 32767 32767 32767 32767 ...
##  $ REPWR2: num  32767 32767 32767 32767 32767 ...
##  $ REPWR3: num  32767 32767 32767 32767 32767 ...
# Column names of the loaded table
names(data)
##  [1] "X"      "DAY"    "HOUR"   "ACPWRT" "DCVOLT" "RESIST" "TEMPER" "VL1TO2"
##  [9] "VL2TO3" "VL3TO1" "ACCUR1" "ACCUR2" "ACCUR3" "ACVLT1" "ACVLT2" "ACVLT3"
## [17] "ACFRQ1" "ACFRQ2" "ACFRQ3" "APPWR1" "APPWR2" "APPWR3" "ACPWR1" "ACPWR2"
## [25] "ACPWR3" "REPWR1" "REPWR2" "REPWR3"
# Per-column descriptive statistics (quartiles, mean, min/max)
summary(object = data)
##        X             DAY              HOUR               ACPWRT        
##  Min.   :   0   Min.   :2022080   Length:242045      Min.   :    0.00  
##  1st Qu.:2086   1st Qu.:2022267   Class :character   1st Qu.:    0.00  
##  Median :4264   Median :2023075   Mode  :character   Median :    8.54  
##  Mean   :4305   Mean   :2022804                      Mean   :13010.52  
##  3rd Qu.:6505   3rd Qu.:2023225                      3rd Qu.:32767.00  
##  Max.   :8927   Max.   :2024010                      Max.   :32767.00  
##      DCVOLT            RESIST          TEMPER            VL1TO2       
##  Min.   :    0.0   Min.   :    0   Min.   :    0.0   Min.   :  458.3  
##  1st Qu.:  846.7   1st Qu.:32767   1st Qu.:    0.0   1st Qu.:  484.7  
##  Median :32767.0   Median :32767   Median :   47.9   Median :  492.6  
##  Mean   :19149.7   Mean   :26718   Mean   :13019.1   Mean   :13301.6  
##  3rd Qu.:32767.0   3rd Qu.:32767   3rd Qu.:32767.0   3rd Qu.:32767.0  
##  Max.   :32767.0   Max.   :32767   Max.   :32767.0   Max.   :32767.0  
##      VL2TO3            VL3TO1            ACCUR1            ACCUR2       
##  Min.   :  454.9   Min.   :  458.2   Min.   :    0.0   Min.   :    0.0  
##  1st Qu.:  483.1   1st Qu.:  487.1   1st Qu.:    0.0   1st Qu.:    0.0  
##  Median :  491.6   Median :  496.0   Median :   10.2   Median :   10.1  
##  Mean   :13300.7   Mean   :13303.1   Mean   :13011.0   Mean   :13011.0  
##  3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767.0  
##  Max.   :32767.0   Max.   :32767.0   Max.   :32767.0   Max.   :32767.0  
##      ACCUR3            ACVLT1            ACVLT2            ACVLT3       
##  Min.   :    0.0   Min.   :  265.4   Min.   :  262.9   Min.   :  263.7  
##  1st Qu.:    0.0   1st Qu.:  281.1   1st Qu.:  278.7   1st Qu.:  280.1  
##  Median :   10.1   Median :  286.0   Median :  283.3   Median :  285.3  
##  Mean   :13011.0   Mean   :13178.3   Mean   :13176.8   Mean   :13177.7  
##  3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767.0  
##  Max.   :32767.0   Max.   :32767.0   Max.   :32767.0   Max.   :32767.0  
##      ACFRQ1            ACFRQ2            ACFRQ3            APPWR1     
##  Min.   :   59.9   Min.   :   59.9   Min.   :   59.9   Min.   :    0  
##  1st Qu.:   60.0   1st Qu.:   60.0   1st Qu.:   60.0   1st Qu.:    0  
##  Median :   60.0   Median :   60.0   Median :   60.0   Median : 2877  
##  Mean   :13044.2   Mean   :13044.2   Mean   :13044.2   Mean   :13865  
##  3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767.0   3rd Qu.:32767  
##  Max.   :32767.0   Max.   :32767.0   Max.   :32767.0   Max.   :32767  
##      APPWR2          APPWR3          ACPWR1          ACPWR2     
##  Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.:    0   1st Qu.:    0   1st Qu.:    0   1st Qu.:    0  
##  Median : 2847   Median : 2840   Median : 2860   Median : 2860  
##  Mean   :13860   Mean   :13856   Mean   :13859   Mean   :13859  
##  3rd Qu.:32767   3rd Qu.:32767   3rd Qu.:32767   3rd Qu.:32767  
##  Max.   :32767   Max.   :32767   Max.   :32767   Max.   :32767  
##      ACPWR3          REPWR1          REPWR2          REPWR3     
##  Min.   :    0   Min.   :-1880   Min.   :-1929   Min.   :-1871  
##  1st Qu.:    0   1st Qu.:    0   1st Qu.:    0   1st Qu.:    0  
##  Median : 2829   Median :    0   Median :    0   Median :    0  
##  Mean   :13854   Mean   :12945   Mean   :12944   Mean   :12945  
##  3rd Qu.:32767   3rd Qu.:32767   3rd Qu.:32767   3rd Qu.:32767  
##  Max.   :32767   Max.   :32767   Max.   :32767   Max.   :32767
# Column names once more, for reference before the missing-value check
names(data)
##  [1] "X"      "DAY"    "HOUR"   "ACPWRT" "DCVOLT" "RESIST" "TEMPER" "VL1TO2"
##  [9] "VL2TO3" "VL3TO1" "ACCUR1" "ACCUR2" "ACCUR3" "ACVLT1" "ACVLT2" "ACVLT3"
## [17] "ACFRQ1" "ACFRQ2" "ACFRQ3" "APPWR1" "APPWR2" "APPWR3" "ACPWR1" "ACPWR2"
## [25] "ACPWR3" "REPWR1" "REPWR2" "REPWR3"
# Count explicit NA cells. This is 0 for the raw file, but that does not mean
# the data are complete: missing readings are stored as a numeric code.
sum(is.na(data))
## [1] 0

# Every measurement column has a maximum of exactly 32767 (see summary(data)),
# including the AC frequency columns whose median is 60, and the first rows
# of the file are 32767 across the board. 32767 is therefore treated as the
# logger's "no reading" code and converted to NA so it can be dropped.
# NOTE(review): confirm the sentinel against the data source's documentation.
missing_sentinel <- 32767
measurement_cols <- base::setdiff(names(data), c("X", "DAY", "HOUR"))
data[measurement_cols] <- lapply(
  data[measurement_cols],
  # %in% never yields NA, so pre-existing NA cells are left untouched
  function(col) replace(col, col %in% missing_sentinel, NA)
)

# HOUR is read as text ("HH:MM:SS"). randomForest integer-codes character
# predictors via data.matrix(), separately at fit and predict time, so the
# codes depend on which values occur in each subset. Seconds since midnight
# is a stable numeric encoding with the same ordering.
data$HOUR <- as.numeric(
  as.POSIXct(
    paste("1970-01-01", data$HOUR),
    format = "%Y-%m-%d %H:%M:%S",
    tz = "UTC"
  )
)

# Drop every row that still has a missing value (sentinel readings and any
# HOUR string that failed to parse).
rows_before <- nrow(data)
data <- na.omit(data)
if (nrow(data) == 0L) {
  stop(
    "No complete rows remain after removing sentinel readings.",
    call. = FALSE
  )
}
message(sprintf(
  "Dropped %d of %d rows containing missing readings.",
  rows_before - nrow(data), rows_before
))

# Predictors: everything except the target and the row counter X.
# NOTE(review): ACPWRT / APPWR1 / ACPWR2 / ACPWR3 remain as predictors and
# look closely related to the target; confirm that this is intended.
X <- data %>% select(-ACPWR1, -X)
# Target: the ACPWR1 column
y <- data$ACPWR1

# NOTE: the '##' results recorded further down this file were produced
# before this cleaning step and need to be regenerated.
# Fix the RNG state so the same partition is drawn on every run
set.seed(seed = 123)

# Draw 80% of the row positions for training, based on the target y
train_index <- createDataPartition(y = y, p = 0.8, list = FALSE)

# Training portion: the sampled rows
X_train <- X[train_index, , drop = FALSE]
y_train <- y[train_index]

# Held-out portion: every row that was not sampled
X_test <- X[-train_index, , drop = FALSE]
y_test <- y[-train_index]
# Re-seed so the forest itself is reproducible, independent of the split
set.seed(seed = 123)

# Regression forest of 100 trees; importance = TRUE records the
# per-variable importance measures reported later by importance()
rf_model <- randomForest(
  x = X_train,
  y = y_train,
  ntree = 100,
  importance = TRUE
)

# Show the fit summary
print(rf_model)
## 
## Call:
##  randomForest(x = X_train, y = y_train, ntree = 100, importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 100
## No. of variables tried at each split: 8
## 
##           Mean of squared residuals: 56.42203
##                     % Var explained: 100
# Score the held-out rows with the fitted forest
y_pred <- predict(object = rf_model, newdata = X_test)

# Guard: predictions and observed targets must line up one-to-one
if (!identical(length(y_pred), length(y_test))) {
  stop("Prediction and test labels have different lengths!")
}

# Error metrics on the held-out rows
mse <- mean((y_test - y_pred)^2)
rmse <- sqrt(mse)

cat("Mean Squared Error (MSE):", mse, "\n")
## Mean Squared Error (MSE): 23.83062
cat("Root Mean Squared Error (RMSE):", rmse, "\n")
## Root Mean Squared Error (RMSE): 4.881661

# Variable-importance table of the fitted forest
importance(x = rf_model)
##          %IncMSE IncNodePurity
## DAY     1.120500  3.938312e+07
## HOUR    1.354064  6.957346e+06
## ACPWRT  5.193277  2.284701e+11
## DCVOLT  1.576844  3.129793e+06
## RESIST  1.367702  1.122247e+05
## TEMPER  2.232073  1.613279e+08
## VL1TO2  1.005004  4.563262e+11
## VL2TO3  1.430430  9.115166e+11
## VL3TO1  1.027148  2.205984e+05
## ACCUR1  1.317675  4.812932e+10
## ACCUR2  2.304290  5.283782e+10
## ACCUR3  1.367619  2.277405e+10
## ACVLT1  1.010674  2.464398e+05
## ACVLT2 -1.087618  1.020857e+05
## ACVLT3  2.743429  3.191180e+12
## ACFRQ1  2.526447  2.733739e+12
## ACFRQ2  2.526439  2.733279e+12
## ACFRQ3  2.526441  2.732267e+12
## APPWR1  5.014432  2.263258e+11
## APPWR2  1.506411  7.985387e+10
## APPWR3  2.355482  5.668801e+10
## ACPWR2  6.652380  3.428839e+11
## ACPWR3  2.748804  7.858268e+10
## REPWR1  4.844985  8.659507e+12
## REPWR2  7.193382  1.549213e+13
## REPWR3  4.855546  8.660417e+12
# Predicted against observed values, with the y = x reference line
plot(
  x = y_test,
  y = y_pred,
  pch = 16,
  col = "blue",
  xlab = "Actual Values",
  ylab = "Predicted Values",
  main = "Predicted vs Actual"
)
abline(a = 0, b = 1, col = "red")

# Residuals (observed minus predicted) against the predictions,
# with a horizontal zero line
residuals <- y_test - y_pred
plot(
  x = y_pred,
  y = residuals,
  pch = 16,
  col = "green",
  xlab = "Predicted Values",
  ylab = "Residuals",
  main = "Residual Plot"
)
abline(h = 0, col = "red")

# Histogram of the residuals
hist(
  residuals,
  breaks = 30,
  col = "lightblue",
  xlab = "Residuals",
  main = "Error Distribution"
)

# Variable-importance plot for the fitted forest
varImpPlot(rf_model)