meter 1 regression models

Project Title: Meter Readings from a Railway Station (part 2)

NAME: ASWATHY GUNADEEP

COLLEGE / COMPANY: NATIONAL INSTITUTE OF TECHNOLOGY KARNATAKA

# Load the meter-1 readings and keep the 16 columns used by the models below.
# NOTE(review): the working directory is hard-coded to one machine; anyone
# else running this must edit the path.
setwd("C:/Users/user/Desktop/tarsha systems summer internship/datasets")
met1.df <- read.csv("1.csv")

# The spreadsheet viewer is only useful (and only reliably available) in an
# interactive session, so skip it when the script is run non-interactively.
if (interactive()) {
  View(met1.df)
}

# Columns are picked by position in the raw file; the str() call below prints
# the resulting column names and types.
keep_cols <- c(1, 5, 9, 13, 20, 24, 25, 26, 27, 28, 29, 36, 37, 38, 39, 40)
sub.df <- met1.df[, keep_cols]

# No attach(): the models in this file reach the columns through sub.df
# (either sub.df$col or data = ...), and attaching would only add a masked
# copy of every column to the search path.
str(sub.df)

## 'data.frame':    10480 obs. of  16 variables:
##  $ W.Total            : num  5515 3891 4178 3911 5633 ...
##  $ VAr.Total          : num  -4193 -4106 -4154 -3981 -3741 ...
##  $ P.F                : num  0.796 0.688 0.709 0.701 0.833 ...
##  $ VA.Total           : num  6928 5657 5892 5581 6762 ...
##  $ Amps.Ave.          : num  9.41 7.72 8 7.72 9.44 ...
##  $ Frequency          : num  50.1 50 50.1 49.9 50 ...
##  $ Wh.Rec.            : num  26794 28127 29373 30585 32092 ...
##  $ VAh.Rec.           : num  32338 34018 35634 37218 39013 ...
##  $ VArh.I.Rec.        : num  0.0144 0.0144 0.0144 0.0144 0.0144 ...
##  $ VArh.C.Rec.        : num  -17121 -18095 -19067 -20050 -20959 ...
##  $ Neutral.Current    : num  2.2 0 0 0 1.96 ...
##  $ Rising.Demand      : num  7255 5332 4985 4831 6036 ...
##  $ Maximum.Demand     : num  7269 7393 7393 7393 7393 ...
##  $ RPM                : num  1502 1499 1503 1498 1500 ...
##  $ Load.Hours.Received: num  1.18e+09 1.24e+09 1.30e+09 1.35e+09 1.41e+09 1.47e+09 1.53e+09 1.59e+09 1.65e+09 1.71e+09 ...
##  $ No.Of.Intrruptions : int  0 0 0 0 0 0 0 0 0 0 ...

Simple linear regression (1)

# Simple linear regression of total active power (W.Total) on total apparent
# power (VA.Total). Columns are named bare in the formula so that they are
# looked up in `data`, which also lets predict() accept a `newdata` frame.
sfit1 <- lm(W.Total ~ VA.Total, data = sub.df)
summary(sfit1)

## 
## Call:
## lm(formula = sub.df$W.Total ~ sub.df$VA.Total, data = sub.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5850.4  -249.6    65.5   302.8  2807.5 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -2.807e+03  2.057e+01  -136.5   <2e-16 ***
## sub.df$VA.Total  1.238e+00  3.350e-03   369.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 610.7 on 10478 degrees of freedom
## Multiple R-squared:  0.9287, Adjusted R-squared:  0.9287 
## F-statistic: 1.366e+05 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of active power against apparent power with the sfit1 line on top.
plot(
  x = sub.df$VA.Total,
  y = sub.df$W.Total,
  main = "active and apparent total power",
  xlab = "Apparent Power Total",
  ylab = "Active Power Total",
  col = "orange"
)
abline(sfit1)

The regression coefficient (1.238) is significantly different from zero (p < 0.001). There is an expected increase of 1.238 watts of total active power for every 1 VA increase in total apparent power. The multiple R-squared indicates that the model accounts for 93% of the variance in total active power. However, the residual standard error (610.7) is quite high for this model to be used to predict W Total.

Simple linear regression (2)

# Simple linear regression of total active power (W.Total) on average current
# (Amps.Ave.). Bare column names are resolved through `data`, so the fitted
# model can also be used with predict(newdata = ...).
sfit2 <- lm(W.Total ~ Amps.Ave., data = sub.df)
summary(sfit2)

## 
## Call:
## lm(formula = sub.df$W.Total ~ sub.df$Amps.Ave., data = sub.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6433.3  -203.0   117.7   281.1  2273.4 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -2273.406     18.539  -122.6   <2e-16 ***
## sub.df$Amps.Ave.   815.515      2.131   382.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 591.2 on 10478 degrees of freedom
## Multiple R-squared:  0.9332, Adjusted R-squared:  0.9332 
## F-statistic: 1.465e+05 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of active power against average current with the sfit2 line on top.
plot(
  x = sub.df$Amps.Ave.,
  y = sub.df$W.Total,
  main = "active total power and average current",
  xlab = "average current",
  ylab = "Active Power Total",
  col = "blue"
)
abline(sfit2)

The regression coefficient (815.5) is significantly different from zero (p < 0.001). There is an expected increase of 815.5 watts of total active power for every 1 ampere increase in average current. The multiple R-squared indicates that the model accounts for 93.3% of the variance in total active power. However, the residual standard error (591.2) is quite high for this model to be used to predict W Total.

Simple linear regression (3)

# Simple linear regression of total apparent power (VA.Total) on average
# current (Amps.Ave.). Bare column names are resolved through `data`, so the
# fitted model can also be used with predict(newdata = ...).
sfit3 <- lm(VA.Total ~ Amps.Ave., data = sub.df)
summary(sfit3)

## 
## Call:
## lm(formula = sub.df$VA.Total ~ sub.df$Amps.Ave., data = sub.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2229.99   -90.77     3.78    98.34   915.61 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      467.9738     5.1828   90.29   <2e-16 ***
## sub.df$Amps.Ave. 654.3793     0.5957 1098.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 165.3 on 10478 degrees of freedom
## Multiple R-squared:  0.9914, Adjusted R-squared:  0.9914 
## F-statistic: 1.207e+06 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of apparent power against average current with the sfit3 line on top.
plot(
  x = sub.df$Amps.Ave.,
  y = sub.df$VA.Total,
  main = "apparent total power and average current",
  xlab = "average current",
  ylab = "Apparent Power Total",
  col = "skyblue"
)
abline(sfit3)

The regression coefficient (654.4) is significantly different from zero (p < 0.001). There is an expected increase of 654.4 VA of total apparent power for every 1 ampere increase in average current. The multiple R-squared indicates that the model accounts for 99.1% of the variance in total apparent power. The residual standard error is acceptable and this is a decent model to predict VA total.

Simple linear regression (4)

# Simple linear regression of active energy received (Wh.Rec.) on apparent
# energy received (VAh.Rec.). Bare column names are resolved through `data`,
# so the fitted model can also be used with predict(newdata = ...).
sfit4 <- lm(Wh.Rec. ~ VAh.Rec., data = sub.df)
summary(sfit4)

## 
## Call:
## lm(formula = sub.df$Wh.Rec. ~ sub.df$VAh.Rec., data = sub.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -86307 -31419 -15986  29133 913283 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.313e+04  7.831e+02  -80.61   <2e-16 ***
## sub.df$VAh.Rec.  7.590e-01  8.752e-05 8671.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40280 on 10478 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 7.52e+07 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of active energy against apparent energy with the sfit4 line on top.
plot(
  x = sub.df$VAh.Rec.,
  y = sub.df$Wh.Rec.,
  main = "active energy received and apparent energy received",
  xlab = "apparent energy received",
  ylab = "active energy received",
  col = "red"
)
abline(sfit4)

The regression coefficient (7.590e-01) is significantly different from zero (p < 0.001). The multiple R-squared indicates that the model accounts for 99.99% of the variance in active energy received. However, the residual standard error (40280) is quite high to use this model.

Simple linear regression (5)

# Simple linear regression of active energy received (Wh.Rec.) on the
# interruption count (No.Of.Intrruptions). Bare column names are resolved
# through `data`, so the fitted model can also be used with
# predict(newdata = ...).
sfit5 <- lm(Wh.Rec. ~ No.Of.Intrruptions, data = sub.df)
summary(sfit5)

## 
## Call:
## lm(formula = sub.df$Wh.Rec. ~ sub.df$No.Of.Intrruptions, data = sub.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1111587  -445395  -117364   536117  1251847 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -1.465e+05  1.184e+04  -12.37   <2e-16 ***
## sub.df$No.Of.Intrruptions  2.301e+00  3.984e-03  577.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 595400 on 10478 degrees of freedom
## Multiple R-squared:  0.9696, Adjusted R-squared:  0.9695 
## F-statistic: 3.336e+05 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of active energy against the interruption count with the sfit5 line.
plot(
  x = sub.df$No.Of.Intrruptions,
  y = sub.df$Wh.Rec.,
  main = "Active Energy Received and No.Of.Intrruptions",
  xlab = "No.Of.Intrruptions",
  ylab = "Active Energy Received",
  col = "burlywood"
)
abline(sfit5)

The regression coefficient (2.301) is significantly different from zero (p < 0.001). The multiple R-squared indicates that the model accounts for 97% of the variance in active energy received. However, the residual standard error (595400) is quite high to use this model.

Simple linear regression (6)

# Simple linear regression of apparent energy received (VAh.Rec.) on the
# interruption count (No.Of.Intrruptions). Bare column names are resolved
# through `data`, so the fitted model can also be used with
# predict(newdata = ...).
sfit6 <- lm(VAh.Rec. ~ No.Of.Intrruptions, data = sub.df)
summary(sfit6)

## 
## Call:
## lm(formula = sub.df$VAh.Rec. ~ sub.df$No.Of.Intrruptions, data = sub.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1523427  -594646  -133738   694996  1672157 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -1.058e+05  1.582e+04  -6.687 2.39e-11 ***
## sub.df$No.Of.Intrruptions  3.031e+00  5.323e-03 569.346  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 795500 on 10478 degrees of freedom
## Multiple R-squared:  0.9687, Adjusted R-squared:  0.9687 
## F-statistic: 3.242e+05 on 1 and 10478 DF,  p-value: < 2.2e-16

# Scatter of apparent energy against the interruption count with the sfit6 line.
plot(
  x = sub.df$No.Of.Intrruptions,
  y = sub.df$VAh.Rec.,
  main = "Apparent Energy Received and No.Of.Intrruptions",
  xlab = "No.Of.Intrruptions",
  ylab = "Apparent Energy Received",
  col = "plum"
)
abline(sfit6)

The regression coefficient (3.031) is significantly different from zero (p < 0.001). The multiple R-squared indicates that the model accounts for 96.9% of the variance in apparent energy received. However, the residual standard error (795500) is quite high to use this model.

Multiple linear regression(1)

# Multiple linear regression of W.Total on every other column of sub.df.
# The response is named bare so that `.` unambiguously means "all columns of
# `data` except the response" and predict() can accept a `newdata` frame.
fit1 <- lm(W.Total ~ ., data = sub.df)
summary(fit1)

## 
## Call:
## lm(formula = sub.df$W.Total ~ ., data = sub.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9484.5   -88.2    -0.2    82.5  1004.7 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.899e+01  2.403e+02  -0.245  0.80612    
## VAr.Total            5.838e-01  3.964e-03 147.274  < 2e-16 ***
## P.F                  1.613e+03  1.407e+01 114.633  < 2e-16 ***
## VA.Total             1.052e+00  1.637e-02  64.271  < 2e-16 ***
## Amps.Ave.           -5.228e+01  1.098e+01  -4.762 1.95e-06 ***
## Frequency            3.771e+04  2.247e+05   0.168  0.86674    
## Wh.Rec.              1.379e-04  8.971e-05   1.537  0.12439    
## VAh.Rec.             3.901e-05  1.197e-04   0.326  0.74444    
## VArh.I.Rec.         -5.339e-04  1.333e-04  -4.005 6.25e-05 ***
## VArh.C.Rec.          2.776e-04  1.069e-04   2.597  0.00942 ** 
## Neutral.Current      3.251e+01  2.813e+00  11.557  < 2e-16 ***
## Rising.Demand        1.568e-01  2.176e-03  72.047  < 2e-16 ***
## Maximum.Demand       2.978e-02  1.016e-02   2.931  0.00338 ** 
## RPM                 -1.258e+03  7.491e+03  -0.168  0.86665    
## Load.Hours.Received -5.201e-09  1.815e-09  -2.866  0.00417 ** 
## No.Of.Intrruptions   5.486e-05  1.044e-05   5.255 1.51e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 230 on 10464 degrees of freedom
## Multiple R-squared:  0.9899, Adjusted R-squared:  0.9899 
## F-statistic: 6.843e+04 on 15 and 10464 DF,  p-value: < 2.2e-16

Multiple linear regression(2)

# Multiple linear regression of VAr.Total on every other column of sub.df.
# The response is named bare so that `.` unambiguously means "all columns of
# `data` except the response" and predict() can accept a `newdata` frame.
fit2 <- lm(VAr.Total ~ ., data = sub.df)
summary(fit2)

## 
## Call:
## lm(formula = sub.df$VAr.Total ~ ., data = sub.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1550.0  -102.8   -21.8    58.8 13487.6 
## 
## Coefficients:
##                       Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)          4.430e+02  3.381e+02    1.310 0.190109    
## W.Total              1.156e+00  7.846e-03  147.274  < 2e-16 ***
## P.F                 -2.916e+03  8.479e+00 -343.941  < 2e-16 ***
## VA.Total            -1.854e+00  2.028e-02  -91.439  < 2e-16 ***
## Amps.Ave.            5.395e+02  1.454e+01   37.117  < 2e-16 ***
## Frequency           -2.677e+05  3.162e+05   -0.847 0.397249    
## Wh.Rec.             -5.652e-04  1.261e-04   -4.481 7.49e-06 ***
## VAh.Rec.             1.143e-04  1.683e-04    0.679 0.497020    
## VArh.I.Rec.          1.122e-03  1.874e-04    5.989 2.18e-09 ***
## VArh.C.Rec.         -5.795e-04  1.504e-04   -3.854 0.000117 ***
## Neutral.Current     -6.855e+01  3.927e+00  -17.458  < 2e-16 ***
## Rising.Demand       -8.349e-02  3.655e-03  -22.844  < 2e-16 ***
## Maximum.Demand      -1.018e-01  1.427e-02   -7.135 1.04e-12 ***
## RPM                  8.923e+03  1.054e+04    0.847 0.397231    
## Load.Hours.Received  6.630e-09  2.553e-09    2.596 0.009431 ** 
## No.Of.Intrruptions  -6.139e-05  1.469e-05   -4.178 2.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 323.6 on 10464 degrees of freedom
## Multiple R-squared:  0.9666, Adjusted R-squared:  0.9666 
## F-statistic: 2.021e+04 on 15 and 10464 DF,  p-value: < 2.2e-16

Multiple linear regression(3)

# Multiple linear regression of VA.Total on every other column of sub.df.
# The response is named bare so that `.` unambiguously means "all columns of
# `data` except the response" and predict() can accept a `newdata` frame.
fit3 <- lm(VA.Total ~ ., data = sub.df)
summary(fit3)

## 
## Call:
## lm(formula = sub.df$VA.Total ~ ., data = sub.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -903.7  -51.6    0.1   51.3 3837.0 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.035e+02  1.215e+02  -0.851 0.394569    
## W.Total              2.690e-01  4.185e-03  64.271  < 2e-16 ***
## VAr.Total           -2.395e-01  2.619e-03 -91.439  < 2e-16 ***
## P.F                 -6.740e+02  8.416e+00 -80.081  < 2e-16 ***
## Amps.Ave.            4.918e+02  2.788e+00 176.391  < 2e-16 ***
## Frequency            2.752e+04  1.136e+05   0.242 0.808607    
## Wh.Rec.             -1.724e-04  4.533e-05  -3.803 0.000144 ***
## VAh.Rec.            -3.271e-05  6.050e-05  -0.541 0.588803    
## VArh.I.Rec.          5.059e-04  6.728e-05   7.519 5.99e-14 ***
## VArh.C.Rec.         -2.847e-04  5.400e-05  -5.272 1.38e-07 ***
## Neutral.Current      3.626e+00  1.431e+00   2.534 0.011299 *  
## Rising.Demand       -3.615e-03  1.345e-03  -2.687 0.007213 ** 
## Maximum.Demand      -1.762e-02  5.137e-03  -3.429 0.000608 ***
## RPM                 -9.172e+02  3.788e+03  -0.242 0.808671    
## Load.Hours.Received  4.512e-10  9.179e-10   0.492 0.623013    
## No.Of.Intrruptions  -1.949e-05  5.282e-06  -3.691 0.000225 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 116.3 on 10464 degrees of freedom
## Multiple R-squared:  0.9957, Adjusted R-squared:  0.9957 
## F-statistic: 1.632e+05 on 15 and 10464 DF,  p-value: < 2.2e-16

Multiple linear regression(4)

# Multiple linear regression of Amps.Ave. on every other column of sub.df.
# The response is named bare so that `.` unambiguously means "all columns of
# `data` except the response" and predict() can accept a `newdata` frame.
fit4 <- lm(Amps.Ave. ~ ., data = sub.df)
summary(fit4)

## 
## Call:
## lm(formula = sub.df$Amps.Ave. ~ ., data = sub.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1688 -0.1165  0.0047  0.1092  2.5197 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.100e-01  2.138e-01   0.982 0.325970    
## W.Total             -4.136e-05  8.686e-06  -4.762 1.95e-06 ***
## VAr.Total            2.156e-04  5.810e-06  37.117  < 2e-16 ***
## P.F                  6.427e-01  1.772e-02  36.264  < 2e-16 ***
## VA.Total             1.522e-03  8.627e-06 176.391  < 2e-16 ***
## Frequency           -6.242e+01  1.999e+02  -0.312 0.754847    
## Wh.Rec.              2.993e-07  7.975e-08   3.752 0.000176 ***
## VAh.Rec.             5.522e-08  1.064e-07   0.519 0.603851    
## VArh.I.Rec.         -8.151e-07  1.184e-07  -6.884 6.17e-12 ***
## VArh.C.Rec.          4.785e-07  9.501e-08   5.037 4.81e-07 ***
## Neutral.Current     -1.401e-02  2.515e-03  -5.572 2.58e-08 ***
## Rising.Demand       -7.229e-05  2.259e-06 -31.998  < 2e-16 ***
## Maximum.Demand       2.723e-05  9.037e-06   3.013 0.002592 ** 
## RPM                  2.080e+00  6.663e+00   0.312 0.754865    
## Load.Hours.Received  1.169e-12  1.615e-12   0.724 0.469060    
## No.Of.Intrruptions   1.541e-08  9.296e-09   1.658 0.097321 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2046 on 10464 degrees of freedom
## Multiple R-squared:  0.9943, Adjusted R-squared:  0.9943 
## F-statistic: 1.219e+05 on 15 and 10464 DF,  p-value: < 2.2e-16

The error in all four cases is very small, suggesting that each model fits the data well.

Polynomial regression (1)

# Quadratic regression of total active power (W.Total) on average current:
# a linear term plus a squared term. The square must be written inside I()
# so that `^` is treated as arithmetic rather than as a formula operator;
# without the `^2` the second term merely duplicates the first and lm()
# reports it as NA ("not defined because of singularities").
polymodel1 <- lm(W.Total ~ Amps.Ave. + I(Amps.Ave.^2), data = sub.df)
summary(polymodel1)

## 
## Call:
## lm(formula = sub.df$W.Total ~ sub.df$Amps.Ave. + I(sub.df$Amps.Ave.))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6433.3  -203.0   117.7   281.1  2273.4 
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -2273.406     18.539  -122.6   <2e-16 ***
## sub.df$Amps.Ave.      815.515      2.131   382.7   <2e-16 ***
## I(sub.df$Amps.Ave.)        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 591.2 on 10478 degrees of freedom
## Multiple R-squared:  0.9332, Adjusted R-squared:  0.9332 
## F-statistic: 1.465e+05 on 1 and 10478 DF,  p-value: < 2.2e-16

# Observed active power against average current, with the polymodel1 fitted
# curve on top. The axes now carry the quantities their labels name (current
# on x, active power on y).
plot(
  sub.df$Amps.Ave.,
  sub.df$W.Total,
  col = "yellow",
  ylab = "active total power",
  xlab = "average current",
  main = "polynomial regression"
)
# lines() joins points in the order given, so sort by current first to draw
# one smooth left-to-right curve instead of a zig-zag.
amps_order <- order(sub.df$Amps.Ave.)
lines(
  sub.df$Amps.Ave.[amps_order],
  predict(polymodel1)[amps_order],
  col = "blue",
  lwd = 2
)

Polynomial regression (2)

# Quadratic regression of total apparent power (VA.Total) on average current:
# a linear term plus a squared term. The square must be written inside I()
# so that `^` is treated as arithmetic rather than as a formula operator;
# without the `^2` the second term merely duplicates the first and lm()
# reports it as NA ("not defined because of singularities").
polymodel2 <- lm(VA.Total ~ Amps.Ave. + I(Amps.Ave.^2), data = sub.df)
summary(polymodel2)

## 
## Call:
## lm(formula = sub.df$VA.Total ~ sub.df$Amps.Ave. + I(sub.df$Amps.Ave.))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2229.99   -90.77     3.78    98.34   915.61 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         467.9738     5.1828   90.29   <2e-16 ***
## sub.df$Amps.Ave.    654.3793     0.5957 1098.44   <2e-16 ***
## I(sub.df$Amps.Ave.)       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 165.3 on 10478 degrees of freedom
## Multiple R-squared:  0.9914, Adjusted R-squared:  0.9914 
## F-statistic: 1.207e+06 on 1 and 10478 DF,  p-value: < 2.2e-16

# Observed apparent power against average current, with the polymodel2 fitted
# curve on top. The axes now carry the quantities their labels name (current
# on x, apparent power on y).
plot(
  sub.df$Amps.Ave.,
  sub.df$VA.Total,
  col = "yellow",
  ylab = "apparent total power",
  xlab = "average current",
  main = "polynomial regression"
)
# lines() joins points in the order given, so sort by current first to draw
# one smooth left-to-right curve instead of a zig-zag.
amps_order <- order(sub.df$Amps.Ave.)
lines(
  sub.df$Amps.Ave.[amps_order],
  predict(polymodel2)[amps_order],
  col = "blue",
  lwd = 2
)

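The intent here is to test whether I^2, that is the square of the current, improves the model and makes the curve fit better. In the output shown above, however, the second term is reported as NA (not defined because of singularities), so both fits are identical to simple linear regressions (2) and (3) and do not yet show any improvement from a squared term.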

Ridge regression(1) We fit the model on a training set of 5000 rows sampled at random from the first 6000 rows of the dataset.

# Split sub.df for the ridge regression: 5000 distinct row indices are drawn
# from 1..6000 for asub.df, and every remaining row of sub.df goes to bsub.df.
# Fixing the seed makes the split repeatable from run to run, so the reported
# lambda and R-squared can be reproduced.
# NOTE(review): sampling from the first 6000 rows rather than from all rows
# is kept as written; the reason is not stated here -- confirm it is intended.
set.seed(1)
sub.index <- sample(6000, 5000, replace = FALSE)
asub.df <- sub.df[sub.index, ]
bsub.df <- sub.df[-sub.index, ]
library(glmnet)

## Loading required package: Matrix

## Loading required package: foreach

## Loaded glmnet 2.0-16

# Build the glmnet inputs from the training rows: predictors VA.Total and
# Amps.Ave. as a numeric matrix, response W.Total as a vector.
design <- model.matrix(W.Total ~ VA.Total + Amps.Ave., data = asub.df)
x <- design[, -1]  # drop the first (intercept) column of the design matrix
y <- asub.df[["W.Total"]]

# Cross-validated fit over a path of lambda values; alpha = 0 selects the
# ridge penalty.
gfit <- cv.glmnet(x, y, alpha = 0)
plot(gfit)

summary(gfit)

##            Length Class  Mode     
## lambda     99     -none- numeric  
## cvm        99     -none- numeric  
## cvsd       99     -none- numeric  
## cvup       99     -none- numeric  
## cvlo       99     -none- numeric  
## nzero      99     -none- numeric  
## name        1     -none- character
## glmnet.fit 12     elnet  list     
## lambda.min  1     -none- numeric  
## lambda.1se  1     -none- numeric

The lowest point in the curve marks the optimal lambda: the value of lambda (shown on a log scale in the plot) that minimised the cross-validation error. This value is:

# Lambda with the lowest cross-validated error, as stored by cv.glmnet.
opt_lambda <- gfit[["lambda.min"]]
print(opt_lambda)

## [1] 250.862

The fitted model

# Take the underlying glmnet fit (all lambdas) out of the cross-validation
# object and list its components.
fit <- gfit[["glmnet.fit"]]
summary(fit)

##           Length Class     Mode   
## a0        100    -none-    numeric
## beta      200    dgCMatrix S4     
## df        100    -none-    numeric
## dim         2    -none-    numeric
## lambda    100    -none-    numeric
## dev.ratio 100    -none-    numeric
## nulldev     1    -none-    numeric
## npasses     1    -none-    numeric
## jerr        1    -none-    numeric
## offset      1    -none-    logical
## call        4    -none-    call   
## nobs        1    -none-    numeric

# Predictions at the selected lambda for the same rows the model was fitted
# on (x and y come from asub.df), so the R squared below is in-sample.
y_predicted <- predict(fit, newx = x, s = opt_lambda)

# Sum of Squares Error and Total
sse <- sum((y_predicted - y)^2)
sst <- sum((y - mean(y))^2)

# R squared: share of the variance in y reproduced by the predictions
rsq <- 1 - sse / sst
print(rsq)

## [1] 0.9391111

The optimal model accounts for 94% of the variance in W.Total on the training data, that is, the 5000 sampled rows used to fit it. The remaining 5480 rows of the dataset (bsub.df) are not used in this evaluation.