Project Title: Meter Readings from 3 Railway Stations

NAME: ASWATHY GUNADEEP

EMAIL: aswathygunadeep@gmail.com

COLLEGE / COMPANY: NATIONAL INSTITUTE OF TECHNOLOGY KARNATAKA

Based on the meter readings from the electricity board from a railway station, we intend to find out what affects the total ampere and power readings.

The data set includes information about:

# Point the session at the folder that holds the meter-reading CSV exports.
# NOTE(review): this absolute path is machine-specific; consider a
# project-relative path so the report can be rendered elsewhere.
setwd("C:/Users/user/Desktop/tarsha systems summer internship/datasets")
# Read the meter readings from 1.csv into a data frame.
met1.df <- read.csv("1.csv")
# View() opens a GUI data viewer, so only call it when someone is at the
# console to look at it.
if (interactive()) {
  View(met1.df)
}

SUMMARY OF THE VARIABLES OF THE DATASET

# Print the structure of the loaded data frame: its dimensions and, for each
# column, the name, type and first few values.
str(met1.df)
## 'data.frame':    10480 obs. of  40 variables:
##  $ W.Total            : num  5515 3891 4178 3911 5633 ...
##  $ W.R                : num  1607 1242 1341 1239 1689 ...
##  $ W.Y                : num  1941 1283 1388 1310 1991 ...
##  $ W.B                : num  1967 1366 1449 1362 1952 ...
##  $ VAr.Total          : num  -4193 -4106 -4154 -3981 -3741 ...
##  $ VAr.R              : num  -1253 -1287 -1311 -1270 -1174 ...
##  $ Var.Y              : num  -1439 -1382 -1410 -1338 -1274 ...
##  $ VAr.B              : num  -1500 -1437 -1433 -1373 -1293 ...
##  $ P.F                : num  0.796 0.688 0.709 0.701 0.833 ...
##  $ P.F.R              : num  0.789 0.694 0.715 0.698 0.821 ...
##  $ P.F.Y              : num  0.803 0.68 0.702 0.699 0.842 ...
##  $ P.F.B              : num  0.795 0.689 0.711 0.704 0.834 ...
##  $ VA.Total           : num  6928 5657 5892 5581 6762 ...
##  $ VA.R               : num  2038 1789 1875 1775 2057 ...
##  $ VA.Y               : num  2417 1885 1979 1872 2364 ...
##  $ VA.B               : num  2474 1983 2038 1934 2342 ...
##  $ Volts.R            : num  251 247 249 244 244 ...
##  $ Volts.Y            : num  247 246 247 242 240 ...
##  $ Volts.B            : num  252 250 251 245 244 ...
##  $ Amps.Ave.          : num  9.41 7.72 8 7.72 9.44 ...
##  $ Amps.R             : num  8.33 7.37 7.68 7.39 8.59 ...
##  $ Amps.Y             : num  9.9 7.86 8.11 7.8 9.89 ...
##  $ Amps.B             : num  10.01 7.93 8.2 7.98 9.82 ...
##  $ Frequency          : num  50.1 50 50.1 49.9 50 ...
##  $ Wh.Rec.            : num  26794 28127 29373 30585 32092 ...
##  $ VAh.Rec.           : num  32338 34018 35634 37218 39013 ...
##  $ VArh.I.Rec.        : num  0.0144 0.0144 0.0144 0.0144 0.0144 ...
##  $ VArh.C.Rec.        : num  -17121 -18095 -19067 -20050 -20959 ...
##  $ Neutral.Current    : num  2.2 0 0 0 1.96 ...
##  $ X.THD.Volt.R       : num  3.22 3.21 2.98 2.57 2.27 ...
##  $ X..THD.Volt.Y      : num  2.36 2.25 2.52 2.08 1.81 ...
##  $ X..THD.Volt.B      : num  2.64 2.29 2.48 1.75 1.84 ...
##  $ X.THD.Amps.R       : num  20.9 19.6 17.9 14 11.8 ...
##  $ X.THD.Amps.Y       : num  16.5 16.3 14.9 21.1 11 ...
##  $ X.THD.Amps.B       : num  17.7 17.5 16.1 13.6 15.9 ...
##  $ Rising.Demand      : num  7255 5332 4985 4831 6036 ...
##  $ Maximum.Demand     : num  7269 7393 7393 7393 7393 ...
##  $ RPM                : num  1502 1499 1503 1498 1500 ...
##  $ Load.Hours.Received: num  1.18e+09 1.24e+09 1.30e+09 1.35e+09 1.41e+09 1.47e+09 1.53e+09 1.59e+09 1.65e+09 1.71e+09 ...
##  $ No.Of.Intrruptions : int  0 0 0 0 0 0 0 0 0 0 ...

VISUALIZATION: CORRGRAM

library(corrgram)
# Draw the corrgram on a single full-size panel.
par(mfrow = c(1, 1))
# Total / station-level readings used for the correlation overview
# (columns 1, 5, 9, 13, 20, 24-29 and 36-40 of met1.df; per-phase columns
# are left out).
overview_cols <- c(
  "W.Total", "VAr.Total", "P.F", "VA.Total", "Amps.Ave.", "Frequency",
  "Wh.Rec.", "VAh.Rec.", "VArh.I.Rec.", "VArh.C.Rec.", "Neutral.Current",
  "Rising.Demand", "Maximum.Demand", "RPM", "Load.Hours.Received",
  "No.Of.Intrruptions"
)
sub.df <- met1.df[, overview_cols]
# Shaded cells below the diagonal, pie glyphs above it, variable names on it.
corrgram(
  sub.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of dataset"
)

In the corrgram, blue shading indicates that two continuous variables are positively correlated, and red shading indicates that they are negatively correlated. These are some of the possible conclusions that can be deduced from the above graph:

CORRELATION MATRIX

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Pairwise Pearson correlations of the overview columns, computed on the rows
# that have no missing values, then shown to two decimal places.
data <- cor(x = sub.df, use = "complete.obs")
round(data, digits = 2)
##                     W.Total VAr.Total   P.F VA.Total Amps.Ave. Frequency
## W.Total                1.00      0.28  0.19     0.96      0.97      0.01
## VAr.Total              0.28      1.00 -0.82     0.14      0.17     -0.03
## P.F                    0.19     -0.82  1.00     0.26      0.25      0.01
## VA.Total               0.96      0.14  0.26     1.00      1.00      0.03
## Amps.Ave.              0.97      0.17  0.25     1.00      1.00      0.03
## Frequency              0.01     -0.03  0.01     0.03      0.03      1.00
## Wh.Rec.                0.03      0.32 -0.29    -0.01      0.00      0.01
## VAh.Rec.               0.03      0.32 -0.29     0.00      0.00      0.02
## VArh.I.Rec.            0.00      0.46 -0.47    -0.05     -0.04      0.01
## VArh.C.Rec.           -0.04     -0.30  0.27     0.00     -0.01     -0.02
## Neutral.Current        0.32      0.04  0.02     0.34      0.33      0.02
## Rising.Demand          0.62      0.17  0.18     0.49      0.48      0.02
## Maximum.Demand         0.05      0.16 -0.12     0.03      0.04      0.16
## RPM                    0.01     -0.03  0.01     0.03      0.03      1.00
## Load.Hours.Received   -0.03     -0.02  0.02    -0.02     -0.02      0.01
## No.Of.Intrruptions     0.03      0.38 -0.35    -0.02     -0.01      0.02
##                     Wh.Rec. VAh.Rec. VArh.I.Rec. VArh.C.Rec.
## W.Total                0.03     0.03        0.00       -0.04
## VAr.Total              0.32     0.32        0.46       -0.30
## P.F                   -0.29    -0.29       -0.47        0.27
## VA.Total              -0.01     0.00       -0.05        0.00
## Amps.Ave.              0.00     0.00       -0.04       -0.01
## Frequency              0.01     0.02        0.01       -0.02
## Wh.Rec.                1.00     1.00        0.60       -1.00
## VAh.Rec.               1.00     1.00        0.60       -1.00
## VArh.I.Rec.            0.60     0.60        1.00       -0.56
## VArh.C.Rec.           -1.00    -1.00       -0.56        1.00
## Neutral.Current       -0.10    -0.10       -0.02        0.11
## Rising.Demand          0.06     0.06        0.00       -0.06
## Maximum.Demand         0.85     0.85        0.34       -0.86
## RPM                    0.01     0.02        0.01       -0.02
## Load.Hours.Received    0.00     0.00        0.00        0.00
## No.Of.Intrruptions     0.98     0.98        0.65       -0.98
##                     Neutral.Current Rising.Demand Maximum.Demand   RPM
## W.Total                        0.32          0.62           0.05  0.01
## VAr.Total                      0.04          0.17           0.16 -0.03
## P.F                            0.02          0.18          -0.12  0.01
## VA.Total                       0.34          0.49           0.03  0.03
## Amps.Ave.                      0.33          0.48           0.04  0.03
## Frequency                      0.02          0.02           0.16  1.00
## Wh.Rec.                       -0.10          0.06           0.85  0.01
## VAh.Rec.                      -0.10          0.06           0.85  0.02
## VArh.I.Rec.                   -0.02          0.00           0.34  0.01
## VArh.C.Rec.                    0.11         -0.06          -0.86 -0.02
## Neutral.Current                1.00          0.19          -0.09  0.02
## Rising.Demand                  0.19          1.00           0.09  0.02
## Maximum.Demand                -0.09          0.09           1.00  0.16
## RPM                            0.02          0.02           0.16  1.00
## Load.Hours.Received            0.00         -0.01           0.00  0.01
## No.Of.Intrruptions            -0.10          0.05           0.80  0.02
##                     Load.Hours.Received No.Of.Intrruptions
## W.Total                           -0.03               0.03
## VAr.Total                         -0.02               0.38
## P.F                                0.02              -0.35
## VA.Total                          -0.02              -0.02
## Amps.Ave.                         -0.02              -0.01
## Frequency                          0.01               0.02
## Wh.Rec.                            0.00               0.98
## VAh.Rec.                           0.00               0.98
## VArh.I.Rec.                        0.00               0.65
## VArh.C.Rec.                        0.00              -0.98
## Neutral.Current                    0.00              -0.10
## Rising.Demand                     -0.01               0.05
## Maximum.Demand                     0.00               0.80
## RPM                                0.01               0.02
## Load.Hours.Received                1.00               0.00
## No.Of.Intrruptions                 0.00               1.00
  1. Relation between average current, VA Total and rising demand:
library(car)
## Loading required package: carData
# Scatterplot matrix of average current, total apparent power and rising
# demand. The diagonal panels are requested through a named list
# (method = "histogram"); a bare string is not recognised by car and the
# diagonal would silently fall back to the default density estimate.
scatterplotMatrix(
  formula = ~ Amps.Ave. + VA.Total + Rising.Demand,
  data = met1.df,
  diagonal = list(method = "histogram"),
  cex = 0.6,
  col = "red"
)

PEARSON’S CORRELATION TEST:

# Pearson correlation test between average current and total apparent power.
# cor.test() only prints a test summary and draws nothing, so no plotting
# layout is configured here.
cor.test(met1.df$Amps.Ave., met1.df$VA.Total)
## 
##  Pearson's product-moment correlation
## 
## data:  met1.df$Amps.Ave. and met1.df$VA.Total
## t = 1098.4, df = 10478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9955180 0.9958478
## sample estimates:
##      cor 
## 0.995686
cor.test(met1.df$Amps.Ave.,met1.df$Rising.Demand)
## 
##  Pearson's product-moment correlation
## 
## data:  met1.df$Amps.Ave. and met1.df$Rising.Demand
## t = 55.873, df = 10478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4642231 0.4937277
## sample estimates:
##       cor 
## 0.4791107
cor.test(met1.df$VA.Total,met1.df$Rising.Demand)
## 
##  Pearson's product-moment correlation
## 
## data:  met1.df$VA.Total and met1.df$Rising.Demand
## t = 58.172, df = 10478, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4794728 0.5084195
## sample estimates:
##       cor 
## 0.4940831

From the analysis, it is evident that total apparent power rises with the average current (r ≈ 1.00), whereas rising demand is only moderately correlated with either of them (r ≈ 0.48–0.49).

  1. Factors driving total active power(W Total)
# Reset the base-graphics layout to a single panel.
par(mfrow = c(1, 1))
# Scatter of total active power against total apparent power;
# type = c("p", "g") asks for points plus a background grid.
xyplot(
  W.Total ~ VA.Total,
  data = met1.df,
  type = c("p", "g"),
  col = "green",
  main = "total active power v/s total apparent power",
  xlab = "apparent power",
  ylab = "active power"
)

# Two histograms side by side: total active power on the left, total apparent
# power on the right. Both share the x-range and the repeating bar palette.
par(mfrow = c(1, 2))
bar_colours <- c("red", "blue", "green", "yellow")
hist(
  met1.df$W.Total,
  col = bar_colours,
  main = "Total Active power",
  xlab = "Total Active power",
  xlim = c(0, 20000),
  ylim = c(0, 3000)
)
hist(
  met1.df$VA.Total,
  col = bar_colours,
  main = "Total Apparent power",
  xlab = "Total Apparent power",
  xlim = c(0, 20000),
  ylim = c(0, 4000)
)

# Back to a single plotting panel after the side-by-side histograms.
par(mfrow = c(1, 1))
# Correlations among total active power, total apparent power and rising
# demand (columns 1, 13 and 36 of met1.df), rounded to two decimals.
sub1.df <- met1.df[, c("W.Total", "VA.Total", "Rising.Demand")]
data <- cor(x = sub1.df, use = "complete.obs")
round(data, digits = 2)
##               W.Total VA.Total Rising.Demand
## W.Total          1.00     0.96          0.62
## VA.Total         0.96     1.00          0.49
## Rising.Demand    0.62     0.49          1.00

Total active power increases with total apparent power (r = 0.96), and it is also moderately correlated with rising demand (r = 0.62).

  1. Relation between frequency and RPM

Dependent t-Test: NULL Hypothesis: There is no significant difference between the means of the 2 variables.

# Side-by-side box plots of log-transformed frequency and RPM.
par(mfrow = c(1, 2))
# Zero readings become -Inf under log(); the stored vectors keep every row,
# but only finite values are passed to boxplot() so that the quartiles are not
# distorted and no undrawable outliers are produced.
log.transformed.fr <- log(met1.df$Frequency)
boxplot(log.transformed.fr[is.finite(log.transformed.fr)], col = "turquoise")
log.transformed.rpm <- log(met1.df$RPM)
boxplot(log.transformed.rpm[is.finite(log.transformed.rpm)], col = "orange")

# Paired t-test on the raw readings.
# NOTE(review): Frequency (about 50) and RPM (about 1500) are on very
# different scales, so a non-zero mean difference is expected by construction;
# a correlation test may describe their relationship better - confirm intent.
t.test(met1.df$Frequency, met1.df$RPM, paired = TRUE)
## 
##  Paired t-test
## 
## data:  met1.df$Frequency and met1.df$RPM
## t = -10390, df = 10479, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1449.586 -1449.039
## sample estimates:
## mean of the differences 
##               -1449.313

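Since the p-value is very small (< 0.01), we reject the null hypothesis: the means of Frequency and RPM differ significantly. Note, however, that the two variables are on very different scales (about 50 versus about 1500), so a difference in means is expected; the correlation matrix above shows that they are almost perfectly correlated (r = 1.00). The box plots revealed several outliers.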

  1. Relation between active energy received and apparent energy received
# Single-panel layout for base graphics.
par(mfrow = c(1, 1))
# Scatter of active energy received against apparent energy received, drawn
# as points over a background grid.
xyplot(
  Wh.Rec. ~ VAh.Rec.,
  data = met1.df,
  type = c("p", "g"),
  col = "violet",
  main = "active energy received v/s apparent energy received",
  xlab = "apparent energy received",
  ylab = "active energy received"
)

Active energy received increases almost linearly with apparent energy received.

  1. Power factor and total reactive power
# Single-panel layout for base graphics.
par(mfrow = c(1, 1))
# Scatter of total reactive power against power factor, drawn as points over
# a background grid.
xyplot(
  VAr.Total ~ P.F,
  data = met1.df,
  type = c("p", "g"),
  col = "darkolivegreen",
  main = "Power factor and total reactive power ",
  xlab = "Power factor",
  ylab = "total reactive power"
)

The scatter plot is skewed, and total reactive power decreases as the power factor increases.

VARIATION OF EACH VARIABLE IN EACH PHASE

R PHASE

library(corrplot)
## corrplot 0.84 loaded
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Single-panel layout for the R-phase correlation plot.
par(mfrow = c(1, 1))
# R-phase columns of met1.df: W.R, VAr.R, P.F.R, VA.R, Volts.R, Amps.R and the
# two R-phase THD readings.
sub2.df <- met1.df[, c(2, 6, 10, 14, 17, 21, 30, 33)]
r_phase_cor <- cor(sub2.df, use = "complete.obs")
# Ellipses above the diagonal; variable labels on the left and top.
corrplot.mixed(
  corr = r_phase_cor,
  upper = "ellipse",
  tl.pos = "lt",
  main = "R Phase"
)

Y PHASE

# Single-panel layout for the Y-phase correlation plot.
par(mfrow = c(1, 1))
# Y-phase columns of met1.df: W.Y, Var.Y, P.F.Y, VA.Y, Volts.Y, Amps.Y and the
# two Y-phase THD readings.
sub3.df <- met1.df[, c(3, 7, 11, 15, 18, 22, 31, 34)]
y_phase_cor <- cor(sub3.df, use = "complete.obs")
# Ellipses above the diagonal; variable labels on the left and top.
corrplot.mixed(
  corr = y_phase_cor,
  upper = "ellipse",
  tl.pos = "lt",
  main = "Y Phase"
)

B PHASE

# Single-panel layout for the B-phase correlation plot.
par(mfrow = c(1, 1))
# B-phase columns of met1.df: W.B, VAr.B, P.F.B, VA.B, Volts.B, Amps.B and the
# two B-phase THD readings.
sub4.df <- met1.df[, c(4, 8, 12, 16, 19, 23, 32, 35)]
b_phase_cor <- cor(sub4.df, use = "complete.obs")
# Ellipses above the diagonal; variable labels on the left and top.
corrplot.mixed(
  corr = b_phase_cor,
  upper = "ellipse",
  tl.pos = "lt",
  main = "B Phase"
)

All three phases show the same trends as those discussed above.