historian.R

#Read data
#Keep text columns as character vectors instead of factors; FALSE is spelled
#out because F is an ordinary variable that can be reassigned
historian <- read.csv("WPY_pv.csv", stringsAsFactors = FALSE)

#Remove NA values to save memory
#Keep only the rows whose Value is present
historian <- historian[which(!is.na(historian$Value)), ]

#Convert Date of Sample to date/time format
#POSIXct (one number per timestamp) is used rather than POSIXlt (a list of
#date components): it is the class meant for data frame columns and is smaller
#NOTE(review): the recorded range() output starts at 01:03:00 although the
#first row reads 12:03:00 - if the CSV uses a 12-hour clock with AM/PM the
#format should be "%d/%m/%Y %I:%M:%S %p"; confirm against the raw file
historian$DateOfSample <- as.POSIXct(historian$DateOfSample,
                                     format = "%d/%m/%Y %H:%M:%S")

#Shorten tag names: keep the second "_"-separated piece of TagName, lower-cased
#(e.g. "Coliban.WPY_CWCRA01_PV" -> "cwcra01")
#The piece is taken row by row, so a name that does not split into exactly
#three pieces cannot shift the tags of later rows; a name with no second
#piece gets NA
tag_pieces <- strsplit(historian$TagName, "_", fixed = TRUE)
historian$Tag <- tolower(vapply(tag_pieces, function(piece) piece[2L],
                                character(1)))

#Inspect data
#Compact overview: column names, classes and the first few values
str(object = historian)

## 'data.frame':    241218 obs. of  4 variables:
##  $ DateOfSample: POSIXlt, format: "2013-06-01 12:03:00" "2013-06-01 12:03:00" ...
##  $ TagName     : chr  "Coliban.WPY_CWCRA01_PV" "Coliban.WPY_DWPHA01_PV" "Coliban.WPY_FNCRA01_PV" "Coliban.WPY_FNFM01_PV" ...
##  $ Value       : num  1.22 6.4 1.28 0 7.6 ...
##  $ Tag         : chr  "cwcra01" "dwpha01" "fncra01" "fnfm01" ...

#First six rows
head(historian, n = 6L)

##          DateOfSample                TagName Value     Tag
## 1 2013-06-01 12:03:00 Coliban.WPY_CWCRA01_PV  1.22 cwcra01
## 2 2013-06-01 12:03:00 Coliban.WPY_DWPHA01_PV  6.40 dwpha01
## 3 2013-06-01 12:03:00 Coliban.WPY_FNCRA01_PV  1.28 fncra01
## 4 2013-06-01 12:03:00  Coliban.WPY_FNFM01_PV  0.00  fnfm01
## 5 2013-06-01 12:03:00 Coliban.WPY_FNPHA01_PV  7.60 fnpha01
## 6 2013-06-01 12:03:00  Coliban.WPY_FNTM01_PV  0.03  fntm01

#Earliest and latest sample timestamps
range(historian[["DateOfSample"]])

## [1] "2013-06-01 01:03:00 EST" "2014-06-30 12:53:33 EST"

#Number of samples per shortened tag
table(historian[["Tag"]])

## 
## cwcra01 dwpha01 fncra01  fnfm01 fnpha01  fntm01  fntm02  fntm03  rwfm01 
##   25305   19860   48954   22876   38157    9437    9556   10259   17032 
##  rwtm01  twfm01 
##   16172   23610

#Five-number summary and mean of Value across all tags
summary(historian[["Value"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.40    2.13    7.87    7.80  116.00

#Using base plotting system
#One kernel density estimate of Value per tag
densities <- tapply(historian$Value, historian$Tag, density, na.rm = TRUE)
#Panels are laid out two rows by three columns
par(mfrow = c(2, 3), mar = c(4, 4, 2, 1))
#A for loop is used instead of lapply() so that no list of NULL return values
#is printed, and each panel is titled with its tag so it can be identified
for (i in seq_along(densities)) {
        plot(densities[[i]], main = names(densities)[i])
}

plot of chunk unnamed-chunk-1

## $cwcra01
## NULL
## 
## $dwpha01
## NULL
## 
## $fncra01
## NULL
## 
## $fnfm01
## NULL
## 
## $fnpha01
## NULL
## 
## $fntm01
## NULL
## 
## $fntm02
## NULL
## 
## $fntm03
## NULL
## 
## $rwfm01
## NULL
## 
## $rwtm01
## NULL
## 
## $twfm01
## NULL

#Back to a single panel; the left margin is widened to 6 lines, presumably
#to leave room for the tag labels of the plot that follows
par(mar = c(4, 6, 1, 1), mfrow = c(1, 1))

plot of chunk unnamed-chunk-1

#Horizontal boxplot of Value for each tag
#One colour is generated per tag actually present instead of a fixed 11
tag_count <- nlevels(factor(historian$Tag))
boxplot(Value ~ Tag, data = historian, horizontal = TRUE, las = 2,
        col = rainbow(tag_count))

plot of chunk unnamed-chunk-1

#Line plot of Value against sample time, one panel per tag
par(mfrow = c(2, 3), mar = c(4, 4, 2, 1))
#Iterate over the tags actually present (in order of first appearance)
#instead of a hard-coded 1:11, so unique() runs once rather than three times
#per panel
for (tag in unique(historian$Tag)) {
        rows <- historian$Tag == tag
        plot(historian$DateOfSample[rows], historian$Value[rows],
             type = "l", main = tag,
             xlab = "Date of Sample", ylab = "Value")
}

plot of chunk unnamed-chunk-1

#Smooth scatter plots of Value against sample time, one per tag
#Iterate over the tags actually present (in order of first appearance)
#instead of a hard-coded 1:11; the stray empty positional argument of the
#earlier call is gone, so nbin keeps its default explicitly
for (tag in unique(historian$Tag)) {
        rows <- historian$Tag == tag
        smoothScatter(historian$DateOfSample[rows], historian$Value[rows],
                      main = tag,
                      xlab = "Date of Sample", ylab = "Value")
}

## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009

plot of chunk unnamed-chunk-1

## Warning: Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'
## Warning: Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'
## Warning: Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'

plot of chunk unnamed-chunk-1

#Using ggplot
library(ggplot2)
#Boxplots: one box of Value per full tag name, axes flipped so boxes lie flat
ggplot(data = historian, mapping = aes(x = TagName, y = Value)) +
        geom_boxplot() +
        coord_flip()
#Time Series: points plus a smoothed trend, one facet column per short tag
ggplot(data = historian, mapping = aes(x = DateOfSample, y = Value)) +
        geom_point() +
        facet_grid(~Tag) +
        geom_smooth()

## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-1

historian.R

peter

Wed Aug 27 10:39:58 2014