#IoT Sample Data Illustration
setwd("~/Documents/DataScience/IoT")
# DATA COLLECTED FROM SQL SERVER ON MICROSOFT AZURE CLOUD USING FOLLOWING CODE.
# install.pacakges(“RODBC”) ## database connector
# require(“RODBC”)
#
# #Flow sensor connected to channel 18
#
# The Code:
#
# install.packages("RODBC”) ## database connector
# require("RODBC”)
# #new data connection using RODBC drivers connect… allows customized connection liking to SQL data store in the server listed below
# d <- odbcDriverConnect(connection = "Driver={SQL Server Native Client 11.0};
# Server=tcp:qvr9aar3i6.database.windows.net,1433;
# Database=TelemetryDB;
# Uid=drcrook@qvr9aar3i6;
# Pwd=xxxxx; # not shown
# Encrypt=yes;
# TrustServerCertificate=no;
# Connection Timeout=30;”)
# class(d) # d is class RODBC
# sqlColumns(d, "WaterFlows”) # shows columns in the table “WaterFlows” in the db we connected to
# # one sensor, one telemetry value for simplicity of this analysis
# #
# flows <- as.data.frame(sqlFetch(d, "WaterFlows"))
# write.csv(flows, file="flows.csv", row.names=F)
flows <- read.csv("flows.csv", header=T)
summary(flows)
## Id Location WaterFlow
## Min. : 33 WaterSystemPrototype :6071 Min. : 0.000
## 1st Qu.:1550 1st Qu.: 6.827
## Median :3068 Median : 7.681
## Mean :3068 Mean : 7.799
## 3rd Qu.:4586 3rd Qu.: 8.533
## Max. :6103 Max. :84.843
##
## CollectionTime DeviceId
## 11/12/15 16:41: 12 PGKit1 :6071
## 11/12/15 16:42: 12
## 11/12/15 16:43: 12
## 11/12/15 16:44: 12
## 11/12/15 16:45: 12
## 11/12/15 16:46: 12
## (Other) :5999
## SensorId
## PGKit1-Flow-18 :6071
##
##
##
##
##
##
str(flows)
## 'data.frame': 6071 obs. of 6 variables:
## $ Id : int 33 34 35 36 37 38 39 40 41 42 ...
## $ Location : Factor w/ 1 level "WaterSystemPrototype ": 1 1 1 1 1 1 1 1 1 1 ...
## $ WaterFlow : num 7.68 7.89 7.68 7.68 7.68 ...
## $ CollectionTime: Factor w/ 551 levels "11/12/15 16:40",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ DeviceId : Factor w/ 1 level "PGKit1 ": 1 1 1 1 1 1 1 1 1 1 ...
## $ SensorId : Factor w/ 1 level "PGKit1-Flow-18 ": 1 1 1 1 1 1 1 1 1 1 ...
# flows variables - ID, location, waterFlow , Colletiontime, DeviceID, SensorID
# key value waterFlow
# ID is sequential
# Sensor ID - sensor type, sensor connection channel /flow sensor
#flows["WaterFlow"]
summary(flows["WaterFlow"]) #WaterFlow is skewed
## WaterFlow
## Min. : 0.000
## 1st Qu.: 6.827
## Median : 7.681
## Mean : 7.799
## 3rd Qu.: 8.533
## Max. :84.843
var(flows["WaterFlow"])
## WaterFlow
## WaterFlow 2.92272
sd(flows$WaterFlow)
## [1] 1.709597
boxplot(
x = flows$WaterFlow,
xlab = "Flow/Minute",
horizontal = TRUE
)

outlierThreshold <- sd(flows$WaterFlow) * 1.5 #calculate outlier threshold
m <- mean(flows$WaterFlow) #calculate mean
filtFlows <- flows[flows$WaterFlow > (m - outlierThreshold),] #filter out anything under threshold
filtFlows <- filtFlows[filtFlows$WaterFlow < (m + outlierThreshold),] #filter out anything over threshold
summary(flows$WaterFlow)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 6.827 7.681 7.799 8.533 84.840
summary(filtFlows$WaterFlow)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.297 6.827 8.533 7.886 8.533 9.052
#new box plot, no outliers
boxplot(
x = filtFlows$WaterFlow,
xlab = "Flow/Minute",
horizontal = TRUE
)

#line graph w/outliers
plot(
y = flows$WaterFlow,
x = flows$CollectionTime,
type = "l"
)

#line graph no outliers
# plot(
# y = filtFlows$WaterFlow,
# x = filtFlows$CollectionTime,
# type = "l"
# )
# Timeseries analysis
t <- ts(flows$WaterFlow, frequency = 6307200)
plot(t)

plot(density(flows$WaterFlow))

plot(density(filtFlows$WaterFlow))
