#Libraries
library(tidyverse)
library(rmarkdown)
library(RODBC)
library(RODBCext)
library(optparse)
library(data.table)
library(Matrix)
library(outliers)
library(plyr)
library(dplyr)
library(reshape2)
library(pastecs)
library(ggplot2)
library(plotly)
library(ggmap)
Set your database connection info
server <- "USINTERN13\\SQLEXPRESS"
database <-"test1"
#startDate <- "20160101"
#endDate <- "20170202"
Reads in unique 108 zones
allNames <- as.data.frame(read.csv("C:/Users/sspinetto/Desktop/allnames.csv", stringsAsFactors = FALSE))
f <- function(s) strsplit(s, "_")
names <-sapply(allNames, f)
uniqueNames <- colsplit(allNames$mx_cfe_manzanillo_one_load_act, pattern="_", names = c("0","1","name","4","5","6"))
finalNames <- unique(uniqueNames$name)
finalNames[[53]]<- "acapulco"
finalNames<- sort(finalNames)
This is the function that gets the % of non zeroes for each load type, later it is looped through for each zone
getMyZeros <- function (server, database, finalNames) {
tryCatch({
necName <- finalNames
fileName1 <- paste0("mx_cfe_",necName)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)
zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
#
# print (zeroPercent0)
# print (zeroPercent1)
# print (zeroPercent2)
# print (zeroPercent3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
summary(compareLoad$dif0to1)
summary(compareLoad$dif1to2)
summary(compareLoad$dif2to3)
summary(compareLoad$dif0to3)
}, error= function(e){})
return(zeroVector)
}
here we have our function to get summaries
getMySummaries <- function (server, database, finalNames) {
tryCatch({
fileName1 <- paste0("mx_cfe_", finalNames)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)
zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
#
# print (zeroPercent0)
# print (zeroPercent1)
# print (zeroPercent2)
# print (zeroPercent3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
mean01 <- summary(load0$load_act)
sumVector <- mean01
}, error= function(e){})
return(sumVector)
}
third function to get something else :P
getMyOther <- function (server, database, finalNames) {
tryCatch({
fileName1 <- paste0("mx_cfe_", finalNames)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
mean01 <- summary(load0$load_act)
sumVector <- mean01
}, error= function(e){})
return(s)
}
First Loop, Calculates Non Zeroes
loop_zero_data <- vector("list", length(finalNames))
for(x in seq_along(finalNames)){
loop_zero_data[[x]] <- getMyZeros(server,database,finalNames[[x]])
}
names(loop_zero_data) <- finalNames
### prints % of zeroes in each data set, load0, load1, load2, load3
head(loop_zero_data)
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $ags
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $apatzingan
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $caborca
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $camargo
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
second Loop calculates summaries
loop_sum_data <- vector("list", length(finalNames))
for(x in seq_along(finalNames)){
loop_sum_data[[x]] <- getMySummaries(server,database,finalNames[[x]])
}
names(loop_sum_data) <- finalNames
### prints % of zeroes in each data set, load0, load1, load2, load3
head(loop_sum_data)
## $acapulco
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 185.3 212.1 188.6 231.4 495.0
##
## $acapulco
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 185.3 212.1 188.6 231.4 495.0
##
## $ags
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 341.1 403.2 361.7 456.4 711.0
##
## $apatzingan
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 80.71 93.84 86.84 110.00 190.60
##
## $caborca
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 64.92 129.80 172.10 291.80 461.10
##
## $camargo
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 81.57 113.00 112.00 149.00 263.40
Put Summary Data in Table 4 plotting
getMedian <- ldply(loop_sum_data)
plot_ly(getMedian, y= ~Median, type='scatter', mode='markers', text= ~.id)
lets see what else we can visualize that’s useful…..
lookupNames <- sapply(finalNames, paste0, " Mexico")
lonlat <- geocode(lookupNames)
cities <- cbind(finalNames, lonlat)
myPlace <- "Mexico"
myZoom <- 5
myMap <- get_map(location=myPlace, zoom=myZoom)
mapper <- ggmap(myMap)
mapper+ geom_point(data=cities, aes(x=lon, y=lat), size=2,colour= "red")+geom_text(data =cities,aes(label=.id),hjust=0, vjust=0, size=2)
