#Libraries
library(tidyverse)
library(rmarkdown)
library(RODBC)
library(RODBCext)
library(optparse)
library(data.table)
library(Matrix)
library(outliers)
library(plyr)
library(dplyr)
library(reshape2)
library(pastecs)
library(ggplot2)
library(plotly)
library(ggmap)
Set your database connection info
server <- "USINTERN13\\SQLEXPRESS"
database <-"test1"
#startDate <- "20160101" 
#endDate <- "20170202"
Reads in unique 108 zones
allNames <- as.data.frame(read.csv("C:/Users/sspinetto/Desktop/allnames.csv", stringsAsFactors = FALSE))

f <- function(s) strsplit(s, "_")
names <-sapply(allNames, f)

uniqueNames <- colsplit(allNames$mx_cfe_manzanillo_one_load_act, pattern="_", names = c("0","1","name","4","5","6"))

finalNames <- unique(uniqueNames$name)
finalNames[[53]]<- "acapulco"
finalNames<- sort(finalNames)
This is the function that gets the % of non zeroes for each load type, later it is looped through for each zone
getMyZeros <- function (server, database, finalNames) {

tryCatch({ 
  
necName <- finalNames
  
fileName1 <- paste0("mx_cfe_",necName)
  
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)

# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
 #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
                 )
load0 <-  sqlQuery(myServer,query0)


query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
                 #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
)
load3 <-  sqlQuery(myServer,query1)

query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
                 #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
)
load2 <-  sqlQuery(myServer,query2)

query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
                 #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
)
load1 <-  sqlQuery(myServer,query3)


# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3


#combine

fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))


colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
                        "date_3", "time_3", "load_act_3")
                      
## sanity check to make sure columns align
# 
# head(fullLoad)

## clean up


compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
                          fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)


zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)


zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
# 
# print (zeroPercent0)
# print (zeroPercent1)
# print (zeroPercent2)
# print (zeroPercent3)


### lets find the differences

compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3




summary(compareLoad$dif0to1)
summary(compareLoad$dif1to2)
summary(compareLoad$dif2to3)
summary(compareLoad$dif0to3)
}, error= function(e){})
return(zeroVector)
}
here we have our function to get summaries
getMySummaries <- function (server, database, finalNames) {
  
  tryCatch({ 
    

    
    fileName1 <- paste0("mx_cfe_", finalNames)
    
    #set connection
    setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
    myServer <- odbcDriverConnect(setServ)
    
    # perform queries to zero, one, two, three
    query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load0 <-  sqlQuery(myServer,query0)
    
    
    query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load3 <-  sqlQuery(myServer,query1)
    
    query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load2 <-  sqlQuery(myServer,query2)
    
    query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load1 <-  sqlQuery(myServer,query3)
    
    
    # trunc for compare
    trunc0 <- head(load0, -4872)
    trunc1 <- head(load1, -3864)
    trunc2 <- head(load2, -2520)
    trunc3 <- load3
    
    
    #combine
    
    fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
    
    
    colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
                            "date_3", "time_3", "load_act_3")
    
    ## sanity check to make sure columns align
    # 
    # head(fullLoad)
    
    ## clean up
    
    
    compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
                              fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
    
    
    zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
    zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
    zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
    zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)
    
    
    zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
    # 
    # print (zeroPercent0)
    # print (zeroPercent1)
    # print (zeroPercent2)
    # print (zeroPercent3)
    
    
    ### lets find the differences
    
    compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
    compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
    compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
    compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
    
    
  
    
    mean01 <- summary(load0$load_act)
    
    sumVector <- mean01
    
  }, error= function(e){})
  return(sumVector)
}

third function to get something else :P

getMyOther <- function (server, database, finalNames) {
  
  tryCatch({ 
    

    
    fileName1 <- paste0("mx_cfe_", finalNames)
    
    #set connection
    setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
    myServer <- odbcDriverConnect(setServ)
    
    # perform queries to zero, one, two, three
    query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load0 <-  sqlQuery(myServer,query0)
    
    
    query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load3 <-  sqlQuery(myServer,query1)
    
    query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load2 <-  sqlQuery(myServer,query2)
    
    query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
                     #     ," where date > ",startDate," AND date < ", endDate   #uncomment this line if using custom date set 
    )
    load1 <-  sqlQuery(myServer,query3)
    
    
    # trunc for compare
    trunc0 <- head(load0, -4872)
    trunc1 <- head(load1, -3864)
    trunc2 <- head(load2, -2520)
    trunc3 <- load3
    
    
    #combine
    
    fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
    
    
    colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
                            "date_3", "time_3", "load_act_3")
    
    ## sanity check to make sure columns align
    # 
    # head(fullLoad)
    
    ## clean up
    
    
    compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
                              fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
    
    
    

    ### lets find the differences
    
    compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
    compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
    compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
    compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
    
    
  
    
    mean01 <- summary(load0$load_act)
    
    sumVector <- mean01
    
  }, error= function(e){})
  return(s)
}
First Loop, Calculates Non Zeroes
loop_zero_data <- vector("list", length(finalNames))

for(x in seq_along(finalNames)){
  
  loop_zero_data[[x]] <- getMyZeros(server,database,finalNames[[x]])
}

names(loop_zero_data) <- finalNames

### prints % of zeroes in each data set, load0, load1, load2, load3

head(loop_zero_data)
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
## 
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
## 
## $ags
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
## 
## $apatzingan
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
## 
## $caborca
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
## 
## $camargo
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
second Loop calculates summaries
loop_sum_data <- vector("list", length(finalNames))

for(x in seq_along(finalNames)){
  
  loop_sum_data[[x]] <- getMySummaries(server,database,finalNames[[x]])
}
names(loop_sum_data) <- finalNames

### prints % of zeroes in each data set, load0, load1, load2, load3

head(loop_sum_data)
## $acapulco
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   185.3   212.1   188.6   231.4   495.0 
## 
## $acapulco
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   185.3   212.1   188.6   231.4   495.0 
## 
## $ags
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   341.1   403.2   361.7   456.4   711.0 
## 
## $apatzingan
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   80.71   93.84   86.84  110.00  190.60 
## 
## $caborca
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   64.92  129.80  172.10  291.80  461.10 
## 
## $camargo
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   81.57  113.00  112.00  149.00  263.40

Put Summary Data in Table 4 plotting

  getMedian <- ldply(loop_sum_data)
  
  plot_ly(getMedian, y= ~Median, type='scatter', mode='markers', text= ~.id)

lets see what else we can visualize that’s useful…..

lookupNames <- sapply(finalNames, paste0, " Mexico")

lonlat <- geocode(lookupNames) 
cities <- cbind(finalNames, lonlat)

myPlace <- "Mexico"
    myZoom <- 5
    

    myMap <- get_map(location=myPlace, zoom=myZoom)
    mapper <- ggmap(myMap)
    
    mapper+ geom_point(data=cities, aes(x=lon, y=lat), size=2,colour= "red")+geom_text(data =cities,aes(label=.id),hjust=0, vjust=0, size=2)