Required Libraries

Load required libraries and plot template.

#Load Required Libraries
library(solarf)
library(knitr) #For pretty tables on the report using kable 
library(plyr)
library(ggplot2)
library(GGally)
library(caret)
library(extrafont)
#Theme for Plot
science_theme = 

theme(
    panel.background=element_blank(), 
    #panel.grid.major = element_line(size = 0.5, color = "grey"),
    #axis.line = element_line(size = 0.7, color = "black"), 
    #legend.position = c(0.9, 0.7), 
    #text = element_text(size = 14),
    axis.line=element_blank(),
    text = element_text(size=15, family="Interstate"),
    axis.text.x = element_text(hjust = 1, size=15,color="black"), 
    axis.text.y = element_text(hjust = 1, size=15,color="black"), 
    axis.title.y=element_text(size=15), 
    axis.title.x=element_text(size=15),
    axis.ticks = element_line(colour=NA),
    panel.grid.major = element_line(colour = 'gray', linetype = 'dashed'),
    panel.grid.minor = element_line(colour = NA),  
    panel.background = element_rect(colour = 'white'))

Data Preparation Parameters

variable <- "SOLR"
nclusters <- 5

SOLR

SCBH1 <- readRDS("/Users/carlos/Google Drive/Research/SolarForecasting/datasets/mesowest/data/SCBH1.rds")

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If SOLR is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization 5-means

Cluster with 5-means the dataset to obtain the daily profiles.

#Set seed for reproducibility
set.seed(1234)
kmeans.results <- kmeans(vector[,4:ncol(vector)],nclusters)
vector$clusterid <- kmeans.results[[1]]
centroids <- kmeans.results[[2]]

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.solar <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
2853 2011 8 20 64 140 134 149 201 188 146 211 129 81 4 1
38 2003 2 7 372 620 736 826 850 793 685 497 280 60 2 1
959 2006 3 24 199 529 929 995 726 501 333 218 181 59 2 1
2939 2011 11 15 113 146 185 207 250 264 149 227 73 7 4 1
1932 2008 12 18 62 116 296 304 260 565 472 449 134 16 4 1

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
96.48299 147.0476 185.7497 209.1020 225.3701 235.7184 240.3211 238.3252 202.2259 109.97551
265.63504 416.2701 524.8015 536.3723 466.2657 392.3810 337.9927 257.6292 170.9825 70.75328
179.27831 312.7568 434.9427 547.3954 651.0064 687.7654 639.9725 530.1320 341.7138 146.76885
359.17192 574.7114 732.5726 803.5174 768.3454 669.8628 540.4795 396.4653 258.4795 108.48107
357.29153 573.5679 752.0663 885.0358 954.2930 941.3826 841.5112 675.5182 450.1414 200.40016
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Solar Radiation Itensity (W/m2") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Data Preparation Parameters

variable <- "PREC"
nclusters <- 5

PREC

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If PREC is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization Binning (since Centroids are constant lines)

#Set seed for reproducibility
set.seed(1234)

#Mean precipitation of each day
mean.prec <- rowMeans(vector[,4:ncol(vector)])

library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.1.3
## Loading required package: grid
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.1.3
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:caret':
## 
##     cluster
## 
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.1.3
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
#Min and max
bins <- cut(mean.prec,breaks=5)
interval_labels <- levels(cut(mean.prec,breaks=5))

bin_interval_mean <- function(interval_label){
    lower_and_upper <- unlist(strsplit(interval_label, ",", fixed = TRUE))   
    lower <- as.numeric(substr(lower_and_upper[1],start=2,stop=nchar(lower_and_upper[1])))
    upper <- as.numeric(substr(lower_and_upper[2],start=1,stop=nchar(lower_and_upper[2])-1))
    return (mean(c(lower,upper)))
}

centroids[1,1:ncol(centroids)] <- bin_interval_mean(interval_labels[1])
centroids[2,1:ncol(centroids)] <- bin_interval_mean(interval_labels[2])
centroids[3,1:ncol(centroids)] <- bin_interval_mean(interval_labels[3])
centroids[4,1:ncol(centroids)] <- bin_interval_mean(interval_labels[4])
centroids[5,1:ncol(centroids)] <- bin_interval_mean(interval_labels[5])

vector$clusterid <- bins

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.prec <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
598 2005 3 25 34.11 34.12 34.13 34.13 34.13 34.32 34.34 34.34 34.39 34.49 2 1
2592 2010 10 19 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.42 1 1
2543 2010 8 31 13.70 13.70 13.70 13.70 13.70 13.70 13.70 13.70 13.70 13.70 1 0
2595 2010 10 22 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.42 15.52 1 0
3543 2013 7 12 29.59 29.59 29.59 29.59 29.59 29.59 29.60 29.60 29.61 29.62 2 1

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
10.0495 10.0495 10.0495 10.0495 10.0495 10.0495 10.0495 10.0495 10.0495 10.0495
30.3000 30.3000 30.3000 30.3000 30.3000 30.3000 30.3000 30.3000 30.3000 30.3000
50.5000 50.5000 50.5000 50.5000 50.5000 50.5000 50.5000 50.5000 50.5000 50.5000
70.7000 70.7000 70.7000 70.7000 70.7000 70.7000 70.7000 70.7000 70.7000 70.7000
90.9000 90.9000 90.9000 90.9000 90.9000 90.9000 90.9000 90.9000 90.9000 90.9000
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Precipitation (in)") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Data Preparation Parameters

variable <- "TMPF"
nclusters <- 5

TMPF

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If TMPF is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization 5-means

Cluster with 5-means the dataset to obtain the daily profiles.

#Set seed for reproducibility
set.seed(1234)
kmeans.results <- kmeans(vector[,4:ncol(vector)],nclusters)
vector$clusterid <- kmeans.results[[1]]
centroids <- kmeans.results[[2]]

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.tmpfc <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
2676 2011 1 24 65 74 75 74 73 75 78 75 79 72 5 1
43 2003 2 12 67 69 72 74 77 77 73 72 69 69 5 1
1088 2006 8 27 79 79 82 80 80 75 81 84 84 83 2 1
2779 2011 5 7 69 67 66 70 70 72 72 71 69 67 1 1
2176 2009 8 19 80 82 81 84 79 84 82 85 83 78 2 1

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
67.13850 68.49307 69.43490 69.95014 70.36288 70.27978 69.75069 69.44875 68.65651 67.13573
69.35815 72.10112 73.48034 74.34129 74.50421 74.40590 74.03511 73.20365 72.16433 70.33427
72.90111 75.43667 76.67667 77.17444 77.31222 77.10889 76.58000 75.60333 74.27556 72.24000
76.27563 77.96518 79.05609 79.55416 79.69439 79.51451 79.10928 78.37041 77.02998 75.00193
78.60000 80.42192 81.70411 82.57808 82.81918 82.69452 82.26712 81.31370 79.62603 77.17808
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Solar Radiation Itensity (W/m2") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Data Preparation Parameters

variable <- "DWPF"
nclusters <- 5

DWPF

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If DWPF is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization 5-means

Cluster with 5-means the dataset to obtain the daily profiles.

#Set seed for reproducibility
set.seed(1234)
kmeans.results <- kmeans(vector[,4:ncol(vector)],nclusters)
vector$clusterid <- kmeans.results[[1]]
centroids <- kmeans.results[[2]]

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.dwpfc <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
2675 2011 1 23 59.0 58.0 60.5 57.7 58.2 59.6 62.0 60.8 60.6 60.0 2 1
43 2003 2 12 62.7 60.4 59.6 60.1 58.5 56.9 56.4 58.7 57.2 58.4 2 1
1086 2006 8 25 65.8 62.1 63.0 64.6 65.9 65.1 65.1 66.2 66.2 65.2 4 1
2777 2011 5 5 63.2 64.6 65.3 64.1 62.0 62.9 65.1 63.4 64.5 63.8 4 1
2174 2009 8 17 66.6 67.2 65.2 64.7 63.0 64.5 64.6 63.8 63.4 65.3 4 1

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
56.47021 56.11543 55.42181 55.10904 55.12606 55.02553 54.84309 54.98723 55.25426 55.26862
62.44886 61.62593 60.80012 60.42569 60.23385 60.23157 60.09952 60.20312 60.31441 60.41080
65.61374 64.79147 64.23080 63.84224 63.60427 63.56928 63.52449 63.53191 63.53191 63.71561
68.31388 67.76972 67.22666 66.99829 66.98410 66.95231 66.96891 66.89527 66.96036 66.97736
71.74212 71.39322 70.98993 70.86593 71.08590 71.11685 71.01172 70.87692 70.70989 70.48535
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Solar Radiation Itensity (W/m2") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Data Preparation Parameters

variable <- "RELH"
nclusters <- 5

RELH

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If RELH is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization 5-means

Cluster with 5-means the dataset to obtain the daily profiles.

#Set seed for reproducibility
set.seed(1234)
kmeans.results <- kmeans(vector[,4:ncol(vector)],nclusters)
vector$clusterid <- kmeans.results[[1]]
centroids <- kmeans.results[[2]]

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.relh <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
2674 2011 1 22 92 70 63 57 54 53 56 56 60 68 2 1
43 2003 2 12 86 74 65 62 53 50 56 63 66 69 2 1
1088 2006 8 27 75 68 61 62 63 98 73 67 63 53 2 1
2776 2011 5 4 82 70 73 68 69 65 68 67 74 83 1 1
2175 2009 8 18 79 100 91 57 53 50 59 61 69 72 1 0

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
65.57185 58.77502 55.11298 52.95144 52.12983 52.01685 52.52032 54.11893 57.10505 62.23092
76.48750 68.47768 63.97768 61.47321 60.60893 60.65268 61.32589 63.16429 66.18750 71.51071
88.66667 82.81530 77.44012 73.56277 71.20924 69.55988 69.36219 70.34921 73.37374 78.84993
78.82600 70.20755 66.13627 66.55975 69.77149 74.46960 78.07547 81.62474 85.66876 89.92243
94.30963 91.15138 88.83716 87.88991 87.81422 88.42202 88.97477 89.85321 90.91743 94.12156
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Solar Radiation Itensity (W/m2") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Data Preparation Parameters

variable <- "SKNT"
nclusters <- 5

SKNT

Vectorization

Vectorize SCBH1 over the training and test partitions.

vector <- station.vectorize(SCBH1,variable)
vector <- vector[,c(1:3,12:21)]

Cleansing

If SKNT is missing any hour value, the day is discarded.

vector <- vector[complete.cases(vector),]

Discretization 5-means

Cluster with 5-means the dataset to obtain the daily profiles.

#Set seed for reproducibility
set.seed(1234)
kmeans.results <- kmeans(vector[,4:ncol(vector)],nclusters)
vector$clusterid <- kmeans.results[[1]]
centroids <- kmeans.results[[2]]

#Order the centroids in crescent order of intensity and reset rows
ordered.centroids <- as.numeric(names(sort(rowSums(centroids))))
centroids <- centroids[ordered.centroids,]
rownames(centroids) <- seq(1,nclusters)

#Update the centroid ids, by mapping the row ids before ordering to after ordering (element position is old ordering, value is new ordering)
vector$clusterid <- ordered.centroids[vector$clusterid]

Transitions

Define the istransition variable by observing if the previous day had a different clusterid than the current.

vector$istransition <- vector$clusterid
for (i in 2:length(vector$istransition)){
    if(vector$istransition[i-1] == vector$istransition[i])
        vector$istransition[i] <- 0 #False for C++
    else
        vector$istransition[i] <- 1 #True for C++
}
vector$istransition[1] <- -9999 # NA for C++
vector.wind <- vector

Summary

Sample 5 vectors.

kable(vector[sample(nrow(vector),5),])
YEAR MON DAY 8 9 10 11 12 13 14 15 16 17 clusterid istransition
2501 2010 7 20 3 5 8 9 11 12 11 10 9 8 5 1
40 2003 2 9 1 2 6 5 6 5 6 3 4 7 1 1
1002 2006 6 2 5 2 4 5 9 7 10 8 8 7 4 1
2588 2010 10 15 3 10 6 9 8 8 8 8 7 6 4 1
2047 2009 4 12 7 7 9 7 11 11 9 8 6 7 5 1

Centroids.

centroids_plot <- data.frame(centroids)
centroids_plot$color <-rownames(centroids)
colnames(centroids_plot) <- c(colnames(centroids),"Profile")
kable(centroids)
8 9 10 11 12 13 14 15 16 17
3.229682 4.238516 4.935512 5.522968 5.802120 5.905477 5.725265 5.361307 4.752650 3.852473
4.404416 6.048580 7.101577 7.807571 8.184227 8.439748 8.328707 7.994953 7.157098 5.984227
6.879090 8.610242 9.785206 10.186344 10.492176 10.550498 10.302987 9.911807 9.012802 7.752489
48.045454 45.136364 50.772727 59.363636 66.227273 56.090909 56.090909 48.227273 47.545454 47.318182
58.461539 78.846154 87.846154 102.153846 99.230769 95.615385 88.923077 92.307692 80.307692 69.692308
ggparcoord(
  data = centroids_plot,
  columns=c(1:(ncol(centroids)-1)),
  scale="globalminmax",
  alphaLines=0.2,
  groupColumn=ncol(centroids_plot)
) + geom_line(size=2,alpha=0.6) + science_theme + xlab("Hour of the Day") +
  ylab("Solar Radiation Itensity (W/m2") + ggtitle(paste('SCBH1',variable,'Centroids'))# + theme(axis.text.x = element_text(hjust = 1, size=20,color="black"), axis.text.y = element_text(hjust = 1, size=20,color="black"), plot.title = element_text(size=20), axis.title.y=element_text(size=20), axis.title.x=element_text(size=20))

Mutual Information

Solar Cluster x Solar Cluster

Summary

Centroids

entropy <- read.table("~/Desktop/OCI/out/entropy-tsmissing.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

++ Solar Cluster x Solar Transition

+++ Data Preparation

solar <- vector.solar[,c("YEAR","MON","DAY","istransition")]
solarc <- vector.solar[,c("YEAR","MON","DAY","clusterid")]
write.table(vector.solar[,c("clusterid","istransition")],file="~/Desktop/OCI/data/solarCid_solarTransition.csv",col.names=FALSE,row.names=FALSE,sep=",")

+++ Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-solarCid_solarTransition.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

++ Humidity Transition x Solar Transition

+++ Data Preparation

#Get the Humidity Transitions
relh <- vector.relh[,c("YEAR","MON","DAY","istransition")]

#Inner Join with the Solar Transitions
pair <- merge(relh,solar,by=c("YEAR","MON","DAY"))[,c("istransition.x","istransition.y")]
colnames(pair) <- c("relht","solart")

#Save
write.table(pair[,c("relht","solart")],file="~/Desktop/OCI/data/humidityTransition_SolarTransition.csv",col.names=FALSE,row.names=FALSE,sep=",")

+++ Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-humidityTransition_SolarTransition.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

Humidity Centroid x Solar Centroid

#Get the Humidity Transitions
relhc <- vector.relh[,c("YEAR","MON","DAY","clusterid")]

#Inner Join with the Solar and Humidity Centroids
pair <- merge(relhc,solarc,by=c("YEAR","MON","DAY"))[,c("clusterid.x","clusterid.y")]
colnames(pair) <- c("relhc","solarc")

#Save
write.table(pair[,c("relhc","solarc")],file="~/Desktop/OCI/data/humidityCentroid_SolarCentroid.csv",col.names=FALSE,row.names=FALSE,sep=",")

Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-humidityCentroid_SolarCentroid.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

Temperature Centroid x Solar Centroid

#Get the Temperature Centroids
tmpfc <- vector.tmpfc[,c("YEAR","MON","DAY","clusterid")]

#Inner Join with the Solar and Temperature Centroids
pair <- merge(tmpfc,solarc,by=c("YEAR","MON","DAY"))[,c("clusterid.x","clusterid.y")]
colnames(pair) <- c("tmpfc","solarc")

#Save
write.table(pair[,c("tmpfc","solarc")],file="~/Desktop/OCI/data/temperatureCentroid_SolarCentroid.csv",col.names=FALSE,row.names=FALSE,sep=",")

Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-temperatureCentroid_SolarCentroid.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

Dew Point Centroid x Solar Centroid

#Get the Humidity Transitions
dwpfc <- vector.dwpfc[,c("YEAR","MON","DAY","clusterid")]

#Inner Join with the Solar and Humidity Centroids
pair <- merge(dwpfc,solarc,by=c("YEAR","MON","DAY"))[,c("clusterid.x","clusterid.y")]
colnames(pair) <- c("dwpfc","solarc")

#Save
write.table(pair[,c("dwpfc","solarc")],file="~/Desktop/OCI/data/dewpointCentroid_SolarCentroid.csv",col.names=FALSE,row.names=FALSE,sep=",")

Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-dewpointCentroid_SolarCentroid.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

Precipitation 5-Bins x Solar Centroid

#Get the Humidity Transitions
precc <- vector.prec[,c("YEAR","MON","DAY","clusterid")]

#Inner Join with the Solar and Humidity Centroids
pair <- merge(precc,solarc,by=c("YEAR","MON","DAY"))[,c("clusterid.x","clusterid.y")]
colnames(pair) <- c("precc","solarc")

#Save
write.table(pair[,c("precc","solarc")],file="~/Desktop/OCI/data/precipitationCentroid_SolarCentroid.csv",col.names=FALSE,row.names=FALSE,sep=",")

Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-precipitationCentroid_SolarCentroid.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

++ Wind Speed Transition x Solar Transition

+++ Data Preparation

#Get the wind Transitions
wind <- vector.wind[,c("YEAR","MON","DAY","istransition")]

#Inner Join with the Solar Transitions
pair <- merge(wind,solar,by=c("YEAR","MON","DAY"))[,c("istransition.x","istransition.y")]
colnames(pair) <- c("windt","solart")

#Save
write.table(pair[,c("windt","solart")],file="~/Desktop/OCI/data/windTransition_SolarTransition.csv",col.names=FALSE,row.names=FALSE,sep=",")

+++ Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-windTransition_SolarTransition.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))

Wind Centroid x Solar Centroid

Data Preparation

#Get the wind Transitions
windc <- vector.wind[,c("YEAR","MON","DAY","clusterid")]

#Inner Join with the Solar Transitions
pair <- merge(windc,solarc,by=c("YEAR","MON","DAY"))[,c("clusterid.x","clusterid.y")]
colnames(pair) <- c("windc","solarc")

#Save
write.table(pair[,c("windc","solarc")],file="~/Desktop/OCI/data/windCentroid_SolarCentroid.csv",col.names=FALSE,row.names=FALSE,sep=",")

Summary

entropy <- read.table("~/Desktop/OCI/out/entropy-windCentroid_SolarCentroid.txt",header=TRUE)
ggplot(entropy, aes(x=Lag, y=Ixy)) + geom_line(colour="#94003C") + science_theme + ggtitle("Point-to-Point Mutual Information in Days") + xlab(expression(tau)) + ylab(expression(I(x[t],x[t+tau])))