Create an empty data frame “time” with daily dates from 1979 to 2017:

#Create an empty data frame from Jan 1 1979 to Jun 7 2017 in format YYYYMMDD HH MM SS
time<-data.frame(seq(ISOdate(1979,1,1), ISOdate(2017,6,7), by='day'))

#Rename the variable from x to time
colnames(time)[1]<-'time'

#Create a new variable name "date"only with the first 10 characters of the variable time
time['date']<-substr(time$time,1,10)

#Rename variable "TIME" to "DS"
colnames(time)[2]<-'ds'
str(time)

## 'data.frame':    14038 obs. of  2 variables:
##  $ time: POSIXct, format: "1979-01-01 12:00:00" "1979-01-02 12:00:00" ...
##  $ ds  : chr  "1979-01-01" "1979-01-02" "1979-01-03" "1979-01-04" ...

Load data from CSV file that has information from every other day and ice extent from the original data set:

library(readr)
seaice_north<- read_csv('seaice_north_28.csv')

## Parsed with column specification:
## cols(
##   Year = col_integer(),
##   Month = col_integer(),
##   Day = col_integer(),
##   dsold = col_character(),
##   Extent = col_double(),
##   ds_fail = col_character(),
##   month2 = col_character(),
##   day2 = col_character(),
##   ds = col_date(format = "")
## )

#Eliminate information from 1978
seaice_north<-seaice_north[-c(1:34),]
#Eliminate extra columns
seaice_north<-seaice_north[,c(5,9)]

#Convert variable "ds" from date to character
seaice_north$ds<-as.character(seaice_north$ds)
str(seaice_north)

## Classes 'tbl_df', 'tbl' and 'data.frame':    12420 obs. of  2 variables:
##  $ Extent: num  15 14.9 14.9 15 15.2 ...
##  $ ds    : chr  "1979-01-02" "1979-01-04" "1979-01-06" "1979-01-08" ...

Merge both data sets:

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#The "new" data set will be our main and on the right we'll bring the information from the original data set that has data from every other day, we will expect rows with n/a values.
daily.seaice.north<-time %>% left_join(seaice_north, by = "ds")
#Eliminate the 1st column and rename from extent to y
daily.seaice.north<-daily.seaice.north[,-c(1)]
colnames(daily.seaice.north)[2]<-'y'
str(daily.seaice.north)

## 'data.frame':    14038 obs. of  2 variables:
##  $ ds: chr  "1979-01-01" "1979-01-02" "1979-01-03" "1979-01-04" ...
##  $ y : num  NA 15 NA 14.9 NA ...

library(imputeTS)
plotNA.distribution(daily.seaice.north$y)

After reviewing the statistics and the plot of the missing values, most of them are withing the first 10 years. Accounting for only 11.5% of the entire time series.

#Impute data in n/a rows
imputed.seaice.north<-na.interpolation(daily.seaice.north)
head(imputed.seaice.north);tail(imputed.seaice.north)

##           ds       y
## 1 1979-01-01 14.9970
## 2 1979-01-02 14.9970
## 3 1979-01-03 14.9595
## 4 1979-01-04 14.9220
## 5 1979-01-05 14.9255
## 6 1979-01-06 14.9290

##               ds      y
## 14033 2017-06-02 11.765
## 14034 2017-06-03 11.681
## 14035 2017-06-04 11.601
## 14036 2017-06-05 11.545
## 14037 2017-06-06 11.510
## 14038 2017-06-07 11.455

Plot imputed values:

plotNA.imputations(daily.seaice.north$y, imputed.seaice.north$y)

Divide data sets:

train.new<-imputed.seaice.north[c(1:12053),]      #From Jan 1st 1979 to December 31st 2011 (12053 days)
#valid.new<-imputed.seaice.north[c(12054:13149),]  #From Jan 1st 2012 to December 31st 2014 (1096 days)
#test.new<-imputed.seaice.north[c(13150:14038),]   #From Jan 1st 2015 to June      7th 2017 (889 days)
head(train.new);tail(train.new)

##           ds       y
## 1 1979-01-01 14.9970
## 2 1979-01-02 14.9970
## 3 1979-01-03 14.9595
## 4 1979-01-04 14.9220
## 5 1979-01-05 14.9255
## 6 1979-01-06 14.9290

##               ds      y
## 12048 2011-12-26 12.682
## 12049 2011-12-27 12.755
## 12050 2011-12-28 12.854
## 12051 2011-12-29 13.097
## 12052 2011-12-30 13.115
## 12053 2011-12-31 13.180

Run Prophet package on training set:

library(prophet)

## Loading required package: Rcpp

m<-prophet(train.new, yearly.seasonality = TRUE,  weekly.seasonality = TRUE, daily.seasonality = TRUE)

## Initial log joint probability = -343.871
## Optimization terminated normally: 
##   Convergence detected: relative gradient magnitude is below tolerance

We need to construct a dataframe for prediction. The make_future_dataframe function takes the model object and a number of periods to forecast:

future <- make_future_dataframe(m, periods = 0)
head(future);tail(future);str(future)

##           ds
## 1 1979-01-01
## 2 1979-01-02
## 3 1979-01-03
## 4 1979-01-04
## 5 1979-01-05
## 6 1979-01-06

##               ds
## 12050 2011-12-28
## 12051 2011-12-29
## 12052 2011-12-30
## 12053 2011-12-31
## 12054       <NA>
## 12055 2011-12-31

## 'data.frame':    12055 obs. of  1 variable:
##  $ ds: POSIXct, format: "1979-01-01" "1979-01-02" ...

As with most modeling procedures in R, we use the generic predict function to get our forecast:

forecast <- predict(m)
head(forecast);tail(forecast)

##           ds    trend     daily daily_lower daily_upper  seasonal
## 1 1979-01-01 14.47520 -2.004815   -2.004815   -2.004815 0.1126136
## 2 1979-01-02 14.47462 -2.004815   -2.004815   -2.004815 0.1584334
## 3 1979-01-03 14.47403 -2.004815   -2.004815   -2.004815 0.2036588
## 4 1979-01-04 14.47345 -2.004815   -2.004815   -2.004815 0.2508084
## 5 1979-01-05 14.47287 -2.004815   -2.004815   -2.004815 0.2975026
## 6 1979-01-06 14.47228 -2.004815   -2.004815   -2.004815 0.3444948
##   seasonal_lower seasonal_upper seasonalities seasonalities_lower
## 1      0.1126136      0.1126136     0.1126136           0.1126136
## 2      0.1584334      0.1584334     0.1584334           0.1584334
## 3      0.2036588      0.2036588     0.2036588           0.2036588
## 4      0.2508084      0.2508084     0.2508084           0.2508084
## 5      0.2975026      0.2975026     0.2975026           0.2975026
## 6      0.3444948      0.3444948     0.3444948           0.3444948
##   seasonalities_upper        weekly  weekly_lower  weekly_upper   yearly
## 1           0.1126136  6.172632e-04  6.172632e-04  6.172632e-04 2.116812
## 2           0.1584334  4.913515e-05  4.913515e-05  4.913515e-05 2.163199
## 3           0.2036588 -1.071270e-03 -1.071270e-03 -1.071270e-03 2.209545
## 4           0.2508084 -2.425398e-04 -2.425398e-04 -2.425398e-04 2.255866
## 5           0.2975026  1.543358e-04  1.543358e-04  1.543358e-04 2.302163
## 6           0.3444948  8.875370e-04  8.875370e-04  8.875370e-04 2.348422
##   yearly_lower yearly_upper yhat_lower yhat_upper trend_lower trend_upper
## 1     2.116812     2.116812   14.18218   15.00413    14.47520    14.47520
## 2     2.163199     2.163199   14.22140   15.04913    14.47462    14.47462
## 3     2.209545     2.209545   14.23505   15.11111    14.47403    14.47403
## 4     2.255866     2.255866   14.29972   15.15704    14.47345    14.47345
## 5     2.302163     2.302163   14.36159   15.21680    14.47287    14.47287
## 6     2.348422     2.348422   14.41372   15.23303    14.47228    14.47228
##       yhat
## 1 14.58781
## 2 14.63305
## 3 14.67769
## 4 14.72426
## 5 14.77037
## 6 14.81678

##               ds    trend     daily daily_lower daily_upper     seasonal
## 12048 2011-12-26 12.58571 -2.004815   -2.004815   -2.004815 -0.181632114
## 12049 2011-12-27 12.58557 -2.004815   -2.004815   -2.004815 -0.134178999
## 12050 2011-12-28 12.58543 -2.004815   -2.004815   -2.004815 -0.087775849
## 12051 2011-12-29 12.58529 -2.004815   -2.004815   -2.004815 -0.039813541
## 12052 2011-12-30 12.58515 -2.004815   -2.004815   -2.004815  0.007423479
## 12053 2011-12-31 12.58501 -2.004815   -2.004815   -2.004815  0.054787004
##       seasonal_lower seasonal_upper seasonalities seasonalities_lower
## 12048   -0.181632114   -0.181632114  -0.181632114        -0.181632114
## 12049   -0.134178999   -0.134178999  -0.134178999        -0.134178999
## 12050   -0.087775849   -0.087775849  -0.087775849        -0.087775849
## 12051   -0.039813541   -0.039813541  -0.039813541        -0.039813541
## 12052    0.007423479    0.007423479   0.007423479         0.007423479
## 12053    0.054787004    0.054787004   0.054787004         0.054787004
##       seasonalities_upper        weekly  weekly_lower  weekly_upper
## 12048        -0.181632114  6.172632e-04  6.172632e-04  6.172632e-04
## 12049        -0.134178999  4.913515e-05  4.913515e-05  4.913515e-05
## 12050        -0.087775849 -1.071270e-03 -1.071270e-03 -1.071270e-03
## 12051        -0.039813541 -2.425398e-04 -2.425398e-04 -2.425398e-04
## 12052         0.007423479  1.543358e-04  1.543358e-04  1.543358e-04
## 12053         0.054787004  8.875370e-04  8.875370e-04  8.875370e-04
##         yearly yearly_lower yearly_upper yhat_lower yhat_upper trend_lower
## 12048 1.822566     1.822566     1.822566   11.98866   12.82949    12.58571
## 12049 1.870587     1.870587     1.870587   12.03926   12.89430    12.58557
## 12050 1.918111     1.918111     1.918111   12.07021   12.94840    12.58543
## 12051 1.965244     1.965244     1.965244   12.10484   12.98149    12.58529
## 12052 2.012084     2.012084     2.012084   12.13066   13.02043    12.58515
## 12053 2.058715     2.058715     2.058715   12.17763   13.06269    12.58501
##       trend_upper     yhat
## 12048    12.58571 12.40408
## 12049    12.58557 12.45139
## 12050    12.58543 12.49766
## 12051    12.58529 12.54548
## 12052    12.58515 12.59258
## 12053    12.58501 12.63980

You can use the generic plot function to plot the forecast, but you must also pass the model in to be plotted:

plot(m, forecast)

Just as in Python, you can plot the components of the forecast. In R, you use the prophet_plot_components function instead of an instance method:

#prophet_plot_components(m, forecast)

Extract the data from our prediction data frame to convert daily predictions into monthly predictions.

#Get ds and yhat values from the forecast data frame
dataframe<-forecast[,c('ds','yhat')]

#Extract Month and Year from our DS variable:
dataframe['Month']<-substr(dataframe$ds,6,7)
dataframe['Year']<-substr(dataframe$ds,1,4)

#Convert Month and Year from characters into numeric
dataframe$Month<-as.numeric(dataframe$Month)
dataframe$Year<-as.numeric(dataframe$Year)

#Create a new data frame in which we'll calculate the monthly mean of yhat by Month and Year.
newframe<-aggregate(dataframe$yhat,list(by=dataframe$Year,dataframe$Month),FUN=mean)

#Rename the new frame from by to Year, Month and yhat
colnames(newframe)<-c('Year','Month','yhat')

#Sort by Year and Month
newframe<-newframe[order(newframe$Year),]
head(newframe);tail(newframe);str(newframe)

##     Year Month     yhat
## 1   1979     1 15.23012
## 34  1979     2 16.08859
## 67  1979     3 16.21382
## 100 1979     4 15.47618
## 133 1979     5 14.05808
## 166 1979     6 12.51549

##     Year Month      yhat
## 231 2011     7  8.459748
## 264 2011     8  6.176259
## 297 2011     9  5.378490
## 330 2011    10  7.260112
## 363 2011    11  9.639488
## 396 2011    12 11.810423

## 'data.frame':    396 obs. of  3 variables:
##  $ Year : num  1979 1979 1979 1979 1979 ...
##  $ Month: num  1 2 3 4 5 6 7 8 9 10 ...
##  $ yhat : num  15.2 16.1 16.2 15.5 14.1 ...

Import file “North.csv” with actual monthly values

actual_values<- read_csv('North.csv')

## Parsed with column specification:
## cols(
##   Date = col_character(),
##   Extent = col_double()
## )

actual_values_training<-actual_values[c(1:396),]
str(actual_values_training)

## Classes 'tbl_df', 'tbl' and 'data.frame':    396 obs. of  2 variables:
##  $ Date  : chr  "Jan-79" "Feb-79" "Mar-79" "Apr-79" ...
##  $ Extent: num  15.4 16.2 16.3 15.4 13.9 ...

actualvsprophet<-data.frame(newframe, actual_values_training$Extent)

#rename columns
colnames(actualvsprophet)[4]<-'Actual'
colnames(actualvsprophet)[3]<-'Prophet'

#calculate errors
actualvsprophet['Error']<-actualvsprophet$Actual-actualvsprophet$Prophet

Plot errors:

plot(actualvsprophet$Error, type='l')

RMSE

sqrt(mean((actualvsprophet$Error)^2))

## [1] 0.3061204

Ljung Box Test

Box.test(actualvsprophet$Error,lag=1,type="Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  actualvsprophet$Error
## X-squared = 158.08, df = 1, p-value < 2.2e-16

#write.csv(actualvsprophet,file = "actualvsprophet_training.csv")

Prophet Data Prep Ice Extent

Jorge Argueta