Time series

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ## Project code

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(xts)

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

new_jere_csv<- read.csv("C:/Users/USER/OneDrive/Desktop/R CLASS/Documents/icammda_pricemeaslesdata - Copy.csv")
head(new_jere_csv,10)

##    Time.Month. Jere
## 1            1   12
## 2            2    7
## 3            3    1
## 4            4   17
## 5            5   50
## 6            6    2
## 7            7    8
## 8            8  166
## 9            9  131
## 10          10   59

jere_date<- ts(new_jere_csv$Jere,start=c(2016,1),frequency = 12)
jere_date

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2016  12   7   1  17  50   2   8 166 131  59  39   9
## 2017   0   4   0   1   1   0   0   0   1   0   2   6
## 2018   0   2   2   1   2   3   0   0   2   0  11 244
## 2019 343 474 658 468  56  34   0   3   0   1   1   1
## 2020   1   0  41  30 546 275 310  58 262  87  53 178
## 2021  26 205 123 138 174 352 242 271  46  79  90  73
## 2022 157  84 233 134 162  83 138 158   1   0   2   0

plot(jere_date,col="magenta",main="Cases of measles in Jere local government,Borno state",xlab="Time(month)",ylab="Jere")

## Differencing
jere_adf<- diff(jere_date)
plot(jere_adf)

acf(jere_adf,main ="acf of measles in Jere Local Government")

pacf(jere_adf,main="pacf of measles in Jere Local Government")

## Test for seasonality : statistical tools
decomp<- decompose(jere_date)
plot(decomp)

### ACF and PCF to check for seasonality
jere_date1<- sqrt(jere_date)
plot(jere_date1,xlab="Time",ylab="sqrt")

### fit arima model
fit_arima<- auto.arima(jere_date1,ic="aic",trace = T)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : 499.4077
##  ARIMA(0,1,0)            with drift         : 499.3606
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 496.8347
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : 497.1494
##  ARIMA(0,1,0)                               : 497.3669
##  ARIMA(1,1,0)            with drift         : 496.6547
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : 496.5915
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : 498.4702
##  ARIMA(1,1,0)(0,0,2)[12] with drift         : 498.4692
##  ARIMA(1,1,0)(1,0,2)[12] with drift         : Inf
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : 497.3204
##  ARIMA(2,1,0)(0,0,1)[12] with drift         : 497.5704
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : 497.8492
##  ARIMA(2,1,1)(0,0,1)[12] with drift         : 499.5704
##  ARIMA(1,1,0)(0,0,1)[12]                    : 494.5929
##  ARIMA(1,1,0)                               : 494.6626
##  ARIMA(1,1,0)(1,0,1)[12]                    : 496.4714
##  ARIMA(1,1,0)(0,0,2)[12]                    : 496.4702
##  ARIMA(1,1,0)(1,0,0)[12]                    : 494.8373
##  ARIMA(1,1,0)(1,0,2)[12]                    : 498.464
##  ARIMA(0,1,0)(0,0,1)[12]                    : 495.3209
##  ARIMA(2,1,0)(0,0,1)[12]                    : 495.5724
##  ARIMA(1,1,1)(0,0,1)[12]                    : 495.8514
##  ARIMA(0,1,1)(0,0,1)[12]                    : 495.1504
##  ARIMA(2,1,1)(0,0,1)[12]                    : 497.5724
## 
##  Best model: ARIMA(1,1,0)(0,0,1)[12]

summary(fit_arima)

## Series: jere_date1 
## ARIMA(1,1,0)(0,0,1)[12] 
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 = 21.49:  log likelihood = -244.3
## AIC=494.59   AICc=494.9   BIC=501.85
## 
## Training set error measures:
##                       ME   RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -0.02615164 4.5526 3.078198 NaN  Inf 0.4495227 0.01862956

#### AIC
AIC(fit_arima)

## [1] 494.5929

### choosing best model
sarima_model<- arima(jere_date1,order=c(1,1,0),seasonal =list(order=c(0,0,1),period=12))
sarimal_plot<-forecast(sarima_model,h=36)
plot(sarimal_plot)

AIC(sarima_model)

## [1] 494.5929

## forecast
forecast_arima<- forecast(fit_arima,h=36)
plot(forecast_arima)

# R code

Suppose you are tasked with computing the precise dosage

amounts of a certain drug in a collection of hypothetical scientific experiments.

These amounts depend upon some predetermined set of “dosage thresholds” (lowdose, meddose, and

highdose), as well as a predetermined dose level factor vector

named doselevel. Look at the following items (i–iv) to see the

intended form of these objects. Then write a set of nested if

statements that produce a new numeric vector called dosage, according to the following rules: – First, if there are any instances of “High” in doselevel, perform the following operations:

Check if lowdose is greater than or equal to 10. If so,overwrite lowdose with 10; otherwise, overwrite lowdose by itself divided by 2.
Check if meddose is greater than or equal to 26. If so,overwrite meddose by 26.
Check if highdose is less than 60. If so, overwrite highdose with 60; otherwise, overwrite highdose by itself multiplied by 1:5.
Create a vector named dosage with the value of lowdose repeated (rep) to match the length of doselevel.
Overwrite the elements in dosage corresponding to the index positions of instances of “Med” in doselevel by meddose.
Overwrite the elements in dosage corresponding to the index positions of instances of “High” in doselevel by highdose.

– Otherwise (in other words, if there are no instances of “High” in doselevel), perform the following operations:

Create a new version of doselevel, a factor vector with levels “Low” and “Med” only, and label these with “Small” and “Large”, respectively (refer to Section 4.3 for details or see ?factor).

Conditions and Loops 191* Check to see if lowdose is less than 15 AND meddose is less than 35. If so, overwrite lowdose by itself multiplied by 2 and overwrite meddose by itself plus highdose.

Create a vector named dosage, which is the value of lowdose repeated (rep) to match the length of doselevel.
Overwrite the elements in dosage corresponding to the index positions of instances of “Large” in doselevel by meddose.

Now, confirm the following: i. Given lowdose <- 12.5

meddose <- 25.3

highdose <- 58.1

doselevel <- factor(c(“Low”,“High”,“High”,“High”,“Low”,“Med”,“Med”),levels=c(“Low”,“Med”,“High”))

the result of dosage after running the nested if statements is as follows:

R> dosage [1] 10.0 60.0 60.0 60.0 10.0 25.3 25.3

Using the same lowdose, meddose, and highdose thresholds as in (i), given

doselevel <- factor(c(“Low”,“Low”,“Low”,“Med”,“Low”,“Med”,“Med”),levels=c(“Low”,“Med”,“High”))

the result of dosage after running the nested if statements is

as follows:

R> dosage [1] 25.0 25.0 25.0 83.4 25.0 83.4 83.4 Also, doselevel has been overwritten as follows: R> doselevel [1] Small Small Small Large Small Large Large Levels: Small Large

Given lowdose <- 9

meddose <- 49

highdose <- 61

doselevel <- factor(c(“Low”,“Med”,“Med”),levels=c(“Low”,“Med”,“High”))

192 Chapter 10the result of dosage after running the nested if statements is as follows:

R> dosage

[1] 9 49 49

Also, doselevel has been overwritten as follows:

R> doselevel

[1] Small Large Large Levels: Small Large

Using the same lowdose, meddose, and highdose thresholds as (iii), as well as the same doselevel as (i), the result of dosage after running the nested if statements is as follows:

R> dosage

[1] 4.5 91.5 91.5 91.5 4.5 26.0 26.0

lowdose <- 12.5
meddose <- 25.3
highdose <- 58.1
doselevel <- factor(c("Low", "High", "High", "High", "Low", "Med", "Med"), levels = c("Low", "Med", "High"))
if(any("High"==doselevel)){
  if(lowdose>=10){
    lowdose<-10
} else {
  lowdose<-lowdose / 2
}
if(meddose >= 26) {
  meddose<-26
}
if(highdose < 60){
  highdose<-60
} else {
  highdose<-highdose* 1.5
  }
dosage<-rep(lowdose,length(doselevel))

dosage[doselevel=="Med"]<-meddose

dosage[doselevel=="High"]<-highdose

} else {
  doselevel<-factor(doselevel,levels=c("Low","Med"),labels=c("Small","Large"))
  
if(lowdose< 15 && meddose < 35){
  lowdose<-lowdose * 2
  
  meddose<-meddose + highdose
  }
  
 dosage<-rep(lowdose,length(doselevel))
 
 dosage[doselevel=="Large"]<-meddose
}
dosage

## [1] 10.0 60.0 60.0 60.0 10.0 25.3 25.3

###
lowdose<-12.5
meddose<-25.3
highdose<-58.1
doselevel<-factor(c("Low","High","High","High","Low","Med",
                    "Med"),levels=c("Low","Med","High"))
dosage

## [1] 10.0 60.0 60.0 60.0 10.0 25.3 25.3

####
lowdose <- 12.5
meddose <- 25.3
highdose <- 58.1
doselevel <- factor(c("Low", "High", "High", "High", "Low", "Med", "Med"), levels = c("Low", "Med", "High"))

if(any("High"==doselevel)){
  if(lowdose>=10){
    lowdose<-10
} else {
  lowdose<-lowdose / 2
}
if(meddose >= 26) {
  meddose<-26
}
if(highdose < 60){
  highdose<-60
} else {
  highdose<-highdose* 1.5
  }
dosage<-rep(lowdose,length(doselevel))

dosage[doselevel=="Med"]<-meddose

dosage[doselevel=="High"]<-highdose

} else {
  doselevel<-factor(doselevel,levels=c("Low","Med"),labels=c("Small","Large"))
  
if(lowdose< 15 && meddose < 35){
  lowdose<-lowdose * 2
  
  meddose<-meddose + highdose
  }
  
 dosage<-rep(lowdose,length(doselevel))
 
 dosage[doselevel=="Large"]<-meddose
}
dosage

## [1] 10.0 60.0 60.0 60.0 10.0 25.3 25.3

### ii
lowdose<-12.5
meddose<-25.3
highdose<-58.1
doselevel <- factor(c("Low","Low","Low","Med","Low","Med",
                      "Med"),levels=c("Low","Med","High"))
if(any("High"==doselevel)){
  if(lowdose>=10){
    lowdose<-10
} else {
  lowdose<-lowdose / 2
}
if(meddose >= 26) {
  meddose<-26
}
if(highdose < 60){
  highdose<-60
} else {
  highdose<-highdose* 1.5
  }
dosage<-rep(lowdose,length(doselevel))

dosage[doselevel=="Med"]<-meddose

dosage[doselevel=="High"]<-highdose

} else {
  doselevel<-factor(doselevel,levels=c("Low","Med"),labels=c("Small","Large"))
  
if(lowdose< 15 && meddose < 35){
  lowdose<-lowdose * 2
  
  meddose<-meddose + highdose
  }
  
 dosage<-rep(lowdose,length(doselevel))
 
 dosage[doselevel=="Large"]<-meddose
}
doselevel

## [1] Small Small Small Large Small Large Large
## Levels: Small Large

### iii
lowdose <- 9
meddose <- 49
highdose <- 61
doselevel <- factor(c("Low","Med","Med"),
                    levels=c("Low","Med","High"))
if(any("High"==doselevel)){
  if(lowdose>=10){
    lowdose<-10
} else {
  lowdose<-lowdose / 2
}
if(meddose >= 26) {
  meddose<-26
}
if(highdose < 60){
  highdose<-60
} else {
  highdose<-highdose* 1.5
  }
dosage<-rep(lowdose,length(doselevel))

dosage[doselevel=="Med"]<-meddose

dosage[doselevel=="High"]<-highdose

} else {
  doselevel<-factor(doselevel,levels=c("Low","Med"),labels=c("Small","Large"))
  
if(lowdose< 15 && meddose < 35){
  lowdose<-lowdose * 2
  
  meddose<-meddose + highdose
  }
  
 dosage<-rep(lowdose,length(doselevel))
 
 dosage[doselevel=="Large"]<-meddose
}
dosage

## [1]  9 49 49

## iv
lowdose <- 9
meddose <- 49
highdose <- 61
doselevel<-factor(c("Low","High","High","High","Low","Med",
                    "Med"),levels=c("Low","Med","High"))
if(any("High"==doselevel)){
  if(lowdose>=10){
    lowdose<-10
} else {
  lowdose<-lowdose / 2
}
if(meddose >= 26) {
  meddose<-26
}
if(highdose < 60){
  highdose<-60
} else {
  highdose<-highdose* 1.5
  }
dosage<-rep(lowdose,length(doselevel))

dosage[doselevel=="Med"]<-meddose

dosage[doselevel=="High"]<-highdose

} else {
  doselevel<-factor(doselevel,levels=c("Low","Med"),labels=c("Small","Large"))
  
if(lowdose< 15 && meddose < 35){
  lowdose<-lowdose * 2
  
  meddose<-meddose + highdose
  }
  
 dosage<-rep(lowdose,length(doselevel))
 
 dosage[doselevel=="Large"]<-meddose
}
dosage

## [1]  4.5 91.5 91.5 91.5  4.5 26.0 26.0

A quadratic equation in the variable x is often expressed in the following form: k1x2 + k2x + k3 = 0 Here, k1, k2, and k3 are constants. Given values for these constants, you can attempt to find up to two real roots—values of x that satisfy the equation. Write a function that takes k1, k2,and k3 as arguments and finds and returns any solutions (as a numeric vector) in such a situation. This is achieved as follows:

– Evaluate k2^2− 4k1k3. If this is negative, there are no solutions, and an appropriate message should be printed to the console. – If k2^2 − 4k1k3 is zero, then there is one solution, computed by −k2/2k1.

– If k2^2 − 4k1k3 is positive, then there are two solutions, given by (−k2 − (k2^2 − 4k1k3)0:5)/2k1 and (−k2 + (k2^2 − 4k1k3)0:5)/2k1.

– No default values are needed for the three arguments, but the function should check to see whether any are missing.

If so, an appropriate character string message should be returned to the user, informing the user that the calculations are not possible. Now, test your function. i. Confirm the following: * 2x2 − x − 5 has roots 1:850781 and −1:350781. * x2 + x + 1 has no real roots. ii. Attempt to find solutions to the following quadratic equations: * 1:3x2 − 8x − 3:13 * 2:25x2 − 3x + 1 * 1:4x2 − 2:2x − 5:1 * −5x2 + 10:11x − 9:9 iii. Test your programmed response in the function if one of the arguments is missing.

quad<- function(k1,k2,k3,..){
  if(any(c(missing(k1),missing(k2),missing(k3)))){
    return(k1,k2,k3)
  }
   quadractic<- (k2^2 - 4*k1*k3 )
     if(quadractic<0){
       cat("There are no real root")
     }else if(quadractic==0){
       return(-k2/2*k1)
     }else{
     a<-((-k2 -(k2^2 -4*k1*k3)^0.5)/(2*k1))
    b<-((-k2 + (k2^2 - 4*k1*k3)^0.5)/(2*k1))
    cat("The quadractic has two roots which are:",a,"and",b)
     }
   }
quad(2,-1,-5)

## The quadractic has two roots which are: -1.350781 and 1.850781

quad(k1=1,k2=1,k3=1)

## There are no real root

### II
quad(1.3,-8,-3.13)

## The quadractic has two roots which are: -0.3691106 and 6.522957

quad(2.25,-3,1)

## [1] 3.375

quad(1.4,-2.2,-5.1)

## The quadractic has two roots which are: -1.278312 and 2.84974

quad(-5,10.11,-9.9)

## There are no real root

A forested nature reserve has 13 bird-viewing platforms scattered throughout a large block of land. The naturalists claim that at any point in time, there is a 75 percent chance of seeing birds at each platform. Suppose you walk through the reserve and visit every platform. If you assume that all relevant conditions are satisfied, let X be a binomial random variable representing the total number of platforms at which you see birds. a. Visualize the probability mass function of the binomial distribution of interest. b. What is the probability you see birds at all sites? c. What is the probability you see birds at more than 9 platforms? d. What is the probability of seeing birds at between 8 and 11 platforms (inclusive)? Confirm your answer by using only the d-function and then again using only the p-function. e. Say that, before your visit, you decide that if you see birds at fewer than 9 sites, you’ll make a scene and demand your entry fee back. What’s the probability of your embarrassing yourself in this way?

Simulate realizations of X that represent 10 different visits to the reserve; store your resulting vector as an object.
Compute the mean and standard deviation of the distribution of interest.

### a
Y.prob<-dbinom(x=0:13,size = 13,prob=0.75)
barplot(Y.prob,names.arg=0:13,col="gray")

## b
dbinom(13,13,0.75)

## [1] 0.02375726

## c
1-pbinom(q=9,size=13,prob=0.75)

## [1] 0.5842527

sum(dbinom(x=10:13,size = 13,prob = 0.75))

## [1] 0.5842527

###
sum(dbinom(x=8:13,size=13,prob = 0.75))

## [1] 0.9197874

###
pbinom(q=11,size = 13,prob = 0.75) - pbinom(q=7,size = 13,prob = 0.75)

## [1] 0.793082

## e
pbinom(q=9,size=13,prob = 0.75)

## [1] 0.4157473

## f
rbinom(10,13,0.75)

##  [1]  7  7 12 11 10  7 11 11 12  9

## g mean of binomial distribution
X.mean<- 13*0.75
X.mean

## [1] 9.75

X.sd<-sqrt(13* 0.75*0.25)
X.sd

## [1] 1.561249

## outliers
foo <- c(0.6,-0.6,0.1,-0.2,-1.0,0.4,0.3,-1.8,1.1,6.0)
plot(foo,rep(0,10),yaxt="n",ylab="",bty="n",cex=2,cex.axis=1.5,cex.lab=1.5)
abline(h=0,col="gray",lty=2)
arrows(5,0.5,5.9,0.1,lwd=2)
text(5,0.7,labels="outlier?",cex=3)

bar <- c(0.1,0.3,1.3,0.6,0.2,-1.7,0.8,0.9,-0.8,-1.0)
baz <- c(-0.3,0.9,2.8,2.3,1.2,-4.1,-0.4,4.1,-2.3,-100.0)
plot(bar,baz,axes=T,cex=2,cex.axis=1.5,cex.lab=1.5,col="blue",pch=19)
arrows(-0.5,-80,-1.0,-97,lwd=2)
text(-0.5,-74,labels="outlier?",cex=3)

Every Saturday, at the same time, an individual stands by the side of a road and tallies the number

of cars going by within a 120-minute window. Based on previous knowledge, she believes that the

mean number of cars going by during this time is exactly 107. Let X represent the appropriate

Poisson random variable of the number of cars passing her position in each Saturday session.

What is the probability that more than 100 cars pass her on any given Saturday?
Determine the probability that no cars pass.Common Probability Distributions 341c. Plot the relevant Poisson mass function over the values in

60 ≤ x ≤ 150. d. Simulate 260 results from this distribution (about five years of weekly Saturday monitoring

sessions). Plot the simulated results using hist; use xlim to set the horizontal limits from 60 to

150.Compare your histogram to the shape of your mass function from (c).

## more than 100 cars pass
1-ppois(q=100,107)

## [1] 0.7319128

### no car pass
dpois(x=0,107)

## [1] 3.39227e-47

## plot the relevant poisson mass function
x.dpois<-dpois(x=60:150,107)
barplot(x.dpois,names.arg=60:150,main="x.dpois")

### simulate
r.hist<-rpois(n=260,107)
hist(r.hist,xlim=c(60,150),plot = TRUE)

##

Write a function that can compute F as per the following notes:

– Arguments must be present for P, i, t, and y. The argument for t should have a default value of 12.

– Another argument giving a logical value that determines whether to plot the amount F at each

integer time should be included. For example, if plotit=TRUE (the default) and y is 5 years, the

plot should show the amount F at y = 1;2;3;4;5.

– If this function is plotted, the plot should always be a stepplot, so plot should always be

called with type=“s”.

– If plotit=FALSE, the final amount F should be returned as a numeric vector corresponding to the

same integer times, as shown earlier.

– An ellipsis should also be included to control other details of plotting, if it takes place.

Now, using your function, do the following:

Work out the final amount after a 10-year investment of a principal of $5000, at an interest rate of 4.4 percent per annum compounded monthly.
Re-create the following step-plot, which shows the result of $100 invested at 22.9 percent per annum, compounded monthly, for 20 years:
Perform another calculation based on the same parameters as in (ii), but this time, assume the

interest is compounded annually. Return and store the results as a numeric vector.Then, use lines

to add a second step-line, corresponding to this annually accrued amount, to the plot created

previously.Use a different color or line type and make use of the legend function so the two lines

can be differentiated.

compute<-function(P,i,t= 12, y, plotit= TRUE,...){
  y<- 1 : y
  x<- P*(1 + i/(100*t))^(t*y)
  
  if(plotit){
    plot(y,x,type="s",...)
  
  }
    return(x)
}
## i,p=5,000,i=4.4,y=10
compute(5000,4.4,y=10)

##  [1] 5224.491 5459.062 5704.164 5960.271 6227.877 6507.498 6799.674 7104.967
##  [9] 7423.968 7757.291

##(ii)
compute(100,22.9,12,y=20,plotit=T,main="Compound interest calculator",xlab="Year(y)",ylab="Balance(F)")

##  [1]  125.4632  157.4102  197.4918  247.7796  310.8722  390.0303  489.3445
##  [8]  613.9473  770.2780  966.4155 1212.4959 1521.2362 1908.5917 2394.5804
## [15] 3004.3174 3769.3130 4729.1010 5933.2818 7444.0856 9339.5886

## (iii)
calc<-compute(100,22.9,1,y=20,plotit=F,main="Compound interest calculator",xlab="Year(y)",ylab="Balance(F)")
lines(1:20,calc,lty=2,col="blue",type="s")
legend("topleft",lty=c(1,5),legend=c("monthly interest","annual interest"))

Time series

FALANA BARAKAT

2025-09-05

R Markdown