Set up Rstudio

Setting up R Markdown in RStudio enables you to create dynamic, reproducible, and visually appealing reports, presentations, and documents that help you communicate your data analysis and research findings more effectively.

basic commands

addition

5+7
[1] 12
5+9
[1] 14

subtraction

8-6
[1] 2

exponential

exp(8)
[1] 2980.958

Let's generate a regular sequence

1:50#in ascending order
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
50:1#in descending order
 [1] 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26
[26] 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1
5*5:50
 [1]  25  30  35  40  45  50  55  60  65  70  75  80  85  90  95 100 105 110 115
[20] 120 125 130 135 140 145 150 155 160 165 170 175 180 185 190 195 200 205 210
[39] 215 220 225 230 235 240 245 250
2*1:50
 [1]   2   4   6   8  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38
[20]  40  42  44  46  48  50  52  54  56  58  60  62  64  66  68  70  72  74  76
[39]  78  80  82  84  86  88  90  92  94  96  98 100
50:2*3i
 [1] 0+150i 0+147i 0+144i 0+141i 0+138i 0+135i 0+132i 0+129i 0+126i 0+123i
[11] 0+120i 0+117i 0+114i 0+111i 0+108i 0+105i 0+102i 0+ 99i 0+ 96i 0+ 93i
[21] 0+ 90i 0+ 87i 0+ 84i 0+ 81i 0+ 78i 0+ 75i 0+ 72i 0+ 69i 0+ 66i 0+ 63i
[31] 0+ 60i 0+ 57i 0+ 54i 0+ 51i 0+ 48i 0+ 45i 0+ 42i 0+ 39i 0+ 36i 0+ 33i
[41] 0+ 30i 0+ 27i 0+ 24i 0+ 21i 0+ 18i 0+ 15i 0+ 12i 0+  9i 0+  6i

complex numbers

z<-2+3i
z
[1] 2+3i
N<-3i
N
[1] 0+3i
M<-2i+5i
M
[1] 0+7i

Complex number confirmation

is.complex(z)
[1] TRUE
is.complex(N)
[1] TRUE
is.complex(M)
[1] TRUE

Square roots

sqrt(9)
[1] 3
sqrt(81)
[1] 9
sqrt(144)
[1] 12

power function

820^2
[1] 672400
25^2
[1] 625

covariance and correlation

# Two small numeric samples used for the variance, covariance and
# correlation examples.
x1 <- c(4, 6, 9, 11)
x2 <- c(33, 12, 98, 50)

Variance and Covariances

var(x1)
[1] 9.666667
var(x2)
[1] 1341.583
cov(x1,x2)
[1] 62.83333
cor(x1,x2)
[1] 0.5517507

Mean

mean(x1+x2)
[1] 55.75

Data Binding

X<-cbind(x1,x2)
X
     x1 x2
[1,]  4 33
[2,]  6 12
[3,]  9 98
[4,] 11 50

Variance and Covariance

var(X)
          x1         x2
x1  9.666667   62.83333
x2 62.833333 1341.58333
cor(X)
          x1        x2
x1 1.0000000 0.5517507
x2 0.5517507 1.0000000
cov(X)
          x1         x2
x1  9.666667   62.83333
x2 62.833333 1341.58333

Additional Data set and Calculations

# Second example data set: three numeric vectors of equal length combined
# into a data frame, followed by a preview of its first five rows.
# NOTE: `z` here overwrites the complex number `z` assigned earlier.
x <- c(88, 52, 69, 23, 14, 25)
z <- c(120, 150, 200, 128, 415, 800)
y <- c(20, 30, 40, 50, 60, 70)
B <- data.frame(x, y, z)
head(B, 5)
   x  y   z
1 88 20 120
2 52 30 150
3 69 40 200
4 23 50 128
5 14 60 415

Variance and Covariance

library(stargazer)
stargazer (var(B), type="text")

=============================
      x        y       z     
-----------------------------
x  863.767   -475  -4,194.633
y    -475     350    4,123   
z -4,194.633 4,123 71,536.170
-----------------------------
stargazer(cov(B), type = "text")

=============================
      x        y       z     
-----------------------------
x  863.767   -475  -4,194.633
y    -475     350    4,123   
z -4,194.633 4,123 71,536.170
-----------------------------
stargazer(cor(B), type = "text")

======================
    x      y      z   
----------------------
x   1    -0.864 -0.534
y -0.864   1    0.824 
z -0.534 0.824    1   
----------------------

T-TEST

Age<-c(19,25,27,30,22,23,24,26,25,30,25,26,36,21,25,29)
gender<-gl(2,8,labels=c("male","female"))
GA<-data.frame(Age,gender)
head(GA,5)
  Age gender
1  19   male
2  25   male
3  27   male
4  30   male
5  22   male

T-test across gender

t.test(Age~gender)

    Welch Two Sample t-test

data:  Age by gender
t = -1.3218, df = 12.888, p-value = 0.2092
alternative hypothesis: true difference in means between group male and group female is not equal to 0
95 percent confidence interval:
 -6.919281  1.669281
sample estimates:
  mean in group male mean in group female 
              24.500               27.125 

One sample t-test(Scores)

score<-c(50,65,72,77,73,85,88,80,65,56,66,78,82,90)
t.test(score,mu=75)

    One Sample t-test

data:  score
t = -0.51906, df = 13, p-value = 0.6124
alternative hypothesis: true mean is not equal to 75
95 percent confidence interval:
 66.51943 80.19486
sample estimates:
mean of x 
 73.35714 
t.test(score,mu=60)

    One Sample t-test

data:  score
t = 4.2202, df = 13, p-value = 0.001001
alternative hypothesis: true mean is not equal to 60
95 percent confidence interval:
 66.51943 80.19486
sample estimates:
mean of x 
 73.35714 
t.test(score,mu=75,alternative="greater",conf.level=0.99)

    One Sample t-test

data:  score
t = -0.51906, df = 13, p-value = 0.6938
alternative hypothesis: true mean is greater than 75
99 percent confidence interval:
 64.96873      Inf
sample estimates:
mean of x 
 73.35714 

Right or left tail

t.test(score,mu=75,alternative="l",conf.level=0.99)

    One Sample t-test

data:  score
t = -0.51906, df = 13, p-value = 0.3062
alternative hypothesis: true mean is less than 75
99 percent confidence interval:
     -Inf 81.74555
sample estimates:
mean of x 
 73.35714 

Two-tailed test with a 99% confidence interval

t.test(score,mu=75, conf.level=0.99)

    One Sample t-test

data:  score
t = -0.51906, df = 13, p-value = 0.6124
alternative hypothesis: true mean is not equal to 75
99 percent confidence interval:
 63.82308 82.89120
sample estimates:
mean of x 
 73.35714 
t.test(score,mu=75,alternative="two.sided",conf.level=0.99)

    One Sample t-test

data:  score
t = -0.51906, df = 13, p-value = 0.6124
alternative hypothesis: true mean is not equal to 75
99 percent confidence interval:
 63.82308 82.89120
sample estimates:
mean of x 
 73.35714 

Normality test

shapiro.test(score)

    Shapiro-Wilk normality test

data:  score
W = 0.96172, p-value = 0.7513

Wilcoxon test

wilcox.test(score, mu=75)

    Wilcoxon signed rank test with continuity correction

data:  score
V = 48, p-value = 0.8014
alternative hypothesis: true location is not equal to 75
wilcox.test(score, mu=75,alternative ="less", paired = FALSE, exact = TRUE, correct = TRUE)

    Wilcoxon signed rank test with continuity correction

data:  score
V = 48, p-value = 0.4007
alternative hypothesis: true location is less than 75

Additional T-test

weight<-c(135,180,108,128,160,143,175,170,205,195,185,150,175,190,180,220)
gender<-gl(2,8,labels=c("male","female"))
paired_t_test<-data.frame(weight,gender)
head(paired_t_test,5)
  weight gender
1    135   male
2    180   male
3    108   male
4    128   male
5    160   male

In this case, we run the two-sample t-test with the following command:

t.test(weight~gender)

    Welch Two Sample t-test

data:  weight by gender
t = -3.2304, df = 13.477, p-value = 0.006308
alternative hypothesis: true difference in means between group male and group female is not equal to 0
95 percent confidence interval:
 -62.69717 -12.55283
sample estimates:
  mean in group male mean in group female 
             149.875              187.500 

Variance Equality

# Two-sample t-test assuming equal variances; TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
t.test(weight ~ gender, var.equal = TRUE)

    Two Sample t-test

data:  weight by gender
t = -3.2304, df = 14, p-value = 0.006044
alternative hypothesis: true difference in means between group male and group female is not equal to 0
95 percent confidence interval:
 -62.60587 -12.64413
sample estimates:
  mean in group male mean in group female 
             149.875              187.500 
wilcox.test(weight~gender, paired=FALSE)

    Wilcoxon rank sum test with continuity correction

data:  weight by gender
W = 6, p-value = 0.007319
alternative hypothesis: true location shift is not equal to 0
before<-c(117,111,98,104,105,100,81,89,78)
after<-c(83,85,75,82,82,77,62,69,64)
data.frame(before,after)
  before after
1    117    83
2    111    85
3     98    75
4    104    82
5    105    82
6    100    77
7     81    62
8     89    69
9     78    64
# Paired t-test on the before/after measurements; the result is stored and
# then printed. TRUE is spelled out because T can be reassigned.
TTEST <- t.test(before, after, paired = TRUE)
TTEST

    Paired t-test

data:  before and after
t = 12.52, df = 8, p-value = 1.551e-06
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 18.49173 26.84160
sample estimates:
mean difference 
       22.66667 

Alternative non-parametric

# Wilcoxon signed-rank test on the same paired measurements; the result is
# stored and then printed. TRUE is spelled out because T can be reassigned.
WLCX <- wilcox.test(before, after, paired = TRUE)
WLCX

    Wilcoxon signed rank test with continuity correction

data:  before and after
V = 45, p-value = 0.008909
alternative hypothesis: true location shift is not equal to 0

Other Important Tests

Data Importation

data<-read.csv("C:\\Users\\user\\Downloads\\training model.csv")
head(data,5)
  year       CPI Exch.Rate Lend.Int.Rates
1 1987  7.872727  16.45499        14.0000
2 1988  8.848083  17.74710        15.0000
3 1989 10.035029  20.57247        17.2500
4 1990 11.602322  22.91477        18.7500
5 1991 13.805882  27.50870        18.9975

Attach the dataset

attach(data)
str(data)
'data.frame':   26 obs. of  4 variables:
 $ year          : int  1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 ...
 $ CPI           : num  7.87 8.85 10.04 11.6 13.81 ...
 $ Exch.Rate     : num  16.5 17.7 20.6 22.9 27.5 ...
 $ Lend.Int.Rates: num  14 15 17.2 18.8 19 ...

Plot the dataset

plot.ts(data$CPI)

plot.ts(data$Exch.Rate)

plot.ts(data$Lend.Int.Rates)

plot.ts(data$CPI,type="l")

Load the following important libraries

library(tseries)
library(tidyverse)
library(vars)
library(olsrr)

Test stationarity

adf.test(data$CPI)

    Augmented Dickey-Fuller Test

data:  data$CPI
Dickey-Fuller = 1.5556, Lag order = 2, p-value = 0.99
alternative hypothesis: stationary
adf.test(data$Exch.Rate)

    Augmented Dickey-Fuller Test

data:  data$Exch.Rate
Dickey-Fuller = -1.743, Lag order = 2, p-value = 0.6703
alternative hypothesis: stationary
adf.test(data$Lend.Int.Rates)

    Augmented Dickey-Fuller Test

data:  data$Lend.Int.Rates
Dickey-Fuller = -2.0656, Lag order = 2, p-value = 0.5474
alternative hypothesis: stationary

Test stationarity (same tests, using the attached column names directly)

adf.test(CPI)

    Augmented Dickey-Fuller Test

data:  CPI
Dickey-Fuller = 1.5556, Lag order = 2, p-value = 0.99
alternative hypothesis: stationary
adf.test(Exch.Rate)

    Augmented Dickey-Fuller Test

data:  Exch.Rate
Dickey-Fuller = -1.743, Lag order = 2, p-value = 0.6703
alternative hypothesis: stationary
adf.test(Lend.Int.Rates)

    Augmented Dickey-Fuller Test

data:  Lend.Int.Rates
Dickey-Fuller = -2.0656, Lag order = 2, p-value = 0.5474
alternative hypothesis: stationary

Difference the series

adf.test(diff(CPI))

    Augmented Dickey-Fuller Test

data:  diff(CPI)
Dickey-Fuller = -1.4862, Lag order = 2, p-value = 0.7681
alternative hypothesis: stationary
CPI1<-diff(CPI)
CPI1
 [1]  0.9753552  1.1869462  1.5672929  2.2035602  3.7734740  8.0827912
 [7]  7.3943910  0.5138071  2.9757047  4.1376231  2.2034025  2.4784791
[13]  4.5251056  2.8595567  1.0368861  5.2739474  6.9662002  6.5447763
[19]  4.3778887  3.2861734 12.1267763  9.7326307  4.1694197 14.9004387
[25] 11.3632053
ts.plot(diff(CPI1))

adf.test(diff(CPI1))

    Augmented Dickey-Fuller Test

data:  diff(CPI1)
Dickey-Fuller = -3.1705, Lag order = 2, p-value = 0.1265
alternative hypothesis: stationary
adf.test(Exch.Rate)

    Augmented Dickey-Fuller Test

data:  Exch.Rate
Dickey-Fuller = -1.743, Lag order = 2, p-value = 0.6703
alternative hypothesis: stationary
adf.test(diff(Exch.Rate))

    Augmented Dickey-Fuller Test

data:  diff(Exch.Rate)
Dickey-Fuller = -2.9525, Lag order = 2, p-value = 0.2095
alternative hypothesis: stationary
ts.plot(diff(Exch.Rate))

EXR<-diff(Exch.Rate)

Further differencing

adf.test(diff(EXR))

    Augmented Dickey-Fuller Test

data:  diff(EXR)
Dickey-Fuller = -3.5356, Lag order = 2, p-value = 0.05894
alternative hypothesis: stationary
plot(diff(EXR))

EXR1<-diff(EXR)
ts.plot(EXR1)

adf.test(diff(EXR1))

    Augmented Dickey-Fuller Test

data:  diff(EXR1)
Dickey-Fuller = -4.5082, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary
ts.plot(diff(EXR1))

Data Visualization

CPI

hist(CPI)

hist(log(CPI))

hist(diff(CPI))

Exchange Rate

hist(Exch.Rate)

hist(log(Exch.Rate))

hist(diff(Exch.Rate))

Lending Interest Rates

hist(Lend.Int.Rates)

hist(log(Lend.Int.Rates))

hist(diff(Lend.Int.Rates))

Test Normality

shapiro.test(CPI)

    Shapiro-Wilk normality test

data:  CPI
W = 0.93963, p-value = 0.1316
shapiro.test(Exch.Rate)

    Shapiro-Wilk normality test

data:  Exch.Rate
W = 0.86288, p-value = 0.002555
shapiro.test(Lend.Int.Rates)

    Shapiro-Wilk normality test

data:  Lend.Int.Rates
W = 0.86773, p-value = 0.003203

ESTIMATE REGRESSION EQUATION

model<-lm(log(CPI)~log(Exch.Rate)+log(Lend.Int.Rates),data=data)
library(stargazer)
stargazer(model,type="text")

===============================================
                        Dependent variable:    
                    ---------------------------
                             log(CPI)          
-----------------------------------------------
log(Exch.Rate)               1.515***          
                              (0.085)          
                                               
log(Lend.Int.Rates)          -0.552***         
                              (0.138)          
                                               
Constant                      -0.704           
                              (0.528)          
                                               
-----------------------------------------------
Observations                    26             
R2                             0.935           
Adjusted R2                    0.929           
Residual Std. Error       0.223 (df = 23)      
F Statistic           164.844*** (df = 2; 23)  
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01

Model Diagnostic

1. Test for the presence of outliers in the model

library(car)
library(tseries)
outlierTest(model)
No Studentized residuals with Bonferroni p < 0.05
Largest |rstudent|:
   rstudent unadjusted p-value Bonferroni p
26 2.702426           0.013008       0.3382

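The Bonferroni-adjusted p-value (0.3382) is greater than 0.05, so no observation is flagged as a statistically significant outlier; observation 26 simply has the largest studentized residual.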

qqPlot(model, main= "QQ Plot Showing the Possible Presence of outliers")

[1] 18 26
leveragePlots(model)

2. test the presence of multicollinearity in the model

vif(model)
     log(Exch.Rate) log(Lend.Int.Rates) 
           1.000162            1.000162 

A VIF of 1.000162 indicates that the predictors are not correlated with each other (no multicollinearity).

Testing for the presence of autocorrelation

durbinWatsonTest(model)
 lag Autocorrelation D-W Statistic p-value
   1       0.6176191     0.5324069       0
 Alternative hypothesis: rho != 0

The results show that the regression residuals are autocorrelated (D-W statistic = 0.53, p-value ≈ 0).

3. Testing for heteroscedasticity

ncvTest(model)
Non-constant Variance Score Test 
Variance formula: ~ fitted.values 
Chisquare = 5.509929, Df = 1, p = 0.018909

The results show that the variance of the error terms is not constant (p = 0.019 < 0.05).

spreadLevelPlot(model)


Suggested power transformation:  -2.486722 

Correcting Econometric problems

Heteroscedasticity

coeftest(model, hccm(model, type = "hc0"))

t test of coefficients:

                     Estimate Std. Error t value  Pr(>|t|)    
(Intercept)         -0.704260   0.315018 -2.2356   0.03537 *  
log(Exch.Rate)       1.514945   0.057345 26.4181 < 2.2e-16 ***
log(Lend.Int.Rates) -0.552044   0.109898 -5.0232 4.403e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(stargazer)
stargazer(coeftest(model, hccm(model, type = "hc0")),type="text")

===============================================
                        Dependent variable:    
                    ---------------------------
                                               
-----------------------------------------------
log(Exch.Rate)               1.515***          
                              (0.057)          
                                               
log(Lend.Int.Rates)          -0.552***         
                              (0.110)          
                                               
Constant                     -0.704**          
                              (0.315)          
                                               
===============================================
===============================================
Note:               *p<0.1; **p<0.05; ***p<0.01

VISUAL REPRESENTATION OF THE TIME SERIES DATA USING GGPLOT2 AND GGTHEMES

We need to declare our data to be a time series

Consumer Price Index

# Declare CPI as an annual time series starting in 1987. This global `CPI`
# masks the `CPI` column made visible earlier by attach(data).
CPI <- ts(data$CPI, start = 1987, frequency = 1)

# Level series.
plot.ts(CPI,
  type = "l", main = "Time Series plot CPI",
  xlab = "Year", ylab = "Consumer Price Index"
)

# First difference, labelled as such so it is not mistaken for the levels.
plot.ts(diff(CPI),
  type = "l", main = "Time Series plot of differenced CPI",
  xlab = "Year", ylab = "Change in Consumer Price Index"
)

Exchange Rate

# Declare the exchange rate as an annual time series starting in 1987. This
# global `Exch.Rate` masks the column made visible earlier by attach(data).
Exch.Rate <- ts(data$Exch.Rate, start = 1987, frequency = 1)

# Level series.
plot.ts(Exch.Rate,
  type = "l", main = "Time Series plot Exch.Rate",
  xlab = "Year", ylab = "Exchange Rate"
)

# First difference, labelled as such so it is not mistaken for the levels.
plot.ts(diff(Exch.Rate),
  type = "l", main = "Time Series plot of differenced Exch.Rate",
  xlab = "Year", ylab = "Change in Exchange Rate"
)

Lending Interest Rates

# Declare lending interest rates as an annual time series starting in 1987.
# This global `Lend.Int.Rates` masks the column made visible earlier by
# attach(data).
Lend.Int.Rates <- ts(data$Lend.Int.Rates, start = 1987, frequency = 1)

# Level series.
plot.ts(Lend.Int.Rates,
  type = "l", main = "Time Series plot Lend.Int.Rates",
  xlab = "Year", ylab = "Lend.Int.Rates"
)

# First difference, labelled as such so it is not mistaken for the levels.
plot.ts(diff(Lend.Int.Rates),
  type = "l", main = "Time Series plot of differenced Lend.Int.Rates",
  xlab = "Year", ylab = "Change in Lend.Int.Rates"
)

Using ggplot for data visualization

# require() tries to load the package and returns FALSE when it is missing;
# in that case the package is installed. library() then loads ggplot2.
if(!require(ggplot2)){install.packages("ggplot2")} ## installs ggplot2 if not installed
library(ggplot2)
# Same install-if-missing check for ggthemes.
if(!require(ggthemes)){install.packages("ggthemes")}

Loading required package: ggthemes

Warning: package ‘ggthemes’ was built under R version 4.1.3

library(ggthemes)
ggplot(data=data,aes(x=year,y=CPI))+geom_line()

or

ggplot(data=data,aes(x=year,y=CPI))+geom_line()

OR

# Make the Economist theme the session default. The original code appended
# `theme_set(theme_economist())` to the plot with `+`; theme_set() returns the
# *previous* theme, so that older theme was applied to this plot and replaced
# the theme() adjustments below. Setting the default in its own statement
# keeps the session-wide effect and lets the adjustments apply.
theme_set(theme_economist())

# CPI over time with title, caption and axis labels.
ggplot(data = data, aes(x = year, y = CPI)) +
  geom_line() +
  labs(
    title = "Time Series plot of CPI",
    caption = "source:World Bank",
    y = "Consumer Price Index",
    x = "Year"
  ) + # title and caption
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, size = 12), # x-axis text (angle 0 = horizontal)
    axis.title = element_text(size = 12, face = "bold"),
    panel.grid = element_blank()
  ) +
  # theme(panel.grid.minor = element_blank()) + # uncomment to turn off the minor grid
  theme(legend.text = element_text(size = 12, face = "bold"))

Combined Graphs

# One line chart per series, each with its own title, caption and axis
# labels. The misspelled "Exhange Rate" labels are corrected.
ggplot(data = data, aes(x = year, y = CPI)) +
  geom_line() +
  labs(
    title = "Time Series plot of CPI",
    caption = "source:World Bank 2018",
    y = "Consumer Price Index",
    x = "Year"
  )

ggplot(data = data, aes(x = year, y = Exch.Rate)) +
  geom_line() +
  labs(
    title = "Time Series plot of Exchange Rate",
    caption = "source:World Bank 2018",
    y = "Exchange Rate",
    x = "Year"
  )

ggplot(data = data, aes(x = year, y = Lend.Int.Rates)) +
  geom_line() +
  labs(
    title = "Time Series plot of Lend.Int.Rates",
    caption = "source:World Bank 2018",
    y = "Lend.Int.Rates",
    x = "Year"
  )

Create Date Object

date<-seq(as.Date("1987-01-01"),by="1 year",length.out=length(data$year))
date
 [1] "1987-01-01" "1988-01-01" "1989-01-01" "1990-01-01" "1991-01-01"
 [6] "1992-01-01" "1993-01-01" "1994-01-01" "1995-01-01" "1996-01-01"
[11] "1997-01-01" "1998-01-01" "1999-01-01" "2000-01-01" "2001-01-01"
[16] "2002-01-01" "2003-01-01" "2004-01-01" "2005-01-01" "2006-01-01"
[21] "2007-01-01" "2008-01-01" "2009-01-01" "2010-01-01" "2011-01-01"
[26] "2012-01-01"
# All three series on one chart, distinguished by a colour legend titled
# "Key". `date` is the yearly Date vector built with seq(); it is not a
# column of `data`, so aes() picks it up from the calling environment.
ggplot(data = data, aes(x = date)) +
  geom_line(aes(y = Exch.Rate, colour = "Exchange Rate")) +
  geom_line(aes(y = Lend.Int.Rates, colour = "Lending Interest Rates")) +
  geom_line(aes(y = CPI, colour = "Consumer Price Index")) +
  labs(
    title = "Trends of CPI,Interest Rates and Exchange Rates",
    caption = "",
    y = "Rate",
    x = "Time in Years",
    color = "Key"
  ) +
  # One tick per year, labelled with the four-digit year.
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  # Vertical tick labels so the yearly ticks do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8))

# Each theme_set() call replaces the session-wide default ggplot2 theme, so
# when these are run in sequence only the last one (theme_base) stays active.
theme_set(theme_dark())

theme_set(theme_economist())
theme_set(theme_base())
# Three single-series line charts over the yearly `date` vector, stored so
# they can be stacked with grid later. No colour aesthetic is mapped in these
# plots, so the inconsistent `color = 3` / `color = "Key"` labels had no
# effect and are dropped.
p1 <- ggplot(data = data, aes(x = date, y = CPI)) +
  geom_line() +
  labs(
    title = "Consumer Price Index",
    caption = "",
    y = "Consumer Price Index",
    x = "Time in Years"
  ) +
  scale_x_date(date_labels = "%Y-%b", date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8))

p2 <- ggplot(data = data, aes(x = date, y = Exch.Rate)) +
  geom_line() +
  labs(
    title = "Exchange Rate",
    caption = "",
    y = "Exchange Rate",
    x = "Time in Years"
  ) +
  scale_x_date(date_labels = "%Y-%b", date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8))

p3 <- ggplot(data = data, aes(x = date, y = Lend.Int.Rates)) +
  geom_line() +
  labs(
    title = "Lending Interest Rates",
    caption = "",
    y = "Lending Interest Rates",
    x = "Time in Years"
  ) +
  scale_x_date(date_labels = "%Y-%b", date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8))

Load the following Important Library

library(grid)

Make Plots

grid.newpage()
grid.draw(rbind(ggplotGrob(p1),ggplotGrob(p2),ggplotGrob(p3),size="last"))

OR

grid.newpage()
grid.draw(rbind(ggplotGrob(p1),ggplotGrob(p3),size="last"))

grid.newpage()
grid.draw(rbind(ggplotGrob(p2),ggplotGrob(p3),size="last"))

grid.newpage()
grid.draw(rbind(ggplotGrob(p1),size="last"))

grid.newpage()
grid.draw(rbind(ggplotGrob(p2),size="last"))

grid.newpage()
grid.draw(rbind(ggplotGrob(p3),size="last"))