Graph Plotting using R
Installation of Packages
This installs (if needed) and loads the packages required for the plots.
if (!require(pacman)) install.packages("pacman")## Loading required package: pacman
pacman::p_load(tseries, tvReg, TSstudio, forecast, tibbletime, roll, gganimate, gapminder, rmdformats, ggpubr,ggplot2,readxl,RColorBrewer,data.table,astsa, foreign, haven,utils,changepoint,openxlsx,tsDyn,urca,KFAS,lubridate,forecast,vars,dplR, ggfortify)
Scatter Plot
This function plots a scatter diagram with a fitted linear trend line. Along with the trend it also reports the Pearson correlation coefficient and the p-value for its significance.
#' Scatter plot with a linear trend line and a Pearson correlation label
#'
#' Draws the points, an `lm` fit with its confidence band, and a
#' `stat_cor()` label (Pearson method). The label is anchored a quarter of
#' the data range in from the right edge: near the bottom when the
#' correlation is positive, near the top otherwise.
#'
#' @param x,y Numeric vectors of equal length. Missing values are ignored
#'   when computing the correlation and the label position.
#' @param xtitle,ytitle Axis titles.
#' @param title Plot title (centred).
#' @return The result of `plot(p)`; called for the side effect of drawing.
scatter_plot <- function(x, y, xtitle = "X Title", ytitle = "Y Title",
                         title = "Title") {
  r <- cor(x, y, use = "complete.obs")
  x_rng <- range(x, na.rm = TRUE)
  y_rng <- range(y, na.rm = TRUE)

  # Anchor for the correlation label. isTRUE() keeps the branch well-defined
  # even when r is NA (e.g. a constant series), instead of leaving the
  # coordinates unset.
  xp <- x_rng[2] - diff(x_rng) / 4
  yp <- if (isTRUE(r > 0)) {
    y_rng[1] + diff(y_rng) / 4
  } else {
    y_rng[2] - diff(y_rng) / 4
  }

  data <- data.frame(x, y)
  p <- ggplot(data, aes(x = x, y = y)) +
    stat_cor(method = "pearson", label.x = xp, label.y = yp) +
    geom_smooth(method = "lm", se = TRUE, fullrange = FALSE, colour = "black") +
    geom_point(colour = "darkcyan", size = 3) +
    theme_bw() +
    ggtitle(title) +
    theme(
      legend.position = "bottom",
      text = element_text(face = "bold", size = 14),
      plot.title = element_text(hjust = 0.5),
      plot.caption = element_text(hjust = 0)
    ) +
    labs(caption = "", x = xtitle, y = ytitle)
  plot(p)
}
# Example: simulate two positively related series and plot them.
x <- rnorm(200)
y <- rnorm(200) + 1.02 * x
data <- data.frame(x, y)
names(data) <- c("X Variable", "Y Variable")
head(data)
## X Variable Y Variable
## 1 -1.5223575 -2.3914440
## 2 1.5879104 1.1390294
## 3 0.8617752 -1.9134524
## 4 -0.4435070 -0.8396368
## 5 -1.4009492 -1.3501885
## 6 -1.6095549 -1.0920441
x <- data[["X Variable"]]
y <- data[["Y Variable"]]
# Usage: scatter_plot(x variable, y variable, xtitle="", ytitle="", title="")
scatter_plot(x, y)
## `geom_smooth()` using formula 'y ~ x'
Scatter Plot with Categories
This plots a scatter plot and colours the data points by category.
#' Scatter plot with points coloured by category
#'
#' Same layout as a plain scatter plot with an `lm` trend line and a Pearson
#' `stat_cor()` label, plus a second point layer coloured by `cat`.
#'
#' @param x,y Numeric vectors of equal length. Missing values are ignored
#'   when computing the correlation and the label position.
#' @param cat Category of each observation (same length as `x`).
#' @param xtitle,ytitle Axis titles.
#' @param title Plot title (centred).
#' @param catname Legend title for the category colours.
#' @return The result of `plot(p)`; called for the side effect of drawing.
scatter_plot_cat <- function(x, y, cat, xtitle = "X Title", ytitle = "Y Title",
                             title = "Title", catname = "Category Name") {
  r <- cor(x, y, use = "complete.obs")
  x_rng <- range(x, na.rm = TRUE)
  y_rng <- range(y, na.rm = TRUE)

  # Label anchor: a quarter-range in from the right edge, low for a positive
  # correlation and high otherwise. isTRUE() covers an NA correlation.
  xp <- x_rng[2] - diff(x_rng) / 4
  yp <- if (isTRUE(r > 0)) {
    y_rng[1] + diff(y_rng) / 4
  } else {
    y_rng[2] - diff(y_rng) / 4
  }

  # Build the plotting data from the arguments; the function must not depend
  # on whatever object named `data` happens to exist in the caller's workspace.
  data <- data.frame(x, y, cat)

  p <- ggplot(data, aes(x = x, y = y, color = cat)) +
    stat_cor(method = "pearson", label.x = xp, label.y = yp, color = "black") +
    geom_smooth(method = "lm", se = TRUE, fullrange = FALSE, colour = "black") +
    geom_point(colour = "darkcyan", size = 3) +
    theme_bw() +
    ggtitle(title) +
    # Smaller category-coloured points drawn on top of the darkcyan ones.
    geom_point(aes(color = cat), size = 2) +
    theme(
      legend.position = "bottom",
      text = element_text(face = "bold", size = 14),
      plot.title = element_text(hjust = 0.5),
      plot.caption = element_text(hjust = 0)
    ) +
    labs(color = catname, caption = "", x = xtitle, y = ytitle)
  plot(p)
}
# Example: 100 simulated observations spread over four companies.
x <- rnorm(100)
y <- rnorm(100) + 1.5 * x
z <- rep(c("A", "B", "C", "D"), times = 25)
data <- data.frame(z, x, y)
names(data) <- c("Company", "X Variable", "Y-Variable")
head(data)
## Company X Variable Y-Variable
## 1 A -0.75154920 -1.7635570
## 2 B 2.59854604 3.3262248
## 3 C -0.63521434 -2.0551013
## 4 D 0.11567055 1.3692380
## 5 A -0.53152184 -0.6642490
## 6 B -0.06097329 -0.2657264
x <- data[["X Variable"]]
y <- data[["Y-Variable"]]
cat <- data[["Company"]]
scatter_plot_cat(
  x, y, cat,
  title = "", xtitle = "X Title", ytitle = "Y Title", catname = "Company"
)
## `geom_smooth()` using formula 'y ~ x'
# scatter_plot_cat(x variable, y variable, category, xtitle="", ytitle="", title="", catname="")
Bar Graph
This code plots the bar graph.
#' Bar chart with value labels
#'
#' Draws one steel-blue bar per element of `x` with height `y`, and prints
#' each value in white just inside the top of its bar.
#'
#' @param x Bar categories (x-axis positions).
#' @param y Bar heights, same length as `x`; also used as the text labels.
#' @param xtitle,ytitle Axis titles.
#' @param title Plot title (centred).
#' @return The result of `plot(p)`; called for the side effect of drawing.
plot_bar <- function(x, y, xtitle = "X Title", ytitle = "Y Title",
                     title = "Title") {
  # Build the plotting data from the arguments; the function must not depend
  # on whatever object named `data` happens to exist in the caller's workspace.
  data <- data.frame(x, y)

  dotted_grid <- element_line(colour = "gray", linetype = "dotted")
  p <- ggplot(data, aes(x = x, y = y)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    theme_linedraw() +
    geom_text(aes(label = y), vjust = 1.1, color = "white", size = 4.5) +
    theme(panel.grid.major = dotted_grid, panel.grid.minor = dotted_grid) +
    ggtitle(title) +
    theme(
      legend.position = "bottom",
      text = element_text(face = "bold", size = 14),
      plot.title = element_text(hjust = 0.5),
      plot.caption = element_text(hjust = 0)
    ) +
    labs(caption = "", x = xtitle, y = ytitle)
  plot(p)
}
# Example: six categories with one value each.
x <- c(2, 5, 7, 5, 8, 3)
y <- c("A", "B", "C", "D", "E", "F")
data <- data.frame(y, x)
names(data) <- c("Category", "Value")
head(data)
## Category Value
## 1 A 2
## 2 B 5
## 3 C 7
## 4 D 5
## 5 E 8
## 6 F 3
x <- data[["Category"]]
y <- data[["Value"]]
plot_bar(x, y)
Secondary Axis Plot
This code is useful when we have two series on different scales and want to plot both of them using a secondary axis.
#' Two series on one plot with a secondary y-axis
#'
#' `y2` is multiplied by a scale factor `k` so it can share the panel with
#' `y1`; the secondary axis divides by `k` again so it shows `y2` in its
#' original units.
#'
#' @param x Common x values.
#' @param y1 Series shown against the primary (left) axis.
#' @param y2 Series shown against the secondary axis.
#' @param xtitle X-axis title.
#' @param y1title,y2title Axis titles, also used as the legend labels.
#' @return A ggplot object.
secondary_plot <- function(x, y1, y2, xtitle, y1title, y2title) {
  data <- data.frame(x, y1, y2)

  # Scale factor: average of the ratio of means and the ratio of medians.
  # The two ratios must be combined with c(); mean(a, b) would treat `b` as
  # the `trim` argument and silently ignore the median ratio.
  ratio_of_means <- mean(y1, na.rm = TRUE) / mean(y2, na.rm = TRUE)
  ratio_of_medians <- median(y1, na.rm = TRUE) / median(y2, na.rm = TRUE)
  k <- mean(c(ratio_of_means, ratio_of_medians))

  p <- ggplot(data, aes(x = x)) + theme_bw()
  p <- p + geom_line(aes(y = y1, colour = y1title))
  p <- p + geom_line(aes(y = y2 * k, colour = y2title))
  p <- p + scale_y_continuous(sec.axis = sec_axis(~ . / k, name = y2title))
  p <- p + scale_colour_manual(values = brewer.pal(n = 8, name = "Dark2"))
  p <- p + labs(y = y1title, x = xtitle, colour = "")
  p <- p + theme(legend.position = c(0.8, 0.9))
  p
}
# Example: two simulated series with very different levels.
x <- seq(1, 1000, by = 1)
y1 <- rnorm(1000, mean = 7, sd = 3)
y2 <- rnorm(1000, mean = 78, sd = 13)
xtitle <- "X Title"
y1title <- "Y1 Title"
y2title <- "Y2 Title"
secondary_plot(x,y1,y2,xtitle,y1title,y2title)
Basic Time Series Plots
# Read the example workbook from its URL; the time-series examples below
# take their columns from this data frame.
data <- read.xlsx("https://mudulisilu.files.wordpress.com/2021/11/tsdata.xlsx")
#' Plot one or more time series in a single panel
#'
#' @param d Data to convert with `ts()` (one column per series).
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @return A ggplot object with an untitled legend and an empty y-axis label.
tseries_plot <- function(d, year, number, freq) {
  datats <- ts(d, start = c(year, number), frequency = freq)
  series_plot <- autoplot(datats, facets = FALSE)
  series_plot +
    theme_bw() +
    theme(legend.position = c(0.8, 0.8)) +
    scale_colour_discrete(name = "") +
    ylab("")
}
# Example: GDP growth and capital-formation growth, quarterly from 2007 Q3.
x1 <- data$GDPGrowth
x2 <- data$GFCFGrowth
d <- data.frame(x1, x2)
names(d) <- c("GDP Growth", "Capital Formation")
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
tseries_plot(d,year,number,freq)
Breakpoint Analysis
#' Plot a series with its estimated structural breaks
#'
#' Fits `breakpoints()` to an intercept-only model of the series with
#' `h = 10` and plots the result, with the series in blue and the break
#' lines dashed red.
#'
#' @param x Values of the series.
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param varname Y-axis label.
#' @return A ggplot object.
breakpoint_plot <- function(x, year, number, freq, varname) {
  xts <- ts(x, start = c(year, number), frequency = freq)
  bp_plot <- autoplot(
    breakpoints(xts ~ 1, h = 10),
    ts.colour = "blue",
    cpt.colour = "red",
    cpt.linetype = "dashed"
  )
  bp_plot + theme_bw() + ylab(varname)
}
# Example: structural breaks in quarterly GDP growth from 2007 Q3.
x <- data$GDPGrowth
varname <- "GDP Growth"
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
breakpoint_plot(x, year, number, freq, varname)
## Warning: `filter_()` was deprecated in dplyr 0.7.0.
## Please use `filter()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Mean and Variance Plotting
#' Plot a series with its change points in mean
#'
#' Fits `cpt.mean()` with the BIC penalty and the PELT method, then draws
#' the fit with the base `plot()` method (change points in maroon).
#'
#' @param x Values of the series.
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param varname Currently unused; kept so the signature matches the other
#'   plotting helpers in this file.
#' @return The result of the `plot()` call; called for its side effect.
mean_plot <- function(x, year, number, freq, varname) {
  xts <- ts(x, start = c(year, number), frequency = freq)
  # Named cpt_fit rather than `mean` so base::mean is not shadowed. The
  # former autoplot() call was dropped: its result was never printed or used.
  cpt_fit <- cpt.mean(xts, penalty = "BIC", method = "PELT")
  plot(cpt_fit, cpt.col = "maroon", xlab = "Time", cpt.width = 2)
}
# Example: change points in the mean of quarterly GDP growth from 2007 Q3.
x <- data$GDPGrowth
varname <- "GDP Growth"
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
mean_plot(x,year,number, freq, varname)
#' Plot a series with its change points from `cpt.meanvar()`
#'
#' Fits `cpt.meanvar()` with the PELT method and draws the fit with the
#' base `plot()` method (change points in maroon).
#'
#' @param x Values of the series.
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param varname Currently unused; kept so the signature matches the other
#'   plotting helpers in this file.
#' @return The result of the `plot()` call; called for its side effect.
variance_plot <- function(x, year, number, freq, varname) {
  xts <- ts(x, start = c(year, number), frequency = freq)
  mv <- cpt.meanvar(xts, method = "PELT")
  plot(mv, cpt.col = "maroon", xlab = "Time", cpt.width = 2)
}
# Example: mean/variance change points in quarterly GDP growth from 2007 Q3.
x <- data$GDPGrowth
varname <- "GDP Growth"
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
variance_plot(x,year,number, freq, varname)
Auto-ARIMA Model and Forecasting
An Auto-ARIMA\((p,d,q)\) model is selected with `auto.arima()` using its default information criterion (AICc; pass `ic = "bic"` to select on the BIC instead). Based on the estimated results, a forecast is produced for a given horizon.
# Basic ARIMA model
#' Fit an automatic ARIMA model and plot its forecast
#'
#' @param x Values of the series.
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param varname Y-axis label.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param conf_level Confidence level passed to `forecast()` as `level`.
#' @param forecast_horizon Number of periods to forecast, passed as `h`.
#' @return A ggplot object showing the forecast.
arima_plot <- function(x, year, number, varname, freq, conf_level,
                       forecast_horizon) {
  xts <- ts(x, start = c(year, number), frequency = freq)
  arima_m <- auto.arima(xts)
  # NOTE(review): the value of summary() is discarded here, so whether
  # anything is shown depends on the summary method printing by itself.
  summary(arima_m)
  forecast_arima <- forecast(arima_m, level = conf_level, h = forecast_horizon)
  forecast_plot <- autoplot(forecast_arima)
  forecast_plot + theme_bw() + ylab(varname)
}
# Example: 12-quarter forecast of GDP growth with a 95% interval.
x <- data$GDPGrowth
varname <- "GDP Growth"
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
conf_level <- 95
forecast_horizon <- 12
arima_plot(x,year,number,varname, freq,conf_level, forecast_horizon)
VAR Model Forecasting
This code produces forecasts from a VAR model. It only plots the forecast results; the stationarity of the time series, the stability of the model, etc., need to be checked before using this code.
#' Fit a VAR model and plot its forecast
#'
#' The lag order is the first entry of `VARselect()`'s `$selection`
#' (searching up to 5 lags, with a constant); the VAR is then fitted with
#' that order and a constant term.
#'
#' @param d Data to convert with `ts()` (one column per series).
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param forecast_horizon Number of periods to forecast, passed as `n.ahead`.
#' @return A ggplot object showing the forecast.
var_plot <- function(d, year, number, freq, forecast_horizon) {
  datats <- ts(d, start = c(year, number), frequency = freq)
  lag_candidates <- VARselect(datats, lag.max = 5, type = "const")
  lag_select <- lag_candidates$selection[1]
  var_m <- VAR(datats, p = lag_select, type = "const")
  forecast_plot <- autoplot(
    predict(var_m, n.ahead = forecast_horizon),
    ts.colour = "dodgerblue4",
    predict.colour = "blue",
    predict.linetype = "dashed"
  )
  forecast_plot + theme_bw()
}
# Example: 4-quarter VAR forecast of GDP growth and capital-formation growth.
x1 <- data$GDPGrowth
x2 <- data$GFCFGrowth
d <- data.frame(x1, x2)
names(d) <- c("GDP Growth", "Capital Formation")
year <- 2007
number <- 3 # month number or quarter number
freq <- 4 # monthly data = 12, quarterly data = 4, annual data = 1
forecast_horizon <- 4
var_plot(d,year,number,freq,forecast_horizon)
Rolling Regression Plot
This code estimates a rolling regression for a time series variable with a pre-specified window and plots the resulting coefficient path.
#' Plot a rolling-regression coefficient with its confidence band
#'
#' Runs `roll_lm()` of `y` on the regressors in `ind_list` over a window of
#' width `w`, then plots the coefficient of regressor `v` with a band of
#' +/- z standard errors, a dashed red zero line, and a text note
#' describing the band.
#'
#' @param y Dependent variable.
#' @param ind_list List of regressors; combined column-wise into a matrix.
#' @param formula Unused; kept for backward compatibility. It previously fed
#'   a full-sample `lm()` fit whose result was never used.
#' @param time Time variable; converted with `ts()` to build the date axis.
#' @param v Position of the regressor of interest in `ind_list` (column
#'   `v + 1` of the coefficient matrix; column 1 is skipped).
#' @param w Rolling window width.
#' @param ci Confidence level as a fraction (e.g. 0.95).
#' @param year,number Start of the series, passed as `start = c(year, number)`.
#' @param freq Observations per unit of time, passed as `frequency`.
#' @param ylabname Y-axis label.
#' @param note_x,note_y Position of the explanatory text note. The defaults
#'   are the values that used to be hard-coded; override them for data with
#'   a different date range or coefficient scale.
#' @return A ggplot object.
roll_plot <- function(y, ind_list, formula, time, v, w, ci, year, number, freq,
                      ylabname, note_x = as.Date("2016-01-01"),
                      note_y = -0.15) {
  # Two-sided normal critical value for the requested confidence level.
  zz <- qnorm(p = (1 - ci) / 2, lower.tail = FALSE)
  roll_model <- roll_lm(as.matrix(data.frame(ind_list)), y, width = w)

  ttts <- ts(time, start = c(year, number), frequency = freq)
  dff <- data.frame(
    a = as.Date(ttts),
    b = roll_model$coefficients[, v + 1],
    se = roll_model$std.error[, v + 1]
  )
  # Drop rows with missing estimates.
  dff <- na.omit(dff)

  p <- ggplot(dff, aes(a)) +
    geom_ribbon(aes(ymin = b - zz * se, ymax = b + zz * se), fill = "grey85") +
    geom_line(aes(y = b), color = "turquoise4", size = 0.5) +
    theme_bw() +
    xlab("Time") +
    ylab(ylabname)
  #p=p + annotate("rect", xmin =as.Date("2014-09-01"),xmax =as.Date("2017-05-01"), ymin=-0.21,ymax=0.3, fill="blue",alpha=0.1)
  p <- p + annotate(
    "text",
    x = note_x, y = note_y,
    label = paste("Shaded regions are", ci * 100, "% confidence intervals")
  )
  p <- p + geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1)
  p
}
# Example: rolling coefficient of GFCF growth in a regression of GDP growth.
y <- data$GDPGrowth # dependent variable
x1 <- data$GFCFGrowth # explanatory variable
time <- data$Quarter # time variable
ind_list <- list(x1) # list of independent variables
formula <- y ~ x1 # formula for the linear model
year <- 2007 # starting year of the time series
number <- 3 # month number or quarter number
freq <- 4 # frequency of the time series: quarterly = 4, monthly = 12
v <- 1 # position of the regressor of interest in ind_list
w <- 12 # window size
ci <- 0.95 # confidence level
ylabname <- "Coefficient of GFCF" # y-axis name
roll_plot(y, ind_list, formula, time, v, w, ci, year, number, freq, ylabname)