This is the first assignment of the Certificate in Computer Applications in Economic Analysis (CAEA) offered by the Gokhale Institute of Politics & Economics (GIPE), Pune.
The R module of the above CAEA course is being delivered by Dr. Savita Kulkarni from the Symbiosis School of Economics (SSE), Pune.
I have used R Markdown in RStudio for this assignment. R Markdown lets me knit the code chunks together with their output and plots. I can also create HTML, PDF, or Word documents by knitting the Rmd file created here.
I have applied various functions and packages to explore the data. This is simply to reapply what I have understood and learnt since I started the beautiful journey of exploring R. I have done more than the assignment requires. Many things may seem unnecessary for completing the assignment, but ultimately, if one tortures the data, more insights can be gathered about the data at hand, which can be useful in later stages of further analysis and modeling.
# Import the dataset; "rbigdp.csv" is expected in the current working directory.
# stringsAsFactors = TRUE keeps `Quarter` a factor on R >= 4.0 as well (where
# the default became FALSE), matching the str()/summary() output shown below.
rbigdp <- read.csv("rbigdp.csv", header = TRUE, stringsAsFactors = TRUE)
# Inspect the structure of the data set (column types and leading values)
str(rbigdp)
## 'data.frame': 54 obs. of 10 variables:
## $ Quarter : Factor w/ 4 levels "Q1","Q2","Q3",..: 1 2 3 4 1 2 3 4 1 2 ...
## $ Agriculture : num 992 740 1370 1096 982 ...
## $ Mining : num 83.9 81.6 90.9 100.6 91.9 ...
## $ Manufacturing: num 601 600 617 661 596 ...
## $ Electricity : num 89.4 88.8 92.8 95 95.4 ...
## $ Construction : num 191 194 198 219 210 ...
## $ Trade : num 727 705 821 839 782 ...
## $ Finance : num 433 437 443 464 482 ...
## $ Community : num 444 438 457 674 447 ...
## $ Gross : num 3562 3284 4089 4149 3687 ...
# Further ways to inspect the data set
colnames(rbigdp)  # variable (column) names of the data frame
## [1] "Quarter" "Agriculture" "Mining" "Manufacturing"
## [5] "Electricity" "Construction" "Trade" "Finance"
## [9] "Community" "Gross"
dim(rbigdp)  # dimensions: number of rows, then number of columns
## [1] 54 10
# Show only the first 6 rows here; the last 6 rows follow via tail()
head(rbigdp, n = 6L)
## Quarter Agriculture Mining Manufacturing Electricity Construction Trade
## 1 Q1 992.41 83.91 601.40 89.38 191.11 727.47
## 2 Q2 739.74 81.61 600.01 88.75 193.66 705.34
## 3 Q3 1369.57 90.90 616.90 92.81 198.27 821.00
## 4 Q4 1095.84 100.57 661.45 94.99 218.54 839.46
## 5 Q1 981.92 91.89 595.78 95.41 209.89 782.25
## 6 Q2 745.26 90.12 601.15 97.47 208.59 759.33
## Finance Community Gross
## 1 432.53 443.72 3561.93
## 2 437.01 438.13 3284.25
## 3 442.56 456.56 4088.57
## 4 464.16 674.00 4149.01
## 5 482.45 447.20 3686.80
## 6 482.01 467.46 3451.39
# Show the last 6 rows of the data set
tail(rbigdp, n = 6L)
## Quarter Agriculture Mining Manufacturing Electricity Construction
## 49 Q1 1318.49 149.41 1193.32 163.55 585.54
## 50 Q2 1052.14 143.21 1213.61 163.84 601.01
## 51 Q3 1811.97 167.33 1214.35 164.64 600.90
## 52 Q4 1477.83 182.50 1256.11 166.95 638.30
## 53 Q1 1349.68 161.19 1234.48 173.67 627.13
## 54 Q2 1061.16 156.88 1324.92 176.04 639.83
## Trade Finance Community Gross
## 49 2238.62 1169.02 1008.24 7826.20
## 50 2271.28 1187.39 1106.02 7738.50
## 51 2384.67 1250.67 1205.26 8799.80
## 52 2651.34 1326.48 1329.73 9029.24
## 53 2419.61 1263.22 1076.60 8305.57
## 54 2464.52 1278.44 1246.02 8347.80
# Summary statistics for every column, formatted with digits = 8
summary(object = rbigdp, digits = 8)
## Quarter Agriculture Mining Manufacturing
## Q1:14 Min. : 739.7400 Min. : 81.61000 Min. : 595.7800
## Q2:14 1st Qu.:1024.7200 1st Qu.:101.09250 1st Qu.: 666.3750
## Q3:13 Median :1149.8500 Median :117.38000 Median : 782.8450
## Q4:13 Mean :1192.6080 Mean :123.03037 Mean : 861.4102
## 3rd Qu.:1364.5975 3rd Qu.:142.08500 3rd Qu.:1055.6275
## Max. :1827.3800 Max. :182.50000 Max. :1324.9200
## Electricity Construction Trade
## Min. : 88.75000 Min. :191.1100 Min. : 705.3400
## 1st Qu.:110.55000 1st Qu.:253.0150 1st Qu.: 966.0275
## Median :122.41500 Median :314.8950 Median :1268.8150
## Mean :127.95352 Mean :367.9585 Mean :1428.4335
## 3rd Qu.:148.50750 3rd Qu.:485.8875 3rd Qu.:1815.7450
## Max. :176.04000 Max. :639.8300 Max. :2651.3400
## Finance Community Gross
## Min. : 432.5300 Min. : 438.1300 Min. :3284.250
## 1st Qu.: 576.0350 1st Qu.: 630.9725 1st Qu.:4437.705
## Median : 713.7000 Median : 797.1150 Median :5319.255
## Mean : 778.4502 Mean : 798.8476 Mean :5678.695
## 3rd Qu.: 970.3150 3rd Qu.: 932.4525 3rd Qu.:6881.235
## Max. :1326.4800 Max. :1329.7300 Max. :9029.240
#############################
# Scatter plot matrix of the numeric series (columns 2 to 10, i.e. everything
# except Quarter).
library(RColorBrewer)
# Colour each observation by its quarter. Passing a bare 3-colour vector would
# simply be recycled over the rows in row order, so the colours would not
# correspond to anything in the data.
quarter <- factor(rbigdp[["Quarter"]])
# brewer.pal() needs at least 3 colours; one colour is used per quarter level.
quarter_palette <- brewer.pal(max(3L, nlevels(quarter)), "Set1")
plot(rbigdp[, 2:10], col = quarter_palette[as.integer(quarter)])
####################################
# Alternative summary: descriptive-statistics table printed as plain text
library(stargazer)
stargazer(rbigdp, type = "text")
##
## ========================================================
## Statistic N Mean St. Dev. Min Max
## --------------------------------------------------------
## Agriculture 54 1,192.608 261.931 739.740 1,827.380
## Mining 54 123.030 25.512 81.610 182.500
## Manufacturing 54 861.410 225.374 595.780 1,324.920
## Electricity 54 127.954 24.220 88.750 176.040
## Construction 54 367.959 142.115 191.110 639.830
## Trade 54 1,428.434 557.657 705.340 2,651.340
## Finance 54 778.450 259.532 432.530 1,326.480
## Community 54 798.848 220.475 438.130 1,329.730
## Gross 54 5,678.695 1,583.311 3,284.250 9,029.240
## --------------------------------------------------------
# Alternative summary: stat.desc() from pastecs on the numeric columns
# (2 to 10), rounded to 4 decimal places
library(pastecs)
round(stat.desc(rbigdp[, 2:10]), digits = 4)
## Agriculture Mining Manufacturing Electricity Construction
## nbr.val 54.0000 54.0000 54.0000 54.0000 54.0000
## nbr.null 0.0000 0.0000 0.0000 0.0000 0.0000
## nbr.na 0.0000 0.0000 0.0000 0.0000 0.0000
## min 739.7400 81.6100 595.7800 88.7500 191.1100
## max 1827.3800 182.5000 1324.9200 176.0400 639.8300
## range 1087.6400 100.8900 729.1400 87.2900 448.7200
## sum 64400.8300 6643.6400 46516.1500 6909.4900 19869.7600
## median 1149.8500 117.3800 782.8450 122.4150 314.8950
## mean 1192.6080 123.0304 861.4102 127.9535 367.9585
## SE.mean 35.6442 3.4718 30.6695 3.2959 19.3394
## CI.mean.0.95 71.4933 6.9636 61.5152 6.6108 38.7899
## var 68607.5911 650.8851 50793.2583 586.6067 20196.6790
## std.dev 261.9305 25.5125 225.3736 24.2200 142.1150
## coef.var 0.2196 0.2074 0.2616 0.1893 0.3862
## Trade Finance Community Gross
## nbr.val 54.0000 54.0000 54.0000 54.0000
## nbr.null 0.0000 0.0000 0.0000 0.0000
## nbr.na 0.0000 0.0000 0.0000 0.0000
## min 705.3400 432.5300 438.1300 3284.2500
## max 2651.3400 1326.4800 1329.7300 9029.2400
## range 1946.0000 893.9500 891.6000 5744.9900
## sum 77135.4100 42036.3100 43137.7700 306649.5100
## median 1268.8150 713.7000 797.1150 5319.2550
## mean 1428.4335 778.4502 798.8476 5678.6946
## SE.mean 75.8875 35.3179 30.0028 215.4613
## CI.mean.0.95 152.2111 70.8387 60.1781 432.1607
## var 310981.5413 67357.0104 48609.1615 2506873.7078
## std.dev 557.6572 259.5323 220.4749 1583.3110
## coef.var 0.3904 0.3334 0.2760 0.2788
# Alternative summary: describe() from psych on the numeric columns (2 to 10)
library(psych)
describe(x = rbigdp[, 2:10])
## vars n mean sd median trimmed mad min
## Agriculture 1 54 1192.61 261.93 1149.85 1181.08 257.68 739.74
## Mining 2 54 123.03 25.51 117.38 121.43 26.66 81.61
## Manufacturing 3 54 861.41 225.37 782.85 845.40 219.52 595.78
## Electricity 4 54 127.95 24.22 122.41 127.34 27.54 88.75
## Construction 5 54 367.96 142.12 314.89 358.21 129.95 191.11
## Trade 6 54 1428.43 557.66 1268.82 1384.68 551.44 705.34
## Finance 7 54 778.45 259.53 713.70 760.18 248.65 432.53
## Community 8 54 798.85 220.47 797.12 791.28 219.11 438.13
## Gross 9 54 5678.69 1583.31 5319.26 5588.52 1623.88 3284.25
## max range skew kurtosis se
## Agriculture 1827.38 1087.64 0.39 -0.37 35.64
## Mining 182.50 100.89 0.51 -0.69 3.47
## Manufacturing 1324.92 729.14 0.56 -1.13 30.67
## Electricity 176.04 87.29 0.28 -1.13 3.30
## Construction 639.83 448.72 0.57 -1.15 19.34
## Trade 2651.34 1946.00 0.59 -0.95 75.89
## Finance 1326.48 893.95 0.59 -0.92 35.32
## Community 1329.73 891.60 0.26 -0.61 30.00
## Gross 9029.24 5744.99 0.50 -0.95 215.46
# To learn more about a function, use help() or ?
#?stargazer
#######################
# Column-wise means of all numeric series. Column 1 (Quarter) is nominal and is
# therefore excluded. vapply() is used instead of sapply() so that the result
# is guaranteed to be a named numeric vector with one value per column.
meanrbi <- vapply(rbigdp[, 2:10], mean, numeric(1), na.rm = TRUE)
meanrbi
## Agriculture Mining Manufacturing Electricity Construction
## 1192.6080 123.0304 861.4102 127.9535 367.9585
## Trade Finance Community Gross
## 1428.4335 778.4502 798.8476 5678.6946
# Means of the first three series
meanrbi[1:3]
## Agriculture Mining Manufacturing
## 1192.6080 123.0304 861.4102
# Variances of all series except the nominal 'Quarter' column. vapply() is used
# instead of sapply() so that the result is guaranteed to be a named numeric
# vector with one value per column.
varrbi <- vapply(rbigdp[, 2:10], var, numeric(1), na.rm = TRUE)
varrbi
## Agriculture Mining Manufacturing Electricity Construction
## 68607.5911 650.8851 50793.2583 586.6067 20196.6790
## Trade Finance Community Gross
## 310981.5413 67357.0104 48609.1615 2506873.7078
# Variances of the first three series
varrbi[1:3]
## Agriculture Mining Manufacturing
## 68607.5911 650.8851 50793.2583
# Correlation matrix of the numeric series, rounded to 2 decimal places
corrbi <- round(cor(rbigdp[, 2:10]), digits = 2)
corrbi
## Agriculture Mining Manufacturing Electricity Construction
## Agriculture 1.00 0.60 0.47 0.45 0.47
## Mining 0.60 1.00 0.95 0.94 0.95
## Manufacturing 0.47 0.95 1.00 0.98 0.99
## Electricity 0.45 0.94 0.98 1.00 0.99
## Construction 0.47 0.95 0.99 0.99 1.00
## Trade 0.52 0.97 0.99 0.98 0.99
## Finance 0.47 0.95 0.99 0.99 0.99
## Community 0.43 0.94 0.93 0.92 0.92
## Gross 0.61 0.98 0.98 0.97 0.98
## Trade Finance Community Gross
## Agriculture 0.52 0.47 0.43 0.61
## Mining 0.97 0.95 0.94 0.98
## Manufacturing 0.99 0.99 0.93 0.98
## Electricity 0.98 0.99 0.92 0.97
## Construction 0.99 0.99 0.92 0.98
## Trade 1.00 0.99 0.93 0.99
## Finance 0.99 1.00 0.93 0.98
## Community 0.93 0.93 1.00 0.93
## Gross 0.99 0.98 0.93 1.00
# Visualise the correlation matrix twice: first as numbers, then as circles
library(corrplot)
corrplot(corrbi, method = "number")
corrplot(corrbi, method = "circle")
############################
# Custom panel function, adapted from the examples in the help file for pairs().
# It writes the (signed) correlation of the two panel variables in the centre
# of the panel, with the text size scaled by the absolute correlation.
#
# Arguments:
#   x, y     numeric vectors for the current panel (supplied by pairs())
#   digits   `digits` value passed to format() for the printed correlation
#   prefix   string prepended to the printed correlation
#   cex.cor  base text size; when missing it is derived from the label width
#            as 0.8 / strwidth(label)
#   ...      accepted so that extra arguments forwarded by pairs() do not
#            raise an "unused argument" error; they are not used here
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Switch to unit coordinates so that (0.5, 0.5) is the panel centre. par()
  # returns the previous setting as a list, which is restored on exit.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- cor(x, y)
  # format() applies one common format to all elements, so formatting r next
  # to the fixed reference value fixes how many decimals are shown for r.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  # Only derive the size from the label width when the caller gave none.
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * abs(r))
}
# Apply the custom panel to the numeric series: smoothed scatter plots below
# the diagonal, correlation labels above it
pairs(
  rbigdp[, 2:10],
  lower.panel = panel.smooth,
  upper.panel = panel.cor
)
# Before plotting the time series, draw histograms of the Agriculture and
# Gross (GDP) variables, each with a kernel density estimate superimposed.
par(mfrow = c(1, 2))  # place both histograms side by side
# freq = FALSE puts the histogram on the density scale so that the density
# curve can be overlaid. The literal FALSE is used rather than `F`, because
# `F` is an ordinary variable that can be reassigned.
# Histogram with density curve of the 'Agriculture' variable
hist(rbigdp$Agriculture, col = "steelblue", xlab = "agriculture", freq = FALSE)
lines(density(rbigdp$Agriculture), col = "violetred", lwd = 3)
# Histogram with density curve of the 'Gross' (GDP) variable
hist(rbigdp$Gross, col = "firebrick", xlab = "Gross/GDP", freq = FALSE)
lines(density(rbigdp$Gross), col = "blue4", lwd = 3)
#########################
# Scatter plot matrix with histograms for all numeric variables: histograms on
# the diagonal, scatter plots with regression lines (no smoothers) elsewhere.
par(mfrow = c(1, 1))  # back to a single plot per page
library(car)
# NOTE(review): per the car 3.x documentation, `diagonal` must be a list such
# as list(method = "histogram"); a bare string is ignored with a warning and
# the default density panels are drawn instead. Older car releases took the
# string form, so pick the form that matches the installed version.
diag_spec <- if (utils::packageVersion("car") >= "3.0.0") {
  list(method = "histogram")
} else {
  "histogram"
}
scatterplotMatrix(rbigdp[, 2:10], diagonal = diag_spec, smooth = FALSE)
#############################
# Time series of Gross (GDP) and Agriculture drawn together in a single plot.
# Set the plot margins (bottom, left, top, right) and use one panel per page.
par(mar = c(5, 5, 4, 1), mfrow = c(1, 1))
# Gross/GDP series against the observation index; the fixed y-range leaves
# room for the Agriculture series on the same axes
plot(
  rbigdp$Gross,
  type = "l", lwd = 4, col = "blue",
  xlab = "no of quarterly periods",
  ylab = "Gross/GDP & Agriculture",
  main = "Plot of GDP & Agriculture over time",
  ylim = c(0, 10000)
)
# Add the Agriculture series to the same plot
lines(rbigdp$Agriculture, col = "red", lwd = 4)
# Mark the individual observations of both series
points(rbigdp$Gross, pch = 17)
points(rbigdp$Agriculture, pch = 17)
# Label the two curves
text(20, 6500, "GDP", col = "blue", cex = 2)
text(35, 2800, "Agriculture", col = "red", cex = 2)
See the analysis/document in your browser by clicking on the link: Assignment1_CAEA_SavitaKulkarni_R
#########################################################