library(readxl) #for loading data
library(fitdistrplus) #for fitting mathematical curves
## Loading required package: MASS
## Loading required package: survival
library(LogisticCurveFitting) #for fitting logistic curve
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
# Load the year-wise cyber-crime workbook. The first column holds the state/UT
# name and the remaining columns hold the yearly counts.
df <- read_excel("YEAR 2002-21-2.xlsx")

# National series in thousands of crimes, as a yearly time series from 2002.
# NOTE(review): row 37 is taken to be the all-India total, and rows 37-40 are
# assumed to be summary/footer rows rather than states -- confirm against the
# workbook layout, since these positions are hard-coded.
df.total <- ts(as.numeric(unlist(df[37, -1])), start = 2002) / 1000

# Keep only the state/UT rows for the zone-level analysis.
df <- df[-(37:40), ]

plot(
  df.total,
  main = "Line Plot of Cyber-Crimes in India (2002-2021)",
  ylab = "No. of Crimes('000)",
  xlab = "Year"
)
# Time index: one step per year of the series.
t <- 0:19

# Pair the time index with the national totals so the models can use `data =`.
d.total <- data.frame(t, df.total)

# Linear growth model y(t) = a + b*t, fitted by least squares.
poly1 <- lm(df.total ~ t, data = d.total)
lin.fit <- coef(poly1)

# Evaluates the fitted line at time index `t` (vectorised over `t`).
linear.growth.model <- function(t) {
  lin.fit[1] + lin.fit[2] * t
}
# Quadratic growth model y(t) = a + b*t + c*t^2, fitted by least squares.
poly2 <- lm(df.total ~ t + I(t^2), data = d.total)
quad.fit <- coef(poly2)

# Evaluates the fitted parabola at time index `t` (vectorised over `t`).
quad.growth.model <- function(t) {
  quad.fit[1] + quad.fit[2] * t + quad.fit[3] * t^2
}
# Exponential growth model y(t) = a * exp(r*t), estimated by least squares on
# the log scale: log(y) = log(a) + r*t.

# Reciprocal of the series mean; it is not used by the growth fit below.
est.lambda <- 1 / mean(df.total)

exp.model <- lm(log(df.total) ~ t, data = d.total)
exp.fit <- coef(exp.model)

# Back-transforms the log-scale prediction to the original scale.
exp.growth.model <- function(t) {
  log.pred <- exp.fit[1] + exp.fit[2] * t
  exp(log.pred)
}
# Observed totals with the fitted linear, quadratic and exponential curves.
# (No Gompertz model is fitted in the code visible here.)
# The x axis runs on the time index t; the default axis is suppressed and
# redrawn with calendar-year labels, extended past the data range.
plot(
  x = t,
  y = df.total,
  main = "Fitting Mathematical Curves to the Cyber Crime data",
  xlim = c(0, 23),
  ylim = c(0, 100),
  ylab = "No. of Crimes(in '000)",
  xlab = "Year",
  xaxt = "n"
)
axis(side = 1, at = seq(0, 23, by = 3), labels = seq(2002, 2025, by = 3))

# Each curve is added to the existing plot.
curve(linear.growth.model, col = "blue", add = TRUE)
curve(quad.growth.model, col = "green", add = TRUE)
curve(exp.growth.model, col = "red", add = TRUE)
# Fit summary of the linear growth model.
print(summary(poly1))
##
## Call:
## lm(formula = df.total ~ t, data = d.total)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.279 -8.915 -2.246 7.381 16.888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.3746 4.2387 -2.684 0.0152 *
## t 2.4979 0.3814 6.549 3.73e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.836 on 18 degrees of freedom
## Multiple R-squared: 0.7044, Adjusted R-squared: 0.688
## F-statistic: 42.89 on 1 and 18 DF, p-value: 3.734e-06
# Fit summary of the quadratic growth model.
print(summary(poly2))
##
## Call:
## lm(formula = df.total ~ t + I(t^2), data = d.total)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4684 -2.4011 0.0816 2.5538 6.7140
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.33420 2.25325 2.367 0.03 *
## t -3.07169 0.54969 -5.588 3.27e-05 ***
## I(t^2) 0.29314 0.02793 10.496 7.58e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.7 on 17 degrees of freedom
## Multiple R-squared: 0.9605, Adjusted R-squared: 0.9558
## F-statistic: 206.6 on 2 and 17 DF, p-value: 1.182e-12
# Fit summary of the exponential growth model. The response is log(df.total),
# so the residuals and R-squared are on the log scale and are not directly
# comparable with those of the two polynomial fits.
print(summary(exp.model))
##
## Call:
## lm(formula = log(df.total) ~ t, data = d.total)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.93241 -0.28659 -0.01763 0.21574 1.40458
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.61777 0.22490 -7.193 1.08e-06 ***
## t 0.29705 0.02024 14.678 1.85e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5219 on 18 degrees of freedom
## Multiple R-squared: 0.9229, Adjusted R-squared: 0.9186
## F-statistic: 215.5 on 1 and 18 DF, p-value: 1.851e-11
# Zone membership: each vector lists the states/UTs assigned to one zone,
# sorted alphabetically. The names must match the first column of `df`.

# northern zone
n.zone <- sort(c(
  "Haryana", "Himachal Pradesh", "Jammu & Kashmir", "Punjab",
  "Rajasthan", "Delhi UT", "Chandigarh"
))

# central zone
c.zone <- sort(c(
  "Uttar Pradesh", "Chhattisgarh", "Uttarakhand", "Madhya Pradesh"
))

# eastern zone
e.zone <- sort(c("Bihar", "Jharkhand", "Odisha", "West Bengal"))

# north-eastern zone
ne.zone <- sort(c(
  "Sikkim", "Tripura", "Arunachal Pradesh", "Assam",
  "Mizoram", "Manipur", "Meghalaya", "Nagaland"
))

# western zone
w.zone <- sort(c(
  "Goa", "Gujarat", "Maharashtra", "Daman & Diu", "D&N Haveli"
))

# southern zone
s.zone <- sort(c(
  "Andhra Pradesh", "Karnataka", "Kerala", "Tamil Nadu",
  "Puducherry", "A & N Islands", "Lakshadweep", "Telangana"
))

# State/UT names as a flat vector, taken from the first column of `df`.
states <- unlist(df[1])
# Yearly share of the national total contributed by one zone.
#
# zone: character vector of state/UT names, matched against the global
#       `states` vector (which indexes the rows of the global `df`).
#
# Returns a two-column matrix: `year` (2002:2021) and an unnamed column with
# the zone's summed yearly counts divided by `df.total`. Because `df.total` is
# stored in thousands, the value is the zone's share per 1000 crimes.
#
# Names in `zone` that are missing from `states` raise a warning and are
# skipped. The count matrix is sized from the rows actually matched, so a
# misspelt or absent name can no longer silently mis-shape the matrix and
# corrupt the column sums.
zonal.df <- function(zone) {
  unmatched <- setdiff(zone, states)
  if (length(unmatched) > 0) {
    warning(
      "zone members not found in `states`: ",
      paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }

  # Rows of the zone's members, without the leading name column.
  zone.rows <- df[states %in% zone, -1, drop = FALSE]

  # unlist() walks the columns in order, so filling column-wise with the
  # matched row count rebuilds the states-by-years layout.
  counts <- matrix(
    as.numeric(unlist(zone.rows)),
    nrow = nrow(zone.rows),
    ncol = ncol(zone.rows)
  )

  cbind(year = 2002:2021, colSums(counts) / c(df.total))
}
# Stack the six zone tables into one long data frame for plotting.
# data.frame() expands the two-column matrix into `n.crimes.year` and
# `n.crimes.V2` (the share column is unnamed, hence the V2 suffix).
# Each zone contributes 20 rows, one per year.
z.data <- data.frame(
  n.crimes = round(
    do.call(
      rbind,
      lapply(list(n.zone, c.zone, e.zone, ne.zone, w.zone, s.zone), zonal.df)
    ),
    8
  ),
  zone = rep(
    c("north", "central", "east", "north-east", "west", "south"),
    each = 20
  )
)

# One line per zone: share of the national total against year.
ggplot(
  data = z.data,
  mapping = aes(x = n.crimes.year, y = n.crimes.V2, colour = zone)
) +
  geom_line()
# Southern-zone share as a yearly time series starting in 2002.
south.df <- ts(zonal.df(s.zone)[, 2], start = 2002)
plot(south.df)

# Successive differencing, plotted after each pass to judge how many passes
# are needed before the series looks stationary.
#
# Each pass computes x[t-1] - x[t], i.e. the negated first difference. This
# keeps the values the previous lag()-based expression produced, without
# depending on which package's lag() is on the search path: stats::lag() only
# shifts the time base (it would give all-zero differences on a plain
# vector), and dplyr::lag() has no `k` argument. The sign does not affect the
# stationarity judgement.
l1 <- -diff(c(south.df))
plot(l1, type = "l")
l1 <- -diff(l1)
plot(l1, type = "l")
l1 <- -diff(l1)
plot(l1, type = "l")
# Autocorrelation, autocovariance and partial autocorrelation plots of the
# southern-zone series, drawn in that order.
invisible(Map(
  function(kind, title) acf(south.df, type = kind, main = title),
  c("correlation", "covariance", "partial"),
  c("ACF Plot", "AVCF Plot", "PACF Plot")
))
# Choosing the ARIMA(p, d, q) order: p = lag at which the PACF cuts off; d = number of times the series must be differenced to become stationary; q = lag at which the ACF cuts off.