Library packages
Before starting the analysis, we load the packages needed to carry out nonparametric regression with the Fourier series approach. Make sure that all of these packages are installed first, so that they can be attached as follows.
library("readxl")
library(gtools)
library(MASS)
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:gtools':
##
## logit
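The data import itself is not shown above. A minimal sketch of that setup step, assuming the data are stored in an Excel file; the file name data_bank.xlsx is an assumption, and the column names follow the variables used below (ROA as the response; CAR, NPL, NIM, BOPO, LDR as predictors):
# Install any packages that are missing (one-time step)
pkgs <- c("readxl", "gtools", "MASS", "pracma")
install.packages(setdiff(pkgs, rownames(installed.packages())))
# Read the data; the file name "data_bank.xlsx" is an assumption
data <- as.data.frame(read_excel("data_bank.xlsx"))
y <- data$ROA                                                  # response: return on assets
x <- as.matrix(data[, c("CAR", "NPL", "NIM", "BOPO", "LDR")])  # predictor matrix
n <- length(y)                                                 # sample size (47 in this analysis)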
Scatterplots
par(mfrow = c(2, 3))
plot(data$CAR, data$ROA, pch = 19, col = "red", xlab = "CAR", ylab = "ROA")
plot(data$NPL, data$ROA, pch = 19, col = "red", xlab = "NPL", ylab = "ROA")
plot(data$NIM, data$ROA, pch = 19, col = "red", xlab = "NIM", ylab = "ROA")
plot(data$BOPO, data$ROA, pch = 19, col = "red", xlab = "BOPO", ylab = "ROA")
plot(data$LDR, data$ROA, pch = 19, col = "red", xlab = "LDR", ylab = "ROA")

Based on the scatterplots, the relationships between ROA and the variables CAR, NPL, NIM, and LDR do not form any particular curve. Therefore, we assume that CAR, NPL, NIM, and LDR follow a nonparametric regression with the Fourier series approach. The relationship between ROA and BOPO, on the other hand, tends to be linear. However, we cannot immediately conclude that ROA and BOPO have a linear relationship without further analysis, so we compare the results when BOPO is modeled using linear parametric regression against nonparametric regression with the Fourier series approach, as follows.
Linear Parametric Regression for Variable BOPO
x4=as.matrix(data[,8])   # column 8 of the data holds BOPO
reg<-lm(y~x4, data)      # linear parametric regression of ROA on BOPO
summary(reg)
##
## Call:
## lm(formula = y ~ x4, data = data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -6.8225 -0.2467 -0.1097  0.2202  5.0568
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  6.882480   0.581155   11.84 2.01e-15 ***
## x4          -0.065995   0.005731  -11.52 5.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.408 on 45 degrees of freedom
## Multiple R-squared:  0.7466, Adjusted R-squared:  0.741
## F-statistic: 132.6 on 1 and 45 DF,  p-value: 5.22e-15
Nonparametric Regression with Fourier Series Approach for Variable BOPO
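The function below implements this approach for a single predictor. Reading the model off the design matrix built in the code (a constant column of 1/2, the linear term, and K cosine terms), the Fourier series estimator has the form

$$\hat{f}(x) = \hat{b}\,x + \frac{\hat{a}_0}{2} + \sum_{k=1}^{K}\hat{a}_k\cos(kx),$$

where the oscillation parameter K is chosen by minimizing the GCV value.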
fourierseries<-function(y, x4, K)
{
  n=length(y)
  a<-(1*(K+1))+1             # columns: constant 1/2, linear term, and K cosine terms
  Z<-matrix(0, n, a)         # design matrix
  result<-matrix(0, K, 4)    # per candidate K: K, GCV, R-squared, MSE
  for (k in 1:K)
  {
    # fill the design matrix up to k cosine terms
    for(i in 1:n)
    {
      for(j in 1:k)
      {
        Z[i,1]=1/2
        Z[i,2]<-x4[i]
        Z[i,2+j]=cos(j*x4[i])
      }
    }
    I<-diag(1,n,n)
    M<-Z%*%ginv(t(Z)%*%Z)%*%t(Z)   # hat matrix
    Ytop<-M%*%y                    # fitted values
    W<-(1/n)*I
    MSE<-t(y-Ytop) %*% W %*% (y-Ytop)
    Q<-(n^-1*sum(diag(I-M)))^2
    GCV<-MSE/Q
    SSE<-sum((y-Ytop)^2)
    SST<-sum((y-mean(y))^2)
    Rsq=(1-(SSE/(SST)))*100
    result[k,1]<-k
    result[k,2]<-GCV
    result[k,3]<-Rsq
    result[k,4]<-MSE
  }
  print(result)
  # locate the K that attains the smallest GCV
  GCV2<-min(result[,2])
  s<-1
  repeat{
    if(result[s,2]==GCV2)
    {
      kOpt<-result[s,1]
      GCVOpt<-GCV2
      break
    }
    else s<-s+1
  }
  cat("optimal number of K is \t",kOpt,"\n")
  cat("the smallest GCV value is \t",GCV2,"\n")
  print(GCV2)
}
A=fourierseries(y, x4, K=10)   # search K = 1, ..., 10
## [,1] [,2] [,3] [,4]
## [1,] 1 2.092409 75.51785 1.833818
## [2,] 2 2.190699 75.51967 1.833682
## [3,] 3 2.147957 77.10073 1.715254
## [4,] 4 2.217339 77.47331 1.687346
## [5,] 5 2.040108 80.27256 1.477670
## [6,] 6 1.999289 81.62183 1.376604
## [7,] 7 1.803323 84.26240 1.178814
## [8,] 8 1.884664 84.40681 1.167997
## [9,] 9 1.846644 85.53608 1.083409
## [10,] 10 1.841727 86.36488 1.021329
## optimal number of K is 7
## the smallest GCV value is 1.803323
## [1] 1.803323
Based on the analysis, we obtain an R-square value of 74.66% using linear parametric regression and 75.52% using nonparametric regression with the Fourier series approach when the number of oscillation parameters K is 1. If we focus on the smallest GCV value, we obtain an even higher R-square value of 84.26% when K is 7. Since the R-square value using nonparametric regression with the Fourier series approach is higher than that of linear parametric regression, the variable BOPO is better modeled using nonparametric regression with the Fourier series approach.
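To visualize this comparison, a minimal sketch that overlays both fits on the BOPO scatterplot; it rebuilds the K = 7 design matrix the same way fourierseries() does, using the objects x4, y, and reg defined above:
# Compare the linear fit and the GCV-optimal Fourier fit (K = 7) for BOPO
K7 <- 7
Z7 <- cbind(1/2, x4, sapply(1:K7, function(j) cos(j * x4)))   # design matrix for K = 7
fit7 <- Z7 %*% ginv(t(Z7) %*% Z7) %*% t(Z7) %*% y             # Fourier fitted values
ord <- order(x4)                                              # sort by BOPO for line plots
plot(x4, y, pch = 19, col = "red", xlab = "BOPO", ylab = "ROA")
lines(x4[ord], fitted(reg)[ord], lwd = 2)                     # linear parametric fit
lines(x4[ord], fit7[ord], lwd = 2, lty = 2)                   # Fourier series fit, K = 7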
Main Result Using All the Predictor Variables
Combinations of the Number of K
p=ncol(x)   # number of predictor variables
K=5         # maximum oscillation parameter considered per variable
K.com=permutations(K, p, c(1:K), repeats.allowed = TRUE)   # all combinations of K
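With p = 5 predictors and K = 5 candidate oscillation parameters per variable, K.com holds one row per combination, so the search below evaluates 5^5 = 3125 candidate models:
nrow(K.com)
## [1] 3125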
GCV Value and Hypothesis Testing for All the Combinations of K
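For each candidate combination, the code below forms the design matrix X, computes the hat matrix V = X(XᵀX)⁻Xᵀ (with the Moore-Penrose inverse from ginv), and scores the fit by generalized cross-validation. Reading the formula off the code,

$$\mathrm{GCV} = \frac{n^{-1}\,y^{\top}(I-V)^{\top}(I-V)\,y}{\left(n^{-1}\,\mathrm{tr}(I-V)\right)^{2}},$$

and the combination with the smallest GCV is retained.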
Best=data.frame(matrix(0,1,19))
colnames(Best)=c("Var x1", "Var x2", "Var x3", "Var x4", "Var x5", "K.opt Var x1", "K.opt Var x2", "K.opt Var x3", "K.opt Var x4", "K.opt Var x5", "GCV", "MSE", "R.sq (%)", "d1", "d2", "Fvalue", "Ftable", "Pvalue", "Decisions")
X.first=c(1:(K+1))
Best[,1:p]=colnames(x)
h=0
for (k in 1:nrow(K.com))
{
  # build the design matrix X for the k-th combination of oscillation parameters
  X=matrix(0,nrow(x))
  for (l in 1:p)
  {
    X1=matrix(0,nrow(x))
    for (f in 1:K.com[k,l])
    {
      cosine=cos(X.first[f]*x[,l])
      X1=cbind(X1,cosine)
    }
    X1=X1[,-1]
    X1=cbind(1/2,x[,l],X1)   # columns per predictor: constant 1/2, linear term, cosines
    X=cbind(X,X1)
  }
  X=X[,-1]
  GCV=0; R.sq=0; MSE=0; d1=0; d2=0; Fvalue=0; Ftable=0; Pvalue=0; Decision=0
  I=diag(n)
  V=X%*%ginv((t(X)%*%X))%*%t(X)              # hat matrix
  MSE=(n^-1)*t(y)%*%t(I-V)%*%(I-V)%*%y
  GCV.all=MSE/(((n^-1)*sum(diag(I-V)))^2)    # GCV score for this combination
  B=ginv((t(X)%*%X))%*%t(X)%*%y              # estimated parameters
  y.pred=V%*%y
  R2=(1-(sum((y-y.pred)^2)/sum((y-mean(y))^2)))*100
  mse=sum((y-y.pred)^2)/(n)
  MSE=rbind(MSE,mse)
  R.sq=rbind(R.sq,R2)
  GCV=rbind(GCV,GCV.all)
  # overall F test for this combination
  d1.all=sum(diag(V))
  d2.all=n-sum(diag(V))
  F1=t(B)%*%t(X)%*%y/d1.all
  F2=t(y-X%*%B)%*%(y-X%*%B)/d2.all
  Fvalue.all=F1/F2
  alfa=0.05
  Ftable.all=qf(alfa,d1.all,d2.all, lower.tail = FALSE)
  Pvalue.all=pf(Fvalue.all,d1.all,d2.all, lower.tail= FALSE)
  Decision.all=if(Fvalue.all>=Ftable.all) "H0 Rejected" else "H0 Accepted"
  d1=rbind(d1,d1.all)
  d2=rbind(d2,d2.all)
  Fvalue=rbind(Fvalue,Fvalue.all)
  Ftable=rbind(Ftable,Ftable.all)
  Pvalue=rbind(Pvalue,Pvalue.all)
  Decision=rbind(Decision,Decision.all)
  list=cbind(GCV[-1],MSE[-1],R.sq[-1],d1[-1],d2[-1],Fvalue[-1],Ftable[-1],Pvalue[-1],Decision[-1])
  best_comb=which(list[,1]==min(list[,1]))
  hh=list[best_comb,]
  h=rbind(h,hh)   # collect results for all combinations
}
Optimal=h[-1,]                          # drop the initializing zero row
rr=matrix(Optimal,nrow=nrow(Optimal))
list2=cbind(K.com,rr)                   # attach each combination of K to its results
colnames(list2)=c("K.opt Var x1", "K.opt Var x2", "K.opt Var x3", "K.opt Var x4", "K.opt Var x5", "GCV", "MSE", "R.sq (%)", "d1", "d2", "Fvalue", "Ftable", "Pvalue", "Decisions")
The Optimum Combinations of K Based on the Smallest GCV Value
K.optimum=which(list2[,6]==min(list2[,6]))
ss=as.vector(list2[K.optimum,])
Best[,6:19]=ss
Best_combination=Best
Parameter Estimation for the Optimal Combinations of K
x.opt=matrix(0,ncol=1,nrow=nrow(data))
for (xb in 1:p)
{
  tb=data[Best_combination[,xb]]   # select each predictor column by its stored name
  x.opt=cbind(x.opt,tb)
}
x.opt=as.matrix(x.opt[-1])
K.opt=matrix(0,ncol=1,nrow=1)
for (zb in 6:10)
{
  Kb=Best_combination[,zb]         # optimal K for each predictor
  K.opt=cbind(K.opt,Kb)
}
K.opt=as.matrix(K.opt[-1])
rownames(K.opt)=c("K.opt Var x1", "K.opt Var x2", "K.opt Var x3", "K.opt Var x4", "K.opt Var x5")
# rebuild the design matrix for the optimal combination of K
X.opt=matrix(0,nrow(x.opt))
for (ll in 1:p)
{
  X1.opt=matrix(0,nrow(x.opt))
  for (ff in 1:K.opt[ll])
  {
    cosine=cos(X.first[ff]*x.opt[,ll])
    X1.opt=cbind(X1.opt,cosine)
  }
  X1.opt=X1.opt[,-1]
  X1.opt=cbind(1/2,x.opt[,ll],X1.opt)
  X.opt=cbind(X.opt,X1.opt)
}
X.opt=X.opt[,-1]
V.opt=X.opt%*%ginv(t(X.opt)%*%X.opt)%*%t(X.opt)   # hat matrix for the optimal model
MSE.opt=n^-1*t(y)%*%t(I-V.opt)%*%(I-V.opt)%*%y
GCV.opt=MSE.opt/((n^-1*sum(diag(I-V.opt)))^2)
y.hat=V.opt%*%y                                   # fitted values
R2.opt=(1-(sum((y-y.hat)^2)/sum((y-mean(y))^2)))*100
mse.opt=sum((y-y.hat)^2)/(n)
B.opt=ginv(t(X.opt)%*%X.opt)%*%t(X.opt)%*%y       # estimated parameters
colnames(B.opt)=c("Estimated Parameters")
B.opt
## Estimated Parameters
## [1,] 2.276220551
## [2,] 0.020118474
## [3,] 0.034500044
## [4,] -0.386612382
## [5,] -0.351531158
## [6,] -0.136568689
## [7,] -0.701818780
## [8,] 2.276220551
## [9,] -0.045297709
## [10,] -0.661289886
## [11,] -0.637496622
## [12,] 2.276220551
## [13,] 0.171282530
## [14,] -0.213379896
## [15,] 0.130962100
## [16,] -0.450686292
## [17,] 2.276220551
## [18,] -0.068193968
## [19,] -0.414151940
## [20,] -0.423457021
## [21,] 2.276220551
## [22,] -0.001449064
## [23,] -0.265330032
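With the optimal combination K = (5, 2, 3, 2, 1), each predictor contributes K + 2 columns to X.opt (the constant 1/2, the linear term, and K cosine terms), giving the 7 + 4 + 5 + 4 + 3 = 23 estimated parameters above. Because the five constant columns are identical, X.opt is rank-deficient and ginv() returns the minimum-norm solution, which is why rows 1, 8, 12, 17, and 21 share the same value. A minimal prediction sketch for new observations; the helper name predict_fourier and the object newdata (a data frame with the same five predictor columns, in the same order) are hypothetical:
# Hypothetical helper: rebuild the design matrix in the same column order as
# X.opt and multiply by the estimated parameters to predict ROA
predict_fourier <- function(newdata, B.opt, K.opt) {
  Xn <- NULL
  for (l in 1:length(K.opt)) {
    cosines <- cos(outer(newdata[, l], 1:K.opt[l]))  # n x K_l matrix of cos(j * x_l)
    Xn <- cbind(Xn, 1/2, newdata[, l], cosines)      # constant 1/2, linear term, cosines
  }
  as.matrix(Xn) %*% B.opt                            # predicted ROA
}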
Hypothesis Testing for the Optimal Combinations of K
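The test below checks the overall significance of the model, H0: all parameters are zero, against H1: at least one parameter is nonzero. Reading the statistic off the code,

$$F = \frac{\hat{B}^{\top}X^{\top}y / d_1}{(y - X\hat{B})^{\top}(y - X\hat{B}) / d_2}, \qquad d_1 = \mathrm{tr}(V), \quad d_2 = n - \mathrm{tr}(V),$$

and H0 is rejected when F exceeds the critical value of the F distribution with (d1, d2) degrees of freedom.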
d1.opt=sum(diag(V.opt))     # numerator degrees of freedom
d2.opt=n-sum(diag(V.opt))   # denominator degrees of freedom
F1.opt=t(B.opt)%*%t(X.opt)%*%y/d1.opt
F2.opt=t(y-X.opt%*%B.opt)%*%(y-X.opt%*%B.opt)/d2.opt
Fvalue.opt=F1.opt/F2.opt
Ftable.opt=qf(alfa,d1.opt,d2.opt, lower.tail = FALSE)   # critical value at level alfa = 0.05
Pvalue.opt=pf(Fvalue.opt,d1.opt,d2.opt, lower.tail= FALSE)
cat("\n MSE =",mse.opt,"\n")
##
## MSE = 0.4880899
cat("\n GCV =",GCV.opt,"\n")
##
## GCV = 1.375243
cat("\n Degrees of Freedom 1 =",d1.opt,"\n")
##
## Degrees of Freedom 1 = 19
cat("\n Degrees of Freedom 2 =",d2.opt,"\n")
##
## Degrees of Freedom 2 = 28
cat("\n Ftest =",Fvalue.opt,"\n")
##
## Ftest = 22.30987
cat("\n Ftable =",Ftable.opt,"\n")
##
## Ftable = 1.972027
cat("\n P Value =",Pvalue.opt,"\n")
##
## P Value = 3.822656e-12
cat("\n R Square =",R2.opt,"%","\n")
##
## R Square = 93.48382 %
cat("\n Optimal Combinatios of K =",ss[1:p],"\n")
##
## Optimal Combinatios of K = 5 2 3 2 1
Decision
if (Fvalue.opt >= Ftable.opt) "The null hypothesis is rejected (at least one of the parameters is not zero)" else "The null hypothesis fails to be rejected (all the parameters are zero)"
## [1] "The null hypothesis is rejected (at least one of the parameters is not zero)"