Library and Data

#library
library(car)
## Loading required package: carData
library(reshape2)
library(ggplot2)
library(sae)
## Warning: package 'sae' was built under R version 4.3.2
## Loading required package: MASS
## Loading required package: lme4
## Loading required package: Matrix
#data
data("grapes")
head(grapes)
##   grapehect      area  workdays         var
## 1  30.94776  203.9378  73.95884  21.5305443
## 2  57.21614  187.2251 148.21444 201.4299002
## 3  73.75407  590.7302 171.34921   2.8182181
## 4  66.24203  318.3233 105.86333  21.3808940
## 5  36.93180  217.2579  87.83813  64.2439319
## 6  78.53393 1562.0299 202.68657   0.1629171

Direct Estimation

#estimasi langsung
dir<-grapes$grapehect
head(dir)
## [1] 30.94776 57.21614 73.75407 66.24203 36.93180 78.53393
#rse direct esimation
rsedir<-round(sqrt(grapes$var)/dir*100, 4)
head(rsedir)
## [1] 14.9933 24.8052  2.2762  6.9804 21.7028  0.5140

Korelasi Auxiliary Variables

cordata <- data.matrix(grapes[-4])
cormat <- round(cor(cordata, method = "pearson"),2)
melted_cormat <- melt(cormat)


# Peroleh segitiga bawah dari matriks korelasi
  get_lower_tri<-function(cormat){
    cormat[upper.tri(cormat)] <- NA
    return(cormat)
  }
# Peroleh segitiga atas dari matriks korelasi
  get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
  }

upper_tri <- get_upper_tri(cormat)
# Atur matriks korelasi
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Buat ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white",
   midpoint = 0, limit = c(-1,1), space = "Lab",
    name="Pearson\nCorrelation") +
  theme_minimal()+ # minimal theme
 theme(axis.text.x = element_text(angle = 45, vjust = 1,
    size = 9, hjust = 1))+
 coord_fixed()

ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 2) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  legend.justification = c(1, 0),
  legend.position = c(0.6, 0.7),
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))

cor.test(grapes$grapehect,grapes$area,
         method = "pearson",
         alternative = "two.sided")
## 
##  Pearson's product-moment correlation
## 
## data:  grapes$grapehect and grapes$area
## t = 4.7049, df = 272, p-value = 4.047e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1610687 0.3804642
## sample estimates:
##       cor 
## 0.2743325
cor.test(grapes$grapehect,grapes$workdays,
         method = "pearson",
         alternative = "two.sided")
## 
##  Pearson's product-moment correlation
## 
## data:  grapes$grapehect and grapes$workdays
## t = 17.117, df = 272, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6577547 0.7726881
## sample estimates:
##       cor 
## 0.7201253

Non-multikolinearitas

reg<-lm(grapehect~area+workdays, data = grapes)
vif(reg)
##     area workdays 
## 1.368868 1.368868

EBLUP FH

#eblup fh
model<-eblupFH(grapehect ~ area + workdays, var,
               method = "REML",data=grapes)
fh<-model$eblup
head(fh)
##       [,1]
## 1 30.90838
## 2 65.54759
## 3 73.85757
## 4 62.69932
## 5 37.28492
## 6 78.54235
#estimasi koefisien regresi
model$fit$estcoef
##                   beta   std.error    tvalue        pvalue
## (Intercept) -5.7495585 2.321169658 -2.477009  1.324885e-02
## area        -0.0104852 0.001842961 -5.689322  1.275444e-08
## workdays     0.5221005 0.018185978 28.708962 2.948893e-181
#rse eblup fh
mse<-mseFH(grapehect ~ area + workdays, var, method = "REML",data = grapes)
rsefh<-round(sqrt(mse$mse)/fh*100,4)
head(rsefh)
##      [,1]
## 1 13.6814
## 2 12.5837
## 3  2.2433
## 4  6.7209
## 5 16.9025
## 6  0.5135

Komparasi

Statisitik Deskriptif Estimasi

cbind(summary(dir),summary(fh))
##  Direct Estimation    EBLUP Fay-Herriot   
##  "Min.   :  0.2478  " "Min.   :  0.6299  "
##  "1st Qu.: 38.9887  " "1st Qu.: 44.0792  "
##  "Median : 60.3268  " "Median : 59.8382  "
##  "Mean   : 69.5096  " "Mean   : 65.6598  "
##  "3rd Qu.: 85.7415  " "3rd Qu.: 79.4917  "
##  "Max.   :342.9919  " "Max.   :233.4081  "

Statistik Deskriptif RSE

cbind(summary(rsedir), summary(rsefh))
##  RSE Direct Estimation RSE EBLUP Fay-Herriot
##  "Min.   :    0.190  " "Min.   : 0.1904  "  
##  "1st Qu.:    9.398  " "1st Qu.: 7.4045  "  
##  "Median :   21.608  " "Median :11.5622  "  
##  "Mean   :  176.457  " "Mean   :12.5044  "  
##  "3rd Qu.:   56.604  " "3rd Qu.:16.1095  "  
##  "Max.   :18307.695  " "Max.   :48.9311  "

Plot Direct Estimation vs EBLUP FH

ggplot(est, aes(x = as.factor(kategori), y = est, fill = as.factor(kategori))) + 
  stat_boxplot(geom = "errorbar",
               width = 0.25) + 
  geom_boxplot() +
  scale_x_discrete(labels=c("Direct Estimation", "EBLUP FH")) +
  labs(x="", y="Estimasi") +
  scale_fill_discrete(labels=c("Direct Estimation", "EBLUP FH")) +
  theme(legend.title = element_blank())

Plot RSE Direct Estimation vs EBLUP FH

# Scatter plot by group
ggplot(rse, aes(x = id, y = RSE, color = as.factor(Kategori))) +
  geom_point() +
  labs(x="") +
  scale_y_log10() +
  scale_color_discrete(labels = c("Direct Estimation", "EBLUP FH")) +
  theme(legend.title=element_blank())