VISUALISASI DATA DAN PEMBANGKITAN BILANGAN ACAK

Bab 1 Visualisasi Data

Selain untuk analisis statistika formal, R juga digunakan untuk membuat grafik. R sudah menyediakan banyak fungsi grafik. Package standar grafik adalah graphics, tetapi terdapat beberapa package graphics lain seperti: lattice, grid, dan ggplot2

Grafik

Package standar grafik adalah graphics. Perintah dasar grafik pada package ini adalah plot

#dasar plot
x <- 1:40
y <- rnorm(40, 5,1)

plot(x,y,type="p")

plot(x, y, type="o")

plot(x,y,type="n")

plot(x,y,type="p",xlab="Sumbu x",ylab="Sumbu y",main="Bilangan Acak Normal",col=2,pch=16)

plot(x,y,type="p",xlab="Sumbu x",ylab="Sumbu y",main="Bilangan Acak Normal",col=rainbow(40),pch=16,cex=2,xlim=c(0,50),ylim=c(2.5,7.5))

#Menambahkan amatan
x1 <- 41:50
y1 <- rnorm(10,5,1)
points(x1,y1,cex=2)

#Menambahkan garis
x2 <- rep(40,5,20)
y2 <- seq(min(c(y,y1)),max(c(y,y1)),length=20)
lines(x2,y2,col=4,lwd=2,lty=2)

abline(h=mean(y),col="red",lwd=2.5)
abline(a=2,b=1/10,col="maroon3",lwd=2,lty=2)

#menambahkan tanda panah
arrows(x0=30,y0=3.5,x1=40,y1=mean(y)-.1,lwd = 2)

#menambahkan tulisan
text(x=19,y=3.3,labels="Titik potong",cex=0.7)

text(x=3,y=7.3,labels="Data awal",cex=0.7)
text(x=46,y=7.3,labels="Data baru",cex=0.7)

Latihan 1

plot(sin,-pi, 2*pi)

plot(table(rpois(100,5)),type="h",col="red",
lwd=1,main="rpois(100,lambda=5)")

Latihan 2

a1 <- 1:25
a2 <- rnorm(25,4,2)
plot(a1,a2,pch="w",main="W")

Latihan 3

plot(a1,a2,type="n",main="W")
text(a1,a2,labels=paste("w",1:25,sep=""),col=rainbow(25),cex=0.8)

Latihan 4

x <- rchisq(100,df=4)
hist(x,freq=FALSE,ylim=c(0,0.2))
curve(dchisq(x,df=4),col=2,lty=2,lwd=2,add=TRUE)

par(mfrow=c(2,2))
plot(1:40,y,type="p",xlab="Sumbu x",ylab="Sumbu y",
main="Bilangan Acak Normal",col=2,pch=16)
plot(sin,-pi, 2*pi)
plot(table(rpois(100,5)),type="h",col="red",
lwd=1,main="rpois(100,lambda=5)")
plot(a1,a2,type="n",main="W")
text(a1,a2,labels=paste("w",1:25,sep=""),
col=rainbow(25),cex=0.8)

par(mfcol=c(2,2))
plot(1:40,y,type="p",xlab="Sumbu x",ylab="Sumbu y",
main="Bilangan Acak Normal",col=2,pch=16)
plot(sin,-pi, 2*pi)
plot(table(rpois(100,5)),type="h",col="red",lwd=1,
main="rpois(100,lambda=5)")
plot(a1,a2,type="n",main="W")
text(a1,a2,labels=paste("w",1:25,sep=""),
col=rainbow(25),cex=0.8)

Latihan 5

windows()
yb<-rnorm(100,5,1)
split.screen(c(2,2))
## [1] 1 2 3 4
screen(3)
boxplot(yb)
title("BoxplotBilangan Acak Normal",cex.main=0.7)
screen(4)
xb<-1:100
plot(xb,yb,type="l",lwd=2,col="blue")
title("Line Plot Bilangan Acak Normal",cex.main=0.7)
screen(2)
hist(yb,freq=FALSE,main=NULL,ylim=c(0,0.5))
x <-yb
curve(dnorm(x,5,1),col="red",lty=2,lwd=2,add=TRUE)
title("Histogram Bilangan Acak Normal",cex.main=0.7)
screen(1)
plot(xb,yb,pch=16,col=rainbow(100))
title("Scatter Plot Bilangan Acak Normal",cex.main=0.7)

Latihan 6

#Ilustrasi mtext:
f <- function(x) cumsum(x)
  x <- 1:20
  y <- f(x)
  plot(x, y, xlab = "", ylab = "")
  mtext("Plotting the expression",
  side = 3, line = 2.5)
    mtext(expression(y[i] == sum(x[j
    ],j==1,i)), side = 3, line = 0)
  mtext("The first variable", side
  = 1, line = 3)
  mtext("The second variable",
  side = 2, line = 3)

#setting Parameter
x <- rnorm(10); y<- rnorm (10)
tmp <- par(mfrow=c(2,2))
plot(x=1:10,x)
plot(x=1:10,y)
boxplot(x)
boxplot(y)

par(tmp)

#split screen
split.screen(c(2,2))
## [1] 1 2 3 4
 screen(1)
 plot(x=1:10,x)
 screen(2)
 plot(x=1:10,y)
 screen(3)
 boxplot(x)
 screen(4)
 boxplot(y)

 close.screen(all=TRUE)
 
#layout
rancangan <- matrix(c(1,1,3,1,1,3,2,2,4,2,2,4), nrow=3)
rancangan
##      [,1] [,2] [,3] [,4]
## [1,]    1    1    2    2
## [2,]    1    1    2    2
## [3,]    3    3    4    4
obj <- layout(rancangan)
  layout.show(obj)

  plot(x=1:10,x)
  plot(x=1:10,y)
  boxplot(x,horizontal = TRUE)
  boxplot(y,horizontal = TRUE)

layout(1)  

#Ilustrasi menggunakan Data: mtcars (Motor Trend Car Road Tests)
datasets::mtcars
library(ggplot2)
qplot(factor(gear), data=mtcars)

dtx <- data.frame(x=rnorm(100))
qplot(x,data=dtx)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qplot(factor(gear), factor(cyl), data=mtcars)

qplot(mpg, wt, data = mtcars)

qplot(factor(cyl), wt, data = mtcars, geom = c("boxplot","jitter"))

ggplot(mpg, aes(displ, hwy
)) + geom_point()

ggplot(mpg, aes(displ, hwy
)) + layer(geom = "point",
stat = "identity",
position = "identity",
params = list(na.rm = FALSE)
)

Bab 2 Pembangkitan Bilangan Acak

Pembangkitan bilangan acak merupakan alat yang diperlukan dalam komputasi statistik, umumnya untuk simulasi. Bilangan acak yang dibangkitkan merupakan pseudorandom (acak semu). Bilangan acak yg dibangkitkan memenuhi sebaran statistik tertentu (pdf/pmf, cdf). Semua metode pembangkitkan bilangan acak tergantung dari pembangkitan bilangan acak uniform.

R telah menyiapkan banyak fungsi untuk membangkitkan data berdasarkan sebaran Fungsi. Umumnya dimulai dengan huruf r diikuti dengan nama sebaran

Contoh: Data sebaran pseudo normal: rnorm Data sebaran pseudo seragam: runif

Pembangkit bilangan menggunakan seed yang umumnya mengambil waktu di komputer. Luaran dari pembangkit bilangan acak akan sama untuk nilai seed yang sama.

#pembangkitanbilanganacak
x <-rnorm(10) #x~N(0,1)
x1 <-rnorm(10,3,2) #x1~N(3,sd=2)
x2 <-rbinom(10,1,0.4) #x2~bernoulli(0.4)
#mencarinilaipeluangkumulatifpeubahacak
p1 <-pnorm(1.645) #P(Z<1.645)=0.95
p2 <-pnorm(1.96) #P(Z<1.975)=0.975
p3 <-pnorm(-1.96)
p4 <-pf(15,df1=10,df2=15)
#mencarinilaikuantildaripeluangpeubahacak
q1 <-qnorm(0.975)
q2 <-qnorm(0.95,2,1) #X~N(2,1), P(X<x)=0.95
#mencarinilaidensity peubahacak
##plot density normal
a <-seq(-4,4,length=1000)
da <-dnorm(a)
plot(a,da)

Latihan 1

#Membangkitkan bilangan acak Eksponensial
set.seed(10)
#Eksponensial(lambda=3)
eks <-function(n,lambda){
U <-runif(n)
X <--log(1-U)/lambda
return(X)
}
yy1 <-eks(1000,3) #inverse transform method
yy2 <-rexp(1000,rate=3) #fungsi bawaan R
par(mfrow=c(1,2))
hist(yy1,main="Exp dari Inverse Transform")
hist(yy2,main="Exp dari fungsi rexp")

Latihan 2

#Bangkitkan bilangan acak yang memilikifkp𝑓𝑌𝑦=3𝑦2;0<𝑦<1 menggunakan acceptance-rejection method!
ar <-function(n,x0,x1,f) {
xx <-seq(x0,x1,length=10000)
c<-max(f(xx))
terima <-0;iterasi <-0
hasil <-numeric(n)
while(terima<n) {
x <-runif(1,x0,x1)
y1<-runif(1,0,c)
y2<-f(x)
if(y1<=y2) {
terima <-terima+1
hasil[terima]<-x}
iterasi <-iterasi+1}
list(hasil=hasil,iterasi=iterasi)
}
set.seed(10)
f <-function(x) {3*x^2}
yyy <-ar(100,0,1,f)
yyy
## $hasil
##   [1] 0.8647212 0.7751099 0.8382877 0.7707715 0.5355970 0.8613824 0.2036477
##   [8] 0.7979930 0.7438394 0.3443435 0.9837322 0.6935082 0.6331153 0.8880315
##  [15] 0.7690405 0.6483695 0.8795432 0.9360689 0.7233519 0.7620444 0.9868082
##  [22] 0.8760261 0.7240640 0.8140516 0.5588949 0.8900940 0.7456896 0.8480646
##  [29] 0.8703302 0.8223331 0.8508123 0.7709219 0.8953595 0.5803863 0.5982260
##  [36] 0.9235285 0.7367755 0.6898170 0.8301572 0.9293209 0.9095163 0.5347576
##  [43] 0.3478601 0.8759762 0.7286815 0.8749293 0.6988356 0.8312562 0.5572238
##  [50] 0.6647687 0.7400502 0.9806898 0.3800746 0.7553169 0.5184889 0.8879149
##  [57] 0.9177773 0.8084086 0.8537441 0.4232184 0.7604306 0.3405763 0.3886568
##  [64] 0.4774175 0.5387605 0.9485434 0.7124685 0.9081691 0.9457656 0.7716899
##  [71] 0.6946655 0.5368832 0.8481593 0.8242752 0.5123742 0.3152032 0.9924487
##  [78] 0.9327120 0.9892809 0.6283590 0.5254605 0.8810815 0.5291748 0.5765517
##  [85] 0.7231807 0.8761180 0.3995670 0.8986123 0.9335217 0.7859216 0.7784128
##  [92] 0.6955333 0.9060413 0.9916424 0.4729846 0.9770567 0.9386110 0.9959093
##  [99] 0.8543663 0.8309547
## 
## $iterasi
## [1] 322
par(mfrow=c(1,1))
hist(yyy$hasil, main="f(x)=3*x^2",prob=T)
x <-seq(0, 1, 0.01)
lines(x, f(x), lwd=2, col=4)

Latihan 3

#Pembangkitan Bilangan Acak untuk Model Analisis Regresi
set.seed(123)
X1 <-runif(25,10,25)
X2 <-runif(25,90,200)
Y <-10 + 2.3*X1 + 0.7*X2 + rnorm(25,0,9)
model1 <-lm(Y~X1)
summary(model1)
## 
## Call:
## lm(formula = Y ~ X1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.991 -17.738  -2.632  17.896  33.171 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  131.846     19.741   6.679 8.18e-07 ***
## X1             1.049      1.015   1.033    0.312    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.42 on 23 degrees of freedom
## Multiple R-squared:  0.04433,    Adjusted R-squared:  0.002775 
## F-statistic: 1.067 on 1 and 23 DF,  p-value: 0.3124
model2 <-lm(Y~X2)
summary(model2)
## 
## Call:
## lm(formula = Y ~ X2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.804  -4.850  -1.606  10.011  20.569 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 70.83913   13.86879   5.108 3.57e-05 ***
## X2           0.58213    0.09767   5.960 4.46e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.38 on 23 degrees of freedom
## Multiple R-squared:  0.607,  Adjusted R-squared:  0.5899 
## F-statistic: 35.52 on 1 and 23 DF,  p-value: 4.464e-06
model3 <-lm(Y~X1+X2)
summary(model3)
## 
## Call:
## lm(formula = Y ~ X1 + X2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.8291  -4.5994  -0.4576   5.0602  20.5307 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.62881   13.40229  -0.047    0.963    
## X1           2.72951    0.40701   6.706 9.67e-07 ***
## X2           0.72459    0.06105  11.869 4.91e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.427 on 22 degrees of freedom
## Multiple R-squared:  0.8709, Adjusted R-squared:  0.8592 
## F-statistic: 74.21 on 2 and 22 DF,  p-value: 1.66e-10
model4 <-lm(Y~X1:X2)
summary(model4)
## 
## Call:
## lm(formula = Y ~ X1:X2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.929  -7.009  -1.430   2.856  37.374 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 80.645221  10.481428   7.694 8.32e-08 ***
## X1:X2        0.027491   0.003929   6.997 3.94e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.97 on 23 degrees of freedom
## Multiple R-squared:  0.6804, Adjusted R-squared:  0.6665 
## F-statistic: 48.96 on 1 and 23 DF,  p-value: 3.942e-07
model5 <-lm(Y~X1*X2)
summary(model5)
## 
## Call:
## lm(formula = Y ~ X1 * X2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.4425  -4.5808  -0.6702   4.6431  20.2409 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 11.625896  37.941991   0.306   0.7623  
## X1           2.063573   1.967526   1.049   0.3062  
## X2           0.635870   0.263687   2.411   0.0251 *
## X1:X2        0.004905   0.014165   0.346   0.7326  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.6 on 21 degrees of freedom
## Multiple R-squared:  0.8716, Adjusted R-squared:  0.8533 
## F-statistic: 47.53 on 3 and 21 DF,  p-value: 1.548e-09
R2 <-matrix(c(summary(model1)$r.squared,summary(model1)$adj.r.squared,summary(model2)$r.squared,summary(model2)$adj.r.squared,summary(model3)$r.squared,summary(model3)$adj.r.squared,summary(model4)$r.squared,summary(model4)$adj.r.squared,summary(model5)$r.squared,summary(model5)$adj.r.squared),5,byrow=T)
colnames(R2)<-c("R2","R2.adj"); R2*100
##             R2     R2.adj
## [1,]  4.432592  0.2774876
## [2,] 60.699195 58.9904640
## [3,] 87.090357 85.9167529
## [4,] 68.036265 66.6465370
## [5,] 87.163648 85.3298839
coef(model3)
## (Intercept)          X1          X2 
##  -0.6288095   2.7295126   0.7245911
confint(model3)
##                   2.5 %     97.5 %
## (Intercept) -28.4234482 27.1658292
## X1            1.8854322  3.5735929
## X2            0.5979779  0.8512044
cbind(coef(model3),confint(model3))
##                              2.5 %     97.5 %
## (Intercept) -0.6288095 -28.4234482 27.1658292
## X1           2.7295126   1.8854322  3.5735929
## X2           0.7245911   0.5979779  0.8512044
anova(model3)
par(mfrow=c(2,2)); plot(model3)