library(cluster)
library(MASS)
#install.packages("smacof")
library(smacof)
## Warning: package 'smacof' was built under R version 4.2.3
## Loading required package: plotrix
## Loading required package: colorspace
## Loading required package: e1071
##
## Attaching package: 'smacof'
## The following object is masked from 'package:base':
##
## transform
library(stats)
#install.packages("bios2mds")
library(bios2mds)
## Warning: package 'bios2mds' was built under R version 4.2.3
## Loading required package: amap
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:plotrix':
##
## rescale
## Loading required package: rgl
##
## Attaching package: 'rgl'
## The following object is masked from 'package:plotrix':
##
## mtext3d
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(vegan)
## Warning: package 'vegan' was built under R version 4.2.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.2.3
## Loading required package: lattice
## This is vegan 2.6-4
https://www.statology.org/multidimensional-scaling-in-r/#:~:text=The%20easiest%20way%20to%20perform,by%20the%20dist()%20function.
#Membuat data frame
df <- data.frame(points=c(4, 4, 6, 7, 8, 14, 16, 19, 25, 25, 28),
assists=c(3, 2, 2, 5, 4, 8, 7, 6, 8, 10, 11),
blocks=c(7, 3, 6, 7, 5, 8, 8, 4, 2, 2, 1),
rebounds=c(4, 5, 5, 6, 5, 8, 10, 4, 3, 2, 2))
#Menambahkan nama observasi
row.names(df) <- LETTERS[1:11]
#Menampilkan data frame
df
## points assists blocks rebounds
## A 4 3 7 4
## B 4 2 3 5
## C 6 2 6 5
## D 7 5 7 6
## E 8 4 5 5
## F 14 8 8 8
## G 16 7 8 10
## H 19 6 4 4
## I 25 8 2 3
## J 25 10 2 2
## K 28 11 1 2
#Menghitung matriks jarak
d <- dist(df, diag=T)
d
## A B C D E F G
## A 0.000000
## B 4.242641 0.000000
## C 2.645751 3.605551 0.000000
## D 4.123106 5.916080 3.464102 0.000000
## E 4.690416 4.898979 3.000000 2.645751 0.000000
## F 11.916375 13.038405 10.630146 7.937254 8.366600 0.000000
## G 14.035669 14.798649 12.409674 10.099505 10.344080 3.000000 0.000000
## H 15.588457 15.588457 13.784049 12.569805 11.269428 7.810250 7.874008
## I 22.181073 21.954498 20.420578 19.157244 17.832555 13.490738 12.922848
## J 22.781571 22.693611 21.213203 19.748418 18.520259 14.035669 13.784049
## K 26.076810 25.884358 24.474477 23.000000 21.771541 17.029386 16.522712
## H I J K
## A
## B
## C
## D
## E
## F
## G
## H 0.000000
## I 6.708204 0.000000
## J 7.745967 2.236068 0.000000
## K 10.908712 4.472136 3.316625 0.000000
#Menampilan MDS
fit <- cmdscale(d, eig=TRUE, k=2)
fit
## $points
## [,1] [,2]
## A -10.6617577 -1.2511291
## B -10.3858237 -3.3450473
## C -9.0330408 -1.1968116
## D -7.4905743 1.0578445
## E -6.4021114 -1.0743669
## F -0.4618426 4.7392534
## G 0.8850934 6.1460850
## H 4.7352436 -0.6004609
## I 11.3793381 -1.3563398
## J 12.0844168 -1.5494108
## K 15.3510585 -1.5696166
##
## $eig
## [1] 9.348267e+02 8.375979e+01 1.120286e+01 6.392515e+00 7.931699e-14
## [6] 2.508166e-14 2.121837e-14 5.886445e-15 1.800144e-15 -6.762063e-15
## [11] -4.355705e-14
##
## $x
## NULL
##
## $ac
## [1] 0
##
## $GOF
## [1] 0.983019 0.983019
#Mengesktrak koordinat (x, y)
x <- fit$points[,1]
y <- fit$points[,2]
#Membuat plot
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="Multidimensional Scaling Results", type="n")
#Menambahkan nama observasi sebagai label
text(x, y, labels=row.names(df))
#Menampilkan nilai pemain A dan C (berdekatan)
df[rownames(df) %in% c('A', 'C'), ]
## points assists blocks rebounds
## A 4 3 7 4
## C 6 2 6 5
#Menampilkan nilai pemain B dan K (berjauhan)
df[rownames(df) %in% c('B', 'K'), ]
## points assists blocks rebounds
## B 4 2 3 5
## K 28 11 1 2
#Menampilkan koordinat (x,y) titik-titik pada plot
fit
## $points
## [,1] [,2]
## A -10.6617577 -1.2511291
## B -10.3858237 -3.3450473
## C -9.0330408 -1.1968116
## D -7.4905743 1.0578445
## E -6.4021114 -1.0743669
## F -0.4618426 4.7392534
## G 0.8850934 6.1460850
## H 4.7352436 -0.6004609
## I 11.3793381 -1.3563398
## J 12.0844168 -1.5494108
## K 15.3510585 -1.5696166
##
## $eig
## [1] 9.348267e+02 8.375979e+01 1.120286e+01 6.392515e+00 7.931699e-14
## [6] 2.508166e-14 2.121837e-14 5.886445e-15 1.800144e-15 -6.762063e-15
## [11] -4.355705e-14
##
## $x
## NULL
##
## $ac
## [1] 0
##
## $GOF
## [1] 0.983019 0.983019
Interpretasi: - Pemain dari dataframe asli yang memiliki nilai yang sama di empat kolom (poin, assist, blok, dan rebound) akan ditempatkan berdekatan satu sama lain dalam plot.
Sebagai contoh, pemain A dan C terletak berdekatan satu sama lain. Nilai poin, assist, blok, dan rebound mereka sangat mirip, yang menjelaskan mengapa mereka terletak sangat berdekatan dalam plot 2-D.
Sebaliknya, perhatikan pemain B dan K yang terletak berjauhan dalam plot. Jika kita mengacu pada nilai mereka dalam kerangka data asli, kita dapat melihat bahwa mereka sangat berbeda.
Anda juga dapat mengekstrak koordinat (x, y) yang tepat untuk setiap pemain di dalam plot dengan mengetikkan fit, yang merupakan nama variabel tempat kita menyimpan hasil dari fungsi cmdscale()
#Menampilkan biplot
res <- mds(df)
## Warning in df[row(df) > col(df)] <- x: number of items to replace is not a
## multiple of replacement length
## Warning in df[row(df) > col(df)] <- x: number of items to replace is not a
## multiple of replacement length
## Warning in wghts * diss^2: longer object length is not a multiple of shorter
## object length
## Warning in wgths * d * dhat: longer object length is not a multiple of shorter
## object length
## Warning in dhat - d: longer object length is not a multiple of shorter object
## length
## Warning in wgths * diss: longer object length is not a multiple of shorter
## object length
## Warning in dhat - e: longer object length is not a multiple of shorter object
## length
## Warning in w * Result^2: longer object length is not a multiple of shorter
## object length
## Warning in dhat - e: longer object length is not a multiple of shorter object
## length
## Warning in dhat - confdiss: longer object length is not a multiple of shorter
## object length
res
##
## Call:
## mds(delta = df)
##
## Model: Symmetric SMACOF
## Number of objects: 11
## Stress-1 value: 0.428
## Number of iterations: 1
fitbi <- biplotmds(res, data.frame(c(df)))
plot(fitbi, main = "MDS Biplot", vecscale = 1)
data(varespec)
#Menampilkan struktur data
str(varespec)
## 'data.frame': 24 obs. of 44 variables:
## $ Callvulg: num 0.55 0.67 0.1 0 0 ...
## $ Empenigr: num 11.13 0.17 1.55 15.13 12.68 ...
## $ Rhodtome: num 0 0 0 2.42 0 0 1.55 0 0.35 0.07 ...
## $ Vaccmyrt: num 0 0.35 0 5.92 0 ...
## $ Vaccviti: num 17.8 12.1 13.5 16 23.7 ...
## $ Pinusylv: num 0.07 0.12 0.25 0 0.03 0.12 0.1 0.1 0.05 0.12 ...
## $ Descflex: num 0 0 0 3.7 0 0.02 0.78 0 0.4 0 ...
## $ Betupube: num 0 0 0 0 0 0 0.02 0 0 0 ...
## $ Vacculig: num 1.6 0 0 1.12 0 0 2 0 0.2 0 ...
## $ Diphcomp: num 2.07 0 0 0 0 0 0 0 0 0.07 ...
## $ Dicrsp : num 0 0.33 23.43 0 0 ...
## $ Dicrfusc: num 1.62 10.92 0 3.63 3.42 ...
## $ Dicrpoly: num 0 0.02 1.68 0 0.02 0.02 0 0.23 0.2 0 ...
## $ Hylosple: num 0 0 0 6.7 0 0 0 0 9.97 0 ...
## $ Pleuschr: num 4.67 37.75 32.92 58.07 19.42 ...
## $ Polypili: num 0.02 0.02 0 0 0.02 0.02 0 0 0 0 ...
## $ Polyjuni: num 0.13 0.23 0.23 0 2.12 1.58 0 0.02 0.08 0.02 ...
## $ Polycomm: num 0 0 0 0.13 0 0.18 0 0 0 0 ...
## $ Pohlnuta: num 0.13 0.03 0.32 0.02 0.17 0.07 0.1 0.13 0.07 0.03 ...
## $ Ptilcili: num 0.12 0.02 0.03 0.08 1.8 0.27 0.03 0.1 0.03 0.25 ...
## $ Barbhatc: num 0 0 0 0.08 0.02 0.02 0 0 0 0.07 ...
## $ Cladarbu: num 21.73 12.05 3.58 1.42 9.08 ...
## $ Cladrang: num 21.47 8.13 5.52 7.63 9.22 ...
## $ Cladstel: num 3.5 0.18 0.07 2.55 0.05 ...
## $ Cladunci: num 0.3 2.65 8.93 0.15 0.73 0.25 2.38 0.82 0.05 0.95 ...
## $ Cladcocc: num 0.18 0.13 0 0 0.08 0.1 0.17 0.15 0.02 0.17 ...
## $ Cladcorn: num 0.23 0.18 0.2 0.38 1.42 0.25 0.13 0.05 0.03 0.05 ...
## $ Cladgrac: num 0.25 0.23 0.48 0.12 0.5 0.18 0.18 0.22 0.07 0.23 ...
## $ Cladfimb: num 0.25 0.25 0 0.1 0.17 0.1 0.2 0.22 0.1 0.18 ...
## $ Cladcris: num 0.23 1.23 0.07 0.03 1.78 0.12 0.2 0.17 0.02 0.57 ...
## $ Cladchlo: num 0 0 0.1 0 0.05 0.05 0.02 0 0 0.02 ...
## $ Cladbotr: num 0 0 0.02 0.02 0.05 0.02 0 0 0.02 0.07 ...
## $ Cladamau: num 0.08 0 0 0 0 0 0 0 0 0 ...
## $ Cladsp : num 0.02 0 0 0.02 0 0 0.02 0.02 0 0.07 ...
## $ Cetreric: num 0.02 0.15 0.78 0 0 0 0.02 0.18 0 0.18 ...
## $ Cetrisla: num 0 0.03 0.12 0 0 0 0 0.08 0.02 0.02 ...
## $ Flavniva: num 0.12 0 0 0 0.02 0.02 0 0 0 0 ...
## $ Nepharct: num 0.02 0 0 0 0 0 0 0 0 0 ...
## $ Stersp : num 0.62 0.85 0.03 0 1.58 0.28 0 0.03 0.02 0.03 ...
## $ Peltapht: num 0.02 0 0 0.07 0.33 0 0 0 0 0.02 ...
## $ Icmaeric: num 0 0 0 0 0 0 0 0.07 0 0 ...
## $ Cladcerv: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Claddefo: num 0.25 1 0.33 0.15 1.97 0.37 0.15 0.67 0.08 0.47 ...
## $ Cladphyl: num 0 0 0 0 0 0 0 0 0 0 ...
varespec.nmds.bray <- metaMDS(varespec, distance = "bray", trace = FALSE, trymax = 100)
varespec.nmds.bray
##
## Call:
## metaMDS(comm = varespec, distance = "bray", trymax = 100, trace = FALSE)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(varespec))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1825658
## Stress type 1, weak ties
## Best solution was repeated 1 time in 20 tries
## The best solution was from try 3 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(varespec))'
#trymax adalah jumlah permulaan acak
#trace = FALSE merupakan perintah agar R tidak menampilkan semua permulaan acak ini
#Menampilkan plot NMDS
plot(varespec.nmds.bray, type="t")
#EValuasi pemetaan NMDS
stressplot(varespec.nmds.bray)
#Goodness of fit
gof <- goodness(varespec.nmds.bray)
plot(varespec.nmds.bray, type="t", main = "goodness of fit")
points(varespec.nmds.bray, display="sites", cex=gof*100)
#Membandingkan penempatan
varespec.nmds.eu <- metaMDS(varespec, distance="eu", trace=FALSE, trymax=100)
#Gunakan jarak euclidean
pro <- procrustes(varespec.nmds.bray, varespec.nmds.eu) # visually compares ordinations
plot(pro, cex=1.5)
varespec.nmds.jacc <- metaMDS(varespec, distance = "jaccard", trace = FALSE, trymax = 100)
pro1 <- procrustes(varespec.nmds.bray, varespec.nmds.jacc)
plot(pro1, cex=1.5)
#Exact dissimilarity indices
varespec.kul <- vegdist(varespec, method="kulczynski")
str(varespec.kul)
## 'dist' Named num [1:276] 0.531 0.668 0.549 0.375 0.508 ...
## - attr(*, "maxdist")= num 1
## - attr(*, "Size")= int 24
## - attr(*, "Labels")= chr [1:24] "18" "15" "24" "27" ...
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "kulczynski"
## - attr(*, "call")= language vegdist(x = varespec, method = "kulczynski")
#Relating community data to environmental variables
data(varechem) #soil chemical variables
str(varechem) #24 observation of 14 variables
## 'data.frame': 24 obs. of 14 variables:
## $ N : num 19.8 13.4 20.2 20.6 23.8 22.8 26.6 24.2 29.8 28.1 ...
## $ P : num 42.1 39.1 67.7 60.8 54.5 40.9 36.7 31 73.5 40.5 ...
## $ K : num 140 167 207 234 181 ...
## $ Ca : num 519 357 973 834 777 ...
## $ Mg : num 90 70.7 209.1 127.2 125.8 ...
## $ S : num 32.3 35.2 58.1 40.7 39.5 40.8 33.8 27.1 42.5 60.2 ...
## $ Al : num 39 88.1 138 15.4 24.2 ...
## $ Fe : num 40.9 39 35.4 4.4 3 ...
## $ Mn : num 58.1 52.4 32.1 132 50.1 ...
## $ Zn : num 4.5 5.4 16.8 10.7 6.6 9.1 7.4 5.2 9.3 9.1 ...
## $ Mo : num 0.3 0.3 0.8 0.2 0.3 0.4 0.3 0.3 0.3 0.5 ...
## $ Baresoil: num 43.9 23.6 21.2 18.7 46 40.5 23 29.8 17.6 29.9 ...
## $ Humdepth: num 2.2 2.2 2 2.9 3 3.8 2.8 2 3 2.2 ...
## $ pH : num 2.7 2.8 3 2.8 2.7 2.7 2.8 2.8 2.8 2.8 ...
fit <- envfit(varespec.nmds.bray, varechem, permu=999)
fit
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## N -0.05728 -0.99836 0.2536 0.052 .
## P 0.61969 0.78485 0.1938 0.108
## K 0.76642 0.64234 0.1809 0.128
## Ca 0.68517 0.72839 0.4119 0.001 ***
## Mg 0.63249 0.77457 0.4270 0.001 ***
## S 0.19135 0.98152 0.1752 0.134
## Al -0.87162 0.49018 0.5269 0.002 **
## Fe -0.93604 0.35190 0.4450 0.002 **
## Mn 0.79872 -0.60171 0.5231 0.001 ***
## Zn 0.61754 0.78654 0.1879 0.113
## Mo -0.90308 0.42946 0.0609 0.532
## Baresoil 0.92491 -0.38019 0.2508 0.047 *
## Humdepth 0.93284 -0.36029 0.5200 0.001 ***
## pH -0.64800 0.76164 0.2308 0.066 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#plotting envfit output
plot(varespec.nmds.bray, display="sites")
plot(fit, p.max=0.05) #only display variables that are significant
Interpretasi: - Goodness of fit Lingkaran yang lebih besar menggambarkan
plot yang memiliki kesesuaian lemah dengan matrix dissimilarity - Plot
terakhir Tanda panah menunjuk ke arah perubahan paling cepat pada
variabel lingkungan, atau disebut arah gradien. Panjang panah sebanding
dengan korelasi antara ordination dan variabel lingkungan, atau disebut
kekuatan gradien.