library("MPsychoR")
library("smacof")
VaHSdata = read.csv("VaHSdata.csv")
options(digits=2, scipen=999)
data(list="VaHSdata", package="MPsychoR")
mydata2<-missRanger::missRanger(data = VaHSdata, verbose=0)
library(psych)
describe(mydata2)
#Standardized the variables since not all of the variables were the same.
standardize = function(x){
return ((x-mean(x,na.rm=TRUE))/sd(x,na.rm=TRUE))
}
std_data = as.data.frame(apply(mydata2,2,standardize))
for(j in 1:ncol(std_data))std_data[,j]<-round(std_data[,j])
M <- tidyr::gather(data=std_data,key="Item", value = "Value")
M$Item <-factor(x=M$Item, levels = names(sort(tapply(X=M$Value, INDEX=M$Item, FUN=mean))))
require(ggplot2)
Loading required package: ggplot2
ggplot(M) + aes(x=Value, fill=Item)+geom_bar()+facet_wrap(~Item)
library("smacof")
Wdelta <- dist(t(std_data)) ## Euclidean distances
fit.VaHSdata1 <- mds(Wdelta, type = "ordinal") ## MDS fit
fit.VaHSdata1
Call:
mds(delta = Wdelta, type = "ordinal")
Model: Symmetric SMACOF
Number of objects: 12
Stress-1 value: 0.1
Number of iterations: 51
plot(fit.VaHSdata1, main = "VaHSdata")
#The stress is higher using the randomstress test.
set.seed(123)
rsvec <- randomstress(n = attr(Wdelta, "Size"), ndim = 2,
nrep = 500, type = "ordinal")
mean(rsvec)
[1] 0.22
hist(x=rsvec, main = "Histogram of randomly generated/nmultidimensional scaling stress")
#The model above is less than 0.21, which means that it is statistically significant.
mean(rsvec) - 2*sd(rsvec)
[1] 0.19
set.seed(123)
permmds <- permtest(fit.VaHSdata1, data = std_data,
method.dat = "euclidean", nrep = 500,
verbose = FALSE)
permmds
Call: permtest.smacof(object = fit.VaHSdata1, data = std_data, method.dat = "euclidean",
nrep = 500, verbose = FALSE)
SMACOF Permutation Test
Number of objects: 12
Number of replications (permutations): 500
Observed stress value: 0.1
p-value: <0.001
n <- attr(Wdelta, "Size")
svec <- NULL
for (i in 1:(n-1)) {
svec[i] <- mds(Wdelta, ndim = i, type = "ordinal")$stress
}
plot(x=1:(n-1), y=svec, type="b", main = "MDS Scree Plot", pch=20, xlab="Number of Dimensions", ylab = "Stress")
set.seed(123)
fit.VaHSdata <- NULL ## 100 random starts
for(i in 1:100) {
fit.VaHSdata[[i]] <- mds(Wdelta, type = "ordinal",
init = "random")
}
ind <- which.min(sapply(fit.VaHSdata,
function(x) x$stress))
fit.VaHSdata2 <- fit.VaHSdata[[ind]]
fit.VaHSdata2$stress ## lowest stress (random start)
[1] 0.1
fit.VaHSdata2
Call:
mds(delta = Wdelta, type = "ordinal", init = "random")
Model: Symmetric SMACOF
Number of objects: 12
Stress-1 value: 0.1
Number of iterations: 97
fit.VaHSdata1$stress ## stress (classical scaling start)
[1] 0.1
fit.VaHSdata3 <- mds(Wdelta, type = "interval")
fit.VaHSdata3
Call:
mds(delta = Wdelta, type = "interval")
Model: Symmetric SMACOF
Number of objects: 12
Stress-1 value: 0.16
Number of iterations: 6
plot(fit.VaHSdata3, plot.type = "Shepard",
main = "Shepard Diagram (Interval MDS)")
plot(fit.VaHSdata2, plot.type = "Shepard",
main = "Shepard Diagram (Orginal MDS)")
plot(fit.VaHSdata2, plot.type = "Shepard",
main = "Shepard Diagram (Ordinal MDS)")
#Configurations
plot(fit.VaHSdata2, plot.type = "confplot", main= "confplot (Random Ordinal MDS)")
plot(fit.VaHSdata2, plot.type = "resplot", main= "resplot (Random Ordinal MDS)")
plot(fit.VaHSdata2, plot.type = "stressplot", main= "stressplot (Random Ordinal MDS)")
plot(fit.VaHSdata2, plot.type = "bubbleplot", main= "bubbleplot(Random Ordinal MDS)")
library("colorspace")
set.seed(123)
bootWen <- bootmds(fit.VaHSdata2, data = std_data,
method.dat = "euclidean", nrep = 100)
cols <- rainbow_hcl(17, l = 60)
plot(bootWen, col = cols)
colpal <- c(rainbow_hcl(4, c=100))
pal <-palette(colpal)
memb <- c(1,2,2,2,2,2,3,3,4,4,4,3)
plot(fit.VaHSdata2, main = "Configuration", label.conf = list(label=FALSE), col = memb)
legend("topright", legend = c("Enrollment", "Race", "Subgroup", "Achievement"), col=colpal, pch=20)
palette(pal)
colpal <- c(rainbow_hcl(4, c=100, l=30))
pal <- palette(colpal)
text(fit.VaHSdata2$conf[-7,], labels=rownames(fit.VaHSdata2$conf)[-7], col=memb[-7], pos=3, cex=0.8)
text(fit.VaHSdata2$conf[7.1], fit.VaHSdata2$conf[7,2], labels= rownames(fit.VaHSdata2$conf)[7], col=memb[7], pos=1, cex=0.8)
#This is taking it a step further with Confirmatory Multidimensional Scaling
library("MPsychoR")
library("smacof")
cormatrix<-cor(std_data)
corrplot::corrplot(corr=cormatrix, method="ellipse", diag = FALSE, order="hclust")
ocpD <- sim2diss(cormatrix)
fit.ocp1 <- mds(ocpD, type = "ordinal")
fit.ocp1
Call:
mds(delta = ocpD, type = "ordinal")
Model: Symmetric SMACOF
Number of objects: 12
Stress-1 value: 0.1
Number of iterations: 30
v_color <- viridis::viridis(
n=length(13)
)
names(v_color)<-sort(13)
v<-c("confplot", "resplot", "Shepard", "stressplot", "bubbleplot", "histogram")
for(j in v) plot(x=fit.ocp1, plot.type=j, main=j)
mdsbi <- biplotmds(fit.ocp1, ocpD)
plot(mdsbi)
library(MASS)
out_Car1=isoMDS(ocpD, k=1)
initial value 35.208679
final value 27.355976
converged
library(MASS)
out_Car2=isoMDS(ocpD, k=2)
initial value 14.611676
iter 5 value 10.316203
final value 10.209001
converged
library(MASS)
out_Car3=isoMDS(ocpD, k=3)
out_Car1
$points
[,1]
Enrollment 0.090
Asian_p 0.096
Black_p -0.404
Hispanic_p -0.211
White_p 0.650
Multirace_p 0.248
ESL_P -0.203
SWD_p -0.100
GCI2019 0.533
Chronic.Abs2019 -0.547
Reading2019 0.383
Disadvantaged_p -0.536
$stress
[1] 27
out_Car2
$points
[,1] [,2]
Enrollment 0.09 -0.337
Asian_p 0.11 -0.378
Black_p -0.34 0.318
Hispanic_p -0.26 -0.328
White_p 0.51 0.457
Multirace_p 0.33 -0.072
ESL_P -0.26 -0.319
SWD_p -0.13 0.446
GCI2019 0.55 0.165
Chronic.Abs2019 -0.54 -0.059
Reading2019 0.45 -0.226
Disadvantaged_p -0.51 0.333
$stress
[1] 10
out_Car3
$points
[,1] [,2] [,3]
Enrollment 0.13 -0.438 -0.218
Asian_p 0.18 -0.477 -0.155
Black_p -0.51 0.256 -0.428
Hispanic_p -0.30 -0.442 0.155
White_p 0.53 0.518 0.446
Multirace_p 0.42 -0.059 -0.385
ESL_P -0.29 -0.433 0.171
SWD_p -0.23 0.590 -0.217
GCI2019 0.68 0.292 -0.048
Chronic.Abs2019 -0.57 -0.022 0.429
Reading2019 0.55 -0.221 0.123
Disadvantaged_p -0.58 0.437 0.125
$stress
[1] 3