観測値を16分割し、どのような分布になっているか
library(plyr)
set.seed(1234)
df <- data.frame(id=1:100,x1=rnorm(100),x2=rnorm(100))
#each(length,mean,sd)(rnorm(100))
a <- mean(df$x1);a1 <- sd(df$x1)
aa1 <- a-a1 #-シグマ
aa2 <- a+a1 #+シグマ
b <- mean(df$x2);b1 <- sd(df$x2)
bb1 <- b-b1 #-シグマ
bb2 <- b+b1 #+シグマ
df1 <- ddply(df,.(x1,x2),summarize,
id2=ifelse(x1 < aa1,1,ifelse(x1 >= aa1 & x1 < a,2,
ifelse(x1 >= a & x1 < aa2,3,4 ))),
id3=ifelse(x2 < bb1,1,ifelse(x2 >= bb1 & x2 < b,2,
ifelse(x2 >= b & x2 < bb2,3,4 ))),
id4 = id2 * id3) #評価点数
head(df1)
## x1 x2 id2 id3 id4
## 1 -2.345698 -0.50247778 1 2 2
## 2 -2.180040 -0.31611833 1 2 2
## 3 -1.806031 -0.37723765 1 2 2
## 4 -1.629093 -1.65010093 1 1 1
## 5 -1.448205 0.08005964 1 3 3
## 6 -1.390701 -1.12376279 1 1 1
mm <- matrix(NA,4,4)
ee <- t(table(df1[,3:4]))
k <- 1
for(i in 4:1){
mm[k,] <- ee[i,]
k <- k + 1
}
dimnames(mm) <- list(c(4:1),c(1:4))
mm #個数
## 1 2 3 4
## 4 0 8 5 1
## 3 4 19 7 6
## 2 5 14 9 10
## 1 3 6 2 1
sum(mm)
## [1] 100
視覚化
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
cc <- c("1","2","3","4")
rr <- c("1","2","3","4")
aa <- expand.grid(cc,rr)
n <- nrow(aa)
a <- a1 <- matrix(NA,n,2)
b <- sapply(1:n,function(x){
a[x,1] <- mm[aa[x,1],aa[x,2]]
})
#b1 <- sapply(1:n,function(x){
# a1[x,2] <- ifelse(aa[x,1]==1,1+3,ifelse(aa[x,1]==2,
#2+1,ifelse(aa[x,1]==3,3-1,1)))
#})
mm1 <- data.frame(cbind(aa,b))
class(mm1)
## [1] "data.frame"
#dimnames(mm1) <- list(c("1","2","3","4"),c("1","2","3","4"))
#balloonplot(mm1$Var2,mm1$Var1,mm1$b)
levels(mm1$Var1) <- c("4","3","2","1")#行名の変更
balloonplot(mm1$Var2,mm1$Var1,mm1$b)

round(mm/100,2)#比率
## 1 2 3 4
## 4 0.00 0.08 0.05 0.01
## 3 0.04 0.19 0.07 0.06
## 2 0.05 0.14 0.09 0.10
## 1 0.03 0.06 0.02 0.01
round(sum(mm[c(2:3),c(2:3)])/100,2)#+-シグマ内個数の比率
## [1] 0.49
library(gplots)
carnames <- c("bmw","renault","mercedes","seat")
carcolors <- c("red","white","silver","green")
datavals <- round(rnorm(16, mean=100, sd=60),1)
data <- data.frame(Car=rep(carnames,4),
Color=rep(carcolors, c(4,4,4,4) ),
Value=datavals )
# show the data
data
## Car Color Value
## 1 bmw red 129.1
## 2 renault red 141.8
## 3 mercedes red 111.1
## 4 seat red 142.0
## 5 bmw white 118.7
## 6 renault white 145.6
## 7 mercedes white 210.5
## 8 seat white 166.7
## 9 bmw silver 102.0
## 10 renault silver 33.1
## 11 mercedes silver 125.1
## 12 seat silver 76.0
## 13 bmw green 189.6
## 14 renault green 3.6
## 15 mercedes green 75.1
## 16 seat green 125.3
# generate balloon plot with default scaling
balloonplot( data$Car, data$Color, data$Value)

# show margin label rotation & space expansion, using some long labels
levels(data$Car) <- c("BMW: High End, German","Renault: Medium End, French",
"Mercedes: High End, German", "Seat: Imaginary, Unknown Producer")
# generate balloon plot with default scaling
balloonplot( data$Car, data$Color, data$Value, colmar=3, colsrt=90)

# Create an example using table
xnames <- sample( letters[1:3], 50, replace=2)
ynames <- sample( 1:5, 50, replace=2)
tab <- table(xnames, ynames)
balloonplot(tab)

# Example of multiple classification variabls using the Titanic data
library(datasets)
data(Titanic)
dframe <- as.data.frame(Titanic) # convert to 1 entry per row format
attach(dframe)
balloonplot(x=Class, y=list(Survived, Age, Sex), z=Freq, sort=TRUE)

# colorize: surviors lightblue, non-survivors: grey
Colors <- Titanic
Colors[,,,"Yes"] <- "skyblue"
Colors[,,,"No"] <- "grey"
colors <- as.character(as.data.frame(Colors)$Freq)
balloonplot(x=list(Age,Sex),
y=list(Class=Class,
Survived=gdata::reorder.factor(Survived,c(2,1))),
z=Freq,
zlab="Number of Passengers",
sort=TRUE,
dotcol = colors,
show.zeros=TRUE,
show.margins=TRUE)
————-
(a1 <- count(df1,c("id2","id3")))#組み合わせ個数算出
## id2 id3 freq
## 1 1 1 3
## 2 1 2 5
## 3 1 3 4
## 4 2 1 6
## 5 2 2 14
## 6 2 3 19
## 7 2 4 8
## 8 3 1 2
## 9 3 2 9
## 10 3 3 7
## 11 3 4 5
## 12 4 1 1
## 13 4 2 10
## 14 4 3 6
## 15 4 4 1
nn <- nrow(a1)
nn1 <- nrow(df1)
bb <- numeric(nn1)
for(i in 1:nn){
for(j in 1:nn1){
if(df1[j,3]==a1[i,1] & df1[j,4]==a1[i,2]){
bb[j] <- a1[i,3]
}
}
}
bb
## [1] 5 5 5 3 4 3 4 3 4 4 5 5 14 8 19 8 8 8 6 14 14 19 19
## [24] 19 19 14 19 19 6 6 8 6 8 14 19 14 19 19 19 6 19 19 8 14 14 19
## [47] 14 19 8 14 14 14 19 14 19 6 19 14 19 9 7 9 9 9 7 5 7 9 2
## [70] 7 9 5 7 9 7 7 5 5 9 2 9 5 6 10 10 6 6 6 6 10 10 10
## [93] 10 10 10 6 1 1 10 10
df2 <- cbind(df1,freq1=bb)
head(df2)
## x1 x2 id2 id3 id4 freq1
## 1 -2.345698 -0.50247778 1 2 2 5
## 2 -2.180040 -0.31611833 1 2 2 5
## 3 -1.806031 -0.37723765 1 2 2 5
## 4 -1.629093 -1.65010093 1 1 1 3
## 5 -1.448205 0.08005964 1 3 3 4
## 6 -1.390701 -1.12376279 1 1 1 3