観測値を１６分割し、どのような分布になっているか

library(plyr)

set.seed(1234)
df <- data.frame(id=1:100,x1=rnorm(100),x2=rnorm(100))
#each(length,mean,sd)(rnorm(100))
a <- mean(df$x1);a1 <- sd(df$x1)
aa1 <- a-a1   #-シグマ 
aa2 <- a+a1   #+シグマ

b <- mean(df$x2);b1 <- sd(df$x2)
bb1 <- b-b1   #-シグマ
bb2 <- b+b1   #+シグマ

df1 <- ddply(df,.(x1,x2),summarize,
             id2=ifelse(x1 < aa1,1,ifelse(x1 >= aa1 & x1 < a,2,
                               ifelse(x1 >= a & x1 < aa2,3,4 ))),
             id3=ifelse(x2 < bb1,1,ifelse(x2 >= bb1 & x2 < b,2,
                               ifelse(x2 >= b & x2 < bb2,3,4 ))),
             id4 = id2 * id3) #評価点数 
head(df1)

##          x1          x2 id2 id3 id4
## 1 -2.345698 -0.50247778   1   2   2
## 2 -2.180040 -0.31611833   1   2   2
## 3 -1.806031 -0.37723765   1   2   2
## 4 -1.629093 -1.65010093   1   1   1
## 5 -1.448205  0.08005964   1   3   3
## 6 -1.390701 -1.12376279   1   1   1

mm <- matrix(NA,4,4)
ee <- t(table(df1[,3:4]))
k <- 1
for(i in 4:1){
  mm[k,] <- ee[i,]
  k <- k + 1
}
dimnames(mm) <- list(c(4:1),c(1:4))
mm　#個数

##   1  2 3  4
## 4 0  8 5  1
## 3 4 19 7  6
## 2 5 14 9 10
## 1 3  6 2  1

sum(mm)

## [1] 100

視覚化

library(gplots)

## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

cc <- c("1","2","3","4")
rr <- c("1","2","3","4")
aa <- expand.grid(cc,rr)

n <- nrow(aa)
a <- a1 <- matrix(NA,n,2)

b <- sapply(1:n,function(x){
  a[x,1] <- mm[aa[x,1],aa[x,2]]
  })
#b1 <- sapply(1:n,function(x){
#  a1[x,2] <- ifelse(aa[x,1]==1,1+3,ifelse(aa[x,1]==2,
#2+1,ifelse(aa[x,1]==3,3-1,1)))
#})

mm1 <- data.frame(cbind(aa,b))
class(mm1)

## [1] "data.frame"

#dimnames(mm1) <- list(c("1","2","3","4"),c("1","2","3","4"))
#balloonplot(mm1$Var2,mm1$Var1,mm1$b)

levels(mm1$Var1) <- c("4","3","2","1")#行名の変更
balloonplot(mm1$Var2,mm1$Var1,mm1$b)

round(mm/100,2)#比率

##      1    2    3    4
## 4 0.00 0.08 0.05 0.01
## 3 0.04 0.19 0.07 0.06
## 2 0.05 0.14 0.09 0.10
## 1 0.03 0.06 0.02 0.01

round(sum(mm[c(2:3),c(2:3)])/100,2)#+-シグマ内個数の比率

## [1] 0.49

library(gplots)
    
carnames <- c("bmw","renault","mercedes","seat")
carcolors <- c("red","white","silver","green")
datavals <- round(rnorm(16, mean=100, sd=60),1)
data <- data.frame(Car=rep(carnames,4),
                   Color=rep(carcolors, c(4,4,4,4) ),
                   Value=datavals )
# show the data
data

##         Car  Color Value
## 1       bmw    red 129.1
## 2   renault    red 141.8
## 3  mercedes    red 111.1
## 4      seat    red 142.0
## 5       bmw  white 118.7
## 6   renault  white 145.6
## 7  mercedes  white 210.5
## 8      seat  white 166.7
## 9       bmw silver 102.0
## 10  renault silver  33.1
## 11 mercedes silver 125.1
## 12     seat silver  76.0
## 13      bmw  green 189.6
## 14  renault  green   3.6
## 15 mercedes  green  75.1
## 16     seat  green 125.3

# generate balloon plot with default scaling
balloonplot( data$Car, data$Color, data$Value)

# show margin label rotation & space expansion, using some long labels
levels(data$Car) <- c("BMW: High End, German","Renault: Medium End, French",
 "Mercedes: High End, German", "Seat: Imaginary, Unknown Producer")
 
# generate balloon plot with default scaling
balloonplot( data$Car, data$Color, data$Value, colmar=3, colsrt=90)

# Create an example using table
xnames <- sample( letters[1:3], 50, replace=2)
ynames <- sample( 1:5, 50, replace=2)
 
tab <- table(xnames, ynames)
 
balloonplot(tab)

# Example of multiple classification variabls using the Titanic data
library(datasets)
data(Titanic)
 
dframe <- as.data.frame(Titanic) # convert to 1 entry per row format
attach(dframe)
balloonplot(x=Class, y=list(Survived, Age, Sex), z=Freq, sort=TRUE)


# colorize: surviors lightblue, non-survivors: grey
Colors <- Titanic
Colors[,,,"Yes"] <- "skyblue"
Colors[,,,"No"] <- "grey"
colors <- as.character(as.data.frame(Colors)$Freq)
 
balloonplot(x=list(Age,Sex),
            y=list(Class=Class,
            Survived=gdata::reorder.factor(Survived,c(2,1))),
            z=Freq,
            zlab="Number of Passengers",
            sort=TRUE,
            dotcol = colors,
            show.zeros=TRUE,
            show.margins=TRUE)

————-

(a1 <- count(df1,c("id2","id3")))#組み合わせ個数算出

##    id2 id3 freq
## 1    1   1    3
## 2    1   2    5
## 3    1   3    4
## 4    2   1    6
## 5    2   2   14
## 6    2   3   19
## 7    2   4    8
## 8    3   1    2
## 9    3   2    9
## 10   3   3    7
## 11   3   4    5
## 12   4   1    1
## 13   4   2   10
## 14   4   3    6
## 15   4   4    1

nn <- nrow(a1)
nn1 <- nrow(df1)
bb <- numeric(nn1)

for(i in 1:nn){
  for(j in 1:nn1){
    if(df1[j,3]==a1[i,1] & df1[j,4]==a1[i,2]){
       bb[j] <- a1[i,3]
    }
  }
}
bb

##   [1]  5  5  5  3  4  3  4  3  4  4  5  5 14  8 19  8  8  8  6 14 14 19 19
##  [24] 19 19 14 19 19  6  6  8  6  8 14 19 14 19 19 19  6 19 19  8 14 14 19
##  [47] 14 19  8 14 14 14 19 14 19  6 19 14 19  9  7  9  9  9  7  5  7  9  2
##  [70]  7  9  5  7  9  7  7  5  5  9  2  9  5  6 10 10  6  6  6  6 10 10 10
##  [93] 10 10 10  6  1  1 10 10

df2 <- cbind(df1,freq1=bb)
head(df2)

##          x1          x2 id2 id3 id4 freq1
## 1 -2.345698 -0.50247778   1   2   2     5
## 2 -2.180040 -0.31611833   1   2   2     5
## 3 -1.806031 -0.37723765   1   2   2     5
## 4 -1.629093 -1.65010093   1   1   1     3
## 5 -1.448205  0.08005964   1   3   3     4
## 6 -1.390701 -1.12376279   1   1   1     3

Untitled

yoshio

Wednesday, October 01, 2014

観測値を１６分割し、どのような分布になっているか

視覚化

————-