library(car)
library(vcdExtra)

## Loading required package: vcd

## Loading required package: grid

## Loading required package: gnm

## 
## Attaching package: 'vcdExtra'

## The following object is masked from 'package:car':
## 
##     Burt

library(FactoMineR)
library(tidyverse)

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Conflicts with tidy packages ----------------------------------------------

## filter():    dplyr, stats
## lag():       dplyr, stats
## recode():    dplyr, car
## some():      purrr, car
## summarise(): dplyr, vcdExtra

Package car(John Fox 先生の“Companion for Applied Regression”のパッケージ)についているデータをload

data("Womenlf")
Womenlf %>% tbl_df()

## # A tibble: 263 x 4
##      partic hincome children  region
##  *   <fctr>   <int>   <fctr>  <fctr>
##  1 not.work      15  present Ontario
##  2 not.work      13  present Ontario
##  3 not.work      45  present Ontario
##  4 not.work      23  present Ontario
##  5 not.work      19  present Ontario
##  6 not.work       7  present Ontario
##  7 not.work      15  present Ontario
##  8 fulltime       7  present Ontario
##  9 not.work      15  present Ontario
## 10 not.work      23  present Ontario
## # ... with 253 more rows

str(Womenlf)

## 'data.frame':    263 obs. of  4 variables:
##  $ partic  : Factor w/ 3 levels "fulltime","not.work",..: 2 2 2 2 2 2 2 1 2 2 ...
##  $ hincome : int  15 13 45 23 19 7 15 7 15 23 ...
##  $ children: Factor w/ 2 levels "absent","present": 2 2 2 2 2 2 2 2 2 2 ...
##  $ region  : Factor w/ 5 levels "Atlantic","BC",..: 3 3 3 3 3 3 3 3 3 3 ...

このデータは、263 行（ケース）、4 列（変数）のデータフレーム data の詳細は、

?Womenlf

である。

変数名を確認する

colnames(Womenlf)

## [1] "partic"   "hincome"  "children" "region"

partic （勤務形態：フルタイム、仕事してない、パートタイム Labour-Force Participation. A factor with levels (note: out of order): fulltime, Working full-time; not.work, Not working outside the home; parttime, Working part-time.

hincome 夫の収入単位＄1000 Husband’s income, $1000s.

children 子供：いない、いる Presence of children in the household. A factor with levels: absent, present.

region 居住地域： A factor with levels: Atlantic, Atlantic Canada; BC, British Columbia; Ontario; Prairie, Prairie provinces; Quebec.

クロス表をつくる

table(行変数,列変数)　とすることで、行変数と列変数のクロス表がつくられる。
?table でhelp
exclude=NULL を指定しないと NAが除去された集計となる。

table(Womenlf$partic,Womenlf$children)

##           
##            absent present
##   fulltime     46      20
##   not.work     26     129
##   parttime      7      35

別の表記

with(Womenlf, table(partic,children))

##           children
## partic     absent present
##   fulltime     46      20
##   not.work     26     129
##   parttime      7      35

xtabs を使う

xtabs(~partic + children, data=Womenlf)

##           children
## partic     absent present
##   fulltime     46      20
##   not.work     26     129
##   parttime      7      35

# カイ二乗検定を行う。chisq.test(

)

chisq.test に投げ込めばよい

chisq.test(table(Womenlf$partic,Womenlf$children))

## 
##  Pearson's Chi-squared test
## 
## data:  table(Womenlf$partic, Womenlf$children)
## X-squared = 65.945, df = 2, p-value = 4.788e-15

なにか変数に付値して、それを投げ込んでもよい。

.tbl <- xtabs(~partic + children, data=Womenlf)
chisq.test(.tbl)

## 
##  Pearson's Chi-squared test
## 
## data:  .tbl
## X-squared = 65.945, df = 2, p-value = 4.788e-15

mosaic(.tbl,main = "pattic vs children")

## ピアソン残差を色分けする。

mosaic(.tbl,main = "pattic vs children",shade=TRUE)

mosaicplot も使える

ただし、表での行と列の位置がひっくり返っている。使うならvcd::mosaic()を。

mosaic() で日本語使う方法は、slidshareを参照してください。

「二つのmosaic plotと日本語表示」Tokyo.R#65（2017/09/23）でのLTスライド。
https://www.slideshare.net/kazuofujimoto/2mosaic-plot-80084536

mosaicplot(.tbl)

データの属性を確認する

summary(Womenlf)

##       partic       hincome         children        region   
##  fulltime: 66   Min.   : 1.00   absent : 79   Atlantic: 30  
##  not.work:155   1st Qu.:10.00   present:184   BC      : 29  
##  parttime: 42   Median :14.00                 Ontario :108  
##                 Mean   :14.76                 Prairie : 31  
##                 3rd Qu.:19.00                 Quebec  : 65  
##                 Max.   :45.00

str(Womenlf)

## 'data.frame':    263 obs. of  4 variables:
##  $ partic  : Factor w/ 3 levels "fulltime","not.work",..: 2 2 2 2 2 2 2 1 2 2 ...
##  $ hincome : int  15 13 45 23 19 7 15 7 15 23 ...
##  $ children: Factor w/ 2 levels "absent","present": 2 2 2 2 2 2 2 2 2 2 ...
##  $ region  : Factor w/ 5 levels "Atlantic","BC",..: 3 3 3 3 3 3 3 3 3 3 ...

hincome は整数、それ以外は、因子（factor）。なので、この三つの変数を使ってMCAをやってみる。

res.MCA <- MCA(Womenlf[,c(1,3,4)])

col.var = で変数（var）を色分けする。

plot.MCA(res.MCA,invisible = "ind",col.var = c(rep(1,3),rep(3,2),rep(4,5)))

res.MCA <- MCA(Womenlf,quanti.sup = 2)

#explor::explor(res.MCA)

res <- explor::prepare_results(res.MCA)
explor::MCA_ind_plot(res, xax = 1, yax = 2,ind_sup = FALSE,
    lab_var = NULL, , ind_lab_min_contrib = 0,
    col_var = "partic", labels_size = 9,
    point_opacity = 0.5, opacity_var = NULL, point_size = 64,
    ellipses = TRUE, transitions = TRUE, labels_positions = NULL,
    xlim = c(-1.43, 2.14), ylim = c(-1.16, 2.41))

res <- explor::prepare_results(res.MCA)
explor::MCA_ind_plot(res, xax = 1, yax = 2,ind_sup = FALSE,
    lab_var = NULL, , ind_lab_min_contrib = 0,
    col_var = "children", labels_size = 9,
    point_opacity = 0.5, opacity_var = NULL, point_size = 64,
    ellipses = TRUE, transitions = TRUE, labels_positions = NULL,
    xlim = c(-1.39, 2.17), ylim = c(-1.55, 2.01))

クラスタリング

res.HCPC <- HCPC(res.MCA,nb.clust = 6)

plot.HCPC(res.HCPC)

#plot.HCPC(res.HCPC,choice = "tree")

表を極める（1）tableの基礎

fujimoto@sakushin-u.ac.jp

2018年2月23日

Package car(John Fox 先生の“Companion for Applied Regression”のパッケージ)についているデータをload

変数名を確認する

クロス表をつくる

別の表記

xtabs を使う

chisq.test に投げ込めばよい

なにか変数に付値して、それを投げ込んでもよい。

mosaicplot も使える

データの属性を確認する

col.var = で変数（var）を色分けする。

クラスタリング

表を極める（1）tableの基礎

fujimoto@sakushin-u.ac.jp

2018年2月23日

Package car(John Fox 先生の“Companion for Applied Regression”のパッケージ)についているデータをload

変数名を確認する

クロス表をつくる

別の表記

xtabs を使う

chisq.test に投げ込めばよい

なにか変数に付値して、それを投げ込んでもよい。

mosaicplot も使える

データの属性を確認する

col.var = で 変数（var）を色分けする。

クラスタリング

col.var = で変数（var）を色分けする。