# wordassociation.R

#**************************************************************************
# Word association: Tam Linh
# 19.04.2015 
# ver R.3.1.3
#************************************************************************** 


# load package ------------------------------------------------------------
library(SensoMineR)

## Loading required package: FactoMineR

library(FactoMineR)
# Print warnings as they occur instead of silencing them globally: the
# row-wise chisq.test() calls below warn when the chi-squared approximation
# may be unreliable (small expected counts), and that must stay visible.
options(warn = 1)


# Percentage of each modality ---------------------------------------------

# Subject counts per modality, expressed as a share of 120
# (presumably the total number of respondents -- TODO confirm).
n.subject <- c(16, 38, 66, 40, 40, 40, 55, 65)
percentage <- (n.subject / 120) * 100
round(percentage, digits = 2)

## [1] 13.33 31.67 55.00 33.33 33.33 33.33 45.83 54.17

# Import data -------------------------------------------------------------

# Move to the project's data folder, then read the tab-separated table:
# the first line holds the column headers and the first column the row names.
setwd("C:/Users/LE Minh-Tam/Dropbox/10. Project/15. Linh thesis/WordAssociation/01. Data")
tradfood <- read.table(
  file = "tradfood.txt",
  sep = "\t",
  row.names = 1,
  header = TRUE
)


# Chi-squared tests -------------------------------------------------------

## Age
# Columns 1-3 of tradfood form the age group of columns.
age <- tradfood[, 1:3]

# One chi-squared test per row, printed under the row's name.
# seq_len() keeps the loop from running on a zero-row table, where
# 1:nrow(age) would iterate over c(1, 0).
# NOTE(review): chisq.test() without `p` tests against equal proportions.
# n.subject near the top of the script lists unequal counts (16/38/66); if
# those are the age-group sizes, consider p = size / sum(size) -- TODO confirm.
for (i in seq_len(nrow(age))) {
  print(rownames(age)[i])
  test.i <- chisq.test(age[i, ])
  print(test.i)
}

## [1] "thoi.gian"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 15.5714, df = 2, p-value = 0.0004156
## 
## [1] "quen.thuoc"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 21.7647, df = 2, p-value = 1.879e-05
## 
## [1] "cam.nhan.ve.cam.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 10.3425, df = 2, p-value = 0.005678
## 
## [1] "gia.dinh"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 23.75, df = 2, p-value = 6.962e-06
## 
## [1] "dip.le"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 10.8333, df = 2, p-value = 0.004442
## 
## [1] "tinh.dan.toc"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 15, df = 2, p-value = 0.0005531
## 
## [1] "pho.bien"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 6, df = 2, p-value = 0.04979
## 
## [1] "che.bien.bao.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 1.25, df = 2, p-value = 0.5353
## 
## [1] "dinh.duong"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 5.9091, df = 2, p-value = 0.0521
## 
## [1] "am.thuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 5, df = 2, p-value = 0.08208
## 
## [1] "dac.trung.vung.mien"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 4.2105, df = 2, p-value = 0.1218
## 
## [1] "thuong.xuyen.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 11.9231, df = 2, p-value = 0.002576
## 
## [1] "an.toan.chat.luong"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 5, df = 2, p-value = 0.08208
## 
## [1] "tien.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 36.5, df = 2, p-value = 1.186e-08
## 
## [1] "phong.tuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  age[i, ]
## X-squared = 8, df = 2, p-value = 0.01832

## Sex
# Columns 4-5 of tradfood form the sex group of columns.
sex <- tradfood[, 4:5]

# One chi-squared test per row, printed under the row's name.
# seq_len() keeps the loop from running on a zero-row table, where
# 1:nrow(sex) would iterate over c(1, 0).
# NOTE(review): chisq.test() without `p` assumes equal expected proportions;
# confirm both sexes were equally represented among the respondents.
for (i in seq_len(nrow(sex))) {
  print(rownames(sex)[i])
  test.i <- chisq.test(sex[i, ])
  print(test.i)
}

## [1] "thoi.gian"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.8, df = 1, p-value = 0.3711
## 
## [1] "quen.thuoc"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.8, df = 1, p-value = 0.3711
## 
## [1] "cam.nhan.ve.cam.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.3952, df = 1, p-value = 0.5296
## 
## [1] "gia.dinh"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.1452, df = 1, p-value = 0.7031
## 
## [1] "dip.le"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.2238, df = 1, p-value = 0.6361
## 
## [1] "tinh.dan.toc"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.5505, df = 1, p-value = 0.4581
## 
## [1] "pho.bien"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.0054, df = 1, p-value = 0.9417
## 
## [1] "che.bien.bao.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 8.2451, df = 1, p-value = 0.004086
## 
## [1] "dinh.duong"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.5505, df = 1, p-value = 0.4581
## 
## [1] "am.thuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 1.2755, df = 1, p-value = 0.2587
## 
## [1] "dac.trung.vung.mien"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.0314, df = 1, p-value = 0.8592
## 
## [1] "thuong.xuyen.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 1.2255, df = 1, p-value = 0.2683
## 
## [1] "an.toan.chat.luong"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 0.4785, df = 1, p-value = 0.4891
## 
## [1] "tien.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 3.5694, df = 1, p-value = 0.05885
## 
## [1] "phong.tuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  sex[i, ]
## X-squared = 7.3841, df = 1, p-value = 0.00658

## Region
# Columns 6-8 of tradfood form the region group of columns.
region <- tradfood[, 6:8]

# One chi-squared test per row, printed under the row's name.
# seq_len() keeps the loop from running on a zero-row table, where
# 1:nrow(region) would iterate over c(1, 0).
for (i in seq_len(nrow(region))) {
  print(rownames(region)[i])
  test.i <- chisq.test(region[i, ])
  print(test.i)
}

## [1] "thoi.gian"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 3.5147, df = 2, p-value = 0.1725
## 
## [1] "quen.thuoc"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 36.7189, df = 2, p-value = 1.063e-08
## 
## [1] "cam.nhan.ve.cam.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 2.4966, df = 2, p-value = 0.287
## 
## [1] "gia.dinh"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 1.6775, df = 2, p-value = 0.4323
## 
## [1] "dip.le"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 5.1345, df = 2, p-value = 0.07674
## 
## [1] "tinh.dan.toc"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 14.2372, df = 2, p-value = 0.0008099
## 
## [1] "pho.bien"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 23.0778, df = 2, p-value = 9.744e-06
## 
## [1] "che.bien.bao.quan"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 2.285, df = 2, p-value = 0.319
## 
## [1] "dinh.duong"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 1.158, df = 2, p-value = 0.5605
## 
## [1] "am.thuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 5.9872, df = 2, p-value = 0.05011
## 
## [1] "dac.trung.vung.mien"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 12.5253, df = 2, p-value = 0.001906
## 
## [1] "thuong.xuyen.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 4.8881, df = 2, p-value = 0.08681
## 
## [1] "an.toan.chat.luong"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 2.8451, df = 2, p-value = 0.2411
## 
## [1] "tien.dung"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 2.6493, df = 2, p-value = 0.2659
## 
## [1] "phong.tuc"
## 
##  Chi-squared test for given probabilities
## 
## data:  region[i, ]
## X-squared = 12.6862, df = 2, p-value = 0.001759

# Correspondence analysis -------------------------------------------------
## sex.ca <- CA(sex) # not run: only 1 degree of freedom

# Fit each correspondence analysis without the automatic graphs, then draw
# the map explicitly with a descriptive title.
age.ca <- CA(age, graph = FALSE)
plot(age.ca, title = "Correspondence analysis for Age")

region.ca <- CA(region, graph = FALSE)
plot(region.ca, title = "Correspondence analysis for Region")

# Multiple factor analysis (MFA) ------------------------------------------

# The eight columns of tradfood are split into three consecutive groups of
# 3, 2 and 3 columns, each declared as type "f" and labelled age, sex and
# region; ncp = Inf places no cap on the number of dimensions kept.
res <- MFA(
  tradfood,
  group = c(3, 2, 3),
  type = c("f", "f", "f"),
  ncp = Inf,
  name.group = c("age", "sex", "region")
)

# Coffee_IP ---------------------------------------------------------------

# Read the tab-separated coffee data set from the project's data folder.
setwd("C:/Users/LE Minh-Tam/Dropbox/10. Project/15. Linh thesis/WordAssociation/01. Data")
cof.ip <- read.table(file = "data_ip.txt", sep = "\t", header = TRUE)

# Ideal map: column 2 is passed as col.p, column 1 as col.j and the last
# column as col.lik; "id_" is the string handed to id.recogn.
res.IdMap <- IdMap(
  cof.ip,
  col.p = 2,
  col.j = 1,
  col.lik = ncol(cof.ip),
  id.recogn = "id_"
)

# Redraw the map on a fixed [-5, 5] square, in colour.
plot.IdMap(
  res.IdMap,
  xlim = c(-5, 5),
  ylim = c(-5, 5),
  levels.contour = NULL,
  color = TRUE
)

## Attributes
# Keep columns 1-2 plus every second column from 3 up to the next-to-last
# one (presumably the attribute ratings, skipping interleaved "id_" columns
# -- TODO confirm against the data file).
att.cof <- cof.ip[, c(1, 2, seq(from = 3, to = ncol(cof.ip) - 1, by = 2))]

# Describe the selected columns with decat() using the formula below, then
# run a PCA on the adjusted means it returns.
res.att <- decat(att.cof, formul = "~Sanpham+Nguoithu", firstvar = 3, graph = FALSE)
res.PCA <- PCA(res.att$adjmean, graph = FALSE)

# dev.new() opens a fresh graphics device on any platform; X11() fails where
# no X11 device is available.
dev.new()
plot(res.PCA, choix = "ind")

dev.new()
plot(res.PCA, choix = "var")

# Draft: data from the Dom paper --------------------------------------------
# setwd("C:/Users/LE Minh-Tam/Dropbox/10. Project/15. Linh thesis/WordAssociation/script")
# rice1 <- read.table("rice1.txt", header=TRUE, row.names=1, sep="\t")
# rice <- rice1[,2:ncol(rice1)]
# 
# ## Chi-squared test
# for (i in 1:nrow(rice)) {
#   print(rownames(rice[i,]))
#   test.i <- chisq.test(rice[i,])
#   print(test.i)
# }
# 
# ## Correspondence analysis
# res.CA <- CA(rice)

# wordassociation.R
#
# LE Minh-Tam
#
# Mon Apr 20 13:16:28 2015