# Xplortext
# Attach the Xplortext package, load the `open.question` data set and
# preview its first six rows.
library(Xplortext)
data("open.question")
head(open.question, n = 6L)
## Gender Age_Group Age Education Gen_Age Gen_Edu Age_Edu Gen_Edu_Age
## 1 Man >70 80 E_Low M>55 M_EduLow >55_Low M_Low_>55
## 2 Man 50_54 54 E_Low M31_55 M_EduLow 31_55_Low M_Low_31_55
## 3 Man 40_44 40 E_Low M31_55 M_EduLow 31_55_Low M_Low_31_55
## 4 Woman 25_29 27 E_Medium W<=30 W_EduMed <=30_Medium W_Med_<=30
## 5 Woman 35_39 39 E_Medium W31_55 W_EduMed 31_55_Medium W_Med_31_55
## 6 Man >70 80 E_Low M>55 M_EduLow >55_Low M_Low_>55
## Important
## 1 good health
## 2 happiness in people around me contented family would make me happy
## 3 contentment
## 4 health
## 5 to be happy
## 6 my wife
## Relaunch
## 1 happiness
## 2 contented with life as a whole
## 3 family
## 4 happiness money family
## 5 healthy have enough to eat enough money to live on
## 6 music holidays good health
## [1] 300 10
## 'data.frame': 300 obs. of 10 variables:
## $ Gender : Factor w/ 2 levels "Man","Woman": 1 1 1 2 2 1 2 2 2 2 ...
## $ Age_Group : Factor w/ 12 levels "18_19","20_24",..: 12 8 6 3 5 12 7 4 10 11 ...
## $ Age : int 80 54 40 27 39 80 46 33 64 65 ...
## $ Education : Factor w/ 3 levels "E_Low","E_Medium",..: 1 1 1 2 2 1 2 1 1 1 ...
## $ Gen_Age : Ord.factor w/ 6 levels "M<=30"<"M31_55"<..: 3 2 2 4 5 3 5 5 6 6 ...
## $ Gen_Edu : Ord.factor w/ 6 levels "M_EduLow"<"M_EduMed"<..: 1 1 1 5 5 1 5 4 4 4 ...
## $ Age_Edu : Ord.factor w/ 9 levels "<=30_Low"<"<=30_Medium"<..: 7 4 4 2 5 7 5 4 7 7 ...
## $ Gen_Edu_Age: Ord.factor w/ 17 levels "M_Low_31_55"<..: 2 1 1 12 13 2 13 10 11 11 ...
## $ Important : Factor w/ 151 levels "a happy home life",..: 39 49 17 57 133 108 57 135 57 63 ...
## $ Relaunch : Factor w/ 279 levels "a good family life a happy family life you would say time murder no rapes nothing nasty no accidents no ill wea"| __truncated__,..: 86 25 36 90 130 170 86 146 147 221 ...
## Gender Age_Group Age Education Gen_Age
## Man :142 >70 :44 Min. :18.00 E_Low :108 M<=30 :33
## Woman:158 35_39 :35 1st Qu.:32.00 E_Medium:135 M31_55:62
## 25_29 :31 Median :43.00 E_High : 57 M>55 :47
## 30_34 :31 Mean :46.48 W<=30 :32
## 40_44 :30 3rd Qu.:62.00 W31_55:78
## 45_49 :30 Max. :90.00 W>55 :48
## (Other):99
## Gen_Edu Age_Edu Gen_Edu_Age Important
## M_EduLow :51 31_55_Medium:64 W_Med_31_55: 34 health : 29
## M_EduMed :64 >55_Low :57 M_Low_>55 : 30 my family : 27
## M_EduHigh:27 31_55_Low :50 M_Med_31_55: 30 good health: 23
## W_EduLow :57 <=30_Medium :45 W_Low_31_55: 29 family : 22
## W_EduMed :71 31_55_High :26 W_Low_>55 : 27 happiness : 16
## W_EduHigh:30 >55_Medium :26 M_Med_<=30 : 24 (Other) :181
## (Other) :32 (Other) :126 NA's : 2
## Relaunch
## family : 4
## good health : 4
## happiness : 4
## a good family life a happy family life you would say time murder no rapes nothing nasty no accidents no ill wealth those sort of things I can't really think of anymore: 1
## a good family life having true friendship : 1
## (Other) :274
## NA's : 12
# Non-aggregate analysis: no `var.agg` is given. Columns 9 and 10
# (`Important`, `Relaunch`) hold the free-text answers; Gender, Age_Group,
# Education and Age are passed as contextual variables.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)

# Correspondence analysis on the lexical table, asking for 8 dimensions and
# suppressing the default graphs.
res.LexCA <- LexCA(res.TD, graph = FALSE, ncp = 8)

# Ellipses around words only (`selDoc = NULL`), one call per way of
# selecting the words. NOTE(review): the "meta", "contrib" and "cos2"
# selectors follow the package's selWord syntax -- see ?ellipseLexCA.
ellipseLexCA(
  res.LexCA,
  selWord = "meta 1",
  selDoc = NULL,
  col.word = "brown"
)

ellipseLexCA(
  res.LexCA,
  selWord = "contrib 10",
  selDoc = NULL,
  col.word = "brown"
)

# Explicit list of words.
ellipseLexCA(
  res.LexCA,
  selWord = c("work", "job", "money", "comfortable"),
  selDoc = NULL,
  col.word = "brown"
)

ellipseLexCA(
  res.LexCA,
  selWord = "cos2 0.2",
  selDoc = NULL,
  col.word = "brown"
)

# Aggregate analysis: the free-text answers (columns 9 and 10) are grouped
# by the `Gen_Age` factor (gender x age class).
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Correspondence analysis without the default graphs.
res.LexCA <- LexCA(res.TD, graph = FALSE)

# Documents only: no word is selected, documents are drawn in black.
ellipseLexCA(res.LexCA, selWord = NULL, col.doc = "black")

# Words only (`selDoc = NULL`), one call per way of selecting the words.
# NOTE(review): selector syntax is documented in ?ellipseLexCA.
ellipseLexCA(
  res.LexCA,
  selWord = "meta 3",
  selDoc = NULL,
  col.word = "brown"
)

ellipseLexCA(
  res.LexCA,
  selWord = "contrib 10",
  selDoc = NULL,
  col.word = "brown"
)

# Explicit list of words.
ellipseLexCA(
  res.LexCA,
  selWord = c("work", "job", "money", "comfortable"),
  selDoc = NULL,
  col.word = "brown"
)

ellipseLexCA(
  res.LexCA,
  selWord = "cos2 0.2",
  selDoc = NULL,
  col.word = "brown"
)

# Reload the data set, then build the lexical table aggregated by
# `Age_Group` (numbers are not removed in this call).
data("open.question")
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)

# Correspondence analysis without the default graphs.
res.LexCA <- LexCA(res.TD, graph = FALSE)

# Constrained hierarchical clustering on the CA result, cut into 4 clusters.
# NOTE(review): `min` is presumably the lower bound on the number of
# clusters -- confirm in ?LexCHCca.
res.LexCHCca <- LexCHCca(res.LexCA, nb.clust = 4, min = 3)

# Derive the tree labels from the clustering result.
res.LabelTree <- LabelTree(res.LexCHCca)
# Non-aggregate lexical table built from the two free-text columns.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Correspondence analysis with the default graphs enabled.
# NOTE(review): `lmd` / `lmw` look like the labelling thresholds for
# documents / words -- confirm in ?LexCA.
res.LexCA <- LexCA(res.TD, lmd = 0, lmw = 1)


# Lexical table aggregated by `Age_Group`.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Correspondence analysis with the default graphs enabled.
# NOTE(review): `lmd` / `lmw` look like the labelling thresholds for
# documents / words -- confirm in ?LexCA.
res.LexCA <- LexCA(res.TD, lmd = 0, lmw = 1)


# Lexical table aggregated by `Age_Group` (numbers are not removed here).
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)

# Correspondence analysis without the default graphs.
res.LexCA <- LexCA(res.TD, graph = FALSE)

# Constrained hierarchical clustering on the CA result, cut into 4 clusters.
res.ccah <- LexCHCca(res.LexCA, nb.clust = 4, min = 3)

# Non-aggregate lexical table with contextual variables attached.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)

# Correspondence analysis asking for 8 dimensions, no default graphs.
res.LexCA <- LexCA(res.TD, graph = FALSE, ncp = 8)

# Hierarchical clustering on the CA result: 5 clusters, graphs drawn.
res.hcca <- LexHCca(
  res.LexCA,
  graph = TRUE,
  nb.clust = 5,
  order = TRUE
)


# Lexical table aggregated by `Age_Group`.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Correspondence analysis without the default graphs, then an explicit plot
# with documents chosen via "contrib 30" and words via "coord 20".
res.CA <- LexCA(res.TD, graph = FALSE)
plot(
  res.CA,
  selDoc = "contrib 30",
  selWord = "coord 20"
)
# Lexical table aggregated by `Gen_Edu` (gender x education level).
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Edu",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Characteristic words of each group; `maxCharDoc = 0` is passed, and the
# printed result that follows lists words only.
LD <- LexChar(res.TD, maxCharDoc = 0)
##
##
## CHARACTERISTIC WORDS
## (DETAILED INFORMATION)
##
## Group1: M_EduLow
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Word Intern % glob % Intern freq Glob freq p.value v.test
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## wife 1.178 0.469 7 19 0.02865 2.18822
## friends 0.168 1.038 1 42 0.02042 -2.318547
## peace 0.000 0.716 0 29 0.01972 -2.331662
##
## Group2: M_EduMed
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##
## Group3: M_EduHigh
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Word Intern % glob % Intern freq Glob freq p.value v.test
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## job 2.786 1.235 10 50 0.02175 2.294698
##
## Group4: W_EduLow
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##
## Group5: W_EduMed
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Word Intern % glob % Intern freq Glob freq p.value v.test
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## job 0.620 1.235 7 50 0.03192 -2.145466
## good 1.417 2.396 16 97 0.01174 -2.519859
##
## Group6: W_EduHigh
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Word Intern % glob % Intern freq Glob freq p.value v.test
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## husband 1.499 0.568 7 23 0.02446 2.24984



# Lexical table aggregated by `Age_Group` (numbers are not removed here).
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)

# Correspondence analysis and constrained clustering, both computed without
# graphs so that each view can be drawn explicitly afterwards.
res.LexCA <- LexCA(res.TD, graph = FALSE)
res.chcca <- LexCHCca(
  res.LexCA,
  nb.clust = 4,
  min = 3,
  graph = FALSE
)

# Three views of the clustering result: tree, map and bar plot.
plot(res.chcca, choice = "tree")

plot(res.chcca, choice = "map")

plot(res.chcca, choice = "bar", max.plot = 5)

# Non-aggregate analysis: each answer is kept as its own document, with
# Gender, Age_Group, Education and Age attached as contextual variables.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)
# Default plot of the TextData object.
plot(res.TD)


# Aggregate analysis: answers are grouped by `Gen_Age`, and `segment = TRUE`
# is requested in addition to the contextual variables.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age",
  segment = TRUE
)
# Default plot of the TextData object.
plot(res.TD)



# Lexical table aggregated by `Age_Group`.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)

# Correspondence analysis with the default graphs enabled.
# NOTE(review): `lmd` / `lmw` look like the labelling thresholds for
# documents / words -- confirm in ?LexCA.
res.LexCA <- LexCA(res.TD, lmd = 0, lmw = 1)


## **Results for CA and Aggregate Lexical Table (LexCA)**
## *The results are available in the following objects:
##
## name
## 1 "$eig"
## 2 "$row"
## 3 "$col"
## 4 "$row.sup"
## 5 "$col.sup"
## 6 "$quanti.sup"
## 7 "$quali.sup"
## 8 "$meta"
## 9 "$VCr"
## 10 "Inertia"
## 11 "segment"
## 12 "var.agg"
## description
## 1 "Eigenvalues and % of variance"
## 2 "CA results for the active documents/aggregate documents"
## 3 "CA results for active words"
## 4 "CA results for the supplementary documents/aggregate documents"
## 5 "CA results for supplementary words"
## 6 "CA results for the supplementary continuous variables"
## 7 "CA results for the supplementary categorical variables"
## 8 "list of Keydocs selected documents and Metakeys selected words"
## 9 "Cramer's V coefficient"
## 10 "total inertia"
## 11 "CA results for repeated segments"
## 12 "name of the aggregate variable used"
## [1] "All the results are in file "
# Non-aggregate analysis: each answer is kept as its own document, with
# Gender, Age_Group, Education and Age attached as contextual variables.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)
# Default plot of the TextData object.
plot(res.TD)


# Aggregate analysis with repeated segments: answers are grouped by
# `Gen_Age` and `segment = TRUE` is requested.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age",
  segment = TRUE
)
# Default plot of the TextData object.
plot(res.TD)


