# Xplortext

library(Xplortext)

# Load the `open.question` data set and preview its first six rows.
data("open.question")
head(open.question, n = 6L)

##   Gender Age_Group Age Education Gen_Age  Gen_Edu      Age_Edu Gen_Edu_Age
## 1    Man       >70  80     E_Low    M>55 M_EduLow      >55_Low   M_Low_>55
## 2    Man     50_54  54     E_Low  M31_55 M_EduLow    31_55_Low M_Low_31_55
## 3    Man     40_44  40     E_Low  M31_55 M_EduLow    31_55_Low M_Low_31_55
## 4  Woman     25_29  27  E_Medium   W<=30 W_EduMed  <=30_Medium  W_Med_<=30
## 5  Woman     35_39  39  E_Medium  W31_55 W_EduMed 31_55_Medium W_Med_31_55
## 6    Man       >70  80     E_Low    M>55 M_EduLow      >55_Low   M_Low_>55
##                                                            Important
## 1                                                        good health
## 2 happiness in people around me contented family would make me happy
## 3                                                        contentment
## 4                                                             health
## 5                                                        to be happy
## 6                                                            my wife
##                                             Relaunch
## 1                                          happiness
## 2                     contented with life as a whole
## 3                                             family
## 4                             happiness money family
## 5 healthy have enough to eat enough money to live on
## 6                        music holidays  good health

# Report the number of rows and columns of the data set.
dim(open.question)

## [1] 300  10

# Compact overview of every column: its type and, for factors, its levels.
str(open.question)

## 'data.frame':    300 obs. of  10 variables:
##  $ Gender     : Factor w/ 2 levels "Man","Woman": 1 1 1 2 2 1 2 2 2 2 ...
##  $ Age_Group  : Factor w/ 12 levels "18_19","20_24",..: 12 8 6 3 5 12 7 4 10 11 ...
##  $ Age        : int  80 54 40 27 39 80 46 33 64 65 ...
##  $ Education  : Factor w/ 3 levels "E_Low","E_Medium",..: 1 1 1 2 2 1 2 1 1 1 ...
##  $ Gen_Age    : Ord.factor w/ 6 levels "M<=30"<"M31_55"<..: 3 2 2 4 5 3 5 5 6 6 ...
##  $ Gen_Edu    : Ord.factor w/ 6 levels "M_EduLow"<"M_EduMed"<..: 1 1 1 5 5 1 5 4 4 4 ...
##  $ Age_Edu    : Ord.factor w/ 9 levels "<=30_Low"<"<=30_Medium"<..: 7 4 4 2 5 7 5 4 7 7 ...
##  $ Gen_Edu_Age: Ord.factor w/ 17 levels "M_Low_31_55"<..: 2 1 1 12 13 2 13 10 11 11 ...
##  $ Important  : Factor w/ 151 levels "a happy home life",..: 39 49 17 57 133 108 57 135 57 63 ...
##  $ Relaunch   : Factor w/ 279 levels "a good family life a happy family life you would say time murder no rapes nothing nasty no accidents no ill wea"| __truncated__,..: 86 25 36 90 130 170 86 146 147 221 ...

# Per-column summary: counts for factors, quartiles and mean for `Age`.
summary(open.question)

##    Gender      Age_Group       Age           Education     Gen_Age  
##  Man  :142   >70    :44   Min.   :18.00   E_Low   :108   M<=30 :33  
##  Woman:158   35_39  :35   1st Qu.:32.00   E_Medium:135   M31_55:62  
##              25_29  :31   Median :43.00   E_High  : 57   M>55  :47  
##              30_34  :31   Mean   :46.48                  W<=30 :32  
##              40_44  :30   3rd Qu.:62.00                  W31_55:78  
##              45_49  :30   Max.   :90.00                  W>55  :48  
##              (Other):99                                             
##       Gen_Edu           Age_Edu        Gen_Edu_Age        Important  
##  M_EduLow :51   31_55_Medium:64   W_Med_31_55: 34   health     : 29  
##  M_EduMed :64   >55_Low     :57   M_Low_>55  : 30   my family  : 27  
##  M_EduHigh:27   31_55_Low   :50   M_Med_31_55: 30   good health: 23  
##  W_EduLow :57   <=30_Medium :45   W_Low_31_55: 29   family     : 22  
##  W_EduMed :71   31_55_High  :26   W_Low_>55  : 27   happiness  : 16  
##  W_EduHigh:30   >55_Medium  :26   M_Med_<=30 : 24   (Other)    :181  
##                 (Other)     :32   (Other)    :126   NA's       :  2  
##                                                                                                                                                                     Relaunch  
##  family                                                                                                                                                                 :  4  
##  good health                                                                                                                                                            :  4  
##  happiness                                                                                                                                                              :  4  
##  a good family life a happy family life you would say time murder no rapes nothing nasty no accidents no ill wealth those sort of things I can't really think of anymore:  1  
##  a good family life having true friendship                                                                                                                              :  1  
##  (Other)                                                                                                                                                                :274  
##  NA's                                                                                                                                                                   : 12

# Non-aggregate analysis ----
# Build the text data object from the two open-ended answers (columns 9 and
# 10, i.e. `Important` and `Relaunch`), attaching Gender, Age_Group and
# Education as qualitative context and Age as quantitative context.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)

# Correspondence analysis with ncp = 8; automatic plotting is switched off.
res.LexCA <- LexCA(res.TD, ncp = 8, graph = FALSE)

# Ellipse plots for different word selections; selDoc is NULL in every call
# and the words are always drawn in brown.

# Selection string "meta 1".
ellipseLexCA(
  res.LexCA,
  selWord = "meta 1",
  selDoc = NULL,
  col.word = "brown"
)

# Selection string "contrib 10".
ellipseLexCA(
  res.LexCA,
  selWord = "contrib 10",
  selDoc = NULL,
  col.word = "brown"
)

# Explicit list of four words.
ellipseLexCA(
  res.LexCA,
  selWord = c("work", "job", "money", "comfortable"),
  selDoc = NULL,
  col.word = "brown"
)

# Selection string "cos2 0.2".
ellipseLexCA(
  res.LexCA,
  selWord = "cos2 0.2",
  selDoc = NULL,
  col.word = "brown"
)

# Aggregate analysis by Gen_Age ----
# Same two text columns (9 and 10), this time aggregated through the
# `Gen_Age` variable.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(res.TD, graph = FALSE)

# Ellipse plot with selWord set to NULL; documents are drawn in black.
ellipseLexCA(
  res.LexCA,
  selWord = NULL,
  col.doc = "black"
)

# The remaining plots set selDoc to NULL and draw the selected words in brown.

# Selection string "meta 3".
ellipseLexCA(
  res.LexCA,
  selWord = "meta 3",
  selDoc = NULL,
  col.word = "brown"
)

# Selection string "contrib 10".
ellipseLexCA(
  res.LexCA,
  selWord = "contrib 10",
  selDoc = NULL,
  col.word = "brown"
)

# Explicit list of four words.
ellipseLexCA(
  res.LexCA,
  selWord = c("work", "job", "money", "comfortable"),
  selDoc = NULL,
  col.word = "brown"
)

# Selection string "cos2 0.2".
ellipseLexCA(
  res.LexCA,
  selWord = "cos2 0.2",
  selDoc = NULL,
  col.word = "brown"
)

# LexCHCca clustering and tree labelling ----
# Reload the data set, aggregate the two text columns by `Age_Group`, run the
# correspondence analysis without plotting, then cluster with LexCHCca
# (nb.clust = 4, min = 3).
data(open.question)
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(res.TD, graph = FALSE)
res.LexCHCca <- LexCHCca(
  res.LexCA,
  nb.clust = 4,
  min = 3
)

# Pass the clustering result to LabelTree and keep what it returns.
res.LabelTree <- LabelTree(res.LexCHCca)



# LexCA with lmd = 0 and lmw = 1 ----
# First run: no aggregation variable.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(
  res.TD,
  lmd = 0,
  lmw = 1
)

# Second run: documents aggregated by `Age_Group`. Both `res.TD` and
# `res.LexCA` are overwritten.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(
  res.TD,
  lmd = 0,
  lmw = 1
)

# LexCHCca on the Age_Group aggregation ----
# Rebuild the aggregated text data and its correspondence analysis (no plot),
# then store the LexCHCca result (nb.clust = 4, min = 3) in `res.ccah`.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(res.TD, graph = FALSE)
res.ccah <- LexCHCca(
  res.LexCA,
  nb.clust = 4,
  min = 3
)

# LexHCca on the non-aggregate analysis ----
# Text data with Gender, Age_Group and Education as qualitative context and
# Age as quantitative context; correspondence analysis with ncp = 8 and no
# plot; then LexHCca with five clusters, graph = TRUE and order = TRUE.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)
res.LexCA <- LexCA(res.TD, graph = FALSE, ncp = 8)
res.hcca <- LexHCca(
  res.LexCA,
  graph = TRUE,
  nb.clust = 5,
  order = TRUE
)

# Plotting a LexCA result ----
# Aggregate by `Age_Group`, run the correspondence analysis without its
# automatic plot, then plot it with explicit document and word selections.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
res.CA <- LexCA(res.TD, graph = FALSE)
plot(
  res.CA,
  selDoc = "contrib 30",
  selWord = "coord 20"
)

# Characteristic words by Gen_Edu ----
# Aggregate the two text columns by `Gen_Edu` and run LexChar with
# maxCharDoc = 0; the result is kept in `LD`.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Edu",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
LD <- LexChar(res.TD, maxCharDoc = 0)

## 
## 
## CHARACTERISTIC WORDS
## (DETAILED INFORMATION)
## 
## Group1: M_EduLow
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##       Word          Intern %  glob % Intern freq Glob freq  p.value    v.test 
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##                 wife   1.178   0.469          7         19  0.02865     2.18822
##              friends   0.168   1.038          1         42  0.02042   -2.318547
##                peace   0.000   0.716          0         29  0.01972   -2.331662
## 
## Group2: M_EduMed
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## 
## Group3: M_EduHigh
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##       Word          Intern %  glob % Intern freq Glob freq  p.value    v.test 
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##                  job   2.786   1.235         10         50  0.02175    2.294698
## 
## Group4: W_EduLow
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## 
## Group5: W_EduMed
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##       Word          Intern %  glob % Intern freq Glob freq  p.value    v.test 
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##                  job   0.620   1.235          7         50  0.03192   -2.145466
##                 good   1.417   2.396         16         97  0.01174   -2.519859
## 
## Group6: W_EduHigh
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##       Word          Intern %  glob % Intern freq Glob freq  p.value    v.test 
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
##              husband   1.499   0.568          7         23  0.02446     2.24984

# Draw the default plot for the LexChar result stored in `LD`.
plot(LD)

# Plotting a LexCHCca result ----
# Aggregate by `Age_Group` and run the correspondence analysis without its
# automatic plot.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(res.TD, graph = FALSE)

# Cluster with graph = FALSE so that each view can be drawn explicitly below.
res.chcca <- LexCHCca(
  res.LexCA,
  nb.clust = 4,
  min = 3,
  graph = FALSE
)

# The three plot choices used here: "tree", "map" and "bar".
plot(res.chcca, choice = "tree")

plot(res.chcca, choice = "map")

plot(res.chcca, choice = "bar", max.plot = 5)

# Plotting TextData objects ----
# Non-aggregate analysis: each row is kept as its own document, with three
# qualitative context variables and one quantitative one.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)
plot(res.TD)

# Aggregate analysis: documents grouped by `Gen_Age`, with segment = TRUE.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age",
  segment = TRUE
)
plot(res.TD)

# Printing a LexCA result ----
# Aggregate by `Age_Group`, run LexCA with lmd = 0 and lmw = 1, then print
# the result object.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Age_Group",
  Fmin = 10,
  Dmin = 10,
  remov.number = TRUE,
  stop.word.tm = TRUE
)
res.LexCA <- LexCA(
  res.TD,
  lmd = 0,
  lmw = 1
)

print(res.LexCA)

## **Results for CA and Aggregate Lexical Table (LexCA)**
## *The results are available in the following objects:
## 
##    name         
## 1  "$eig"       
## 2  "$row"       
## 3  "$col"       
## 4  "$row.sup"   
## 5  "$col.sup"   
## 6  "$quanti.sup"
## 7  "$quali.sup" 
## 8  "$meta"      
## 9  "$VCr"       
## 10 "Inertia"    
## 11 "segment"    
## 12 "var.agg"    
##    description                                                     
## 1  "Eigenvalues and % of variance"                                 
## 2  "CA results for the active documents/aggregate documents"       
## 3  "CA results for active words"                                   
## 4  "CA results for the supplementary documents/aggregate documents"
## 5  "CA results for supplementary words"                            
## 6  "CA results for the supplementary continuous variables"         
## 7  "CA results for the supplementary categorical variables"        
## 8  "list of Keydocs selected documents and Metakeys selected words"
## 9  "Cramer's V coefficient"                                        
## 10 "total inertia"                                                 
## 11 "CA results for repeated segments"                              
## 12 "name of the aggregate variable used"                           
## [1] "All the results are in file "

# Plotting TextData objects, with repeated segments ----
# Non-aggregate analysis: each row is kept as its own document, with three
# qualitative context variables and one quantitative one.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age"
)
plot(res.TD)

# Aggregate analysis and repeated segments: documents grouped by `Gen_Age`,
# with segment = TRUE.
res.TD <- TextData(
  open.question,
  var.text = c(9, 10),
  var.agg = "Gen_Age",
  remov.number = TRUE,
  Fmin = 10,
  Dmin = 10,
  stop.word.tm = TRUE,
  context.quali = c("Gender", "Age_Group", "Education"),
  context.quanti = "Age",
  segment = TRUE
)
plot(res.TD)

# Xplortext 001

# Subasish Das

# Xplortext