# Correspondence analysis (CA) of legislative procedures by `member` category.
# Packages used by this script: FactoMineR (CA, dimdesc), factoextra
# (get_eigenvalue, fviz_*), ggpubr (ggexport).
library(FactoMineR)
library(factoextra)
library(ggpubr)

# The mangled header "ï..procedure_ref1id" that this script used to look for is
# what a UTF-8 byte-order mark looks like when it gets glued onto the first
# column name. Reading with "UTF-8-BOM" strips the mark (and is harmless when
# the file has none), so the header no longer depends on the local encoding.
legis <- read.csv(
  "123.csv",
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8-BOM"
)

# Normalise the procedure-reference column to `id` BEFORE it is inspected or
# used. Both the clean and the BOM-mangled spellings are accepted; when the
# column is already called `id` this is a no-op.
names(legis)[
  names(legis) %in% c("procedure_ref1id", "ï..procedure_ref1id")
] <- "id"
if (!all(c("id", "member") %in% names(legis))) {
  stop("123.csv must provide the columns 'id' and 'member'", call. = FALSE)
}

head(legis)
## id amend harmon extend lay_down establish expand com_prov member
## 1 2005/0214(COD) 0 0 0 0 0 0 0 NONE
## 2 2006/0084(COD) 0 0 0 0 0 0 0 NONE
## 3 2006/0167(COD) 0 0 0 0 0 0 0 NONE
## 4 2007/0229(COD) 0 0 0 0 0 0 0 NONE
## 5 2007/0286(COD) 0 0 0 0 0 0 0 NONE
## 6 2008/0009(COD) 0 0 0 0 0 0 0 NONE

# Cross-tabulate procedures against `member`, then drop categories that never
# occur. `[[` avoids partial name matching; drop = FALSE keeps the object
# two-dimensional even if a single category survives the filter.
legis_table <- table(legis[["id"]], legis[["member"]])
legis_table <- legis_table[, colSums(legis_table) > 0, drop = FALSE]

# First quick (and not too pretty) CA plot.
CA_legis <- CA(legis_table)
# Column results of the CA; the recorded print-out follows below.
CA_legis[["col"]]
## $coord
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## com_prov 0.7211548 7.731987e-01 1.769154e+01 2.960563e+00 5.269865e+00
## establish 0.7211548 -3.781780e-01 -2.641957e-01 1.735698e+00 -1.471198e-01
## extend 0.7211548 -3.046382e-01 -2.707529e-01 -2.966657e+00 5.381034e+00
## lay_down 0.7211548 3.126331e-01 1.319246e+00 -1.014428e+00 -2.771346e+00
## NONE -1.3866649 1.856115e-14 -1.747287e-15 2.397944e-15 7.538484e-15
## amend 0.7211548 -1.912247e-01 -1.141510e-01 -4.771735e-01 2.492578e-01
## harmon 0.7211548 6.518065e+00 -5.942797e-01 5.096445e-01 4.410863e-01
##
## $contrib
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## com_prov 0.1098340 1.483507e-01 8.599776e+01 2.786374e+00 9.482421e+00
## establish 7.6883832 2.484265e+00 1.342472e+00 6.704050e+01 5.173218e-01
## extend 0.2745851 5.757264e-02 5.035494e-02 6.994640e+00 2.471677e+01
## lay_down 2.5261830 5.578341e-01 1.099855e+01 7.524220e+00 6.031558e+01
## NONE 65.7866948 1.384944e-26 1.358936e-28 2.961310e-28 3.143426e-27
## amend 22.6258134 1.869229e+00 7.375348e-01 1.491113e+01 4.370037e+00
## harmon 0.9885064 9.488275e+01 8.733334e-01 7.431353e-01 5.978742e-01
##
## $cos2
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## com_prov 0.001468589 1.688207e-03 8.838424e-01 2.475096e-02 7.842281e-02
## establish 0.129730886 3.567623e-02 1.741157e-02 7.515101e-01 5.399194e-03
## extend 0.004617047 8.239030e-04 6.508092e-04 7.813434e-02 2.570626e-01
## lay_down 0.041111641 7.726408e-03 1.375814e-01 8.134872e-02 6.071406e-01
## NONE 1.000000000 1.791706e-28 1.587762e-30 2.990440e-30 2.955459e-29
## amend 0.510069790 3.586415e-02 1.278004e-02 2.233188e-01 6.093549e-02
## harmon 0.011869040 9.696074e-01 8.060101e-03 5.927796e-03 4.440232e-03
##
## $inertia
## [1] 0.7478881 0.5926409 0.5947202 0.6144690 0.6578669 0.4435827 0.8328445
# Overall summary of the CA; the recorded print-out follows below.
summary(object = CA_legis)
##
## Call:
## CA(X = legis_table)
##
## The chi square of independence between the two variables is equal to 4246.36 (p-value = 1 ).
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## Variance 1.000 0.851 0.769 0.664 0.619 0.581
## % of var. 22.301 18.980 17.142 14.816 13.794 12.966
## Cumulative % of var. 22.301 41.282 58.424 73.239 87.034 100.000
##
## Rows (the 10 first)
## Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
## 2005/0214(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2006/0084(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2006/0167(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2007/0112(COD) | 99.102 | 0.721 0.110 0.011 | -0.269 0.018
## 2007/0152(COD) | 198.944 | 0.721 0.055 0.003 | -0.330 0.014
## 2007/0229(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2007/0286(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2008/0009(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## 2008/0028(COD) | 1.371 | 0.721 0.055 0.400 | -0.207 0.005
## 2008/0062(COD) | 2.030 | -1.387 0.203 1.000 | 0.000 0.000
## cos2 Dim.3 ctr cos2
## 2005/0214(COD) 0.000 | 0.000 0.000 0.000 |
## 2006/0084(COD) 0.000 | 0.000 0.000 0.000 |
## 2006/0167(COD) 0.000 | 0.000 0.000 0.000 |
## 2007/0112(COD) 0.002 | -0.220 0.013 0.001 |
## 2007/0152(COD) 0.001 | -0.309 0.013 0.001 |
## 2007/0229(COD) 0.000 | 0.000 0.000 0.000 |
## 2007/0286(COD) 0.000 | 0.000 0.000 0.000 |
## 2008/0009(COD) 0.000 | 0.000 0.000 0.000 |
## 2008/0028(COD) 0.033 | -0.130 0.002 0.013 |
## 2008/0062(COD) 0.000 | 0.000 0.000 0.000 |
##
## Columns
## Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
## com_prov | 747.888 | 0.721 0.110 0.001 | 0.773 0.148
## establish | 592.641 | 0.721 7.688 0.130 | -0.378 2.484
## extend | 594.720 | 0.721 0.275 0.005 | -0.305 0.058
## lay_down | 614.469 | 0.721 2.526 0.041 | 0.313 0.558
## NONE | 657.867 | -1.387 65.787 1.000 | 0.000 0.000
## amend | 443.583 | 0.721 22.626 0.510 | -0.191 1.869
## harmon | 832.844 | 0.721 0.989 0.012 | 6.518 94.883
## cos2 Dim.3 ctr cos2
## com_prov 0.002 | 17.692 85.998 0.884 |
## establish 0.036 | -0.264 1.342 0.017 |
## extend 0.001 | -0.271 0.050 0.001 |
## lay_down 0.008 | 1.319 10.999 0.138 |
## NONE 0.000 | 0.000 0.000 0.000 |
## amend 0.036 | -0.114 0.738 0.013 |
## harmon 0.970 | -0.594 0.873 0.008 |
# The results of the correspondence analysis are clearly not the best (see the
# chi-square statistic and its p-value), but we are using it to identify cases
# rather than to establish a relationship.
# The graphs below summarize the results. We essentially want to pick out the
# cases that lie furthest from the origin. Procedures concentrated around the
# origin are the 'usual' ones, whilst the further a procedure lies from the
# intersection of the dimensions, the more unusual it is.
#plot(CA_legis, asp=1:1, selectCol = 'contrib 5', fig.width = 25 ) ## quite an ugly plot
# If you run the line below, you can check the 20 procedures and 2 columns that contribute most to the 2 dimensions.
#plot(CA_legis, asp=0.5, selectRow = 'contrib 20' , selectCol = 'contrib 2' )
# Checking the 20 procedures and 4 columns that contribute the most to the 2 dimensions.
#plot(CA_legis, asp=0.5, selectRow = 'contrib 20' , selectCol = 'contrib 4' )
# Extract the eigenvalues/variances retained by each dimension (axis).
# Eigenvalues correspond to the amount of information retained by each axis.
# Dimensions are listed in decreasing order of the variance they explain:
# dimension 1 explains the most, followed by dimension 2, and so on.
# Any axis whose share exceeds 1 / (nrow(legis) - 1) = 1/946 ~ 0.001 (0.1%) in
# terms of rows, or 1/8 = 0.125 (12.5%) in terms of columns, should be
# considered important and included in the interpretation (see M. T. Bendixen
# 1995).
# NOTE(review): the original note attached "for columns" to the 1/946 figure;
# it is read here as the row threshold because it is derived from nrow() --
# confirm.
get_eigenvalue(X = CA_legis)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.0000000 22.30146 22.30146
## Dim.2 0.8510843 18.98042 41.28187
## Dim.3 0.7686423 17.14184 58.42372
## Dim.4 0.6643382 14.81571 73.23943
## Dim.5 0.6185286 13.79409 87.03351
## Dim.6 0.5814188 12.96649 100.00000
# Scree plot of the dimensions, with value labels on the bars.
fviz_eig(CA_legis, addlabels = TRUE, ylim = c(0, 35))

# Biplot of row and column points. The distance between any two row points (or
# any two column points) gives a measure of their similarity or dissimilarity:
# row points with similar profiles lie close together on the factor map, and
# the same holds true for column points.
# This may take a bit of time because there are quite a few points. `repel`
# would avoid label overlap, but there are too many rows to use it.
fviz_ca_biplot(CA_legis)

# Let's add some colour: rows are shaded by their cos2 along the gradient
# below, from low ("#00AFBB") through mid ("#E7B800") to high ("#FC4E07").
fviz_ca_row(
  CA_legis,
  col.row = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = FALSE
)
# Contribution of the procedures (rows) to the dimensions.
# Rows that contribute the most to Dim.1 and Dim.2 are the most important in
# explaining the variability in the data set. Rows that do not contribute much
# to any dimension, or that contribute only to the last dimensions, are less
# important.
head(CA_legis[["row"]][["contrib"]])
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 2005/0214(COD) 0.20304535 5.836379e-29 1.868973e-29 2.878310e-29 1.741468e-28
## 2006/0084(COD) 0.20304535 3.028389e-28 1.112909e-31 9.291087e-29 1.301363e-30
## 2006/0167(COD) 0.20304535 4.740962e-29 2.233700e-29 2.713802e-28 1.423427e-28
## 2007/0112(COD) 0.10983405 1.792248e-02 1.323964e-02 1.418812e+00 4.374841e+00
## 2007/0152(COD) 0.05491702 1.352924e-02 1.310231e-02 2.105747e+00 7.992118e+00
## 2007/0229(COD) 0.20304535 2.299050e-26 2.220336e-28 2.134680e-28 1.369183e-28
# Coloured plot for columns: column points shaded by their cos2.
fviz_ca_col(
  CA_legis,
  repel = TRUE,
  col.col = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

# Bar plot of cos2 for columns. cos2 lies in [0, 1]; a value closer to 1
# corresponds to a column/row that is well represented on the factor map.
fviz_cos2(CA_legis, axes = 1:2, choice = "col")

# Contribution of the columns (choice = "col") to the first two dimensions.
fviz_contrib(CA_legis, axes = 1:2, choice = "col")

# Biplot for the CA. NOTA BENE: here one can interpret only the distances among
# row points, or among column points, but NOT the row-to-column distances.
fviz_ca_biplot(X = CA_legis, repel = FALSE)
# Biplot with map = "rowprincipal" and arrows drawn for both rows and columns.
# If the angle between two arrows is acute, there is a strong association
# between the corresponding row and column. To interpret the distance between
# rows and a column, project the row points perpendicularly onto the column
# arrow.
# Only the 20 rows that contribute most are drawn. factoextra takes this filter
# as select.row = list(contrib = n); the FactoMineR-style selectRow string used
# previously is not an fviz_ca_biplot() argument.
fviz_ca_biplot(
  CA_legis,
  map = "rowprincipal",
  arrows = c(TRUE, TRUE),
  select.row = list(contrib = 20),
  repel = FALSE
)
# Contribution biplot, to see which points matter for the interpretation.
# In this display, points that contribute very little to the solution lie close
# to the centre of the biplot and are relatively unimportant.
# `arrows` is spelled out in full rather than relying on partial matching of
# `arrow`; the first flag is for rows, the second for columns.
fviz_ca_biplot(
  CA_legis,
  map = "colgreen",
  arrows = c(TRUE, FALSE),
  title = "CA - Biplot",
  col.row = "blue",
  # Alternative filter: select.row = list(cos2 = 0.6) shows the quality of the
  # rows on the factor map instead.
  select.row = list(contrib = 20),
  select.col = list(contrib = 5),
  repel = FALSE
)
# Description of the first two dimensions; show up to 10 column entries for
# the first one (recorded print-out below).
cal <- dimdesc(CA_legis, axes = c(1, 2))
head(cal[[1]][["col"]], n = 10)
## coord
## NONE -1.3866649
## lay_down 0.7211548
## amend 0.7211548
## establish 0.7211548
## com_prov 0.7211548
## extend 0.7211548
## harmon 0.7211548
# Export the eigenvalue bar plot and the biplot into a single PDF file.
# Bar plot of the eigenvalues.
scree.plot <- fviz_eig(CA_legis)
# Biplot of row and column variables.
biplot.ca <- fviz_ca_biplot(CA_legis)
ggexport(
  filename = "CA.pdf",
  plotlist = list(scree.plot, biplot.ca)
)
## file saved to CA.pdf