rDNA

rDNA package workshop

This is an rDNA notebook containing the code and explanations given by Prof. Philip Leifeld in a workshop at the University of Essex in 2021.

rDNA makes it possible to control DNA from R and to extend its functionality. The two programs must be installed in the same folder so that they can communicate with each other.

Library

# One-time installation steps, kept commented out so that re-running the
# notebook does not reinstall anything: three add-on packages, followed by the
# rDNA source tarball installed from a local path.
# install.packages("ggraph")
# install.packages("MCMCpack")
# install.packages("rem")
# install.packages("~/dna/rDNA_2.1.18.tar.gz", repos = NULL, type = "source")

# Attach the rDNA package.
library(rDNA)

## Version:      2.1.18
## Date:         2019-09-08
## Author:       Philip Leifeld  (University of Essex)
## Contributors: Johannes B. Gruber (University of Glasgow),
##               Tim Henrichsen  (Scuola superiore Sant'Anna Pisa)
## Project home: github.com/leifeld/dna

# Open the package-level help index for rDNA.
help(package="rDNA")

# Fix the random seed so that results relying on R's random number generator
# can be reproduced across runs.
set.seed(12345)

# Initialize the link to the DNA .jar file. It is called without arguments
# here; the message printed below reports which jar file was picked up.
dna_init()

## Jar file: /Users/rubensyanes/dna/dna-2.0-beta25.jar

# Open the DNA graphical interface on the sample database. As the message
# below states, control returns to R once the DNA window is closed.
dna_gui("sample.dna")

## To return to R, close the DNA window when done.

# Open a connection to the same database file. `conn` is the handle that the
# later dna_*() calls in this notebook receive as their first argument.
conn <- dna_connection("sample.dna")

## Data loaded: 35 statements and 9 documents.
## DNA database: /Users/rubensyanes/dna/sample.dna
## 35 statements in 9 documents
## Statement types: DNA Statement, Annotation

# Auto-print the connection object to show its summary again.
conn

## DNA database: /Users/rubensyanes/dna/sample.dna
## 35 statements in 9 documents
## Statement types: DNA Statement, Annotation

# Build a network from the connection using dna_network()'s default settings.
# Its arguments correspond to the options offered on DNA's export screen;
# see help(dna_network) for the full list.
nw <- dna_network(conn)

## (1/5): Processing network options... Done.
## (2/5): Filtering statements...
##        34 out of 35 statements retained.
## (3/5): Compiling node labels... 
##        6 entries for the first and 6 entries for the second variable.
## (4/5): Computing network matrix... Done.
## (5/5): Retrieving results.

# Inspect the network matrix in the data viewer (opens a viewer window, so
# this step is intended for interactive sessions).
View(nw)

# Plot the network with the default plotting options.
dna_plotNetwork(nw)

## Using `bipartite` as default layout

Example of a congruence network

# Build a one-mode congruence network. Duplicate handling is set to
# "document", and statements coded with the general "legislation" concept
# are excluded.
congruence <- dna_network(
  conn,
  networkType = "onemode",
  qualifierAggregation = "congruence",
  duplicates = "document",
  excludeValues = list(
    concept = "There should be legislation to regulate emissions."
  )
)

## (1/5): Processing network options... Done.
## (2/5): Filtering statements...
##        [Excluded] concept: There should be legislation to regulate emissions.
##        14 out of 35 statements retained.
## (3/5): Compiling node labels... 
##        6 entries for the first and 5 entries for the second variable.
## (4/5): Computing network matrix... Done.
## (5/5): Retrieving results.

# Plot the congruence network with the default layout.
dna_plotNetwork(congruence)

## Using "stress" as default layout

# The same network drawn with three alternative layout algorithms.
dna_plotNetwork(congruence, layout = "kk")

dna_plotNetwork(congruence, layout = "drl")

dna_plotNetwork(congruence, layout = "mds")

# A layout combined with appearance options: node size, edge width range,
# label font size and label truncation length.
dna_plotNetwork(
  congruence,
  layout = "mds",
  node_size = 8,
  edge_size_range = c(1, 5),
  font_size = 8,
  truncate = 80
)

Affiliation network

# Build a two-mode (affiliation) network. Qualifier values are aggregated with
# "combine", duplicate handling is set to "document", and statements whose
# agreement value is 0 are excluded.
affiliation <- dna_network(
  conn,
  networkType = "twomode",
  qualifierAggregation = "combine",
  duplicates = "document",
  excludeValues = list(agreement = 0)
)

## (1/5): Processing network options... Done.
## (2/5): Filtering statements...
##        [Excluded] agreement: 0
##        11 out of 35 statements retained.
## (3/5): Compiling node labels... 
##        5 entries for the first and 6 entries for the second variable.
## (4/5): Computing network matrix... 
##        An edge weight of 0 maps onto integer combination: 
##        An edge weight of 1 maps onto integer combination: 1 
##        Done.
## (5/5): Retrieving results.

# Plot the affiliation network with the "fr" layout and custom node, edge and
# label sizes.
dna_plotNetwork(
  affiliation,
  layout = "fr",
  node_size = 8,
  edge_size_range = c(1, 4),
  font_size = 8,
  truncate = 80
)

Plotting network using statnet

Networks can also be drawn with another library, statnet.

# Attach statnet. As the startup messages below show, this also loads its
# component packages (network, ergm, tergm, sna and others).
library(statnet)

## Loading required package: tergm

## Loading required package: ergm

## Loading required package: network

## 
## 'network' 1.18.0 (2022-10-05), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information

## 
## 'ergm' 4.2.3 (2022-10-02), part of the Statnet Project
## * 'news(package="ergm")' for changes since last version
## * 'citation("ergm")' for citation information
## * 'https://statnet.org' for help, support, and other information

## 'ergm' 4 is a major update that introduces some backwards-incompatible
## changes. Please type 'news(package="ergm")' for a list of major
## changes.

## Loading required package: networkDynamic

## 
## 'networkDynamic' 0.11.2 (2022-05-04), part of the Statnet Project
## * 'news(package="networkDynamic")' for changes since last version
## * 'citation("networkDynamic")' for citation information
## * 'https://statnet.org' for help, support, and other information

## Registered S3 method overwritten by 'tergm':
##   method                   from
##   simulate_formula.network ergm

## 
## 'tergm' 4.1.0 (2022-06-22), part of the Statnet Project
## * 'news(package="tergm")' for changes since last version
## * 'citation("tergm")' for citation information
## * 'https://statnet.org' for help, support, and other information

## 
## Attaching package: 'tergm'

## The following object is masked from 'package:ergm':
## 
##     snctrl

## Loading required package: ergm.count

## 
## 'ergm.count' 4.1.1 (2022-05-24), part of the Statnet Project
## * 'news(package="ergm.count")' for changes since last version
## * 'citation("ergm.count")' for citation information
## * 'https://statnet.org' for help, support, and other information

## Loading required package: sna

## Loading required package: statnet.common

## 
## Attaching package: 'statnet.common'

## The following object is masked from 'package:ergm':
## 
##     snctrl

## The following objects are masked from 'package:base':
## 
##     attr, order

## sna: Tools for Social Network Analysis
## Version 2.7 created on 2022-05-09.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.

## Loading required package: tsna

## 
## 'statnet' 2019.6 (2019-06-13), part of the Statnet Project
## * 'news(package="statnet")' for changes since last version
## * 'citation("statnet")' for citation information
## * 'https://statnet.org' for help, support, and other information

## unable to reach CRAN

# Convert the congruence matrix into a network object with network().
nw2 <- network(congruence)

# Plot the network object with labels shown and arrowheads suppressed. Edge
# widths are taken from the squared congruence values.
plot(
  nw2,
  edge.lwd = congruence^2,
  displaylabels = TRUE,
  usearrows = FALSE,
  edge.col = "gray"
)

Attributes

Everything that can be done in DNA can also be done in R and written back to the DNA file. Here is an example using attributes: we can retrieve the attribute table, modify it, and put it back using functions such as

dna_getAttributes
dna_setAttributes
and fix()

# Retrieve the attribute metadata for the "organization" variable of the
# "DNA Statement" statement type.
at <- dna_getAttributes(
  conn,
  statementType = "DNA Statement",
  variable = "organization"
)

View(at)

# The attribute table can be modified in R and then written back to DNA with
# dna_setAttributes(); open its help page for the details.
?dna_setAttributes

Different graphs

Hive plots: a different way of presenting the data that is not available in visone. It shows the ties among actors grouped by type.

# Draw a hive plot of the congruence network, with sort_by = "frequency".
dna_plotHive(congruence, sort_by = "frequency")

Clustering and dendrograms:

There are several ways of clustering the data and displaying the clusters that are not available in visone or other software packages.

# Cluster the data with dna_cluster()'s default settings.
clust <- dna_cluster(conn)

# Basic dendrogram via the generic plot() function.
plot(clust)

# Dendrogram via rDNA's dedicated plotting function.
dna_plotDendro(clust)

# The same workflow with an explicitly chosen clustering method.
clust2 <- dna_cluster(conn, clust.method = "edge_betweenness")
# Plot it.
dna_plotDendro(clust2)

# Another variant: different duplicate handling, the "type" attribute, a cut
# into two clusters, and one concept excluded.
clust3 <- dna_cluster(
  conn,
  duplicates = "acrossrange",
  attribute1 = "type",
  cutree.k = 2,
  excludeValues = list(
    "concept" = "There should be legislation to regulate emissions."
  )
)

## Warning: In factor analysis:  Error in solve.default(cv): Lapack routine dgesv: system is exactly singular: U[4,4] = 0

## Warning in type.convert.default(unlist(x, use.names = FALSE)): 'as.is' should be
## specified by the caller; using TRUE

# Plot the third clustering as a dendrogram with diagonal branches, a brewer
# colour scheme and red rectangles.
dna_plotDendro(
  clust3,
  shape = "diagonal",
  colors = "brewer",
  rectangles = "red"
)

Heatmaps

Another form of clustering, which creates clusters along both axes (actors and concepts) and shows the intensity of the relationship (agreement).

# Heatmaps are drawn from a clustering object, so cluster the data first.
# NOTE(review): the variable name `heatmap` shadows stats::heatmap in this
# session; calls to heatmap() still resolve to the function, but a different
# name would be clearer.
heatmap <- dna_cluster(conn)

# Plot the heatmap from the clustering result.
dna_plotHeatmap(heatmap)

## Warning: Position guide is perpendicular to the intended axis. Did you mean to
## specify a different guide `position`?

## Warning: guide_axis(): Discarding guide on merge. Do you have more than one guide with the same position?
## guide_axis(): Discarding guide on merge. Do you have more than one guide with the same position?

# Another clustering: duplicates included, the "walktrap" clustering method,
# and one concept excluded.
clust4 <- dna_cluster(
  conn,
  duplicates = "include",
  clust.method = "walktrap",
  excludeValues = list(
    "concept" = "There should be legislation to regulate emissions."
  )
)

## Warning: In factor analysis:  Error in solve.default(cv): Lapack routine dgesv: system is exactly singular: U[6,6] = 0

# Heatmap of that clustering with cell values shown, labels truncated to 20
# characters, no dendrogram on the x axis and no legend.
dna_plotHeatmap(
  clust4,
  values = TRUE,
  truncate = 20,
  colours = "brewer",
  custom_colors = "YlOrRd",
  dendro_x = FALSE,
  dendro_y_size = 0.4,
  show_legend = FALSE
)

## Warning in dna_plotHeatmap(clust4, values = TRUE, truncate = 20, colours =
## "brewer", : After truncation, some column labels are now exactly the same.Those
## are followed by # + number now. Consider increasing the 'truncation' value.

## Warning: Position guide is perpendicular to the intended axis. Did you mean to
## specify a different guide `position`?

## Warning: guide_axis(): Discarding guide on merge. Do you have more than one guide with the same position?
## guide_axis(): Discarding guide on merge. Do you have more than one guide with the same position?

# Multidimensional scaling (MDS): cluster the data with cutree.k = 2.
clust5 <- dna_cluster(conn, cutree.k = 2)

# Plot the coordinates with labels shown; clust_method = "inherit" is passed
# so that the plot takes over the clustering from `clust5` (presumably --
# confirm in ?dna_plotCoordinates).
dna_plotCoordinates(clust5, label = TRUE, clust_method = "inherit")

Other graphs

# Bar plot drawn directly from the connection, using default settings.
dna_barplot(conn)

# Frequency plot drawn directly from the connection, using default settings.
dna_plotFrequency(conn)

Scaling

Scale the ideological positions of two variables (e.g., organizations and concepts) from a DNA connection by using Markov Chain Monte Carlo for binary one-dimensional Item Response Theory (IRT).

For one-dimensional ordinal scaling, see dna_scale1dord, for two-dimensional binary scaling, see dna_scale2dbin and for two-dimensional ordinal scaling dna_scale2dord.

# One-dimensional binary scaling with default settings. The run prints MCMC
# progress messages (see the MCMCirt1d iteration log below).
scale1d <- dna_scale1dbin(conn)

## 
## 
## MCMCirt1d iteration 1 of 21000 
## 
## 
## MCMCirt1d iteration 2101 of 21000 
## 
## 
## MCMCirt1d iteration 4201 of 21000 
## 
## 
## MCMCirt1d iteration 6301 of 21000 
## 
## 
## MCMCirt1d iteration 8401 of 21000 
## 
## 
## MCMCirt1d iteration 10501 of 21000 
## 
## 
## MCMCirt1d iteration 12601 of 21000 
## 
## 
## MCMCirt1d iteration 14701 of 21000 
## 
## 
## MCMCirt1d iteration 16801 of 21000 
## 
## 
## MCMCirt1d iteration 18901 of 21000

# Plot the binary scaling result with default settings.
dna_plotScale(scale1d)

# Plot the same result for the "concept" variable.
dna_plotScale(scale1d, variable = "concept")

# One-dimensional ordinal scaling. Note that this overwrites `scale1d`, so the
# binary scaling result from above is no longer available afterwards.
scale1d <- dna_scale1dord(conn)

## 
## 
## MCMCordfactanal iteration 1 of 21000 
## Lambda = 
##   -1.90932   1.69800
##   -0.78973   0.20243
##   -0.02646   0.17979
##    0.51661   0.40185
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  1.00  1.00
## 
## MCMCordfactanal iteration 2101 of 21000 
## Lambda = 
##   -0.82640  -0.32900
##   -0.43979  -0.55998
##    0.32408  -0.18547
##    0.89479  -0.19001
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 4201 of 21000 
## Lambda = 
##   -1.05071  -0.92448
##    1.61505   0.68242
##    0.24747  -0.71510
##    0.77408  -0.32480
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 6301 of 21000 
## Lambda = 
##   -0.36149  -1.01515
##   -1.96538  -0.24338
##    0.08994  -0.29617
##    0.03515  -0.04422
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 8401 of 21000 
## Lambda = 
##   -0.33139  -0.47593
##   -0.48130  -0.26439
##   -0.32258  -0.40658
##    1.34756   1.31353
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 10501 of 21000 
## Lambda = 
##    0.68501  -1.06066
##   -1.12624  -0.48182
##    0.98543  -1.06516
##   -0.19771   1.24103
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 12601 of 21000 
## Lambda = 
##   -1.39500   0.54184
##   -1.69035  -0.27513
##    0.65361  -1.18230
##    0.50229  -0.25786
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 14701 of 21000 
## Lambda = 
##   -1.68453   0.77078
##   -0.43467  -0.26561
##    0.25221   0.78360
##    0.97104   1.99177
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 16801 of 21000 
## Lambda = 
##   -0.51913  -0.28967
##    0.02508   1.50117
##    1.24018  -0.96064
##   -0.41210   0.28211
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## MCMCordfactanal iteration 18901 of 21000 
## Lambda = 
##   -0.13022   0.78629
##   -0.51323   0.17091
##    0.53120   1.82675
##    0.78030  -0.61750
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.80  1.00
## 
## Acceptance rates:
##  CO2 legislation will not hurt the economy.
##                                           1
##  Climate change is caused by greenhouse gases (CO2).
##                                                    1
##  Emissions legislation should regulate CO2.
##                                         0.8
##  There should be legislation to regulate emissions.
##                                                   1

# Plot the ordinal scaling result with default settings.
dna_plotScale(scale1d)

# Plot it again with hpd set to NULL.
dna_plotScale(scale1d, hpd = NULL)

# Two-dimensional ordinal scaling with default settings.
scale2d <- dna_scale2dord(conn)

## 
## 
## MCMCordfactanal iteration 1 of 21000 
## Lambda = 
##   -1.95065  -0.87668   0.21744
##   -1.35127  -0.53513   0.26759
##    1.79359   0.97417  -0.23250
##    2.18443   0.19638  -0.92037
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  1.00  1.00
## 
## MCMCordfactanal iteration 2101 of 21000 
## Lambda = 
##   -4.03786  -1.25312   4.96453
##   -3.45282   0.81212  -2.79325
##    1.37119  -5.80588   8.34958
##    2.16185  -6.30302   0.88738
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.88  1.00
## 
## MCMCordfactanal iteration 4201 of 21000 
## Lambda = 
##   -0.12282   2.03901   2.42270
##   -0.57414   1.83418  -0.89488
##    5.25362  -0.06082   6.69390
##    4.80210  -5.45602   1.94710
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.89  1.00
## 
## MCMCordfactanal iteration 6301 of 21000 
## Lambda = 
##   -2.87432   3.67372   0.51369
##    0.73254  -4.52150   2.03549
##    1.39584   3.33188  -0.68912
##    1.95929   3.45435  -0.36147
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.89  1.00
## 
## MCMCordfactanal iteration 8401 of 21000 
## Lambda = 
##   -0.15157   2.94249  -2.13297
##   -4.96574   0.50342   1.07052
##    1.55790   3.92720  -0.74218
##    1.06475   0.44896  -0.03379
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.89  1.00
## 
## MCMCordfactanal iteration 10501 of 21000 
## Lambda = 
##    0.28460   4.37527   2.62658
##    0.50142  -0.44468   1.35233
##    3.38401   2.07784   1.57321
##    4.29147  -0.42876  -4.07342
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.89  1.00
## 
## MCMCordfactanal iteration 12601 of 21000 
## Lambda = 
##   -3.50096   1.11706   4.88286
##    0.34934  -3.83316  -4.09778
##   -1.07734   2.67181  -1.01241
##   -0.42818  -3.89934   6.20184
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.89  1.00
## 
## MCMCordfactanal iteration 14701 of 21000 
## Lambda = 
##   -0.90220  -0.63824  -1.06059
##   -2.28516   1.58948   3.65930
##    5.87839  -5.43868  -1.00527
##    1.16825  -1.45921   2.38573
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.90  1.00
## 
## MCMCordfactanal iteration 16801 of 21000 
## Lambda = 
##   -1.03125   6.68265   4.65284
##    1.17927  -1.27841  -1.71156
##    4.30052  -2.06077   3.81571
##    4.54336   4.83306   1.82446
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.90  1.00
## 
## MCMCordfactanal iteration 18901 of 21000 
## Lambda = 
##   -6.49189  -4.78595  -2.59675
##   -1.32752   2.17509   3.61072
##    0.66218  -4.37775  -1.60058
##    3.75221  -2.27873   8.71540
## 
## Metropolis-Hastings acceptance rates = 
##   1.00  1.00  0.90  1.00
## 
## Acceptance rates:
##  CO2 legislation will not hurt the economy.
##                                           1
##  Climate change is caused by greenhouse gases (CO2).
##                                                    1
##  Emissions legislation should regulate CO2.
##                                        0.89
##  There should be legislation to regulate emissions.
##                                                   1

# Plot the two-dimensional scaling result in two dimensions, with hpd set to
# NULL and a smaller label size.
dna_plotScale(scale2d, dimensions = 2, hpd = NULL, label_size = 2)

Document management

As we did before with attributes, we can also retrieve the documents, change them, and upload them again:

dna_getDocuments, to retrieve them
dna_setDocuments, to save them in the file again

# Retrieve all documents of the DNA file as a data frame.
documents <- dna_getDocuments(conn)

# Inspect its structure (column names, types and first values).
str(documents)

## Classes 'dna_dataframe' and 'data.frame':    9 obs. of  10 variables:
##  $ id     : int  1 2 3 4 5 6 7 9 8
##  $ title  : chr  "999-999: Bluestein, Joel-BUS-Y" "109-867: Voinovich, George-CON-R-Y" "109-867: Whitman, Christine Todd-GOV-N" "109-1: Callahan, Kateri-NGO-Y" ...
##  $ text   : chr  "\nTestimony at hearing on Powerplant multipollutant legislation before Subcommittee on Clean Air, Climate Chang"| __truncated__ "\nOPENING STATEMENT OF HON. GEORGE V. VOINOVICH, U.S. SENATOR FROM THE STATE OF OHIO\n\nSenator Voinovich. Than"| __truncated__ "\nStatement of Hon. Christine Todd Whitman, Administrator, U.S. Evironmental Protection Agency, April 8, 2003\n"| __truncated__ "\nSTATEMENT OF KATERI CALLAHAN\n\nMs. Callahan. Thank you, Mr. Chairman. My name is Kateri Callahan, and I serv"| __truncated__ ...
##  $ coder  : int  2 2 2 2 2 2 2 1 2
##  $ author : chr  "Bluestein, Joel" "Voinovich, George" "Whitman, Christine Todd" "Callahan, Kateri" ...
##  $ source : chr  "109" "109" "109" "109" ...
##  $ section: chr  "876" "867" "867" "1" ...
##  $ notes  : chr  "" "" "This is a note" "" ...
##  $ type   : chr  "" "" "" "" ...
##  $ date   : POSIXct, format: "2005-01-25 21:00:00" "2005-02-01 21:00:00" ...

# Change individual fields of the documents data frame. In this run both
# values already match what the str() output above shows for these cells,
# which is why the summary below reports 0 updated titles and notes.
documents$title[1] <- "999-999: Bluestein, Joel-BUS-Y"
documents$notes[3] <- "This is a note"


# Write the data frame back to the database. With simulate = FALSE the changes
# are actually written, as the first line of the output below confirms.
dna_setDocuments(conn, documents, removeStatements = TRUE, simulate = FALSE)

## Changes will be written both in memory and to the SQL database!
## New documents added: 0
## Deleted documents:   0
## Deleted statements:  0
## Titles updated:      0
## Texts updated:       0
## Coders updated:      0
## Authors updated:     0
## Sources updated:     0
## Sections updated:    0
## Notes updated:       0
## Types updated:       0
## Dates updated:       0

# simulate = FALSE is what makes the changes take effect: both calls that use
# it print "Changes will be written both in memory and to the SQL database!".
# simulate = TRUE is presumably a dry run that writes nothing -- confirm in
# ?dna_setDocuments.

# Remove the document with ID 9. removeStatements = TRUE is passed for the
# statements contained in it (the output below reports 0 removed).
dna_removeDocument(conn, 9, removeStatements = TRUE, simulate=FALSE, verbose = TRUE)

## Changes will be written both in memory and to the SQL database!
## Statements removed in Document 9: 0
## Removal of Document 9: successful.

# Add a new document to the database.

# Prepare a dummy text (the 26 lower-case letters separated by spaces) and a
# date for the new document.
my_text <- paste(letters, collapse = " ")
some_date <- as.POSIXct("2022-10-01")

# Add the document with a title, the text and the date.
dna_addDocument(
  conn,
  title = "My new document",
  text = my_text,
  date = some_date
)

## A new document with ID 9 was added to the database.

# Reopen the DNA interface on the sample database to inspect the changes made
# above from R.
dna_gui("sample.dna")

## To return to R, close the DNA window when done.

Here is the complete list of elements that can be manipulated, together with their rDNA functions:

# Overview of the database elements that can be manipulated from R, each mapped
# to the names of the rDNA functions for it. The document and attribute
# getters/setters use the plural names (dna_getDocuments, dna_setDocuments,
# dna_getAttributes, dna_setAttributes), matching the functions actually called
# elsewhere in this notebook.
# NOTE(review): the remaining names are reproduced as given and have not been
# checked against the package index -- verify with help(package = "rDNA").
elements <- list(
  "Documents" = c("dna_addDocument",
                  "dna_removeDocument",
                  "dna_getDocuments",
                  "dna_setDocuments"),
  "Statements" = c("dna_addStatementType",
                   "dna_removeStatement",
                   "dna_getStatementType",
                   "dna_colorStatementType"),
  "Attributes" = c("dna_addAttribute",
                   "dna_removeAttribute",
                   "dna_getAttributes",
                   "dna_setAttributes"),
  "Variables" = c("dna_addVariable",
                  "dna_removeVariable",
                  "dna_getVariables",
                  "dna_renameVariable",
                  "dna_recastVariable"),
  "Regular expressions" = c("dna_addRegex",
                            "dna_removeRegex",
                            "dna_getRegex"),
  "Coders" = c("dna_addCoder",
               "dna_removeCoder",
               "dna_getCoder",
               "dna_setCoder"),
  "Settings" = c("dna_getSettings",
                 "dna_setSettings"))

# Print the list without quotes around the function names.
print.noquote(elements)

## $Documents
## [1] dna_addDocument    dna_removeDocument dna_getDocuments   dna_setDocuments  
## 
## $Statements
## [1] dna_addStatementType   dna_removeStatement    dna_getStatementType  
## [4] dna_colorStatementType
## 
## $Attributes
## [1] dna_addAttribute    dna_removeAttribute dna_getAttributes  
## [4] dna_setAttributes  
## 
## $Variables
## [1] dna_addVariable    dna_removeVariable dna_getVariables   dna_renameVariable
## [5] dna_recastVariable
## 
## $`Regular expressions`
## [1] dna_addRegex    dna_removeRegex dna_getRegex   
## 
## $Coders
## [1] dna_addCoder    dna_removeCoder dna_getCoder    dna_setCoder   
## 
## $Settings
## [1] dna_getSettings dna_setSettings