Cluster Text

# Load the term-weight table from its CSV export and show the first rows.
setwd("C:/Users/zul/Documents/bali/hasil")
data <- read.csv(file = "rekapall.csv")  # header = TRUE, sep = "," are read.csv defaults
head(data, n = 6L)

##    doc acid    agent agents alzheimers antibodies apparatus cancer cell
## 1 doc1    0 0.000000      0          0          0         0      0    0
## 2 doc2    0 6.643856      0          0          0         0      0    0
## 3 doc3    0 0.000000      0          0          0         0      0    0
## 4 doc4    0 0.000000      0          0          0         0      0    0
## 5 doc5    0 0.000000      0          0          0         0      0    0
## 6 doc6    0 0.000000      0          0          0         0      0    0
##   chronic composition compositions compounds comprising conditions
## 1       0     0.00000            0         0          0          0
## 2       0     4.53952            0         0          0          0
## 3       0     0.00000            0         0          0          0
## 4       0     0.00000            0         0          0          0
## 5       0     0.00000            0         0          0          0
## 6       0     0.00000            0         0          0          0
##   derivatives detection diagnosis  disease diseases disorders drug
## 1           0         0         0 2.717857        0         0    0
## 2           0         0         0 2.717857        0         0    0
## 3           0         0         0 2.717857        0         0    0
## 4           0         0         0 0.000000        0         0    0
## 5           0         0         0 0.000000        0         0    0
## 6           0         0         0 0.000000        0         0    0
##   inflammatory inhibitors kinase metabolic  method  methods
## 1            0          0      0         0 0.00000 0.000000
## 2            0          0      0         0 3.92139 0.000000
## 3            0          0      0         0 0.00000 3.120294
## 4            0          0      0         0 3.92139 0.000000
## 5            0          0      0         0 0.00000 0.000000
## 6            0          0      0         0 0.00000 0.000000
##   neurodegenerative novel parkinsons pharmaceutical preventing prevention
## 1          5.965784     0          0              0          0          0
## 2          0.000000     0          0              0          0          0
## 3          0.000000     0          0              0          0          0
## 4          0.000000     0          0              0          0          0
## 5          0.000000     0          0              0          0          0
## 6          0.000000     0          0              0          0          0
##   related substituted   system systems therapeutic thereof treating
## 1       0           0 0.000000       0           0       0 3.795859
## 2       0           0 0.000000       0           0       0 0.000000
## 3       0           0 0.000000       0           0       0 0.000000
## 4       0           0 0.000000       0           0       0 0.000000
## 5       0           0 5.506353       0           0       0 0.000000
## 6       0           0 5.506353       0           0       0 0.000000
##   treatment use uses    using cluster
## 1  0.000000   0    0 0.000000      11
## 2  0.000000   0    0 5.011588       9
## 3  3.490051   0    0 0.000000      10
## 4  0.000000   0    0 5.011588       5
## 5  0.000000   0    0 0.000000       8
## 6  0.000000   0    0 0.000000       8

# List the column names of the loaded table.
colnames(data)

##  [1] "doc"               "acid"              "agent"            
##  [4] "agents"            "alzheimers"        "antibodies"       
##  [7] "apparatus"         "cancer"            "cell"             
## [10] "chronic"           "composition"       "compositions"     
## [13] "compounds"         "comprising"        "conditions"       
## [16] "derivatives"       "detection"         "diagnosis"        
## [19] "disease"           "diseases"          "disorders"        
## [22] "drug"              "inflammatory"      "inhibitors"       
## [25] "kinase"            "metabolic"         "method"           
## [28] "methods"           "neurodegenerative" "novel"            
## [31] "parkinsons"        "pharmaceutical"    "preventing"       
## [34] "prevention"        "related"           "substituted"      
## [37] "system"            "systems"           "therapeutic"      
## [40] "thereof"           "treating"          "treatment"        
## [43] "use"               "uses"              "using"            
## [46] "cluster"

# Drop the first column (the document id), then split the term weights
# (data2) from the cluster label, which is column 45 once "doc" is gone.
data <- data[, -1]
data2 <- data[, -45]

# Mean weight of every term within each cluster. aggregate() puts the grouping
# key in a leading column named "Group.1".
hasil <- aggregate(x = data2, by = list(data$cluster), FUN = "mean")

# Print the five highest-weighted terms of each cluster.
# The grouping key is excluded before sorting: sorted together with the term
# means it was always reported as the first "term", leaving only four real ones.
term_cols <- setdiff(names(hasil), "Group.1")
for (i in seq_len(nrow(hasil))) {
  cat(paste0("cluster ", hasil$Group.1[i], ":  "))
  # unlist() turns the one-row data frame into a named numeric vector.
  s <- sort(unlist(hasil[i, term_cols]), decreasing = TRUE)
  cat(head(names(s), 5), "\n")
}

## cluster 1:  Group.1 agents cell novel uses 
## cluster 2:  Group.1 disease cancer disorders pharmaceutical 
## cluster 3:  Group.1 apparatus derivatives comprising composition 
## cluster 4:  Group.1 antibodies compounds use novel 
## cluster 5:  Group.1 inhibitors using systems kinase 
## cluster 6:  Group.1 thereof use method composition 
## cluster 7:  Group.1 uses thereof acid compounds 
## cluster 8:  Group.1 method system disease detection 
## cluster 9:  Group.1 disease treatment method inflammatory 
## cluster 10:  Group.1 compositions methods treatment disease 
## cluster 11:  Group.1 using methods treating disease 
## cluster 12:  Group.1 methods related diagnosis alzheimers 
## cluster 13:  Group.1 treating disorders diseases methods 
## cluster 14:  Group.1 use thereof compositions methods 
## cluster 15:  Group.1 treatment disease prevention use 
## cluster 16:  Group.1 diseases treatment metabolic neurodegenerative 
## cluster 17:  Group.1 comprising composition prevention treatment 
## cluster 18:  Group.1 preventing composition treating pharmaceutical

##    Group.1 preventing composition treating pharmaceutical  disease
## 18      18    4.21074    4.005458 3.349288       2.504437 2.238235
##    comprising inflammatory  method alzheimers   related metabolic
## 18   2.083074     1.069217 0.92268  0.7816301 0.7816301 0.7506849
##          use   chronic     novel derivatives      acid neurodegenerative
## 18 0.7478583 0.7370994 0.7370994   0.6130467 0.3564055         0.3509285
##    compounds  diseases   thereof treatment agent agents antibodies
## 18 0.2710725 0.2576954 0.2461426 0.2052971     0      0          0
##    apparatus cancer cell compositions conditions detection diagnosis
## 18         0      0    0            0          0         0         0
##    disorders drug inhibitors kinase methods parkinsons prevention
## 18         0    0          0      0       0          0          0
##    substituted system systems therapeutic uses using
## 18           0      0       0           0    0     0

library("factoextra")

## Warning: package 'factoextra' was built under R version 3.1.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.1.3

library("FactoMineR")

## Warning: package 'FactoMineR' was built under R version 3.1.3

# Principal component analysis of the term weights. Column 45 of data is
# removed first so it does not enter the PCA; no plot is drawn by PCA() itself.
data2 <- data[, -45]
res.pca <- PCA(X = data2, graph = FALSE)

# Eigenvalue of each dimension with its share of the variance.
get_eig(res.pca)

##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   2.6005180        5.9102682                    5.910268
## Dim.2   2.1284923        4.8374825                   10.747751
## Dim.3   1.8817338        4.2766678                   15.024419
## Dim.4   1.8276040        4.1536454                   19.178064
## Dim.5   1.7508935        3.9793035                   23.157367
## Dim.6   1.5256579        3.4674043                   26.624772
## Dim.7   1.4945348        3.3966699                   30.021442
## Dim.8   1.4436980        3.2811319                   33.302573
## Dim.9   1.3778338        3.1314404                   36.434014
## Dim.10  1.2656385        2.8764512                   39.310465
## Dim.11  1.2582478        2.8596541                   42.170119
## Dim.12  1.2141091        2.7593390                   44.929458
## Dim.13  1.1799973        2.6818119                   47.611270
## Dim.14  1.1518417        2.6178219                   50.229092
## Dim.15  1.1253557        2.5576265                   52.786718
## Dim.16  1.1086033        2.5195529                   55.306271
## Dim.17  1.0636766        2.4174467                   57.723718
## Dim.18  1.0403587        2.3644515                   60.088170
## Dim.19  0.9972266        2.2664240                   62.354594
## Dim.20  0.9824718        2.2328904                   64.587484
## Dim.21  0.9767692        2.2199300                   66.807414
## Dim.22  0.9578738        2.1769860                   68.984400
## Dim.23  0.9337268        2.1221064                   71.106506
## Dim.24  0.9099645        2.0681012                   73.174608
## Dim.25  0.8967292        2.0380209                   75.212629
## Dim.26  0.8594841        1.9533729                   77.166001
## Dim.27  0.8250709        1.8751610                   79.041162
## Dim.28  0.7970043        1.8113733                   80.852536
## Dim.29  0.7902816        1.7960944                   82.648630
## Dim.30  0.7389986        1.6795422                   84.328172
## Dim.31  0.6994618        1.5896859                   85.917858
## Dim.32  0.6517243        1.4811915                   87.399050
## Dim.33  0.6331740        1.4390318                   88.838082
## Dim.34  0.6174943        1.4033961                   90.241478
## Dim.35  0.5660831        1.2865525                   91.528030
## Dim.36  0.5586521        1.2696639                   92.797694
## Dim.37  0.5351412        1.2162300                   94.013924
## Dim.38  0.5175077        1.1761538                   95.190078
## Dim.39  0.4473394        1.0166805                   96.206758
## Dim.40  0.4080090        0.9272932                   97.134052
## Dim.41  0.3868659        0.8792408                   98.013292
## Dim.42  0.3228683        0.7337916                   98.747084
## Dim.43  0.2985001        0.6784094                   99.425493
## Dim.44  0.2527829        0.5745066                  100.000000

# Scatter plot of the documents on the principal components, grouped by
# cluster, with a 0.95-level ellipse per cluster.
#
# The cluster label is passed as a factor and paired with an explicit shape
# scale: ggplot2's default shape palette covers at most 6 groups, so with more
# clusters the points of the remaining groups are silently dropped from the
# plot ("Removed ... rows containing missing values").
cluster_ind <- as.factor(data$cluster)
p <- fviz_pca_ind(res.pca, geom = "point",
    habillage = cluster_ind, addEllipses = TRUE,
    ellipse.level = 0.95) +
  # One plotting symbol per cluster, recycled if there are more than 25.
  scale_shape_manual(values = rep_len(1:25, nlevels(cluster_ind))) +
  theme_minimal()

Plot

# Draw the plot object stored in p.
print(p)

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

## Warning: Removed 296 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

Biplot

# Biplot of documents and terms; only the variables (terms) are labelled.
# Documents are grouped by cluster with a 0.95-level ellipse per cluster.
#
# As a factor with an explicit shape scale, every cluster gets its own plotting
# symbol. ggplot2's default shape palette covers at most 6 groups, so without
# the manual scale the points of the remaining clusters are dropped.
cluster_bip <- as.factor(data$cluster)
fviz_pca_biplot(res.pca, label = "var", habillage = cluster_bip,
      addEllipses = TRUE, ellipse.level = 0.95) +
  # One plotting symbol per cluster, recycled if there are more than 25.
  scale_shape_manual(values = rep_len(1:25, nlevels(cluster_bip))) +
  theme_minimal()

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

## Warning: Removed 296 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

Network

#Build Network
library('igraph')

## Warning: package 'igraph' was built under R version 3.1.3

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library('network')

## Warning: package 'network' was built under R version 3.1.3

## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.

## 
## Attaching package: 'network'

## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges,
##     delete.vertices, get.edge.attribute, get.edges,
##     get.vertex.attribute, is.bipartite, is.directed,
##     list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute

library('sna')

## Warning: package 'sna' was built under R version 3.1.3

## sna: Tools for Social Network Analysis
## Version 2.3-2 created on 2014-01-13.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.

## 
## Attaching package: 'sna'

## The following object is masked from 'package:network':
## 
##     %c%

## The following objects are masked from 'package:igraph':
## 
##     %c%, betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census

library('ndtv')

## Warning: package 'ndtv' was built under R version 3.1.3

## Loading required package: networkDynamic

## Warning: package 'networkDynamic' was built under R version 3.1.3

## 
## networkDynamic: version 0.9.0, created on 2016-01-12
## Copyright (c) 2016, Carter T. Butts, University of California -- Irvine
##                     Ayn Leslie-Cook, University of Washington
##                     Pavel N. Krivitsky, University of Wollongong
##                     Skye Bender-deMoll, University of Washington
##                     with contributions from
##                     Zack Almquist, University of California -- Irvine
##                     David R. Hunter, Penn State University
##                     Li Wang
##                     Kirk Li, University of Washington
##                     Steven M. Goodreau, University of Washington
##                     Jeffrey Horner
##                     Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("networkDynamic").

## Loading required package: animation

## Warning: package 'animation' was built under R version 3.1.3

## 
## ndtv: version 0.9.0, created on 2016-2-18
## Copyright (c) 2016, Skye Bender-deMoll, University of Washington
##                     with contributions from
##                     Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ndtv").

library('visNetwork')

## Warning: package 'visNetwork' was built under R version 3.1.3

## 
## Attaching package: 'visNetwork'

## The following object is masked from 'package:igraph':
## 
##     %>%

library('reshape')

## Warning: package 'reshape' was built under R version 3.1.3

# Read the term-weight table from its CSV export.
setwd("C:/Users/zul/Documents/bali/hasil")

data <- read.csv("rekapall.csv", header = TRUE, sep = ",")
# NOTE(review): datax is not referenced anywhere below; kept only so the
# workspace contents stay the same.
datax <- data[, -45]

# Drop the first column (document id), then separate the term weights from
# column 45 (the cluster label).
data <- data[, -1]
data2 <- data[, -45]
txt <- colnames(data2)
nb <- length(txt)  # number of terms = number of network nodes

# Mean weight of each term per cluster. The grouping-key column is removed and
# the term columns are renamed to their position, which doubles as the node id.
hasil <- aggregate(x = data2, by = list(data$cluster), FUN = "mean")
hasil <- hasil[, -1]
colnames(hasil) <- seq_len(nb)

# Outer product of the first row of hasil with itself: entry (i, j) is the
# product of the mean weights of terms i and j in that cluster.
hasil2 <- as.matrix(hasil[1, ])
hasil3 <- crossprod(hasil2)  # same as t(hasil2) %*% hasil2
m <- hasil3

# Keep every unordered term pair once (strict upper triangle), in column-major
# order, as a from/to/value edge list.
upper_idx <- which(upper.tri(m), arr.ind = TRUE)
m2 <- data.frame(from = unname(upper_idx[, 1]),
                 to = unname(upper_idx[, 2]),
                 value = m[upper_idx])
edges <- m2

# One node per term; the tooltip is the term wrapped in a properly closed <p>.
nodes <- data.frame(id = seq_len(nb), label = txt,
 group = rep("A", nb), value = seq_len(nb),
 title = paste0("<p>", txt, "</p>"), stringsAsFactors = FALSE)

# Only pairs with a positive product become edges.
edges1 <- edges[edges$value > 0, ]
visNetwork(nodes, edges1, height = "500px", width = "100%") %>%
  visOptions(highlightNearest = TRUE) %>%
  visLayout(randomSeed = 123)

CA

library(RODBC)

## Warning: package 'RODBC' was built under R version 3.1.3

library(ca)

## Warning: package 'ca' was built under R version 3.1.3

# Fetch the result of the saved query "Query4" from the Access database.
setwd("C:/Users/zul/Documents/bali/hasil")
channel <- odbcConnectAccess("dbpaten.mdb")
# A failed connection is reported as -1 with a warning rather than an error,
# so check that a real connection handle came back.
if (!inherits(channel, "RODBC")) {
  stop("could not open dbpaten.mdb", call. = FALSE)
}
# Close the channel even if the query itself raises.
data <- tryCatch(
  sqlQuery(channel, "SELECT * FROM Query4"),
  finally = odbcClose(channel)
)
# On failure sqlQuery() returns the error messages instead of a data frame;
# stop here rather than feeding them into the analysis below.
if (!is.data.frame(data)) {
  stop("Query4 failed: ", paste(data, collapse = "; "), call. = FALSE)
}


# Re-read the CSV export and append its cluster column to the query result.
# cbind() names the appended column after the expression, i.e. "data1$cluster".
data1 <- read.csv(file = "rekapall.csv")  # header = TRUE, sep = "," are read.csv defaults
m5 <- as.data.frame(cbind(data, data1$cluster))
# Disabled filter kept from the original: m5=m5[m5$Var1!="",]

# Rename the first two columns.
colnames(m5)[1:2] <- c("kode", "clust")

# Multiple correspondence analysis of m5 on the indicator matrix, 5 dimensions.
mca41 <- mjca(m5, lambda = "indicator", nd = 5)

# Number of distinct categories in each column of m5.
# Each column is converted to a factor on its own; apply() would first coerce
# the whole data frame to a character matrix and count levels of the formatted
# strings instead of the original values.
# NOTE(review): assumed to match how mjca() derives its categories — confirm.
cats1 <- vapply(m5, function(x) nlevels(factor(x)), integer(1))
cats1

##          kode         clust         Expr1 data1$cluster 
##           500           450            36            18

# One row per category: its MCA column coordinates plus the name of the
# variable it belongs to (each variable name repeated once per category).
mca4_vars_df1 <- data.frame(mca41$colcoord,
    Variable = rep(names(cats1), cats1))

# Shorten the labels of the "kode" and "clust" categories by removing the
# variable name in front of them. The patterns are anchored at the start so
# that only that leading name is removed: an unanchored "clust" also cut the
# text out of the "data1$cluster" labels and out of any level containing it.
mca41$levelnames <- sub("^kode", "", mca41$levelnames)
mca41$levelnames <- sub("^clust", "", mca41$levelnames)


rownames(mca4_vars_df1) <- mca41$levelnames

# plot of variable categories on the first two dimensions, coloured by variable
ggplot(data = mca4_vars_df1,
       aes(x = X1, y = X2, label = rownames(mca4_vars_df1))) +
  geom_hline(yintercept = 0, colour = "gray70") +
  geom_vline(xintercept = 0, colour = "gray70") +
  geom_text(aes(colour = Variable)) +
  ggtitle("MCA") +
  labs(colour = "Response") +
  theme_bw()

###############################################

Seminar Bali

Zulhanif

2016

Cluster Text

Plot

Biplot

Network

CA