Cluster Text
# Load the document-term weight table from its CSV file and preview the
# first rows.
setwd("C:/Users/zul/Documents/bali/hasil")
data <- read.csv(file = "rekapall.csv", header = TRUE, sep = ",")
head(data, n = 6L)
## doc acid agent agents alzheimers antibodies apparatus cancer cell
## 1 doc1 0 0.000000 0 0 0 0 0 0
## 2 doc2 0 6.643856 0 0 0 0 0 0
## 3 doc3 0 0.000000 0 0 0 0 0 0
## 4 doc4 0 0.000000 0 0 0 0 0 0
## 5 doc5 0 0.000000 0 0 0 0 0 0
## 6 doc6 0 0.000000 0 0 0 0 0 0
## chronic composition compositions compounds comprising conditions
## 1 0 0.00000 0 0 0 0
## 2 0 4.53952 0 0 0 0
## 3 0 0.00000 0 0 0 0
## 4 0 0.00000 0 0 0 0
## 5 0 0.00000 0 0 0 0
## 6 0 0.00000 0 0 0 0
## derivatives detection diagnosis disease diseases disorders drug
## 1 0 0 0 2.717857 0 0 0
## 2 0 0 0 2.717857 0 0 0
## 3 0 0 0 2.717857 0 0 0
## 4 0 0 0 0.000000 0 0 0
## 5 0 0 0 0.000000 0 0 0
## 6 0 0 0 0.000000 0 0 0
## inflammatory inhibitors kinase metabolic method methods
## 1 0 0 0 0 0.00000 0.000000
## 2 0 0 0 0 3.92139 0.000000
## 3 0 0 0 0 0.00000 3.120294
## 4 0 0 0 0 3.92139 0.000000
## 5 0 0 0 0 0.00000 0.000000
## 6 0 0 0 0 0.00000 0.000000
## neurodegenerative novel parkinsons pharmaceutical preventing prevention
## 1 5.965784 0 0 0 0 0
## 2 0.000000 0 0 0 0 0
## 3 0.000000 0 0 0 0 0
## 4 0.000000 0 0 0 0 0
## 5 0.000000 0 0 0 0 0
## 6 0.000000 0 0 0 0 0
## related substituted system systems therapeutic thereof treating
## 1 0 0 0.000000 0 0 0 3.795859
## 2 0 0 0.000000 0 0 0 0.000000
## 3 0 0 0.000000 0 0 0 0.000000
## 4 0 0 0.000000 0 0 0 0.000000
## 5 0 0 5.506353 0 0 0 0.000000
## 6 0 0 5.506353 0 0 0 0.000000
## treatment use uses using cluster
## 1 0.000000 0 0 0.000000 11
## 2 0.000000 0 0 5.011588 9
## 3 3.490051 0 0 0.000000 10
## 4 0.000000 0 0 5.011588 5
## 5 0.000000 0 0 0.000000 8
## 6 0.000000 0 0 0.000000 8
# List every column of the loaded table (document id, terms, cluster).
colnames(data)
## [1] "doc" "acid" "agent"
## [4] "agents" "alzheimers" "antibodies"
## [7] "apparatus" "cancer" "cell"
## [10] "chronic" "composition" "compositions"
## [13] "compounds" "comprising" "conditions"
## [16] "derivatives" "detection" "diagnosis"
## [19] "disease" "diseases" "disorders"
## [22] "drug" "inflammatory" "inhibitors"
## [25] "kinase" "metabolic" "method"
## [28] "methods" "neurodegenerative" "novel"
## [31] "parkinsons" "pharmaceutical" "preventing"
## [34] "prevention" "related" "substituted"
## [37] "system" "systems" "therapeutic"
## [40] "thereof" "treating" "treatment"
## [43] "use" "uses" "using"
## [46] "cluster"
# Remove the leading document-id column, then split off the term weights
# (data2) by dropping the 45th remaining column, the cluster label.
data <- data[, -1L]
data2 <- data[, -45L]
# Mean weight of every term within each cluster. aggregate() returns the
# grouping key as the first column, named Group.1.
hasil <- aggregate(data2, by = list(data$cluster), FUN = mean)
# Print the five highest-weighted terms of every cluster.
# `hasil` holds one row of per-term means per cluster. Its Group.1 column
# is the cluster id added by aggregate(), not a term, so it is excluded
# from the ranking and used only as the label.
for (i in seq_len(nrow(hasil))) {
  cat(paste0("cluster ", hasil$Group.1[i], ": "))
  s <- hasil[i, names(hasil) != "Group.1", drop = FALSE]
  # Rank on a plain numeric vector instead of calling sort() on a
  # one-row data frame; `s` stays a one-row data frame ordered by weight.
  s <- s[, order(unlist(s), decreasing = TRUE), drop = FALSE]
  cat(head(names(s), 5), "\n")
}
## cluster 1: Group.1 agents cell novel uses
## cluster 2: Group.1 disease cancer disorders pharmaceutical
## cluster 3: Group.1 apparatus derivatives comprising composition
## cluster 4: Group.1 antibodies compounds use novel
## cluster 5: Group.1 inhibitors using systems kinase
## cluster 6: Group.1 thereof use method composition
## cluster 7: Group.1 uses thereof acid compounds
## cluster 8: Group.1 method system disease detection
## cluster 9: Group.1 disease treatment method inflammatory
## cluster 10: Group.1 compositions methods treatment disease
## cluster 11: Group.1 using methods treating disease
## cluster 12: Group.1 methods related diagnosis alzheimers
## cluster 13: Group.1 treating disorders diseases methods
## cluster 14: Group.1 use thereof compositions methods
## cluster 15: Group.1 treatment disease prevention use
## cluster 16: Group.1 diseases treatment metabolic neurodegenerative
## cluster 17: Group.1 comprising composition prevention treatment
## cluster 18: Group.1 preventing composition treating pharmaceutical
# Inspect the sorted per-term means left over from the last loop iteration.
s
## Group.1 preventing composition treating pharmaceutical disease
## 18 18 4.21074 4.005458 3.349288 2.504437 2.238235
## comprising inflammatory method alzheimers related metabolic
## 18 2.083074 1.069217 0.92268 0.7816301 0.7816301 0.7506849
## use chronic novel derivatives acid neurodegenerative
## 18 0.7478583 0.7370994 0.7370994 0.6130467 0.3564055 0.3509285
## compounds diseases thereof treatment agent agents antibodies
## 18 0.2710725 0.2576954 0.2461426 0.2052971 0 0 0
## apparatus cancer cell compositions conditions detection diagnosis
## 18 0 0 0 0 0 0 0
## disorders drug inhibitors kinase methods parkinsons prevention
## 18 0 0 0 0 0 0 0
## substituted system systems therapeutic uses using
## 18 0 0 0 0 0 0
library("factoextra")
## Warning: package 'factoextra' was built under R version 3.1.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.1.3
library("FactoMineR")
## Warning: package 'FactoMineR' was built under R version 3.1.3
# Principal component analysis of the term weights (column 45, the
# cluster label, is removed first).
data2 <- data[, -45L]
res.pca <- FactoMineR::PCA(data2, graph = FALSE)
# Eigenvalues with the share of variance carried by each dimension.
factoextra::get_eig(res.pca)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.6005180 5.9102682 5.910268
## Dim.2 2.1284923 4.8374825 10.747751
## Dim.3 1.8817338 4.2766678 15.024419
## Dim.4 1.8276040 4.1536454 19.178064
## Dim.5 1.7508935 3.9793035 23.157367
## Dim.6 1.5256579 3.4674043 26.624772
## Dim.7 1.4945348 3.3966699 30.021442
## Dim.8 1.4436980 3.2811319 33.302573
## Dim.9 1.3778338 3.1314404 36.434014
## Dim.10 1.2656385 2.8764512 39.310465
## Dim.11 1.2582478 2.8596541 42.170119
## Dim.12 1.2141091 2.7593390 44.929458
## Dim.13 1.1799973 2.6818119 47.611270
## Dim.14 1.1518417 2.6178219 50.229092
## Dim.15 1.1253557 2.5576265 52.786718
## Dim.16 1.1086033 2.5195529 55.306271
## Dim.17 1.0636766 2.4174467 57.723718
## Dim.18 1.0403587 2.3644515 60.088170
## Dim.19 0.9972266 2.2664240 62.354594
## Dim.20 0.9824718 2.2328904 64.587484
## Dim.21 0.9767692 2.2199300 66.807414
## Dim.22 0.9578738 2.1769860 68.984400
## Dim.23 0.9337268 2.1221064 71.106506
## Dim.24 0.9099645 2.0681012 73.174608
## Dim.25 0.8967292 2.0380209 75.212629
## Dim.26 0.8594841 1.9533729 77.166001
## Dim.27 0.8250709 1.8751610 79.041162
## Dim.28 0.7970043 1.8113733 80.852536
## Dim.29 0.7902816 1.7960944 82.648630
## Dim.30 0.7389986 1.6795422 84.328172
## Dim.31 0.6994618 1.5896859 85.917858
## Dim.32 0.6517243 1.4811915 87.399050
## Dim.33 0.6331740 1.4390318 88.838082
## Dim.34 0.6174943 1.4033961 90.241478
## Dim.35 0.5660831 1.2865525 91.528030
## Dim.36 0.5586521 1.2696639 92.797694
## Dim.37 0.5351412 1.2162300 94.013924
## Dim.38 0.5175077 1.1761538 95.190078
## Dim.39 0.4473394 1.0166805 96.206758
## Dim.40 0.4080090 0.9272932 97.134052
## Dim.41 0.3868659 0.8792408 98.013292
## Dim.42 0.3228683 0.7337916 98.747084
## Dim.43 0.2985001 0.6784094 99.425493
## Dim.44 0.2527829 0.5745066 100.000000
# Individuals plot on the first two principal components, grouped by
# cluster with a 95% ellipse per group.
# ggplot2's default shape palette supplies only 6 shapes; with more groups
# the remaining points get no shape and are removed from the plot. The
# shapes are therefore assigned explicitly, one per cluster (the 26 point
# shapes 0..25 are recycled if there are more clusters than that).
cluster_f <- as.factor(data$cluster)
p <- fviz_pca_ind(res.pca, geom = "point",
                  habillage = cluster_f, addEllipses = TRUE,
                  ellipse.level = 0.95) +
  scale_shape_manual(values = rep_len(0:25, nlevels(cluster_f))) +
  theme_minimal()
Plot
# Draw the individuals plot stored in p.
print(p)
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 296 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

Biplot
# Biplot of individuals and variables (only variables are labelled),
# grouped by cluster with a 95% ellipse per group.
# The default shape palette covers only 6 groups, and points of further
# groups are removed from the plot, so one shape per cluster is assigned
# explicitly (the 26 point shapes 0..25 are recycled if needed).
cluster_f <- as.factor(data$cluster)
fviz_pca_biplot(res.pca, label = "var", habillage = cluster_f,
                addEllipses = TRUE, ellipse.level = 0.95) +
  scale_shape_manual(values = rep_len(0:25, nlevels(cluster_f))) +
  theme_minimal()
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 296 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.

Network
#Build Network
library('igraph')
## Warning: package 'igraph' was built under R version 3.1.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library('network')
## Warning: package 'network' was built under R version 3.1.3
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
library('sna')
## Warning: package 'sna' was built under R version 3.1.3
## sna: Tools for Social Network Analysis
## Version 2.3-2 created on 2014-01-13.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
## Attaching package: 'sna'
## The following object is masked from 'package:network':
##
## %c%
## The following objects are masked from 'package:igraph':
##
## %c%, betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
library('ndtv')
## Warning: package 'ndtv' was built under R version 3.1.3
## Loading required package: networkDynamic
## Warning: package 'networkDynamic' was built under R version 3.1.3
##
## networkDynamic: version 0.9.0, created on 2016-01-12
## Copyright (c) 2016, Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll, University of Washington
## with contributions from
## Zack Almquist, University of California -- Irvine
## David R. Hunter, Penn State University
## Li Wang
## Kirk Li, University of Washington
## Steven M. Goodreau, University of Washington
## Jeffrey Horner
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("networkDynamic").
## Loading required package: animation
## Warning: package 'animation' was built under R version 3.1.3
##
## ndtv: version 0.9.0, created on 2016-2-18
## Copyright (c) 2016, Skye Bender-deMoll, University of Washington
## with contributions from
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ndtv").
library('visNetwork')
## Warning: package 'visNetwork' was built under R version 3.1.3
##
## Attaching package: 'visNetwork'
## The following object is masked from 'package:igraph':
##
## %>%
library('reshape')
## Warning: package 'reshape' was built under R version 3.1.3
# Reload the term-weight table from the CSV file.
setwd("C:/Users/zul/Documents/bali/hasil")
data <- read.csv(file = "rekapall.csv", header = TRUE, sep = ",")
# NOTE(review): datax drops column 45 of the raw table (taken before the
# id column is removed) and is not used again in this section - confirm
# whether it is still needed.
datax <- data[, -45L]
data <- data[, -1L]      # remove the leading document-id column
data2 <- data[, -45L]    # term weights only (column 45 removed)
txt <- colnames(data2)   # term names, used below as node labels
# Per-cluster mean of every term; drop the grouping column and relabel
# the term columns 1..44.
hasil <- aggregate(data2, by = list(data$cluster), FUN = mean)
hasil <- hasil[, -1L]
colnames(hasil) <- 1:44
# Term-by-term weight matrix: the first row of `hasil` multiplied by
# itself (transpose times row vector).
hasil2 <- as.matrix(hasil[1, ])
hasil3 <- t(hasil2) %*% hasil2
m <- hasil3

# Long format with one row per cell; keep only the cells above the
# diagonal so each pair appears once and self-pairs are left out.
m2 <- melt(m)
m2 <- m2[melt(upper.tri(m))$value, ]
m2 <- as.data.frame(setNames(m2, c("from", "to", "value")))
edges <- m2
# Node table for visNetwork: one node per term in `txt`, identified by its
# position in that vector. The node count follows `txt` instead of a
# hard-coded 44 so the table cannot go out of step with the data.
nb <- length(txt)
nodes <- data.frame(id = seq_len(nb), label = txt,
                    group = rep("A", nb), value = seq_len(nb),
                    # HTML shown as the node tooltip (paragraph tag closed)
                    title = paste0("<p>", txt, "</p>"),
                    stringsAsFactors = FALSE)
# Keep only edges with a positive weight, then draw the interactive
# network with nearest-neighbour highlighting and a fixed layout seed.
edges1 <- edges[edges$value > 0, ]
visLayout(
  visOptions(
    visNetwork(nodes, edges1, height = "500px", width = "100%"),
    highlightNearest = TRUE
  ),
  randomSeed = 123
)
CA (Multiple Correspondence Analysis)
library(RODBC)
## Warning: package 'RODBC' was built under R version 3.1.3
library(ca)
## Warning: package 'ca' was built under R version 3.1.3
setwd("C:/Users/zul/Documents/bali/hasil")
# Read Query4 from the Access database. RODBC reports failures through
# return values rather than R errors, so both the connection and the
# query result are checked explicitly.
channel <- odbcConnectAccess("dbpaten.mdb")
if (!inherits(channel, "RODBC")) {
  stop("could not open dbpaten.mdb", call. = FALSE)
}
data <- tryCatch(
  sqlQuery(channel, "SELECT * FROM Query4"),
  finally = odbcClose(channel)  # release the handle even if the query fails
)
if (!is.data.frame(data)) {
  stop("Query4 failed: ", paste(data, collapse = "; "), call. = FALSE)
}
data1 <- read.csv("rekapall.csv", header = TRUE, sep = ",")
# The cluster labels are attached by row position, so both sources must
# have the same number of rows; otherwise cbind() would either fail or
# silently recycle the labels.
# NOTE(review): this also assumes both sources list the documents in the
# same order - confirm.
stopifnot(nrow(data) == nrow(data1))
m5 <- as.data.frame(cbind(data, data1$cluster))
# m5=m5[m5$Var1!="",]
colnames(m5)[1] <- "kode"
colnames(m5)[2] <- "clust"
# Multiple correspondence analysis of m5 (indicator-matrix variant),
# keeping 5 dimensions.
mca41 <- mjca(m5, lambda = "indicator", nd = 5)
# Number of distinct categories per variable. Counted column by column:
# apply() would first coerce the whole data frame to a character matrix,
# reformatting numeric columns along the way.
cats1 <- vapply(m5, function(x) nlevels(factor(x)), integer(1))
cats1
## kode clust Expr1 data1$cluster
## 500 450 36 18
# Column (category) coordinates, each row tagged with the variable it
# belongs to: every variable name is repeated once per category.
mca4_vars_df1 <- data.frame(
  mca41$colcoord,
  Variable = rep(names(cats1), times = cats1)
)
# Remove the "kode" and "clust" variable names from the level labels,
# then use the cleaned labels as row names.
mca41$levelnames <- gsub("clust", "", gsub("kode", "", mca41$levelnames))
rownames(mca4_vars_df1) <- mca41$levelnames
# Map of the variable categories on the first two coordinate columns
# (X1, X2), labelled by category and coloured by source variable.
ggplot(
  data = mca4_vars_df1,
  mapping = aes(x = X1, y = X2, label = rownames(mca4_vars_df1))
) +
  geom_hline(yintercept = 0, colour = "gray70") +
  geom_vline(xintercept = 0, colour = "gray70") +
  geom_text(aes(colour = Variable)) +
  ggtitle("MCA") +
  labs(colour = "Response") +
  theme_bw()

###############################################