El siguiente laboratorio hace uso de cadenas de markov para generacion de palabas y tambien estabilizacion de matriz
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tm)
## Loading required package: NLP
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
##
## crossing
Diagramar redes de nodos:
sites_edges <-
c("A","B",
"B","A",
"B","C",
"C","A",
"C","B",
"C","E",
"D","A",
"E","D",
"E","C",
"E","B")
net_sites <- graph(sites_edges)
plot(net_sites, layout=layout.circle)
Leer el los ficheros de los libros Quijote y Biblia
libro <- file("quijote.txt")
libro_2 <- file("biblia.txt")
libro_lines <- readLines(libro)
libro_lines_2 <- readLines(libro_2)
preparar contenido de lectura
libro_words <- paste(libro_lines,collapse = " ")
libro_words_2 <- paste(libro_lines_2,collapse = " ")
libro_words <- strsplit(libro_words, split = " ") %>% unlist()
libro_words_2 <- strsplit(libro_words_2, split = " ") %>% unlist()
Juntando los dos archivos
#libro_words <- c(libro_words,libro_words_2)
Removiendo caracteres no deseados
libro_words <- sapply(libro_words,"removePunctuation",USE.NAMES = FALSE)
libro_words <- sapply(libro_words,"tolower",USE.NAMES = FALSE)
libro_words <- sapply(libro_words,"stripWhitespace",USE.NAMES = FALSE)
libro_words <- sapply(libro_words,"removeNumbers",USE.NAMES = FALSE)
#libro_words <- sapply(libro_words,"removeWords",words=stopwords('spanish')
#,USE.NAME= FALSE)
libro_words <- libro_words[libro_words!=""]
libro_words <- libro_words[libro_words!=" "]
Creando los bigramas
bigrams<-
lapply(ngrams(libro_words,2), paste, collapse=" ") %>% unlist()
bigrams<-
table(bigrams) %>% as.data.frame()
bigrams <- bigrams %>%
separate(bigrams,into=c("word1","word2"),sep=" ")
muestra_words <- bigrams %>% filter(Freq > 90)
Creando los vertices del mapa
libro_vertices<-
paste(muestra_words$word1,muestra_words$word2) %>%
strsplit(" ") %>%
unlist()
g1<-
graph(libro_vertices)
plot(g1,edge.arrow.size = 0.3,
arrow.width = 0.1,
vertex.size = 5,
vertex.label.cex = 0.8)
Creando funciones generadoras de frases x palabra
next_word <- function(palabra){
top_words<-
bigrams %>%
filter(word1 == palabra) %>%
arrange(desc(Freq)) %>%
select(word2,Freq) %>%
mutate(total=sum(Freq),
prob=Freq/total)
sample(top_words$word2,size = 1,prob = top_words$prob) %>% return()
}
next_word_greedy <- function(palabra){
top_words<-
bigrams %>%
filter(word1 == palabra) %>%
arrange(desc(Freq)) %>%
select(word2,Freq) %>%
mutate(max_freq=max(Freq)) %>%
filter(Freq==max_freq)
top_words$word2 %>% return()
}
next_word_greedy("pedro")
## [1] "le" "y"
Funciones Generando oraciones
sentence <- function(seed="don",n=10){
oracion <- seed
for(i in 1:n){
oracion <- c( oracion,next_word(oracion[i]) )
}
paste(oracion,collapse = " ") %>% return()
}
sentence_greedy <- function(seed="don",n=10){
oracion <- seed
for(i in 1:n){
oracion <- c( oracion,next_word_greedy(oracion[i]) )
}
paste(oracion,collapse = " ") %>% return()
}
Prueba de semillas
set.seed(123)
sentence("delante",n = 15)
## [1] "delante de vencer una golondrina sola la soledad de haber en quitã¡ndosela dieron respondiã³ sancho que"
sentence_greedy("belen",n = 150)
## [1] "belen"
Funcion Producto matricial
matprod.par <- function(cl, A, B){
if (ncol(A) != nrow(B)) stop("Matrices do not conforme")
idx <- splitIndices(nrow(A), length(cl))
Alist <- lapply(idx, function(ii) A[ii,,drop=FALSE])
## ans <- clusterApply(cl, Alist, function(aa, B) aa %*% B, B)
## Same as above, but faster:
ans <- clusterApply(cl, Alist, get("%*%"), B)
do.call(rbind, ans)
}
construiremos la matriz de markov haciendo uso de las muestras
library(markovchain)
## Package: markovchain
## Version: 0.6.9.14
## Date: 2019-01-20
## BugReport: http://github.com/spedygiorgio/markovchain/issues
mcFit <- markovchainFit(data=muestra_words$word1)
mcFit$estimate
## MLE Fit
## A 7 - dimensional discrete Markov Chain defined by the following states:
## a, de, don, en, lo, que, vuestra
## The transition matrix (by rows) is defined as follows:
## a de don en lo que vuestra
## a 0.5 0.50 0.00 0.0 0.0 0.00 0.00
## de 0.0 0.75 0.25 0.0 0.0 0.00 0.00
## don 0.0 0.00 0.00 1.0 0.0 0.00 0.00
## en 0.0 0.00 0.00 0.5 0.5 0.00 0.00
## lo 0.0 0.00 0.00 0.0 0.0 1.00 0.00
## que 0.0 0.00 0.00 0.0 0.0 0.75 0.25
## vuestra 0.0 0.00 0.00 0.0 0.0 0.00 0.00
Producto matrical calculo de ejemplo considerando s
m_sqr=mcFit$standardError
m_sqr
## a de don en lo que vuestra
## a 0.5 0.5000000 0.00 0.0 0.0 0.0000000 0.00
## de 0.0 0.4330127 0.25 0.0 0.0 0.0000000 0.00
## don 0.0 0.0000000 0.00 1.0 0.0 0.0000000 0.00
## en 0.0 0.0000000 0.00 0.5 0.5 0.0000000 0.00
## lo 0.0 0.0000000 0.00 0.0 0.0 1.0000000 0.00
## que 0.0 0.0000000 0.00 0.0 0.0 0.4330127 0.25
## vuestra 1.0 1.0000000 1.00 1.0 1.0 1.0000000 1.00
rowsmat=nrow(m_sqr)
rowscol=ncol(m_sqr)
m_scluster=matrix(data = rep(.2, rowsmat*rowscol), nrow = rowsmat, ncol = rowscol)
matoper=m_sqr*.85+m_scluster*.15
for(i in 1:rowsmat) {
# i-th element of `u1` squared into `i`-th position of `usq`
matoper[i,] <- matoper[i,] / sum(matoper[i,])
print(c(matoper[i,],i,sum(matoper[i,])))
}
## a de don en lo que
## 0.42924528 0.42924528 0.02830189 0.02830189 0.02830189 0.02830189
## vuestra
## 0.02830189 1.00000000 1.00000000
## a de don en lo que
## 0.03794775 0.50351699 0.30674428 0.03794775 0.03794775 0.03794775
## vuestra
## 0.03794775 2.00000000 1.00000000
## a de don en lo que
## 0.02830189 0.02830189 0.02830189 0.83018868 0.02830189 0.02830189
## vuestra
## 0.02830189 3.00000000 1.00000000
## a de don en lo que
## 0.02830189 0.02830189 0.02830189 0.42924528 0.42924528 0.02830189
## vuestra
## 0.02830189 4.00000000 1.00000000
## a de don en lo que
## 0.02830189 0.02830189 0.02830189 0.02830189 0.02830189 0.83018868
## vuestra
## 0.02830189 5.00000000 1.00000000
## a de don en lo que
## 0.03794775 0.03794775 0.03794775 0.03794775 0.03794775 0.50351699
## vuestra
## 0.30674428 6.00000000 1.00000000
## a de don en lo que vuestra
## 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571
##
## 7.0000000 1.0000000
funcion de procesamiento paralelo y deteccion de procesadores
library(parallel)
detectCores()
## [1] 8
Revisaremos la matrix de markov para el caso de las palabras de muestra
(nc <- detectCores())
## [1] 8
cl <- makeCluster(rep("localhost", nc))
matoper
## a de don en lo que
## a 0.42924528 0.42924528 0.02830189 0.02830189 0.02830189 0.02830189
## de 0.03794775 0.50351699 0.30674428 0.03794775 0.03794775 0.03794775
## don 0.02830189 0.02830189 0.02830189 0.83018868 0.02830189 0.02830189
## en 0.02830189 0.02830189 0.02830189 0.42924528 0.42924528 0.02830189
## lo 0.02830189 0.02830189 0.02830189 0.02830189 0.02830189 0.83018868
## que 0.03794775 0.03794775 0.03794775 0.03794775 0.03794775 0.50351699
## vuestra 0.14285714 0.14285714 0.14285714 0.14285714 0.14285714 0.14285714
## vuestra
## a 0.02830189
## de 0.03794775
## don 0.02830189
## en 0.02830189
## lo 0.02830189
## que 0.30674428
## vuestra 0.14285714
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
Revisaremos la matrix de markov nuevamente despues de 5 operaciones
matoper
## a de don en lo que
## a 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## de 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## don 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## en 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## lo 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## que 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## vuestra 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## vuestra
## a 0.1178091
## de 0.1178091
## don 0.1178091
## en 0.1178091
## lo 0.1178091
## que 0.1178091
## vuestra 0.1178091
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
matoper=matprod.par(cl,matoper,matoper)
Revisaremos la matrix final de markov nuevamente despues de 10 operaciones
matoper
## a de don en lo que
## a 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## de 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## don 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## en 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## lo 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## que 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## vuestra 0.07638967 0.1429365 0.08418258 0.189075 0.1215701 0.268037
## vuestra
## a 0.1178091
## de 0.1178091
## don 0.1178091
## en 0.1178091
## lo 0.1178091
## que 0.1178091
## vuestra 0.1178091
stopCluster(cl)