# Whitelist of actress article links. It is used as a filter to remove
# unrelated links from the data frame of movies: only links listed here are
# accepted.
#
# Every anchor href on the Wikipedia "List of American film actresses" page is
# read, and only plain article links are kept: the link must contain "/wiki/"
# and must not contain a fragment ("#"), a namespace/protocol colon, or an
# image/file reference. Anchors without an href yield NA and are dropped by
# filter(). The Main_Page link and the sibling television-actresses list are
# removed as well. The result is a base data.frame with one column, `links`.
listOfActresses <-
  read_html("https://en.wikipedia.org/wiki/List_of_American_film_actresses") %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  tibble(links = .) %>%
  filter(
    str_detect(links, "/wiki/"),
    !str_detect(links, "#"),
    !str_detect(links, "image"),
    !str_detect(tolower(links), "file"),
    !str_detect(links, "https:"),
    !str_detect(links, ":"),
    !str_detect(links, "Main_Page"),
    !str_detect(links, "/wiki/List_of_American_television_actresses")
  ) %>%
  as.data.frame()
# Scrape the internal article links found on a single web page.
#
# Index: URL (or any other input accepted by read_html()) of the page to scan.
#
# Returns a tibble with one character column, `links`, holding the href of
# every anchor that contains "/wiki/" and does not look like a fragment,
# image/file, namespaced or absolute link, nor one of the boilerplate pages
# listed below. Anchors without an href yield NA and are dropped by filter().
# Duplicates are kept, so callers can count how often a link occurs.
getLinks <- function(Index) {
  hrefs <- read_html(Index) %>%
    html_nodes("a") %>%
    html_attr("href")

  tibble(links = hrefs) %>%
    filter(
      str_detect(links, "/wiki/"),
      !str_detect(links, "#"),
      !str_detect(links, "image"),
      !str_detect(tolower(links), "file"),
      !str_detect(links, "https:"),
      !str_detect(links, ":"),
      !str_detect(links, "Privacy_policy"),
      !str_detect(links, "Terms_of_Use"),
      !str_detect(links, "Main_Page"),
      # Protocol-relative links carry no colon, so they need their own rule
      !str_detect(links, "//foundation.wikimedia.org/"),
      !str_detect(links, "International_Standard_Book_Number")
    )
}
Con el propósito de reducir la lista de actrices, solo tomé las que han participado activamente en una película en los últimos 20 años.
# For every year from 2000 to 2020, read that year's "List of American films"
# page and keep only the links that also appear in listOfActresses.
filmListBase <- "https://en.wikipedia.org/wiki/List_of_American_films_of_"
yearlyMatches <- lapply(2000:2020, function(movieYear) {
  merge(getLinks(paste0(filmListBase, movieYear)), listOfActresses, all = FALSE)
})

# Bind all years in one call instead of growing the data frame inside a loop.
# The newest year comes first and the empty seed frame last, which is the row
# order produced by repeatedly prepending each year's matches.
actressesPerYear <- do.call(
  rbind,
  c(rev(yearlyMatches), list(data.frame(links = character())))
)

# Drop any remaining link containing "//", then count the rows per actress
totalActresses <- actressesPerYear %>%
  filter(!grepl("//", links)) %>%
  group_by(links) %>%
  summarize(references = n())
El propósito es visitar la página de cada actriz, tomar los links que se muestran en ella, visitar cada uno de esos links y encontrar referencias a otras actrices para poder medir la relevancia de cada una.
# Plan:
# 1. Take the list of actresses
# 2. Keep the actresses of the last 20 years
# 3. Take each actress of that list in turn
# 4. Get all the links her page refers to
# 5. Explore those links
# 6. Add 1 to the row of every actress who appears on a linked page
# 7. Move on to the next column

# Empty data frames used
actressesDF <- data.frame(links = character())
linksDF <- data.frame(links = character())

# Square matrix whose rows and columns are both named after the actress links.
# Entry [a, b] counts the pages linked from b's article that link to a.
mcMatrix <- matrix(0, nrow = length(totalActresses$links), ncol = length(totalActresses$links))
colnames(mcMatrix) <- totalActresses$links
rownames(mcMatrix) <- totalActresses$links

wikiBase <- "https://en.wikipedia.org"

# Cache of "actresses referenced by page X", keyed by link. Many articles are
# linked from several actresses, so each one is downloaded only once.
hitsCache <- new.env(hash = TRUE, parent = emptyenv())

# Visit each actress article, follow every distinct link found in it and look
# for references to herself or to other actresses.
for (i in seq_along(totalActresses$links)) {
  # Column of the matrix that this actress fills
  actressLinkArg <- as.character(totalActresses$links[[i]])
  linksDF <- getLinks(paste0(wikiBase, actressLinkArg)) %>%
    group_by(links) %>%
    summarise(references = n())

  # Skip this actress entirely if any of her links is missing
  if (any(is.na(linksDF$links))) {
    next
  }

  # seq_along() makes a page without usable links a no-op instead of an error
  for (j in seq_along(linksDF$links)) {
    linkArg <- as.character(linksDF$links[[j]])

    actressHits <- hitsCache[[linkArg]]
    if (is.null(actressHits)) {
      referencesFound <- getLinks(paste0(wikiBase, linkArg))
      # Cross-check with the list of actresses; intersect() returns each
      # matching link once
      actressHits <- intersect(referencesFound$links, totalActresses$links)
      hitsCache[[linkArg]] <- actressHits
    }

    # Add 1 to every actress referenced by this page, provided none is NA
    if (length(actressHits) > 0 && !anyNA(actressHits)) {
      mcMatrix[actressHits, actressLinkArg] <-
        mcMatrix[actressHits, actressLinkArg] + 1
    }
  }
}
Debido a que el tiempo que tomó la sección anterior excedió los 3 días, se decidió detener el análisis y trabajar con los valores obtenidos. La matriz utilizada es de 150x150 y los análisis presentados se basan en estos números.
# Manual backup / restore of the crawled matrix (kept commented out)
#bckpMcMatrix <- mcMatrix
#mcMatrix <- bckpMcMatrix

# Preparing the subset: the leading block of the crawled matrix, at most
# 150 x 150 (smaller when fewer actresses are available).
subsetSize <- min(150, nrow(mcMatrix))
subsetIdx <- seq_len(subsetSize)
subMcMatrix <- mcMatrix[subsetIdx, subsetIdx, drop = FALSE]

# Row-normalise. The vector of row sums is recycled down the columns, so
# every entry [i, j] is divided by the sum of row i.
sumMcMatrix <- rowSums(subMcMatrix)
transMcMatrix <- subMcMatrix / sumMcMatrix
# A row without any reference would be 0/0 = NaN and would spread NaN through
# every matrix power; give such rows a uniform distribution instead.
transMcMatrix[sumMcMatrix == 0, ] <- 1 / subsetSize

# Uniform matrix S and the weights alfa / beta
sMatrix <- matrix(1 / subsetSize, nrow = subsetSize, ncol = subsetSize)
alfa <- 0.85
beta <- (1 - alfa)

# Transition matrix: weighted mix of the observed transitions and S
mC <- alfa * transMcMatrix + beta * sMatrix

# Raise the matrix to increasing powers (up to 100) to see whether it
# stabilises at some point
mcPwrTen <- mC %^% 10
mcPwrTwenty <- mC %^% 20
mcPwrFifty <- mC %^% 50
mcPwrHundred <- mC %^% 100
Podemos ver que la matriz se estabiliza alrededor de la potencia 50, lo cual se aprecia en los siguientes subconjuntos.
# Top-left 3 x 3 corner of the matrix raised to the 10th power
print(mcPwrTen[seq_len(3), seq_len(3)])
/wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin 0.005975571 0.003403492 0.002693227
/wiki/Addison_Timlin 0.005897302 0.006438972 0.002699677
/wiki/Adrianne_Palicki 0.005947622 0.003549209 0.002957123
# Top-left 3 x 3 corner of the matrix raised to the 20th power
print(mcPwrTwenty[seq_len(3), seq_len(3)])
/wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin 0.005966690 0.003428729 0.002703070
/wiki/Addison_Timlin 0.005965928 0.003441561 0.002703620
/wiki/Adrianne_Palicki 0.005966566 0.003430695 0.002705044
# Top-left 3 x 3 corner of the matrix raised to the 50th power
print(mcPwrFifty[seq_len(3), seq_len(3)])
/wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin 0.005966449 0.003428928 0.002703166
/wiki/Addison_Timlin 0.005966449 0.003428929 0.002703166
/wiki/Adrianne_Palicki 0.005966449 0.003428929 0.002703166
# Top-left 3 x 3 corner of the matrix raised to the 100th power
print(mcPwrHundred[seq_len(3), seq_len(3)])
/wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin 0.005966449 0.003428928 0.002703166
/wiki/Addison_Timlin 0.005966449 0.003428928 0.002703166
/wiki/Adrianne_Palicki 0.005966449 0.003428928 0.002703166
# Main diagonal of the transition matrix mC, i.e. its [i, i] entries
diagSubMcMatrix <- diag(mC, names = TRUE)
print(diagSubMcMatrix)
/wiki/Abigail_Breslin /wiki/Addison_Timlin
0.18233333 0.54995833
/wiki/Adrianne_Palicki /wiki/Adrienne_Barbeau
0.38159701 0.46295652
/wiki/Adrienne_Shelly /wiki/Agnes_Moorehead
0.44117857 0.11511686
/wiki/Ahna_O%27Reilly /wiki/Aimee_Garcia
0.26544444 0.51100000
/wiki/Aimee_Teegarden /wiki/Aisha_Tyler
0.63850000 0.36463636
/wiki/AJ_Michalka /wiki/Alanna_Ubach
0.43662500 0.46141667
/wiki/Alex_Borstein /wiki/Alexa_Davalos
0.18239665 0.56766667
/wiki/Alexa_Vega /wiki/Alexandra_Breckenridge
0.57125316 0.25058932
/wiki/Alexandra_Daddario /wiki/Alexandra_Holden
0.40791489 0.41854386
/wiki/Alexandra_Shipp /wiki/Alexie_Gilmore
0.38263265 0.66621739
/wiki/Alexis_Arquette /wiki/Alexis_Bledel
0.42600000 0.13671429
/wiki/Alexis_Dziena /wiki/Alexis_Knapp
0.38263265 0.34100000
/wiki/Alfre_Woodard /wiki/Ali_Larter
0.12708233 0.16829622
/wiki/Alia_Shawkat /wiki/Alice_Drummond
0.32898913 0.36303704
/wiki/Alice_Greczyn /wiki/Alicia_Keys
0.72023077 0.32335948
/wiki/Alicia_Silverstone /wiki/Alicia_Witt
0.15014703 0.42600000
/wiki/Alison_Brie /wiki/Alison_Eastwood
0.26977470 0.56260714
/wiki/Alison_Lohman /wiki/Allie_DeBerry
0.28433333 0.77372727
/wiki/Allison_Janney /wiki/Allison_Williams_(actress)
0.12370983 0.19608197
/wiki/Ally_Sheedy /wiki/Aly_Michalka
0.10875591 0.37573118
/wiki/Alyson_Hannigan /wiki/Alyson_Stoner
0.30170755 0.39887234
/wiki/Alyssa_Diaz /wiki/Alyssa_Milano
0.49683333 0.39442857
/wiki/Amanda_Bynes /wiki/Amanda_Detmer
0.20437423 0.51100000
/wiki/Amanda_Peet /wiki/Amanda_Righetti
0.29804301 0.34559459
/wiki/Amanda_Seyfried /wiki/Amber_Benson
0.21617056 0.48671429
/wiki/Amber_Heard /wiki/Amber_Stevens
0.42885235 0.19933333
/wiki/Amber_Tamblyn /wiki/Amber_Valletta
0.19181633 0.52858621
/wiki/America_Ferrera /wiki/Amy_Acker
0.15411258 0.35889474
/wiki/Amy_Adams /wiki/Amy_Brenneman
0.08850351 0.17493822
/wiki/Amy_Irving /wiki/Amy_Madigan
0.36011854 0.15160060
/wiki/Amy_Poehler /wiki/Amy_Ryan
0.12837003 0.09743224
/wiki/Amy_Schumer /wiki/Amy_Sedaris
0.15987224 0.43215942
/wiki/Amy_Smart /wiki/Ana_Gasteyer
0.19983041 0.32239303
/wiki/Analeigh_Tipton /wiki/Andie_MacDowell
0.69826563 0.09895194
/wiki/Andrea_Bowen /wiki/Andrea_Martin
0.25376316 0.28194233
/wiki/Angela_Bassett /wiki/Angela_Bettis
0.09690164 0.52988889
/wiki/Angela_Lansbury /wiki/Angelina_Jolie
0.16508584 0.10550654
/wiki/Angie_Dickinson /wiki/Angie_Harmon
0.15905785 0.27687719
/wiki/Anika_Noni_Rose /wiki/Anjelica_Huston
0.24947884 0.09624313
/wiki/Ann_Cusack /wiki/Ann_Dowd
0.47912500 0.11213269
/wiki/Ann_Robinson /wiki/Ann-Margret
0.63850000 0.10269695
/wiki/Anna_Camp /wiki/Anna_Chlumsky
0.32046309 0.17082030
/wiki/Anna_Faris /wiki/Anna_Gunn
0.22547183 0.18394702
/wiki/Anna_Kendrick /wiki/Annabella_Sciorra
0.10668783 0.48789320
/wiki/Annabeth_Gish /wiki/AnnaLynne_McCord
0.38975969 0.56200000
/wiki/AnnaSophia_Robb /wiki/Anne_Archer
0.29886325 0.35516667
/wiki/Anne_Bancroft /wiki/Anne_Dudek
0.12170000 0.35889474
/wiki/Anne_Hathaway /wiki/Anne_Heche
0.09600745 0.14479699
/wiki/Annette_Bening /wiki/Annie_Mumolo
0.09016457 0.46766667
/wiki/Annie_Parisse /wiki/Annie_Potts
0.35100000 0.32506250
/wiki/Ari_Graynor /wiki/Ariel_Winter
0.46463636 0.19483446
/wiki/Arielle_Kebbel /wiki/Ashanti_(singer)
0.49927586 0.32947458
/wiki/Ashley_Bell_(actress) /wiki/Ashley_Benson
0.35516667 0.31742336
/wiki/Ashley_Fink /wiki/Ashley_Greene
0.44174074 0.27687719
/wiki/Ashley_Hinshaw /wiki/Ashley_Judd
0.56766667 0.15991759
/wiki/Ashley_Olsen /wiki/Ashley_Rickards
0.40752174 0.46463636
/wiki/Ashley_Scott /wiki/Ashley_Tisdale
0.56766667 0.20886026
/wiki/Aubrey_Plaza /wiki/Aunjanue_Ellis
0.28131915 0.36804545
/wiki/Awkwafina /wiki/Azura_Skye
0.07322612 0.46141667
/wiki/Bai_Ling /wiki/Bailee_Madison
0.18588064 0.57281818
/wiki/Barbara_Crampton /wiki/Barbara_Hershey
0.53567742 0.12655918
/wiki/Barbra_Streisand /wiki/Beanie_Feldstein
0.22428509 0.24385714
/wiki/Bebe_Neuwirth /wiki/Bella_Thorne
0.17526093 0.37122222
/wiki/Beth_Behrs /wiki/Betsy_Russell
0.31266667 0.61651724
/wiki/Bette_Midler /wiki/Betty_Buckley
0.16534136 0.22600000
/wiki/Betty_White /wiki/Beverley_Mitchell
0.18414539 0.43467347
/wiki/Beverly_D%27Angelo /wiki/Beyonc%C3%A9_Knowles
0.38530851 0.36329508
/wiki/Bianca_Kajlich /wiki/Bianca_Lawson
0.53618519 0.41198901
/wiki/Bijou_Phillips /wiki/Billie_Lourd
0.58594624 0.33526966
/wiki/Blair_Brown /wiki/Blake_Lively
0.16199641 0.17819870
/wiki/Blanchard_Ryan /wiki/Blythe_Danner
0.07135473 0.12526348
/wiki/Bonnie_Hunt /wiki/Bonnie_Somerville
0.17895100 0.44016667
/wiki/Brandy_Norwood /wiki/Bree_Turner
0.32732184 0.54405556
/wiki/Brenda_Song /wiki/Briana_Evigan
0.44836842 0.16773774
/wiki/Brianna_Hildebrand /wiki/Bridget_Fonda
0.40752174 0.08006977