Each roll of the die has 6 possible outcomes, so three rolls give \[N_{3\,\text{rolls}} = 6 \times 6 \times 6 = 216\] possible sequences.
There are two ways for two rolls to total 3: 1 then 2, and 2 then 1. The rolls are independent and the two orderings are mutually exclusive, so the total probability is
\[\begin{aligned} P(\text{sum} = 3) &= P(1, 2) + P(2, 1) \\ &= P(1) \times P(2) + P(2) \times P(1) \\ &= \frac{1}{6} \times \frac{1}{6} + \frac{1}{6} \times \frac{1}{6} \\ &= \frac{2}{36} = \frac{1}{18} \approx 0.0556 \end{aligned}\]
# Estimate the probability of each word in a plain-text file.
#
# The text is lower-cased, everything except lowercase letters and spaces is
# stripped (so "don't" becomes "dont"), and the remaining text is split on
# spaces. A word's probability is its count divided by the total word count.
#
# Arguments:
#   flnm          Path to the text file.
#   ranked        If TRUE, sort rows by decreasing probability; otherwise rows
#                 stay in the alphabetical order produced by table().
#   printAllWords If TRUE, print every row; otherwise print the first 10.
#   returnTable   If TRUE, return the table; otherwise return NULL invisibly.
#
# Returns (only when returnTable = TRUE) a data frame with columns
# Word (character) and Probability (numeric). The table is always printed.
word_prob <- function(flnm, ranked = FALSE, printAllWords = FALSE, returnTable = FALSE) {
  library(stringr)
  suppressPackageStartupMessages(library(dplyr))

  # Read raw lines. read.delim() parses the text as a quoted, tab-separated
  # table, and paste() on the resulting data frame deparses each column to
  # 'c("...", "...")', which glues a stray "c" onto the first word.
  body <- readLines(flnm, warn = FALSE)
  # collapse and convert to lowercase
  body <- str_to_lower(paste(body, collapse = " "))
  # tabs and other whitespace must still separate words once stripped
  body <- str_replace_all(body, "[[:space:]]+", " ")
  # remove anything that isn't a space or lowercase letter (a second "^"
  # inside the class would be a literal caret, so it is not repeated here)
  body <- str_replace_all(body, "[^[:lower:] ]", "")

  # convert to vector and drop the empty tokens left by repeated spaces.
  # Dropping them explicitly is safe even when there are none, unlike
  # removing the first table entry by position.
  word_vec <- str_split(body, " ")[[1]]
  word_vec <- word_vec[nzchar(word_vec)]

  # get count of words and convert to probability
  counts <- table(word_vec)
  word_table <- data.frame(
    Word = as.character(names(counts)),
    Probability = as.vector(counts) / length(word_vec),
    stringsAsFactors = FALSE
  )

  if (ranked) {
    word_table <- arrange(word_table, desc(Probability))
  }

  n_show <- if (printAllWords) nrow(word_table) else 10
  print(head(word_table, n_show))

  if (returnTable) word_table else invisible(NULL)
}
word_prob('data/assign6.sample.txt', ranked = TRUE)
Word Probability
1 the 0.05697151
2 a 0.03373313
3 and 0.02848576
4 for 0.02248876
5 in 0.02098951
6 of 0.02098951
7 to 0.02098951
8 is 0.01649175
9 said 0.01649175
10 that 0.01424288
# Estimate the probability of two words and of the two appearing side by side.
#
# The text is tokenised the same way as in word_prob: lower-cased, stripped of
# everything except lowercase letters and spaces, then split on spaces.
#
# Arguments:
#   flnm   Path to the text file.
#   word1  First word (compared in lowercase).
#   word2  Second word (compared in lowercase).
#
# Returns a data frame with columns Term and Probability and three rows:
#   word1, word2, and "word1 word2;word2 word1" (the pair in either order).
#   A word's probability is its count / number of words; the pair's is the
#   number of adjacent occurrences / (number of words - 1). Words or pairs that
#   never occur get probability 0.
phrase_prob <- function(flnm, word1, word2) {
  library(stringr)
  # dplyr is no longer needed for the computation; it is still attached so the
  # function's effect on the search path is the same as before.
  suppressPackageStartupMessages(library(dplyr))
  stopifnot(is.character(word1), length(word1) == 1,
            is.character(word2), length(word2) == 1)

  # Read raw lines: paste() on a read.delim() data frame deparses each column
  # to 'c("...", "...")' and corrupts the first word.
  body <- readLines(flnm, warn = FALSE)
  body <- str_to_lower(paste(body, collapse = " "))
  body <- str_replace_all(body, "[[:space:]]+", " ")
  body <- str_replace_all(body, "[^[:lower:] ]", "")
  word_vec <- str_split(body, " ")[[1]]
  # drop empty tokens so every count below uses the same set of real words
  word_vec <- word_vec[nzchar(word_vec)]
  n_words <- length(word_vec)

  # the text is lower-cased, so compare against lower-cased targets
  target_1 <- str_to_lower(word1)
  target_2 <- str_to_lower(word2)

  # get individual word probabilities (0 when the word never occurs)
  word_share <- function(target) {
    if (n_words == 0) 0 else sum(word_vec == target) / n_words
  }
  prob_1 <- word_share(target_1)
  prob_2 <- word_share(target_2)

  # find count of words appearing together by comparing each token with its
  # successor; whole-token comparison avoids substring hits such as
  # "bathe federal" counting as "the federal"
  if (n_words < 2) {
    prob_phrase <- 0
  } else {
    first <- word_vec[-n_words]
    second <- word_vec[-1]
    forward <- sum(first == target_1 & second == target_2)
    # identical words would otherwise be counted once per direction
    backward <- if (target_1 == target_2) {
      0
    } else {
      sum(first == target_2 & second == target_1)
    }
    # divide by the number of possible phrases -- 1 fewer than the number of words
    prob_phrase <- (forward + backward) / (n_words - 1)
  }

  data.frame(
    Term = c(word1, word2, paste0(paste(word1, word2), ";", paste(word2, word1))),
    Probability = c(prob_1, prob_2, prob_phrase),
    stringsAsFactors = FALSE
  )
}
phrase_prob('data/assign6.sample.txt', 'the', 'federal')
Term Probability
1 the 0.056971514
2 federal 0.003748126
3 the federal;federal the 0.002949853