Homework 8

Question 1

A) There is a very large and very low spike in the early years of the data set. Since then, the value has been always spiked positively.

B) See below!

C) See below!

D) See below!

url <- "https://www.usinflationcalculator.com/inflation/historical-inflation-rates/"
page <- read_html(url)
inftable <- page %>% html_table() %>% .[[1]]

infdata <- inftable %>% select(Year, Jan:Dec) %>% mutate(across(Jan:Dec, as.character)) %>% pivot_longer(cols = Jan:Dec, names_to = "Month", values_to = "Rate") %>% mutate(Month = factor(Month, levels = month.abb), Rate = suppressWarnings(as.numeric(Rate)) ) %>% filter(!is.na(Rate))

ystat <- infdata %>% group_by(Year) %>% summarise(Minimum = min(Rate), Maximum = max(Rate), Average = mean(Rate)) %>% pivot_longer(cols = c(Minimum, Maximum, Average), names_to = "Metric", values_to = "Value")

a <- ggplot(ystat, aes(x = Year, y = Value, color = Metric)) + geom_line(linewidth = 1) + scale_color_manual(values = c("Average" = "red", "Maximum" = "green", "Minimum" = "blue")) 
print(a)

b <- a + transition_reveal(Year)
animate(b, renderer = gifski_renderer())

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

mavg <- infdata %>% group_by(Month) %>% summarise(avgrate = mean(Rate))

c <- ggplot(mavg, aes(x = Month, y = avgrate)) +  geom_col(fill = "darkblue") 
print(c)

d <- infdata %>% plot_ly( x = ~Month, y = ~Rate,frame = ~Year, type = 'bar', marker = list(color = 'darkblue')) %>% layout(yaxis = list(range = c(min(infdata$Rate), max(infdata$Rate)))) 
d

Question 2

See below!

data <- function(start_date, end_date, api_key) {
  
dates <- seq(as.Date(start_date), as.Date(end_date), by = "days")
mtype <- c()
  
for (i in seq_along(dates)) {date_str <- as.character(dates[i])
    
url <- paste0("https://api.nasa.gov/planetary/apod?api_key=", api_key, "&date=", date_str)
response <- GET(url)
    
if (status_code(response) == 200) {
cdata <- content(response, as = "text", encoding = "UTF-8")
json <- fromJSON(cdata)
mtype <- c(mtype, json$media_type)
      
if (json$media_type == "image") {
img <- image_read(json$url)
annotated <- image_annotate(img, text = paste(json$date, "-", json$title), size = 30, color = "white", boxcolor = "darkblue",gravity = "south")
        
plot(annotated)}}}
  
return(table(mtype))
}

api_key <- "vMaUAVxpi8jQXK9aqKrkm2FBeC9KuyTA05JZwMZB"
table <- data("2025-11-11", "2025-11-20", api_key)

print(table)

## mtype
## image other 
##     9     1

Question 3

A) See below!

B) Yes all three books result in positive and negative sentiments. With some books having more data points. However, Ninety-Three results in a very low negative sentiment in some instances,

C) Marius, Valjean, and Cosette in Les miserables because they are the main characters in the book. Same with Cimourdain, lantenac, and gauvain from Ninety-Three as well as gringoire, quasimodo, and archdeacon from Notre Dame.

read <- function(name){df <- read.delim(name, header = FALSE, stringsAsFactors = FALSE)
df <- df[-(1:16), , drop = FALSE] 
colnames(df) <- "text"
tibble(line = 1:nrow(df), text = df$text)}

miserables  <- read("miserables")
notredame   <- read("notredame")
ninetythree <- read("ninetythree")

## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string

books <- bind_rows(mutate(miserables, book = "Les Miserables"), mutate(notredame, book = "Notre Dame"),mutate(ninetythree, book = "Ninety-Three"))

books %>% unnest_tokens(word, text) %>% anti_join(stop_words, by = "word") %>% count(book, word, sort = TRUE) %>% group_by(book) %>% slice_max(n, n = 10) %>% print()

## # A tibble: 31 × 3
## # Groups:   book [3]
##    book           word        n
##    <chr>          <chr>   <int>
##  1 Les Miserables marius   1373
##  2 Les Miserables jean     1235
##  3 Les Miserables valjean  1050
##  4 Les Miserables cosette   922
##  5 Les Miserables day       788
##  6 Les Miserables time      761
##  7 Les Miserables chapter   744
##  8 Les Miserables rue       666
##  9 Les Miserables de        636
## 10 Les Miserables father    568
## # ℹ 21 more rows

books %>% unnest_tokens(pair, text, token = "ngrams", n = 2) %>% separate(pair, c("w1","w2"), sep=" ") %>% filter(!(w1 %in% stop_words$word | w2 %in% stop_words$word)) %>% unite(pair, w1, w2, sep = " ") %>% count(book, pair, sort = TRUE) %>% group_by(book) %>% slice_max(n, n = 10) %>% print()

## # A tibble: 30 × 3
## # Groups:   book [3]
##    book           pair                  n
##    <chr>          <chr>             <int>
##  1 Les Miserables NA NA              1467
##  2 Les Miserables jean valjean        974
##  3 Les Miserables rue de              176
##  4 Les Miserables monsieur le         109
##  5 Les Miserables de la               105
##  6 Les Miserables chapter ii           94
##  7 Les Miserables chapter iii          88
##  8 Les Miserables chapter iv           78
##  9 Les Miserables rue des              77
## 10 Les Miserables project gutenberg    71
## # ℹ 20 more rows

sentiment <- books %>% mutate(chunk = (line-1) %/% 80 + 1) %>% unnest_tokens(word, text) %>% inner_join(get_sentiments("bing")) %>% count(book, chunk, sentiment) %>% pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% mutate(net = positive - negative)

## Joining with `by = join_by(word)`

## Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 41373 of `x` matches multiple rows in `y`.
## ℹ Row 6639 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

ggplot(sentiment, aes(chunk, net)) + geom_line() + facet_wrap(~book, scales="free_x")

tfidf <- books %>% unnest_tokens(word, text) %>% count(book, word) %>% bind_tf_idf(word, book, n) %>% arrange(desc(tf_idf))

top15 <- tfidf %>% slice_max(tf_idf, n = 15)
print(top15)

## # A tibble: 15 × 6
##    book           word           n       tf   idf   tf_idf
##    <chr>          <chr>      <int>    <dbl> <dbl>    <dbl>
##  1 Les Miserables marius      1373 0.00239  1.10  0.00263 
##  2 Ninety-Three   cimourdain   246 0.00191  1.10  0.00209 
##  3 Les Miserables valjean     1050 0.00183  1.10  0.00201 
##  4 Notre Dame     gringoire    321 0.00169  1.10  0.00186 
##  5 Les Miserables cosette      922 0.00160  1.10  0.00176 
##  6 Notre Dame     quasimodo    230 0.00121  1.10  0.00133 
##  7 Ninety-Three   lantenac     150 0.00116  1.10  0.00128 
##  8 Notre Dame     archdeacon   195 0.00103  1.10  0.00113 
##  9 Les Miserables thénardier   504 0.000877 1.10  0.000964
## 10 Ninety-Three   gauvain      286 0.00222  0.405 0.000898
## 11 Les Miserables javert       423 0.000736 1.10  0.000809
## 12 Ninety-Three   tourgue       74 0.000573 1.10  0.000630
## 13 Ninety-Three   radoub        70 0.000542 1.10  0.000596
## 14 Ninety-Three   georgette     69 0.000534 1.10  0.000587
## 15 Ninety-Three   rené          68 0.000527 1.10  0.000579

ggplot(top15, aes(reorder(word, tf_idf), tf_idf)) + geom_col() + coord_flip() + facet_wrap(~book, scales="free")

Homework 8

2025-12-04

Question 1

A) There is a very large and very low spike in the early years of the data set. Since then, the value has been always spiked positively.

B) See below!

C) See below!

D) See below!

Question 2

See below!

Question 3

A) See below!

B) Yes all three books result in positive and negative sentiments. With some books having more data points. However, Ninety-Three results in a very low negative sentiment in some instances,

C) Marius, Valjean, and Cosette in Les miserables because they are the main characters in the book. Same with Cimourdain, lantenac, and gauvain from Ninety-Three as well as gringoire, quasimodo, and archdeacon from Notre Dame.