Taylor Swift Lyric Generator

2022-12-31

# Notes to myself on how to use Shiny in this document:
# - All functions need to be in `context: setup`.
# - All data needs to be in `context: data`.
# - Widgets: https://shiny.rstudio.com/articles/action-buttons.html
# - `panel: sidebar` plays the role of the UI and shouldn't contain much code at all.
# - `context: server` is the equivalent of the old server function.

# Register the font file under the family name "TaylorSwift" (the family
# referenced by the poster title), then switch on showtext rendering.
sysfonts::font_add(
  family = "TaylorSwift",
  regular = "Satisfaction.ttf"
)
showtext::showtext_auto(enable = TRUE)

library(BiocManager)

## Bioconductor version '3.14' is out-of-date; the current release version '3.16'
##   is available with R version '4.2'; see https://bioconductor.org/install

# Set the session's package repositories to the list reported by BiocManager.
# NOTE(review): presumably needed so a deployment can resolve Bioconductor
# packages -- confirm against the hosting setup.
options(repos = BiocManager::repositories())

# Read the TidyTuesday (2020-09-29) Taylor Swift lyrics CSV directly from GitHub
taylor_swift_lyrics <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv"
)

## Rows: 132 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Artist, Album, Title, Lyrics
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Download UD model for tagging
# overwrite = FALSE: only download when the model file is not already on disk
# in the model directory, instead of re-downloading it on every run.
ud_eng_dl <- udpipe::udpipe_download_model(
  language = "english-ewt",
  overwrite = FALSE
)

## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.5/master/inst/udpipe-ud-2.5-191206/english-ewt-ud-2.5-191206.udpipe to /Users/sara/Desktop/TidyTuesdayCreations/english-ewt-ud-2.5-191206.udpipe

##  - This model has been trained on version 2.5 of data from https://universaldependencies.org

##  - The model is distributed under the CC-BY-SA-NC license: https://creativecommons.org/licenses/by-nc-sa/4.0

##  - Visit https://github.com/jwijffels/udpipe.models.ud.2.5 for model license details.

##  - For a list of all models and their licenses (most models you can download with this package have either a CC-BY-SA or a CC-BY-SA-NC license) read the documentation at ?udpipe_download_model. For building your own models: visit the documentation by typing vignette('udpipe-train', package = 'udpipe')

## Downloading finished, model stored at '/Users/sara/Desktop/TidyTuesdayCreations/english-ewt-ud-2.5-191206.udpipe'

# Load model
# The path is taken from the download result rather than a hard-coded file
# name, so it always matches the model file that was actually downloaded.
ud_eng <- udpipe::udpipe_load_model(ud_eng_dl$file_model)
# Analyze the song lyrics and pipe to tibble format:
#  - drop tokens containing an apostrophe or a hyphen
#  - lower-case every token except the pronoun "I"
#  - drop missing tokens and filler syllables
ud_swift <- udpipe::udpipe_annotate(ud_eng, x = taylor_swift_lyrics$Lyrics) |>
  as_tibble() |>
  filter(!grepl("'|-", token)) |>
  mutate(token = if_else(token == "I", token, str_to_lower(token))) |>
  filter(!is.na(token), !(token %in% c("uh", "eeh", "eh", "hm", "ey")))

# Number of tokens in each sentence (one row per doc / paragraph / sentence)
counts <- count(
  group_by(ud_swift, doc_id, paragraph_id, sentence_id)
)

# Histogram of tokens per sentence, x axis limited to 0-25,
# with a red reference line at 7 tokens
ggplot(data = counts, mapping = aes(x = n)) +
  geom_histogram(binwidth = 1) +
  scale_x_continuous(n.breaks = 25, limits = c(0, 25)) +
  geom_vline(xintercept = 7, color = "red")

## Warning: Removed 630 rows containing non-finite values (`stat_bin()`).

## Warning: Removed 2 rows containing missing values (`geom_bar()`).

# For the first seven tokens of every sentence, tally how often each
# part-of-speech tag (upos) occurs at each token position, most frequent first
types <- local({
  opening_tokens <- slice_head(
    group_by(ud_swift, doc_id, paragraph_id, sentence_id),
    n = 7
  )
  position_tally <- count(group_by(opening_tokens, token_id, upos))
  arrange(position_tally, desc(n))
})

# Keep the five most frequent part-of-speech tags for each token position
common_types <- slice_max(
  group_by(types, token_id),
  order_by = n,
  n = 5
)

# sentence structure:
## pron + verb + adp + det + noun

# Function to make an ad-lib lyric line from the tagged tokens in `ud_swift`
# Line structure: pronoun + verb + determiner + adjective + noun +
#   adposition + possessive pronoun + noun
# random = TRUE (default) means random possible words are selected
# random = FALSE means the most frequent words per type are selected
# Returns a single character string. Reads the global tibble `ud_swift`
# (columns used: token, upos, feats).
make_title <- function(random = TRUE) {
  # Pick `size` tokens from a pool of annotated rows: a random sample when
  # `random` is TRUE, otherwise the `size` most frequent tokens.
  pick_tokens <- function(pool, size = 1) {
    if (random) {
      sample(pool$token, size)
    } else {
      ranked <- count(pool, token, sort = TRUE)
      head(ranked$token, size)
    }
  }

  # Subject pronouns: non-possessive, minus forms that do not work as the
  # subject of the line.
  pronoun1 <- ud_swift |>
    filter(upos == "PRON") |>
    filter(!grepl("Poss=Yes", feats)) |>
    filter(!(token %in% c(
      "him", "her", "me", "that", "us", "mine", "yourself", "there", "the"
    )))
  # Possessive pronouns
  pronoun2 <- ud_swift |>
    filter(upos == "PRON") |>
    filter(grepl("Poss=Yes", feats))
  # Nouns not ending in "s"
  nouns <- ud_swift |>
    filter(upos == "NOUN") |>
    filter(!grepl("s$", token))
  adjs <- ud_swift |>
    filter(upos == "ADJ")
  adps <- ud_swift |>
    filter(upos == "ADP")
  # Verbs, excluding "-ing" / "-in" forms and participles
  verbs <- ud_swift |>
    filter(upos == "VERB") |>
    filter(!grepl("ing$", token)) |>
    filter(!grepl("in$", token)) |>
    filter(!grepl("VerbForm=Part", feats))
  dets <- ud_swift |>
    filter(upos == "DET") |>
    filter(!(token %in% c("all", "these")))

  # The draws below keep the order pronoun, possessive, nouns, adjective,
  # determiner, adposition, verb, so a given seed keeps producing the same
  # words in random mode.
  pro1 <- pick_tokens(pronoun1)
  pro2 <- pick_tokens(pronoun2)
  ns <- pick_tokens(nouns, 2)
  n1 <- ns[1]
  n2 <- ns[2]

  # Subject-verb agreement: third-person-singular subjects take verbs ending
  # in "s", every other subject takes verbs that do not.
  third_person_singular <- c(
    "she", "he", "everything", "it", "what", "who", "this",
    "nothing", "anything", "anyone", "everyone"
  )
  if (pro1 %in% third_person_singular) {
    verbs <- verbs |>
      filter(grepl("s$", token))
  } else {
    verbs <- verbs |>
      filter(!grepl("s$", token))
  }

  # Article agreement: drop "a" before a vowel-initial noun, otherwise drop
  # "an".
  if (str_starts(n1, "[aeiou]")) {
    dets <- dets |>
      filter(!grepl("a$", token))
  } else {
    dets <- dets |>
      filter(!grepl("an$", token))
  }

  a <- pick_tokens(adjs)
  d <- pick_tokens(dets)
  p <- pick_tokens(adps)
  v <- pick_tokens(verbs)

  paste(str_to_title(pro1), v, d, a, n1, p, pro2, n2, sep = " ")
}

# Function to make the ad-lib lyric poster
# random: passed through to make_title() (TRUE = random words,
#   FALSE = most frequent words)
# seed: value given to set.seed() before the lyric line is generated, so the
#   same seed reproduces the same poster. Note this resets the session's
#   global random number generator.
# Returns a ggplot object with the generated line centred on a tan background
# under the "Ad Lib of Taylor Swift Lyrics" title.
make_poster <- function(random = TRUE, seed = 47) {
  set.seed(seed)
  title_text <- make_title(random = random)
  ggplot() +
    geom_text(
      aes(x = 0, y = 0, label = title_text),
      size = 13, color = "white", fontface = 2
    ) +
    xlim(c(-1, 1)) +
    ylim(c(-1, 1)) +
    theme_void() +
    labs(title = "<span style='font-size:40pt'>Ad Lib of <span style = 'font-family:TaylorSwift;'>Taylor Swift </span>Lyrics</span>") +
    theme(
      plot.background = element_rect(color = NA, fill = "#c2a474"),
      # element_markdown renders the HTML spans in the title
      plot.title = element_markdown(
        face = "bold", hjust = 0.5, color = "white", size = 40, vjust = -4
      )
    )
}

Ad Lib Lyrics

# Earlier input widgets, kept for reference:
#numericInput('seed', 'Random Seed', 47)
#submitButton("Update View", icon("refresh"))
# Current widget: a "Refresh" action button with input id "seed"
actionButton(inputId = "seed", label = "Refresh")

# Placeholder for the plot rendered under the output id "TaylorSwift"
# (an action button is the UI widget that can trigger a re-render)

plotOutput(outputId = "TaylorSwift")