Quiz1 - Group1

Please find the book and line number in Jane Austen books which has the most number of trust words.

Expected outcome:

book linenumber sentiment n
trust 5
.

Solution

library(tidytext)
library(tidyverse)
library(janeaustenr)
austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  ungroup() %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  inner_join(get_sentiments("nrc")) %>%
  filter(sentiment=="trust") %>% 
  count(book,linenumber, sentiment) %>% 
  arrange(desc(n)) %>% 
  head(2)

Quiz1 - Group2

Please find the book and line number in Jane Austen books which has the most number of positive words.

Expected outcome:

book linenumber sentiment n
positive 6
.

Solution

library(tidytext)
library(janeaustenr)
austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  ungroup() %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  inner_join(get_sentiments("bing")) %>%
  filter(sentiment=="positive") %>% 
  count(book,linenumber, sentiment) %>% 
  arrange(desc(n)) %>% 
  head(2)

Quiz2 - Group1

Find the highest correlated “positive” (nrc) words in “Emma” (sections of 20 lines, word count threshold is 10)

Expected output:

item1 item2 correlation
lucky 0.6329502

Solution

library(janeaustenr)
library(tidytext)
library(widyr)
library(tidyverse)
austen_books() %>%
  filter(book=="Emma") %>% 
  mutate(section = row_number() %/% 20) %>%
  filter(section > 0) %>%
  unnest_tokens(word, text) %>%
  filter(!word %in% stop_words$word) %>% 
  inner_join(get_sentiments("nrc")) %>% 
  filter(sentiment=="positive") %>% 
  group_by(word) %>%
  filter(n() >= 10) %>%   # try lower numbers and see what happens
  pairwise_cor(word, section, sort = TRUE) %>% 
  head(2)
Joining, by = "word"

Note: this quiz gave different result in most of student computers, most likely due to a bug in widyr package

Quiz2 - Group2

Find the top 2 highest correlated words with the word pride in sections of 20 lines in all Jane Austen books. (Word count threshold is 10)

Expected output:

item1 item2 correlation
pride 0.2301
pride 0.1817

Solution

library(janeaustenr)
library(tidytext)
library(widyr)
library(tidyverse)
austen_books() %>%
  mutate(section = row_number() %/% 20) %>%
  filter(section > 0) %>%
  unnest_tokens(word, text) %>%
  filter(!word %in% stop_words$word) %>% 
  group_by(word) %>%
  filter(n() >= 10) %>%   # try lower numbers and see what happens
  pairwise_cor(word, section, sort = TRUE) %>% 
  filter(item1=="pride") %>% 
  head(2)

Note: this quiz gave different result in most of student computers, most likely due to a bug in widyr package

Quiz3 - Group1 and Group2

Please make a linear model of displacement versus mpg from mtcars lm(mpg ~ disp) and answer the questions:

  1. if a car has 250 displacement, what would its mpg be?

  2. which car has the highest (positive) residual?

  3. if lm(mpg ~ cyl) has R squared of 0.726, which model, disp or cyl is better for predicting mpg

Solution

library(broom)
# a
lmfit <- lm(mpg ~ disp, mtcars)
dummy <- data_frame(disp=250)
predict(lmfit,dummy)
       1 
19.29607 
# b
augment(lmfit) %>% 
  arrange(-.resid) %>% 
  head(1)
# residual is difference between actual value and predicted value (actual-prediction)
# thus Corolla has 7.2 more mileage than expected mileage for its displacement
# c
summary(lmfit)$r.squared   #OR glance(lmfit)
[1] 0.7183433
# higher R-squared means better fit/model

Quiz4 - Group1 and Group2

Please do two different kmeans cluster on mtcars

  1. regular data, 3 clusters, 10 starts
  2. scaled data, 3 clusters, 10 starts

then show the table of cluster matches

1 2 3
1
2
3

Solution

set.seed(1)
k1 <- kmeans(mtcars, centers= 3, nstart = 10)
k2 <- kmeans(scale(mtcars), centers= 3, nstart = 10 )
table(k1$cluster, k2$cluster)
   
     1  2  3
  1  3 13  0
  2  2  0  7
  3  0  2  5
LS0tCnRpdGxlOiAiUXVpemVzIGFuZCBTb2x1dGlvbnMiCmF1dGhvcjogYWxwZXIgeWlsbWF6CmRhdGU6IEphbiAzcmQsIDIwMTgKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKPHN0eWxlIHR5cGU9InRleHQvY3NzIj4KLnRhYmxlIHsKICAgIHdpZHRoOiA0MCU7Cn0KPC9zdHlsZT4KCiMgUXVpejEgLSBHcm91cDEKClBsZWFzZSBmaW5kIHRoZSBib29rIGFuZCBsaW5lIG51bWJlciBpbiBKYW5lIEF1c3RlbiBib29rcyB3aGljaCBoYXMgdGhlIG1vc3QgbnVtYmVyIG9mIHRydXN0IHdvcmRzLgoKKiB1c2UgYG5yY2Agc2VudGltZW50cwoqIHBsZWFzZSByZW1vdmUgc3RvcCB3b3JkcwoqIGhpbnQ6IGBjb3VudChib29rLGxpbmVudW1iZXIsIHNlbnRpbWVudClgCgpFeHBlY3RlZCBvdXRjb21lOgoKfCBib29rIHwgbGluZW51bWJlciB8IHNlbnRpbWVudCB8IG4gfAp8LS0tLS0tfC0tLS0tLS0tLS0tLXwtLS0tLS0tLS0tLXwtLS18CnwgLi4uICB8ICAgIC4uLiAgICAgfCAgdHJ1c3QgICAgfCA1IHwKfCAuLi4gIHwgICAgLi4uICAgICB8ICAgLi4uICAgICB8IC4gfAoKCiMjIFNvbHV0aW9uCgpgYGB7ciBzb2x1dGlvbi1xejEtZ3IxLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShqYW5lYXVzdGVucikKCmF1c3Rlbl9ib29rcygpICU+JQogIGdyb3VwX2J5KGJvb2spICU+JQogIG11dGF0ZShsaW5lbnVtYmVyID0gcm93X251bWJlcigpKSAlPiUKICB1bmdyb3VwKCkgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KSAlPiUKICBhbnRpX2pvaW4oc3RvcF93b3JkcykgJT4lCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygibnJjIikpICU+JQogIGZpbHRlcihzZW50aW1lbnQ9PSJ0cnVzdCIpICU+JSAKICBjb3VudChib29rLGxpbmVudW1iZXIsIHNlbnRpbWVudCkgJT4lIAogIGFycmFuZ2UoZGVzYyhuKSkgJT4lIAogIGhlYWQoMikKYGBgCgojIFF1aXoxIC0gR3JvdXAyCgpQbGVhc2UgZmluZCB0aGUgYm9vayBhbmQgbGluZSBudW1iZXIgaW4gSmFuZSBBdXN0ZW4gYm9va3Mgd2hpY2ggaGFzIHRoZSBtb3N0IG51bWJlciBvZiBwb3NpdGl2ZSB3b3Jkcy4KCiogdXNlIGBiaW5nYCBzZW50aW1lbnRzCiogcGxlYXNlIHJlbW92ZSBzdG9wIHdvcmRzCiogaGludDogYGNvdW50KGJvb2ssbGluZW51bWJlciwgc2VudGltZW50KWAKCkV4cGVjdGVkIG91dGNvbWU6Cgp8IGJvb2sgfCBsaW5lbnVtYmVyIHwgc2VudGltZW50IHwgbiB8CnwtLS0tLS18LS0tLS0tLS0tLS0tfC0tLS0tLS0tLS0tfC0tLXwKfCAuLi4gIHwgICAgLi4uICAgICB8IHBvc2l0aXZlICB8IDYgfAp8IC4uLiAgfCAgICAuLi4gICAgIHwgICAuLi4gICAgIHwgLiB8CgojIyBTb2x1dGlvbgoKYGBge3Igc29sdXRpb24tcXoxLWdyMiwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoamFuZWF1c3RlbnIpCgphdXN0ZW5fYm9va3MoKSAlPiUKICBncm91cF9ieShib29rKSAlPiUKICBtdXRhdGUobGluZW51bWJlciA9IHJvd19udW1iZXIoKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JQogIGlubmVyX2pvaW4oZ2V0X3NlbnRpbWVudHMoImJpbmciKSkgJT4lCiAgZmlsdGVyKHNlbnRpbWVudD09InBvc2l0aXZlIikgJT4lIAogIGNvdW50KGJvb2ssbGluZW51bWJlciwgc2VudGltZW50KSAlPiUgCiAgYXJyYW5nZShkZXNjKG4pKSAlPiUgCiAgaGVhZCgyKQpgYGAKCiMgUXVpejIgLSBHcm91cDEKCkZpbmQgdGhlICBoaWdoZXN0IGNvcnJlbGF0ZWQgInBvc2l0aXZlIiAobnJjKSB3b3JkcyBpbiAiRW1tYSIgKHNlY3Rpb25zIG9mIDIwIGxpbmVzLCB3b3JkIGNvdW50IHRocmVzaG9sZCBpcyAxMCkKCkV4cGVjdGVkIG91dHB1dDoKCnwgaXRlbTEgfCBpdGVtMiB8IGNvcnJlbGF0aW9uIHwKfC0tLS0tLS18LS0tLS0tLXwtLS0tLS0tLS0tLS0tfAp8IGx1Y2t5IHwgICAgICAgfCAgIDAuNjMyOTUwMiB8CgojIyBTb2x1dGlvbgoKYGBge3Igc29sdXRpb24tcXoyLWdyMX0KbGlicmFyeShqYW5lYXVzdGVucikKbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh3aWR5cikKbGlicmFyeSh0aWR5dmVyc2UpCgphdXN0ZW5fYm9va3MoKSAlPiUKICBmaWx0ZXIoYm9vaz09IkVtbWEiKSAlPiUgCiAgbXV0YXRlKHNlY3Rpb24gPSByb3dfbnVtYmVyKCkgJS8lIDIwKSAlPiUKICBmaWx0ZXIoc2VjdGlvbiA+IDApICU+JQogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lCiAgZmlsdGVyKCF3b3JkICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgCiAgaW5uZXJfam9pbihnZXRfc2VudGltZW50cygibnJjIikpICU+JSAKICBmaWx0ZXIoc2VudGltZW50PT0icG9zaXRpdmUiKSAlPiUgCiAgZ3JvdXBfYnkod29yZCkgJT4lCiAgZmlsdGVyKG4oKSA+PSAxMCkgJT4lICAgIyB0cnkgbG93ZXIgbnVtYmVycyBhbmQgc2VlIHdoYXQgaGFwcGVucwogIHBhaXJ3aXNlX2Nvcih3b3JkLCBzZWN0aW9uLCBzb3J0ID0gVFJVRSkgJT4lIAogIGhlYWQoMikKYGBgCgo+IE5vdGU6IHRoaXMgcXVpeiBnYXZlIGRpZmZlcmVudCByZXN1bHQgaW4gbW9zdCBvZiBzdHVkZW50IGNvbXB1dGVycywgbW9zdCBsaWtlbHkgZHVlIHRvIGEgYnVnIGluIGB3aWR5cmAgcGFja2FnZQoKIyBRdWl6MiAtIEdyb3VwMgoKRmluZCB0aGUgdG9wIDIgaGlnaGVzdCBjb3JyZWxhdGVkIHdvcmRzIHdpdGggdGhlIHdvcmQgYHByaWRlYCBpbiBzZWN0aW9ucyBvZiAyMCBsaW5lcyBpbiBhbGwgSmFuZSBBdXN0ZW4gYm9va3MuIChXb3JkIGNvdW50IHRocmVzaG9sZCBpcyAxMCkKCkV4cGVjdGVkIG91dHB1dDoKCnwgaXRlbTEgfCBpdGVtMiB8IGNvcnJlbGF0aW9uIHwKfC0tLS0tLS18LS0tLS0tLXwtLS0tLS0tLS0tLS0tfAp8IHByaWRlIHwgICAgICAgfCAgIDAuMjMwMSAgICB8CnwgcHJpZGUgfCAgICAgICB8ICAgMC4xODE3ICAgIHwKCiMjIFNvbHV0aW9uCgpgYGB7ciBzb2x1dGlvbi1xejItZ3IyfQpsaWJyYXJ5KGphbmVhdXN0ZW5yKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHdpZHlyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCmF1c3Rlbl9ib29rcygpICU+JQogIG11dGF0ZShzZWN0aW9uID0gcm93X251bWJlcigpICUvJSAyMCkgJT4lCiAgZmlsdGVyKHNlY3Rpb24gPiAwKSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogIGZpbHRlcighd29yZCAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lIAogIGdyb3VwX2J5KHdvcmQpICU+JQogIGZpbHRlcihuKCkgPj0gMTApICU+JSAgICMgdHJ5IGxvd2VyIG51bWJlcnMgYW5kIHNlZSB3aGF0IGhhcHBlbnMKICBwYWlyd2lzZV9jb3Iod29yZCwgc2VjdGlvbiwgc29ydCA9IFRSVUUpICU+JSAKICBmaWx0ZXIoaXRlbTE9PSJwcmlkZSIpICU+JSAKICBoZWFkKDIpCmBgYAoKPiBOb3RlOiB0aGlzIHF1aXogZ2F2ZSBkaWZmZXJlbnQgcmVzdWx0IGluIG1vc3Qgb2Ygc3R1ZGVudCBjb21wdXRlcnMsIG1vc3QgbGlrZWx5IGR1ZSB0byBhIGJ1ZyBpbiBgd2lkeXJgIHBhY2thZ2UKCiMgUXVpejMgLSBHcm91cDEgYW5kIEdyb3VwMgoKUGxlYXNlIG1ha2UgYSBsaW5lYXIgbW9kZWwgb2YgZGlzcGxhY2VtZW50IHZlcnN1cyBtcGcgCmZyb20gbXRjYXJzIGBsbShtcGcgfiBkaXNwKWAgYW5kIGFuc3dlciB0aGUgcXVlc3Rpb25zOgoKYS4gaWYgYSBjYXIgaGFzIDI1MCBkaXNwbGFjZW1lbnQsIHdoYXQgd291bGQgaXRzIG1wZyBiZT8KCmIuIHdoaWNoIGNhciBoYXMgdGhlIGhpZ2hlc3QgKHBvc2l0aXZlKSByZXNpZHVhbD8KCmMuIGlmIGxtKG1wZyB+IGN5bCkgaGFzIFIgc3F1YXJlZCBvZiAwLjcyNiwgd2hpY2ggbW9kZWwsCmBkaXNwYCBvciBgY3lsYCBpcyBiZXR0ZXIgZm9yIHByZWRpY3RpbmcgYG1wZ2AKCiMjIFNvbHV0aW9uCgpgYGB7ciBzb2x1dGlvbi1xejN9CmxpYnJhcnkoYnJvb20pCgojIGEKbG1maXQgPC0gbG0obXBnIH4gZGlzcCwgbXRjYXJzKQpkdW1teSA8LSBkYXRhX2ZyYW1lKGRpc3A9MjUwKQpwcmVkaWN0KGxtZml0LGR1bW15KQoKIyBiCmF1Z21lbnQobG1maXQpICU+JSAKICBhcnJhbmdlKC0ucmVzaWQpICU+JSAKICBoZWFkKDEpCgojIHJlc2lkdWFsIGlzIGRpZmZlcmVuY2UgYmV0d2VlbiBhY3R1YWwgdmFsdWUgYW5kIHByZWRpY3RlZCB2YWx1ZSAoYWN0dWFsLXByZWRpY3Rpb24pCiMgdGh1cyBDb3JvbGxhIGhhcyA3LjIgbW9yZSBtaWxlYWdlIHRoYW4gZXhwZWN0ZWQgbWlsZWFnZSBmb3IgaXRzIGRpc3BsYWNlbWVudAoKIyBjCnN1bW1hcnkobG1maXQpJHIuc3F1YXJlZCAgICNPUiBnbGFuY2UobG1maXQpCgojIGhpZ2hlciBSLXNxdWFyZWQgbWVhbnMgYmV0dGVyIGZpdC9tb2RlbApgYGAKCiMgUXVpejQgLSBHcm91cDEgYW5kIEdyb3VwMgoKUGxlYXNlIGRvIHR3byBkaWZmZXJlbnQga21lYW5zIGNsdXN0ZXIgb24gbXRjYXJzCgoxLiByZWd1bGFyIGRhdGEsICAzIGNsdXN0ZXJzLCAxMCBzdGFydHMKMi4gc2NhbGVkIGRhdGEsIDMgY2x1c3RlcnMsIDEwIHN0YXJ0cwoKdGhlbiBzaG93IHRoZSB0YWJsZSBvZiBjbHVzdGVyIG1hdGNoZXMKCnwgICB8IDEgfCAyIHwgMyB8CnwtLS18LS0tfC0tLXwtLS18CnwgMSB8ICAgfCAgIHwgICB8CnwgMiB8ICAgfCAgIHwgICB8CnwgMyB8ICAgfCAgIHwgICB8CgojIyBTb2x1dGlvbgoKYGBge3Igc29sdXRpb24tcXo0fQpzZXQuc2VlZCgxKQprMSA8LSBrbWVhbnMobXRjYXJzLCBjZW50ZXJzPSAzLCBuc3RhcnQgPSAxMCkKazIgPC0ga21lYW5zKHNjYWxlKG10Y2FycyksIGNlbnRlcnM9IDMsIG5zdGFydCA9IDEwICkKCnRhYmxlKGsxJGNsdXN0ZXIsIGsyJGNsdXN0ZXIpCmBgYA==