# Load the TidyTuesday 2020-05-05 release and keep its `villagers` table.
tuesdata <- tidytuesdayR::tt_load("2020-05-05")
villagers <- tuesdata$villagers

# Derived catchphrase features used by the summaries, models and plots below.
# phrase_len: number of characters in the catchphrase.
villagers$phrase_len <- stringr::str_length(villagers$phrase)
# phrase_words: number of whitespace-delimited tokens. Counting runs of
# non-whitespace (rather than spaces + 1) keeps repeated, leading or trailing
# spaces from inflating the count, and yields 0 for an empty phrase.
villagers$phrase_words <- stringr::str_count(villagers$phrase, "\\S+")

# Treat personality as a categorical variable for grouping and modelling.
villagers$personality <- as.factor(villagers$personality)
pacman::p_load(tidyverse,dplyr)
# Per-personality mean and standard deviation of catchphrase length in
# characters. The result has the columns `personality`, `mean` and `sd`.
# summarise() with named summaries replaces the deprecated
# summarise_each(funs(...)) while producing the same column names.
phrase_len_by_personality <- villagers %>%
  group_by(personality) %>%
  summarise(mean = mean(phrase_len), sd = sd(phrase_len))

# Same summary for the number of words in the catchphrase; same column layout.
phrase_words_by_personality <- villagers %>%
  group_by(personality) %>%
  summarise(mean = mean(phrase_words), sd = sd(phrase_words))
# Model catchphrase word count and character length as a function of
# personality. glm() is called without a `family`, so its default applies.
# Variables are referenced through `data =` rather than `villagers$...` inside
# the formula, so coefficient names stay readable (`personality<level>`) and
# the fitted objects work with predict(newdata = ...).
aov_words <- glm(phrase_words ~ personality, data = villagers)
aov_len <- glm(phrase_len ~ personality, data = villagers)

# Print the coefficient tables for both fits.
summary(aov_words)
summary(aov_len)
# Bar chart of how many villagers have a catchphrase of each character length.
ggplot(data = villagers, mapping = aes(x = phrase_len)) +
  geom_bar() +
  ggtitle("Distribution of AC:NH Villager Catchphrase Lengths") +
  ggthemes::theme_excel_new()


# Mean catchphrase length per personality drawn as bars, with error bars
# extending one standard deviation above and below each mean.
ggplot(phrase_len_by_personality, aes(x = personality, y = mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2) +
  labs(
    title = "Phrase Length by Personality",
    x = "Personality",
    y = "Phrase Length",
    caption = "There is no personality type with a catch phrase length significantly different from the others."
  ) +
  theme_bw()


# Mean catchphrase word count per personality drawn as bars, with error bars
# extending one standard deviation above and below each mean. Title and axis
# labels are set explicitly so the y axis is not shown as the raw column name
# `mean`, mirroring the phrase-length plot.
ggplot(phrase_words_by_personality, aes(x = personality, y = mean)) +
  geom_col() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2) +
  theme_bw() +
  labs(
    x = "Personality",
    y = "Phrase Words",
    title = "Phrase Words by Personality"
  )

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KdHVlc2RhdGEgPSB0aWR5dHVlc2RheVI6OnR0X2xvYWQoJzIwMjAtMDUtMDUnKQ0KdmlsbGFnZXJzID0gdHVlc2RhdGEkdmlsbGFnZXJzDQp2aWxsYWdlcnNbLCJwaHJhc2VfbGVuIl0gPSBzdHJpbmdyOjpzdHJfbGVuZ3RoKHZpbGxhZ2VycyRwaHJhc2UpDQp2aWxsYWdlcnNbLCJwaHJhc2Vfd29yZHMiXSA9IHN0cmluZ3I6OnN0cl9jb3VudCh2aWxsYWdlcnMkcGhyYXNlLCAiICIpICsgMQ0KdmlsbGFnZXJzJHBlcnNvbmFsaXR5ID0gYXMuZmFjdG9yKHZpbGxhZ2VycyRwZXJzb25hbGl0eSkNCmBgYA0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHRpZHl2ZXJzZSxkcGx5cikNCnBocmFzZV9sZW5fYnlfcGVyc29uYWxpdHkgPSB2aWxsYWdlcnNbLGMoInBlcnNvbmFsaXR5IiwicGhyYXNlX2xlbiIpXSAlPiUNCiAgZ3JvdXBfYnkocGVyc29uYWxpdHkpICU+JSBzdW1tYXJpc2VfZWFjaChmdW5zKG1lYW4sIHNkKSkNCg0KcGhyYXNlX3dvcmRzX2J5X3BlcnNvbmFsaXR5ID0gdmlsbGFnZXJzWyxjKCJwZXJzb25hbGl0eSIsInBocmFzZV93b3JkcyIpXSAlPiUNCiAgZ3JvdXBfYnkocGVyc29uYWxpdHkpICU+JSBzdW1tYXJpc2VfZWFjaChmdW5zKG1lYW4sIHNkKSkNCg0KYGBgDQoNCmBgYHtyfQ0KYW92X3dvcmRzID0gZ2xtKHZpbGxhZ2VycyRwaHJhc2Vfd29yZHMgfiB2aWxsYWdlcnMkcGVyc29uYWxpdHkpDQphb3ZfbGVuID0gZ2xtKHZpbGxhZ2VycyRwaHJhc2VfbGVuIH4gdmlsbGFnZXJzJHBlcnNvbmFsaXR5KQ0Kc3VtbWFyeShhb3Zfd29yZHMpDQpzdW1tYXJ5KGFvdl9sZW4pDQpgYGANCg0KYGBge3J9DQp2aWxsYWdlcnMgJT4lIGdncGxvdCguLCBhZXMoeCA9IHBocmFzZV9sZW4pKSArIGdlb21fYmFyKHN0YXQgPSAiY291bnQiKSArIA0KICBnZ3RpdGxlKGxhYmVsID0gIkRpc3RyaWJ1dGlvbiBvZiBBQzpOSCBWaWxsYWdlciBDYXRjaHBocmFzZSBMZW5ndGhzIikgKyANCiAgZ2d0aGVtZXM6OnRoZW1lX2V4Y2VsX25ldygpDQoNCmdncGxvdChwaHJhc2VfbGVuX2J5X3BlcnNvbmFsaXR5LCBhZXMoeCA9IHBlcnNvbmFsaXR5LCB5ID0gbWVhbikpICsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikgKw0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VvbV9lcnJvcmJhcihhZXMoeW1pbiA9IG1lYW4gLSBzZCwgeW1heCA9IG1lYW4gKyBzZCksIHdpZHRoID0gLjIpICsNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRoZW1lX2J3KCkgKw0KICBsYWJzKHggPSAiUGVyc29uYWxpdHkiLCB5ID0gIlBocmFzZSBMZW5ndGgiLCB0aXRsZSA9ICJQaHJhc2UgTGVuZ3RoIGJ5IFBlcnNvbmFsaXR5IiwNCiAgICAgICBjYXB0aW9uID0gIlRoZXJlIGlzIG5vIHBlcnNvbmFsaXR5IHR5cGUgd2l0aCBhIGNhdGNoIHBo
cmFzZSBsZW5ndGggc2lnbmlmaWNhbnRseSBkaWZmZXJlbnQgZnJvbSB0aGUgb3RoZXJzLiIpDQoNCmdncGxvdChwaHJhc2Vfd29yZHNfYnlfcGVyc29uYWxpdHksIGFlcyh4ID0gcGVyc29uYWxpdHksIHkgPSBtZWFuKSkgKw0KICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpICsNCiAgZ2VvbV9lcnJvcmJhcihhZXMoeW1pbiA9IG1lYW4gLSBzZCwgeW1heCA9IG1lYW4gKyBzZCksIHdpZHRoID0gLjIpICsNCiAgdGhlbWVfYncoKQ0KYGBg