Goals

Step 1: Upload & clean text into quanteda gui

Step 2: Analysis

Sentiment, least priority (can be done in SPSS)

Word usage

KWIC

Load the dataset

library("quanteda")
Package version: 2.9.9000
Unicode version: 10.0
ICU version: 61.1
Parallel computing: 12 of 12 threads used.
See https://quanteda.io for tutorials and examples.
library("dplyr")

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Read the Brandwatch export and turn it into a quanteda corpus, using the
# "Full.Text" column as the document text; the other columns become docvars.
corp <- corpus(
    read.csv("~/Dropbox/_Whittlesey Street/Unmanned Stores Data.csv"),
    text_field = "Full.Text"
)

Descriptives

Page type by sentiment (as classified by Brandwatch):

Source
(Brandwatch) Sentiment
Total
negative neutral positive n
blog 13.5 80.0 6.5 934
forum 25.0 67.3 7.7 651
instagram 3.6 54.0 42.5 252
news 7.3 88.0 4.7 10049
reddit 29.5 57.3 13.1 525
review 26.7 46.7 26.7 30
tumblr 19.9 70.2 9.9 1114
twitter 2.7 94.7 2.6 32079
youtube 16.8 66.7 16.5 381

Country breakdowns:

# Collapse every country outside the three focus markets into "Other".
# as.character() guards against Country having been read in as a factor, in
# which case ifelse() would return integer level codes instead of the names.
corp$Country2 <- ifelse(corp$Country %in% c("Germany", "United States of America", "United Kingdom"),
                        as.character(corp$Country), "Other")

# Number of documents per country group and each group's share of the corpus.
# The share is multiplied by 100 so that it is a true percentage, matching the
# "%" column header; the unscaled proportion rounded to one decimal place
# collapsed to values such as 0.0 and 0.1.
docvars(corp) %>%
    group_by(Country2) %>%
    summarize(n = n()) %>%
    mutate(pct = round(100 * n / sum(n), 1)) %>%
    kbl(col.names = NULL) %>%
    add_header_above(c("Country", "Total", "%"), align = c("l", "r", "r")) %>%
    kable_styling("striped")
Country
Total
%
Germany 592 0.0
Other 30562 0.7
United Kingdom 2468 0.1
United States of America 12393 0.3

Sentiment by country

Sentiment by country:

# Row percentages (margin = 1): within each country group, the share of
# documents in each Brandwatch sentiment class, rounded to one decimal place.
tab <- docvars(corp) %>%
    select(Country2, Sentiment) %>%
    with(., prop.table(table(Country2, Sentiment), margin = 1) * 100) %>%
    round(1)

# Append the number of documents per country group as the "Total" column.
# The counts must be tabulated over Country2 (one value per table row);
# tabulating over Sentiment yields three values that are silently recycled
# across the four rows and triggers a "not a multiple of vector length" warning.
cbind(tab, n = table(corp$Country2)) %>%
    kbl() %>%
    add_header_above(c("Country", "(Brandwatch) Sentiment" = 3, "Total" = 1), align = c("l", "c", "r")) %>%
    kable_styling("striped")
number of rows of result is not a multiple of vector length (arg 2)
Country
(Brandwatch) Sentiment
Total
negative neutral positive n
Germany 2.9 88.3 8.8 2350
Other 5.1 91.4 3.4 41887
United Kingdom 4.5 90.4 5.1 1778
United States of America 5.2 90.3 4.4 2350
library("ggplot2")
# Side-by-side bars of the sentiment percentages for each country group.
# as.data.frame() on the table yields long format with the cell values in a
# column named Freq.
ggplot(as.data.frame(tab), aes(x = Country2, y = Freq, fill = Sentiment)) +
    geom_col(position = position_dodge()) +
    scale_fill_brewer(palette = "Paired") +
    labs(x = "", y = "Percentage") +
    theme_minimal()

Comparing sentiment on balance:

library("quanteda.tidy")

Attaching package: ‘quanteda.tidy’

The following object is masked from ‘package:stats’:

    filter
# Count documents for every sentiment class / country group combination,
# keeping only the non-neutral classes.
# .groups = "drop" returns an ungrouped result: it silences the summarise()
# regrouping message and avoids handing a tibble still grouped by Sentiment
# to later steps. The earlier select() was redundant because summarise()
# already keeps only the grouping columns and n.
net_sentiment <- docvars(corp) %>%
    group_by(Sentiment, Country2) %>%
    summarise(n = n(), .groups = "drop") %>%
    filter(Sentiment != "neutral") %>%
    rename(Country = Country2)
`summarise()` has grouped output by 'Sentiment'. You can override using the `.groups` argument.
library("tidyr")
# Spread the counts into one column per sentiment class, then score each
# country group by the log of its positive-to-negative count ratio
# (above 0: more positive than negative documents; below 0: the reverse).
net_sentiment <- net_sentiment %>%
    pivot_wider(names_from = Sentiment, values_from = n) %>%
    mutate(Sentiment = log(positive / negative))

# One bar per country group showing its net sentiment score; outline and fill
# are both mapped to the country.
# (A "Paired" brewer colour scale was tried here and is currently disabled.)
net_sentiment %>%
    ggplot(aes(x = Country, y = Sentiment, color = Country, fill = Country)) +
    geom_col() +
    labs(y = "Net Sentiment") +
    theme_minimal()

Keywords by sentiment

library("quanteda.textstats")
library("quanteda.textplots")

# Tokenise the whole corpus with punctuation removed, then build the
# document-feature matrix from those tokens.
toks <- corp %>%
    tokens(remove_punct = TRUE)
dfmat <- toks %>%
    dfm()

Germany

# Subset to the German documents once and reuse it for both the collocation
# detection and the compounding step.
toks_DE <- tokens_subset(toks, Country == "Germany")

# Detect collocations with German and English stopwords blanked out;
# padding = TRUE leaves empty placeholders so that words separated by a
# removed stopword are not treated as adjacent.
colls_DE <- toks_DE %>%
    tokens_remove(c(stopwords("de"), stopwords("en")), padding = TRUE) %>%
    textstat_collocations()

# Join the first 200 collocations (at most) into single tokens. The row index
# is capped at nrow(colls_DE): indexing 1:200 on a shorter table would append
# all-NA rows and pass them on as compounding patterns.
toks_DE <- tokens_compound(toks_DE, colls_DE[seq_len(min(200, nrow(colls_DE))), ])

# Keyness of features in positive versus negative documents (neutral ones are
# dropped). No Country filter is needed here because toks_DE already contains
# only German documents.
keywords_de <- dfm(toks_DE) %>%
    dfm_subset(Sentiment != "neutral") %>%
    dfm_remove(c(stopwords("de"), stopwords("en"))) %>%
    dfm_group(groups = Sentiment) %>%
    textstat_keyness(target = "positive", measure = "lr")

textplot_keyness(keywords_de)

United Kingdom

# Subset to the United Kingdom documents once and reuse it for both the
# collocation detection and the compounding step.
toks_UK <- tokens_subset(toks, Country == "United Kingdom")

# Detect collocations with English stopwords blanked out; padding = TRUE
# leaves empty placeholders so that words separated by a removed stopword are
# not treated as adjacent.
colls_UK <- toks_UK %>%
    tokens_remove(stopwords("en"), padding = TRUE) %>%
    textstat_collocations()

# Join the first 200 collocations (at most) into single tokens. The row index
# is capped at nrow(colls_UK): indexing 1:200 on a shorter table would append
# all-NA rows and pass them on as compounding patterns.
toks_UK <- tokens_compound(toks_UK, colls_UK[seq_len(min(200, nrow(colls_UK))), ])

# Keyness of features in positive versus negative documents (neutral ones are
# dropped). No Country filter is needed here because toks_UK already contains
# only United Kingdom documents.
keywords_uk <- dfm(toks_UK) %>%
    dfm_subset(Sentiment != "neutral") %>%
    dfm_remove(stopwords("en")) %>%
    dfm_group(groups = Sentiment) %>%
    textstat_keyness(target = "positive", measure = "lr")

textplot_keyness(keywords_uk)

United States

LS0tCnRpdGxlOiAiVW5tYW5uZWQgU3RvcmUgQW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIEdvYWxzCgpTdGVwIDE6IFVwbG9hZCAmIGNsZWFuIHRleHQgaW50byBxdWFudGVkYSBndWkKClN0ZXAgMjogQW5hbHlzaXMgCgpTZW50aW1lbnQsIGxlYXN0IHByaW9yaXR5IChjYW4gYmUgZG9uZSBpbiBTUFNTKQoKLSAgCUdlbmVyYWwgc2VudGltZW50IAotICAJU2VudGltZW50IG9uIGluZGl2aWR1YWwgYWNjb3VudHMgKHZhcmlhYmxlIGluIHJvdyBPKQotICAJU2VudGltZW50IGJ5IGNvbnRpbmVudCAodmFyaWFibGUgcm93IEsgb3IgTCkKLSAgCVNlbnRpbWVudCBieSBjb3VudHJ5ICh2YXJpYWJsZSByb3cgTSkgIAoKV29yZCB1c2FnZQoKLSAgCU1vc3QgdXNlZCB3b3JkcyBtaW51cyBzdG9wIHdvcmRzIAotICAJTW9zdCB1c2VkIHdvcmRzIHBlciBjb250aW5lbnQKLSAgCU1vc3QgdXNlZCBuZWdhdGl2ZSB3b3JkcyAoYnV0IG1vc3QgZW50cmllcyBhcmUgbmV1dHJhbCkgCi0gIAlNb3N0IHVzZWQgcG9zaXRpdmUgd29yZHMgKGJ1dCBtb3N0IGVudHJpZXMgYXJlIG5ldXRyYWwpCgpLV0lDCgotICAJUGF5bWVudCAmIGNoZWNrb3V0Ci0gIAlQYXltZW50ICYgY2hlY2tvdXQgPT4gc2VudGltZW50Ci0gIAlBY2Nlc3MgJiBjaGVjay1pbiAgCgoKCiMjIExvYWQgdGhlIGRhdGFzZXQKCmBgYHtyfQpsaWJyYXJ5KCJxdWFudGVkYSIpCmxpYnJhcnkoImRwbHlyIikKCmNvcnAgPC0gcmVhZC5jc3YoIn4vRHJvcGJveC9fV2hpdHRsZXNleSBTdHJlZXQvVW5tYW5uZWQgU3RvcmVzIERhdGEuY3N2IikgJT4lCiAgICBjb3JwdXModGV4dF9maWVsZCA9ICJGdWxsLlRleHQiKQpgYGAKCiMjIERlc2NyaXB0aXZlcwoKUGFnZSB0eXBlIGJ5IHNlbnRpbWVudCAoYXMgY2xhc3NpZmllZCBieSBCcmFuZHdhdGNoKToKYGBge3J9CmxpYnJhcnkoImthYmxlRXh0cmEiKQp0YWIgPC0gd2l0aChkb2N2YXJzKGNvcnApLCBwcm9wLnRhYmxlKHRhYmxlKFBhZ2UuVHlwZSwgU2VudGltZW50KSwgbWFyZ2luID0gMSkgKiAxMDApICU+JQogICAgIHJvdW5kKDEpCmNiaW5kKHRhYiwgbiA9IHRhYmxlKGNvcnAkUGFnZS5UeXBlKSkgJT4lCiAgICBrYmwoKSAlPiUKICAgIGFkZF9oZWFkZXJfYWJvdmUoYygiU291cmNlIiwgIihCcmFuZHdhdGNoKSBTZW50aW1lbnQiID0gMywgIlRvdGFsIiA9IDEpLCBhbGlnbiA9IGMoImwiLCAiYyIsICJyIikpICU+JQogICAga2FibGVfc3R5bGluZygic3RyaXBlZCIpCmBgYAoKQ291bnRyeSBicmVha2Rvd25zOgpgYGB7cn0KY29ycCRDb3VudHJ5MiA8LSBpZmVsc2UoY29ycCRDb3VudHJ5ICVpbiUgYygiR2VybWFueSIsICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiLCAiVW5pdGVkIEtpbmdkb20iKSwKICAgICAgICAgICAgICAgICAgICAgICAgY29ycCRDb3VudHJ5LCAiT3RoZXIiKQoKZG9jdmFycyhjb3JwKSAlPiUKICAgIGdyb3VwX2J5KENvdW50cnkyKSAlPiUKICAgIHN1bW1hcml6ZShuID0gbigpKSAlPiUKICAg
IG11dGF0ZShwY3QgPSByb3VuZChuIC8gc3VtKG4pLCAxKSkgJT4lCiAgICBrYmwoY29sLm5hbWVzID0gTlVMTCkgJT4lCiAgICBhZGRfaGVhZGVyX2Fib3ZlKGMoIkNvdW50cnkiLCAiVG90YWwiLCAiJSIpLCBhbGlnbiA9IGMoImwiLCAiciIsICJyIikpICU+JQogICAga2FibGVfc3R5bGluZygic3RyaXBlZCIpCmBgYAoKIyMgU2VudGltZW50IGJ5IGNvdW50cnkKClNlbnRpbWVudCBieSBjb3VudHJ5OgpgYGB7cn0KdGFiIDwtIGRvY3ZhcnMoY29ycCkgJT4lCiAgICBzZWxlY3QoQ291bnRyeTIsIFNlbnRpbWVudCkgJT4lCiAgICB3aXRoKC4sIHByb3AudGFibGUodGFibGUoQ291bnRyeTIsIFNlbnRpbWVudCksIG1hcmdpbiA9IDEpICogMTAwKSAlPiUKICAgIHJvdW5kKDEpCmNiaW5kKHRhYiwgbiA9IHRhYmxlKGNvcnAkU2VudGltZW50KSkgJT4lCiAgICBrYmwoKSAlPiUKICAgIGFkZF9oZWFkZXJfYWJvdmUoYygiQ291bnRyeSIsICIoQnJhbmR3YXRjaCkgU2VudGltZW50IiA9IDMsICJUb3RhbCIgPSAxKSwgYWxpZ24gPSBjKCJsIiwgImMiLCAiciIpKSAlPiUKICAgIGthYmxlX3N0eWxpbmcoInN0cmlwZWQiKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KCJnZ3Bsb3QyIikKYXMuZGF0YS5mcmFtZSh0YWIpICU+JQogICAgZ2dwbG90KGFlcyh4ID0gQ291bnRyeTIsIHkgPSBGcmVxLCBmaWxsID0gU2VudGltZW50KSkgKwogICAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2UoKSkgKwogICAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZSA9ICJQYWlyZWQiKSArCiAgICB4bGFiKCIiKSArCiAgICB5bGFiKCJQZXJjZW50YWdlIikgKwogICAgdGhlbWVfbWluaW1hbCgpCmBgYAoKQ29tcGFyaW5nIHNlbnRpbWVudCBvbiBiYWxhbmNlOgpgYGB7cn0KbGlicmFyeSgicXVhbnRlZGEudGlkeSIpCgpuZXRfc2VudGltZW50IDwtIGRvY3ZhcnMoY29ycCkgJT4lCiAgICBzZWxlY3QoU2VudGltZW50LCBDb3VudHJ5MikgJT4lCiAgICBncm91cF9ieShTZW50aW1lbnQsIENvdW50cnkyKSAlPiUKICAgIHN1bW1hcmlzZShuID0gbigpKSAlPiUKICAgIGZpbHRlcihTZW50aW1lbnQgIT0gIm5ldXRyYWwiKSAlPiUKICAgIHJlbmFtZShDb3VudHJ5ID0gQ291bnRyeTIpCgpsaWJyYXJ5KCJ0aWR5ciIpCm5ldF9zZW50aW1lbnQgPC0gcGl2b3Rfd2lkZXIobmV0X3NlbnRpbWVudCwgbmFtZXNfZnJvbSA9IFNlbnRpbWVudCwgdmFsdWVzX2Zyb20gPSBuKSAlPiUKICAgIG11dGF0ZShTZW50aW1lbnQgPSBsb2cocG9zaXRpdmUgLyBuZWdhdGl2ZSkpCgpnZ3Bsb3QobmV0X3NlbnRpbWVudCwgYWVzKHggPSBDb3VudHJ5LCB5ID0gU2VudGltZW50LCBjb2xvciA9IENvdW50cnksIGZpbGwgPSBDb3VudHJ5KSkgKyAKICAgIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiKSArIAogICAgeWxhYigiTmV0IFNlbnRpbWVudCIpICsKICAgICMgc2NhbGVfY29sb3VyX2JyZXdlcihwYWxldHRlID0gIlBhaXJlZCIpICsKICAgIHRo
ZW1lX21pbmltYWwoKQpgYGAKCgoKIyMgS2V5d29yZHMgYnkgc2VudGltZW50CgpgYGB7cn0KbGlicmFyeSgicXVhbnRlZGEudGV4dHN0YXRzIikKbGlicmFyeSgicXVhbnRlZGEudGV4dHBsb3RzIikKCnRva3MgPC0gdG9rZW5zKGNvcnAsIHJlbW92ZV9wdW5jdCA9IFRSVUUpCmRmbWF0IDwtIGRmbSh0b2tzKQpgYGAKCgojIyMgR2VybWFueQoKYGBge3IsIGZpZy5oZWlnaHQgPSAzLjUsIGZpZy53aWR0aCA9IDV9CmNvbGxzX0RFIDwtIHRva2Vuc19zdWJzZXQodG9rcywgQ291bnRyeSA9PSAiR2VybWFueSIpICU+JQogICAgdG9rZW5zX3JlbW92ZShjKHN0b3B3b3JkcygiZGUiKSwgc3RvcHdvcmRzKCJlbiIpKSwgcGFkZGluZyA9IFRSVUUpICU+JQogICAgdGV4dHN0YXRfY29sbG9jYXRpb25zKCkKdG9rc19ERSA8LSB0b2tlbnNfc3Vic2V0KHRva3MsIENvdW50cnkgPT0gIkdlcm1hbnkiKSAlPiUKICAgIHRva2Vuc19jb21wb3VuZChjb2xsc19ERVsxOjIwMCwgXSkKa2V5d29yZHNfZGUgPC0gZGZtKHRva3NfREUpICU+JQogICAgZGZtX3N1YnNldChTZW50aW1lbnQgIT0gIm5ldXRyYWwiICYgQ291bnRyeSA9PSAiR2VybWFueSIpICU+JQogICAgZGZtX3JlbW92ZShjKHN0b3B3b3JkcygiZGUiKSwgc3RvcHdvcmRzKCJlbiIpKSkgJT4lCiAgICBkZm1fZ3JvdXAoZ3JvdXBzID0gU2VudGltZW50KSAlPiUKICAgIHRleHRzdGF0X2tleW5lc3ModGFyZ2V0ID0gInBvc2l0aXZlIiwgbWVhc3VyZSA9ICJsciIpCgp0ZXh0cGxvdF9rZXluZXNzKGtleXdvcmRzX2RlKQpgYGAKIyMjIFVuaXRlZCBLaW5nZG9tCgpgYGB7ciwgZmlnLmhlaWdodCA9IDMuNSwgZmlnLndpZHRoID0gNX0KY29sbHNfVUsgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgdG9rZW5zX3JlbW92ZShzdG9wd29yZHMoImVuIiksIHBhZGRpbmcgPSBUUlVFKSAlPiUKICAgIHRleHRzdGF0X2NvbGxvY2F0aW9ucygpCnRva3NfVUsgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgdG9rZW5zX2NvbXBvdW5kKGNvbGxzX1VLWzE6MjAwLCBdKQprZXl3b3Jkc191ayA8LSBkZm0odG9rc19VSykgJT4lCiAgICBkZm1fc3Vic2V0KFNlbnRpbWVudCAhPSAibmV1dHJhbCIgJiBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgZGZtX3JlbW92ZShzdG9wd29yZHMoImVuIikpICU+JQogICAgZGZtX2dyb3VwKGdyb3VwcyA9IFNlbnRpbWVudCkgJT4lCiAgICB0ZXh0c3RhdF9rZXluZXNzKHRhcmdldCA9ICJwb3NpdGl2ZSIsIG1lYXN1cmUgPSAibHIiKQoKdGV4dHBsb3Rfa2V5bmVzcyhrZXl3b3Jkc191aykKYGBgCiMjIyBVbml0ZWQgU3RhdGVzCgpgYGB7ciwgZmlnLmhlaWdodCA9IDMuNSwgZmlnLndpZHRoID0gNX0KY29sbHNfVVMgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiKSAlPiUKICAgIHRv
a2Vuc19yZW1vdmUoc3RvcHdvcmRzKCJlbiIpLCBwYWRkaW5nID0gVFJVRSkgJT4lCiAgICB0ZXh0c3RhdF9jb2xsb2NhdGlvbnMoKQp0b2tzX1VTIDwtIHRva2Vuc19zdWJzZXQodG9rcywgQ291bnRyeSA9PSAiVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNhIikgJT4lCiAgICB0b2tlbnNfY29tcG91bmQoY29sbHNfVVNbMToyMDAsIF0pCgprZXl3b3Jkc191cyA8LSBkZm0odG9rc19VUykgJT4lCiAgICBkZm1fc3Vic2V0KFNlbnRpbWVudCAhPSAibmV1dHJhbCIgJiBDb3VudHJ5ID09ICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiKSAlPiUKICAgIGRmbV9yZW1vdmUoc3RvcHdvcmRzKCJlbiIpKSAlPiUKICAgIGRmbV9ncm91cChncm91cHMgPSBTZW50aW1lbnQpICU+JQogICAgdGV4dHN0YXRfa2V5bmVzcyh0YXJnZXQgPSAicG9zaXRpdmUiLCBtZWFzdXJlID0gImxyIikKCnRleHRwbG90X2tleW5lc3Moa2V5d29yZHNfdXMpCmBgYA==