Goals
Step 1: Upload & clean text into quanteda gui
Step 2: Analysis
Sentiment, least priority (can be done in SPSS)
- General sentiment
- Sentiment on individual accounts (variable in column O)
- Sentiment by continent (variable in column K or L)
- Sentiment by country (variable in column M)
Word usage
- Most used words minus stop words
- Most used words per continent
- Most used negative words (but most entries are neutral)
- Most used positive words (but most entries are neutral)
KWIC
- Payment & checkout
- Payment & checkout => sentiment
- Access & check-in
Load the dataset
library("quanteda")
Package version: 2.9.9000
Unicode version: 10.0
ICU version: 61.1
Parallel computing: 12 of 12 threads used.
See https://quanteda.io for tutorials and examples.
library("dplyr")
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Read the unmanned-stores CSV and turn it into a quanteda corpus. The
# document text is taken from the "Full.Text" column; the other columns are
# later accessed through docvars(corp).
corp <- corpus(
  read.csv("~/Dropbox/_Whittlesey Street/Unmanned Stores Data.csv"),
  text_field = "Full.Text"
)
Descriptives
Page type by sentiment (as classified by Brandwatch):
Source |
(Brandwatch) Sentiment |
Total |
| |
negative |
neutral |
positive |
n |
| blog |
13.5 |
80.0 |
6.5 |
934 |
| forum |
25.0 |
67.3 |
7.7 |
651 |
| instagram |
3.6 |
54.0 |
42.5 |
252 |
| news |
7.3 |
88.0 |
4.7 |
10049 |
| reddit |
29.5 |
57.3 |
13.1 |
525 |
| review |
26.7 |
46.7 |
26.7 |
30 |
| tumblr |
19.9 |
70.2 |
9.9 |
1114 |
| twitter |
2.7 |
94.7 |
2.6 |
32079 |
| youtube |
16.8 |
66.7 |
16.5 |
381 |
Country breakdowns:
# Derive a coarser country variable: keep the three focus countries as they
# are and collapse every other value of Country into "Other".
docvars(corp, "Country2") <- ifelse(
  docvars(corp, "Country") %in%
    c("Germany", "United States of America", "United Kingdom"),
  docvars(corp, "Country"),
  "Other"
)
# Number of documents per country group and each group's share of the total.
# The share is scaled to 0-100 so that it matches the "%" column header; an
# unscaled proportion rounded to one decimal collapses to values like 0.0 or
# 0.7 and is unreadable as a percentage.
docvars(corp) %>%
  group_by(Country2) %>%
  summarize(n = n()) %>%
  mutate(pct = round(100 * n / sum(n), 1)) %>%
  # Column names are suppressed here and supplied by the header row instead.
  kbl(col.names = NULL) %>%
  add_header_above(c("Country", "Total", "%"), align = c("l", "r", "r")) %>%
  kable_styling("striped")
Country |
Total |
% |
| Germany |
592 |
0.0 |
| Other |
30562 |
0.7 |
| United Kingdom |
2468 |
0.1 |
| United States of America |
12393 |
0.3 |
Sentiment by country
Sentiment by country:
# Cross-tabulate country group (rows) against sentiment (columns) and express
# each row as percentages that sum to 100, rounded to one decimal place.
tab <- round(
  100 * prop.table(
    with(docvars(corp), table(Country2, Sentiment)),
    margin = 1
  ),
  1
)
# Render the row-percentage table together with the number of documents in
# each country group.
# The totals must be tabulated over Country2, the row variable of `tab`.
# Tabulating Sentiment instead yields three counts that cbind() recycles over
# the four country rows, which produces wrong totals and the warning
# "number of rows of result is not a multiple of vector length".
# Indexing by rownames(tab) keeps the counts aligned with the table rows.
cbind(tab, n = table(corp$Country2)[rownames(tab)]) %>%
  kbl() %>%
  add_header_above(
    c("Country", "(Brandwatch) Sentiment" = 3, "Total" = 1),
    align = c("l", "c", "r")
  ) %>%
  kable_styling("striped")
number of rows of result is not a multiple of vector length (arg 2)
Country |
(Brandwatch) Sentiment |
Total |
| |
negative |
neutral |
positive |
n |
| Germany |
2.9 |
88.3 |
8.8 |
2350 |
| Other |
5.1 |
91.4 |
3.4 |
41887 |
| United Kingdom |
4.5 |
90.4 |
5.1 |
1778 |
| United States of America |
5.2 |
90.3 |
4.4 |
2350 |
library("ggplot2")
# Grouped bar chart of the sentiment percentages held in `tab`: one cluster of
# bars per country group, one bar per sentiment class.
tab %>%
  as.data.frame() %>%
  ggplot(aes(x = Country2, y = Freq, fill = Sentiment)) +
  geom_col(position = position_dodge()) +
  scale_fill_brewer(palette = "Paired") +
  labs(x = "", y = "Percentage") +
  theme_minimal()

Comparing sentiment on balance:
library("quanteda.tidy")
Attaching package: ‘quanteda.tidy’
The following object is masked from ‘package:stats’:
filter
# Count documents for every sentiment / country-group combination, keeping
# only the non-neutral classes needed for the positive-vs-negative comparison.
# `.groups = "drop"` returns an ungrouped table: without it summarise() leaves
# the result grouped by Sentiment (and prints a message about it), so every
# later step would silently operate per sentiment group.
net_sentiment <- docvars(corp) %>%
  select(Sentiment, Country2) %>%
  group_by(Sentiment, Country2) %>%
  summarise(n = n(), .groups = "drop") %>%
  filter(Sentiment != "neutral") %>%
  rename(Country = Country2)
`summarise()` has grouped output by 'Sentiment'. You can override using the `.groups` argument.
library("tidyr")
# Spread the counts into one column per sentiment class, then compute net
# sentiment as the log ratio of positive to negative documents: above zero
# when positives outnumber negatives, below zero otherwise.
net_sentiment <- net_sentiment %>%
  pivot_wider(names_from = Sentiment, values_from = n) %>%
  mutate(Sentiment = log(positive / negative))
# Bar chart of the net sentiment (log positive/negative ratio) per country
# group; bars are both outlined and filled by country.
ggplot(
  net_sentiment,
  aes(x = Country, y = Sentiment, colour = Country, fill = Country)
) +
  geom_col() +
  labs(y = "Net Sentiment") +
  # Brewer palette deliberately left disabled:
  # scale_colour_brewer(palette = "Paired") +
  theme_minimal()

Keywords by sentiment
library("quanteda.textstats")
library("quanteda.textplots")
# Tokenise the whole corpus without punctuation, then build the
# document-feature matrix from those tokens.
toks <- corp %>%
  tokens(remove_punct = TRUE)
dfmat <- toks %>%
  dfm()
Germany
# Keyness of terms in positive versus negative documents from Germany.

# Subset to German documents once and reuse the result for both the
# collocation scoring and the compounding step.
toks_DE <- tokens_subset(toks, Country == "Germany")

# Score collocations with German and English stopwords removed.
# padding = TRUE leaves a placeholder where a stopword was removed
# (presumably so that words on either side are not scored as adjacent).
colls_DE <- toks_DE %>%
  tokens_remove(c(stopwords("de"), stopwords("en")), padding = TRUE) %>%
  textstat_collocations()

# Join the first 200 collocations (as returned) into single tokens. The index
# is capped at nrow(): a bare 1:200 produces all-NA rows whenever fewer than
# 200 collocations are found.
toks_DE <- tokens_compound(
  toks_DE,
  colls_DE[seq_len(min(200, nrow(colls_DE))), ]
)

# Drop neutral documents and stopwords, pool documents by sentiment, and
# compare the "positive" group against the rest with the likelihood-ratio
# keyness measure.
keywords_de <- dfm(toks_DE) %>%
  dfm_subset(Sentiment != "neutral" & Country == "Germany") %>%
  dfm_remove(c(stopwords("de"), stopwords("en"))) %>%
  dfm_group(groups = Sentiment) %>%
  textstat_keyness(target = "positive", measure = "lr")

textplot_keyness(keywords_de)

United Kingdom
# Keyness of terms in positive versus negative documents from the
# United Kingdom.

# Subset to UK documents once and reuse the result for both the collocation
# scoring and the compounding step.
toks_UK <- tokens_subset(toks, Country == "United Kingdom")

# Score collocations with English stopwords removed.
# padding = TRUE leaves a placeholder where a stopword was removed
# (presumably so that words on either side are not scored as adjacent).
colls_UK <- toks_UK %>%
  tokens_remove(stopwords("en"), padding = TRUE) %>%
  textstat_collocations()

# Join the first 200 collocations (as returned) into single tokens. The index
# is capped at nrow(): a bare 1:200 produces all-NA rows whenever fewer than
# 200 collocations are found.
toks_UK <- tokens_compound(
  toks_UK,
  colls_UK[seq_len(min(200, nrow(colls_UK))), ]
)

# Drop neutral documents and stopwords, pool documents by sentiment, and
# compare the "positive" group against the rest with the likelihood-ratio
# keyness measure.
keywords_uk <- dfm(toks_UK) %>%
  dfm_subset(Sentiment != "neutral" & Country == "United Kingdom") %>%
  dfm_remove(stopwords("en")) %>%
  dfm_group(groups = Sentiment) %>%
  textstat_keyness(target = "positive", measure = "lr")

textplot_keyness(keywords_uk)

United States

LS0tCnRpdGxlOiAiVW5tYW5uZWQgU3RvcmUgQW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIEdvYWxzCgpTdGVwIDE6IFVwbG9hZCAmIGNsZWFuIHRleHQgaW50byBxdWFudGVkYSBndWkKClN0ZXAgMjogQW5hbHlzaXMgCgpTZW50aW1lbnQsIGxlYXN0IHByaW9yaXR5IChjYW4gYmUgZG9uZSBpbiBTUFNTKQoKLSAgCUdlbmVyYWwgc2VudGltZW50IAotICAJU2VudGltZW50IG9uIGluZGl2aWR1YWwgYWNjb3VudHMgKHZhcmlhYmxlIGluIHJvdyBPKQotICAJU2VudGltZW50IGJ5IGNvbnRpbmVudCAodmFyaWFibGUgcm93IEsgb3IgTCkKLSAgCVNlbnRpbWVudCBieSBjb3VudHJ5ICh2YXJpYWJsZSByb3cgTSkgIAoKV29yZCB1c2FnZQoKLSAgCU1vc3QgdXNlZCB3b3JkcyBtaW51cyBzdG9wIHdvcmRzIAotICAJTW9zdCB1c2VkIHdvcmRzIHBlciBjb250aW5lbnQKLSAgCU1vc3QgdXNlZCBuZWdhdGl2ZSB3b3JkcyAoYnV0IG1vc3QgZW50cmllcyBhcmUgbmV1dHJhbCkgCi0gIAlNb3N0IHVzZWQgcG9zaXRpdmUgd29yZHMgKGJ1dCBtb3N0IGVudHJpZXMgYXJlIG5ldXRyYWwpCgpLV0lDCgotICAJUGF5bWVudCAmIGNoZWNrb3V0Ci0gIAlQYXltZW50ICYgY2hlY2tvdXQgPT4gc2VudGltZW50Ci0gIAlBY2Nlc3MgJiBjaGVjay1pbiAgCgoKCiMjIExvYWQgdGhlIGRhdGFzZXQKCmBgYHtyfQpsaWJyYXJ5KCJxdWFudGVkYSIpCmxpYnJhcnkoImRwbHlyIikKCmNvcnAgPC0gcmVhZC5jc3YoIn4vRHJvcGJveC9fV2hpdHRsZXNleSBTdHJlZXQvVW5tYW5uZWQgU3RvcmVzIERhdGEuY3N2IikgJT4lCiAgICBjb3JwdXModGV4dF9maWVsZCA9ICJGdWxsLlRleHQiKQpgYGAKCiMjIERlc2NyaXB0aXZlcwoKUGFnZSB0eXBlIGJ5IHNlbnRpbWVudCAoYXMgY2xhc3NpZmllZCBieSBCcmFuZHdhdGNoKToKYGBge3J9CmxpYnJhcnkoImthYmxlRXh0cmEiKQp0YWIgPC0gd2l0aChkb2N2YXJzKGNvcnApLCBwcm9wLnRhYmxlKHRhYmxlKFBhZ2UuVHlwZSwgU2VudGltZW50KSwgbWFyZ2luID0gMSkgKiAxMDApICU+JQogICAgIHJvdW5kKDEpCmNiaW5kKHRhYiwgbiA9IHRhYmxlKGNvcnAkUGFnZS5UeXBlKSkgJT4lCiAgICBrYmwoKSAlPiUKICAgIGFkZF9oZWFkZXJfYWJvdmUoYygiU291cmNlIiwgIihCcmFuZHdhdGNoKSBTZW50aW1lbnQiID0gMywgIlRvdGFsIiA9IDEpLCBhbGlnbiA9IGMoImwiLCAiYyIsICJyIikpICU+JQogICAga2FibGVfc3R5bGluZygic3RyaXBlZCIpCmBgYAoKQ291bnRyeSBicmVha2Rvd25zOgpgYGB7cn0KY29ycCRDb3VudHJ5MiA8LSBpZmVsc2UoY29ycCRDb3VudHJ5ICVpbiUgYygiR2VybWFueSIsICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiLCAiVW5pdGVkIEtpbmdkb20iKSwKICAgICAgICAgICAgICAgICAgICAgICAgY29ycCRDb3VudHJ5LCAiT3RoZXIiKQoKZG9jdmFycyhjb3JwKSAlPiUKICAgIGdyb3VwX2J5KENvdW50cnkyKSAlPiUKICAgIHN1bW1hcml6ZShuID0gbigpKSAlPiUKICAg
IG11dGF0ZShwY3QgPSByb3VuZChuIC8gc3VtKG4pLCAxKSkgJT4lCiAgICBrYmwoY29sLm5hbWVzID0gTlVMTCkgJT4lCiAgICBhZGRfaGVhZGVyX2Fib3ZlKGMoIkNvdW50cnkiLCAiVG90YWwiLCAiJSIpLCBhbGlnbiA9IGMoImwiLCAiciIsICJyIikpICU+JQogICAga2FibGVfc3R5bGluZygic3RyaXBlZCIpCmBgYAoKIyMgU2VudGltZW50IGJ5IGNvdW50cnkKClNlbnRpbWVudCBieSBjb3VudHJ5OgpgYGB7cn0KdGFiIDwtIGRvY3ZhcnMoY29ycCkgJT4lCiAgICBzZWxlY3QoQ291bnRyeTIsIFNlbnRpbWVudCkgJT4lCiAgICB3aXRoKC4sIHByb3AudGFibGUodGFibGUoQ291bnRyeTIsIFNlbnRpbWVudCksIG1hcmdpbiA9IDEpICogMTAwKSAlPiUKICAgIHJvdW5kKDEpCmNiaW5kKHRhYiwgbiA9IHRhYmxlKGNvcnAkU2VudGltZW50KSkgJT4lCiAgICBrYmwoKSAlPiUKICAgIGFkZF9oZWFkZXJfYWJvdmUoYygiQ291bnRyeSIsICIoQnJhbmR3YXRjaCkgU2VudGltZW50IiA9IDMsICJUb3RhbCIgPSAxKSwgYWxpZ24gPSBjKCJsIiwgImMiLCAiciIpKSAlPiUKICAgIGthYmxlX3N0eWxpbmcoInN0cmlwZWQiKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KCJnZ3Bsb3QyIikKYXMuZGF0YS5mcmFtZSh0YWIpICU+JQogICAgZ2dwbG90KGFlcyh4ID0gQ291bnRyeTIsIHkgPSBGcmVxLCBmaWxsID0gU2VudGltZW50KSkgKwogICAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIHBvc2l0aW9uID0gcG9zaXRpb25fZG9kZ2UoKSkgKwogICAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZSA9ICJQYWlyZWQiKSArCiAgICB4bGFiKCIiKSArCiAgICB5bGFiKCJQZXJjZW50YWdlIikgKwogICAgdGhlbWVfbWluaW1hbCgpCmBgYAoKQ29tcGFyaW5nIHNlbnRpbWVudCBvbiBiYWxhbmNlOgpgYGB7cn0KbGlicmFyeSgicXVhbnRlZGEudGlkeSIpCgpuZXRfc2VudGltZW50IDwtIGRvY3ZhcnMoY29ycCkgJT4lCiAgICBzZWxlY3QoU2VudGltZW50LCBDb3VudHJ5MikgJT4lCiAgICBncm91cF9ieShTZW50aW1lbnQsIENvdW50cnkyKSAlPiUKICAgIHN1bW1hcmlzZShuID0gbigpKSAlPiUKICAgIGZpbHRlcihTZW50aW1lbnQgIT0gIm5ldXRyYWwiKSAlPiUKICAgIHJlbmFtZShDb3VudHJ5ID0gQ291bnRyeTIpCgpsaWJyYXJ5KCJ0aWR5ciIpCm5ldF9zZW50aW1lbnQgPC0gcGl2b3Rfd2lkZXIobmV0X3NlbnRpbWVudCwgbmFtZXNfZnJvbSA9IFNlbnRpbWVudCwgdmFsdWVzX2Zyb20gPSBuKSAlPiUKICAgIG11dGF0ZShTZW50aW1lbnQgPSBsb2cocG9zaXRpdmUgLyBuZWdhdGl2ZSkpCgpnZ3Bsb3QobmV0X3NlbnRpbWVudCwgYWVzKHggPSBDb3VudHJ5LCB5ID0gU2VudGltZW50LCBjb2xvciA9IENvdW50cnksIGZpbGwgPSBDb3VudHJ5KSkgKyAKICAgIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiKSArIAogICAgeWxhYigiTmV0IFNlbnRpbWVudCIpICsKICAgICMgc2NhbGVfY29sb3VyX2JyZXdlcihwYWxldHRlID0gIlBhaXJlZCIpICsKICAgIHRo
ZW1lX21pbmltYWwoKQpgYGAKCgoKIyMgS2V5d29yZHMgYnkgc2VudGltZW50CgpgYGB7cn0KbGlicmFyeSgicXVhbnRlZGEudGV4dHN0YXRzIikKbGlicmFyeSgicXVhbnRlZGEudGV4dHBsb3RzIikKCnRva3MgPC0gdG9rZW5zKGNvcnAsIHJlbW92ZV9wdW5jdCA9IFRSVUUpCmRmbWF0IDwtIGRmbSh0b2tzKQpgYGAKCgojIyMgR2VybWFueQoKYGBge3IsIGZpZy5oZWlnaHQgPSAzLjUsIGZpZy53aWR0aCA9IDV9CmNvbGxzX0RFIDwtIHRva2Vuc19zdWJzZXQodG9rcywgQ291bnRyeSA9PSAiR2VybWFueSIpICU+JQogICAgdG9rZW5zX3JlbW92ZShjKHN0b3B3b3JkcygiZGUiKSwgc3RvcHdvcmRzKCJlbiIpKSwgcGFkZGluZyA9IFRSVUUpICU+JQogICAgdGV4dHN0YXRfY29sbG9jYXRpb25zKCkKdG9rc19ERSA8LSB0b2tlbnNfc3Vic2V0KHRva3MsIENvdW50cnkgPT0gIkdlcm1hbnkiKSAlPiUKICAgIHRva2Vuc19jb21wb3VuZChjb2xsc19ERVsxOjIwMCwgXSkKa2V5d29yZHNfZGUgPC0gZGZtKHRva3NfREUpICU+JQogICAgZGZtX3N1YnNldChTZW50aW1lbnQgIT0gIm5ldXRyYWwiICYgQ291bnRyeSA9PSAiR2VybWFueSIpICU+JQogICAgZGZtX3JlbW92ZShjKHN0b3B3b3JkcygiZGUiKSwgc3RvcHdvcmRzKCJlbiIpKSkgJT4lCiAgICBkZm1fZ3JvdXAoZ3JvdXBzID0gU2VudGltZW50KSAlPiUKICAgIHRleHRzdGF0X2tleW5lc3ModGFyZ2V0ID0gInBvc2l0aXZlIiwgbWVhc3VyZSA9ICJsciIpCgp0ZXh0cGxvdF9rZXluZXNzKGtleXdvcmRzX2RlKQpgYGAKIyMjIFVuaXRlZCBLaW5nZG9tCgpgYGB7ciwgZmlnLmhlaWdodCA9IDMuNSwgZmlnLndpZHRoID0gNX0KY29sbHNfVUsgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgdG9rZW5zX3JlbW92ZShzdG9wd29yZHMoImVuIiksIHBhZGRpbmcgPSBUUlVFKSAlPiUKICAgIHRleHRzdGF0X2NvbGxvY2F0aW9ucygpCnRva3NfVUsgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgdG9rZW5zX2NvbXBvdW5kKGNvbGxzX1VLWzE6MjAwLCBdKQprZXl3b3Jkc191ayA8LSBkZm0odG9rc19VSykgJT4lCiAgICBkZm1fc3Vic2V0KFNlbnRpbWVudCAhPSAibmV1dHJhbCIgJiBDb3VudHJ5ID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQogICAgZGZtX3JlbW92ZShzdG9wd29yZHMoImVuIikpICU+JQogICAgZGZtX2dyb3VwKGdyb3VwcyA9IFNlbnRpbWVudCkgJT4lCiAgICB0ZXh0c3RhdF9rZXluZXNzKHRhcmdldCA9ICJwb3NpdGl2ZSIsIG1lYXN1cmUgPSAibHIiKQoKdGV4dHBsb3Rfa2V5bmVzcyhrZXl3b3Jkc191aykKYGBgCiMjIyBVbml0ZWQgU3RhdGVzCgpgYGB7ciwgZmlnLmhlaWdodCA9IDMuNSwgZmlnLndpZHRoID0gNX0KY29sbHNfVVMgPC0gdG9rZW5zX3N1YnNldCh0b2tzLCBDb3VudHJ5ID09ICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiKSAlPiUKICAgIHRv
a2Vuc19yZW1vdmUoc3RvcHdvcmRzKCJlbiIpLCBwYWRkaW5nID0gVFJVRSkgJT4lCiAgICB0ZXh0c3RhdF9jb2xsb2NhdGlvbnMoKQp0b2tzX1VTIDwtIHRva2Vuc19zdWJzZXQodG9rcywgQ291bnRyeSA9PSAiVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNhIikgJT4lCiAgICB0b2tlbnNfY29tcG91bmQoY29sbHNfVVNbMToyMDAsIF0pCgprZXl3b3Jkc191cyA8LSBkZm0odG9rc19VUykgJT4lCiAgICBkZm1fc3Vic2V0KFNlbnRpbWVudCAhPSAibmV1dHJhbCIgJiBDb3VudHJ5ID09ICJVbml0ZWQgU3RhdGVzIG9mIEFtZXJpY2EiKSAlPiUKICAgIGRmbV9yZW1vdmUoc3RvcHdvcmRzKCJlbiIpKSAlPiUKICAgIGRmbV9ncm91cChncm91cHMgPSBTZW50aW1lbnQpICU+JQogICAgdGV4dHN0YXRfa2V5bmVzcyh0YXJnZXQgPSAicG9zaXRpdmUiLCBtZWFzdXJlID0gImxyIikKCnRleHRwbG90X2tleW5lc3Moa2V5d29yZHNfdXMpCmBgYA==