[Video]
What is text mining? Text mining is the process of distilling actionable insights from text.
# Load qdap
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
## Loading required package: qdapTools
## Loading required package: RColorBrewer
##
## Attaching package: 'qdap'
## The following objects are masked from 'package:base':
##
## Filter, proportions
# Print new_text to the console
new_text
## [1] "DataCamp is the first online learning platform that focuses on building the best learning experience specifically for Data Science. We have offices in New York, London, and Belgium, and to date, we trained over 3.8 million (aspiring) data scientists in over 150 countries. These data science enthusiasts completed more than 185 million exercises. You can take free beginner courses, or subscribe for $29/month to get access to all premium courses."
# Find the 10 most frequent terms: term_count
term_count <- freq_terms(new_text, 10)
# Plot term_count
plot(term_count)
[Video]
## coffee_data_file (the path to the tweets CSV) is preloaded in your workspace
# Import text data from CSV, no factors
tweets <- read.csv(coffee_data_file, stringsAsFactors = FALSE)
# View the structure of tweets
str(tweets)
## 'data.frame': 1000 obs. of 15 variables:
## $ num : int 1 2 3 4 5 6 7 8 9 10 ...
## $ text : chr "@ayyytylerb that is so true drink lots of coffee" "RT @bryzy_brib: Senior March tmw morning at 7:25 A.M. in the SENIOR lot. Get up early, make yo coffee/breakfast"| __truncated__ "If you believe in #gunsense tomorrow would be a very good day to have your coffee any place BUT @Starbucks Guns"| __truncated__ "My cute coffee mug. http://t.co/2udvMU6XIG" ...
## $ favorited : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ replyToSN : chr "ayyytylerb" NA NA NA ...
## $ created : chr "8/9/2013 2:43" "8/9/2013 2:43" "8/9/2013 2:43" "8/9/2013 2:43" ...
## $ truncated : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ replyToSID : num 3.66e+17 NA NA NA NA ...
## $ id : num 3.66e+17 3.66e+17 3.66e+17 3.66e+17 3.66e+17 ...
## $ replyToUID : int 1637123977 NA NA NA NA NA NA 1316942208 NA NA ...
## $ statusSource: chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>" "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>" "web" "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>" ...
## $ screenName : chr "thejennagibson" "carolynicosia" "janeCkay" "AlexandriaOOTD" ...
## $ retweetCount: int 0 1 0 0 2 0 0 0 1 2 ...
## $ retweeted : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ longitude : logi NA NA NA NA NA NA ...
## $ latitude : logi NA NA NA NA NA NA ...
# Isolate text from tweets
coffee_tweets <- tweets$text
# Load tm
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:qdap':
##
## ngrams
##
## Attaching package: 'tm'
## The following objects are masked from 'package:qdap':
##
## as.DocumentTermMatrix, as.TermDocumentMatrix
# Make a vector source from coffee_tweets
coffee_source <- VectorSource(coffee_tweets)
## coffee_source is already in your workspace
# Make a volatile corpus from coffee_source
coffee_corpus <- VCorpus(coffee_source)
# Print out coffee_corpus
coffee_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1000
# Print the 15th tweet in coffee_corpus
coffee_corpus[[15]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 111
# Print the contents of the 15th tweet in coffee_corpus
coffee_corpus[[15]][1]
## $content
## [1] "@HeatherWhaley I was about 2 joke it takes 2 hands to hold hot coffee...then I read headline! #Don'tDrinkNShoot"
# Now use content to review the plain text of the 10th tweet
content(coffee_corpus[[10]])
## [1] "RT @Dorkv76: I can't care before coffee."
# Create a DataframeSource from the example text
df_source <- DataframeSource(example_text)
# Convert df_source to a volatile corpus
df_corpus <- VCorpus(df_source)
# Examine df_corpus
df_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 2
## Content: documents: 3
# Examine df_corpus metadata
meta(df_corpus)
## author date
## 1 Author1 1514953399
## 2 Author2 1514866998
## 3 Author3 1514780598
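vec_corpus is also preloaded. A plausible construction, assuming it was built from the text column alone, which would explain why no author/date metadata comes along:
# Hypothetical reconstruction of vec_corpus: a vector source keeps only the
# raw text, so the extra columns of example_text are dropped
vec_source <- VectorSource(example_text$text)
vec_corpus <- VCorpus(vec_source)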
# Compare the number of documents in the vector corpus
vec_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 3
# Compare metadata in the vector corpus
meta(vec_corpus)
## data frame with 0 columns and 3 rows
[Video]
# Create the object: text
text <- "<b>She</b> woke up at 6 A.M. It\'s so early! She was only 10% awake and began drinking coffee in front of her computer."
# Make lowercase
tolower(text)
## [1] "<b>she</b> woke up at 6 a.m. it's so early! she was only 10% awake and began drinking coffee in front of her computer."
# Remove punctuation (note: the <b> tags get mangled into "bSheb";
# qdap's bracketX() below handles markup more gracefully)
removePunctuation(text)
## [1] "bSheb woke up at 6 AM Its so early She was only 10 awake and began drinking coffee in front of her computer"
# Remove numbers
removeNumbers(text)
## [1] "<b>She</b> woke up at A.M. It's so early! She was only % awake and began drinking coffee in front of her computer."
# Remove whitespace
stripWhitespace(text)
## [1] "<b>She</b> woke up at 6 A.M. It's so early! She was only 10% awake and began drinking coffee in front of her computer."
## text is still loaded in your workspace
# Remove text within brackets
bracketX(text)
## [1] "She woke up at 6 A.M. It's so early! She was only 10% awake and began drinking coffee in front of her computer."
# Replace numbers with words
replace_number(text)
## [1] "<b>She</b> woke up at six A.M. It's so early! She was only ten% awake and began drinking coffee in front of her computer."
# Replace abbreviations
replace_abbreviation(text)
## [1] "<b>She</b> woke up at 6 AM It's so early! She was only 10% awake and began drinking coffee in front of her computer."
# Replace contractions
replace_contraction(text)
## [1] "<b>She</b> woke up at 6 A.M. it is so early! She was only 10% awake and began drinking coffee in front of her computer."
# Replace symbols with words
replace_symbol(text)
## [1] "<b>She</b> woke up at 6 A.M. It's so early! She was only 10 percent awake and began drinking coffee in front of her computer."
## text is preloaded into your workspace
# List standard English stop words
stopwords("en")
## [1] "i" "me" "my" "myself" "we"
## [6] "our" "ours" "ourselves" "you" "your"
## [11] "yours" "yourself" "yourselves" "he" "him"
## [16] "his" "himself" "she" "her" "hers"
## [21] "herself" "it" "its" "itself" "they"
## [26] "them" "their" "theirs" "themselves" "what"
## [31] "which" "who" "whom" "this" "that"
## [36] "these" "those" "am" "is" "are"
## [41] "was" "were" "be" "been" "being"
## [46] "have" "has" "had" "having" "do"
## [51] "does" "did" "doing" "would" "should"
## [56] "could" "ought" "i'm" "you're" "he's"
## [61] "she's" "it's" "we're" "they're" "i've"
## [66] "you've" "we've" "they've" "i'd" "you'd"
## [71] "he'd" "she'd" "we'd" "they'd" "i'll"
## [76] "you'll" "he'll" "she'll" "we'll" "they'll"
## [81] "isn't" "aren't" "wasn't" "weren't" "hasn't"
## [86] "haven't" "hadn't" "doesn't" "don't" "didn't"
## [91] "won't" "wouldn't" "shan't" "shouldn't" "can't"
## [96] "cannot" "couldn't" "mustn't" "let's" "that's"
## [101] "who's" "what's" "here's" "there's" "when's"
## [106] "where's" "why's" "how's" "a" "an"
## [111] "the" "and" "but" "if" "or"
## [116] "because" "as" "until" "while" "of"
## [121] "at" "by" "for" "with" "about"
## [126] "against" "between" "into" "through" "during"
## [131] "before" "after" "above" "below" "to"
## [136] "from" "up" "down" "in" "out"
## [141] "on" "off" "over" "under" "again"
## [146] "further" "then" "once" "here" "there"
## [151] "when" "where" "why" "how" "all"
## [156] "any" "both" "each" "few" "more"
## [161] "most" "other" "some" "such" "no"
## [166] "nor" "not" "only" "own" "same"
## [171] "so" "than" "too" "very"
# Print text without standard stop words
removeWords(text, stopwords("en"))
## [1] "<b>She</b> woke 6 A.M. It's early! She 10% awake began drinking coffee front computer."
# Add "coffee" and "bean" to the list: new_stops
new_stops <- c("coffee", "bean", stopwords("en"))
# Remove stop words from text
removeWords(text, new_stops)
## [1] "<b>She</b> woke 6 A.M. It's early! She 10% awake began drinking front computer."
# Create complicate
complicate <- c("complicated", "complication", "complicatedly")
# Perform word stemming: stem_doc
stem_doc <- stemDocument(complicate)
# Create the completion dictionary: comp_dict
comp_dict <- "complicate"
# Perform stem completion: complete_text
complete_text <- stemCompletion(stem_doc, comp_dict)
# Print complete_text
complete_text
## complic complic complic
## "complicate" "complicate" "complicate"
## text_data ("In a complicated haste, Tom rushed to fix a new complication, too complicatedly.") is preloaded in your workspace
# Remove punctuation: rm_punc
rm_punc <- removePunctuation(text_data)
# Create character vector: n_char_vec
n_char_vec <- unlist(strsplit(rm_punc, split = " "))
# Perform word stemming: stem_doc
stem_doc <- stemDocument(n_char_vec)
# Print stem_doc
stem_doc
## [1] "In" "a" "complic" "hast" "Tom" "rush" "to"
## [8] "fix" "a" "new" "complic" "too" "complic"
## comp_dict has been expanded in your workspace to hold every word of the original sentence
# Re-complete stemmed document: complete_doc
complete_doc <- stemCompletion(stem_doc, comp_dict)
# Print complete_doc
complete_doc
## In a complic hast Tom rush
## "In" "a" "complicate" "haste" "Tom" "rush"
## to fix a new complic too
## "to" "fix" "a" "new" "complicate" "too"
## complic
## "complicate"
# Alter the function code to match the instructions
clean_corpus <- function(corpus){
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeWords, words = c(stopwords("en"), "coffee", "mug"))
corpus <- tm_map(corpus, stripWhitespace)
return(corpus)
}
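Note that tm_map() accepts tm's own transformations (removePunctuation, stripWhitespace, and so on) directly, but any other function, like base tolower() above or a qdap replacement, must be wrapped in content_transformer() so that a corpus comes back. A hedged variant of the cleaner that also expands abbreviations (clean_corpus2 is an illustrative name, not from the exercise):
# Expand abbreviations before stripping punctuation, since abbreviations
# are recognized by their periods
clean_corpus2 <- function(corpus){
  corpus <- tm_map(corpus, content_transformer(replace_abbreviation))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeWords, words = c(stopwords("en"), "coffee", "mug"))
  corpus <- tm_map(corpus, stripWhitespace)
  return(corpus)
}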
## tweet_corp (the coffee tweet corpus) is preloaded in your workspace
# Apply your customized function to tweet_corp: clean_corp
clean_corp <- clean_corpus(tweet_corp)
# Print out a cleaned up tweet
content(clean_corp[[227]])
## [1] "also dogs arent smart enough dip donut eat part thats dipped ladyandthetramp"
# Print out the same tweet in the original form
tweets$text[227]
## [1] "Also, dogs aren't smart enough to dip the donut in the coffee and then eat the part that's been dipped. #ladyandthetramp"
[Video]
When should you use the term-document matrix instead of the document-term matrix? Use the TDM when you want terms as rows, which suits term-level operations such as the rowSums() word counts later in this section.
# Create the document-term matrix from the corpus
coffee_dtm <- DocumentTermMatrix(clean_corp)
# Print out coffee_dtm data
coffee_dtm
## <<DocumentTermMatrix (documents: 1000, terms: 3075)>>
## Non-/sparse entries: 7384/3067616
## Sparsity : 100%
## Maximal term length: 27
## Weighting : term frequency (tf)
# Convert coffee_dtm to a matrix
coffee_m <- as.matrix(coffee_dtm)
# Print the dimensions of coffee_m
dim(coffee_m)
## [1] 1000 3075
# Review a portion of the matrix for Starbucks-related terms
coffee_m[25:35, c("star", "starbucks")]
## Terms
## Docs star starbucks
## 25 0 0
## 26 0 1
## 27 0 1
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 0 0
## 34 0 1
## 35 0 0
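As a quick check that avoids converting the sparse DTM into a dense matrix, tm's findFreqTerms() lists frequent terms directly (the threshold of 25 is an arbitrary choice, not part of the exercise).
# List all terms appearing at least 25 times across the corpus
findFreqTerms(coffee_dtm, lowfreq = 25)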
# Create a term-document matrix from the corpus
coffee_tdm <- TermDocumentMatrix(clean_corp)
# Print coffee_tdm data
coffee_tdm
## <<TermDocumentMatrix (terms: 3075, documents: 1000)>>
## Non-/sparse entries: 7384/3067616
## Sparsity : 100%
## Maximal term length: 27
## Weighting : term frequency (tf)
# Convert coffee_tdm to a matrix
coffee_m <- as.matrix(coffee_tdm)
# Print the dimensions of the matrix
dim(coffee_m)
## [1] 3075 1000
# Review a portion of the matrix
coffee_m[c("star", "starbucks"), 25:35]
## Docs
## Terms 25 26 27 28 29 30 31 32 33 34 35
## star 0 0 0 0 0 0 0 0 0 0 0
## starbucks 0 1 1 0 0 0 0 0 0 1 0
[Video]
What is the best business reason to create a text mining visual like a word cloud? Visuals condense term frequencies into something stakeholders can interpret at a glance.
## coffee_tdm is still loaded in your workspace
# Convert coffee_tdm to a matrix
coffee_m <- as.matrix(coffee_tdm)
# Calculate the row sums of coffee_m
term_frequency <- rowSums(coffee_m)
# Sort term_frequency in decreasing order
term_frequency <- sort(term_frequency, decreasing = TRUE)
# View the top 10 most common words
term_frequency[1:10]
## like cup shop just get morning want drinking
## 111 103 69 66 62 57 49 47
## can looks
## 45 45
# Plot a barchart of the 10 most common words
barplot(term_frequency[1:10], col = "tan", las = 2)
# Create frequency
frequency <- freq_terms(
tweets$text,
top = 10,
at.least = 3,
stopwords = "Top200Words"
)
# Make a frequency bar chart
plot(frequency)
# Create frequency
frequency <- freq_terms(
tweets$text,
top = 10,
at.least = 3,
stopwords = stopwords("english")
)
# Make a frequency bar chart
plot(frequency)
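The two bar charts differ because qdap's Top200Words and tm's stopwords("english") are different lists; a quick way to inspect the mismatch (Top200Words comes from qdapDictionaries, which was attached with qdap):
# Words tm treats as stop words that qdap's top-200 list does not contain
head(setdiff(stopwords("english"), Top200Words))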
[Video]
# Load wordcloud package
library(wordcloud)
# Print the first 10 entries in term_frequency
term_frequency[1:10]
## like cup shop just get morning want drinking
## 111 103 69 66 62 57 49 47
## can looks
## 45 45
# Vector of terms
terms_vec <- names(term_frequency)
# Create a wordcloud for the values in term_frequency
wordcloud(terms_vec, term_frequency,
max.words = 50, colors = "red")
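wordcloud() also has layout parameters worth knowing; the values below are illustrative assumptions, not part of the exercise.
# scale sets the size range of the words; random.order = FALSE places the
# most frequent terms at the center of the cloud
wordcloud(terms_vec, term_frequency, max.words = 50, colors = "red",
          scale = c(3, 0.5), random.order = FALSE)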
# Review a "cleaned" tweet
content(chardonnay_corp[[24]])
## [1] "I brought some Marvin Gaye and Chardonnay."
# Add to stopwords
stops <- c(stopwords(kind = 'en'), 'chardonnay')
# Review last 6 stopwords
tail(stops)
## [1] "same" "so" "than" "too" "very"
## [6] "chardonnay"
# Apply to a corpus
cleaned_chardonnay_corp <- tm_map(chardonnay_corp, removeWords, stops)
# Review a "cleaned" tweet again
content(cleaned_chardonnay_corp[[24]])
## [1] "I brought Marvin Gaye Chardonnay."
## chardonnay_words (a named term-frequency vector) is preloaded in your workspace
# Sort chardonnay_words in descending order
sorted_chardonnay_words <- sort(chardonnay_words, decreasing = TRUE)
# Print the 6 most frequent chardonnay terms
head(sorted_chardonnay_words)
## marvin gaye just like bottle lol
## 104 76 75 55 47 43
# Get a terms vector
terms_vec <- names(chardonnay_words)
# Create a wordcloud for the values in chardonnay_words
wordcloud(terms_vec, chardonnay_words,
max.words = 50, colors = "red")
# Print the list of colors
colors()
## [1] "white" "aliceblue" "antiquewhite"
## [4] "antiquewhite1" "antiquewhite2" "antiquewhite3"
## [7] "antiquewhite4" "aquamarine" "aquamarine1"
## [10] "aquamarine2" "aquamarine3" "aquamarine4"
## [13] "azure" "azure1" "azure2"
## [16] "azure3" "azure4" "beige"
## [19] "bisque" "bisque1" "bisque2"
## [22] "bisque3" "bisque4" "black"
## [25] "blanchedalmond" "blue" "blue1"
## [28] "blue2" "blue3" "blue4"
## [31] "blueviolet" "brown" "brown1"
## [34] "brown2" "brown3" "brown4"
## [37] "burlywood" "burlywood1" "burlywood2"
## [40] "burlywood3" "burlywood4" "cadetblue"
## [43] "cadetblue1" "cadetblue2" "cadetblue3"
## [46] "cadetblue4" "chartreuse" "chartreuse1"
## [49] "chartreuse2" "chartreuse3" "chartreuse4"
## [52] "chocolate" "chocolate1" "chocolate2"
## [55] "chocolate3" "chocolate4" "coral"
## [58] "coral1" "coral2" "coral3"
## [61] "coral4" "cornflowerblue" "cornsilk"
## [64] "cornsilk1" "cornsilk2" "cornsilk3"
## [67] "cornsilk4" "cyan" "cyan1"
## [70] "cyan2" "cyan3" "cyan4"
## [73] "darkblue" "darkcyan" "darkgoldenrod"
## [76] "darkgoldenrod1" "darkgoldenrod2" "darkgoldenrod3"
## [79] "darkgoldenrod4" "darkgray" "darkgreen"
## [82] "darkgrey" "darkkhaki" "darkmagenta"
## [85] "darkolivegreen" "darkolivegreen1" "darkolivegreen2"
## [88] "darkolivegreen3" "darkolivegreen4" "darkorange"
## [91] "darkorange1" "darkorange2" "darkorange3"
## [94] "darkorange4" "darkorchid" "darkorchid1"
## [97] "darkorchid2" "darkorchid3" "darkorchid4"
## [100] "darkred" "darksalmon" "darkseagreen"
## [103] "darkseagreen1" "darkseagreen2" "darkseagreen3"
## [106] "darkseagreen4" "darkslateblue" "darkslategray"
## [109] "darkslategray1" "darkslategray2" "darkslategray3"
## [112] "darkslategray4" "darkslategrey" "darkturquoise"
## [115] "darkviolet" "deeppink" "deeppink1"
## [118] "deeppink2" "deeppink3" "deeppink4"
## [121] "deepskyblue" "deepskyblue1" "deepskyblue2"
## [124] "deepskyblue3" "deepskyblue4" "dimgray"
## [127] "dimgrey" "dodgerblue" "dodgerblue1"
## [130] "dodgerblue2" "dodgerblue3" "dodgerblue4"
## [133] "firebrick" "firebrick1" "firebrick2"
## [136] "firebrick3" "firebrick4" "floralwhite"
## [139] "forestgreen" "gainsboro" "ghostwhite"
## [142] "gold" "gold1" "gold2"
## [145] "gold3" "gold4" "goldenrod"
## [148] "goldenrod1" "goldenrod2" "goldenrod3"
## [151] "goldenrod4" "gray" "gray0"
## [154] "gray1" "gray2" "gray3"
## [157] "gray4" "gray5" "gray6"
## [160] "gray7" "gray8" "gray9"
## [163] "gray10" "gray11" "gray12"
## [166] "gray13" "gray14" "gray15"
## [169] "gray16" "gray17" "gray18"
## [172] "gray19" "gray20" "gray21"
## [175] "gray22" "gray23" "gray24"
## [178] "gray25" "gray26" "gray27"
## [181] "gray28" "gray29" "gray30"
## [184] "gray31" "gray32" "gray33"
## [187] "gray34" "gray35" "gray36"
## [190] "gray37" "gray38" "gray39"
## [193] "gray40" "gray41" "gray42"
## [196] "gray43" "gray44" "gray45"
## [199] "gray46" "gray47" "gray48"
## [202] "gray49" "gray50" "gray51"
## [205] "gray52" "gray53" "gray54"
## [208] "gray55" "gray56" "gray57"
## [211] "gray58" "gray59" "gray60"
## [214] "gray61" "gray62" "gray63"
## [217] "gray64" "gray65" "gray66"
## [220] "gray67" "gray68" "gray69"
## [223] "gray70" "gray71" "gray72"
## [226] "gray73" "gray74" "gray75"
## [229] "gray76" "gray77" "gray78"
## [232] "gray79" "gray80" "gray81"
## [235] "gray82" "gray83" "gray84"
## [238] "gray85" "gray86" "gray87"
## [241] "gray88" "gray89" "gray90"
## [244] "gray91" "gray92" "gray93"
## [247] "gray94" "gray95" "gray96"
## [250] "gray97" "gray98" "gray99"
## [253] "gray100" "green" "green1"
## [256] "green2" "green3" "green4"
## [259] "greenyellow" "grey" "grey0"
## [262] "grey1" "grey2" "grey3"
## [265] "grey4" "grey5" "grey6"
## [268] "grey7" "grey8" "grey9"
## [271] "grey10" "grey11" "grey12"
## [274] "grey13" "grey14" "grey15"
## [277] "grey16" "grey17" "grey18"
## [280] "grey19" "grey20" "grey21"
## [283] "grey22" "grey23" "grey24"
## [286] "grey25" "grey26" "grey27"
## [289] "grey28" "grey29" "grey30"
## [292] "grey31" "grey32" "grey33"
## [295] "grey34" "grey35" "grey36"
## [298] "grey37" "grey38" "grey39"
## [301] "grey40" "grey41" "grey42"
## [304] "grey43" "grey44" "grey45"
## [307] "grey46" "grey47" "grey48"
## [310] "grey49" "grey50" "grey51"
## [313] "grey52" "grey53" "grey54"
## [316] "grey55" "grey56" "grey57"
## [319] "grey58" "grey59" "grey60"
## [322] "grey61" "grey62" "grey63"
## [325] "grey64" "grey65" "grey66"
## [328] "grey67" "grey68" "grey69"
## [331] "grey70" "grey71" "grey72"
## [334] "grey73" "grey74" "grey75"
## [337] "grey76" "grey77" "grey78"
## [340] "grey79" "grey80" "grey81"
## [343] "grey82" "grey83" "grey84"
## [346] "grey85" "grey86" "grey87"
## [349] "grey88" "grey89" "grey90"
## [352] "grey91" "grey92" "grey93"
## [355] "grey94" "grey95" "grey96"
## [358] "grey97" "grey98" "grey99"
## [361] "grey100" "honeydew" "honeydew1"
## [364] "honeydew2" "honeydew3" "honeydew4"
## [367] "hotpink" "hotpink1" "hotpink2"
## [370] "hotpink3" "hotpink4" "indianred"
## [373] "indianred1" "indianred2" "indianred3"
## [376] "indianred4" "ivory" "ivory1"
## [379] "ivory2" "ivory3" "ivory4"
## [382] "khaki" "khaki1" "khaki2"
## [385] "khaki3" "khaki4" "lavender"
## [388] "lavenderblush" "lavenderblush1" "lavenderblush2"
## [391] "lavenderblush3" "lavenderblush4" "lawngreen"
## [394] "lemonchiffon" "lemonchiffon1" "lemonchiffon2"
## [397] "lemonchiffon3" "lemonchiffon4" "lightblue"
## [400] "lightblue1" "lightblue2" "lightblue3"
## [403] "lightblue4" "lightcoral" "lightcyan"
## [406] "lightcyan1" "lightcyan2" "lightcyan3"
## [409] "lightcyan4" "lightgoldenrod" "lightgoldenrod1"
## [412] "lightgoldenrod2" "lightgoldenrod3" "lightgoldenrod4"
## [415] "lightgoldenrodyellow" "lightgray" "lightgreen"
## [418] "lightgrey" "lightpink" "lightpink1"
## [421] "lightpink2" "lightpink3" "lightpink4"
## [424] "lightsalmon" "lightsalmon1" "lightsalmon2"
## [427] "lightsalmon3" "lightsalmon4" "lightseagreen"
## [430] "lightskyblue" "lightskyblue1" "lightskyblue2"
## [433] "lightskyblue3" "lightskyblue4" "lightslateblue"
## [436] "lightslategray" "lightslategrey" "lightsteelblue"
## [439] "lightsteelblue1" "lightsteelblue2" "lightsteelblue3"
## [442] "lightsteelblue4" "lightyellow" "lightyellow1"
## [445] "lightyellow2" "lightyellow3" "lightyellow4"
## [448] "limegreen" "linen" "magenta"
## [451] "magenta1" "magenta2" "magenta3"
## [454] "magenta4" "maroon" "maroon1"
## [457] "maroon2" "maroon3" "maroon4"
## [460] "mediumaquamarine" "mediumblue" "mediumorchid"
## [463] "mediumorchid1" "mediumorchid2" "mediumorchid3"
## [466] "mediumorchid4" "mediumpurple" "mediumpurple1"
## [469] "mediumpurple2" "mediumpurple3" "mediumpurple4"
## [472] "mediumseagreen" "mediumslateblue" "mediumspringgreen"
## [475] "mediumturquoise" "mediumvioletred" "midnightblue"
## [478] "mintcream" "mistyrose" "mistyrose1"
## [481] "mistyrose2" "mistyrose3" "mistyrose4"
## [484] "moccasin" "navajowhite" "navajowhite1"
## [487] "navajowhite2" "navajowhite3" "navajowhite4"
## [490] "navy" "navyblue" "oldlace"
## [493] "olivedrab" "olivedrab1" "olivedrab2"
## [496] "olivedrab3" "olivedrab4" "orange"
## [499] "orange1" "orange2" "orange3"
## [502] "orange4" "orangered" "orangered1"
## [505] "orangered2" "orangered3" "orangered4"
## [508] "orchid" "orchid1" "orchid2"
## [511] "orchid3" "orchid4" "palegoldenrod"
## [514] "palegreen" "palegreen1" "palegreen2"
## [517] "palegreen3" "palegreen4" "paleturquoise"
## [520] "paleturquoise1" "paleturquoise2" "paleturquoise3"
## [523] "paleturquoise4" "palevioletred" "palevioletred1"
## [526] "palevioletred2" "palevioletred3" "palevioletred4"
## [529] "papayawhip" "peachpuff" "peachpuff1"
## [532] "peachpuff2" "peachpuff3" "peachpuff4"
## [535] "peru" "pink" "pink1"
## [538] "pink2" "pink3" "pink4"
## [541] "plum" "plum1" "plum2"
## [544] "plum3" "plum4" "powderblue"
## [547] "purple" "purple1" "purple2"
## [550] "purple3" "purple4" "red"
## [553] "red1" "red2" "red3"
## [556] "red4" "rosybrown" "rosybrown1"
## [559] "rosybrown2" "rosybrown3" "rosybrown4"
## [562] "royalblue" "royalblue1" "royalblue2"
## [565] "royalblue3" "royalblue4" "saddlebrown"
## [568] "salmon" "salmon1" "salmon2"
## [571] "salmon3" "salmon4" "sandybrown"
## [574] "seagreen" "seagreen1" "seagreen2"
## [577] "seagreen3" "seagreen4" "seashell"
## [580] "seashell1" "seashell2" "seashell3"
## [583] "seashell4" "sienna" "sienna1"
## [586] "sienna2" "sienna3" "sienna4"
## [589] "skyblue" "skyblue1" "skyblue2"
## [592] "skyblue3" "skyblue4" "slateblue"
## [595] "slateblue1" "slateblue2" "slateblue3"
## [598] "slateblue4" "slategray" "slategray1"
## [601] "slategray2" "slategray3" "slategray4"
## [604] "slategrey" "snow" "snow1"
## [607] "snow2" "snow3" "snow4"
## [610] "springgreen" "springgreen1" "springgreen2"
## [613] "springgreen3" "springgreen4" "steelblue"
## [616] "steelblue1" "steelblue2" "steelblue3"
## [619] "steelblue4" "tan" "tan1"
## [622] "tan2" "tan3" "tan4"
## [625] "thistle" "thistle1" "thistle2"
## [628] "thistle3" "thistle4" "tomato"
## [631] "tomato1" "tomato2" "tomato3"
## [634] "tomato4" "turquoise" "turquoise1"
## [637] "turquoise2" "turquoise3" "turquoise4"
## [640] "violet" "violetred" "violetred1"
## [643] "violetred2" "violetred3" "violetred4"
## [646] "wheat" "wheat1" "wheat2"
## [649] "wheat3" "wheat4" "whitesmoke"
## [652] "yellow" "yellow1" "yellow2"
## [655] "yellow3" "yellow4" "yellowgreen"
## chardonnay_freqs (a data frame with term and num columns) is preloaded in your workspace
# Print the wordcloud with the specified colors
wordcloud(chardonnay_freqs$term, chardonnay_freqs$num,
max.words = 100,
colors = c("grey80", "darkgoldenrod1", "tomato"))
# Load viridisLite for the cividis() palette
library(viridisLite)
# Select 5 colors
color_pal <- cividis(n = 5)
# Examine the palette output
color_pal
## [1] "#00204DFF" "#414D6BFF" "#7C7B78FF" "#BCAF6FFF" "#FFEA46FF"
# Create a wordcloud with the selected palette
wordcloud(chardonnay_freqs$term, chardonnay_freqs$num,
max.words = 100, colors = color_pal)
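RColorBrewer (attached earlier as a qdap dependency) is another route to readable palettes; a sketch, dropping the palest shades so small words stay legible:
# Take a 9-color sequential palette and drop the four lightest shades
blues <- brewer.pal(9, "Blues")[-(1:4)]
wordcloud(chardonnay_freqs$term, chardonnay_freqs$num,
          max.words = 100, colors = blues)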
[Video]
Michael is a hybrid thinker and doer—a byproduct of being a CliftonStrengths “Learner” over time. With 20+ years of engineering, design, and product experience, he helps organizations identify market needs, mobilize internal and external resources, and deliver delightful digital customer experiences that align with business goals. He has been entrusted with problem-solving for brands—ranging from Fortune 500 companies to early-stage startups to not-for-profit organizations.
Michael earned his BS in Computer Science from New York Institute of Technology and his MBA from the University of Maryland, College Park. He is also a candidate for an MS in Applied Analytics at Columbia University.
LinkedIn | Twitter | www.michaelmallari.com/data | www.columbia.edu/~mm5470