library(tm)
## Loading required package: NLP
library(SnowballC)
library(wordcloud)
## Loading required package: RColorBrewer
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
library(syuzhet)
library(lda)
library(ldatuning)
library(topicmodels)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::annotate() masks NLP::annotate()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(SentimentAnalysis)
## 
## Attaching package: 'SentimentAnalysis'
## 
## The following object is masked from 'package:base':
## 
##     write

For this problem set you will be working with the Amazon Fine Foods Reviews data (https://www.kaggle.com/datasets/snap/amazon-fine-food-reviews). We will limit our analysis to just the text of the reviews and the score given by the user (i.e., number of starts). Perform the following four steps to conduct text analysis using the packages discussed in class.

##Part 1: Data Cleaning & Preprocessing

Load the data and select the columns: HelpfulnessNumerator, HelpfulnessDenominator, Score, and Text.

# Remember to set working directory, then use read.csv()
amazon_reviews <- read.csv("/Users/preethi/Desktop/Machine Learning/ReviewsData.csv")
reviews <- amazon_reviews %>%
  select(HelpfulnessNumerator, HelpfulnessDenominator, Score, Text)
glimpse(reviews)
## Rows: 2,739
## Columns: 4
## $ HelpfulnessNumerator   <int> 844, 866, 808, 580, 491, 559, 538, 536, 524, 48…
## $ HelpfulnessDenominator <int> 923, 878, 815, 593, 569, 562, 544, 539, 536, 49…
## $ Score                  <int> 3, 5, 5, 1, 3, 5, 5, 5, 2, 5, 5, 1, 5, 4, 1, 5,…
## $ Text                   <chr> "I ordered one of these Fresh \"Whole\" Rabbits…

Create a variable Helpfulness Percentage by dividing the Numerator by the Denominator column.

reviews <- reviews %>%
  mutate(HelpfulnessPercentage = ifelse(HelpfulnessDenominator > 0,
                                        HelpfulnessNumerator / HelpfulnessDenominator,  NA_real_))
                                        
glimpse(reviews)
## Rows: 2,739
## Columns: 5
## $ HelpfulnessNumerator   <int> 844, 866, 808, 580, 491, 559, 538, 536, 524, 48…
## $ HelpfulnessDenominator <int> 923, 878, 815, 593, 569, 562, 544, 539, 536, 49…
## $ Score                  <int> 3, 5, 5, 1, 3, 5, 5, 5, 2, 5, 5, 1, 5, 4, 1, 5,…
## $ Text                   <chr> "I ordered one of these Fresh \"Whole\" Rabbits…
## $ HelpfulnessPercentage  <dbl> 0.9144095, 0.9863326, 0.9914110, 0.9780776, 0.8…

Remove stop words and punctuation; convert to lowercase.

library(tm)

# Create corpus
docs <- Corpus(VectorSource(amazon_reviews$Text))

# Function to replace unwanted symbols with a space
to_space <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

# Remove HTML tags like <br>, <div>, etc.
docs <- tm_map(docs, content_transformer(function(x) gsub("<.*?>", " ", x)))
## Warning in tm_map.SimpleCorpus(docs, content_transformer(function(x)
## gsub("<.*?>", : transformation drops documents
# Remove specific symbols
symbols_to_remove <- c(":", "-", "'", '"', ";")
for (sym in symbols_to_remove) {
  docs <- tm_map(docs, to_space, sym)
}
## Warning in tm_map.SimpleCorpus(docs, to_space, sym): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(docs, to_space, sym): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(docs, to_space, sym): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(docs, to_space, sym): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(docs, to_space, sym): transformation drops
## documents
# Convert to lowercase
docs <- tm_map(docs, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(docs, content_transformer(tolower)):
## transformation drops documents
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
## Warning in tm_map.SimpleCorpus(docs, removePunctuation): transformation drops
## documents
# Remove numbers
docs <- tm_map(docs, removeNumbers)
## Warning in tm_map.SimpleCorpus(docs, removeNumbers): transformation drops
## documents
# Remove stopwords (standard English)
docs <- tm_map(docs, removeWords, stopwords("en"))
## Warning in tm_map.SimpleCorpus(docs, removeWords, stopwords("en")):
## transformation drops documents
# Remove additional custom stopwords
custom_stopwords <- c(
  "br", "can", "part", "meaning", "thus", "understand", "set", "also", 
  "one", "provides", "used", "help", "may", "ie", "us", "ing", "s", "t", "m",
  "oneplus", "phone", "phones", "re", "mobile", "does", "doesn", 
  "pro", "will", "get"
)
docs <- tm_map(docs, removeWords, custom_stopwords)
## Warning in tm_map.SimpleCorpus(docs, removeWords, custom_stopwords):
## transformation drops documents
# Remove extra whitespace
docs <- tm_map(docs, stripWhitespace)
## Warning in tm_map.SimpleCorpus(docs, stripWhitespace): transformation drops
## documents
# View the result
inspect(docs[[1]])
## <<PlainTextDocument>>
## Metadata:  7
## Content:  chars: 342
## 
##  ordered fresh whole rabbits arrived head fur insides missing exactly whole d say maybe just damaged shipping won buying another mean without long ears know even rabbit size shape catnot ve seen cat head fur insides missing mean like really close anything plus side delicious tall cold glass tuscan milk give three stars three whole stars btw
head(docs)
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 6

Create a word cloud for the most positive (5 star) and most negative (1 star) reviews.

#I used the help of AI to complete this step & make the word cloud work for each of the 5-star & 1-star reviews

library(tm)
library(wordcloud)
library(RColorBrewer)
# Subset for 5-star and 1-star reviews
reviews_5star <- subset(amazon_reviews, Score == 5)
reviews_1star <- subset(amazon_reviews, Score == 1)

clean_corpus <- function(text_vector) {
  corpus <- Corpus(VectorSource(text_vector))
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeNumbers)
  custom_stopwords <- c(stopwords("en"), "br", "nbsp", "quot")
  corpus <- tm_map(corpus, removeWords, custom_stopwords)
  corpus <- tm_map(corpus, stripWhitespace)
  return(corpus)
}

corpus_5star <- clean_corpus(reviews_5star$Text)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, custom_stopwords):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation drops
## documents
corpus_1star <- clean_corpus(reviews_1star$Text)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, custom_stopwords):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation drops
## documents
dtm_5star <- DocumentTermMatrix(corpus_5star,
                                control = list(wordLengths = c(2, 20),
                                               bounds = list(global = c(10, Inf))))
dtm_1star <- DocumentTermMatrix(corpus_1star,
                                control = list(wordLengths = c(2, 20),
                                               bounds = list(global = c(10, Inf))))

freq_5star <- colSums(as.matrix(dtm_5star))
freq_1star <- colSums(as.matrix(dtm_1star))

# 5-star reviews
wordcloud(names(freq_5star), freq_5star, min.freq=20, max.words=50, scale=c(4,0.7), colors=brewer.pal(3, "Dark2"), main="5-Star Reviews")

# 1-star reviews
wordcloud(names(freq_1star), freq_1star, min.freq=20, max.words=50, scale=c(4,0.7), colors=brewer.pal(3, "Reds"), main="1-Star Reviews")

Part 2: Sentiment Analysis

Calculate sentiment scores for each review using the get_sentiment() function with the bing method.

library(tidytext)
library(dplyr)
library(tidyr)
library(ggplot2)

# Add a ReviewID before tokenizing
reviews <- reviews %>%
  mutate(ReviewID = row_number())

# Tokenize and clean (removing stop words and numbers)
reviews_tokens <- reviews %>%
  select(ReviewID, Score, Text) %>%
  unnest_tokens(word, Text) %>%
  filter(!word %in% stop_words$word,  # Remove stop words
         !str_detect(word, "^[0-9]+$"))  # Remove numbers

# Sentiment using Bing lexicon
reviews_sentiment <- reviews_tokens %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  mutate(sentiment_value = if_else(sentiment == "positive", 1, -1)) %>%
  group_by(ReviewID) %>%
  summarise(sentiment_score = sum(sentiment_value), .groups = "drop")

# Merge back with the original reviews to include Score and Text
reviews_with_sentiment <- reviews %>%
  left_join(reviews_sentiment, by = "ReviewID")

# Inspect the first few rows of the merged data
print(head(reviews_with_sentiment))
##   HelpfulnessNumerator HelpfulnessDenominator Score
## 1                  844                    923     3
## 2                  866                    878     5
## 3                  808                    815     5
## 4                  580                    593     1
## 5                  491                    569     3
## 6                  559                    562     5
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Text
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     I ordered one of these Fresh "Whole" Rabbits, but when it arrived its head, fur and insides were missing.  Not exactly whole, I'd say!  Maybe it was just damaged during shipping, but I won't be buying another one.  I mean - without the long ears, how do I know it even WAS a rabbit?  It was the same size and shape as a cat...not that I've seen a cat with its head, fur and insides missing.  I mean, not like really close or anything.<br /><br />On the plus side, it was delicious with a tall, cold glass of Tuscan Milk, so I give it three stars.  That's three WHOLE stars, BTW.
## 2 see update at end of review<br /><br />******************<br /><br />I was lamenting about only having frech lettuce, herbs, chives, and tomatoes from may garden during the long fall-winter-spring parts of the year. The refrigerated stuff from the grocery store goes bad right away and does not taste as good as the fresh stuff from the garden, either.<br /><br />Then I saw a Time Magazine page on the new AeroGrow AeroGarden, and I just had to try it out. After reading the AeroGrow website before making the purchase, I realized that buying one garden would not work for me, as the tomatoes cannot be grown in the same garden as lettuce & herbs. This is partially due to the large amount of room taken up by the tomatoes, and also because the lamp/watering cycle is different and finally because the nutrients are different.<br /><br />So, I bought two gardens, along with the Salad Greens seed kit and Cherry Tomato seed kit. Each garden comes with a mixed herb kit, so I figured to mix in a few herbs with the other seeds, and if they did not work, no big loss.<br /><br />The products came quickly and the instructions for assembly were very clear and well written with excellent diagrams. I came to realize that this somewhat pricey product at least comes from a company that produces a classy product (a rare thing these days). I found a space on a shelf beside the basement stairs, and placed both assembled gardens there. I also bought and placed a digital thermometer with maximum/minimum temperature memory readouts ($10 at Radio Shack), because I was unsure of what temperature extremes the plants might experience in that location (68-72 as it turns out).<br /><br />The seed kits contain pre-seeded planting pods. Each pod is basically a plastic cup shaped frame with two pieces of foam rubber inside the cup part, like two slices of bread with the seeds sandwiched between them. They simply insert into the seven holes in the top of the garden's water tank. The Salad Greens and Herbs come with seven pods per kit, while the Tomatos come with three pods plus four hole plugs-the plants are bigger so three of them take up thw whole space available. The hole plugs prevent evaporation of the water through the unoccupied holes.<br /><br />The water tank holds exactly one gallon of regular drinking water. Well water is not recommended, presumably because of impurities, and since I am on a well I bought two one-gallon plastic jugs of 'drinking water' at the store for 50 cents each and filled the tanks with their contents. A pump in the tank takes water and pipes it to the rim of each of the seven holes in the tank's top, and here the trickle of water flows into the foam sandwich of each seed pod. The foam stays moist and the rest of the water drips back down into the tank. The garden's 'computer' cycles the water flow on and off according to the amount recommended for the type of plant being grown. A water level sensor turns on a flashing red light when it is time to add more water to the tank.<br /><br />The top of the garden is a reflector with two compact-fluorescent lamps, of the variety that has the special ultraviolet (UV) coating that causes the emmitted light to resemble sunlight. The reflector rides on a vertical pole that extends up from the garden's base, so you can raise and lower the lamps as required to keep them the correct distance above the plants. The garden's 'computer' also turns the lamps on and off according to a schedule tailored to the type of plant. If using the gardens in a place where the light might be a problem at night, you can syncronize the computer so that the lights are on only during the daytime and off when you are trying to sleep.<br /><br />The seed kits come with little clear plastic cups that cover each pod until the seeds have germinated, then you can dispose of them. The kits also come with a bag of nutrient tablets, which you add to the water tank when the computer prompts you to by flashing a red light. The nutrients are tailored to the type of plant being grown, and there are enough of them to feed the plants during their anticipated life span.<br /><br />I planted one garden with five salad green (leaf lettuce) pods, plus one pod each from the Herb kit, chives and parsley. The other garden got the threee pods from the Cherry Tomato kit; two reds and one yellow variety.<br /><br />Each seed pod has a label that tells you how many days to wait for plants to appear after germination. All of my plants appeared like clockwork.<br /><br />I have had the gardens for about six weeks now, and have been enjoying salads containing lettuce, parsley and chives plus other odds and ends from the fridge, for the last two or three weeks. The lettuce and herbs are all beautiful, with no problems from bugs or too much/too little water, excessive temperatures, etc. No need to wash the plants or check for bugs or pick off bad spots, everything goes straight to the salad bowl. What a joy! Even with only five lettuce plants, I have to eat two meals including salad each day to keep up with the growth. This would easily feed two people, and if all seven salad green pods had been used, three people.<br /><br />The tomatoes are all doing well ahnd have been pruned according to instructions. It will be some time yet before they produce flowers, and then fruit. But based on the health of the plants, I expect a good yield.<br /><br />Each seed kit comes with a full color manual/booklet that covers all aspects of 'planting', germinating, feeding, pruning (if required) and then harvesting the plants. There are also photos of plants where things have gone wrong (leaves burned because the lamps were not raised up as the plants grew taller, etc) with clear instructions on how to recognize problems and correct them. Harvesting instructions clearly tell how much can be taken at a time without killing the plant, and so on.<br /><br />The AeroGrow gardens are a well designed, well built product with excellent documentation. All my visitors are amazed at how well the product works, and many have gone out and bought their own. I anticipate years of improved eating because of this product.<br /><br />*****************<br /><br />Update November 2007<br /><br />I used my two gardens all Winter (2006/2007) until it was time to start getting produce from my real (outdoor) garden in the Summer, at which time I put the little gardens to rest for the season. During their use, I got a large crop of cherry tomatoes, all of which were beautiful. I went through on crop of lettuce and salad greens, and when the lettuce finally bolted, I replanted with only lettuce (no chives, etc this time) and got another couple of months worth of lettuce. I have now fired them up again in the Fall, and have nice little plants popping up. Aerogrow has more seed options available now, so I have planted a more interesting kind of lettuce.
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               I purchased a burrito from a small shop a few blocks from home. I was unimpressed with their habanero "hot" salsa. Eager to kick it up a notch, I reached for a bottle of what I later found to be Dave's Gourmet Ghost Pepper Jolokia Hot Sauce. I unscrewed the top and went to put a dab on. I quickly realized that there was no flow regulation but not before a large pool of the magma colored liquid dripped into my lunch. I decided to dab my finger in it and see what I was dealing with. It was formidable, sweet and flavorful with a long heat. I thought I could take the heat.<br /><br />I demolished the burrito, hot sauce and all, and shrugged off the pain. Every bite was saturated with the taste of a thousand tortured souls but the guacamole still tasted great. I wiped my tingling lips and while downing a glass of water I looked at the bottle. It claimed a heat rating of "Insanity++." I headed home thinking surely the worst must be over. I've ate plenty of hot food and my stomach is battle tested. I was wrong.<br /><br />I walked no more than a block before I started to feel odd. It was in the forties in Cleveland but I could feel the sweat forming on my brow. I walked another block and I could literally feel the burning sensation outlining my stomach. My breaths were noticeably faster and shorter. People on the street looked at me weird. I figured it would go away by the time I got home but I decided to pick up the pace. By the time my apartment was in sight I was experiencing tunnel vision and it felt like a live agitated weasel had been placed inside me. I knew what I had to do. After flushing my lunch, a tablespoon of this sauce, half a gallon of milk, and my ego down the drain, I can honestly say I am just happy to be alive. This sauce is not for mortals.
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        This product is called "Hunmatsu-RyokuCha," in Japanese, and not the same as "Maccha" or "Matcha."<br />The translation for Hunmatsu Ryokucha and Maccha are actually the same, green tea powder.<br />However, the ingredient for Maccha is a special kind of tea called "Tencha," in Japanese, and has unique "Umami."<br />"Tencha" is grown in the shade in order to prevent the final product from developing certain astringent bitterness, called "Shibumi," in Japanese.  There are many kinds of bitterness.  i.e. For Maccha, it is not necessary true that bitterness is bad.  Actually, the first grade Maccha is more bitter than the low grade Maccha, given you are comparing amongst REAL Maccha powder.<br /><br />The process of producing REAL Maccha is much more complicated and painstaking than Hunmatsu-Ryokucha.<br />Hunmatsu-Ryokucha is ground regular Sencha Green Tea that are casually served all over Japan in regular household.  (Sorry, I am struggling with English.)  But one must understand that the kind of tea plants, the way they grow the tea plants, the way they process the tea leaves, are all different, not only in Japan, but also in any countries.<br /><br />(If you can read Chinese Characters...) Hunmatsu-Ryokucha consists of 4 Chinese Characters, Hunmatsu means powder, Ryokucha means green tea; however, Maccha sonsists of two Chinese Characters.  Maccha means special green tea powder intended for tea ceremony.  And we, Japanese, don't drink Maccha regularly.  Most Japanese don't know a lot about tea ceremony either.  (As most Japanese are not martial artists....)<br /><br />In the United States, Asian Grocery Store sells both Hunmatu Ryokucha and Maccha.  The real Maccha is usually in a nice tiny tin can, and the amount included is about 28g (about 1 oz), priced about 9 - 10 dollars each.  I don't think that any country but Japan produces Maccha, but I might be wrong.  However, if the green tea powder is produced in Taiwan, China, Korea,and other Asian countries, it is most likely to be Hunmatsu Ryokucha, NOT Maccha.  The same product sold at this vendor is much cheaper at Asian Grocery Store since you don't have to pay for S/H.<br /><br />The advantage of Hunmatsu-Ryokucha over Maccha is that the former has higher nutritional value, which I don't want to discuss in this site.  But the cancer preventing effect in this cheap powder is higher than more expensive Maccha.<br /><br />The disadvantage of Hunmatsu-Ryokucha over Maccha is of course, "Shibumi" (the closest direct translation is astringency.)  That's why you may feel it is more bitter than Maccha.  The powder is probably coarser than the Maccha.<br /><br />You may use Hunmatsu-Ryokucha for baking and dessert making, in the same way as you use Maccha. But the flavor and taste are definitely different.<br /><br />I have not tried Starbucks Maccha Latte.  But I assume that they use the low grade Maccha powder since, amongst real Maccha, the low grade Maccha is less bitter than the premium quality Maccha for tea ceremony, I heard.<br /><br />To conclude, if your purpose is to enjoy REAL Maccha flavor, you should not purchase this product.<br />But if your purpose is simply for your health, this product is better than Maccha.  However, you should research availability and cost comparisons of Hunmatsu-Ryokucha products by actually going to the Asian Grocery Stores.  You may get better price than this.  (Actually, I am pretty sure that you get better prices locally.)  Most Vietnamese owned grocery stores have Hunmatsu-Ryokucha in stock, not to mention Japanese and Korean owned grocers.<br /><br />Also, Hunmatsu-Ryokucha is popular in Japan, too.  Thus not all green tea powder imported from Japan is "Maccha."<br /><br />If you want to buy the quality Maccha, you should find the Japanese Grocery Store (owened by Japanese) and ask specifically "Maccha" for tea ceremony.  And if you are not satisfied, sign up for tea ceremony class taught by a tea ceremony master.<br /><br />For the seller, one must not include "Matcha" or "Maccha" in the descriptions or titles, if the product was just "Hunmatsu-Ryokucha."  As I wrote earlier, these two products share the same English Translation.  So you can say, this is green tea powder, grounded green tea, powdered green tea.  However, you must not even hint this is the same green tea powder that can be used for the formal Japanese tea ceremony.  Simply, if you call this "Maccha" or "Matcha," you are lying.  And you can be actually sued, and you will lose.  Not only this seller, but I find many sellers are falsely including "Matccha" or "Matcha" for non-MATCHA GREEN TEA POWDER.  So I believe this is just the innocent mistake.  I encourage the sellers to correct their mistakes, though.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Coconut water is the liquid inside an unopened coconut. It is not "coconut milk," although there is often confusion about that. Coconut milk is made from the meat of the coconut after it is opened and the water poured out. Inside the coconut, the water is sterile and very benign to the human body - it has been safely used in emergencies as IV fluid! (Straight out of the coconut, that is, NOT from a Tetrapak.)<br /><br />Coconuts in general are very healthy items. In the South Pacific the coconut palm is called "the tree of Life," because it provides so many things for a healthy life: food, water, shelter, medicine, etc. Coconut oil is one of the healthiest oils on the planet, but has been the victim of intense propaganda campaigns by corporate vegetable oil cartels and other evil critters.<br /><br />Coconut water tastes (to me) a bit like very watery, unsweetened tapioca, with just a hint of sourness. It is quite bland by itself, but mixes easily with fruit juices, etc. It has great health properties and is rapidly becoming popular with athletes as a superb isotonic drink, because it has great electrolyte and mineral properties, and is a natural food with no harmful side effects or hideous chemical garbage masquerading as food.<br /><br />Good organic coconut water is very hard to find, but there are many brands of non-organic like this one on the market. I checked out the package info and vendor's web site, but could find only general information on coconut water and its benefits, nothing specific about this exact product - where and how it is grown and harvested, the quality of the land and processing, etc. That is not a good sign.<br /><br />For an item that is very health oriented, the only reasons I can see for hiding that information are pure marketing ignorance, or knowing that the truth would not paint a good picture. Rain forests are reportedly being cut down for coconut palm plantations, especially those dedicated to unhealthy corporate factory farming practices. It would be vital for a company to distance themselves from that kind of activity by using only properly grown plants on healthy, environmentally sound plantations, and making that a very prominent part of their marketing.<br /><br />It would also be vital to play up everything possible that points to a healthy product by specifically mentioning those things about this specific product, not just general properties. This company doesn't do any of that, so I can only assume that this product is not produced in a way they want me to know about. That is the kiss of death for a product in my eyes. Granted, I am fussier than most in this regard.<br /><br />Don't be misled by the "100% Natural" marketing hype. That means virtually nothing. It is not a strict, legally defined standard, as is "USDA Organic," for example. Virtually anything found in "nature," i.e. toxic heavy metals, MSG (yes, it occurs in nature), animal manure, etc., can be considered "natural" and can legally be included in a product labeled "100% Natural." Granted, USDA organic standards are very weak as organic standards go (compared to say, Oregon Tilth), but they are still better than nothing.<br /><br />While I love coconut and want to give this product 4 or 5 stars, the complete lack of honest information about its origins and processing make me VERY, VERY suspicious. Without more information about how safe/healthy the product actually is, I can only give it 3 stars and a serious "caveat emptor." It is probably much better than any ordinary "sports drinks," but there are too many unanswered questions to be sure. One's health is not something to be treated lightly.<br /><br />UPDATE July 2009:<br /><br />After continued searching, I still have not been able to locate a consistent source of organic or Fair Trade coconut water. While the manufacturer of this coconut water still does not give out any real information about the growing, harvesting, or other relevant details of production, neither do any of their competitors.<br /><br />So far, this is still the best coconut water I have been able to find, and I continue to drink it, even with the questions I have.
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  This Ecobrew reusable Keurig K-cup is great for brewing your own coffee. It seems to have been very well designed and is easy to use. One of the things I felt bad about when I bought my Keurig coffee maker was the fact that although it was a great convenient coffee maker... it does tend to produce lots of waste when you buy the disposable k-cups.  I think this product also ends up saving you lots of money in the long run too. K-cups can be quite expensive.  I really love this product and I really can't find anything bad to say about it. There is another product called the Solofill that essentially does the same thing but is made from another company. I own both products and if you're curious to how they stack up against one another, check out the paragraph below. You may want to do research to make sure that your Keurig coffee maker is compatible with the EcoBrew or Solofill. I know that some K-cup coffee makers are not compatible with these products. I own the MR. Coffee single K-cup coffee maker and can say that it's compatible with both of these products.  If you want to know more about the Mr. Coffee Keurig coffee maker, feel free to read my review on that as well :)<br /><br />EcoBrew VS Solofill<br /><br />First off, I'd like to state that I purchased both the Solofill and the Ecobrew.  Both of these products are really great reusable Keurig filters that let you brew your own coffee in a Keurig coffee maker.  These essentially let you make your own K-cups with your own coffee... and they're reusable!  I would rate the Solofill as 4.5 Stars out of 5 and the Ecobrew as a 5 out of 5 Star Product.  The Ecobrew is a bit better than the Solofill because it is easier to rinse out. The Solofill tends to collect grinds at the very bottom and it's very hard to get the few last grinds out. The Ecobrew also fits better in my Mr Coffee single cup K-cup brewer. The Ecobrew seems to have a slightly larger capacity for coffee grinds too, which may mean it's capable of brewing slightly stronger coffee. And lastly, the Ecobrew has a flat bottom that can sit on a counter top by itself. This makes it much easier to fill than the Solofill. With the Solofill, it couldn't be balanced on a table-top so I had to cut out the bottom of a small dixie cup to make my own "stand" in order to fill it properly.
##   HelpfulnessPercentage ReviewID sentiment_score
## 1             0.9144095        1               0
## 2             0.9863326        2               1
## 3             0.9914110        3               0
## 4             0.9780776        4             -10
## 5             0.8629174        5               1
## 6             0.9946619        6              10
# Plot the sentiment score by star rating
ggplot(reviews_with_sentiment, aes(x = as.factor(Score), y = sentiment_score)) +
  geom_col(fill = "steelblue") +
  labs(x = "Star Rating", y = "Net Sentiment Score", 
       title = "Sentiment Score by Star Rating")
## Warning: Removed 105 rows containing missing values or values outside the scale range
## (`geom_col()`).

Make a plot of the top emotions for poor reviews (1 and 2 stars) and positive reviews (4 and 5 stars)

library(tidyverse)
library(tidytext)

# Step 1: Filter only poor and positive reviews, tokenize and clean
reviews_emotion <- reviews %>%
  filter(Score %in% c(1, 2, 4, 5)) %>%
  select(ReviewID, Score, Text) %>%
  unnest_tokens(word, Text) %>%
  filter(!word %in% stop_words$word,        # Remove stopwords
         !str_detect(word, "^[0-9]+$"))     # Remove numbers

# Step 2: Load NRC lexicon, keep only emotion-related words
nrc <- get_sentiments("nrc") %>%
  filter(!sentiment %in% c("positive", "negative"))

# Step 3: Join with NRC lexicon
reviews_emotion_sentiment <- reviews_emotion %>%
  inner_join(nrc, by = "word")
## Warning in inner_join(., nrc, by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 21 of `x` matches multiple rows in `y`.
## ℹ Row 3247 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Step 4: Group and count emotions by review type
emotion_counts <- reviews_emotion_sentiment %>%
  mutate(ReviewType = case_when(
    Score %in% c(1, 2) ~ "Poor (1–2 Stars)",
    Score %in% c(4, 5) ~ "Positive (4–5 Stars)"
  )) %>%
  count(ReviewType, sentiment) %>%
  group_by(ReviewType) %>%
  slice_max(n, n = 6)  # Top 6 emotions per group

# Step 5: Plot
ggplot(emotion_counts, aes(x = reorder(sentiment, n), y = n, fill = ReviewType)) +
  geom_col(show.legend = TRUE, position = "dodge") +
  facet_wrap(~ ReviewType, scales = "free_y") +
  coord_flip() +
  labs(
    title = "Top Emotions in Poor vs Positive Reviews",
    x = "Emotion",
    y = "Word Count"
  ) +
  theme_minimal()

Make a plot of the top emotions for unhelpful reviews (under 25%) and for helpful reviews (over 75%)

library(tidyverse)
library(tidytext)

# Load NRC emotions (excluding positive/negative)
nrc <- get_sentiments("nrc") %>%
  filter(!sentiment %in% c("positive", "negative"))

# Create Helpfulness Percent and group labels
reviews_helpfulness <- reviews %>%
  mutate(
    HelpfulnessPercent = if_else(HelpfulnessDenominator == 0, NA_real_,
                                 HelpfulnessNumerator / HelpfulnessDenominator),
    HelpfulnessGroup = case_when(
      HelpfulnessPercent < 0.25 ~ "Unhelpful (<25%)",
      HelpfulnessPercent > 0.75 ~ "Helpful (>75%)"
    )
  ) %>%
  filter(!is.na(HelpfulnessGroup))  # remove middle range or NAs

# Tokenize and clean
reviews_tokens <- reviews_helpfulness %>%
  unnest_tokens(word, Text) %>%
  filter(!word %in% stop_words$word,
         !str_detect(word, "^[0-9]+$"))

# Join with NRC emotions
reviews_emotions <- reviews_tokens %>%
  inner_join(nrc, by = "word")
## Warning in inner_join(., nrc, by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 7 of `x` matches multiple rows in `y`.
## ℹ Row 4888 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Count and get top 6 emotions per group
emotion_counts <- reviews_emotions %>%
  count(HelpfulnessGroup, sentiment) %>%
  group_by(HelpfulnessGroup) %>%
  slice_max(n, n = 6)

# Plot
ggplot(emotion_counts, aes(x = reorder(sentiment, n), y = n, fill = HelpfulnessGroup)) +
  geom_col(position = "dodge", show.legend = TRUE) +
  facet_wrap(~ HelpfulnessGroup, scales = "free_y") +
  coord_flip() +
  labs(title = "Top Emotions in Helpful vs Unhelpful Reviews",
       x = "Emotion",
       y = "Word Count") +
  theme_minimal()

##Part 3: Topic Modeling (LDA)

Create a Document-Term Matrix.

#used AI for this piece as I was struggling with the topics.
library(dplyr)
library(tidyr)
library(textstem)
## Loading required package: koRpus.lang.en
## Loading required package: koRpus
## Loading required package: sylly
## For information on available language packages for 'koRpus', run
## 
##   available.koRpus.lang()
## 
## and see ?install.koRpus.lang()
## 
## Attaching package: 'koRpus'
## The following object is masked from 'package:readr':
## 
##     tokenize
## The following object is masked from 'package:tm':
## 
##     readTagged
library(qdapDictionaries )
reviews <- amazon_reviews %>%
  mutate(
    # Remove HTML tags completely
    Text = gsub("<.*?>", "", Text),
    # Remove special characters
    Text = gsub("[^[:alnum:][:space:]']", "", Text),
    # Lemmatization
    Text = lemmatize_words(Text)
  ) %>%
  select(Score, Text) %>%
  filter(nchar(Text) > 100)  # Remove very short reviews

# 🔧 FIXED 2: Improved DTM construction with bi-grams and frequency filtering
library(tidytext)

# Create bigram-aware DTM
reviews_dtm <- reviews %>%
  unnest_tokens(phrase, Text, token = "ngrams", n = 2) %>%
  separate(phrase, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% c(stop_words$word, Top200Words),
    !word2 %in% c(stop_words$word, Top200Words)
  ) %>%
  unite(term, word1, word2, sep = " ") %>%
  count(Score, term) %>%
  filter(n > 10) %>%  # Remove rare terms
  cast_dtm(Score, term, n)

# 🔧 FIXED 3: Find optimal topic number
library(ldatuning)
result <- FindTopicsNumber(
  reviews_dtm,
  topics = seq(2, 10, by = 1),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010"),
  method = "Gibbs",
  control = list(seed = 1234),
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
##   Griffiths2004... done.
##   CaoJuan2009... done.
##   Arun2010... done.
# Plot results to choose best k
FindTopicsNumber_plot(result)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the ldatuning package.
##   Please report the issue at <https://github.com/nikita-moor/ldatuning/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Apply LDA with k = 5 topics.

# Run LDA with k=5 topics

reviews_lda <- LDA(reviews_dtm, k = 5, control = list(seed = 1234))

reviews_lda
## A LDA_VEM topic model with 5 topics.

Show the top 10 terms for each topic.

# Load necessary libraries
library(tidyverse)
library(tidytext)
library(topicmodels)

# Get the top terms for each topic
top_terms <- terms(reviews_lda, 10)

# Define custom stopwords to remove unwanted terms like "br", "1", "2"
custom_stopwords <- c("br", "1", "2")

reviews_filtered <- reviews %>%
  mutate(word_count = str_count(Text, "\\w+")) %>%
  filter(word_count > 20)

# Loop through each topic and filter out unwanted words
for (i in 1:ncol(top_terms)) {
  cat("\nTopic", i, "Top Terms (Filtered):\n")
  filtered_terms <- top_terms[, i]  # Get the terms for the current topic
  filtered_terms <- filtered_terms[!(filtered_terms %in% custom_stopwords)]  # Remove the unwanted terms
  
  # Print the filtered terms for the current topic
  print(filtered_terms)
}
## 
## Topic 1 Top Terms (Filtered):
##  [1] "peanut butter"    "ice cream"        "green tea"        "hot chocolate"   
##  [5] "corn syrup"       "dog food"         "customer service" "brown rice"      
##  [9] "grocery store"    "monk fruit"      
## 
## Topic 2 Top Terms (Filtered):
##  [1] "green tea"        "hot chocolate"    "grocery store"    "customer service"
##  [5] "corn syrup"       "dog food"         "gluten free"      "orange pekoe"    
##  [9] "wheat grass"      "agave nectar"    
## 
## Topic 3 Top Terms (Filtered):
##  [1] "green tea"        "highly recommend" "peanut butter"    "grocery store"   
##  [5] "gluten free"      "dark chocolate"   "dog food"         "health benefits" 
##  [9] "cat food"         "free shipping"   
## 
## Topic 4 Top Terms (Filtered):
##  [1] "customer service" "pill pockets"     "maple syrup"      "extra bold"      
##  [5] "blood sugar"      "expiration date"  "gluten free"      "extra virgin"    
##  [9] "bob's red"        "manuka honey"    
## 
## Topic 5 Top Terms (Filtered):
##  [1] "kona pods"            "green tea"            "monosodium glutamate"
##  [4] "customer service"     "glutamic acid"        "corn syrup"          
##  [7] "dog food"             "brown rice"           "peanut butter"       
## [10] "grocery store"

Interpret what each topic might represent (e.g., complaints about packaging, praise for taste, etc.).

Answer: Each topic seems to be about products that are recommended at a grocery store generally. Topic 1: Talks about variety of food products with a grocery store perhaps suggesting where to buy these items. Topic 2: Talks about beverages and healthy food products, also mentioning custoemer service & grocery store, focusing on buying experience. Topic 3: Has a highly recommend, health benefits, free shipping - sounding positive and mentions both human and pet foods. Topic 4: This topic includes customer service, specialty foods(bob’s red, extra virgin) and product specific concerns such as expiration data. Topic 5: Specialty beverages and food additives(glutamic acid, corn syrup) and mention of staple food(brown rice, peanut butter), while customer service and grocery store appear again.

Part 4: Reflection Questions

  1. How do sentiment scores align with the star ratings? A. Sentiment scores are generally expected to close match star ratings.As seen in the ggplot above, trust, joy and anticipation are the most common emotions in both poor and positive reviews their word counts are much higher in positive reviews. Surprise only apears in positive reviews. Positive reviews are dominated by trust and joy, indicating happy customers - positive sentiment. Poor reviews still mention trust and joy, but contain sadness, fear and disgust a bit more, showing their negative experiences. The word anticipation in both suggest that customers are discussing their expectations - a neutral sentiment.

  2. How do sentiment scores align with helpfulness? Helpful reviews have a higher word count for all emotions especially trust, joy, and anticipation, while unhelpful reviews have a very low word count for all emotions, so shorter reviews or brief reviews were observed. This suggests that the reviews that are emotionally expressive and detailed are perceived as more helpful by the reviewers.

  3. What are the most common topics found in the reviews? The most common topics were: product-specific discussions mentioning peanut butter, green tea, maple syrup and dog food, etc. health and dietary discussions mentioning blood sugar, manuka honey, extra virgin Customer service, grocery store and free shipping relfecting the buyer’s experiences and service quality Pet products were mentioned in the discussions - dog food cat food, pill pockets

  4. Were there any surprises in the sentiment or topics? I felt it was interesting to see both positive and reviews within the same star rating groups. Eg. sadness and fear appear in positive reviews as well, indicating that positive reviews also mention negative aspects - so customers are being quite. expressive. Also, the topics had both pet food as well as adult food & it indicated that customers are discussing both food topics together in their single review, so this might need more topic separation later on.