Load Libraries and Data

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)

# Read the review export from the working directory into a tibble.
reviews <- readr::read_csv(file = "Book2.csv")
## Rows: 599 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (16): id, name, asins, brand, categories, primaryCategories, imageURLs,...
## dbl   (3): reviews.id, reviews.numHelpful, reviews.rating
## lgl   (2): reviews.dateAdded, reviews.doRecommend
## dttm  (3): dateAdded, dateUpdated, reviews.date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
reviews %>% head(n = 6)
## # A tibble: 6 × 24
##   id        dateAdded           dateUpdated         name  asins brand categories
##   <chr>     <dttm>              <dttm>              <chr> <chr> <chr> <chr>     
## 1 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 2 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 3 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 4 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 5 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 6 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## # ℹ 17 more variables: primaryCategories <chr>, imageURLs <chr>, keys <chr>,
## #   manufacturer <chr>, manufacturerNumber <chr>, reviews.date <dttm>,
## #   reviews.dateAdded <lgl>, reviews.dateSeen <chr>, reviews.doRecommend <lgl>,
## #   reviews.id <dbl>, reviews.numHelpful <dbl>, reviews.rating <dbl>,
## #   reviews.sourceURLs <chr>, reviews.text <chr>, reviews.title <chr>,
## #   reviews.username <chr>, sourceURLs <chr>
# List every column name available in the data set.
reviews %>% names()
##  [1] "id"                  "dateAdded"           "dateUpdated"        
##  [4] "name"                "asins"               "brand"              
##  [7] "categories"          "primaryCategories"   "imageURLs"          
## [10] "keys"                "manufacturer"        "manufacturerNumber" 
## [13] "reviews.date"        "reviews.dateAdded"   "reviews.dateSeen"   
## [16] "reviews.doRecommend" "reviews.id"          "reviews.numHelpful" 
## [19] "reviews.rating"      "reviews.sourceURLs"  "reviews.text"       
## [22] "reviews.title"       "reviews.username"    "sourceURLs"
# Derive a coarse sentiment label from the numeric star rating:
#   rating >= 4       -> "Positive"
#   3 <= rating < 4   -> "Neutral"
#   rating < 3        -> "Negative"
# Missing ratings are kept as NA instead of falling through to the catch-all
# branch, which would silently count them as "Negative".
# Using a `>= 3` band (rather than `== 3`) keeps any fractional rating between
# 3 and 4 out of the "Negative" class; results for whole-number ratings are
# unchanged.
reviews <- reviews %>%
  mutate(
    sentiment = case_when(
      is.na(reviews.rating) ~ NA_character_,
      reviews.rating >= 4 ~ "Positive",
      reviews.rating >= 3 ~ "Neutral",
      TRUE ~ "Negative"
    )
  )
# Bar chart: how many reviews fall into each sentiment class.
# geom_bar() counts the rows per sentiment value; bars are coloured by class.
reviews %>%
  ggplot(mapping = aes(x = sentiment, fill = sentiment)) +
  geom_bar() +
  ggtitle("Distribution of Customer Sentiments") +
  xlab("Sentiment") +
  ylab("Number of Reviews")

# Tally reviews per sentiment class and add each class's share of the total
# (`pct`, in percent) so the chart can actually show percentages.
sent_counts <- reviews %>%
  count(sentiment) %>%
  mutate(pct = 100 * n / sum(n))

# Pie chart: a single stacked column wrapped onto polar coordinates, so each
# slice's angle is proportional to its review count.
ggplot(sent_counts,
       aes(x = "",
           y = n,
           fill = sentiment)) +
  geom_col() +
  # Print the percentage in the middle of each slice; position_stack(vjust = 0.5)
  # places the label at the centre of the corresponding stacked segment.
  geom_text(
    aes(label = sprintf("%.1f%%", pct)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  labs(
    title = "Percentage Distribution of Customer Sentiments"
  ) +
  # The polar axes would show raw-count ticks and an empty x label, which are
  # meaningless on a pie chart; drop them but keep the title and legend.
  theme_void()

# Stacked bar chart: review count per star rating, coloured by the derived
# sentiment class. The rating is converted to a factor so each distinct value
# gets its own discrete bar.
reviews %>%
  ggplot(mapping = aes(x = factor(reviews.rating), fill = sentiment)) +
  geom_bar(stat = "count", position = "stack") +
  ggtitle("Ratings and Sentiment Relationship") +
  xlab("Rating") +
  ylab("Count")