# Load libraries and data ----
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
# Read the review export into a tibble; the relative path is resolved
# against the current working directory.
reviews <- readr::read_csv(file = "Book2.csv")
## Rows: 599 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): id, name, asins, brand, categories, primaryCategories, imageURLs,...
## dbl (3): reviews.id, reviews.numHelpful, reviews.rating
## lgl (2): reviews.dateAdded, reviews.doRecommend
## dttm (3): dateAdded, dateUpdated, reviews.date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
reviews %>% head()
## # A tibble: 6 × 24
## id dateAdded dateUpdated name asins brand categories
## <chr> <dttm> <dttm> <chr> <chr> <chr> <chr>
## 1 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 2 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 3 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 4 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 5 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## 6 AVqVGZNv… 2017-03-03 16:56:05 2018-10-25 16:36:31 "Ama… B00Z… Amaz… Computers…
## # ℹ 17 more variables: primaryCategories <chr>, imageURLs <chr>, keys <chr>,
## # manufacturer <chr>, manufacturerNumber <chr>, reviews.date <dttm>,
## # reviews.dateAdded <lgl>, reviews.dateSeen <chr>, reviews.doRecommend <lgl>,
## # reviews.id <dbl>, reviews.numHelpful <dbl>, reviews.rating <dbl>,
## # reviews.sourceURLs <chr>, reviews.text <chr>, reviews.title <chr>,
## # reviews.username <chr>, sourceURLs <chr>
# List every column name available in the imported data.
colnames(reviews)
## [1] "id" "dateAdded" "dateUpdated"
## [4] "name" "asins" "brand"
## [7] "categories" "primaryCategories" "imageURLs"
## [10] "keys" "manufacturer" "manufacturerNumber"
## [13] "reviews.date" "reviews.dateAdded" "reviews.dateSeen"
## [16] "reviews.doRecommend" "reviews.id" "reviews.numHelpful"
## [19] "reviews.rating" "reviews.sourceURLs" "reviews.text"
## [22] "reviews.title" "reviews.username" "sourceURLs"
# Derive a coarse sentiment label from the numeric star rating:
#   rating >= 4      -> "Positive"
#   3 <= rating < 4  -> "Neutral"
#   rating < 3       -> "Negative"
#   missing rating   -> NA (left unlabelled)
reviews <- reviews %>%
  mutate(
    sentiment = case_when(
      # Handle missing ratings first; otherwise the catch-all branch
      # would silently count unrated reviews as "Negative".
      is.na(reviews.rating) ~ NA_character_,
      reviews.rating >= 4 ~ "Positive",
      # `>= 3` rather than `== 3` so a fractional rating between 3 and 4
      # is not pushed into the "Negative" fall-through.
      reviews.rating >= 3 ~ "Neutral",
      TRUE ~ "Negative"
    )
  )
# Bar chart of how many reviews fall into each sentiment class;
# bars are colored by the same class shown on the x axis.
reviews %>%
  ggplot(mapping = aes(x = sentiment, fill = sentiment)) +
  geom_bar() +
  ggtitle("Distribution of Customer Sentiments") +
  xlab("Sentiment") +
  ylab("Number of Reviews")

# Tally reviews per sentiment class and compute each class's share of the
# total, so the pie chart can actually show the percentages its title
# promises. `sentiment` and `n` are kept as before; `pct` and `pct_label`
# are additional columns.
sent_counts <- reviews %>%
  count(sentiment) %>%
  mutate(
    pct = n / sum(n),
    pct_label = sprintf("%.1f%%", 100 * pct)
  )

# Pie chart: a single stacked column wrapped into polar coordinates.
ggplot(
  sent_counts,
  aes(x = "", y = n, fill = sentiment)
) +
  geom_col() +
  # Place each percentage label at the middle of its slice; the explicit
  # group keeps the label stacking order aligned with the slices.
  geom_text(
    aes(label = pct_label, group = sentiment),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar("y") +
  labs(
    title = "Percentage Distribution of Customer Sentiments"
  ) +
  # Axis text and grid lines carry no meaning on a pie chart.
  theme_void()

# Bar chart of review counts per star rating, colored by the derived
# sentiment class. The rating is converted to a factor so each distinct
# value gets its own discrete bar.
ggplot(
  data = reviews,
  mapping = aes(x = factor(reviews.rating), fill = sentiment)
) +
  geom_bar() +
  ggtitle("Ratings and Sentiment Relationship") +
  xlab("Rating") +
  ylab("Count")
