Inspired by: https://www.youtube.com/watch?v=l37n_HDD1qs
library(tidyverse)
library(stringr)
library(purrr)
library(rvest)
library(robotstxt)
# Optional robots.txt check before scraping:
# paths_allowed(paths = c("https://www.amazon.com/Best-Sellers-Unlocked-Cell-Phones/zgbs/wireless/2407749011"))

# Download and parse the best-sellers page.
phones <- read_html("https://www.amazon.com/Best-Sellers-Unlocked-Cell-Phones/zgbs/wireless/2407749011")

# Take the text of every <span> whose class attribute contains
# 'aok-inline-block' and collapse runs of whitespace inside each string.
phonename <- str_squish(
  rvest::html_text(
    xml2::xml_find_all(
      rvest::html_nodes(phones, "body"),
      "//span[contains(@class, 'aok-inline-block')]"
    )
  )
)

# Drop the empty strings (three blank items at the bottom of the page).
phonename <- phonename[phonename != ""]
# Build a short display name for each listing: keep only the text in front of
# the first delimiter matched by the pattern, then cut it to at most 25
# characters without appending an ellipsis.
truncnames <- str_trunc(
  map_chr(
    str_split(phonename, "(\\,| \\(\\d| \\(\\w|\\-\\d|\\||[w]\\/|\\s\\-)"),
    1
  ),
  width = 25,
  side = "right",
  ellipsis = ""
)
# Split every listing at each "$" sign. A listing that ends in
# "$349.99 - $629.99" yields three pieces (description, "349.99 - ", "629.99");
# a listing with a single price yields two pieces.
costs <- phonename %>%
  str_split('\\$')

# Lower bound: the piece after the first "$". Indexing with `[2]` gives NA when
# that piece does not exist, and vapply() also copes with an empty `costs`.
lower <- vapply(costs, function(pieces) pieces[2], character(1))

lowerprice <- lower %>%
  str_replace('\\ -', "") %>%    # drop the range separator left by the split
  str_replace_all(",", "") %>%   # drop thousands separators ("1,099.00")
  str_trim() %>%
  as.numeric()
# lowerprice
#plot(lowerprice)

# Upper bound: the piece after the second "$"; NA for single-price listings.
upper <- vapply(costs, function(pieces) pieces[3], character(1))

# Where there is no upper bound, fall back to the lower-bound text.
upper <- coalesce(upper, lower)
# upper
#plot(upper)
The `upper` vector is of type character. Calling `as.numeric()` on it directly returns `NA` for values formatted like 1,000.00, so we first remove the thousands-separator commas and keep the decimal points.
# Convert the upper-bound strings to numbers. Prices of 1,000 and above carry
# thousands separators ("1,249.99") that as.numeric() cannot parse, so every
# comma is removed first. A literal (fixed) match is used so that any number of
# comma groups, with or without decimals, is handled the same way.
upperprice <- as.numeric(gsub(",", "", upper, fixed = TRUE))
# upperprice
#plot(upperprice)
# Combine the names and both price bounds into one data frame. cbind() builds
# a character matrix, so all three columns are character here.
# stringsAsFactors = FALSE keeps them character on R < 4.0 as well, where the
# default would turn them into factors and a later as.numeric() would return
# level codes instead of prices.
prices <- as.data.frame(
  cbind(truncnames, lowerprice, upperprice),
  stringsAsFactors = FALSE
)
head(prices) %>%
  knitr::kable()
truncnames | lowerprice | upperprice |
---|---|---|
Google Pixel 4a | 349.99 | 629.99 |
Moto E | 129.99 | 499.99 |
Apple iPhone 8 | 218 | 599.77 |
Samsung Galaxy A20s A207M | 157.99 | 245.12 |
Samsung Galaxy S9 | 249.88 | 999.77 |
Samsung Galaxy A10S A107M | 127.99 | 199 |
# Make sure both price columns are numeric.
prices$lowerprice <- as.numeric(prices$lowerprice)
prices$upperprice <- as.numeric(prices$upperprice)
# head(prices)
# If NAs in upper:
# sum(is.na(prices$upperprice))
# Wherever the upper bound is missing, use the lower bound instead. This is
# vectorized, so it also works on a data frame with zero rows.
prices$upperprice <- coalesce(prices$upperprice, prices$lowerprice)
# head(prices)
#
# plot(prices$upperprice-prices$lowerprice)
# prices[which.max(prices$upperprice-prices$lowerprice),]
# prices[which.min(prices$upperprice-prices$lowerprice),]
In the next part, I resolve duplicate names by manually adding the detail that distinguishes each listing.
library(ggplot2)
library(ggalt)
# head(prices)
# phonename
# Several listings truncate to the same short name. Rename them by row
# position so that every name is unique; the positions refer to this
# particular scrape.
prices$truncnames[c(3, 37, 35, 47)] <- c(
  "Apple iPhone 8 Gold",
  "Apple iPhone 8 Silver",
  "OUKITEL WP5 Face ID",
  "Pixel 4 Renewed"
)
# Further renames, currently not applied:
# prices$truncnames[50] <- "Google Pixel 3"
# prices$truncnames[46] <- "Google Pixel 3a"

# Turn the names into a factor whose levels follow the current row order.
prices$truncnames <- factor(
  prices$truncnames,
  levels = as.character(prices$truncnames)
)
Arrange all phones by upperprice for plotting. Add colour code for brands if necessary.
# Sort the phones by upper price, attach a label colour per brand, and
# re-level the name factor so its levels follow the sorted row order.
prices2 <- prices %>%
  arrange(upperprice) %>%
  mutate(
    fill_col = case_when(
      str_detect(truncnames, "Samsung") ~ "dark blue",
      str_detect(truncnames, "Apple") ~ "black",
      TRUE ~ "black"
    ),
    truncnames = factor(truncnames, levels = as.character(truncnames))
  )

# Show the first rows as a table.
knitr::kable(head(prices2))
truncnames | lowerprice | upperprice | fill_col |
---|---|---|---|
BLU Advance A120 | 19.99 | 49.99 | black |
BLU Studio Mini | 64.99 | 69.99 | black |
BLU VIVO X6 | 109.99 | 129.99 | black |
UMIDIGI A7 Pro Unlocked C | 127.49 | 144.49 | black |
Ulefone Note 9P 4G Unlock | 149.99 | 149.99 | black |
OUKITEL WP5 Face ID | 155.99 | 155.99 | black |
# Dumbbell chart: one row per phone, spanning its lower to its upper price.
gg <- ggplot(
  prices2,
  aes(x = lowerprice, xend = upperprice, y = truncnames, group = truncnames)
)

# Price range layer and a default continuous x scale.
gg <- gg +
  geom_dumbbell(color = "#a3c4dc", size = 0.75, colour_xend = "#0e668b") +
  scale_x_continuous()

# Titles; both axis titles are suppressed.
gg <- gg +
  labs(
    title = "Dumbbell Chart for \nTop-Selling Smartphones",
    subtitle = "Prices are often set just below \n$500 and $1000",
    caption = "Source: amazon.com scraped with 'rvest'",
    x = NULL,
    y = NULL
  )

# Light grey background, vertical major grid lines only, no ticks or border.
gg <- gg +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(),
    panel.grid.major.y = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "top"
  )

# Two horizontal arrows, drawn from x to xend at rows 43 and 29.
# NOTE(review): presumably these point at the $1000 and $500 price points
# named in the subtitle; the row positions depend on this particular scrape.
gg <- gg +
  annotate(
    "segment",
    x = 1450, xend = 1050, y = 43, yend = 43,
    colour = "black", size = 0.3, alpha = 0.6, arrow = arrow()
  ) +
  annotate(
    "segment",
    x = 950, xend = 550, y = 29, yend = 29,
    colour = "black", size = 0.3, alpha = 0.6, arrow = arrow()
  )

plot(gg)
# Second dumbbell chart: same price ranges, with the y-axis labels coloured
# by the fill_col column of prices2.
gg2 <- ggplot(
  prices2,
  aes(x = lowerprice, xend = upperprice, y = truncnames, group = truncnames)
)

# Price range layer and a default continuous x scale.
gg2 <- gg2 +
  geom_dumbbell(color = "#a3c4dc", size = 0.75, colour_xend = "#0e668b") +
  scale_x_continuous()

# Titles; both axis titles are suppressed.
gg2 <- gg2 +
  labs(
    title = "Dumbbell Chart for \nTop-Selling Smartphones",
    subtitle = "Samsung leads with \nlow- and hi-end models",
    caption = "Source: amazon.com scraped with 'rvest'",
    x = NULL,
    y = NULL
  )

# Same look as the first chart, plus per-label colours on the y axis.
gg2 <- gg2 +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_rect(fill = "#f7f7f7"),
    panel.background = element_rect(fill = "#f7f7f7"),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(),
    panel.grid.major.y = element_blank(),
    # One colour per row of prices2, in plotting order.
    axis.text.y = element_text(color = prices2$fill_col),
    axis.ticks = element_blank(),
    legend.position = "top"
  )

plot(gg2)
For dumbbell plot, see http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html#Diverging%20Dot%20Plot
For conditional colours in ggplot, see https://community.rstudio.com/t/setting-colours-in-ggplot-conditional-on-value/8328
For theme elements, see https://ggplot2.tidyverse.org/reference/theme.html
# Show the scraped listing strings in full, for reference.
phonename
## [1] "Google Pixel 4a - New Unlocked Android Smartphone - 128 GB of Storage - Up to 24 Hour Battery - Just Black with Google Pixel 4a Case, Basically Black 4.6 out of 5 stars 472 $349.99 - $629.99"
## [2] "Moto E | Unlocked | Made for US by Motorola | 2/32GB | 13MP Camera | 2020 | Blue, XT2052-1, 2/32 GB | Moto E | 13MP Camera | Blue | US 4.2 out of 5 stars 147 $129.99 - $499.99"
## [3] "Apple iPhone 8, 64GB, Gold - For AT&T (Renewed) 4.3 out of 5 stars 11,246 $218.00 - $599.77"
## [4] "Samsung Galaxy A20s A207M/DS, 32GB/3GB RAM Dual SIM 6.5''HD+ Snapdragon 450, Factory Unlocked (International Version) - (Red) 4.3 out of 5 stars 5,320 $157.99 - $245.12"
## [5] "Samsung Galaxy S9, 64GB, Coral Blue - For AT&T (Renewed) 4.3 out of 5 stars 6,743 $249.88 - $999.77"
## [6] "Samsung Galaxy A10S A107M 32GB Unlocked GSM DUOS Phone w/ Dual 13MP & 2MP Camera (International Variant/US Compatible LTE) – Black 4.2 out of 5 stars 3,103 $127.99 - $199.00"
## [7] "Google Pixel 5 - 5G Android Phone - Water Resistant - Unlocked Smartphone with Night Sight and Ultrawide Lens - Just Black $699.99"
## [8] "Apple iPhone 11, 64GB, Black - Fully Unlocked (Renewed) 4.4 out of 5 stars 352 $549.11 - $835.00"
## [9] "Samsung Galaxy A51 (SM-A515F/DS) Dual SIM 128GB, GSM Unlocked - Prism Crush Black 4.3 out of 5 stars 1,236 $264.99 - $411.92"
## [10] "Samsung Galaxy S20 FE 5G | Factory Unlocked Android Cell Phone | 128 GB | US Version Smartphone | Pro-Grade Camera, 30X Space Zoom, Night Mode | Cloud Navy $599.99"
## [11] "Samsung Galaxy A10e 32GB A102U GSM/CDMA Unlocked Phone - Black 4.3 out of 5 stars 5,322 $159.99 - $499.99"
## [12] "Samsung Electronics Galaxy Note 20 5G Factory Unlocked Android Cell Phone | US Version | 128GB of Storage | Mobile Gaming Smartphone | Long-Lasting Battery | Mystic Bronze (SM-N981UZNAXAA) 4.4 out of 5 stars 254 $799.00 - $1,249.99"
## [13] "Apple iPhone X, 64GB, Silver - For AT&T (Renewed) 4.2 out of 5 stars 4,728 $383.99 - $783.08"
## [14] "Apple iPhone 7 Plus, 32GB, Gold - For Verizon (Renewed) 4.2 out of 5 stars 8,970 $239.91 - $588.34"
## [15] "UMIDIGI A7 Pro Unlocked Cell Phones(4GB+64GB) 6.3\" FHD+ Full Screen, 4150mAh High Capacity Battery Smartphone with 16MP AI Quad Camera, Android 10 and Dual 4G Volte(Cosmic Black). 4.3 out of 5 stars 651 $127.49 - $144.49"
## [16] "Samsung Galaxy S9+, 64GB, Coral Blue - For Verizon (Renewed) 4.3 out of 5 stars 5,194 $277.04 - $499.77"
## [17] "Moto E6 | Unlocked | Made for US by Motorola | 2/16GB | 13MP Camera | Blue 4.1 out of 5 stars 233 $125.99 - $401.35"
## [18] "BLU Advance A120-1.8\" Display, GSM Unlocked 3G, VGA Camera -Black 3.1 out of 5 stars 26 $19.99 - $49.99"
## [19] "Samsung Galaxy A01 (A015M) 16GB, Dual SIM, GSM Unlocked, 5.7” Display Smartphone - International Version - Black 4.2 out of 5 stars 115 $98.79 - $169.99"
## [20] "BLU G9 Pro -6.3” Full HD Smartphone with Triple Main Camera, 128GB+4GB RAM -Black 4.3 out of 5 stars 289 $149.95 - $249.99"
## [21] "Apple iPhone XS, 64GB, Gold - For AT&T (Renewed) 4.3 out of 5 stars 3,128 $444.11 - $999.77"
## [22] "Apple iPhone 11 Pro, 64GB, Gold - Fully Unlocked (Renewed) 4.3 out of 5 stars 129 $737.99 - $999.99"
## [23] "BLU Studio Mini -5.5HD Smartphone, 32GB+2GB Ram -Black 3.9 out of 5 stars 1,339 $64.99 - $69.99"
## [24] "Samsung Galaxy S20 5G Factory Unlocked New Android Cell Phone US Version | 128GB of Storage | Fingerprint ID and Facial Recognition | Long-Lasting Battery | Cloud Blue 4.6 out of 5 stars 250 $979.00 - $1,599.99"
## [25] "BLU VIVO X6-6.1” HD+ Display, 64GB+3GB RAM - Gradient Blue 4.0 out of 5 stars 1,051 $109.99 - $129.99"
## [26] "Samsung Galaxy Note 9, 128GB, Cloud Silver - For AT&T (Renewed) 4.4 out of 5 stars 6,861 $370.00 - $599.88"
## [27] "Ulefone Note 9P 4G Unlocked Cell Phones, 6.52\" HD+ Waterdrop Screen,16MP Triple Rear Camera, Android 10 Octa-Core 4GB + 64GB,4500mAh Big Battery, Face Unlock Finger Reader Unlocked Smartphones - Black 4.5 out of 5 stars 59 $149.99"
## [28] "Apple iPhone Xs Max, 64GB, Space Gray - For Sprint (Renewed) 4.4 out of 5 stars 3,539 $514.00 - $1,339.32"
## [29] "Apple iPhone 11 Pro Max, 64GB, Gold - Fully Unlocked (Renewed) 4.0 out of 5 stars 49 $849.99 - $1,449.97"
## [30] "Samsung Galaxy A71 (SM-A715F/DS) Dual SIM 4G LTE 128GB, GSM Factory Unlocked - International Version - No Warranty - Blue 4.4 out of 5 stars 1,155 $353.99 - $409.99"
## [31] "Samsung Galaxy A31-128GB / 4GB - A315G/DSL Unlocked Dual Sim Phone w/Quad Camera 48MP+8MP+5MP+5MP GSM International Version (Prism Crush Black) 4.5 out of 5 stars 361 $224.99 - $269.98"
## [32] "Moto G7 with Alexa Hands-Free – Unlocked – 64 GB – Ceramic Black (US Warranty) – Verizon, AT&T, T–Mobile, Sprint, Boost, Cricket, & Metro 4.3 out of 5 stars 4,646 $129.99 - $299.99"
## [33] "OnePlus 8 Glacial Green,<U+200B> 5G Unlocked Android Smartphone U.S Version, 8GB RAM+128GB Storage, 90Hz Fluid Display,Triple Camera, with Alexa Built-in, 4.4 out of 5 stars 589 $699.00 - $929.00"
## [34] "Samsung Galaxy S10+ Factory Unlocked Android Cell Phone | US Version | 1TB of Storage | Fingerprint ID and Facial Recognition | Long-Lasting Battery | Ceramic Black 4.6 out of 5 stars 40 $699.99 - $1,799.99"
## [35] "OUKITEL WP5 (2020) Rugged Cell Phones Unlocked Android 10 Smartphone 8000mAh Battery Triple Camera 4 LED Flashlights 4GB+32GB IP68 Waterproof 5.5 HD+ Global GSM 4G Dual SIM Phone Face ID Fingerprint 4.3 out of 5 stars 333 $155.99"
## [36] "Pixel 4 - Clearly White - 64GB - Unlocked with Pixel 4 Case, Blue-ish 3.0 out of 5 stars 1 $551.12 - $999.00"
## [37] "Apple iPhone 8, 64GB, Silver - Fully Unlocked (Renewed) 4.4 out of 5 stars 244 13 offers from $254.99"
## [38] "Samsung Galaxy S8+, 64GB, Arctic Silver - For AT&T (Renewed) 4.3 out of 5 stars 1,939 $215.00 - $399.77"
## [39] "Samsung Galaxy S8, 64GB, Arctic Silver - For AT&T (Renewed) 4.3 out of 5 stars 55 $203.77 - $499.77"
## [40] "Samsung Galaxy S10+, 1TB, Ceramic Black - Fully Unlocked (Renewed) 4.2 out of 5 stars 2,861 $459.00 - $1,199.77"
## [41] "Apple iPhone 7 32GB Unlocked GSM Quad-Core Phone w/ 12MP Camera - Gold (Renewed) 4.0 out of 5 stars 2,116 $175.00 - $284.97"
## [42] "OUKITEL WP5 (2020) Rugged Smartphone, 8000mAh Battery IP68 Waterproof Android 10 Unlocked Cell Phones 4G LTE Dual SIM, 5.5inches 4GB 32GB Triple Camera Face/Fingerprint Unlock GPS Global Version 4.4 out of 5 stars 403 $155.99"
## [43] "Samsung Galaxy Note 10 Lite N770F 128GB Dual-SIM GSM Unlocked Phone (International Variant/US Compatible LTE) - Aura Black 4.4 out of 5 stars 752 $455.00 - $476.00"
## [44] "Google Pixel XL, 32GB Unlocked GSM - Quite Black (Renewed) 3.9 out of 5 stars 323 $85.00 - $189.98"
## [45] "Samsung Galaxy S10, 128GB, Flamingo Pink - Fully Unlocked (Renewed) 4.3 out of 5 stars 1,593 $415.00 - $999.77"
## [46] "Samsung Galaxy S8 64GB Phone - 5.8in Unlocked Smartphone - Midnight Black (Renewed) 4.2 out of 5 stars 786 19 offers from $217.00"
## [47] "Pixel 4 - Clearly White - 64GB - Unlocked (Renewed) 4.5 out of 5 stars 46 $364.49 - $999.77"
## [48] "Xiaomi Redmi Note 9 Pro 128GB + 6GB RAM, 6.67\" FHD+ DotDisplay, 64MP AI Quad Camera, Qualcomm Snapdragon 720G LTE Factory Unlocked Smartphone - International Version (Glacier White) 4.4 out of 5 stars 415 $267.33 - $289.00"
## [49] "Xiaomi Redmi Note 9S 6.67\" 48MP International Global Version (Aurora Blue, 4GB/64GB) 4.6 out of 5 stars 1,523 $200.99 - $259.99"
## [50] "Apple iPhone SE, 64GB, Black - Fully Unlocked (Renewed) 4.2 out of 5 stars 28 $355.00 - $480.00"
# Make data long for further use
# library(tidyr)
# long <- gather(prices, bound, value, lowerprice:upperprice, factor_key = T)
# head(long)
# library(dplyr)
# long$bound2 = case_when(long$bound == "lowerprice" ~ "1",
# TRUE ~ "2")
# long$bound2 <- as.factor(long$bound2)