Purpose of this doc

This document analyzes the price, performance, and value of video cards using PassMark's GPU benchmark data (https://www.videocardbenchmark.net/gpu_list.php).
However, many rows have no listed price, and those rows cannot be used in a price analysis, so every row with a missing price is removed.

library(dplyr)
library(httr)
library(rvest)
# Download the PassMark video card list. Stop immediately on an HTTP error
# status instead of trying to parse an error page as if it were the table.
res <- GET(url = 'https://www.videocardbenchmark.net/gpu_list.php')
stop_for_status(res)

# Parse the response as HTML and read the element with id "cputable" into a
# data frame.
df <- res %>%
  read_html(encoding = 'utf-8') %>%
  html_node(css = "#cputable") %>%
  html_table(trim = TRUE, fill = TRUE)

# Show the structure (column names and types) of the scraped table.
str(df)
'data.frame':   2091 obs. of  5 variables:
 $ Videocard Name                     : chr  "128 DDR Radeon 9700 TX w/TV-Out" "128 DDR Radeon 9800 Pro" "128MB DDR Radeon 9800 Pro" "128MB RADEON X600 SE" ...
 $ Passmark G3D Mark(higher is better): int  44 62 66 49 8214 37 67 826 5 2 ...
 $ Rank(lower is better)              : int  1788 1729 1717 1772 106 1813 1712 795 1970 2069 ...
 $ Videocard Value(higher is better)  : num  NA NA NA NA NA NA NA NA NA NA ...
 $ Price(USD)                         : chr  NA NA NA NA ...
# Replace the long scraped headers with short column names.
colnames(df) <- c("name", "passmark", "rank", "value", "price")
# Count missing values per column. is.na() on a data frame already returns a
# logical matrix, which avoids sapply()'s input-dependent return type.
colSums(is.na(df))
    name passmark     rank    value    price 
       0        0        0     1633     1633 
# Keep only the rows that have a price.
df <- df %>% dplyr::filter(!is.na(price))
# Re-count missing values per column after the filter.
colSums(is.na(df))
    name passmark     rank    value    price 
       0        0        0        0        0 
library(stringr)
# Strip "$", "*" and "," from the price strings, then convert them to numeric.
df$price <- as.numeric(str_replace_all(df$price, "\\$|\\*|\\,", ""))

# Pearson correlation matrix of passmark, value and price.
# Columns are selected by name rather than by position, and the method is
# given as a single value: when all three method names are passed, cor() only
# uses the first one ("pearson").
df[, c("passmark", "value", "price")] %>%
  cor(use = "everything", method = "pearson")
          passmark      value      price
passmark 1.0000000  0.5659466  0.3956899
value    0.5659466  1.0000000 -0.1485808
price    0.3956899 -0.1485808  1.0000000
# Test whether the Pearson correlation between passmark and price is non-zero.
cor.test(x = df$passmark, y = df$price, method = "pearson")

    Pearson's product-moment correlation

data:  df$passmark and df$price
t = 9.2005, df = 456, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3155019 0.4702668
sample estimates:
      cor 
0.3956899 
# Test whether the Pearson correlation between value and price is non-zero.
cor.test(x = df$value, y = df$price, method = "pearson")

    Pearson's product-moment correlation

data:  df$value and df$price
t = -3.2084, df = 456, p-value = 0.001429
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.23698134 -0.05774004
sample estimates:
       cor 
-0.1485808 
# data exploration with graph
library(ggplot2)
# Cards with passmark > 10000: price against passmark, colored by value and
# labelled with the card name.
df %>%
  filter(passmark > 10000) %>%
  ggplot(
    mapping = aes(x = passmark, y = price, color = value, label = name)
  ) +
  geom_jitter() +
  geom_text(
    check_overlap = TRUE,
    size = 3,
    vjust = -0.5,
    hjust = 0.5,
    angle = 40
  )

# passmark <= 10000
# The upper bound is inclusive so that a card scoring exactly 10000, which the
# "> 10000" plot excludes, is still shown in one of the two plots.
df %>%
  filter(passmark <= 10000) %>%
  ggplot() +
  aes(x = passmark, y = price, color = value, label = name) +
  geom_jitter() +
  geom_text(
    check_overlap = TRUE,
    size = 3,
    vjust = -0.5,
    hjust = 0.5,
    angle = 40
  )