This document analyzes price, performance, and value using PassMark's GPU benchmark data (https://www.videocardbenchmark.net/gpu_list.php).
However, cards whose price is missing cannot be used in a price analysis, so all rows with a missing price are removed.
library(dplyr)
library(httr)
library(rvest)
# Download the PassMark GPU list page. Stop immediately on an HTTP error
# (4xx/5xx) instead of trying to parse an error page further down.
res <- GET(url = 'https://www.videocardbenchmark.net/gpu_list.php')
stop_for_status(res)

# Parse the response body as UTF-8 HTML, select the element with id
# "cputable", and convert that table into a data frame.
df <- res %>%
  read_html(encoding = 'utf-8') %>%
  html_node(css = "#cputable") %>%
  html_table(trim = TRUE, fill = TRUE)

# Inspect the column names and types of the scraped table.
str(df)
'data.frame': 2091 obs. of 5 variables:
$ Videocard Name : chr "128 DDR Radeon 9700 TX w/TV-Out" "128 DDR Radeon 9800 Pro" "128MB DDR Radeon 9800 Pro" "128MB RADEON X600 SE" ...
$ Passmark G3D Mark(higher is better): int 44 62 66 49 8214 37 67 826 5 2 ...
$ Rank(lower is better) : int 1788 1729 1717 1772 106 1813 1712 795 1970 2069 ...
$ Videocard Value(higher is better) : num NA NA NA NA NA NA NA NA NA NA ...
$ Price(USD) : chr NA NA NA NA ...
# Replace the verbose scraped headers with short names, in the table's column
# order: card name, G3D Mark score, rank, value score, price.
colnames(df) <- c("name", "passmark", "rank", "value", "price")

# Count missing values per column. is.na() on a data frame already returns a
# logical matrix, so sapply() is unnecessary; this form also works when the
# data frame has zero rows.
colSums(is.na(df))
name passmark rank value price
0 0 0 1633 1633
# Keep only the rows that have a price.
df <- df %>%
  dplyr::filter(!is.na(price))

# Re-count missing values per column to confirm the rows were removed.
# is.na() on a data frame returns a logical matrix, so colSums() applies
# directly (and works for a zero-row result as well).
colSums(is.na(df))
name passmark rank value price
0 0 0 0 0
library(stringr)
# Strip "$", "*" and "," from the price strings, then convert the cleaned
# text to numeric.
df$price <- as.numeric(
  str_replace_all(
    string = df$price,
    pattern = "\\$|\\*|\\,",
    replacement = ""
  )
)
# Pearson correlation matrix of passmark, value and price.
# Columns are selected by name rather than by position so the result does not
# depend on column order. The method is given as a single value: cor() uses
# exactly one method per call, and passing the full
# c("pearson", "kendall", "spearman") vector only selects the first entry.
cor(
  df[, c("passmark", "value", "price")],
  use = "everything",
  method = "pearson"
)
passmark value price
passmark 1.0000000 0.5659466 0.3956899
value 0.5659466 1.0000000 -0.1485808
price 0.3956899 -0.1485808 1.0000000
# Pearson correlation test between benchmark score and price.
cor.test(
  x = df$passmark,
  y = df$price,
  method = "pearson"
)
Pearson's product-moment correlation
data: df$passmark and df$price
t = 9.2005, df = 456, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3155019 0.4702668
sample estimates:
cor
0.3956899
# Pearson correlation test between value score and price.
cor.test(
  x = df$value,
  y = df$price,
  method = "pearson"
)
Pearson's product-moment correlation
data: df$value and df$price
t = -3.2084, df = 456, p-value = 0.001429
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.23698134 -0.05774004
sample estimates:
cor
-0.1485808
# Explore the data with graphs
library(ggplot2)
# passmark > 10000
# Price against passmark, coloured by value and labelled with the card name.
# Points and labels use the same seeded jitter so every label stays attached
# to its point and the plot is reproducible between runs.
df %>%
  filter(passmark > 10000) %>%
  ggplot(aes(x = passmark, y = price, color = value, label = name)) +
  geom_point(position = position_jitter(seed = 1)) +
  geom_text(
    position = position_jitter(seed = 1),
    check_overlap = TRUE,
    size = 3,
    vjust = -0.5,
    hjust = 0.5,
    angle = 40
  )
# passmark <= 10000
# The bound is inclusive so that cards scoring exactly 10000 are not left out
# of both this plot and the "passmark > 10000" plot.
# Price against passmark, coloured by value and labelled with the card name.
# Points and labels use the same seeded jitter so every label stays attached
# to its point and the plot is reproducible between runs.
df %>%
  filter(passmark <= 10000) %>%
  ggplot(aes(x = passmark, y = price, color = value, label = name)) +
  geom_point(position = position_jitter(seed = 1)) +
  geom_text(
    position = position_jitter(seed = 1),
    check_overlap = TRUE,
    size = 3,
    vjust = -0.5,
    hjust = 0.5,
    angle = 40
  )