# Setup: install dplyr only when it is missing, then attach the packages used
# by this script. Each call sits on its own line; R cannot parse several
# statements on one line without a `;` separator.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(ggplot2)
library(tidyverse)

# Load the laptop price data set.
data <- read.csv("~/Documents/statistics(1)/laptop_prices.csv")

# nrow(data) returns the number of rows in the original data set.
# The sample size is 50% of that row count; floor() keeps it a whole number
# when the row count is odd instead of relying on implicit truncation later.
sample_size <- floor(nrow(data) * 0.5)
# Fix the random number generator state so the five draws are reproducible.
set.seed(123)

# Draw five subsamples of `sample_size` rows each, sampling rows with
# replacement (the same row may therefore appear more than once).
df_1 <- sample_n(data, size = sample_size, replace = TRUE)
df_2 <- sample_n(data, size = sample_size, replace = TRUE)
df_3 <- sample_n(data, size = sample_size, replace = TRUE)
df_4 <- sample_n(data, size = sample_size, replace = TRUE)
df_5 <- sample_n(data, size = sample_size, replace = TRUE)

# Show the first rows of the first subsample.
head(df_1)
## Company Product TypeName Inches Ram OS
## 1 Asus ZenBook Flip 2 in 1 Convertible 13.3 8 Windows 10
## 2 Asus Pro P2540UA-AB51 Notebook 15.6 8 Windows 10
## 3 Lenovo V310-15IKB (i5-7200U/8GB/1TB Notebook 15.6 8 Windows 10
## 4 Lenovo ThinkPad L470 Notebook 14.0 8 Windows 7
## 5 Dell Inspiron 5379 2 in 1 Convertible 13.3 8 Windows 10
## 6 MSI GP62M Leopard Gaming 15.6 8 Windows 10
## Weight Price_euros Screen ScreenW ScreenH Touchscreen IPSpanel RetinaDisplay
## 1 1.27 928 Full HD 1920 1080 Yes Yes No
## 2 2.37 749 Full HD 1920 1080 No No No
## 3 1.90 813 Full HD 1920 1080 No No No
## 4 2.02 1340 Full HD 1920 1080 No No No
## 5 1.62 839 Full HD 1920 1080 Yes Yes No
## 6 2.20 1199 Full HD 1920 1080 No No No
## CPU_company CPU_freq CPU_model PrimaryStorage SecondaryStorage
## 1 Intel 2.5 Core i5 7200U 256 0
## 2 Intel 2.5 Core i5 7200U 1024 0
## 3 Intel 2.5 Core i5 7200U 128 1024
## 4 Intel 2.3 Core i5 6200U 256 0
## 5 Intel 1.8 Core i7 8550U 1024 0
## 6 Intel 2.8 Core i7 7700HQ 128 1024
## PrimaryStorageType SecondaryStorageType GPU_company GPU_model
## 1 SSD No Intel HD Graphics 620
## 2 HDD No Intel HD Graphics 620
## 3 SSD HDD AMD R17M-M1-70
## 4 SSD No Intel HD Graphics 520
## 5 HDD No Intel UHD Graphics 620
## 6 SSD HDD Nvidia GeForce GTX 1050
# set.seed() fixes the state of the random number generator so that the random
# sampling can be reproduced: re-running the script yields the same samples.
# Without setting the seed, the random samples would differ every time you run
# the script.
# Column-by-column summary of every subsample: quartiles and mean for numeric
# columns, length/class/mode for character columns.
summary_1 <- df_1 %>% summary()
summary_2 <- df_2 %>% summary()
summary_3 <- df_3 %>% summary()
summary_4 <- df_4 %>% summary()
summary_5 <- df_5 %>% summary()

# Display the summary of the first subsample.
print(summary_1)
## Company Product TypeName Inches
## Length:637 Length:637 Length:637 Min. :10.10
## Class :character Class :character Class :character 1st Qu.:14.00
## Mode :character Mode :character Mode :character Median :15.60
## Mean :15.04
## 3rd Qu.:15.60
## Max. :18.40
## Ram OS Weight Price_euros
## Min. : 2.000 Length:637 Min. :0.690 Min. : 199
## 1st Qu.: 4.000 Class :character 1st Qu.:1.500 1st Qu.: 589
## Median : 8.000 Mode :character Median :2.090 Median : 961
## Mean : 8.314 Mean :2.061 Mean :1131
## 3rd Qu.: 8.000 3rd Qu.:2.320 3rd Qu.:1487
## Max. :32.000 Max. :4.600 Max. :5499
## Screen ScreenW ScreenH Touchscreen
## Length:637 Min. :1366 Min. : 768 Length:637
## Class :character 1st Qu.:1920 1st Qu.:1080 Class :character
## Mode :character Median :1920 Median :1080 Mode :character
## Mean :1887 Mean :1068
## 3rd Qu.:1920 3rd Qu.:1080
## Max. :3840 Max. :2160
## IPSpanel RetinaDisplay CPU_company CPU_freq
## Length:637 Length:637 Length:637 Min. :0.900
## Class :character Class :character Class :character 1st Qu.:1.800
## Mode :character Mode :character Mode :character Median :2.500
## Mean :2.283
## 3rd Qu.:2.700
## Max. :3.600
## CPU_model PrimaryStorage SecondaryStorage PrimaryStorageType
## Length:637 Min. : 16.0 Min. : 0.0 Length:637
## Class :character 1st Qu.: 256.0 1st Qu.: 0.0 Class :character
## Mode :character Median : 256.0 Median : 0.0 Mode :character
## Mean : 452.4 Mean : 185.6
## 3rd Qu.: 512.0 3rd Qu.: 0.0
## Max. :2048.0 Max. :2048.0
## SecondaryStorageType GPU_company GPU_model
## Length:637 Length:637 Length:637
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Group-by analysis for a categorical variable ("Company") across all
# subsamples: for each company, compute the mean price and the number of rows.
mean_price_by_company <- function(df) {
  df %>%
    group_by(Company) %>%
    summarise(
      Mean_Price = mean(Price_euros, na.rm = TRUE),
      Count = n()
    )
}

group_by_company_1 <- mean_price_by_company(df_1)
group_by_company_2 <- mean_price_by_company(df_2)
group_by_company_3 <- mean_price_by_company(df_3)
group_by_company_4 <- mean_price_by_company(df_4)
group_by_company_5 <- mean_price_by_company(df_5)
# Display group by company results for each sample
# Subsample 1: print a heading, then the per-company mean price and row count.
# (Lines starting with `##` appear to be console output captured from a run.)
print("Sample 1 - Grouped by Company")
## [1] "Sample 1 - Grouped by Company"
print(group_by_company_1)
## # A tibble: 17 × 3
## Company Mean_Price Count
## <chr> <dbl> <int>
## 1 Acer 612. 57
## 2 Apple 1654. 7
## 3 Asus 1023. 78
## 4 Chuwi 381. 3
## 5 Dell 1282. 153
## 6 Google 1275 1
## 7 HP 1057. 111
## 8 LG 2299 1
## 9 Lenovo 1001. 143
## 10 MSI 1850. 32
## 11 Mediacom 300. 5
## 12 Microsoft 1682. 9
## 13 Razer 3132. 7
## 14 Samsung 1189 3
## 15 Toshiba 1238. 22
## 16 Vero 203. 1
## 17 Xiaomi 1017. 4
# Subsample 2: print a heading, then the per-company mean price and row count.
print("Sample 2 - Grouped by Company")
## [1] "Sample 2 - Grouped by Company"
print(group_by_company_2)
## # A tibble: 17 × 3
## Company Mean_Price Count
## <chr> <dbl> <int>
## 1 Acer 562. 40
## 2 Apple 1357. 13
## 3 Asus 1131. 72
## 4 Chuwi 349. 2
## 5 Dell 1250. 155
## 6 Google 1275 1
## 7 HP 1092. 127
## 8 Huawei 1499 1
## 9 Lenovo 1046. 140
## 10 MSI 1707. 32
## 11 Mediacom 309 4
## 12 Microsoft 1461. 4
## 13 Razer 2999 4
## 14 Samsung 1724 4
## 15 Toshiba 1394. 34
## 16 Vero 211. 1
## 17 Xiaomi 1090. 3
# Subsample 3: print a heading, then the per-company mean price and row count.
print("Sample 3 - Grouped by Company")
## [1] "Sample 3 - Grouped by Company"
print(group_by_company_3)
## # A tibble: 17 × 3
## Company Mean_Price Count
## <chr> <dbl> <int>
## 1 Acer 763. 40
## 2 Apple 1405. 15
## 3 Asus 1178. 66
## 4 Dell 1128. 137
## 5 Fujitsu 739 1
## 6 HP 1104. 161
## 7 Huawei 1499 1
## 8 LG 1899 1
## 9 Lenovo 1057. 145
## 10 MSI 1754. 24
## 11 Mediacom 290. 4
## 12 Microsoft 1089 1
## 13 Razer 1964 2
## 14 Samsung 1082. 6
## 15 Toshiba 1335. 28
## 16 Vero 260 2
## 17 Xiaomi 1045. 3
# Subsample 4: print a heading, then the per-company mean price and row count.
print("Sample 4 - Grouped by Company")
## [1] "Sample 4 - Grouped by Company"
print(group_by_company_4)
## # A tibble: 18 × 3
## Company Mean_Price Count
## <chr> <dbl> <int>
## 1 Acer 591. 56
## 2 Apple 1778. 9
## 3 Asus 1123. 66
## 4 Chuwi 349. 2
## 5 Dell 1095. 148
## 6 Fujitsu 724 2
## 7 Google 1559 1
## 8 HP 1123. 129
## 9 Huawei 1449 3
## 10 LG 2099 1
## 11 Lenovo 1082. 151
## 12 MSI 1750. 22
## 13 Mediacom 252 2
## 14 Microsoft 1411. 4
## 15 Razer 4999 4
## 16 Samsung 1517. 6
## 17 Toshiba 1376. 27
## 18 Vero 219. 4
# Subsample 5: print a heading, then the per-company mean price and row count.
print("Sample 5 - Grouped by Company")
## [1] "Sample 5 - Grouped by Company"
print(group_by_company_5)
## # A tibble: 18 × 3
## Company Mean_Price Count
## <chr> <dbl> <int>
## 1 Acer 719. 46
## 2 Apple 1640. 12
## 3 Asus 1049. 77
## 4 Dell 1125. 141
## 5 Fujitsu 649 1
## 6 Google 1737 2
## 7 HP 1119. 131
## 8 Huawei 1499 1
## 9 LG 1899 2
## 10 Lenovo 1073. 149
## 11 MSI 1606. 24
## 12 Mediacom 257 2
## 13 Microsoft 1833. 2
## 14 Razer 3574 4
## 15 Samsung 1567. 6
## 16 Toshiba 1198. 32
## 17 Vero 206. 3
## 18 Xiaomi 1000. 2
# print() was used above to display the group-by-company results for each
# sample.
# Anomaly rule: a price is flagged when it lies more than `threshold` standard
# deviations ABOVE the mean price of its own subsample. Only the upper tail is
# checked, so unusually cheap laptops are never flagged.
threshold <- 2

# Return the rows of `df` whose Price_euros exceeds mean + k * sd, where mean
# and sd are computed over `df` itself.
#   df: a data frame with a numeric Price_euros column.
#   k:  number of standard deviations above the mean (defaults to `threshold`).
# Missing prices are ignored when computing mean and sd (na.rm = TRUE, matching
# the group-by summaries); otherwise one NA would make the cutoff NA and
# silently produce an empty result.
find_price_anomalies <- function(df, k = threshold) {
  df %>%
    filter(
      Price_euros >
        mean(Price_euros, na.rm = TRUE) + k * sd(Price_euros, na.rm = TRUE)
    )
}

anomalies_1 <- find_price_anomalies(df_1)
anomalies_2 <- find_price_anomalies(df_2)
anomalies_3 <- find_price_anomalies(df_3)
anomalies_4 <- find_price_anomalies(df_4)
anomalies_5 <- find_price_anomalies(df_5)
# Display anomalies for each subsample
# Subsample 1: print a heading, then every row flagged as a price anomaly.
# (Lines starting with `##` appear to be console output captured from a run.)
print("Anomalies in Sample 1")
## [1] "Anomalies in Sample 1"
print(anomalies_1)
## Company Product TypeName Inches Ram OS
## 1 Dell Alienware 17 Gaming 17.3 16 Windows 10
## 2 Dell XPS 15 Notebook 15.6 32 Windows 10
## 3 Dell Alienware 15 Gaming 15.6 16 Windows 10
## 4 HP ZBook 17 Workstation 17.3 8 Windows 7
## 5 Razer Blade Pro Gaming 17.3 32 Windows 10
## 6 Dell Precision 7520 Workstation 15.6 16 Windows 10
## 7 Razer Blade Pro Gaming 14.0 16 Windows 10
## 8 Lenovo Thinkpad P71 Notebook 17.3 8 Windows 10
## 9 Dell Alienware 17 Gaming 17.3 16 Windows 10
## 10 Dell Alienware 17 Gaming 17.3 32 Windows 10
## 11 Dell Alienware 17 Gaming 17.3 32 Windows 10
## 12 Lenovo IdeaPad Y910-17ISK Gaming 17.3 32 Windows 10
## 13 Dell XPS 15 Notebook 15.6 32 Windows 10
## 14 Razer Blade Pro Gaming 14.0 16 Windows 10
## 15 Razer Blade Pro Gaming 17.3 32 Windows 10
## 16 MSI GT80S 6QF-074US Gaming 18.4 32 Windows 10
## 17 Dell XPS 13 Ultrabook 13.3 16 Windows 10
## 18 Dell Alienware 15 Gaming 15.6 16 Windows 10
## 19 Dell Alienware 15 Gaming 15.6 16 Windows 10
## 20 Lenovo Thinkpad Yoga 2 in 1 Convertible 14.0 16 Windows 10
## 21 Razer Blade Pro Gaming 14.0 16 Windows 10
## 22 MSI GT73VR Titan Gaming 17.3 16 Windows 10
## 23 Microsoft Surface Laptop Ultrabook 13.5 16 Windows 10 S
## 24 Dell Alienware 17 Gaming 17.3 16 Windows 10
## 25 Dell Alienware 15 Gaming 15.6 16 Windows 10
## 26 Dell Alienware 15 Gaming 15.6 16 Windows 10
## 27 Dell Alienware 17 Gaming 17.3 32 Windows 10
## 28 Lenovo IdeaPad Y900-17ISK Gaming 17.3 16 Windows 10
## 29 Dell Alienware 17 Gaming 17.3 32 Windows 10
## 30 Dell XPS 15 Notebook 15.6 32 Windows 10
## 31 Lenovo IdeaPad Y900-17ISK Gaming 17.3 32 Windows 10
## Weight Price_euros Screen ScreenW ScreenH Touchscreen IPSpanel
## 1 4.36 3154.00 4K Ultra HD 3840 2160 No Yes
## 2 2.06 2639.00 4K Ultra HD 3840 2160 Yes No
## 3 3.49 2813.75 Full HD 1920 1080 No No
## 4 3.00 3949.40 Full HD 1920 1080 No Yes
## 5 3.49 5499.00 4K Ultra HD 3840 2160 Yes No
## 6 2.80 3055.00 4K Ultra HD 3840 2160 No No
## 7 1.95 2899.00 Full HD 1920 1080 No No
## 8 3.40 2999.00 Full HD 1920 1080 No Yes
## 9 4.36 3154.00 4K Ultra HD 3840 2160 No Yes
## 10 4.36 3659.40 4K Ultra HD 3840 2160 No No
## 11 4.42 3147.37 4K Ultra HD 3840 2160 No Yes
## 12 4.60 2663.00 Full HD 1920 1080 No Yes
## 13 2.06 2639.00 4K Ultra HD 3840 2160 Yes No
## 14 1.95 2599.00 Full HD 1920 1080 No No
## 15 3.49 5499.00 4K Ultra HD 3840 2160 Yes No
## 16 4.40 2799.00 Full HD 1920 1080 No No
## 17 1.20 2680.00 Quad HD+ 3200 1800 Yes No
## 18 3.49 2813.75 Full HD 1920 1080 No No
## 19 3.49 2813.75 Full HD 1920 1080 No No
## 20 1.42 2824.00 Standard 2560 1440 Yes No
## 21 1.95 2599.00 Full HD 1920 1080 No No
## 22 4.14 2729.00 Full HD 1920 1080 No No
## 23 1.25 2589.00 Standard 2256 1504 Yes No
## 24 4.42 3012.77 Full HD 1920 1080 No Yes
## 25 3.21 2774.63 Full HD 1920 1080 No No
## 26 3.21 2774.63 Full HD 1920 1080 No No
## 27 4.42 3072.89 Full HD 1920 1080 No Yes
## 28 4.60 2749.99 Full HD 1920 1080 No Yes
## 29 4.42 3149.00 Full HD 1920 1080 No Yes
## 30 2.06 2639.00 4K Ultra HD 3840 2160 Yes No
## 31 4.60 3240.00 Full HD 1920 1080 No Yes
## RetinaDisplay CPU_company CPU_freq CPU_model PrimaryStorage
## 1 No Intel 2.8 Core i7 7700HQ 512
## 2 No Intel 2.8 Core i7 7700HQ 1024
## 3 No Intel 2.9 Core i7 7820HK 256
## 4 No Intel 2.6 Core i7 6700HQ 256
## 5 No Intel 2.9 Core i7 7820HK 512
## 6 No Intel 3.0 Xeon E3-1505M V6 256
## 7 No Intel 2.8 Core i7 7700HQ 512
## 8 No Intel 2.8 Core i7 7700HQ 256
## 9 No Intel 2.8 Core i7 7700HQ 512
## 10 No Intel 2.8 Core i7 7700HQ 1024
## 11 No Intel 2.8 Core i7 7700HQ 512
## 12 No Intel 2.7 Core i7 6820HK 256
## 13 No Intel 2.8 Core i7 7700HQ 1024
## 14 No Intel 2.8 Core i7 7700HQ 256
## 15 No Intel 2.9 Core i7 7820HK 512
## 16 No Intel 2.9 Core i7 6920HQ 512
## 17 No Intel 2.7 Core i7 7500U 512
## 18 No Intel 2.9 Core i7 7820HK 256
## 19 No Intel 2.9 Core i7 7820HK 256
## 20 No Intel 2.7 Core i7 7500U 1024
## 21 No Intel 2.8 Core i7 7700HQ 256
## 22 No Intel 2.9 Core i7 7820HK 512
## 23 No Intel 2.5 Core i7 7660U 512
## 24 No Intel 2.8 Core i7 7700HQ 128
## 25 No Intel 2.8 Core i7 7700HQ 256
## 26 No Intel 2.8 Core i7 7700HQ 256
## 27 No Intel 2.8 Core i7 7700HQ 512
## 28 No Intel 2.7 Core i7 6820HK 256
## 29 No Intel 2.8 Core i7 7700HQ 256
## 30 No Intel 2.8 Core i7 7700HQ 1024
## 31 No Intel 2.7 Core i7 6820HK 512
## SecondaryStorage PrimaryStorageType SecondaryStorageType GPU_company
## 1 1024 SSD HDD Nvidia
## 2 0 SSD No Nvidia
## 3 1024 SSD HDD Nvidia
## 4 0 SSD No Nvidia
## 5 0 SSD No Nvidia
## 6 1024 SSD HDD Nvidia
## 7 0 SSD No Nvidia
## 8 0 SSD No Nvidia
## 9 1024 SSD HDD Nvidia
## 10 1024 SSD HDD Nvidia
## 11 1024 SSD HDD Nvidia
## 12 1024 SSD HDD Nvidia
## 13 0 SSD No Nvidia
## 14 0 SSD No Nvidia
## 15 0 SSD No Nvidia
## 16 1024 SSD HDD Nvidia
## 17 0 SSD No Intel
## 18 1024 SSD HDD Nvidia
## 19 1024 SSD HDD Nvidia
## 20 0 SSD No Intel
## 21 0 SSD No Nvidia
## 22 1024 SSD HDD Nvidia
## 23 0 SSD No Intel
## 24 1024 SSD HDD Nvidia
## 25 1024 SSD HDD Nvidia
## 26 1024 SSD HDD Nvidia
## 27 1024 SSD HDD Nvidia
## 28 1024 SSD Hybrid Nvidia
## 29 1024 SSD HDD Nvidia
## 30 0 SSD No Nvidia
## 31 1024 SSD Hybrid Nvidia
## GPU_model
## 1 GeForce GTX 1070
## 2 GeForce GTX 1050
## 3 GeForce GTX 1070
## 4 Quadro M3000M
## 5 GeForce GTX 1080
## 6 Quadro M1200
## 7 GeForce GTX 1060
## 8 Quadro M620M
## 9 GeForce GTX 1070
## 10 GeForce GTX 1070
## 11 GeForce GTX 1070
## 12 GeForce GTX 1070
## 13 GeForce GTX 1050
## 14 GeForce GTX 1060
## 15 GeForce GTX 1080
## 16 GTX 980 SLI
## 17 HD Graphics 620
## 18 GeForce GTX 1070
## 19 GeForce GTX 1070
## 20 HD Graphics 620
## 21 GeForce GTX 1060
## 22 GeForce GTX 1070
## 23 Iris Plus Graphics 640
## 24 GeForce GTX 1070
## 25 GeForce GTX 1070
## 26 GeForce GTX 1070
## 27 GeForce GTX 1070
## 28 GeForce GTX 980M
## 29 GeForce GTX 1070
## 30 GeForce GTX 1050
## 31 GeForce GTX 980M
# Subsample 2: print a heading, then every row flagged as a price anomaly.
print("Anomalies in Sample 2")
## [1] "Anomalies in Sample 2"
print(anomalies_2)
## Company Product TypeName Inches Ram OS Weight
## 1 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 2 Lenovo IdeaPad Y900-17ISK Gaming 17.3 32 Windows 10 4.60
## 3 Apple MacBook Pro Ultrabook 15.4 16 macOS 1.83
## 4 HP Elitebook Folio Ultrabook 12.5 8 Windows 10 1.09
## 5 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 6 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 7 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 8 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.42
## 9 Lenovo Thinkpad X1 Ultrabook 14.0 16 Windows 10 1.13
## 10 HP Zbook 17 Workstation 17.3 16 Windows 7 3.00
## 11 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 12 HP Zbook 17 Workstation 17.3 16 Windows 7 3.00
## 13 Razer Blade Pro Gaming 17.3 32 Windows 10 3.49
## 14 Dell XPS 13 Ultrabook 13.3 16 Windows 10 1.20
## 15 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.36
## 16 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.36
## 17 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.36
## 18 Toshiba Portege X30-D-10L Ultrabook 13.3 32 Windows 10 1.05
## 19 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.42
## 20 Asus ROG G703VI-E5062T Gaming 17.3 32 Windows 10 4.70
## 21 Asus ROG G701VI Gaming 17.3 32 Windows 10 3.80
## 22 Toshiba Portege X30-D-10L Ultrabook 13.3 32 Windows 10 1.05
## 23 Toshiba Portege X30-D-10L Ultrabook 13.3 32 Windows 10 1.05
## 24 Razer Blade Pro Gaming 14.0 16 Windows 10 1.95
## 25 Lenovo IdeaPad Y900-17ISK Gaming 17.3 32 Windows 10 4.60
## 26 MSI GT73VR Titan Gaming 17.3 16 Windows 10 4.14
## 27 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.42
## 28 Asus ROG G701VI Gaming 17.3 32 Windows 10 3.80
## 29 HP ZBook 17 Workstation 17.3 8 Windows 7 3.00
## Price_euros Screen ScreenW ScreenH Touchscreen IPSpanel RetinaDisplay
## 1 3147.37 4K Ultra HD 3840 2160 No Yes No
## 2 3240.00 Full HD 1920 1080 No Yes No
## 3 2858.00 Standard 2880 1800 No Yes Yes
## 4 3100.00 4K Ultra HD 3840 2160 Yes No No
## 5 3147.37 4K Ultra HD 3840 2160 No Yes No
## 6 3588.80 Full HD 1920 1080 No Yes No
## 7 2800.00 Full HD 1920 1080 No No No
## 8 2799.00 Standard 2560 1440 No Yes No
## 9 2625.00 Standard 2560 1440 No Yes No
## 10 4389.00 Full HD 1920 1080 No Yes No
## 11 3588.80 Full HD 1920 1080 No Yes No
## 12 4389.00 Full HD 1920 1080 No Yes No
## 13 5499.00 4K Ultra HD 3840 2160 Yes No No
## 14 2680.00 Quad HD+ 3200 1800 Yes No No
## 15 3154.00 4K Ultra HD 3840 2160 No Yes No
## 16 3154.00 4K Ultra HD 3840 2160 No Yes No
## 17 3154.00 4K Ultra HD 3840 2160 No Yes No
## 18 2799.00 Full HD 1920 1080 Yes No No
## 19 2699.00 Full HD 1920 1080 No Yes No
## 20 3890.00 Full HD 1920 1080 No No No
## 21 2799.00 Full HD 1920 1080 No Yes No
## 22 2799.00 Full HD 1920 1080 Yes No No
## 23 2799.00 Full HD 1920 1080 Yes No No
## 24 2899.00 Full HD 1920 1080 No No No
## 25 3240.00 Full HD 1920 1080 No Yes No
## 26 2729.00 Full HD 1920 1080 No No No
## 27 2799.00 Standard 2560 1440 No Yes No
## 28 2799.00 Full HD 1920 1080 No Yes No
## 29 3949.40 Full HD 1920 1080 No Yes No
## CPU_company CPU_freq CPU_model PrimaryStorage SecondaryStorage
## 1 Intel 2.8 Core i7 7700HQ 512 1024
## 2 Intel 2.7 Core i7 6820HK 512 1024
## 3 Intel 2.9 Core i7 512 0
## 4 Intel 1.2 Core M 6Y75 240 0
## 5 Intel 2.8 Core i7 7700HQ 512 1024
## 6 Intel 2.8 Core i7 7700HQ 1024 1024
## 7 Intel 2.6 Core i7 6700HQ 256 1024
## 8 Intel 2.9 Core i7 7820HK 256 1024
## 9 Intel 2.7 Core i7 7500U 1024 0
## 10 Intel 2.9 Xeon E3-1535M v5 256 0
## 11 Intel 2.8 Core i7 7700HQ 1024 1024
## 12 Intel 2.9 Xeon E3-1535M v5 256 0
## 13 Intel 2.9 Core i7 7820HK 512 0
## 14 Intel 2.7 Core i7 7500U 512 0
## 15 Intel 2.8 Core i7 7700HQ 512 1024
## 16 Intel 2.8 Core i7 7700HQ 512 1024
## 17 Intel 2.8 Core i7 7700HQ 512 1024
## 18 Intel 2.7 Core i7 7500U 512 0
## 19 Intel 2.8 Core i7 7700HQ 256 1024
## 20 Intel 2.9 Core i7 7820HK 512 1024
## 21 Intel 2.7 Core i7 6820HK 512 0
## 22 Intel 2.7 Core i7 7500U 512 0
## 23 Intel 2.7 Core i7 7500U 512 0
## 24 Intel 2.8 Core i7 7700HQ 512 0
## 25 Intel 2.7 Core i7 6820HK 512 1024
## 26 Intel 2.9 Core i7 7820HK 512 1024
## 27 Intel 2.9 Core i7 7820HK 256 1024
## 28 Intel 2.7 Core i7 6820HK 512 0
## 29 Intel 2.6 Core i7 6700HQ 256 0
## PrimaryStorageType SecondaryStorageType GPU_company GPU_model
## 1 SSD HDD Nvidia GeForce GTX 1070
## 2 SSD Hybrid Nvidia GeForce GTX 980M
## 3 SSD No AMD Radeon Pro 560
## 4 SSD No Intel HD Graphics 515
## 5 SSD HDD Nvidia GeForce GTX 1070
## 6 SSD HDD Nvidia GeForce GTX 1070M
## 7 SSD HDD Nvidia GeForce GTX 1070
## 8 SSD HDD Nvidia GeForce GTX 1070
## 9 SSD No Intel HD Graphics 620
## 10 SSD No Nvidia Quadro M2000M
## 11 SSD HDD Nvidia GeForce GTX 1070M
## 12 SSD No Nvidia Quadro M2000M
## 13 SSD No Nvidia GeForce GTX 1080
## 14 SSD No Intel HD Graphics 620
## 15 SSD HDD Nvidia GeForce GTX 1070
## 16 SSD HDD Nvidia GeForce GTX 1070
## 17 SSD HDD Nvidia GeForce GTX 1070
## 18 SSD No Intel HD Graphics 620
## 19 SSD HDD Nvidia GeForce GTX 1070
## 20 SSD HDD Nvidia GeForce GTX 1080
## 21 SSD No Nvidia GeForce GTX 1080
## 22 SSD No Intel HD Graphics 620
## 23 SSD No Intel HD Graphics 620
## 24 SSD No Nvidia GeForce GTX 1060
## 25 SSD Hybrid Nvidia GeForce GTX 980M
## 26 SSD HDD Nvidia GeForce GTX 1070
## 27 SSD HDD Nvidia GeForce GTX 1070
## 28 SSD No Nvidia GeForce GTX 1080
## 29 SSD No Nvidia Quadro M3000M
# Subsample 3: print a heading, then every row flagged as a price anomaly.
print("Anomalies in Sample 3")
## [1] "Anomalies in Sample 3"
print(anomalies_3)
## Company Product TypeName Inches Ram OS Weight
## 1 Asus Rog GL702VS-BA023T Gaming 17.3 16 Windows 10 2.90
## 2 HP Spectre x360 2 in 1 Convertible 13.3 16 Windows 10 1.29
## 3 Lenovo Thinkpad P51s Workstation 15.6 16 Windows 10 2.50
## 4 Toshiba Portege X30-D-10L Ultrabook 13.3 32 Windows 10 1.05
## 5 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 6 Asus ROG G701VI Gaming 17.3 32 Windows 10 3.80
## 7 Razer Blade Pro Gaming 14.0 16 Windows 10 1.95
## 8 Dell XPS 15 Notebook 15.6 16 Windows 10 2.06
## 9 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.36
## 10 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 11 MSI GT73EVR 7RE Gaming 17.3 16 Windows 10 4.14
## 12 Asus ROG G703VI-E5062T Gaming 17.3 32 Windows 10 4.70
## 13 Lenovo Thinkpad X1 Ultrabook 14.0 8 Windows 10 1.13
## 14 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.42
## 15 MSI GT73VR Titan Gaming 17.3 16 Windows 10 4.14
## 16 MSI GT80S 6QF-074US Gaming 18.4 32 Windows 10 4.40
## 17 HP Spectre x360 2 in 1 Convertible 13.3 16 Windows 10 1.29
## 18 Acer Predator G9-793 Gaming 17.3 16 Windows 10 4.20
## 19 HP Spectre x360 2 in 1 Convertible 13.3 16 Windows 10 1.29
## 20 Asus ROG G701VI Gaming 17.3 32 Windows 10 3.80
## 21 Lenovo ThinkPad X1 2 in 1 Convertible 14.0 16 Windows 10 2.80
## 22 MSI GS73VR 7RG Gaming 17.3 16 Windows 10 2.43
## 23 Dell XPS 15 Notebook 15.6 16 Windows 10 2.06
## Price_euros Screen ScreenW ScreenH Touchscreen IPSpanel RetinaDisplay
## 1 2419.00 Full HD 1920 1080 No No No
## 2 2449.00 4K Ultra HD 3840 2160 Yes Yes No
## 3 3299.00 4K Ultra HD 3840 2160 No Yes No
## 4 2799.00 Full HD 1920 1080 Yes No No
## 5 3072.89 Full HD 1920 1080 No Yes No
## 6 2799.00 Full HD 1920 1080 No Yes No
## 7 2899.00 Full HD 1920 1080 No No No
## 8 2399.00 4K Ultra HD 3840 2160 Yes No No
## 9 3154.00 4K Ultra HD 3840 2160 No Yes No
## 10 3149.00 Full HD 1920 1080 No Yes No
## 11 2499.00 Full HD 1920 1080 No No No
## 12 3890.00 Full HD 1920 1080 No No No
## 13 2499.00 Full HD 1920 1080 No Yes No
## 14 2799.00 Standard 2560 1440 No Yes No
## 15 2729.00 Full HD 1920 1080 No No No
## 16 2799.00 Full HD 1920 1080 No No No
## 17 2449.00 4K Ultra HD 3840 2160 Yes Yes No
## 18 2599.00 Full HD 1920 1080 No Yes No
## 19 2449.00 4K Ultra HD 3840 2160 Yes Yes No
## 20 2799.00 Full HD 1920 1080 No Yes No
## 21 2440.00 Standard 2560 1440 Yes Yes No
## 22 2449.00 Full HD 1920 1080 No No No
## 23 2397.00 4K Ultra HD 3840 2160 Yes No No
## CPU_company CPU_freq CPU_model PrimaryStorage SecondaryStorage
## 1 Intel 2.8 Core i7 7700HQ 256 1024
## 2 Intel 1.8 Core i7 8550U 1024 0
## 3 Intel 2.8 Core i7 7600U 1024 0
## 4 Intel 2.7 Core i7 7500U 512 0
## 5 Intel 2.8 Core i7 7700HQ 512 1024
## 6 Intel 2.7 Core i7 6820HK 512 0
## 7 Intel 2.8 Core i7 7700HQ 512 0
## 8 Intel 2.8 Core i7 7700HQ 1024 0
## 9 Intel 2.8 Core i7 7700HQ 512 1024
## 10 Intel 2.8 Core i7 7700HQ 256 1024
## 11 Intel 2.8 Core i7 7700HQ 512 1024
## 12 Intel 2.9 Core i7 7820HK 512 1024
## 13 Intel 2.7 Core i7 7500U 512 0
## 14 Intel 2.9 Core i7 7820HK 256 1024
## 15 Intel 2.9 Core i7 7820HK 512 1024
## 16 Intel 2.9 Core i7 6920HQ 512 1024
## 17 Intel 1.8 Core i7 8550U 1024 0
## 18 Intel 2.8 Core i7 7700HQ 256 1024
## 19 Intel 1.8 Core i7 8550U 1024 0
## 20 Intel 2.7 Core i7 6820HK 512 0
## 21 Intel 2.6 Core i7 6600U 512 0
## 22 Intel 2.8 Core i7 7700HQ 256 2048
## 23 Intel 2.8 Core i7 7700HQ 512 0
## PrimaryStorageType SecondaryStorageType GPU_company GPU_model
## 1 SSD HDD Nvidia GeForce GTX 1070
## 2 SSD No Intel UHD Graphics 620
## 3 SSD No Nvidia Quadro M520M
## 4 SSD No Intel HD Graphics 620
## 5 SSD HDD Nvidia GeForce GTX 1070
## 6 SSD No Nvidia GeForce GTX 1080
## 7 SSD No Nvidia GeForce GTX 1060
## 8 SSD No Nvidia GeForce GTX 1050
## 9 SSD HDD Nvidia GeForce GTX 1070
## 10 SSD HDD Nvidia GeForce GTX 1070
## 11 SSD HDD Nvidia GeForce GTX 1070
## 12 SSD HDD Nvidia GeForce GTX 1080
## 13 SSD No Intel HD Graphics 620
## 14 SSD HDD Nvidia GeForce GTX 1070
## 15 SSD HDD Nvidia GeForce GTX 1070
## 16 SSD HDD Nvidia GTX 980 SLI
## 17 SSD No Intel UHD Graphics 620
## 18 SSD HDD Nvidia GeForce GTX 1070
## 19 SSD No Intel UHD Graphics 620
## 20 SSD No Nvidia GeForce GTX 1080
## 21 SSD No Intel HD Graphics 520
## 22 SSD HDD Nvidia GeForce GTX 1070
## 23 SSD No Nvidia GeForce GTX 1050
# Subsample 4: print a heading, then every row flagged as a price anomaly.
print("Anomalies in Sample 4")
## [1] "Anomalies in Sample 4"
print(anomalies_4)
## Company Product TypeName Inches Ram OS Weight Price_euros
## 1 HP Zbook 17 Workstation 17.3 16 Windows 7 3.00 4389.00
## 2 HP Zbook 17 Workstation 17.3 16 Windows 7 3.00 4389.00
## 3 Lenovo ThinkPad X1 Notebook 14.0 16 Windows 7 1.10 2620.00
## 4 Apple MacBook Pro Ultrabook 15.4 16 macOS 1.83 2858.00
## 5 HP Zbook 17 Workstation 17.3 16 Windows 7 3.00 4389.00
## 6 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.42 2699.00
## 7 Dell XPS 15 Notebook 15.6 32 Windows 10 2.06 2639.00
## 8 Razer Blade Pro Gaming 17.3 32 Windows 10 3.49 5499.00
## 9 Razer Blade Pro Gaming 17.3 32 Windows 10 3.49 6099.00
## 10 Dell XPS 13 Ultrabook 13.3 16 Windows 10 1.20 2680.00
## 11 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42 3072.89
## 12 Asus ROG Zephyrus Gaming 15.6 24 Windows 10 2.24 2968.00
## 13 HP ZBook 17 Workstation 17.3 8 Windows 7 3.00 2899.00
## 14 Razer Blade Pro Gaming 17.3 32 Windows 10 3.49 5499.00
## 15 HP Elitebook Folio Ultrabook 12.5 8 Windows 10 1.09 3100.00
## 16 Lenovo ThinkPad P70 Notebook 17.3 16 Windows 7 2.40 2968.00
## 17 Razer Blade Pro Gaming 14.0 16 Windows 10 1.95 2899.00
## 18 Lenovo ThinkPad P70 Notebook 17.3 16 Windows 7 2.40 2968.00
## 19 MSI GT80S 6QF-074US Gaming 18.4 32 Windows 10 4.40 2799.00
## Screen ScreenW ScreenH Touchscreen IPSpanel RetinaDisplay CPU_company
## 1 Full HD 1920 1080 No Yes No Intel
## 2 Full HD 1920 1080 No Yes No Intel
## 3 Standard 2560 1440 No Yes No Intel
## 4 Standard 2880 1800 No Yes Yes Intel
## 5 Full HD 1920 1080 No Yes No Intel
## 6 Full HD 1920 1080 No Yes No Intel
## 7 4K Ultra HD 3840 2160 Yes No No Intel
## 8 4K Ultra HD 3840 2160 Yes No No Intel
## 9 4K Ultra HD 3840 2160 Yes No No Intel
## 10 Quad HD+ 3200 1800 Yes No No Intel
## 11 Full HD 1920 1080 No Yes No Intel
## 12 Full HD 1920 1080 No No No Intel
## 13 Full HD 1920 1080 No Yes No Intel
## 14 4K Ultra HD 3840 2160 Yes No No Intel
## 15 4K Ultra HD 3840 2160 Yes No No Intel
## 16 4K Ultra HD 3840 2160 No Yes No Intel
## 17 Full HD 1920 1080 No No No Intel
## 18 4K Ultra HD 3840 2160 No Yes No Intel
## 19 Full HD 1920 1080 No No No Intel
## CPU_freq CPU_model PrimaryStorage SecondaryStorage PrimaryStorageType
## 1 2.9 Xeon E3-1535M v5 256 0 SSD
## 2 2.9 Xeon E3-1535M v5 256 0 SSD
## 3 2.6 Core i7 6600U 256 0 SSD
## 4 2.9 Core i7 512 0 SSD
## 5 2.9 Xeon E3-1535M v5 256 0 SSD
## 6 2.8 Core i7 7700HQ 256 1024 SSD
## 7 2.8 Core i7 7700HQ 1024 0 SSD
## 8 2.9 Core i7 7820HK 512 0 SSD
## 9 2.9 Core i7 7820HK 1024 0 SSD
## 10 2.7 Core i7 7500U 512 0 SSD
## 11 2.8 Core i7 7700HQ 512 1024 SSD
## 12 2.8 Core i7 7700HQ 512 0 SSD
## 13 2.6 Core i7 6700HQ 1024 0 HDD
## 14 2.9 Core i7 7820HK 512 0 SSD
## 15 1.2 Core M 6Y75 240 0 SSD
## 16 2.7 Core i7 6820HQ 512 0 SSD
## 17 2.8 Core i7 7700HQ 512 0 SSD
## 18 2.7 Core i7 6820HQ 512 0 SSD
## 19 2.9 Core i7 6920HQ 512 1024 SSD
## SecondaryStorageType GPU_company GPU_model
## 1 No Nvidia Quadro M2000M
## 2 No Nvidia Quadro M2000M
## 3 No Intel HD Graphics 520
## 4 No AMD Radeon Pro 560
## 5 No Nvidia Quadro M2000M
## 6 HDD Nvidia GeForce GTX 1070
## 7 No Nvidia GeForce GTX 1050
## 8 No Nvidia GeForce GTX 1080
## 9 No Nvidia GeForce GTX 1080
## 10 No Intel HD Graphics 620
## 11 HDD Nvidia GeForce GTX 1070
## 12 No Nvidia GeForce GTX1080
## 13 No AMD FirePro W6150M
## 14 No Nvidia GeForce GTX 1080
## 15 No Intel HD Graphics 515
## 16 No Nvidia Quadro 3000M
## 17 No Nvidia GeForce GTX 1060
## 18 No Nvidia Quadro 3000M
## 19 HDD Nvidia GTX 980 SLI
# Subsample 5: print a heading, then every row flagged as a price anomaly.
print("Anomalies in Sample 5")
## [1] "Anomalies in Sample 5"
print(anomalies_5)
## Company Product TypeName Inches Ram OS Weight
## 1 Lenovo Thinkpad X1 Ultrabook 14.0 16 Windows 10 1.13
## 2 Razer Blade Pro Gaming 14.0 16 Windows 10 1.95
## 3 Dell Alienware 17 Gaming 17.3 16 Windows 10 4.36
## 4 Apple MacBook Pro Ultrabook 15.4 16 macOS 1.83
## 5 Acer Predator G9-793 Gaming 17.3 16 Windows 10 4.20
## 6 Lenovo ThinkPad X1 2 in 1 Convertible 14.0 8 Windows 10 1.42
## 7 Apple MacBook Pro Ultrabook 15.4 16 macOS 1.83
## 8 Dell Alienware 15 Gaming 15.6 16 Windows 10 3.49
## 9 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 10 Lenovo Thinkpad P51s Workstation 15.6 16 Windows 10 2.50
## 11 HP Elitebook Folio Ultrabook 12.5 8 Windows 10 1.09
## 12 Lenovo Thinkpad Yoga 2 in 1 Convertible 14.0 16 Windows 10 1.42
## 13 Lenovo Thinkpad X1 Ultrabook 14.0 16 Windows 10 1.13
## 14 Razer Blade Pro Gaming 17.3 32 Windows 10 3.49
## 15 Asus ROG G703VI-E5062T Gaming 17.3 32 Windows 10 4.70
## 16 Razer Blade Pro Gaming 14.0 16 Windows 10 1.95
## 17 Asus ROG G703VI-E5062T Gaming 17.3 32 Windows 10 4.70
## 18 HP ZBook 17 Workstation 17.3 8 Windows 7 3.00
## 19 Lenovo IdeaPad Y910-17ISK Gaming 17.3 32 Windows 10 4.60
## 20 Lenovo IdeaPad Y900-17ISK Gaming 17.3 16 Windows 10 4.60
## 21 Lenovo IdeaPad Y900-17ISK Gaming 17.3 16 Windows 10 4.60
## 22 HP EliteBook x360 2 in 1 Convertible 13.3 8 Windows 10 1.28
## 23 Dell Alienware 17 Gaming 17.3 32 Windows 10 4.42
## 24 Dell Alienware 17 Gaming 15.6 16 Windows 10 4.42
## 25 Dell Alienware 15 Gaming 15.6 16 Windows 10 3.49
## 26 HP Elitebook Folio Ultrabook 12.5 8 Windows 10 1.09
## 27 HP ZBook 17 Workstation 17.3 8 Windows 7 3.00
## 28 Lenovo IdeaPad Y910-17ISK Gaming 17.3 32 Windows 10 4.60
## Price_euros Screen ScreenW ScreenH Touchscreen IPSpanel RetinaDisplay
## 1 2625.00 Standard 2560 1440 No Yes No
## 2 3499.00 Full HD 1920 1080 No No No
## 3 3154.00 4K Ultra HD 3840 2160 No Yes No
## 4 2537.45 Standard 2880 1800 No Yes Yes
## 5 2599.00 Full HD 1920 1080 No Yes No
## 6 2590.00 Standard 2560 1440 Yes No No
## 7 2858.00 Standard 2880 1800 No Yes Yes
## 8 2813.75 Full HD 1920 1080 No No No
## 9 3072.89 Full HD 1920 1080 No Yes No
## 10 3299.00 4K Ultra HD 3840 2160 No Yes No
## 11 3100.00 4K Ultra HD 3840 2160 Yes No No
## 12 2824.00 Standard 2560 1440 Yes No No
## 13 2625.00 Standard 2560 1440 No Yes No
## 14 6099.00 4K Ultra HD 3840 2160 Yes No No
## 15 3890.00 Full HD 1920 1080 No No No
## 16 2899.00 Full HD 1920 1080 No No No
## 17 3890.00 Full HD 1920 1080 No No No
## 18 3949.40 Full HD 1920 1080 No Yes No
## 19 2663.00 Full HD 1920 1080 No Yes No
## 20 2749.99 Full HD 1920 1080 No Yes No
## 21 2749.99 Full HD 1920 1080 No Yes No
## 22 2559.00 Full HD 1920 1080 Yes No No
## 23 3147.37 4K Ultra HD 3840 2160 No Yes No
## 24 2868.99 4K Ultra HD 3840 2160 No Yes No
## 25 2813.75 Full HD 1920 1080 No No No
## 26 3100.00 4K Ultra HD 3840 2160 Yes No No
## 27 2899.00 Full HD 1920 1080 No Yes No
## 28 2663.00 Full HD 1920 1080 No Yes No
## CPU_company CPU_freq CPU_model PrimaryStorage SecondaryStorage
## 1 Intel 2.7 Core i7 7500U 1024 0
## 2 Intel 2.8 Core i7 7700HQ 1024 0
## 3 Intel 2.8 Core i7 7700HQ 512 1024
## 4 Intel 2.7 Core i7 512 0
## 5 Intel 2.8 Core i7 7700HQ 256 1024
## 6 Intel 2.7 Core i7 7500U 256 0
## 7 Intel 2.9 Core i7 512 0
## 8 Intel 2.9 Core i7 7820HK 256 1024
## 9 Intel 2.8 Core i7 7700HQ 512 1024
## 10 Intel 2.8 Core i7 7600U 1024 0
## 11 Intel 1.2 Core M 6Y75 240 0
## 12 Intel 2.7 Core i7 7500U 1024 0
## 13 Intel 2.7 Core i7 7500U 1024 0
## 14 Intel 2.9 Core i7 7820HK 1024 0
## 15 Intel 2.9 Core i7 7820HK 512 1024
## 16 Intel 2.8 Core i7 7700HQ 512 0
## 17 Intel 2.9 Core i7 7820HK 512 1024
## 18 Intel 2.6 Core i7 6700HQ 256 0
## 19 Intel 2.7 Core i7 6820HK 256 1024
## 20 Intel 2.7 Core i7 6820HK 256 1024
## 21 Intel 2.7 Core i7 6820HK 256 1024
## 22 Intel 2.8 Core i7 7600U 256 0
## 23 Intel 2.8 Core i7 7700HQ 512 1024
## 24 Intel 2.8 Core i7 7700HQ 256 1024
## 25 Intel 2.9 Core i7 7820HK 256 1024
## 26 Intel 1.2 Core M 6Y75 240 0
## 27 Intel 2.6 Core i7 6700HQ 1024 0
## 28 Intel 2.7 Core i7 6820HK 256 1024
## PrimaryStorageType SecondaryStorageType GPU_company GPU_model
## 1 SSD No Intel HD Graphics 620
## 2 SSD No Nvidia GeForce GTX 1060
## 3 SSD HDD Nvidia GeForce GTX 1070
## 4 SSD No AMD Radeon Pro 455
## 5 SSD HDD Nvidia GeForce GTX 1070
## 6 SSD No Intel HD Graphics 620
## 7 SSD No AMD Radeon Pro 560
## 8 SSD HDD Nvidia GeForce GTX 1070
## 9 SSD HDD Nvidia GeForce GTX 1070
## 10 SSD No Nvidia Quadro M520M
## 11 SSD No Intel HD Graphics 515
## 12 SSD No Intel HD Graphics 620
## 13 SSD No Intel HD Graphics 620
## 14 SSD No Nvidia GeForce GTX 1080
## 15 SSD HDD Nvidia GeForce GTX 1080
## 16 SSD No Nvidia GeForce GTX 1060
## 17 SSD HDD Nvidia GeForce GTX 1080
## 18 SSD No Nvidia Quadro M3000M
## 19 SSD HDD Nvidia GeForce GTX 1070
## 20 SSD Hybrid Nvidia GeForce GTX 980M
## 21 SSD Hybrid Nvidia GeForce GTX 980M
## 22 SSD No Intel HD Graphics 620
## 23 SSD HDD Nvidia GeForce GTX 1070
## 24 SSD HDD Nvidia GeForce GTX 1070
## 25 SSD HDD Nvidia GeForce GTX 1070
## 26 SSD No Intel HD Graphics 515
## 27 HDD No AMD FirePro W6150M
## 28 SSD HDD Nvidia GeForce GTX 1070
# An anomaly in one subsample that would not be considered an anomaly in another can be referred to as a contextual anomaly.
# In the presence of anomalies (such as extremely high or low prices), the simulation may show that the average price in one subsample is skewed compared to the others.
# An anomaly in one subsample might not be an anomaly in another because the subsamples differ in composition or sample size.
# libraries
library(ggplot2)
library(dplyr)
# Tag every subsample with its label before combining them
df_1$Sample <- "Sample 1"
df_2$Sample <- "Sample 2"
df_3$Sample <- "Sample 3"
df_4$Sample <- "Sample 4"
df_5$Sample <- "Sample 5"
# Combine all samples into one data frame
combined_df <- bind_rows(df_1, df_2, df_3, df_4, df_5)
# Combine anomalies, labelling each row with the subsample it was detected in
anomalies_combined <- bind_rows(
  mutate(anomalies_1, Sample = "Sample 1"),
  mutate(anomalies_2, Sample = "Sample 2"),
  mutate(anomalies_3, Sample = "Sample 3"),
  mutate(anomalies_4, Sample = "Sample 4"),
  mutate(anomalies_5, Sample = "Sample 5")
)
# Column to flag anomalies.
# A row is flagged only when its price was detected as an anomaly in the SAME
# subsample. Matching on price alone would also mark rows in other subsamples
# whose price merely equals an anomaly found elsewhere, which hides exactly the
# per-sample (contextual) differences the faceted plot is meant to show.
anomaly_keys <- paste(anomalies_combined$Sample, anomalies_combined$Price_euros)
row_keys <- paste(combined_df$Sample, combined_df$Price_euros)
combined_df$Is_Anomaly <- ifelse(row_keys %in% anomaly_keys, "Anomaly", "Normal")
# Box plot of price per company, one facet per subsample
ggplot(combined_df, aes(x = Company, y = Price_euros, fill = Is_Anomaly)) +
  geom_boxplot(outlier.shape = NA) + # Boxplot without plotting default outliers
  geom_jitter(aes(color = Is_Anomaly), width = 0.2, size = 2) + # Points coloured by anomaly flag
  facet_wrap(~ Sample) + # Separate panel for each subsample
  scale_fill_manual(values = c("Normal" = "lightblue", "Anomaly" = "red")) +
  scale_color_manual(values = c("Normal" = "black", "Anomaly" = "red")) +
  coord_flip() + # Flip coordinates for better label readability
  labs(
    title = "Boxplot of Price by Company with Anomalies Highlighted",
    x = "Company",
    y = "Price (Euros)"
  ) +
  # theme_minimal() is a complete theme and resets every theme element, so it
  # has to be applied BEFORE the axis-text tweak; otherwise the tweak is lost.
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8)) # Reduce text size for company labels
#Anomalies from all subsamples are combined into one data frame with a Sample column to distinguish them.
#Box Plot: Each company’s price distribution is displayed as a box plot, grouped by sample.
#Anomalies: Anomalies are overlaid as red jittered points on the box plots, making them easy to identify.
#Facet Plot: Each subsample is visualized in separate facets to compare anomalies across subsamples.
#Anomalies from all subsamples are combined into one data frame with a Sample column to distinguish them.
# Simulate drawing a random resample (with replacement) 1000 times from each subsample and calculate the mean of `Price_euros` for every draw.
# Simulate the sampling distribution of the mean of `Price_euros`.
#
# Repeatedly draws `sample_size` rows from `data` with replacement and records
# the mean price of each draw.
#
# Args:
#   data:            data frame containing a `Price_euros` column.
#   num_simulations: number of resamples to draw (default 1000); 0 yields an
#                    empty numeric vector.
#   sample_size:     number of rows drawn in each resample.
#
# Returns:
#   Numeric vector of length `num_simulations` holding one mean per resample
#   (missing prices are ignored when averaging).
monte_carlo_simulation <- function(data, num_simulations = 1000, sample_size) {
  stopifnot(
    "Price_euros" %in% names(data),
    length(num_simulations) == 1, num_simulations >= 0,
    length(sample_size) == 1, sample_size >= 0
  )
  # Only the price column is needed, so resample row indices instead of
  # copying the whole data frame on every iteration.
  prices <- data[["Price_euros"]]
  n_rows <- length(prices)
  # seq_len() is empty for 0 simulations, whereas 1:0 would iterate twice.
  vapply(
    seq_len(num_simulations),
    function(i) {
      drawn_rows <- sample.int(n_rows, sample_size, replace = TRUE)
      mean(prices[drawn_rows], na.rm = TRUE)
    },
    numeric(1)
  )
}
# Run Monte Carlo simulations for each sample
set.seed(123) # Fixed seed so the simulated means are reproducible
# Single source of truth for the number of resamples; the labels below are
# derived from the results instead of repeating a hard-coded 1000.
num_simulations <- 1000
# Each resample draws half as many rows as df_1 contains
sample_size <- floor(nrow(df_1) * 0.5)
mc_results_1 <- monte_carlo_simulation(df_1, num_simulations = num_simulations, sample_size = sample_size)
mc_results_2 <- monte_carlo_simulation(df_2, num_simulations = num_simulations, sample_size = sample_size)
mc_results_3 <- monte_carlo_simulation(df_3, num_simulations = num_simulations, sample_size = sample_size)
mc_results_4 <- monte_carlo_simulation(df_4, num_simulations = num_simulations, sample_size = sample_size)
mc_results_5 <- monte_carlo_simulation(df_5, num_simulations = num_simulations, sample_size = sample_size)
# Combine results into a single long data frame (one row per simulated mean)
mc_results <- list(
  "Sample 1" = mc_results_1,
  "Sample 2" = mc_results_2,
  "Sample 3" = mc_results_3,
  "Sample 4" = mc_results_4,
  "Sample 5" = mc_results_5
)
mc_data <- data.frame(
  # Label lengths follow the actual result lengths, so they cannot drift
  # out of sync with the number of simulations
  Sample = rep(names(mc_results), times = lengths(mc_results)),
  Price = unlist(mc_results, use.names = FALSE)
)
# Plot the Monte Carlo results: density of simulated mean prices per sample
ggplot(mc_data, aes(x = Price, fill = Sample)) +
  geom_density(alpha = 0.5) +
  labs(
    title = "Monte Carlo Simulation - Price Distribution Across Samples",
    x = "Price (Euros)",
    y = "Density"
  ) +
  theme_minimal()
# The Monte Carlo simulation lets us understand the variability of the price distributions across subsamples.
# The results are plotted using ggplot2 to visualize the distribution of simulated mean prices for each sample.
# Sample 2's simulated mean prices range from about 1050 to 1290, while Sample 3 has the highest peak density; the densities of the remaining samples differ only slightly.
# Summary table: mean and standard deviation of the simulated mean prices,
# one row per subsample.
mc_stats <- local({
  draws <- list(mc_results_1, mc_results_2, mc_results_3, mc_results_4, mc_results_5)
  data.frame(
    Sample = paste("Sample", seq_along(draws)),
    Mean = vapply(draws, mean, numeric(1)),
    SD = vapply(draws, sd, numeric(1))
  )
})
# Print the summary of Monte Carlo results
print(mc_stats)
## Sample Mean SD
## 1 Sample 1 1128.677 40.85229
## 2 Sample 2 1154.147 39.62451
## 3 Sample 3 1121.614 35.65404
## 4 Sample 4 1122.744 41.53441
## 5 Sample 5 1121.749 39.10562
# We summarize the Monte Carlo simulation by calculating the mean and standard deviation of the simulated price distribution for each subsample.
# This gives an idea of how similar or different the subsamples are in terms of average price.
# The subsamples show no large differences; the differences between them are small.
# Sample 3 and Sample 5 have the smallest difference in mean; they show no anomalies, and such a small mean difference is negligible.
# Sample 2 has a higher mean than the other samples.
# This difference could be considered an anomaly.
# There are consistent aspects across all subsamples.
# The means differ little between the samples, except for Sample 2, and even its difference is not very large.
# The standard deviations also differ only slightly.