IBM-Summit
We have discussed the HPC race previously. See: https://rpubs.com/alex-lev/696179, https://rpubs.com/alex-lev/694840, https://rpubs.com/alex-lev/693131, https://rpubs.com/alex-lev/553777
Data for November 2020 can be downloaded here: https://www.top500.org/lists/top500/2020/11/
Note: the original variable names were changed (truncated, concatenated and simplified) as much as possible to make data exploration and visualization easier.
library(MASS)
library(klaR)
library(tidyverse)
library(readxl)
library(DT)
library(knitr)
# Read the TOP500 list for November 2020 from the local Excel workbook.
TOP500_202011 <- read_excel(path = "top500/TOP500_202011.xlsx")
# List the column names available in the imported sheet.
colnames(TOP500_202011)
## [1] "Rank" "PreviousRank"
## [3] "FirstAppearance" "FirstRank"
## [5] "Name" "Computer"
## [7] "Site" "Manufacturer"
## [9] "Country" "Year"
## [11] "Segment" "TotalCores"
## [13] "AcceleratorCoProcessorCores" "Rmax"
## [15] "Rpeak" "Nmax"
## [17] "Nhalf" "HPCG"
## [19] "Power" "PowerSource"
## [21] "PowerEfficiency" "Architecture"
## [23] "Processor" "ProcessorTechnology"
## [25] "ProcessorSpeed" "OperatingSystem"
## [27] "OSFamily" "AcceleratorCoProcessor"
## [29] "CoresperSocket" "ProcessorGeneration"
## [31] "SystemModel" "SystemFamily"
## [33] "InterconnectFamily" "Interconnect"
## [35] "Continent" "SiteID"
## [37] "SystemID"
We want to classify the HPC mainframes of China and the USA using Linear Discriminant Analysis (LDA).
# Build the China vs. USA modelling table: keep only the two countries, retain
# the response (Country) plus nine specification columns, encode Country as a
# factor and discard every row that has a missing value.
TOP500_USCH <- TOP500_202011 %>%
  as_tibble() %>%
  filter(Country %in% c("United States", "China")) %>%
  select(
    Country, Year, TotalCores, Rmax, Rpeak, Nmax, Power,
    PowerEfficiency, ProcessorSpeed, CoresperSocket
  ) %>%
  mutate(Country = as.factor(Country)) %>%
  drop_na()
# Interactive preview of the table; a static alternative would be
# kable(align = "r", caption = "TOP500: China - USA")
datatable(TOP500_USCH)
The dataset above contains 98 individuals (mainframes) and 10 variables.
# Repeated random hold-out validation of the LDA classifier: on every pass draw
# a random half of the data for training, fit the model, predict the remaining
# rows and store the share of misclassified test rows.
set.seed(12345) # fixed seed so the resampling is reproducible
# Preallocate the result vector instead of growing it from c() on each pass.
MisClassError <- numeric(3000)
for (i in seq_along(MisClassError)) {
  train <- sample_frac(TOP500_USCH, 0.5, replace = FALSE)
  # NOTE(review): setdiff() compares whole rows and removes duplicates, so the
  # test set can hold fewer rows than the half left out of `train` whenever
  # TOP500_USCH contains identical rows -- confirm this is intended.
  test <- TOP500_USCH %>% setdiff(train)
  fit.lda <- lda(Country ~ ., data = train)
  prd.lda <- predict(fit.lda, test)
  # Share of test rows whose predicted class differs from the true Country.
  MisClassError[i] <- sum(prd.lda$class != test$Country) / length(test$Country)
}
# `train`, `test`, `fit.lda` and `prd.lda` keep the values of the last pass.
# Distribution of the per-split classification accuracy.
summary(1 - MisClassError)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5652 0.7826 0.8125 0.8147 0.8511 0.9792
# 10%, 50% and 90% quantiles of the per-split classification accuracy.
quantile(1 - MisClassError, probs = c(0.1, 0.5, 0.9))
## 10% 50% 90%
## 0.7446809 0.8125000 0.8775510
# Report the mean accuracy over all resampling runs, rounded to 3 decimals.
# The label previously read "Classifucation"; the typo is fixed both in the
# message and in the echoed output line below.
print(paste(
  "Mean Classification Accuracy =",
  round(mean(1 - MisClassError), 3)
))
## [1] "Mean Classification Accuracy = 0.815"
# Use a single-panel layout, then draw a histogram of the per-split accuracy.
par(mfrow = c(1, 1))
hist(
  1 - MisClassError,
  breaks = 10,
  col = "blue",
  main = "True Classification Proportion",
  xlab = "Proportion"
)
Here we generate classification charts for the specifications (variables) used in the LDA model above.
# Refit the LDA model on `train`, i.e. on whatever training split the
# preceding resampling loop left behind, and print the fitted model.
fit.lda <- lda(Country ~ ., data = train)
print(fit.lda)
## Call:
## lda(Country ~ ., data = train)
##
## Prior probabilities of groups:
## China United States
## 0.6122449 0.3877551
##
## Group means:
## Year TotalCores Rmax Rpeak Nmax Power
## China 2018.333 86191.87 1845.025 5556.772 4152753 839.5687
## United States 2017.579 290629.68 15259.898 21096.322 5710221 2834.3679
## PowerEfficiency ProcessorSpeed CoresperSocket
## China 3.620117 2344.333 14.66667
## United States 5.570498 2387.895 28.00000
##
## Coefficients of linear discriminants:
## LD1
## Year -2.309067e-01
## TotalCores 6.023370e-07
## Rmax 8.044349e-05
## Rpeak -7.267139e-05
## Nmax 4.807595e-08
## Power 1.736957e-04
## PowerEfficiency 1.190417e-03
## ProcessorSpeed 1.501589e-03
## CoresperSocket 8.499059e-02
# Default plot of the fitted discriminant model.
plot(fit.lda)
# Score the held-out rows with the fitted model.
prd.lda <- predict(fit.lda, newdata = test)
# Show the predictions (class, posteriors, LD1) next to the test rows.
kable(cbind(prd.lda, test), align = "r", caption = "TOP500: China - USA")
| class | posterior.China | posterior.United States | LD1 | Country | Year | TotalCores | Rmax | Rpeak | Nmax | Power | PowerEfficiency | ProcessorSpeed | CoresperSocket |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| United States | 0.0325934 | 0.9674066 | 2.2677565 | United States | 2018 | 1572480 | 94640.00 | 125712.000 | 11902464 | 7438.28 | 12.7233715 | 3100 | 22 |
| United States | 0.0000000 | 1.0000000 | 27.2249760 | China | 2016 | 10649600 | 93014.59 | 125435.904 | 12288000 | 15371.00 | 6.0513040 | 1450 | 260 |
| United States | 0.0074150 | 0.9925850 | 3.0734836 | China | 2018 | 4981760 | 61444.50 | 100678.664 | 9773000 | 18482.00 | 3.3245590 | 2200 | 12 |
| United States | 0.0050262 | 0.9949738 | 3.2827541 | United States | 2016 | 622336 | 14014.70 | 27880.653 | 6984960 | 3939.00 | 3.5579335 | 1400 | 68 |
| United States | 0.2164626 | 0.7835374 | 1.1422629 | United States | 2016 | 241108 | 5951.55 | 7107.149 | 6500160 | 4407.00 | 1.3504765 | 2800 | 10 |
| China | 0.5566414 | 0.4433586 | 0.3324499 | China | 2018 | 163840 | 4325.00 | 6134.170 | 1876000 | 380.00 | 11.3815789 | 2000 | 32 |
| United States | 0.3811640 | 0.6188360 | 0.7133828 | United States | 2020 | 71424 | 4281.00 | 6628.150 | 5491968 | 960.00 | 4.4593750 | 2900 | 24 |
| United States | 0.3940838 | 0.6059162 | 0.6842665 | United States | 2015 | 145920 | 4042.46 | 5369.856 | 8338176 | 1800.00 | 2.2458111 | 2300 | 16 |
| China | 0.7798834 | 0.2201166 | -0.2224824 | United States | 2014 | 72800 | 3577.00 | 6131.840 | 2336000 | 1498.90 | 2.3864167 | 2200 | 10 |
| China | 0.7038960 | 0.2961040 | -0.0090202 | United States | 2016 | 60512 | 3307.00 | 4896.512 | 1419552 | 349.50 | 9.4620887 | 2200 | 20 |
| United States | 0.0898173 | 0.9101827 | 1.6929276 | United States | 2014 | 225984 | 3143.52 | 4881.254 | 2541684 | 6327.55 | 0.4967989 | 2700 | 12 |
| United States | 0.2119009 | 0.7880991 | 1.1567611 | United States | 2015 | 152692 | 3126.24 | 5610.481 | 5572800 | 4819.50 | 0.6486648 | 2300 | 18 |
| United States | 0.3918472 | 0.6081528 | 0.6892818 | United States | 2013 | 194616 | 2539.13 | 3388.032 | 4032000 | 1384.00 | 1.8346315 | 2600 | 8 |
| United States | 0.3716777 | 0.6283223 | 0.7350012 | United States | 2014 | 100064 | 2485.00 | 3682.355 | 6441408 | 1465.78 | 1.6953431 | 2300 | 16 |
| China | 0.7186991 | 0.2813009 | -0.0475855 | United States | 2019 | 46080 | 2375.54 | 3686.400 | 4872960 | 578.10 | 4.1092199 | 2500 | 20 |
| United States | 0.0102314 | 0.9897686 | 2.8997476 | United States | 2020 | 19840 | 2356.00 | 2812.800 | 1262888 | 89.94 | 26.1952413 | 2250 | 64 |
| China | 0.9395062 | 0.0604938 | -1.0129765 | China | 2019 | 67200 | 2192.00 | 4515.840 | 5097000 | 740.00 | 2.9621622 | 2100 | 16 |
| China | 0.7901082 | 0.2098918 | -0.2548926 | China | 2019 | 40960 | 2109.00 | 3434.086 | 5153000 | 430.00 | 4.9046512 | 2600 | 16 |
| China | 0.9652747 | 0.0347253 | -1.3243595 | China | 2019 | 44032 | 2087.00 | 3843.379 | 503000 | 260.00 | 8.0269231 | 2300 | 12 |
| China | 0.6909118 | 0.3090882 | 0.0238948 | China | 2014 | 174720 | 2071.39 | 3074.534 | 2366976 | 997.00 | 2.0776229 | 2200 | 12 |
| China | 0.9606012 | 0.0393988 | -1.2542226 | China | 2018 | 121920 | 1979.00 | 4096.512 | 6769000 | 1216.00 | 1.6274671 | 2100 | 8 |
| China | 0.9411639 | 0.0588361 | -1.0287814 | China | 2019 | 60800 | 1976.00 | 4085.760 | 4848000 | 660.00 | 2.9939394 | 2100 | 16 |
| China | 0.8569216 | 0.1430784 | -0.5032888 | China | 2019 | 47600 | 1955.00 | 3960.320 | 3744000 | 650.00 | 3.0076923 | 2600 | 14 |
| China | 0.8842030 | 0.1157970 | -0.6332138 | China | 2019 | 53280 | 1952.00 | 3921.408 | 3494000 | 570.00 | 3.4245614 | 2300 | 18 |
| China | 0.9455579 | 0.0544421 | -1.0727910 | China | 2019 | 58112 | 1934.00 | 3905.126 | 3870000 | 630.00 | 3.0698413 | 2100 | 16 |
| China | 0.8575688 | 0.1424312 | -0.5061177 | China | 2019 | 46200 | 1901.00 | 3843.840 | 3689000 | 630.00 | 3.0174603 | 2600 | 14 |
| China | 0.9419946 | 0.0580054 | -1.0368600 | China | 2019 | 55040 | 1838.00 | 3698.688 | 4613000 | 600.00 | 3.0633333 | 2100 | 16 |
| China | 0.9417742 | 0.0582258 | -1.0347059 | China | 2019 | 54240 | 1815.00 | 3644.928 | 4626000 | 600.00 | 3.0250000 | 2100 | 16 |
| China | 0.7993553 | 0.2006447 | -0.2852175 | China | 2019 | 30600 | 1814.00 | 2674.100 | 824000 | 180.00 | 10.0777778 | 2500 | 20 |
| China | 0.9273206 | 0.0726794 | -0.9078296 | China | 2018 | 105000 | 1790.00 | 3696.000 | 7657000 | 1100.00 | 1.6272727 | 2200 | 10 |
| China | 0.9278801 | 0.0721199 | -0.9122859 | China | 2018 | 97920 | 1736.00 | 7206.912 | 6750000 | 1000.00 | 1.7360000 | 2300 | 12 |
| United States | 0.4429828 | 0.5570172 | 0.5766908 | United States | 2015 | 57600 | 1703.28 | 2304.000 | 3450432 | 2880.00 | 0.5914167 | 2500 | 12 |
| China | 0.9102326 | 0.0897674 | -0.7849283 | China | 2018 | 97920 | 1683.00 | 3446.784 | 6750000 | 1050.00 | 1.6028571 | 2200 | 12 |
| United States | 0.2103187 | 0.7896813 | 1.1618427 | United States | 2011 | 31524 | 1653.92 | 2467.699 | 3745920 | 558.60 | 2.9608306 | 2100 | 20 |
| China | 0.5127871 | 0.4872129 | 0.4267976 | United States | 2015 | 48960 | 1635.02 | 2036.736 | 3950400 | 956.25 | 1.7098248 | 2600 | 12 |
| China | 0.9322958 | 0.0677042 | -0.9486215 | China | 2018 | 92000 | 1562.00 | 3238.400 | 7167000 | 960.00 | 1.6270833 | 2200 | 10 |
| China | 0.7302297 | 0.2697703 | -0.0784871 | United States | 2016 | 53352 | 1524.72 | 1792.627 | 4268160 | 603.40 | 2.5268810 | 2100 | 18 |
| China | 0.8107501 | 0.1892499 | -0.3240628 | United States | 2019 | 23040 | 1464.00 | 1739.776 | 2582016 | 94.00 | 15.5744681 | 2400 | 20 |
| China | 0.8959148 | 0.1040852 | -0.6972881 | China | 2018 | 26180 | 1432.90 | 2153.536 | 1186560 | 159.94 | 8.9589846 | 2400 | 14 |
| China | 0.8992554 | 0.1007446 | -0.7167281 | China | 2018 | 30240 | 1418.00 | 2413.824 | 936000 | 240.00 | 5.9083333 | 2400 | 14 |
| China | 0.8397313 | 0.1602687 | -0.4317595 | China | 2018 | 52080 | 1416.00 | 1999.872 | 4895000 | 560.00 | 2.5285714 | 2400 | 14 |
| China | 0.9280924 | 0.0719076 | -0.9139850 | China | 2018 | 78000 | 1405.00 | 5740.800 | 6024000 | 800.00 | 1.7562500 | 2300 | 12 |
| China | 0.7942162 | 0.2057838 | -0.2682392 | China | 2018 | 28000 | 1405.00 | 2329.600 | 3590000 | 350.00 | 4.0142857 | 2600 | 14 |
| China | 0.9024748 | 0.0975252 | -0.7360120 | China | 2018 | 30624 | 1389.00 | 2459.002 | 829000 | 176.00 | 7.8920455 | 2400 | 14 |
| China | 0.9452403 | 0.0547597 | -1.0695006 | China | 2019 | 43200 | 1379.00 | 2903.040 | 4086000 | 480.00 | 2.8729167 | 2100 | 16 |
| China | 0.9196537 | 0.0803463 | -0.8497442 | China | 2018 | 78720 | 1345.00 | 2770.944 | 6052000 | 810.00 | 1.6604938 | 2200 | 12 |
# The table above pairs the posterior class probabilities with the true
# classes (Country) of the test rows.
# Confusion matrix: predicted class in rows, true Country in columns.
table(prd.lda[["class"]], test[["Country"]])
##
## China United States
## China 26 6
## United States 2 12
# klaR partition plot of an LDA classifier on log(TotalCores) and log(Rpeak),
# fitted on the current training split.
partimat(Country ~ log(TotalCores) + log(Rpeak), data = train, method = "lda")
Partition plot of the LDA classifier for Country ~ log(TotalCores) + log(Rpeak).