# Load dataset and required libraries
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(e1071)
library(corrplot)
## corrplot 0.84 loaded
library(klaR)
## Loading required package: MASS
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
## 
## Attaching package: 'rattle'
## The following object is masked from 'package:randomForest':
## 
##     importance
library(rpart.plot)
## Loading required package: rpart
nba_raw <- read.csv("C:/Users/shibo/OneDrive/Desktop/520 HW/2012-18_officialBoxScore.csv")

# Data Cleaning and Pre-processing

# Remove repeated columns: keep only the first 61 columns
nba_reduced_col <- nba_raw[,1:61]

# Remove repeated rows: each team-game line appears three times (one row per listed official), so keep every third row
select <- seq(1,nrow(nba_reduced_col),3)
nba_reduced <- nba_reduced_col[select,]
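# The every-third-row selection assumes each team-game really does appear exactly three times and that
# the copies are adjacent. A quick check of that assumption (gmDate and teamAbbr are columns of this
# dataset); sketch, not evaluated in this report - if the assumption holds, the only count printed is 3:
table(table(paste(nba_raw$gmDate, nba_raw$teamAbbr)))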

# Remove irrelevant columns
# (read.csv() converts "%" in column names to ".", so the true-shooting column is named "teamTS." and is kept as a predictor)
drop_col <- c("gmDate", "teamAbbr", "seasTyp", "gmTime","offLNm", "offFNm", "teamConf", "teamDiv", "teamMin", "teamDayOff", "teamPTS5", "teamPTS6", "teamPTS7", "teamPTS8")
nba <- nba_reduced[ ,!(names(nba_reduced) %in% drop_col)]
str(nba)
## 'data.frame':    14762 obs. of  47 variables:
##  $ teamLoc   : Factor w/ 2 levels "Away","Home": 1 2 1 2 1 2 1 2 1 2 ...
##  $ teamRslt  : Factor w/ 2 levels "Loss","Win": 1 2 1 2 2 1 1 2 2 1 ...
##  $ teamPTS   : int  84 94 107 120 99 91 75 84 90 88 ...
##  $ teamAST   : int  26 22 24 25 22 24 19 18 22 18 ...
##  $ teamTO    : int  13 21 16 8 12 14 22 16 19 10 ...
##  $ teamSTL   : int  11 7 4 8 9 6 9 13 3 12 ...
##  $ teamBLK   : int  10 5 2 5 5 5 5 11 10 8 ...
##  $ teamPF    : int  19 21 23 20 25 21 22 14 16 18 ...
##  $ teamFGA   : int  90 79 75 79 85 77 88 85 78 91 ...
##  $ teamFGM   : int  32 36 39 43 40 38 33 30 37 33 ...
##  $ teamFG.   : num  0.356 0.456 0.52 0.544 0.471 ...
##  $ team2PA   : int  58 59 62 63 70 64 70 60 67 74 ...
##  $ team2PM   : int  24 29 33 35 35 35 29 23 32 27 ...
##  $ team2P.   : num  0.414 0.491 0.532 0.556 0.5 ...
##  $ team3PA   : int  32 20 13 16 15 13 18 25 11 17 ...
##  $ team3PM   : int  8 7 6 8 5 3 4 7 5 6 ...
##  $ team3P.   : num  0.25 0.35 0.462 0.5 0.333 ...
##  $ teamFTA   : int  20 22 28 32 18 31 11 21 16 19 ...
##  $ teamFTM   : int  12 15 23 26 14 12 5 17 11 16 ...
##  $ teamFT.   : num  0.6 0.682 0.821 0.812 0.778 ...
##  $ teamORB   : int  18 18 7 5 9 15 16 14 9 15 ...
##  $ teamDRB   : int  21 36 34 31 31 31 38 33 37 27 ...
##  $ teamTRB   : int  39 54 41 36 40 46 54 47 46 42 ...
##  $ teamPTS1  : int  24 31 25 31 25 29 22 25 27 25 ...
##  $ teamPTS2  : int  15 19 29 31 23 17 16 21 21 23 ...
##  $ teamPTS3  : int  23 24 22 31 26 20 14 19 15 23 ...
##  $ teamPTS4  : int  22 20 31 27 25 25 23 19 27 17 ...
##  $ teamTREB. : num  41.9 58.1 53.2 46.8 46.5 ...
##  $ teamASST. : num  81.2 61.1 61.5 58.1 55 ...
##  $ teamTS.   : num  0.425 0.53 0.613 0.645 0.533 ...
##  $ teamEFG.  : num  0.4 0.5 0.56 0.595 0.5 ...
##  $ teamOREB. : num  33.3 46.2 18.4 12.8 22.5 ...
##  $ teamDREB. : num  53.8 66.7 87.2 81.6 67.4 ...
##  $ teamTO.   : num  11.63 19.15 15.49 7.91 11.44 ...
##  $ teamSTL.  : num  12.37 7.87 4.21 8.42 9.83 ...
##  $ teamBLK.  : num  11.24 5.62 2.11 5.26 5.46 ...
##  $ teamBLKR  : num  17.24 8.47 3.23 7.94 7.14 ...
##  $ teamPPS   : num  0.933 1.19 1.427 1.519 1.165 ...
##  $ teamFIC   : num  67.2 74 75.2 97 72.2 ...
##  $ teamFIC40 : num  56 61.7 62.7 80.8 60.2 ...
##  $ teamOrtg  : num  94.4 105.7 112.7 126.3 108.1 ...
##  $ teamDrtg  : num  105.7 94.4 126.3 112.7 99.4 ...
##  $ teamEDiff : num  -11.24 11.24 -13.69 13.69 8.74 ...
##  $ teamPlay. : num  0.376 0.439 0.464 0.524 0.455 ...
##  $ teamAR    : num  18.9 16.7 18.9 19.8 17.3 ...
##  $ teamAST.TO: num  2 1.05 1.5 3.12 1.83 ...
##  $ teamSTL.TO: num  84.6 33.3 25 100 75 ...
# Convert teamLoc into numeric (Away = 1, Home = 2)
nba$teamLoc <- as.numeric(nba$teamLoc)

# Correlation matrix of the numeric predictors (column 2, the teamRslt outcome, is excluded)
cor_matrix <- cor(nba[,-2])
corrplot(cor_matrix)
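# The corrplot gives a visual overview; caret::findCorrelation() lists the predictors whose pairwise
# correlations exceed a cutoff, anticipating the collinear columns dropped before the logistic model.
# Sketch, not evaluated in this report; the 0.90 cutoff is an arbitrary choice:
high_cor <- findCorrelation(cor_matrix, cutoff = 0.90, names = TRUE)
high_cor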

# Train and test data for the logistic regression
nba_glm <- as.data.frame(cbind(Result = as.factor(nba$teamRslt), nba[-2]))
nba_glm$Result <- as.factor(nba_glm$Result)

# 80/20 split, stratified on the outcome (see the balance check sketched below)
index <- createDataPartition(nba_glm$Result, p = 0.8, list = FALSE)
train_glm <- nba_glm[index,]
test_glm <- nba_glm[-index,]
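# createDataPartition() stratifies on the outcome, so both halves keep the roughly 50/50 Win/Loss
# balance (the "No Information Rate" of ~0.50 in the confusion matrices below reflects this).
# A quick check, not evaluated in this report; note there is no set.seed() before the partition,
# so the exact split changes between runs:
prop.table(table(train_glm$Result))
prop.table(table(test_glm$Result))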

# Logistic Regression: fit an unsaturated model.
# teamEDiff is an exact linear combination of teamOrtg and teamDrtg (teamEDiff = teamOrtg - teamDrtg),
# and team3PA, team3PM, teamTRB and teamFIC are combinations of other retained columns
# (e.g. team3PA = teamFGA - team2PA, teamTRB = teamORB + teamDRB), so these predictors are dropped
# to avoid collinearity. A sketch for detecting such dependencies automatically follows the model summary below.
model_glm <- glm(Result ~. -teamDrtg - teamEDiff - team3PA - team3PM - teamTRB - teamFIC, data=train_glm, family = binomial)
summary(model_glm)
## 
## Call:
## glm(formula = Result ~ . - teamDrtg - teamEDiff - team3PA - team3PM - 
##     teamTRB - teamFIC, family = binomial, data = train_glm)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.8029  -0.4341  -0.0039   0.4172   3.7931  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.142e+01  6.802e+00  -4.619 3.86e-06 ***
## teamLoc      4.016e-01  5.915e-02   6.791 1.12e-11 ***
## teamPTS      3.368e-01  9.702e-01   0.347 0.728479    
## teamAST     -4.771e-01  1.003e-01  -4.757 1.97e-06 ***
## teamTO       3.195e-01  1.249e-01   2.557 0.010546 *  
## teamSTL     -2.739e-01  1.817e-01  -1.507 0.131711    
## teamBLK     -3.621e-01  1.960e-01  -1.847 0.064682 .  
## teamPF       1.314e-01  2.302e-02   5.711 1.12e-08 ***
## teamFGA      1.786e-01  7.982e-02   2.238 0.025252 *  
## teamFGM     -1.237e+00  2.909e+00  -0.425 0.670649    
## teamFG.      1.916e+01  2.262e+01   0.847 0.396988    
## team2PA      3.452e-02  3.462e-02   0.997 0.318684    
## team2PM      4.088e-01  9.759e-01   0.419 0.675297    
## team2P.     -1.472e+00  3.595e+00  -0.409 0.682259    
## team3P.      6.728e-02  9.888e-01   0.068 0.945748    
## teamFTA      1.923e-01  5.444e-02   3.532 0.000413 ***
## teamFTM     -5.772e-01  9.663e-01  -0.597 0.550286    
## teamFT.      3.452e-01  8.303e-01   0.416 0.677547    
## teamORB     -2.969e-01  9.004e-02  -3.298 0.000974 ***
## teamDRB      9.777e-02  3.759e-02   2.601 0.009295 ** 
## teamPTS1    -3.561e-01  3.346e-02 -10.641  < 2e-16 ***
## teamPTS2    -3.595e-01  3.331e-02 -10.790  < 2e-16 ***
## teamPTS3    -3.528e-01  3.340e-02 -10.562  < 2e-16 ***
## teamPTS4    -3.531e-01  3.330e-02 -10.603  < 2e-16 ***
## teamTREB.    6.983e-02  4.263e-02   1.638 0.101405    
## teamASST.    1.322e-04  3.405e-02   0.004 0.996902    
## teamTS.     -7.815e+00  1.790e+01  -0.437 0.662447    
## teamEFG.    -1.625e+01  2.150e+01  -0.756 0.449793    
## teamOREB.   -2.695e-02  3.670e-02  -0.734 0.462762    
## teamDREB.   -4.022e-02  1.508e-02  -2.668 0.007638 ** 
## teamTO.     -3.051e-02  1.588e-01  -0.192 0.847611    
## teamSTL.     3.005e-01  1.725e-01   1.742 0.081501 .  
## teamBLK.     1.607e-01  1.834e-01   0.876 0.380830    
## teamBLKR    -3.737e-02  4.306e-02  -0.868 0.385402    
## teamPPS      5.900e+00  4.346e+00   1.358 0.174608    
## teamFIC40    4.837e-01  5.269e-02   9.180  < 2e-16 ***
## teamOrtg     3.173e-01  2.911e-02  10.901  < 2e-16 ***
## teamPlay.   -2.207e+01  1.545e+01  -1.429 0.153084    
## teamAR       1.857e-01  1.983e-01   0.937 0.348927    
## teamAST.TO  -6.464e-02  1.583e-01  -0.408 0.682955    
## teamSTL.TO   2.403e-03  4.187e-03   0.574 0.566011    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16373.5  on 11810  degrees of freedom
## Residual deviance:  7493.7  on 11770  degrees of freedom
## AIC: 7575.7
## 
## Number of Fisher Scoring iterations: 6
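# The linear dependencies noted above can also be found programmatically: caret::findLinearCombos()
# uses a QR decomposition to flag exact linear combinations among numeric columns and suggests which
# columns to remove. Sketch, not evaluated in this report:
combo_info <- findLinearCombos(as.matrix(train_glm[, -1]))   # column 1 is the Result factor
names(train_glm)[-1][combo_info$remove]                      # candidate columns to drop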
# predict() with type = "response" returns P(Result = "Win"), since "Win" is the second factor level; classify at 0.5
predict_glm <- ifelse(predict(model_glm, test_glm, type="response") > 0.5, "Win", "Loss")
confusionMatrix(factor(predict_glm), factor(test_glm$Result))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Loss  Win
##       Loss 1295  223
##       Win   181 1252
##                                           
##                Accuracy : 0.8631          
##                  95% CI : (0.8502, 0.8753)
##     No Information Rate : 0.5002          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.7262          
##                                           
##  Mcnemar's Test P-Value : 0.04137         
##                                           
##             Sensitivity : 0.8774          
##             Specificity : 0.8488          
##          Pos Pred Value : 0.8531          
##          Neg Pred Value : 0.8737          
##              Prevalence : 0.5002          
##          Detection Rate : 0.4388          
##    Detection Prevalence : 0.5144          
##       Balanced Accuracy : 0.8631          
##                                           
##        'Positive' Class : Loss            
## 
# Tune the model by removing the non-significant predictors. Accuracy is about the same, so we use the reduced model.
# (A stepwise search, sketched after the confusion matrix below, could automate this selection.)
model_glm_tune <- glm(Result ~ teamLoc + teamAST + teamTO + teamBLK + teamPF + teamFGA + teamFTA + teamORB + teamPTS1 + teamPTS2 + teamPTS3 + teamPTS4 + teamTREB. + teamDREB. + teamFIC40 + teamOrtg, data = train_glm, family = binomial)
summary(model_glm_tune)
## 
## Call:
## glm(formula = Result ~ teamLoc + teamAST + teamTO + teamBLK + 
##     teamPF + teamFGA + teamFTA + teamORB + teamPTS1 + teamPTS2 + 
##     teamPTS3 + teamPTS4 + teamTREB. + teamDREB. + teamFIC40 + 
##     teamOrtg, family = binomial, data = train_glm)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.1415  -0.4443  -0.0051   0.4266   3.6169  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -16.592014   0.991764 -16.730  < 2e-16 ***
## teamLoc       0.406028   0.058336   6.960  3.4e-12 ***
## teamAST      -0.401388   0.013588 -29.540  < 2e-16 ***
## teamTO        0.209206   0.015141  13.817  < 2e-16 ***
## teamBLK      -0.303973   0.016139 -18.834  < 2e-16 ***
## teamPF        0.146494   0.009047  16.193  < 2e-16 ***
## teamFGA       0.145220   0.009045  16.055  < 2e-16 ***
## teamFTA       0.115892   0.005793  20.005  < 2e-16 ***
## teamORB      -0.384321   0.016411 -23.419  < 2e-16 ***
## teamPTS1     -0.416650   0.014966 -27.840  < 2e-16 ***
## teamPTS2     -0.417937   0.014838 -28.167  < 2e-16 ***
## teamPTS3     -0.413121   0.014857 -27.807  < 2e-16 ***
## teamPTS4     -0.410994   0.014454 -28.435  < 2e-16 ***
## teamTREB.     0.166131   0.014971  11.097  < 2e-16 ***
## teamDREB.    -0.074022   0.007517  -9.848  < 2e-16 ***
## teamFIC40     0.526851   0.013339  39.497  < 2e-16 ***
## teamOrtg      0.148477   0.010452  14.205  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16373.5  on 11810  degrees of freedom
## Residual deviance:  7652.4  on 11794  degrees of freedom
## AIC: 7686.4
## 
## Number of Fisher Scoring iterations: 6
predict_glm_tune <- ifelse(predict(model_glm_tune, test_glm, type="response") > 0.5, "Win", "Loss")
confusionMatrix(factor(predict_glm_tune), factor(test_glm$Result))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Loss  Win
##       Loss 1290  222
##       Win   186 1253
##                                          
##                Accuracy : 0.8617         
##                  95% CI : (0.8488, 0.874)
##     No Information Rate : 0.5002         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.7235         
##                                          
##  Mcnemar's Test P-Value : 0.08314        
##                                          
##             Sensitivity : 0.8740         
##             Specificity : 0.8495         
##          Pos Pred Value : 0.8532         
##          Neg Pred Value : 0.8707         
##              Prevalence : 0.5002         
##          Detection Rate : 0.4371         
##    Detection Prevalence : 0.5124         
##       Balanced Accuracy : 0.8617         
##                                          
##        'Positive' Class : Loss           
## 
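# Instead of pruning predictors by hand from the p-values, a stepwise search can do the selection.
# MASS (already attached via klaR) provides stepAIC(); note it optimizes AIC rather than held-out
# accuracy and is slow on ~12,000 rows. Sketch, not evaluated in this report:
model_glm_step <- stepAIC(model_glm, direction = "backward", trace = FALSE)
summary(model_glm_step)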
# Principal Component Analysis on the predictors (column 2, teamRslt, excluded), scaled and centered
nba_pca <- prcomp(nba[,-2], scale = TRUE, center = TRUE)

# Variance explained by each principal component
VarExp <- nba_pca$sdev^2/sum(nba_pca$sdev^2)
VarExpcum <- cbind(1:46, cumsum(VarExp))   # component index vs. cumulative variance (not used further below)
plot(cumsum(VarExp), xlab = "Principal Component", ylab = "Cumulative Variance Explained", type = "o")

abline(h=1, col = "red")
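# The component count claimed in the next comment can be read off the cumulative curve or computed
# directly. Sketch, not evaluated in this report:
n_comp_99 <- which(cumsum(VarExp) >= 0.99)[1]   # smallest number of PCs covering 99% of the variance
n_comp_99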

# The first ~20 components explain about 99% of the total variance; the models below use PC1-PC19
# (columns 2:20 of nba_final). cbind() coerces the Result factor to numeric, so the class labels in
# the confusion matrices become 1 = Loss and 2 = Win.
nba_final <- as.data.frame(cbind(Result=as.factor(nba$teamRslt),nba_pca$x))
nba_final$Result <- as.factor(nba_final$Result)

# Create training and validation datasets (80/20 random split; no set.seed(), so the split changes between runs)
split <- sample(1:nrow(nba_final), nrow(nba_final)*0.8)
train <- nba_final[split,]
test <- nba_final[-split,]

# Naive Bayes (warnings from the underlying klaR fit, typically about near-zero densities, are suppressed)
model_nb <- suppressWarnings(train(x = train[,2:20], y = train$Result, method = "nb"))
# test has 2,953 rows, so test[1:2953, ] is simply the entire test set
predict_nb <- suppressWarnings(predict(model_nb, test[1:2953,2:20]))
confusionMatrix(predict_nb, test$Result[1:2953])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1 1410   85
##          2   72 1386
##                                           
##                Accuracy : 0.9468          
##                  95% CI : (0.9381, 0.9546)
##     No Information Rate : 0.5019          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8937          
##                                           
##  Mcnemar's Test P-Value : 0.3382          
##                                           
##             Sensitivity : 0.9514          
##             Specificity : 0.9422          
##          Pos Pred Value : 0.9431          
##          Neg Pred Value : 0.9506          
##              Prevalence : 0.5019          
##          Detection Rate : 0.4775          
##    Detection Prevalence : 0.5063          
##       Balanced Accuracy : 0.9468          
##                                           
##        'Positive' Class : 1               
## 
# K-Nearest Neighbors (caret tunes k over a small default grid; a wider search is sketched after the confusion matrix)
model_knn <- train(x = train[,2:20], y = train$Result, method = "knn")
predict_knn <- predict(model_knn, test[1:2953,2:20])
confusionMatrix(predict_knn, test$Result[1:2953])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1 1345  154
##          2  137 1317
##                                          
##                Accuracy : 0.9015         
##                  95% CI : (0.8901, 0.912)
##     No Information Rate : 0.5019         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8029         
##                                          
##  Mcnemar's Test P-Value : 0.3483         
##                                          
##             Sensitivity : 0.9076         
##             Specificity : 0.8953         
##          Pos Pred Value : 0.8973         
##          Neg Pred Value : 0.9058         
##              Prevalence : 0.5019         
##          Detection Rate : 0.4555         
##    Detection Prevalence : 0.5076         
##       Balanced Accuracy : 0.9014         
##                                          
##        'Positive' Class : 1              
## 
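# With the default tuneLength, train() only tries a few values of k (typically 5, 7 and 9);
# a wider grid might squeeze out a little more accuracy. Sketch, not evaluated in this report;
# the grid values are an arbitrary choice:
knn_grid <- data.frame(k = seq(3, 25, by = 2))
model_knn_wide <- train(x = train[,2:20], y = train$Result, method = "knn", tuneGrid = knn_grid)
model_knn_wide$bestTune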
# Random Forest
model_rf <- train(x = train[,2:20], y = train$Result, method = "rf")
predict_rf = predict(model_rf, test[1:2953,2:20])
confusionMatrix(predict_rf, test$Result[1:2953])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1 1366  100
##          2  116 1371
##                                          
##                Accuracy : 0.9269         
##                  95% CI : (0.9169, 0.936)
##     No Information Rate : 0.5019         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8537         
##                                          
##  Mcnemar's Test P-Value : 0.3074         
##                                          
##             Sensitivity : 0.9217         
##             Specificity : 0.9320         
##          Pos Pred Value : 0.9318         
##          Neg Pred Value : 0.9220         
##              Prevalence : 0.5019         
##          Detection Rate : 0.4626         
##    Detection Prevalence : 0.4964         
##       Balanced Accuracy : 0.9269         
##                                          
##        'Positive' Class : 1              
## 
varimp_rf <- varImp(model_rf)
varimp_rf
## rf variable importance
## 
##       Overall
## PC1  100.0000
## PC8   23.4437
## PC3   20.0113
## PC10  16.2926
## PC17  10.1021
## PC5    7.5665
## PC19   6.3170
## PC7    5.3425
## PC2    4.1875
## PC18   3.5946
## PC4    3.4537
## PC6    1.8930
## PC16   1.7730
## PC11   1.0554
## PC12   0.5777
## PC15   0.4822
## PC9    0.2724
## PC13   0.1616
## PC14   0.0000
plot(varimp_rf, main = "Variable Importance with Random Forest")

# Decision Tree (rpart; caret tunes the complexity parameter cp; a finer grid is sketched after the tree plots)
model_DT <- train(x = train[,2:20], y = train$Result, metric = "Accuracy", method = "rpart")
predict_DT = predict(model_DT, test[1:2953,2:20])
confusionMatrix(predict_DT, test$Result[1:2953])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1 1237  417
##          2  245 1054
##                                           
##                Accuracy : 0.7758          
##                  95% CI : (0.7603, 0.7908)
##     No Information Rate : 0.5019          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5514          
##                                           
##  Mcnemar's Test P-Value : 3.01e-11        
##                                           
##             Sensitivity : 0.8347          
##             Specificity : 0.7165          
##          Pos Pred Value : 0.7479          
##          Neg Pred Value : 0.8114          
##              Prevalence : 0.5019          
##          Detection Rate : 0.4189          
##    Detection Prevalence : 0.5601          
##       Balanced Accuracy : 0.7756          
##                                           
##        'Positive' Class : 1               
## 
plot(model_DT$finalModel)
text(model_DT$finalModel)

fancyRpartPlot(model_DT$finalModel)
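# The single tree trails the other methods, and by default caret only tries a handful of cp values.
# A finer cp grid grows deeper trees and might close some of the gap. Sketch, not evaluated in this
# report; the grid values are an arbitrary choice:
dt_grid <- expand.grid(cp = seq(0.0005, 0.02, by = 0.0005))
model_DT_deep <- train(x = train[,2:20], y = train$Result, metric = "Accuracy",
                       method = "rpart", tuneGrid = dt_grid)
model_DT_deep$bestTune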

# Bagged trees (method = "treebag")
model_bag <- train(x = train[,2:20], y = train$Result, method = "treebag")

predict_bag = predict(model_bag, test[1:2953,2:20])
confusionMatrix(predict_bag, test$Result[1:2953])
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    1    2
##          1 1336  149
##          2  146 1322
##                                           
##                Accuracy : 0.9001          
##                  95% CI : (0.8887, 0.9107)
##     No Information Rate : 0.5019          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8002          
##                                           
##  Mcnemar's Test P-Value : 0.9073          
##                                           
##             Sensitivity : 0.9015          
##             Specificity : 0.8987          
##          Pos Pred Value : 0.8997          
##          Neg Pred Value : 0.9005          
##              Prevalence : 0.5019          
##          Detection Rate : 0.4524          
##    Detection Prevalence : 0.5029          
##       Balanced Accuracy : 0.9001          
##                                           
##        'Positive' Class : 1               
## 
# Algorithm performance comparison: resamples() collects each model's bootstrap resampling results
# (a shared-folds version is sketched after the summary below)
model_comparison <- resamples(list(BAG = model_bag,  DT = model_DT, NB = model_nb, KNN = model_knn, RF = model_rf))
summary(model_comparison)
## 
## Call:
## summary.resamples(object = model_comparison)
## 
## Models: BAG, DT, NB, KNN, RF 
## Number of resamples: 25 
## 
## Accuracy 
##          Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## BAG 0.8832534 0.8898383 0.8933333 0.8930203 0.8949930 0.9020060    0
## DT  0.7653722 0.7777264 0.7832344 0.7817263 0.7860120 0.7944306    0
## NB  0.9417051 0.9471275 0.9520642 0.9509892 0.9548699 0.9598431    0
## KNN 0.8689173 0.8754301 0.8772210 0.8776569 0.8798244 0.8869965    0
## RF  0.9179534 0.9252040 0.9282759 0.9275006 0.9297682 0.9350139    0
## 
## Kappa 
##          Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## BAG 0.7665069 0.7796622 0.7866591 0.7860152 0.7899650 0.8039408    0
## DT  0.5309771 0.5551563 0.5661716 0.5634459 0.5719414 0.5889245    0
## NB  0.8834122 0.8942525 0.9041307 0.9019666 0.9097403 0.9196696    0
## KNN 0.7378252 0.7508278 0.7544055 0.7552852 0.7595857 0.7739898    0
## RF  0.8359089 0.8504095 0.8565440 0.8549822 0.8595250 0.8700045    0
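# Each train() call above drew its own 25 bootstrap resamples with the default control, so the models
# are not evaluated on identical resamples. Sharing one set of folds via trainControl(index = ...)
# makes the comparison more direct. Sketch, not evaluated in this report; the seed and fold counts
# are arbitrary choices:
set.seed(520)
cv_folds <- createMultiFolds(train$Result, k = 10, times = 3)
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3, index = cv_folds)
model_rf_cv  <- train(x = train[,2:20], y = train$Result, method = "rf",  trControl = ctrl)
model_knn_cv <- train(x = train[,2:20], y = train$Result, method = "knn", trControl = ctrl)
# ...refit the remaining methods the same way, then pass them to resamples() as above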
# Graphically Compare Performance
scales <- list(x = list(relation = "free"),
               y = list(relation = "free"))

bwplot(model_comparison, scales = scales)