0. Load the imbalance package and the dataset

# Attach the imbalance package.
# NOTE(review): the newthyroid1 dataset and the functions called later in this
# walkthrough are presumably provided by this package -- confirm.
library(imbalance)
# Preview the first 10 rows. The recorded output shows one integer column
# (T3resin), four double columns and a factor column `Class`.
head(newthyroid1, 10)
## # A tibble: 10 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1     105      7.3              1.5                1.5      -0.1 negative
##  2      67     23.3              7.4                1.8      -0.6 positive
##  3     111      8.4              1.5                0.8       1.2 negative
##  4      89     14.3              4.1                0.5       0.2 positive
##  5     105      9.5              1.8                1.6       3.6 negative
##  6     110     20.3              3.7                0.6       0.2 positive
##  7      84     21.5              2.7                1.1      -0.6 positive
##  8     113     11.1              1.7                0.8       2.3 negative
##  9      97      7.8              1.3                1.2       0.9 negative
## 10     106     13.4              3                  1.1       0   positive
# Count rows per class: the recorded output shows 180 negative vs 35 positive,
# so "positive" is the minority class.
table(newthyroid1$Class)
## 
## negative positive 
##      180       35

You can compute the imbalance ratio of the dataset with imbalanceRatio:

# Imbalance ratio of the dataset. The recorded value 0.1944444 equals 35 / 180,
# i.e. the positive count divided by the negative count reported by table().
imbalanceRatio(newthyroid1)
## [1] 0.1944444

1. MWMOTE

# Request 100 new instances from mwmote(). The recorded result is a 100 x 6
# tibble with the same columns as newthyroid1; every visible row is "positive".
# NOTE(review): generation appears to be random and no seed is set, so a re-run
# will presumably not reproduce these exact rows -- confirm.
newMWMOTE <- mwmote(newthyroid1, numInstances = 100)
# Auto-print the generated instances.
newMWMOTE
## # A tibble: 100 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1      99     14.6             2.98              0.955    0.0457 positive
##  2      96     13.3             3.73              0.648   -0.0221 positive
##  3      90     22.2             5.13              0.530    0.0704 positive
##  4      87     20.8             3.94              1.16    -0.0625 positive
##  5     109     15.9             2.06              0.935   -0.112  positive
##  6      91     14.2             3.92              0.596    0.168  positive
##  7      96     16.5             5.53              1.1     -0.112  positive
##  8     106     13.4             3                 1.1      0      positive
##  9     105     17.6             2.64              0.977    0.0684 positive
## 10      73     14.2             6.39              0.462   -0.0892 positive
## # ... with 90 more rows
# Compare the original data against original + generated rows, restricted to
# the first three columns (T3resin, Thyroxin, Triiodothyronine).
plotComparison(newthyroid1, rbind(newthyroid1, newMWMOTE), attrs = names(newthyroid1)[1:3])

2. RACOG

# Request 100 new instances from racog(). The recorded result is a 100 x 6
# tibble; every visible row is "positive" and all visible values have at most
# one decimal place, like the original data.
# NOTE(review): no seed is set; output presumably differs between runs.
newRACOG <- racog(newthyroid1, numInstances = 100)
# Auto-print the generated instances.
newRACOG
## # A tibble: 100 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1      65     25.3              5.8                0.5       0.1 positive
##  2      67     23.3              7.4                1.8      -0.6 positive
##  3      84     11.1              2.7                1.8      -0.6 positive
##  4      98     16.7              4.3                0.6       0.2 positive
##  5      89     21.8              7.1                0.5       0.2 positive
##  6     105     22.3              3.3                1.1       0   positive
##  7     106     13.4              3                  1.1       0   positive
##  8     139     16.4              3.8                0.7      -0.2 positive
##  9      97     17.2              1.8                1.2      -0.2 positive
## 10     111     16                2.1                0.7      -0.1 positive
## # ... with 90 more rows
# Compare the original data against original + generated rows, restricted to
# the first three columns (T3resin, Thyroxin, Triiodothyronine).
plotComparison(newthyroid1, rbind(newthyroid1, newRACOG), attrs = names(newthyroid1)[1:3])

3. RWO

# Request 100 new instances from rwo(). The recorded result is a 100 x 6
# tibble; every visible row is "positive".
# NOTE(review): visible T3resin values span 14 to 193, wider than the values
# seen in head(newthyroid1, 10) -- check whether that spread is expected.
# NOTE(review): no seed is set; output presumably differs between runs.
newRWO <- rwo(newthyroid1, numInstances = 100)
# Auto-print the generated instances.
newRWO
## # A tibble: 100 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1     193     17.6             6.13              1.69      0.190 positive
##  2     125     12.0             2.60              0.106     0.191 positive
##  3     159     21.3             4.81              0.916     0.297 positive
##  4      14     14.2             1.99              1.23     -0.198 positive
##  5      60     22.7             2.09              1.37     -0.497 positive
##  6      93     10.5             1.69              0.701    -0.207 positive
##  7      38     25.0             5.60              0.920     0.302 positive
##  8     117     13.1             3.36              0.493     0.207 positive
##  9      93     13.6             1.59              1.46      0.296 positive
## 10     146     16.7             1.05              0.334     0.406 positive
## # ... with 90 more rows
# Compare the original data against original + generated rows, restricted to
# the first three columns (T3resin, Thyroxin, Triiodothyronine).
plotComparison(newthyroid1, rbind(newthyroid1, newRWO), attrs = names(newthyroid1)[1:3])

4. PDFOS

# Request 100 new instances from pdfos(). The recorded result is a 100 x 6
# tibble; every visible row is "positive". This object is reused as the
# `newSamples` argument of neater() in the next section.
# NOTE(review): some generated values are negative (Thyroidstimulating in
# row 4, Triiodothyronine in rows 7 and 10) although those columns are positive
# in head(newthyroid1, 10) -- check whether that is acceptable.
# NOTE(review): no seed is set; output presumably differs between runs.
newPDFOS <- pdfos(newthyroid1, numInstances = 100)
# Auto-print the generated instances.
newPDFOS
## # A tibble: 100 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1     155    20.1             1.64              1.10      0.660  positive
##  2      96    12.3             3.07              0.286    -0.218  positive
##  3     105     9.15            3.92              1.72     -0.317  positive
##  4      98    10.3             3.83             -0.302     0.760  positive
##  5      91    19.4             4.40              0.398     0.110  positive
##  6      66    15.5             4.27              0.0874    0.103  positive
##  7     106     9.93           -3.03              0.816     0.0440 positive
##  8      98    14.0             4.98              0.765    -0.0553 positive
##  9      98    11.8             3.89              1.30      0.156  positive
## 10     100    14.7            -0.169             0.890     0.526  positive
## # ... with 90 more rows
# Compare the original data against original + generated rows, restricted to
# the first three columns (T3resin, Thyroxin, Triiodothyronine).
plotComparison(newthyroid1, rbind(newthyroid1, newPDFOS), attrs = names(newthyroid1)[1:3])

5. NEATER

# Pass the PDFOS-generated instances to neater() with iterations = 500.
# The recorded message reports that 21 samples were filtered.
# NOTE(review): `filtered` presumably holds the generated samples that were
# kept (not the removed ones) -- confirm against the package documentation.
filtered <- neater(newthyroid1, newSamples = newPDFOS, iterations = 500)
## [1] "21 samples filtered by NEATER"
# Compare the original data against original + `filtered` rows, restricted to
# the first three columns (T3resin, Thyroxin, Triiodothyronine).
plotComparison(newthyroid1, rbind(newthyroid1, filtered), attrs = names(newthyroid1)[1:3])

6. oversample

# Single call requesting method "PDFOS" with ratio = 1 and filtering enabled
# (iterations = 500). The recorded message shows NEATER filtered 24 samples.
# NOTE(review): no seed is set; the filtered count presumably varies per run.
filtered2 <- oversample(newthyroid1, ratio = 1, method = "PDFOS", filtering = TRUE, iterations = 500)
## [1] "24 samples filtered by NEATER"
# Rows 1-10 below match head(newthyroid1, 10), so the result starts with the
# original data rather than containing only the generated instances.
head(filtered2, 20)
## # A tibble: 20 x 6
##    T3resin Thyroxin Triiodothyronine Thyroidstimulating TSH_value Class   
##      <int>    <dbl>            <dbl>              <dbl>     <dbl> <fct>   
##  1     105      7.3              1.5                1.5      -0.1 negative
##  2      67     23.3              7.4                1.8      -0.6 positive
##  3     111      8.4              1.5                0.8       1.2 negative
##  4      89     14.3              4.1                0.5       0.2 positive
##  5     105      9.5              1.8                1.6       3.6 negative
##  6     110     20.3              3.7                0.6       0.2 positive
##  7      84     21.5              2.7                1.1      -0.6 positive
##  8     113     11.1              1.7                0.8       2.3 negative
##  9      97      7.8              1.3                1.2       0.9 negative
## 10     106     13.4              3                  1.1       0   positive
## 11     104      6.3              2                  1.2       4   negative
## 12     112      5.9              1.7                2         1.3 negative
## 13     120      1.9              0.7               18.5      24   negative
## 14     118      3.6              1.5               11.6      48.8 negative
## 15     106      9.4              1.7                0.9       3.1 negative
## 16      99     13                3.6                0.7      -0.1 positive
## 17     107     13.8              1.5                1         1.9 negative
## 18     111     16                2.1                0.9      -0.1 positive
## 19     129     11.9              2.7                1.2       3.5 negative
## 20     115      6.3              1.2                4.7      14.4 negative
# Total size: 336 rows = 180 negative + 156 positive (see table() below).
dim(filtered2)
## [1] 336   6
# Class counts after oversampling. 156 = 180 - 24, which is consistent with
# raising the positive class to 180 and then dropping the 24 filtered samples.
# NOTE(review): that reading is inferred from the counts -- confirm.
table(filtered2$Class)
## 
## negative positive 
##      180      156