DBSCAN Clustering

Performing DBSCAN clustering on the Iris data set.

Loading the necessary library

library("dbscan")
## Warning: package 'dbscan' was built under R version 4.1.3

Loading the data set

library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
# Read the Iris data from the remote CSV and preview the first rows.
m <- read.csv("http://bit.ly/IrisDataset")
head(m)
##   sepal_length sepal_width petal_length petal_width     species
## 1          5.1         3.5          1.4         0.2 Iris-setosa
## 2          4.9         3.0          1.4         0.2 Iris-setosa
## 3          4.7         3.2          1.3         0.2 Iris-setosa
## 4          4.6         3.1          1.5         0.2 Iris-setosa
## 5          5.0         3.6          1.4         0.2 Iris-setosa
## 6          5.4         3.9          1.7         0.4 Iris-setosa

Removing class labels

# Keep the four measurement columns; column 5 (species) is the class label
# and is left out of the clustering input.
m1 <- m[, 1:4]
head(m1)
##   sepal_length sepal_width petal_length petal_width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

Applying the DBSCAN algorithm. We want a minimum of 4 points within a distance of eps (0.4).

# Cluster the Iris measurements: require at least 4 points within a distance
# of eps = 0.4. The dbscan package names this argument `minPts`; the fpc-style
# spelling `MinPts` is only converted on the fly and emits a warning each call.
db <- dbscan(m1, eps = 0.4, minPts = 4)

Printing out the clustering results

# Summarise the fit; in the size table, cluster 0 holds the noise points.
print(db)
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 4 cluster(s) and 25 noise points.
## 
##  0  1  2  3  4 
## 25 47 38 36  4 
## 
## Available fields: cluster, eps, minPts

Plotting our clusters

# Scatter plot of the points coloured by cluster, with a hull drawn around
# each cluster.
hullplot(m1, db$cluster)

Challenge 1

Loading the data set

library(data.table)
# Fetch the Challenge 1 data set with data.table's fread and preview it.
df1 <- fread("http://bit.ly/MSDBSCANClusteringDataset")
head(df1)
##     Area   Per   Comp   Len   Wid As_Co Gr_Len Label
## 1: 15.26 14.84 0.8710 5.763 3.312 2.221  5.220     1
## 2: 14.88 14.57 0.8811 5.554 3.333 1.018  4.956     1
## 3: 14.29 14.09 0.9050 5.291 3.337 2.699  4.825     1
## 4: 13.84 13.94 0.8955 5.324 3.379 2.259  4.805     1
## 5: 16.14 14.99 0.9034 5.658 3.562 1.355  5.175     1
## 6: 14.38 14.21 0.8951 5.386 3.312 2.462  4.956     1
# Check for missing values before clustering.
anyNA(df1)
## [1] FALSE
  1. Removing class labels
# Keep the seven feature columns; column 8 (Label) is left out.
df11 <- df1[, 1:7]
head(df11)
##     Area   Per   Comp   Len   Wid As_Co Gr_Len
## 1: 15.26 14.84 0.8710 5.763 3.312 2.221  5.220
## 2: 14.88 14.57 0.8811 5.554 3.333 1.018  4.956
## 3: 14.29 14.09 0.9050 5.291 3.337 2.699  4.825
## 4: 13.84 13.94 0.8955 5.324 3.379 2.259  4.805
## 5: 16.14 14.99 0.9034 5.658 3.562 1.355  5.175
## 6: 14.38 14.21 0.8951 5.386 3.312 2.462  4.956
  1. Applying the DBSCAN algorithm. We want a minimum of 4 points within a distance of eps (0.4).
# Cluster the seven features: require at least 4 points within a distance of
# eps = 0.4. `minPts` is the dbscan package's own argument name; the fpc-style
# `MinPts` is only converted on the fly and emits a warning each call.
db <- dbscan(df11, eps = 0.4, minPts = 4)
  1. Printing out the clustering results
# Summarise the fit; in the size table, cluster 0 holds the noise points.
print(db)
## DBSCAN clustering for 210 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 10 cluster(s) and 153 noise points.
## 
##   0   1   2   3   4   5   6   7   8   9  10 
## 153   6   4   4  11   4   6   8   6   4   4 
## 
## Available fields: cluster, eps, minPts
  1. Plotting our clusters
# Plot the points coloured by cluster with a hull per cluster. There are more
# clusters than palette colours here, so some colours repeat.
hullplot(df11, db$cluster)
## Warning in hullplot(df11, db$cluster): Not enough colors. Some colors will be
## reused.

Challenge 2

Loading the data set

library(data.table)
# Fetch the Challenge 2 data set with data.table's fread and preview it.
df2 <- fread("http://bit.ly/MSDBSCANClusteringDataset2")
head(df2)
##    MMSI  SOG Longitude Latitude   COG
## 1:    1  0.0 -14.61289 8.368005   3.4
## 2:    1  0.0 -14.61285 8.368035 359.8
## 3:    1  0.0 -14.61285 8.368033 357.8
## 4:    2 11.5 -14.00422 8.250355 116.0
## 5:    1  0.0 -14.61284 8.368013 356.6
## 6:    2 11.6 -14.00360 8.250152 116.0
# Check for missing values before clustering.
anyNA(df2)
## [1] FALSE
  1. Removing the label column (MMSI)
# Leave out column 1 (MMSI); keep SOG, Longitude, Latitude and COG.
df21 <- df2[, 2:5]
head(df21)
##     SOG Longitude Latitude   COG
## 1:  0.0 -14.61289 8.368005   3.4
## 2:  0.0 -14.61285 8.368035 359.8
## 3:  0.0 -14.61285 8.368033 357.8
## 4: 11.5 -14.00422 8.250355 116.0
## 5:  0.0 -14.61284 8.368013 356.6
## 6: 11.6 -14.00360 8.250152 116.0
  1. Applying the DBSCAN algorithm. We want a minimum of 4 points within a distance of eps (0.4).
# Cluster the four columns: require at least 4 points within a distance of
# eps = 0.4. `minPts` is the dbscan package's own argument name; the fpc-style
# `MinPts` is only converted on the fly and emits a warning each call.
# NOTE(review): the columns span very different ranges (COG reaches ~360 while
# Longitude/Latitude vary by fractions of a degree), so a single eps on the raw
# values is dominated by the widest column -- consider scaling; confirm intent.
db <- dbscan(df21, eps = 0.4, minPts = 4)
  1. Printing out the clustering results
# Summarise the fit; in the size table, cluster 0 holds the noise points.
print(db)
## DBSCAN clustering for 81159 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 860 cluster(s) and 5783 noise points.
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
##  5783  3743  2644 20748  2205    81  1573 21856    18    16    28    10    41 
##    13    14    15    16    17    18    19    20    21    22    23    24    25 
##   639    10  5998    18    43    29     4  5301     7     8    14    33     5 
##    26    27    28    29    30    31    32    33    34    35    36    37    38 
##    10    33   159   170    69    86    17    70    52     5     4    11    43 
##    39    40    41    42    43    44    45    46    47    48    49    50    51 
##     6    33    53    11    84    43     8     5    24    44    24   260    34 
##    52    53    54    55    56    57    58    59    60    61    62    63    64 
##   105    13     7     4    61    42     5     4    11     4    46    56     5 
##    65    66    67    68    69    70    71    72    73    74    75    76    77 
##    39    21    11     8     8    21   150     5     7    17    11    20     5 
##    78    79    80    81    82    83    84    85    86    87    88    89    90 
##    12    17     7    99    33     8     5     8    92     4    85     4     4 
##    91    92    93    94    95    96    97    98    99   100   101   102   103 
##     7     6     9    23    10     5     5     4     5     7     4    56     9 
##   104   105   106   107   108   109   110   111   112   113   114   115   116 
##     7     9     8     4     5    17    17    18    13    11    14     6    10 
##   117   118   119   120   121   122   123   124   125   126   127   128   129 
##    13    25     4     4     4     4     9    10     6     9     9    41    11 
##   130   131   132   133   134   135   136   137   138   139   140   141   142 
##     5     5     4    44    56    16     4    36     5     8    12     5     8 
##   143   144   145   146   147   148   149   150   151   152   153   154   155 
##     4    22     7     5     9    13     5    20     9     9     5     5     6 
##   156   157   158   159   160   161   162   163   164   165   166   167   168 
##     4     9    10     8     6     4     9    13     4     4     8    12     8 
##   169   170   171   172   173   174   175   176   177   178   179   180   181 
##    10    10     5    47    19    46    14     5    31     5     5    21     6 
##   182   183   184   185   186   187   188   189   190   191   192   193   194 
##    13    18     5     8    10    49     8     5    14    19    11     5     7 
##   195   196   197   198   199   200   201   202   203   204   205   206   207 
##     7     5     7     6     4     6     5    11    14    26   153    14     8 
##   208   209   210   211   212   213   214   215   216   217   218   219   220 
##     9    12     5     4     4    26    14     5     6    16    24    14    18 
##   221   222   223   224   225   226   227   228   229   230   231   232   233 
##     7    11    14     8     6     9     7    11    13     6    15    10    11 
##   234   235   236   237   238   239   240   241   242   243   244   245   246 
##     4    10     4     4     5     6     4     7     7    19   106     4     8 
##   247   248   249   250   251   252   253   254   255   256   257   258   259 
##     5     4     5     7     4    34     6     6    10    63    20     8    11 
##   260   261   262   263   264   265   266   267   268   269   270   271   272 
##     6     6     5     9    14     4    12    53     4     5    19     7     6 
##   273   274   275   276   277   278   279   280   281   282   283   284   285 
##     5    83     7    50    30    10     4    25     4     4    11     4    11 
##   286   287   288   289   290   291   292   293   294   295   296   297   298 
##    13    17    27     6    71    20     4    13     6    12    11     7    74 
##   299   300   301   302   303   304   305   306   307   308   309   310   311 
##     4     7    15     4    30     5     4    14    55    17    10     4     6 
##   312   313   314   315   316   317   318   319   320   321   322   323   324 
##     9     4    86     5    12     9     9     4    24    10     9    34     4 
##   325   326   327   328   329   330   331   332   333   334   335   336   337 
##    34    10     4    11    16     5     4    33    18     6     4     6     6 
##   338   339   340   341   342   343   344   345   346   347   348   349   350 
##    15     4    10    10     5    30     7    11     7    21    18     6    16 
##   351   352   353   354   355   356   357   358   359   360   361   362   363 
##     8     4     5     4    14    10    27    23     5    10    21     3     6 
##   364   365   366   367   368   369   370   371   372   373   374   375   376 
##    15    19    16     8     4     7     9     7     6     5     5    18    14 
##   377   378   379   380   381   382   383   384   385   386   387   388   389 
##     5    21     5    14    17    15     8     9     4    12     4     7    66 
##   390   391   392   393   394   395   396   397   398   399   400   401   402 
##     5    14    17    13     4     4    10    42    17    12     4     8     4 
##   403   404   405   406   407   408   409   410   411   412   413   414   415 
##    37     5     5     5     4     9     5     4     4    10     5     4    13 
##   416   417   418   419   420   421   422   423   424   425   426   427   428 
##     8    13    60     5     4     4     4    16     4     4     9     5     4 
##   429   430   431   432   433   434   435   436   437   438   439   440   441 
##     4     4     8     7     5    12    18     6     5    34   106     6     6 
##   442   443   444   445   446   447   448   449   450   451   452   453   454 
##     9     4     4     5     4     4     4     5     4     4     4     4     5 
##   455   456   457   458   459   460   461   462   463   464   465   466   467 
##    10     4    87     4     5    10     4     7    35     7     4     5     4 
##   468   469   470   471   472   473   474   475   476   477   478   479   480 
##     7     4     5     6     4     5     4     4     8    13     5    37     4 
##   481   482   483   484   485   486   487   488   489   490   491   492   493 
##     4     6     4     6     8     4    21     7    12    10    23    18     5 
##   494   495   496   497   498   499   500   501   502   503   504   505   506 
##     5     4     4     5     4     4     4     4     4     5     8     5     6 
##   507   508   509   510   511   512   513   514   515   516   517   518   519 
##     5     8    17     7     7     8    15     9     9     7     4    19    19 
##   520   521   522   523   524   525   526   527   528   529   530   531   532 
##     4     7     5     4     7     6     6     8     4    32     6     6     6 
##   533   534   535   536   537   538   539   540   541   542   543   544   545 
##     5     5    16    11     4     4     4    16    10     9     4     5     4 
##   546   547   548   549   550   551   552   553   554   555   556   557   558 
##     4     4     5     5     5     5     4     4     6    15     6     4     5 
##   559   560   561   562   563   564   565   566   567   568   569   570   571 
##     4     4    10    14    17     4     9     4     4     4    16     6     6 
##   572   573   574   575   576   577   578   579   580   581   582   583   584 
##     4    28    22     4     9     6     4     4     4     7     7    15    13 
##   585   586   587   588   589   590   591   592   593   594   595   596   597 
##    12     8    45    94     4     5     4     6    46    11     5     5     4 
##   598   599   600   601   602   603   604   605   606   607   608   609   610 
##    10     5    78     7     5     6     6    20     8    18     5     8     8 
##   611   612   613   614   615   616   617   618   619   620   621   622   623 
##     3     4     6     4     4     6     5    18     5     4     8     6     4 
##   624   625   626   627   628   629   630   631   632   633   634   635   636 
##    25     8     7     6     4     4     6     9     5     4     7     6     9 
##   637   638   639   640   641   642   643   644   645   646   647   648   649 
##     4     4     5     4    16     8     9     6    14     4     4     4     5 
##   650   651   652   653   654   655   656   657   658   659   660   661   662 
##     4     4     6     4     4    21     4    19    32     4     5     3     8 
##   663   664   665   666   667   668   669   670   671   672   673   674   675 
##    10     6     8     6     4     4     6     8     4     4     4     7     4 
##   676   677   678   679   680   681   682   683   684   685   686   687   688 
##     4    11    12    13     5     5     5     5     6     5     4     7    12 
##   689   690   691   692   693   694   695   696   697   698   699   700   701 
##    10     7     5     4     4    13     4    10     5     7     4     5     5 
##   702   703   704   705   706   707   708   709   710   711   712   713   714 
##     4     5     6     8     5     4     4     4     4     5     4     6     6 
##   715   716   717   718   719   720   721   722   723   724   725   726   727 
##     9     4    20    11     4     4     5     8     4     4     7     8     8 
##   728   729   730   731   732   733   734   735   736   737   738   739   740 
##     4     5     4     4     7     7     8     4     7     4     4     7     6 
##   741   742   743   744   745   746   747   748   749   750   751   752   753 
##    14     6    10     7     4     4     4    12     4     4    10     4     4 
##   754   755   756   757   758   759   760   761   762   763   764   765   766 
##     4    10     4     4     8     4     4     4     4     7     6     6     4 
##   767   768   769   770   771   772   773   774   775   776   777   778   779 
##     5    12     4     5     6     5    12     5     4     5     3     4     4 
##   780   781   782   783   784   785   786   787   788   789   790   791   792 
##     4     7     4     4     8     4     4     4     4     8     7     4     4 
##   793   794   795   796   797   798   799   800   801   802   803   804   805 
##     5     4     4     4     6    11    15     8     4    20    18     8     4 
##   806   807   808   809   810   811   812   813   814   815   816   817   818 
##     4     4     4     4     4     6     4     4    10     6     6     4     8 
##   819   820   821   822   823   824   825   826   827   828   829   830   831 
##     4     4    13    11     6     3     5     6     5     4     4     4     4 
##   832   833   834   835   836   837   838   839   840   841   842   843   844 
##     4     4     5     4     5     4     6     6     6    18     7     7    15 
##   845   846   847   848   849   850   851   852   853   854   855   856   857 
##     5     4     7     5     7     5    13     4     2     5     6    32     4 
##   858   859   860 
##     5     4     4 
## 
## Available fields: cluster, eps, minPts
  1. Plotting our clusters
# Plot the points coloured by cluster with a hull per cluster. There are far
# more clusters than palette colours here, so colours repeat.
hullplot(df21, db$cluster)
## Warning in hullplot(df21, db$cluster): Not enough colors. Some colors will be
## reused.

Challenge 3

Loading the data set

library(data.table)
# Fetch the Challenge 3 data set with data.table's fread and preview it.
df3 <- fread("http://bit.ly/MSDBSCANClusteringDataset3")
head(df3)
##           V1         V2
## 1:  0.000000  1.0000000
## 2:  8.622185  1.9357958
## 3: -4.736710 -7.9709577
## 4:  9.621222  0.9254231
## 5:  6.162095 -0.2732544
## 6:  8.697488 -1.0574521
# Check for missing values before clustering.
anyNA(df3)
## [1] FALSE
  1. Applying the DBSCAN algorithm. We want a minimum of 4 points within a distance of eps (0.4).
# Cluster the two columns (V1, V2): require at least 4 points within a distance
# of eps = 0.4. `minPts` is the dbscan package's own argument name; the
# fpc-style `MinPts` is only converted on the fly and emits a warning each call.
db <- dbscan(df3, eps = 0.4, minPts = 4)
  1. Printing out the clustering results
# Summarise the fit; in the size table, cluster 0 holds the noise points.
print(db)
## DBSCAN clustering for 1001 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 4 cluster(s) and 66 noise points.
## 
##   0   1   2   3   4 
##  66 310 308 312   5 
## 
## Available fields: cluster, eps, minPts
  1. Plotting our clusters
# Scatter plot of V1 against V2 coloured by cluster, with a hull drawn around
# each cluster.
hullplot(df3, db$cluster)