## # A tibble: 132 × 10
## country description hourlyRate jobSuccess locality name skills title
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 Egypt "let the wor… 25 97% Giza Abee… Pytho… Data…
## 2 Serbia "I am master… 15 97% Belgrade Niko… Data … Rese…
## 3 Philippines "Specializes… 60 100% Manila Rami… Micro… Exce…
## 4 United States "Data Scienc… 500 100% Wyckoff Jaso… Deep … Data…
## 5 Pakistan "\U0001f534\… 25 82% Karachi Muha… Data … Data…
## 6 India "Hello,\nI h… 20 86% Bhopal Prag… Adobe… Prof…
## 7 India "Former Odoo… 35 97% Katch Deve… Pytho… Odoo…
## 8 Pakistan "Expert in M… 60.0 98% Karachi Dr .… Resea… Expe…
## 9 Bangladesh "TOP-RATED D… 30 100% Dhaka Maha… Micro… Meta…
## 10 United States "Welcome to … 45 <NA> Las Cru… Tren… Micro… Data…
## # ℹ 122 more rows
## # ℹ 2 more variables: totalHours <dbl>, totalJobs <dbl>
# Build the working table `up` from the raw `upwork` data:
#   * drop the free-text `description` and `title` columns,
#   * split the pipe-delimited `skills` string into four skill columns,
#   * strip the "%" from `jobSuccess`, convert it to a number and recode it as
#     "high" (strictly above 95) or "low"; missing scores stay NA.
up <- upwork %>%
  dplyr::select(c(-description, -title)) %>%
  separate(
    col = skills,
    into = c("skill1", "skill2", "skill3", "skill4"),
    sep = "\\|"
  ) %>%
  mutate(
    jobSuccess = if_else(
      as.numeric(str_remove(jobSuccess, "\\%")) > 95,
      "high",
      "low"
    )
  )
up
## # A tibble: 132 × 11
## country hourlyRate jobSuccess locality name skill1 skill2 skill3 skill4
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Egypt 25 high Giza Abee… Python Machi… Scrapy Data …
## 2 Serbia 15 high Belgrade Niko… Data … Googl… Onlin… Lead …
## 3 Philippines 60 high Manila Rami… Micro… Excel… Excel… Visua…
## 4 United Stat… 500 high Wyckoff Jaso… Deep … Python Data … Machi…
## 5 Pakistan 25 low Karachi Muha… Data … Machi… pandas Micro…
## 6 India 20 low Bhopal Prag… Adobe… MATLAB Polit… Python
## 7 India 35 high Katch Deve… Python Odoo … Artif… Odoo
## 8 Pakistan 60.0 high Karachi Dr .… Resea… Medic… Medic… Acade…
## 9 Bangladesh 30 high Dhaka Maha… Micro… Dashb… SQL Table…
## 10 United Stat… 45 <NA> Las Cru… Tren… Micro… Python SQL Data …
## # ℹ 122 more rows
## # ℹ 2 more variables: totalHours <dbl>, totalJobs <dbl>
## # A tibble: 132 × 3
## hourlyRate totalHours totalJobs
## <dbl> <dbl> <dbl>
## 1 25 405 69
## 2 15 354 68
## 3 60 15 351
## 4 500 434 80
## 5 25 4 11
## 6 20 14 74
## 7 35 917 54
## 8 60.0 212 129
## 9 30 347 21
## 10 45 60 2
## # ℹ 122 more rows
# Fill in missing numeric values with predictions from an rpart regression
# learner. `up.num` is created outside this excerpt (presumably the numeric
# columns of `up` — confirm).
# Note: the variable `impute` holds the imputation method, while the call
# `impute(...)` on the next line still resolves to the function, because R
# skips non-function bindings when looking up a name that is being called.
impute <- imputeLearner("regr.rpart")
imputed <- impute(up.num, classes = list(numeric = impute))
# Keep only the completed data set from the imputation result.
up.imp <- imputed[["data"]]
up.imp
## hourlyRate totalHours totalJobs
## 1 25.00 405.00000 69
## 2 15.00 354.00000 68
## 3 60.00 15.00000 351
## 4 500.00 434.00000 80
## 5 25.00 4.00000 11
## 6 20.00 14.00000 74
## 7 35.00 917.00000 54
## 8 59.99 212.00000 129
## 9 30.00 347.00000 21
## 10 45.00 60.00000 2
## 11 15.00 92.22222 12
## 12 60.00 50.00000 24
## 13 25.00 902.00000 160
## 14 100.00 520.00000 138
## 15 25.00 97.00000 2
## 16 20.00 682.00000 67
## 17 25.00 821.00000 45
## 18 14.99 79.00000 32
## 19 40.00 69.00000 159
## 20 45.00 956.00000 37
## 21 30.00 706.00000 76
## 22 40.00 209.00000 28
## 23 65.00 210.00000 29
## 24 7.99 178.00000 364
## 25 35.00 709.00000 165
## 26 10.00 92.22222 26
## 27 150.00 917.00000 60
## 28 10.00 227.00000 39
## 29 5.50 996.00000 209
## 30 18.00 497.00000 96
## 31 30.00 22.00000 3
## 32 30.00 88.00000 486
## 33 15.00 71.00000 31
## 34 30.00 442.00000 15
## 35 30.00 8.00000 51
## 36 38.00 763.00000 124
## 37 40.00 82.00000 12
## 38 5.99 107.00000 158
## 39 22.50 413.00000 49
## 40 20.00 36.00000 3
## 41 20.00 798.00000 353
## 42 10.00 212.00000 18
## 43 4.00 221.00000 45
## 44 20.00 607.00000 284
## 45 25.00 737.00000 64
## 46 30.00 182.00000 55
## 47 25.00 505.00000 38
## 48 30.00 985.00000 11
## 49 36.00 274.00000 44
## 50 28.00 59.00000 14
## 51 10.00 361.00000 176
## 52 29.00 932.00000 51
## 53 5.00 941.00000 146
## 54 40.00 80.00000 16
## 55 30.00 187.00000 244
## 56 35.00 56.00000 54
## 57 20.00 990.00000 44
## 58 6.50 216.00000 398
## 59 70.00 106.47368 1
## 60 15.00 29.00000 2
## 61 10.00 1.00000 4
## 62 20.00 193.00000 19
## 63 20.00 92.22222 1
## 64 45.00 433.00000 18
## 65 4.44 91.00000 408
## 66 125.00 980.00000 66
## 67 7.00 272.00000 6
## 68 20.00 52.00000 6
## 69 20.00 523.00000 99
## 70 25.00 25.00000 52
## 71 35.00 188.00000 22
## 72 25.00 1.00000 37
## 73 60.00 23.00000 19
## 74 7.00 821.00000 136
## 75 20.00 92.22222 2
## 76 25.00 17.00000 2
## 77 5.00 92.22222 2
## 78 38.00 46.00000 16
## 79 30.00 264.00000 30
## 80 50.00 272.00000 6
## 81 25.00 908.00000 38
## 82 20.00 318.00000 12
## 83 35.00 861.00000 26
## 84 12.00 121.00000 100
## 85 50.00 105.00000 13
## 86 7.00 383.00000 195
## 87 15.00 477.00000 54
## 88 30.00 6.00000 6
## 89 50.00 28.00000 11
## 90 45.00 66.00000 12
## 91 55.00 203.00000 64
## 92 25.00 380.00000 92
## 93 18.00 92.22222 1
## 94 25.00 9.00000 14
## 95 35.00 80.00000 10
## 96 50.00 26.00000 4
## 97 16.00 88.00000 8
## 98 125.00 528.00000 80
## 99 20.00 43.00000 5
## 100 30.00 231.19048 29
## 101 25.00 393.00000 148
## 102 120.00 178.00000 7
## 103 75.00 22.00000 3
## 104 45.00 4.00000 29
## 105 50.00 806.00000 148
## 106 50.00 29.00000 33
## 107 60.00 32.00000 42
## 108 40.00 346.00000 424
## 109 25.00 149.00000 22
## 110 10.00 92.22222 9
## 111 14.95 188.00000 270
## 112 20.00 2.00000 9
## 113 11.11 31.00000 28
## 114 17.00 167.00000 7
## 115 15.00 37.00000 21
## 116 33.00 436.00000 73
## 117 18.00 468.00000 89
## 118 77.77 482.00000 261
## 119 25.00 4.00000 6
## 120 25.00 158.00000 16
## 121 22.22 994.00000 3
## 122 3.00 25.00000 192
## 123 30.00 22.00000 13
## 124 30.00 2.00000 28
## 125 21.00 12.00000 3
## 126 35.00 353.00000 88
## 127 30.00 145.00000 11
## 128 10.00 516.00000 129
## 129 15.00 17.00000 5
## 130 6.00 531.00000 219
## 131 50.00 100.00000 24
## 132 20.00 253.00000 160
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.1004 1.0164 0.8696
## Proportion of Variance 0.4036 0.3443 0.2520
## Cumulative Proportion 0.4036 0.7480 1.0000
# Attach the first two columns of `pca$x` to the freelancer table so they can
# be plotted against `jobSuccess`. `pca` is created outside this excerpt;
# `pca$x` is assumed to hold per-row component scores in the same row order
# as `up` — TODO confirm.
pca.data <- up %>%
  # scale(x, scale = FALSE) centres each numeric column without rescaling it.
  mutate_if(is.numeric, scale, scale = FALSE) %>%
  mutate(pca1 = pca$x[, 1], pca2 = pca$x[, 2])

# Overall scatter of the two components, coloured by job-success class.
ggplot(pca.data) +
  aes(x = pca1, y = pca2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "pca plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  )

# Same scatter, one panel per country, rendered as an interactive plotly widget.
pca.plot <- ggplot(pca.data) +
  aes(x = pca1, y = pca2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "pca plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  ) +
  facet_wrap(~country)
plotly::ggplotly(pca.plot)

library(Rtsne)
# t-SNE embedding of `up.scale` (defined outside this excerpt).
# theta = 0 requests the exact (non-approximated) algorithm.
# The argument must be spelled `perplexity`: an unrecognised name is absorbed
# by `...` and the package default would be used instead.
tsne <- Rtsne(up.scale, perplexity = 5, theta = 0, max_iter = 900)

# Attach the two embedding coordinates (`tsne$Y`) to the freelancer table;
# numeric columns are centred (not rescaled) by scale(x, scale = FALSE).
tsne.data <- up %>%
  mutate_if(is.numeric, scale, scale = FALSE) %>%
  mutate(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2])

# Overall scatter of the embedding, coloured by job-success class.
ggplot(tsne.data) +
  aes(x = tsne1, y = tsne2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "tsne plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  )

# Same scatter, one panel per country, rendered as an interactive plotly widget.
tsne.plot <- ggplot(tsne.data) +
  aes(x = tsne1, y = tsne2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "tsne plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  ) +
  facet_wrap(~country)
plotly::ggplotly(tsne.plot)

library(umap)
# UMAP embedding of `up.scale` (defined outside this excerpt).
# After this assignment the variable `umap` holds the fitted result; calls to
# `umap(...)` still resolve to the package function, because R skips
# non-function bindings when looking up a name that is being called.
umap <- umap(
  up.scale,
  n_neighbors = 5,
  metric = "manhattan",
  n_epochs = 600,
  min_dist = 0.05
)

# Attach the two embedding coordinates (`umap$layout`) to the freelancer table;
# numeric columns are centred (not rescaled) by scale(x, scale = FALSE).
umap.data <- up %>%
  mutate_if(is.numeric, scale, scale = FALSE) %>%
  mutate(umap1 = umap$layout[, 1], umap2 = umap$layout[, 2])

# Per-country panels of the embedding, rendered as an interactive plotly widget.
umap.plot <- ggplot(umap.data) +
  aes(x = umap1, y = umap2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "umap plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  ) +
  facet_wrap(~country)
plotly::ggplotly(umap.plot)

# Overall (unfaceted) scatter of the same embedding.
ggplot(umap.data) +
  aes(x = umap1, y = umap2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "umap plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  )

library(kohonen)
# Self-organising map on a 3 x 3 rectangular, non-toroidal grid with a
# "bubble" neighbourhood. The variable `somgrid` holds the grid object; the
# call `somgrid(...)` still resolves to the package function.
somgrid <- somgrid(
  xdim = 3,
  ydim = 3,
  topo = "rectangular",
  neighbourhood.fct = "bubble",
  toroidal = FALSE
)
# Train on `up.scale` (defined outside this excerpt) for 2000 iterations with
# the learning rate given by alpha = c(0.1, 0.01).
som <- supersom(up.scale, somgrid, alpha = c(0.1, 0.01), rlen = 2000)
som
## SOM of size 3x3 with a rectangular topology.
## Training data included.
# Draw every diagnostic plot type in turn; the list that map() returns (and
# that gets printed below) is whatever each plot() call hands back.
type <- c("codes", "changes", "counts", "dist.neighbours", "mapping", "quality")
purrr::map(
  type,
  function(plot_kind) plot(som, type = plot_kind, shape = "straight")
)
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## [1] 17 4 1 6 18 50 6 10 20
##
## [[4]]
## 1 2 3 4 5 6 7 8
## 3.446503 18.118001 88.790006 4.722470 16.203448 23.714661 7.734587 3.168496
## 9
## 1.915159
##
## [[5]]
## NULL
##
## [[6]]
## [1] 4.070341e-01 7.336615e-01 3.439399e-27 9.602033e-01 2.106950e-01
## [6] 1.328706e-01 5.072100e-01 4.704987e-01 2.256991e-01
### Locally linear embedding (LLE)
library(Rdimtools)
# Two-dimensional LLE of `up.scale` (defined outside this excerpt) using a
# k-nearest-neighbour graph with k = 5.
lle <- do.lle(up.scale, 2, type = c("knn", 5))

# Attach the two embedding coordinates (`lle$Y`) to the freelancer table;
# numeric columns are centred (not rescaled) by scale(x, scale = FALSE).
lle.data <- up %>%
  mutate_if(is.numeric, scale, scale = FALSE) %>%
  mutate(lle1 = lle$Y[, 1], lle2 = lle$Y[, 2])

# Per-country panels of the embedding, rendered as an interactive plotly widget.
lle.plot <- ggplot(lle.data) +
  aes(x = lle1, y = lle2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "LLE plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  ) +
  facet_wrap(~country)
plotly::ggplotly(lle.plot)

# Overall (unfaceted) scatter of the same embedding.
ggplot(lle.data) +
  aes(x = lle1, y = lle2, colour = jobSuccess) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "LLE plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(
      size = 15L,
      face = "bold",
      hjust = 0.5
    )
  )
### Now for clustering
### k-means clustering
library(factoextra)
library(cluster)
library(clValid)
library(stats)
# Data-frame copy of `up.scale` (defined outside this excerpt), used by the
# clustering calls further down.
up.scale.df <- as.data.frame(up.scale)

# Choose the number of clusters: first with the gap statistic ...
fviz_nbclust(up.scale, FUNcluster = stats::kmeans, method = c("gap_stat"))

# ... then with clValid's internal validation measures (connectivity, Dunn,
# silhouette) for k = 2..10.
cluster <- clValid(up.scale, nClust = 2:10, clMethods = "kmeans", validation = "internal", metric = "euclidean")
cluster
##
## Call:
## clValid(obj = up.scale, nClust = 2:10, clMethods = "kmeans",
## validation = "internal", metric = "euclidean")
##
## Clustering Methods:
## kmeans
##
## Cluster sizes:
## 2 3 4 5 6 7 8 9 10
##
## Validation measures:
## Connectivity Dunn Silhouette
##
## Clustering Methods:
## kmeans
##
## Cluster sizes:
## 2 3 4 5 6 7 8 9 10
##
## Validation Measures:
## 2 3 4 5 6 7 8 9 10
##
## kmeans Connectivity 2.9290 23.6829 36.7937 41.9056 32.5548 34.8214 49.4087 52.2587 62.5762
## Dunn 1.3264 0.0826 0.0592 0.0618 0.1147 0.1147 0.0784 0.0784 0.0459
## Silhouette 0.8123 0.4533 0.5285 0.5320 0.4722 0.4779 0.4598 0.4550 0.3799
##
## Optimal Scores:
##
## Score Method Clusters
## Connectivity 2.9290 kmeans 2
## Dunn 1.3264 kmeans 2
## Silhouette 0.8123 kmeans 2
# All three internal validation measures reported above are optimal at two
# clusters, so fit k-means with k = 2 (100 random starts, up to 600 iterations).
kmean <- kmeans(up.scale.df, centers = 2, nstart = 100, iter.max = 600)
# Note: `kmeans` is bound to the ggplot object here; calls to `kmeans(...)`
# still resolve to stats::kmeans because R skips non-function bindings when
# looking up a name that is being called.
kmeans <- fviz_cluster(
  kmean,
  up.scale,
  repel = FALSE,
  ellipse.type = "euclid",
  star.plot = TRUE,
  geom = "text",
  show_labels = TRUE
) +
  theme(panel.background = element_rect(fill = "aliceblue"))
plotly::ggplotly(kmeans)
## Now for hierarchical clustering
# There are two main families of hierarchical clustering; this chunk validates
# the agglomerative one (agnes) with complete linkage for 3 to 6 clusters.
library(cluster)
# Run clValid once per validation family, print its summary, and return the
# fitted object so that map() collects (and prints) both results.
purrr::map(
  c("stability", "internal"),
  function(x) {
    agnes_complete <- clValid(up.scale.df, nClust = 3:6, clMethods = "agnes", validation = x, method = "complete", metric = "euclidean")
    summary(agnes_complete)
    agnes_complete
  }
)
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation Measures:
## 3 4 5 6
##
## agnes APN 0.2044 0.3179 0.1950 0.2499
## AD 1.6996 1.6279 1.3690 1.3284
## ADM 0.6024 0.7734 0.5897 0.5999
## FOM 0.9970 0.9964 0.9890 0.9848
##
## Optimal Scores:
##
## Score Method Clusters
## APN 0.1950 agnes 5
## AD 1.3284 agnes 6
## ADM 0.5897 agnes 5
## FOM 0.9848 agnes 6
##
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation Measures:
## 3 4 5 6
##
## agnes Connectivity 14.9357 22.1472 30.0139 31.9679
## Dunn 0.1219 0.1332 0.1090 0.1120
## Silhouette 0.5210 0.4426 0.5092 0.4949
##
## Optimal Scores:
##
## Score Method Clusters
## Connectivity 14.9357 agnes 3
## Dunn 0.1332 agnes 4
## Silhouette 0.5210 agnes 3
## [[1]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes",
## validation = x, metric = "euclidean", method = "complete")
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation measures:
## APN AD ADM FOM
##
##
## [[2]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes",
## validation = x, metric = "euclidean", method = "complete")
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation measures:
## Connectivity Dunn Silhouette
# Repeat the agnes validation with Ward linkage: run clValid once per
# validation family, print its summary, and return the fitted object so that
# map() collects (and prints) both results.
purrr::map(
  c("stability", "internal"),
  function(x) {
    agnes_ward <- clValid(up.scale.df, nClust = 3:6, clMethods = "agnes", validation = x, method = "ward", metric = "euclidean")
    summary(agnes_ward)
    agnes_ward
  }
)
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation Measures:
## 3 4 5 6
##
## agnes APN 0.2608 0.2848 0.2774 0.3946
## AD 1.5385 1.3983 1.3119 1.2478
## ADM 0.6838 0.7417 0.7004 0.6864
## FOM 0.9870 0.9854 0.9768 0.9728
##
## Optimal Scores:
##
## Score Method Clusters
## APN 0.2608 agnes 3
## AD 1.2478 agnes 6
## ADM 0.6838 agnes 3
## FOM 0.9728 agnes 6
##
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation Measures:
## 3 4 5 6
##
## agnes Connectivity 16.7937 20.5849 23.4762 29.0337
## Dunn 0.0965 0.1323 0.1504 0.1758
## Silhouette 0.4688 0.5328 0.5400 0.5218
##
## Optimal Scores:
##
## Score Method Clusters
## Connectivity 16.7937 agnes 3
## Dunn 0.1758 agnes 6
## Silhouette 0.5400 agnes 5
## [[1]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes",
## validation = x, metric = "euclidean", method = "ward")
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation measures:
## APN AD ADM FOM
##
##
## [[2]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes",
## validation = x, metric = "euclidean", method = "ward")
##
## Clustering Methods:
## agnes
##
## Cluster sizes:
## 3 4 5 6
##
## Validation measures:
## Connectivity Dunn Silhouette
library(cluster)
## cluster::agnes
# Agglomerative clustering of `up.scale` (Euclidean distance, complete linkage).
hcut.agg <- agnes(up.scale, metric = "euclidean", method = "complete")
# Cluster membership for a three-cluster cut of the tree.
h.clust <- cutree(hcut.agg, k = 3)
# Phylogenic-style dendrogram coloured by the same three-cluster cut.
fviz_dend(hcut.agg, k = 3, type = "phylogenic", show_labels = TRUE)
# Divisive clustering
# Validate divisive clustering (diana) for 3 to 10 clusters: run clValid once
# per validation family, print its summary, and return the fitted object so
# that map() collects (and prints) both results.
purrr::map(
  c("stability", "internal"),
  function(x) {
    diana_validation <- clValid(up.scale.df, nClust = 3:10, clMethods = "diana", metric = "euclidean", validation = x)
    summary(diana_validation)
    diana_validation
  }
)
##
## Clustering Methods:
## diana
##
## Cluster sizes:
## 3 4 5 6 7 8 9 10
##
## Validation Measures:
## 3 4 5 6 7 8 9 10
##
## diana APN 0.1546 0.2409 0.1547 0.2955 0.2345 0.3442 0.3424 0.3625
## AD 1.6351 1.5565 1.3353 1.2921 1.1913 1.1487 1.1315 1.1078
## ADM 0.5387 0.7010 0.6198 0.6364 0.6363 0.6554 0.6591 0.6474
## FOM 1.0023 0.9910 0.9871 0.9705 0.9737 0.9740 0.9726 0.9722
##
## Optimal Scores:
##
## Score Method Clusters
## APN 0.1546 diana 3
## AD 1.1078 diana 10
## ADM 0.5387 diana 3
## FOM 0.9705 diana 6
##
##
## Clustering Methods:
## diana
##
## Cluster sizes:
## 3 4 5 6 7 8 9 10
##
## Validation Measures:
## 3 4 5 6 7 8 9 10
##
## diana Connectivity 22.3766 27.0302 30.9976 34.2901 43.7020 45.2405 48.0028 52.3552
## Dunn 0.0978 0.1021 0.1449 0.1529 0.0872 0.1003 0.1016 0.1167
## Silhouette 0.4724 0.4203 0.5147 0.4548 0.4063 0.4275 0.4229 0.4293
##
## Optimal Scores:
##
## Score Method Clusters
## Connectivity 22.3766 diana 3
## Dunn 0.1529 diana 6
## Silhouette 0.5147 diana 5
## [[1]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:10, clMethods = "diana",
## validation = x, metric = "euclidean")
##
## Clustering Methods:
## diana
##
## Cluster sizes:
## 3 4 5 6 7 8 9 10
##
## Validation measures:
## APN AD ADM FOM
##
##
## [[2]]
##
## Call:
## clValid(obj = up.scale.df, nClust = 3:10, clMethods = "diana",
## validation = x, metric = "euclidean")
##
## Clustering Methods:
## diana
##
## Cluster sizes:
## 3 4 5 6 7 8 9 10
##
## Validation measures:
## Connectivity Dunn Silhouette
# Divisive hierarchical clustering of the scaled data (Euclidean distance).
diana.clust <- cluster::diana(up.scale.df, metric = "euclidean")
# Cluster membership for a three-cluster cut of the tree.
diana.cut <- cutree(diana.clust, k = 3)
# Phylogenic-style dendrogram coloured by the same three-cluster cut.
fviz_dend(diana.clust, k = 3, type = "phylogenic")
# Model-based clustering
# Automated model selection: Mclust() is called with defaults only, so both
# the covariance model and the number of components are chosen by the package.
library(mclust)
mc <- Mclust(up.scale.df)
# The summary below reports a 6-component VVI model.
summary(mc)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VVI (diagonal, varying volume and shape) model with 6 components:
##
## log-likelihood n df BIC ICL
## -260.6433 132 41 -721.4815 -747.9398
##
## Clustering table:
## 1 2 3 4 5 6
## 35 23 6 22 19 27
# Label every row of the PCA table with its Mclust component so the mixture
# clusters can be viewed in the pca1/pca2 plane.
mc.clust <- cbind(pca.data, cluster = factor(mc$classification))
ggplot(mc.clust) +
  aes(x = pca1, y = pca2, colour = cluster) +
  geom_point(shape = "circle", size = 1.5) +
  scale_color_hue(direction = 1) +
  theme_minimal()

# Stability validation of model-based clustering for 2 to 10 clusters.
mc.optimal <- clValid(up.scale.df, nClust = 2:10, clMethods = "model", validation = "stability")
summary(mc.optimal)
##
## Clustering Methods:
## model
##
## Cluster sizes:
## 2 3 4 5 6 7 8 9 10
##
## Validation Measures:
## 2 3 4 5 6 7 8 9 10
##
## model APN 0.1678 0.3032 0.2423 0.3236 0.3689 0.3055 0.3228 NA NA
## AD 1.5960 1.5588 1.3499 1.3522 1.2660 1.2455 1.2078 NA NA
## ADM 0.5478 0.7527 0.6713 0.7283 0.7567 0.7874 0.7516 NA NA
## FOM 0.9821 0.9718 0.9542 0.9607 0.9460 0.9771 0.9809 NA NA
##
## Optimal Scores:
##
## Score Method Clusters
## APN 0.1678 model 2
## AD 1.2078 model 8
## ADM 0.5478 model 2
## FOM 0.9460 model 6
## [[1]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 4
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
##
## 0 1
## 1 131
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[2]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 5
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
##
## 0 1
## 1 131
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[3]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 6
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
##
## 0 1
## 1 131
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[4]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 7
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
##
## 0 1
## 1 131
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[5]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 8
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
##
## 0 1
## 2 130
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[6]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 9
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
##
## 0 1
## 2 130
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[7]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 10
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
##
## 0 1
## 2 130
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[8]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 11
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[9]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 12
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[10]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 13
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[11]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 14
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[12]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 15
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[13]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 16
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[14]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 17
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[15]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 18
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[16]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 19
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[17]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 20
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[18]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 21
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
##
## 0 1
## 3 129
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[19]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 22
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[20]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 23
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[21]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 24
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[22]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 25
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[23]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 26
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[24]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 27
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
##
## 0 1
## 7 125
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[25]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 28
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
##
## 0 1
## 8 124
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[26]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 29
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
##
## 0 1
## 8 124
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[27]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 30
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
##
## 0 1
## 8 124
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[28]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 31
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
##
## 0 1
## 8 124
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[29]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 32
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
##
## 0 1
## 8 124
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[30]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 33
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
##
## 0 1
## 9 123
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[31]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 34
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
##
## 0 1
## 9 123
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[32]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 35
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
##
## 0 1
## 9 123
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[33]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 36
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
##
## 0 1
## 10 122
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[34]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 37
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
##
## 0 1
## 10 122
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[35]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 38
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
##
## 0 1
## 10 122
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[36]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 39
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
##
## 0 1
## 10 122
##
## Available fields: cluster, eps, minPts, dist, borderPoints
##
## [[37]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 40
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
##
## 0 1
## 10 122
##
## Available fields: cluster, eps, minPts, dist, borderPoints
## dbscan Pts=132 MinPts=4 eps=1.8
## 0 1
## border 1 1
## seed 0 130
## total 1 131
## [[1]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 2
## The clustering contains 38 cluster(s) and 23 noise points.
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 23 2 2 5 2 3 3 2 7 3 2 5 2 2 3 2 3 2 3 2 2 2 2 3 2 2
## 26 27 28 29 30 31 32 33 34 35 36 37 38
## 3 4 2 3 2 3 5 2 2 2 2 3 8
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[2]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 3
## The clustering contains 13 cluster(s) and 54 noise points.
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 54 3 3 3 12 4 5 9 14 7 5 3 3 7
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[3]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 4
## The clustering contains 2 cluster(s) and 28 noise points.
##
## 0 1 2
## 28 13 91
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[4]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 5
## The clustering contains 3 cluster(s) and 51 noise points.
##
## 0 1 2 3
## 51 11 63 7
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[5]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 6
## The clustering contains 3 cluster(s) and 55 noise points.
##
## 0 1 2 3
## 55 10 61 6
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[6]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 7
## The clustering contains 2 cluster(s) and 35 noise points.
##
## 0 1 2
## 35 10 87
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[7]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 8
## The clustering contains 2 cluster(s) and 35 noise points.
##
## 0 1 2
## 35 10 87
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
##
## [[8]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 9
## The clustering contains 2 cluster(s) and 34 noise points.
##
## 0 1 2
## 34 10 88
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 4
## The clustering contains 2 cluster(s) and 28 noise points.
##
## 0 1 2
## 28 13 91
##
## Available fields: cluster, minPts, coredist, cluster_scores,
## membership_prob, outlier_scores, hc
## country hourlyRate jobSuccess locality
## 1 Egypt -9.96174242 high Giza
## 2 Serbia -19.96174242 high Belgrade
## 3 Philippines 25.03825758 high Manila
## 4 United States 465.03825758 high Wyckoff
## 5 Pakistan -9.96174242 low Karachi
## 6 India -14.96174242 low Bhopal
## 7 India 0.03825758 high Katch
## 8 Pakistan 25.02825758 high Karachi
## 9 Bangladesh -4.96174242 high Dhaka
## 10 United States 10.03825758 <NA> Las Cruces
## 11 Pakistan -19.96174242 high Karachi
## 12 Egypt 25.03825758 low Aswan
## 13 India -9.96174242 low New Delhi
## 14 United States 65.03825758 high Edison
## 15 Ethiopia -9.96174242 <NA> Addis Ababa
## 16 Pakistan -14.96174242 high Gujrat
## 17 India -9.96174242 low New Delhi
## 18 Nigeria -19.97174242 low Lekki Peninsula
## 19 Finland 5.03825758 low Oulu
## 20 Pakistan 10.03825758 low Peshawar
## 21 India -4.96174242 low Delhi
## 22 India 5.03825758 high Noida
## 23 Cyprus 30.03825758 high Limassol
## 24 India -26.97174242 high Mohali
## 25 India 0.03825758 low New Delhi
## 26 Pakistan -24.96174242 low Lahore
## 27 Turkey 115.03825758 low Istanbul
## 28 India -24.96174242 high New Delhi
## 29 Pakistan -29.46174242 high Bahawalpur
## 30 <NA> -16.96174242 high Elk Grove Village
## 31 Pakistan -4.96174242 <NA> Karachi
## 32 Egypt -4.96174242 high Cairo
## 33 India -19.96174242 high Indore
## 34 Pakistan -4.96174242 high Bahawalpur
## 35 Pakistan -4.96174242 high Lahore
## 36 Pakistan 3.03825758 high Karachi
## 37 Egypt 5.03825758 high Zagazig
## 38 Bangladesh -28.97174242 high Sylhet
## 39 India -12.46174242 high Bhutli
## 40 Pakistan -14.96174242 <NA> Karachi
## 41 <NA> -14.96174242 high Stone Mountain
## 42 Pakistan -24.96174242 high Faisalabad
## 43 Pakistan -30.96174242 low Rajanpur
## 44 Bangladesh -14.96174242 high Dhaka
## 45 India -9.96174242 high Gurgaon
## 46 Pakistan -4.96174242 high Karachi
## 47 India -9.96174242 high Mohali
## 48 India -4.96174242 high Vadodara
## 49 Pakistan 1.03825758 high Islamabad
## 50 India -6.96174242 high Ghaziabad
## 51 Pakistan -24.96174242 high Islamabad
## 52 India -5.96174242 high Jaipur
## 53 Bangladesh -29.96174242 high Khulna
## 54 Germany 5.03825758 high Igersheim
## 55 Australia -4.96174242 low Holroyd
## 56 Bangladesh 0.03825758 high Dhaka
## 57 Saudi Arabia -14.96174242 high Riyadh
## 58 Bangladesh -28.46174242 low Rajshahi
## 59 Pakistan 35.03825758 <NA> Lahore
## 60 Nigeria -19.96174242 high Port Harcourt
## 61 Bangladesh -24.96174242 <NA> Brahmanbaria
## 62 Pakistan -14.96174242 high Quetta
## 63 Latvia -14.96174242 <NA> Riga
## 64 India 10.03825758 high Bengaluru
## 65 India -30.52174242 high Coimbatore, Tamil Nadu
## 66 <NA> 90.03825758 high Clovis
## 67 Philippines -27.96174242 high Mandaluyong City
## 68 Pakistan -14.96174242 high Lahore
## 69 Pakistan -14.96174242 low Peshawar
## 70 Pakistan -9.96174242 high Lahore
## 71 Pakistan 0.03825758 high Quetta
## 72 Pakistan -9.96174242 high Islamabad
## 73 Bangladesh 25.03825758 high Dhaka
## 74 Pakistan -27.96174242 high Allah Abad
## 75 Israel -14.96174242 <NA> Qiryat Motsqin
## 76 Egypt -9.96174242 <NA> Alexandria
## 77 Egypt -29.96174242 <NA> Shubra al Khaymah
## 78 Algeria 3.03825758 low Bourkika
## 79 Pakistan -4.96174242 high Chashma, Mianwali
## 80 India 15.03825758 high Delhi
## 81 India -9.96174242 high New Delhi
## 82 France -14.96174242 low Velizy-Villacoublay
## 83 India 0.03825758 high Bathinda
## 84 Pakistan -22.96174242 low Rawalpindi
## 85 Morocco 15.03825758 low Casablanca
## 86 Pakistan -27.96174242 high Haveli Lakha Okara
## 87 Pakistan -19.96174242 high Lahore
## 88 Egypt -4.96174242 high Cairo
## 89 Algeria 15.03825758 low Algiers
## 90 United States 10.03825758 high Leesburg
## 91 Brazil 20.03825758 high Sao Paulo
## 92 India -9.96174242 low Chandigarh
## 93 Brazil -16.96174242 <NA> Recife
## 94 Egypt -9.96174242 high Cairo
## 95 Pakistan 0.03825758 low Karachi
## 96 India 15.03825758 high Hyderabad
## 97 Yemen -18.96174242 high Taiz
## 98 United States 90.03825758 high Chicago
## 99 Pakistan -14.96174242 high Karachi
## 100 Bangladesh -4.96174242 low Mymensingh
## 101 Philippines -9.96174242 low Cagayan de Oro City
## 102 United States 85.03825758 high Chicago
## 103 United States 40.03825758 high Berkeley
## 104 Bangladesh 10.03825758 high Dhaka
## 105 United States 15.03825758 high Lagrangeville
## 106 Egypt 15.03825758 high Cairo
## 107 Ukraine 25.03825758 high Kharkiv
## 108 Pakistan 5.03825758 low Sialkot
## 109 Nepal -9.96174242 low Kathmandu
## 110 Egypt -24.96174242 high Alexandria
## 111 Bangladesh -20.01174242 high Khulna
## 112 Egypt -14.96174242 low Cairo
## 113 Pakistan -23.85174242 low Bahawalpur
## 114 Pakistan -17.96174242 <NA> Lahore
## 115 India -19.96174242 low Bengaluru
## 116 <NA> -1.96174242 high Fort Pierce
## 117 Bangladesh -16.96174242 high Bogra
## 118 Canada 42.80825758 high Vancouver
## 119 Pakistan -9.96174242 low Multan
## 120 Pakistan -9.96174242 high Lahore
## 121 Mexico -12.74174242 <NA> Tijuana
## 122 Pakistan -31.96174242 low Quetta
## 123 Georgia -4.96174242 high Tbilisi
## 124 Nigeria -4.96174242 high Okitipupa
## 125 Pakistan -13.96174242 high Lahore
## 126 Kenya 0.03825758 high Nairobi
## 127 India -4.96174242 high Noida
## 128 Pakistan -24.96174242 low Lodhran
## 129 Argentina -19.96174242 high Buenos Aires
## 130 Bangladesh -28.96174242 high Jessore
## 131 Morocco 15.03825758 high Casablanca
## 132 India -14.96174242 high Dehradun
## name
## 1 Abeer K.
## 2 Nikola M.
## 3 Ramil, Jr. A.
## 4 Jason K.
## 5 Muhammad Noman B.
## 6 Pragyesh S.
## 7 Devendra R.
## 8 Dr . M Mansoor M.
## 9 Mahabubur R.
## 10 Trenton S.
## 11 Fasih U.
## 12 Islam A.
## 13 Ajay B.
## 14 Shankar M.
## 15 Yabebal F.
## 16 Wasi U.
## 17 Uttam A.
## 18 Emmanuel O.
## 19 Doreen B.
## 20 Muhammad Ameer H.
## 21 Mani V.
## 22 Nidhi B.
## 23 Nick V.
## 24 Sakshi B.
## 25 Rajveer S.
## 26 Muhammad N.
## 27 Pavel A.
## 28 Snehit D.
## 29 Muhammad Gulraiz H.
## 30 Muhammad U.
## 31 Aveen F.
## 32 Mahmoud D.
## 33 Parv Y.
## 34 Junaid U.
## 35 Zeeshan A.
## 36 Sana S.
## 37 Zeyad M.
## 38 Bashir A.
## 39 Ajit T.
## 40 Zain A.
## 41 Mfonabasi I.
## 42 Ahmad I.
## 43 Rajab S.
## 44 Md Omor Faruk B.
## 45 Noushad A.
## 46 Nabeel F.
## 47 vikas k.
## 48 Rutvik P.
## 49 Malik Haseeb H.
## 50 Varun D.
## 51 Muhammad M.
## 52 Pratik D.
## 53 Eishwar c.
## 54 Marvin S.
## 55 Fahad A.
## 56 MD. Shahriar Al Kasib Khan S.
## 57 Ahmed o.
## 58 Sumy A.
## 59 Aamir I.
## 60 Ugonna B.
## 61 Ashraful Islam E.
## 62 Bakht M.
## 63 Ebrahim A.
## 64 Arjun V.
## 65 Naveen K.
## 66 Michael E.
## 67 Marlon B.
## 68 Ishtiaque A.
## 69 Syeda F.
## 70 Ali A.
## 71 Abdul R.
## 72 Khan Bahadar K.
## 73 Muhammad Sarwar Jahan M.
## 74 Abdul W.
## 75 Aleksei Z.
## 76 Ahmed S.
## 77 Yousef W.
## 78 Madjid E.
## 79 Muhammad Faheem S.
## 80 Ishaan N.
## 81 Priyanka R.
## 82 yassine g.
## 83 Rohit K.
## 84 Faizan Z.
## 85 Ismail H.
## 86 Zahida A.
## 87 Soban A.
## 88 Salma H.
## 89 Ahlem K.
## 90 Dave C.
## 91 Paulo O.
## 92 Shubham S.
## 93 Sergio P.
## 94 Mohamed A.
## 95 Arif S.
## 96 K R.
## 97 Mohammed A.
## 98 James S.
## 99 Faiz R.
## 100 Musabbir Ahmed A.
## 101 Roy L.
## 102 Chris H.
## 103 Matthew S.
## 104 Khan M.
## 105 Terrence C.
## 106 Ehab K.
## 107 Konstantin B.
## 108 Ehtisham R.
## 109 Manish A.
## 110 Hossam H.
## 111 Sk. Shahidul I.
## 112 Nesma A.
## 113 Abdul M.
## 114 AILYAH .
## 115 Sahaj G.
## 116 Misty M.
## 117 Md Mujammel H.
## 118 Robert O.
## 119 Muhammad Umer F.
## 120 Muhammad Junaid A.
## 121 Amaury H.
## 122 Faizan S.
## 123 Luka S.
## 124 Olusegun F.
## 125 Meer Naib K.
## 126 Dennis M.
## 127 Virender B.
## 128 Ali Y.
## 129 Matias W.
## 130 Antu C.
## 131 Adnane R.
## 132 Parthsarthi D.
## skill1
## 1 Python
## 2 Data Entry
## 3 Microsoft Excel
## 4 Deep Learning
## 5 Data Science
## 6 Adobe Illustrator
## 7 Python
## 8 Research Paper Writing
## 9 Microsoft Power BI
## 10 Microsoft SQL Server
## 11 Apache Spark
## 12 Microsoft Power BI
## 13 Python
## 14 Python
## 15 Python
## 16 Scrapy
## 17 Python
## 18 Data Entry
## 19 Accounting
## 20 Microsoft Excel
## 21 Python
## 22 Python
## 23 Tableau
## 24 SEO Writing
## 25 Data Visualization
## 26 Python
## 27 Data Science
## 28 Python
## 29 Data Cleaning
## 30 Market Research
## 31 pandas
## 32 Microsoft Power BI
## 33 Python
## 34 Data Engineering
## 35 Python
## 36 Presentation Design
## 37 OpenCV
## 38 Data Extraction
## 39 Google Sheets
## 40 Apache Spark
## 41 Microsoft Word
## 42 Python
## 43 Email Marketing
## 44 Web Scraping
## 45 Data Mining
## 46 Data Visualization
## 47 Java
## 48 Tableau
## 49 Artificial Neural Network
## 50 SQL
## 51 Microsoft Office
## 52 Neo4j
## 53 List Building
## 54 Python
## 55 WordPress
## 56 Machine Learning
## 57 Synthetic Chemistry
## 58 Data Scraping
## 59 RStudio
## 60 Microsoft Office
## 61 Microsoft Excel PowerPivot
## 62 Coding Lesson
## 63 Databricks Platform
## 64 Looker
## 65 Real Estate
## 66 Microsoft PowerPoint
## 67 Data Entry
## 68 Tableau
## 69 Search Engine Optimization Close the tooltip \n Search engine optimization (SEO) is the process of improving the visibility of a website or a web page in search engines via the "natural" or un-paid ("organic" or "algorithmic") search results.\n \n wikipedia.org
## 70 Machine Learning
## 71 Machine Learning
## 72 Python
## 73 Python
## 74 Data Entry
## 75 R
## 76 Data Visualization
## 77 Microsoft Power BI
## 78 Data Processing
## 79 Python
## 80 Python
## 81 Machine Learning
## 82 Microsoft Power BI
## 83 PostgreSQL
## 84 WordPress
## 85 Keras
## 86 Social Media Lead Generation
## 87 Deep Neural Network
## 88 Python
## 89 Agile Project Management
## 90 Python
## 91 Python
## 92 pandas
## 93 Microsoft Power BI
## 94 Data Science
## 95 Data Science
## 96 Scala
## 97 Data Analysis
## 98 Data Analysis
## 99 Python
## 100 Deep Learning
## 101 Microsoft SQL Server
## 102 Product Management
## 103 R
## 104 Data Extraction
## 105 Linear Regression
## 106 Python Scikit-Learn
## 107 Amazon Web Services
## 108 Microsoft Excel
## 109 Project Plans
## 110 Microsoft Power BI
## 111 Amazon
## 112 Deep Neural Network
## 113 Python
## 114 Google Sheets
## 115 Data Science
## 116 Data Entry
## 117 Influencer Marketing
## 118 Search Engine Optimization
## 119 Exploratory Data Analysis
## 120 TypeScript
## 121 Data Science
## 122 List Building
## 123 Python
## 124 Python
## 125 Python Script
## 126 Data Science
## 127 SQL
## 128 Online Research
## 129 Python
## 130 Data Entry
## 131 Python
## 132 Graphic Design
## skill2
## 1 Machine Learning
## 2 Google Docs
## 3 Excel Formula
## 4 Python
## 5 Machine Learning
## 6 MATLAB
## 7 Odoo Development
## 8 Medical Writing
## 9 Dashboard
## 10 Python
## 11 Data Science
## 12 Python
## 13 Full-Stack Development
## 14 Data Science
## 15 SQL
## 16 Selenium
## 17 JavaScript
## 18 Microsoft Excel
## 19 Academic Research
## 20 Microsoft Power BI
## 21 Machine Learning Framework
## 22 Computer Vision
## 23 Looker Studio
## 24 Search Engine Optimization
## 25 Looker Studio
## 26 Data Scraping
## 27 Deep Learning
## 28 Scrapy
## 29 Lead Generation Strategy
## 30 Lead Generation
## 31 Machine Learning
## 32 Microsoft Excel
## 33 Lead Generation
## 34 Python
## 35 Selenium
## 36 Data Entry
## 37 R
## 38 Email List
## 39 Presentations
## 40 Informatica
## 41 Microsoft Excel
## 42 TensorFlow
## 43 Data Cleaning
## 44 Selenium
## 45 Scrapy
## 46 SQL
## 47 Ruby on Rails
## 48 R
## 49 Python
## 50 Looker
## 51 Social Media Lead Generation
## 52 Oracle PLSQL
## 53 Lead Generation
## 54 Matplotlib
## 55 Microsoft Visio
## 56 Microsoft Power BI
## 57 Product Formulation
## 58 LinkedIn Sales Navigator
## 59 Tableau
## 60 Google Docs
## 61 Data Science
## 62 Software Development
## 63 Classification
## 64 Data Visualization
## 65 List Building
## 66 Dashboard
## 67 Data Scraping
## 68 R
## 69 Biology Close the tooltip \n Biology is a natural science concerned with the study of life and living organisms, including their structure, function, growth, evolution, distribution, and taxonomy.\n \n wikipedia.org
## 70 Scrapy
## 71 Deep Learning
## 72 Technical Writing
## 73 Feature Extraction
## 74 Lead Generation
## 75 Python
## 76 Python
## 77 Python
## 78 SQL
## 79 Apache Spark
## 80 Microsoft Excel
## 81 Elasticsearch
## 82 Data Science
## 83 TensorFlow
## 84 Blockchain
## 85 Machine Learning
## 86 Google Sheets
## 87 Computer Vision
## 88 Machine Learning
## 89 Machine Learning
## 90 React
## 91 SQL
## 92 SQL
## 93 Data Analysis
## 94 Python
## 95 ChatGPT
## 96 Python
## 97 Data Scraping
## 98 R
## 99 SQL
## 100 Model Optimization
## 101 Database Design
## 102 Project Management
## 103 Data Visualization
## 104 Python
## 105 IBM SPSS
## 106 NumPy
## 107 Google Cloud Platform
## 108 AppleScript
## 109 Budget Management
## 110 Seaborn
## 111 Amazon FBA
## 112 Apache Spark
## 113 SQL
## 114 Python
## 115 SQL
## 116 List Building
## 117 Online Research
## 118 Sales
## 119 Machine Learning
## 120 JavaScript
## 121 Python
## 122 Data Entry
## 123 NumPy
## 124 Linear Regression
## 125 Data Analysis
## 126 Python
## 127 Python
## 128 Quantitative Research
## 129 Matplotlib
## 130 Data Extraction
## 131 Business Process Automation
## 132 YouTube Marketing
## skill3 skill4 totalHours
## 1 Scrapy Data Science 98.29268
## 2 Online Research Lead Generation 47.29268
## 3 Excel Macros Visual Basic for Applications -291.70732
## 4 Data Science Machine Learning 127.29268
## 5 pandas Microsoft Power BI -302.70732
## 6 Political Science Python -292.70732
## 7 Artificial Intelligence Odoo 610.29268
## 8 Medical Editing Academic Research -94.70732
## 9 SQL Tableau 40.29268
## 10 SQL Data Analysis -246.70732
## 11 Data Visualization Python NA
## 12 SQL Data Scraping -256.70732
## 13 Back-End Development Front-End Development 595.29268
## 14 Chief Architect Google Cloud Platform 213.29268
## 15 Amazon Web Services GitHub -209.70732
## 16 Web Scraping Python 375.29268
## 17 Dashboard Looker Studio 514.29268
## 18 Problem Solving Google Analytics -227.70732
## 19 Health Science Psychology -237.70732
## 20 Data Visualization Looker Studio 649.29268
## 21 Artificial Intelligence React 399.29268
## 22 Natural Language Processing Python Scikit-Learn -97.70732
## 23 SQL Business Intelligence -96.70732
## 24 Meeting Notes PHP -128.70732
## 25 Google Sheets JavaScript 402.29268
## 26 List Building Lead Generation NA
## 27 Natural Language Generation Natural Language Processing 610.29268
## 28 MySQL Python Script -79.70732
## 29 Lead Generation Data Entry 689.29268
## 30 Data Entry List Building 190.29268
## 31 Python Data Science -284.70732
## 32 Business Analysis Excel Macros -218.70732
## 33 Looker Studio Data Science -235.70732
## 34 Data Integration SQL 135.29268
## 35 Bot Development Data Science -298.70732
## 36 Microsoft Office Infographic 456.29268
## 37 Python Scikit-Learn Feature Extraction -224.70732
## 38 Data Entry List Building -199.70732
## 39 Microsoft Excel Data Visualization 106.29268
## 40 SQL Python -270.70732
## 41 Data Entry Google Docs 491.29268
## 42 Python Scikit-Learn Deep Learning -94.70732
## 43 Lead Generation Salesforce CRM -85.70732
## 44 Data Scraping Beautiful Soup 300.29268
## 45 Python Data Extraction 430.29268
## 46 Apache Hive BigQuery -124.70732
## 47 Node.js Vue.js 198.29268
## 48 Python Microsoft Power BI 678.29268
## 49 Machine Learning Deep Learning -32.70732
## 50 Microsoft Power BI Business Intelligence -247.70732
## 51 Data Entry Lead Generation 54.29268
## 52 Content Writing Jakarta Server Pages 625.29268
## 53 Copy & Paste Data Entry 634.29268
## 54 Plotly Data Analysis -226.70732
## 55 Microsoft Office Business Process Modeling -119.70732
## 56 SQL Python -250.70732
## 57 Inorganic Chemistry Cosmetics 683.29268
## 58 Data Collection Database Management -90.70732
## 59 Data Analysis Microsoft Excel NA
## 60 Data Entry Online Research -277.70732
## 61 Data Analysis Financial Analysis -305.70732
## 62 Technology Tutoring Web Application -113.70732
## 63 Java Apache Spark NA
## 64 Machine Learning Model Domo 126.29268
## 65 Data Entry Lead Generation -215.70732
## 66 Data Visualization Tableau 673.29268
## 67 Bookkeeping Cost Accounting -34.70732
## 68 Data Visualization SQL -254.70732
## 69 Editing & Proofreading Citation Style 216.29268
## 70 SQL Python -281.70732
## 71 Python Data Science -118.70732
## 72 Academic Research Research Proposals -305.70732
## 73 Deep Learning Natural Language Processing -283.70732
## 74 List Building Google Docs 514.29268
## 75 Microsoft Power BI Data Analysis NA
## 76 SQL Tableau -289.70732
## 77 Microsoft Excel Data Analysis NA
## 78 Python Tableau -260.70732
## 79 Machine Learning Data Science -42.70732
## 80 Statistics Data Science -34.70732
## 81 MongoDB Kibana 601.29268
## 82 Microsoft Excel Data Mining 11.29268
## 83 Django NumPy 554.29268
## 84 Data Scraping Data Mining -185.70732
## 85 Python Apache Spark -201.70732
## 86 Report Social Media Marketing 76.29268
## 87 Python PyTorch 170.29268
## 88 pandas Python Scikit-Learn -300.70732
## 89 Data Analysis Scrum -278.70732
## 90 Node.js Mobile App Development -240.70732
## 91 Microsoft Power BI Data Analysis -103.70732
## 92 Python Matplotlib 73.29268
## 93 Machine Learning Matplotlib NA
## 94 Climate Science Data Scraping -297.70732
## 95 Angular .NET Core -226.70732
## 96 Data Science Machine Learning -280.70732
## 97 Browser Automation Python -218.70732
## 98 Stata Statistics 221.29268
## 99 Amazon SageMaker Natural Language Processing -263.70732
## 100 Computer Vision Data Science NA
## 101 Database Administration SQL Server Integration Services 86.29268
## 102 Machine Learning Big Data -128.70732
## 103 Python Scientific Research -284.70732
## 104 Google Sheets Automation -302.70732
## 105 Machine Learning Data Visualization 499.29268
## 106 Deep Learning Natural Language Processing -277.70732
## 107 Docker Kubernetes -274.70732
## 108 Zapier Google Sheets 39.29268
## 109 Agile Project Management Technical Project Management -157.70732
## 110 Microsoft Excel Machine Learning NA
## 111 Amazon Seller Central Amazon Wholesale -118.70732
## 112 Java Python -304.70732
## 113 Data Analysis Data Analytics -275.70732
## 114 Data Mining Django -139.70732
## 115 Machine Learning Model Python -269.70732
## 116 Company Research Database Management 129.29268
## 117 Twitter/X Ads Manager WordPress 161.29268
## 118 Copywriting Conversion Rate Optimization 175.29268
## 119 WooCommerce Interactive Data Visualization -302.70732
## 120 Django Angular -148.70732
## 121 Machine Learning Deep Learning 687.29268
## 122 Lead Generation LinkedIn Recruiting -281.70732
## 123 pandas MySQL -284.70732
## 124 Mathematics Tableau -304.70732
## 125 Jupyter Notebook MySQL -294.70732
## 126 Model Optimization Apache Spark 46.29268
## 127 JavaScript Elasticsearch -161.70732
## 128 Market Research Competitive Analysis 209.29268
## 129 Plotly SQL -289.70732
## 130 List Building Data Scraping 224.29268
## 131 UiPath JavaScript -206.70732
## 132 Social Media Marketing Strategy Data Science -53.70732
## totalJobs pca1 pca2 cluster
## 1 -5.7121212 -0.1904780785 0.11902678 2
## 2 -6.7121212 -0.0222205774 0.32776170 2
## 3 276.2878788 -1.2609284683 0.74672602 0
## 4 5.2878788 -2.2092244335 -8.91026876 0
## 5 -63.7121212 1.1536535877 0.05999681 2
## 6 -0.7121212 0.7325519907 0.40289790 2
## 7 -20.7121212 -1.3568555211 -0.35234145 1
## 8 54.2878788 -0.2635653195 -0.22539945 0
## 9 -53.7121212 0.2463103139 -0.14322740 2
## 10 -72.7121212 1.0000923627 -0.38161966 2
## 11 -62.7121212 0.9752671607 0.21672641 2
## 12 -50.7121212 0.8191959657 -0.57549742 2
## 13 85.2878788 -1.9828896482 0.26859717 0
## 14 63.2878788 -1.2187814053 -1.08536355 0
## 15 -72.7121212 0.9904956137 -0.01605936 2
## 16 -7.7121212 -0.8208487469 0.08703093 1
## 17 -29.7121212 -1.0278917203 -0.15619199 1
## 18 -42.7121212 0.8746199792 0.30256287 2
## 19 84.2878788 -0.0406651275 0.33746507 0
## 20 -37.7121212 -1.3772593820 -0.62785742 1
## 21 1.2878788 -0.9773872428 -0.07808896 1
## 22 -46.7121212 0.4909638748 -0.24652552 2
## 23 -45.7121212 0.3831881443 -0.71984167 2
## 24 289.2878788 -1.5318411633 1.72055081 0
## 25 90.2878788 -1.5932871918 0.18100746 0
## 26 -48.7121212 0.9023739883 0.36806608 2
## 27 -14.7121212 -1.8508724051 -2.52201699 0
## 28 -35.7121212 0.4935804319 0.36194722 2
## 29 134.2878788 -2.4552254966 0.79592582 0
## 30 21.2878788 -0.5618196906 0.32083397 2
## 31 -71.7121212 1.1437350708 -0.07511604 2
## 32 411.2878788 -2.2106031904 1.82716936 0
## 33 -43.7121212 0.9003564016 0.30182248 2
## 34 -59.7121212 0.0585100666 -0.20815340 2
## 35 -23.7121212 0.8596193084 0.12279119 2
## 36 49.2878788 -1.4631377939 -0.06338274 1
## 37 -62.7121212 0.9009839225 -0.25575072 2
## 38 83.2878788 0.0093078830 0.96584202 2
## 39 -25.7121212 -0.0674088978 0.08331993 2
## 40 -71.7121212 1.1497132779 0.10960345 2
## 41 278.2878788 -2.9912673804 1.18028635 0
## 42 -56.7121212 0.6684711413 0.28446697 2
## 43 -29.7121212 0.4919470539 0.50296904 2
## 44 209.2878788 -2.0772496876 0.98678244 0
## 45 -10.7121212 -0.9524625147 -0.04404083 1
## 46 -19.7121212 0.4164545797 0.06379255 2
## 47 -36.7121212 -0.2248132923 -0.04798827 2
## 48 -63.7121212 -1.2153938650 -0.45815750 1
## 49 -30.7121212 0.2452233129 -0.13427920 2
## 50 -60.7121212 0.9902355267 -0.00894013 2
## 51 101.2878788 -0.7339280102 0.85183729 2
## 52 -23.7121212 -1.3492215682 -0.25634623 1
## 53 71.2878788 -1.9046301417 0.57733237 0
## 54 -58.7121212 0.8793032722 -0.23889931 2
## 55 169.2878788 -0.8462383383 0.81714044 0
## 56 -20.7121212 0.7050637488 0.01872041 2
## 57 -30.7121212 -1.4062419921 -0.13764613 1
## 58 323.2878788 -1.8419541681 1.86850683 0
## 59 -73.7121212 0.7966515607 -0.88252817 2
## 60 -72.7121212 1.1928470845 0.20399936 2
## 61 -70.7121212 1.2664189235 0.31943766 2
## 62 -55.7121212 0.6678493532 0.10589967 2
## 63 -73.7121212 1.0283076517 0.07737884 2
## 64 -56.7121212 0.0009525236 -0.47841213 2
## 65 333.2878788 -1.6006421838 2.00164641 0
## 66 -8.7121212 -1.9426865359 -2.04830109 0
## 67 -68.7121212 0.6160457185 0.26786648 2
## 68 -68.7121212 1.0915438619 0.11470010 2
## 69 24.2878788 -0.6518381355 0.28347035 2
## 70 -22.7121212 0.8320428924 0.21483864 2
## 71 -52.7121212 0.6007126267 -0.16608293 2
## 72 -37.7121212 0.9887814018 0.16522130 2
## 73 -55.7121212 0.9169432577 -0.58384818 2
## 74 61.2878788 -1.5589800984 0.55092402 0
## 75 -72.7121212 1.0216900911 0.08137621 2
## 76 -72.7121212 1.1820792857 0.01841793 2
## 77 -72.7121212 1.0809481156 0.36750574 2
## 78 -58.7121212 0.9686274027 -0.18609586 2
## 79 -44.7121212 0.3855203290 -0.07148089 2
## 80 -68.7121212 0.4461727150 -0.55237152 2
## 81 -36.7121212 -1.1899160400 -0.22166763 1
## 82 -62.7121212 0.4148227894 0.02404732 2
## 83 -48.7121212 -1.0374552561 -0.44013367 1
## 84 25.2878788 0.3358565355 0.61331853 0
## 85 -61.7121212 0.7997807066 -0.45241859 2
## 86 120.2878788 -0.9004955650 0.97553195 2
## 87 -20.7121212 -0.2241346258 0.21878970 2
## 88 -68.7121212 1.1621991237 -0.05622848 2
## 89 -63.7121212 0.9974151120 -0.42722894 2
## 90 -62.7121212 0.9195479821 -0.34423177 2
## 91 -10.7121212 0.2078424470 -0.38616397 2
## 92 17.2878788 -0.2828120730 0.22174042 2
## 93 -73.7121212 1.0362087216 0.11552944 2
## 94 -60.7121212 1.1218269266 0.06983408 2
## 95 -64.7121212 0.9387613102 -0.16750701 2
## 96 -70.7121212 1.0485276274 -0.45434859 2
## 97 -66.7121212 1.0078982283 0.18348127 2
## 98 5.2878788 -0.9528846364 -1.79754123 0
## 99 -69.7121212 1.1197145855 0.11458143 2
## 100 -45.7121212 0.4707100026 -0.06133847 2
## 101 73.2878788 -0.6845278089 0.43999051 2
## 102 -67.7121212 0.3881285215 -1.84313449 0
## 103 -71.7121212 0.9659609974 -0.93350465 2
## 104 -45.7121212 0.9555267990 -0.24955660 2
## 105 73.2878788 -1.7723418897 -0.21488106 0
## 106 -41.7121212 0.8494339846 -0.33971779 2
## 107 -32.7121212 0.7431862026 -0.49578739 2
## 108 349.2878788 -2.4576771300 1.27739020 0
## 109 -52.7121212 0.7336150164 0.04147777 2
## 110 -65.7121212 1.0148725171 0.30011081 2
## 111 195.2878788 -0.9612341566 1.20772437 0
## 112 -65.7121212 1.1914309753 0.14824051 2
## 113 -46.7121212 1.0313685002 0.38127194 2
## 114 -67.7121212 0.8213763778 0.12636227 2
## 115 -53.7121212 1.0479550674 0.27650164 2
## 116 -1.7121212 -0.3227912732 -0.03094612 2
## 117 14.2878788 -0.4460476858 0.30535040 2
## 118 186.2878788 -1.8539187130 -0.15326649 0
## 119 -68.7121212 1.1867413903 0.04000996 2
## 120 -58.7121212 0.7517672165 0.01361487 2
## 121 -71.7121212 -1.1532713819 -0.34560929 1
## 122 117.2878788 -0.0075038114 1.19412694 0
## 123 -61.7121212 1.0775594656 -0.03514236 2
## 124 -46.7121212 1.0261919758 0.03343750 2
## 125 -71.7121212 1.2032378445 0.10087134 2
## 126 13.2878788 -0.2311876912 0.02663400 2
## 127 -63.7121212 0.7962346910 -0.09614593 2
## 128 54.2878788 -0.7940960302 0.59716120 2
## 129 -69.7121212 1.2017319538 0.22116306 2
## 130 144.2878788 -1.4097962758 1.02676112 0
## 131 -50.7121212 0.7389615204 -0.40629270 2
## 132 85.2878788 -0.4089144343 0.64367071 2
# Scatter plot of the rows of `dd` on the pca1 / pca2 axes, with one colour
# per value of `cluster`.
# NOTE(review): scale_colour_hue() is a discrete colour scale, so `cluster`
# is presumably a factor or character column at this point -- confirm
# upstream, since a plain numeric column would be rejected by this scale.
ggplot(data = dd, mapping = aes(x = pca1, y = pca2, colour = cluster)) +
  geom_point(size = 1.5, shape = "circle") +
  scale_colour_hue(direction = 1) +
  theme_minimal()