Upwork data scientist analysis

omon das

2024-03-29

library(tidyverse)
library(mlr)
library(naniar)
library(clusterSim)
library(clValid)
# Read the Upwork data-scientist CSV and echo the tibble so the parsed
# column types can be checked.
# NOTE(review): the path is an absolute, machine-specific Windows location.
upwork <- readr::read_csv(
  file = "D:\\wallpapers and photos\\csv\\upwork_data_scientists_.csv"
)

upwork
## # A tibble: 132 × 10
##    country       description   hourlyRate jobSuccess locality name  skills title
##    <chr>         <chr>              <dbl> <chr>      <chr>    <chr> <chr>  <chr>
##  1 Egypt         "let the wor…       25   97%        Giza     Abee… Pytho… Data…
##  2 Serbia        "I am master…       15   97%        Belgrade Niko… Data … Rese…
##  3 Philippines   "Specializes…       60   100%       Manila   Rami… Micro… Exce…
##  4 United States "Data Scienc…      500   100%       Wyckoff  Jaso… Deep … Data…
##  5 Pakistan      "\U0001f534\…       25   82%        Karachi  Muha… Data … Data…
##  6 India         "Hello,\nI h…       20   86%        Bhopal   Prag… Adobe… Prof…
##  7 India         "Former Odoo…       35   97%        Katch    Deve… Pytho… Odoo…
##  8 Pakistan      "Expert in M…       60.0 98%        Karachi  Dr .… Resea… Expe…
##  9 Bangladesh    "TOP-RATED D…       30   100%       Dhaka    Maha… Micro… Meta…
## 10 United States "Welcome to …       45   <NA>       Las Cru… Tren… Micro… Data…
## # ℹ 122 more rows
## # ℹ 2 more variables: totalHours <dbl>, totalJobs <dbl>
# Build the working table `up`:
#   * drop the free-text `description` and `title` columns,
#   * split the pipe-delimited `skills` string into its first four entries,
#   * turn the "NN%" job-success string into a two-level label.
up <- upwork %>%
  dplyr::select(-c(description, title)) %>%
  separate(
    col = skills,
    into = c("skill1", "skill2", "skill3", "skill4"),
    sep = "\\|",
    extra = "drop",  # skills beyond the fourth are discarded
    fill = "right"   # rows with fewer than four skills are padded with NA
  ) %>%
  mutate(
    # Strip the percent sign and parse the remaining digits.
    jobSuccess = as.numeric(str_remove(jobSuccess, "\\%")),
    # Scores above 95 are "high", everything else "low"; a missing score
    # stays NA because if_else() propagates NA.
    jobSuccess = if_else(jobSuccess > 95, "high", "low")
  )

up
## # A tibble: 132 × 11
##    country      hourlyRate jobSuccess locality name  skill1 skill2 skill3 skill4
##    <chr>             <dbl> <chr>      <chr>    <chr> <chr>  <chr>  <chr>  <chr> 
##  1 Egypt              25   high       Giza     Abee… Python Machi… Scrapy Data …
##  2 Serbia             15   high       Belgrade Niko… Data … Googl… Onlin… Lead …
##  3 Philippines        60   high       Manila   Rami… Micro… Excel… Excel… Visua…
##  4 United Stat…      500   high       Wyckoff  Jaso… Deep … Python Data … Machi…
##  5 Pakistan           25   low        Karachi  Muha… Data … Machi… pandas Micro…
##  6 India              20   low        Bhopal   Prag… Adobe… MATLAB Polit… Python
##  7 India              35   high       Katch    Deve… Python Odoo … Artif… Odoo  
##  8 Pakistan           60.0 high       Karachi  Dr .… Resea… Medic… Medic… Acade…
##  9 Bangladesh         30   high       Dhaka    Maha… Micro… Dashb… SQL    Table…
## 10 United Stat…       45   <NA>       Las Cru… Tren… Micro… Python SQL    Data …
## # ℹ 122 more rows
## # ℹ 2 more variables: totalHours <dbl>, totalJobs <dbl>
# Keep only the numeric columns of `up`.
up.num <- dplyr::select(up, where(is.numeric))

up.num
## # A tibble: 132 × 3
##    hourlyRate totalHours totalJobs
##         <dbl>      <dbl>     <dbl>
##  1       25          405        69
##  2       15          354        68
##  3       60           15       351
##  4      500          434        80
##  5       25            4        11
##  6       20           14        74
##  7       35          917        54
##  8       60.0        212       129
##  9       30          347        21
## 10       45           60         2
## # ℹ 122 more rows
# Kernel density estimate of totalHours before imputation.
ggplot(data = up.num, mapping = aes(x = totalHours)) +
  geom_density()

# Imputation method: predict each missing value with an rpart regression
# tree (mlr learner "regr.rpart").
impute <- imputeLearner("regr.rpart")

# Apply that method to every numeric column. The function is called with
# its namespace because the variable `impute` above has the same name.
imputed <- mlr::impute(up.num, classes = list(numeric = impute))

# The completed data set is stored in the `data` element of the result.
up.imp <- imputed$data

up.imp
##     hourlyRate totalHours totalJobs
## 1        25.00  405.00000        69
## 2        15.00  354.00000        68
## 3        60.00   15.00000       351
## 4       500.00  434.00000        80
## 5        25.00    4.00000        11
## 6        20.00   14.00000        74
## 7        35.00  917.00000        54
## 8        59.99  212.00000       129
## 9        30.00  347.00000        21
## 10       45.00   60.00000         2
## 11       15.00   92.22222        12
## 12       60.00   50.00000        24
## 13       25.00  902.00000       160
## 14      100.00  520.00000       138
## 15       25.00   97.00000         2
## 16       20.00  682.00000        67
## 17       25.00  821.00000        45
## 18       14.99   79.00000        32
## 19       40.00   69.00000       159
## 20       45.00  956.00000        37
## 21       30.00  706.00000        76
## 22       40.00  209.00000        28
## 23       65.00  210.00000        29
## 24        7.99  178.00000       364
## 25       35.00  709.00000       165
## 26       10.00   92.22222        26
## 27      150.00  917.00000        60
## 28       10.00  227.00000        39
## 29        5.50  996.00000       209
## 30       18.00  497.00000        96
## 31       30.00   22.00000         3
## 32       30.00   88.00000       486
## 33       15.00   71.00000        31
## 34       30.00  442.00000        15
## 35       30.00    8.00000        51
## 36       38.00  763.00000       124
## 37       40.00   82.00000        12
## 38        5.99  107.00000       158
## 39       22.50  413.00000        49
## 40       20.00   36.00000         3
## 41       20.00  798.00000       353
## 42       10.00  212.00000        18
## 43        4.00  221.00000        45
## 44       20.00  607.00000       284
## 45       25.00  737.00000        64
## 46       30.00  182.00000        55
## 47       25.00  505.00000        38
## 48       30.00  985.00000        11
## 49       36.00  274.00000        44
## 50       28.00   59.00000        14
## 51       10.00  361.00000       176
## 52       29.00  932.00000        51
## 53        5.00  941.00000       146
## 54       40.00   80.00000        16
## 55       30.00  187.00000       244
## 56       35.00   56.00000        54
## 57       20.00  990.00000        44
## 58        6.50  216.00000       398
## 59       70.00  106.47368         1
## 60       15.00   29.00000         2
## 61       10.00    1.00000         4
## 62       20.00  193.00000        19
## 63       20.00   92.22222         1
## 64       45.00  433.00000        18
## 65        4.44   91.00000       408
## 66      125.00  980.00000        66
## 67        7.00  272.00000         6
## 68       20.00   52.00000         6
## 69       20.00  523.00000        99
## 70       25.00   25.00000        52
## 71       35.00  188.00000        22
## 72       25.00    1.00000        37
## 73       60.00   23.00000        19
## 74        7.00  821.00000       136
## 75       20.00   92.22222         2
## 76       25.00   17.00000         2
## 77        5.00   92.22222         2
## 78       38.00   46.00000        16
## 79       30.00  264.00000        30
## 80       50.00  272.00000         6
## 81       25.00  908.00000        38
## 82       20.00  318.00000        12
## 83       35.00  861.00000        26
## 84       12.00  121.00000       100
## 85       50.00  105.00000        13
## 86        7.00  383.00000       195
## 87       15.00  477.00000        54
## 88       30.00    6.00000         6
## 89       50.00   28.00000        11
## 90       45.00   66.00000        12
## 91       55.00  203.00000        64
## 92       25.00  380.00000        92
## 93       18.00   92.22222         1
## 94       25.00    9.00000        14
## 95       35.00   80.00000        10
## 96       50.00   26.00000         4
## 97       16.00   88.00000         8
## 98      125.00  528.00000        80
## 99       20.00   43.00000         5
## 100      30.00  231.19048        29
## 101      25.00  393.00000       148
## 102     120.00  178.00000         7
## 103      75.00   22.00000         3
## 104      45.00    4.00000        29
## 105      50.00  806.00000       148
## 106      50.00   29.00000        33
## 107      60.00   32.00000        42
## 108      40.00  346.00000       424
## 109      25.00  149.00000        22
## 110      10.00   92.22222         9
## 111      14.95  188.00000       270
## 112      20.00    2.00000         9
## 113      11.11   31.00000        28
## 114      17.00  167.00000         7
## 115      15.00   37.00000        21
## 116      33.00  436.00000        73
## 117      18.00  468.00000        89
## 118      77.77  482.00000       261
## 119      25.00    4.00000         6
## 120      25.00  158.00000        16
## 121      22.22  994.00000         3
## 122       3.00   25.00000       192
## 123      30.00   22.00000        13
## 124      30.00    2.00000        28
## 125      21.00   12.00000         3
## 126      35.00  353.00000        88
## 127      30.00  145.00000        11
## 128      10.00  516.00000       129
## 129      15.00   17.00000         5
## 130       6.00  531.00000       219
## 131      50.00  100.00000        24
## 132      20.00  253.00000       160
# Kernel density estimate of totalHours after imputation.
# totalJobs is deliberately not mapped to `fill`: it is a continuous column,
# so the density stat cannot form groups from it and ggplot2 would drop the
# aesthetic, leaving the curve unfilled. A discrete (binned or factor)
# variable is required for filled, per-group densities.
ggplot(up.imp, aes(x = totalHours)) +
  geom_density()

# ---- Dimension reduction ----

# PCA

# Standardise every imputed column (scale() centres and divides by the
# standard deviation by default); the result is a numeric matrix.
up.scale <- scale(up.imp)

# Principal components of the standardised matrix.
pca <- prcomp(up.scale)

summary(pca)
## Importance of components:
##                           PC1    PC2    PC3
## Standard deviation     1.1004 1.0164 0.8696
## Proportion of Variance 0.4036 0.3443 0.2520
## Cumulative Proportion  0.4036 0.7480 1.0000
# Attach the first two principal-component scores to the descriptive table.
# Numeric columns are mean-centred (not rescaled) along the way.
pca.data <- up %>%
  mutate(across(where(is.numeric), function(col) scale(col, scale = FALSE))) %>%
  mutate(pca1 = pca$x[, 1], pca2 = pca$x[, 2])

# PC1 vs PC2, coloured by the job-success label.
pca.plot <- ggplot(pca.data, aes(x = pca1, y = pca2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "pca plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  )

# Static, un-faceted version.
pca.plot

# Same scatter with one panel per country, shown as an interactive widget.
pca.plot <- pca.plot + facet_wrap(~country)

plotly::ggplotly(pca.plot)
library(Rtsne)

# t-SNE embedding of the standardised features.
# The argument name must be spelled `perplexity` exactly: Rtsne() accepts
# `...`, so a misspelled name is swallowed silently and the default
# perplexity (30) is used instead of the intended 5.
# theta = 0 requests exact t-SNE rather than the Barnes-Hut approximation.
# The layout is stochastic; call set.seed() beforehand for repeatable output.
tsne <- Rtsne(up.scale, perplexity = 5, theta = 0, max_iter = 900)

# Attach the two embedding coordinates to the descriptive table; numeric
# columns are mean-centred (not rescaled) along the way.
tsne.data <- up %>%
  mutate(across(where(is.numeric), function(col) scale(col, scale = FALSE))) %>%
  mutate(tsne1 = tsne$Y[, 1], tsne2 = tsne$Y[, 2])

# Embedding coloured by the job-success label.
tsne.plot <- ggplot(tsne.data, aes(x = tsne1, y = tsne2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "tsne plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  )

# Static, un-faceted version.
tsne.plot

# Same scatter with one panel per country, shown as an interactive widget.
tsne.plot <- tsne.plot + facet_wrap(~country)
plotly::ggplotly(tsne.plot)
library(umap)

# UMAP embedding of the standardised features (5 neighbours, Manhattan
# distance, 600 epochs, min_dist 0.05).
# The result is stored under the name `umap`, the same name as the function,
# so the call is namespace-qualified to keep it unambiguous.
umap <- umap::umap(
  up.scale,
  n_neighbors = 5,
  metric = "manhattan",
  n_epochs = 600,
  min_dist = 0.05
)

# Attach the two layout coordinates to the descriptive table; numeric
# columns are mean-centred (not rescaled) along the way.
umap.data <- up %>%
  mutate(across(where(is.numeric), function(col) scale(col, scale = FALSE))) %>%
  mutate(umap1 = umap$layout[, 1], umap2 = umap$layout[, 2])

# One panel per country, shown as an interactive widget.
umap.plot <- ggplot(umap.data, aes(x = umap1, y = umap2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "umap plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  ) +
  facet_wrap(~country)
plotly::ggplotly(umap.plot)

# Static, un-faceted version of the same scatter.
ggplot(umap.data, aes(x = umap1, y = umap2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "umap plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  )

library(kohonen)

# 3 x 3 rectangular, non-toroidal map with a "bubble" neighbourhood.
# The grid object reuses the constructor's name, hence the namespace prefix.
somgrid <- kohonen::somgrid(
  xdim = 3,
  ydim = 3,
  topo = "rectangular",
  neighbourhood.fct = "bubble",
  toroidal = FALSE
)

# Train on the standardised matrix: rlen = 2000, learning rate going from
# 0.1 down to 0.01.
som <- supersom(up.scale, grid = somgrid, alpha = c(0.1, 0.01), rlen = 2000)

som
## SOM of size 3x3 with a rectangular topology.
## Training data included.
# Plot types to draw for the fitted map, one figure per entry.
type <- c(
  "codes", "changes", "counts",
  "dist.neighbours", "mapping", "quality"
)

# map() is used for its side effect (the plots); the list it returns holds
# whatever each plot call hands back invisibly.
purrr::map(type, function(plot_type) {
  plot(som, type = plot_type, shape = "straight")
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## [1] 17  4  1  6 18 50  6 10 20
## 
## [[4]]
##         1         2         3         4         5         6         7         8 
##  3.446503 18.118001 88.790006  4.722470 16.203448 23.714661  7.734587  3.168496 
##         9 
##  1.915159 
## 
## [[5]]
## NULL
## 
## [[6]]
## [1] 4.070341e-01 7.336615e-01 3.439399e-27 9.602033e-01 2.106950e-01
## [6] 1.328706e-01 5.072100e-01 4.704987e-01 2.256991e-01
###lle

library(Rdimtools)

# Locally linear embedding of the standardised features into two
# dimensions, with neighbourhoods defined by the 5 nearest neighbours.
lle <- do.lle(up.scale, ndim = 2, type = c("knn", 5))

# Attach the two embedding coordinates to the descriptive table; numeric
# columns are mean-centred (not rescaled) along the way.
lle.data <- up %>%
  mutate(across(where(is.numeric), function(col) scale(col, scale = FALSE))) %>%
  mutate(lle1 = lle$Y[, 1], lle2 = lle$Y[, 2])

# One panel per country, shown as an interactive widget.
lle.plot <- ggplot(lle.data, aes(x = lle1, y = lle2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "LLE plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  ) +
  facet_wrap(~country)
plotly::ggplotly(lle.plot)

# Static, un-faceted version of the same scatter.
ggplot(lle.data, aes(x = lle1, y = lle2, colour = jobSuccess)) +
  geom_point(shape = "circle", size = 1.5) +
  labs(title = "LLE plot in terms of success") +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = 15L, face = "bold", hjust = 0.5)
  )

### Now for clustering

### k-means clustering


library(factoextra)

library(cluster)
library(clValid)
library(stats)

# Data-frame copy of the standardised matrix.
up.scale.df <- as.data.frame(up.scale)

# Choosing the number of clusters for k-means:
# gap statistic ...
fviz_nbclust(up.scale, FUNcluster = stats::kmeans, method = "gap_stat")

# ... average silhouette width ...
fviz_nbclust(up.scale, FUNcluster = stats::kmeans, method = "silhouette")

# ... and clValid's internal validation measures for k = 2..10.
cluster <- clValid(
  up.scale,
  nClust = 2:10,
  clMethods = "kmeans",
  validation = "internal",
  metric = "euclidean"
)

cluster
## 
## Call:
## clValid(obj = up.scale, nClust = 2:10, clMethods = "kmeans", 
##     validation = "internal", metric = "euclidean")
## 
## Clustering Methods:
##  kmeans 
## 
## Cluster sizes:
##  2 3 4 5 6 7 8 9 10 
## 
## Validation measures:
##  Connectivity Dunn Silhouette
# Print the validation-measure table and the best cluster count per measure.
summary(cluster)
## 
## Clustering Methods:
##  kmeans 
## 
## Cluster sizes:
##  2 3 4 5 6 7 8 9 10 
## 
## Validation Measures:
##                            2       3       4       5       6       7       8       9      10
##                                                                                             
## kmeans Connectivity   2.9290 23.6829 36.7937 41.9056 32.5548 34.8214 49.4087 52.2587 62.5762
##        Dunn           1.3264  0.0826  0.0592  0.0618  0.1147  0.1147  0.0784  0.0784  0.0459
##        Silhouette     0.8123  0.4533  0.5285  0.5320  0.4722  0.4779  0.4598  0.4550  0.3799
## 
## Optimal Scores:
## 
##              Score  Method Clusters
## Connectivity 2.9290 kmeans 2       
## Dunn         1.3264 kmeans 2       
## Silhouette   0.8123 kmeans 2
# The internal validation measures all favour two clusters, so fit k = 2.
# stats::kmeans is called by its qualified name because the plain name
# `kmeans` is rebound to a plot object a few lines below.
kmean <- stats::kmeans(up.scale.df, centers = 2, nstart = 100, iter.max = 600)

# Cluster plot with text labels, Euclidean ellipses and centroid "star"
# segments, on a light-blue panel.
# NOTE(review): `show_labels` is an fviz_dend() argument; confirm that
# fviz_cluster() actually uses it rather than ignoring it.
kmeans <- fviz_cluster(
  kmean,
  up.scale,
  repel = FALSE,
  ellipse.type = "euclid",
  star.plot = TRUE,
  geom = "text",
  show_labels = TRUE
) +
  theme(panel.background = element_rect(fill = "aliceblue"))

plotly::ggplotly(kmeans)
## Now for hierarchical clustering

# There are two main types of hierarchical clustering algorithm: agglomerative (agnes) and divisive (diana).
library(cluster)
# Validate agglomerative clustering (agnes, complete linkage) for 3-6
# clusters, once with stability measures and once with internal measures.
# summary() prints the score tables; the clValid object itself is returned
# so that map() collects one result per validation type.
purrr::map(c("stability", "internal"), function(x) {
  agnes_complete <- clValid(
    up.scale.df,
    nClust = 3:6,
    clMethods = "agnes",
    validation = x,
    method = "complete",
    metric = "euclidean"
  )

  summary(agnes_complete)

  agnes_complete
})
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation Measures:
##                 3      4      5      6
##                                       
## agnes APN  0.2044 0.3179 0.1950 0.2499
##       AD   1.6996 1.6279 1.3690 1.3284
##       ADM  0.6024 0.7734 0.5897 0.5999
##       FOM  0.9970 0.9964 0.9890 0.9848
## 
## Optimal Scores:
## 
##     Score  Method Clusters
## APN 0.1950 agnes  5       
## AD  1.3284 agnes  6       
## ADM 0.5897 agnes  5       
## FOM 0.9848 agnes  6       
## 
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation Measures:
##                           3       4       5       6
##                                                    
## agnes Connectivity  14.9357 22.1472 30.0139 31.9679
##       Dunn           0.1219  0.1332  0.1090  0.1120
##       Silhouette     0.5210  0.4426  0.5092  0.4949
## 
## Optimal Scores:
## 
##              Score   Method Clusters
## Connectivity 14.9357 agnes  3       
## Dunn          0.1332 agnes  4       
## Silhouette    0.5210 agnes  3
## [[1]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes", 
##     validation = x, metric = "euclidean", method = "complete")
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation measures:
##  APN AD ADM FOM 
## 
## 
## [[2]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes", 
##     validation = x, metric = "euclidean", method = "complete")
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation measures:
##  Connectivity Dunn Silhouette
# Same validation as for complete linkage, but with Ward's method:
# agnes for 3-6 clusters, stability measures first, then internal measures.
# summary() prints the score tables; the clValid object is the return value.
purrr::map(c("stability", "internal"), function(x) {
  agnes_ward <- clValid(
    up.scale.df,
    nClust = 3:6,
    clMethods = "agnes",
    validation = x,
    method = "ward",
    metric = "euclidean"
  )

  summary(agnes_ward)

  agnes_ward
})
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation Measures:
##                 3      4      5      6
##                                       
## agnes APN  0.2608 0.2848 0.2774 0.3946
##       AD   1.5385 1.3983 1.3119 1.2478
##       ADM  0.6838 0.7417 0.7004 0.6864
##       FOM  0.9870 0.9854 0.9768 0.9728
## 
## Optimal Scores:
## 
##     Score  Method Clusters
## APN 0.2608 agnes  3       
## AD  1.2478 agnes  6       
## ADM 0.6838 agnes  3       
## FOM 0.9728 agnes  6       
## 
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation Measures:
##                           3       4       5       6
##                                                    
## agnes Connectivity  16.7937 20.5849 23.4762 29.0337
##       Dunn           0.0965  0.1323  0.1504  0.1758
##       Silhouette     0.4688  0.5328  0.5400  0.5218
## 
## Optimal Scores:
## 
##              Score   Method Clusters
## Connectivity 16.7937 agnes  3       
## Dunn          0.1758 agnes  6       
## Silhouette    0.5400 agnes  5
## [[1]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes", 
##     validation = x, metric = "euclidean", method = "ward")
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation measures:
##  APN AD ADM FOM 
## 
## 
## [[2]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:6, clMethods = "agnes", 
##     validation = x, metric = "euclidean", method = "ward")
## 
## Clustering Methods:
##  agnes 
## 
## Cluster sizes:
##  3 4 5 6 
## 
## Validation measures:
##  Connectivity Dunn Silhouette
library(cluster)
# Agglomerative tree from cluster::agnes (Euclidean distance, complete
# linkage) on the standardised matrix.
hcut.agg <- cluster::agnes(up.scale, metric = "euclidean", method = "complete")

# Cut the tree into three groups.
h.clust <- cutree(hcut.agg, k = 3)

# Phylogenic-style dendrogram coloured by the three groups.
fviz_dend(hcut.agg, k = 3, type = "phylogenic", show_labels = TRUE)

# Cluster plot of the three groups.
fviz_cluster(list(data = up.scale.df, cluster = h.clust))

# Divisive clustering
# Validate diana for 3-10 clusters, once with stability measures and once
# with internal measures. summary() prints the score tables; the clValid
# object is returned so that map() collects one result per validation type.
purrr::map(c("stability", "internal"), function(x) {
  diana_valid <- clValid(
    up.scale.df,
    nClust = 3:10,
    clMethods = "diana",
    metric = "euclidean",
    validation = x
  )

  summary(diana_valid)
  diana_valid
})
## 
## Clustering Methods:
##  diana 
## 
## Cluster sizes:
##  3 4 5 6 7 8 9 10 
## 
## Validation Measures:
##                 3      4      5      6      7      8      9     10
##                                                                   
## diana APN  0.1546 0.2409 0.1547 0.2955 0.2345 0.3442 0.3424 0.3625
##       AD   1.6351 1.5565 1.3353 1.2921 1.1913 1.1487 1.1315 1.1078
##       ADM  0.5387 0.7010 0.6198 0.6364 0.6363 0.6554 0.6591 0.6474
##       FOM  1.0023 0.9910 0.9871 0.9705 0.9737 0.9740 0.9726 0.9722
## 
## Optimal Scores:
## 
##     Score  Method Clusters
## APN 0.1546 diana  3       
## AD  1.1078 diana  10      
## ADM 0.5387 diana  3       
## FOM 0.9705 diana  6       
## 
## 
## Clustering Methods:
##  diana 
## 
## Cluster sizes:
##  3 4 5 6 7 8 9 10 
## 
## Validation Measures:
##                           3       4       5       6       7       8       9      10
##                                                                                    
## diana Connectivity  22.3766 27.0302 30.9976 34.2901 43.7020 45.2405 48.0028 52.3552
##       Dunn           0.0978  0.1021  0.1449  0.1529  0.0872  0.1003  0.1016  0.1167
##       Silhouette     0.4724  0.4203  0.5147  0.4548  0.4063  0.4275  0.4229  0.4293
## 
## Optimal Scores:
## 
##              Score   Method Clusters
## Connectivity 22.3766 diana  3       
## Dunn          0.1529 diana  6       
## Silhouette    0.5147 diana  5
## [[1]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:10, clMethods = "diana", 
##     validation = x, metric = "euclidean")
## 
## Clustering Methods:
##  diana 
## 
## Cluster sizes:
##  3 4 5 6 7 8 9 10 
## 
## Validation measures:
##  APN AD ADM FOM 
## 
## 
## [[2]]
## 
## Call:
## clValid(obj = up.scale.df, nClust = 3:10, clMethods = "diana", 
##     validation = x, metric = "euclidean")
## 
## Clustering Methods:
##  diana 
## 
## Cluster sizes:
##  3 4 5 6 7 8 9 10 
## 
## Validation measures:
##  Connectivity Dunn Silhouette
# Divisive tree (Euclidean distance) on the standardised data frame.
diana.clust <- cluster::diana(up.scale.df, metric = "euclidean")

# Cut into three groups, then show the tree and the resulting clusters.
diana.cut <- cutree(diana.clust, k = 3)
fviz_dend(diana.clust, k = 3, type = "phylogenic")

fviz_cluster(list(data = up.scale.df, cluster = diana.cut))

#model based clustering

#automated

library(mclust)

# Fit a Gaussian mixture with all settings left at their defaults, so
# Mclust picks the model and the number of components itself.
mc <- Mclust(up.scale.df)

# Six components were selected (see the summary).
summary(mc)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VVI (diagonal, varying volume and shape) model with 6 components: 
## 
##  log-likelihood   n df       BIC       ICL
##       -260.6433 132 41 -721.4815 -747.9398
## 
## Clustering table:
##  1  2  3  4  5  6 
## 35 23  6 22 19 27
# Three standard views of the fitted mixture: classification,
# classification uncertainty, and the BIC curves.
fviz_mclust(mc, what = "classification", ellipse.type = "convex", geom = "text")

fviz_mclust(mc, what = "uncertainty")

fviz_mclust(mc, what = "BIC")

# Add the mixture classification, as a factor, to the PCA table.
mc.clust <- cbind(pca.data, cluster = factor(mc$classification))

# Mixture clusters drawn in the plane of the first two principal components.
ggplot(mc.clust, aes(x = pca1, y = pca2, colour = cluster)) +
  geom_point(shape = "circle", size = 1.5) +
  scale_color_hue(direction = 1) +
  theme_minimal()

# Stability validation of model-based clustering for 2-10 clusters.
mc.optimal <- clValid(
  up.scale.df,
  nClust = 2:10,
  clMethods = "model",
  validation = "stability"
)

summary(mc.optimal)
## 
## Clustering Methods:
##  model 
## 
## Cluster sizes:
##  2 3 4 5 6 7 8 9 10 
## 
## Validation Measures:
##                 2      3      4      5      6      7      8      9     10
##                                                                          
## model APN  0.1678 0.3032 0.2423 0.3236 0.3689 0.3055 0.3228     NA     NA
##       AD   1.5960 1.5588 1.3499 1.3522 1.2660 1.2455 1.2078     NA     NA
##       ADM  0.5478 0.7527 0.6713 0.7283 0.7567 0.7874 0.7516     NA     NA
##       FOM  0.9821 0.9718 0.9542 0.9607 0.9460 0.9771 0.9809     NA     NA
## 
## Optimal Scores:
## 
##     Score  Method Clusters
## APN 0.1678 model  2       
## AD  1.2078 model  8       
## ADM 0.5478 model  2       
## FOM 0.9460 model  6
# It seems Mclust's choice of 6 components agrees with the FOM-optimal cluster count.
library(fpc)
library(dbscan)

# k-nearest-neighbour distance plot (k = 4) with a horizontal reference
# line at 1.8, the eps value used below.
dbscan::kNNdistplot(up.scale.df, k = 4)
abline(h = 1.8)

# Fit DBSCAN at eps = 1.8 for every minPts from 4 to 40; map() returns the
# list of fits, which is printed.
purrr::map(4:40, function(min_pts) {
  dbscan::dbscan(up.scale, eps = 1.8, minPts = min_pts)
})
## [[1]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 4
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
## 
##   0   1 
##   1 131 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[2]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 5
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
## 
##   0   1 
##   1 131 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[3]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 6
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
## 
##   0   1 
##   1 131 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[4]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 7
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 1 noise points.
## 
##   0   1 
##   1 131 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[5]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 8
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
## 
##   0   1 
##   2 130 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[6]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 9
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
## 
##   0   1 
##   2 130 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[7]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 10
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 2 noise points.
## 
##   0   1 
##   2 130 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[8]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 11
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[9]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 12
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[10]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 13
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[11]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 14
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[12]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 15
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[13]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 16
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[14]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 17
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[15]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 18
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[16]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 19
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[17]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 20
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[18]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 21
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 3 noise points.
## 
##   0   1 
##   3 129 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[19]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 22
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[20]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 23
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[21]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 24
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[22]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 25
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[23]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 26
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[24]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 27
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 7 noise points.
## 
##   0   1 
##   7 125 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[25]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 28
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
## 
##   0   1 
##   8 124 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[26]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 29
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
## 
##   0   1 
##   8 124 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[27]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 30
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
## 
##   0   1 
##   8 124 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[28]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 31
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
## 
##   0   1 
##   8 124 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[29]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 32
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 8 noise points.
## 
##   0   1 
##   8 124 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[30]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 33
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
## 
##   0   1 
##   9 123 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[31]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 34
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
## 
##   0   1 
##   9 123 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[32]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 35
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 9 noise points.
## 
##   0   1 
##   9 123 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[33]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 36
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
## 
##   0   1 
##  10 122 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[34]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 37
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
## 
##   0   1 
##  10 122 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[35]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 38
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
## 
##   0   1 
##  10 122 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[36]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 39
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
## 
##   0   1 
##  10 122 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
## 
## [[37]]
## DBSCAN clustering for 132 objects.
## Parameters: eps = 1.8, minPts = 40
## Using euclidean distances and borderpoints = TRUE
## The clustering contains 1 cluster(s) and 10 noise points.
## 
##   0   1 
##  10 122 
## 
## Available fields: cluster, eps, minPts, dist, borderPoints
# Fit a single DBSCAN model with fpc on the scaled data (eps = 1.8,
# MinPts = 4); showplot = 1 asks fpc to draw the clustering while it runs.
# The result is kept in `density` for the plot further down.
density <- fpc::dbscan(
  up.scale,
  eps = 1.8,
  MinPts = 4,
  showplot = 1
)

density
## dbscan Pts=132 MinPts=4 eps=1.8
##        0   1
## border 1   1
## seed   0 130
## total  1 131
# Plot the fpc DBSCAN result; the data frame is passed explicitly as the
# data the cluster assignments refer to.
fviz_cluster(density, data = up.scale.df)

# Sweep HDBSCAN's minPts over 2..9 on the scaled data. The fitted models come
# back as a list, so each summary is printed for side-by-side comparison.
purrr::map(2:9, function(min_pts) {
  dbscan::hdbscan(up.scale, minPts = min_pts)
})
## [[1]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 2
## The clustering contains 38 cluster(s) and 23 noise points.
## 
##  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 
## 23  2  2  5  2  3  3  2  7  3  2  5  2  2  3  2  3  2  3  2  2  2  2  3  2  2 
## 26 27 28 29 30 31 32 33 34 35 36 37 38 
##  3  4  2  3  2  3  5  2  2  2  2  3  8 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[2]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 3
## The clustering contains 13 cluster(s) and 54 noise points.
## 
##  0  1  2  3  4  5  6  7  8  9 10 11 12 13 
## 54  3  3  3 12  4  5  9 14  7  5  3  3  7 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[3]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 4
## The clustering contains 2 cluster(s) and 28 noise points.
## 
##  0  1  2 
## 28 13 91 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[4]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 5
## The clustering contains 3 cluster(s) and 51 noise points.
## 
##  0  1  2  3 
## 51 11 63  7 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[5]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 6
## The clustering contains 3 cluster(s) and 55 noise points.
## 
##  0  1  2  3 
## 55 10 61  6 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[6]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 7
## The clustering contains 2 cluster(s) and 35 noise points.
## 
##  0  1  2 
## 35 10 87 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[7]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 8
## The clustering contains 2 cluster(s) and 35 noise points.
## 
##  0  1  2 
## 35 10 87 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
## 
## [[8]]
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 9
## The clustering contains 2 cluster(s) and 34 noise points.
## 
##  0  1  2 
## 34 10 88 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
# Refit HDBSCAN with minPts = 4 and keep the model; its `cluster` vector is
# read when the labels are attached to the data below.
hdbscan <- dbscan::hdbscan(up.scale, minPts = 4)
hdbscan
## HDBSCAN clustering for 132 objects.
## Parameters: minPts = 4
## The clustering contains 2 cluster(s) and 28 noise points.
## 
##  0  1  2 
## 28 13 91 
## 
## Available fields: cluster, minPts, coredist, cluster_scores,
##                   membership_prob, outlier_scores, hc
# Append the HDBSCAN labels to pca.data as a factor column named `cluster`
# (label 0 is what HDBSCAN reports as noise), then print the combined table.
dd <- cbind(
  pca.data,
  cluster = factor(hdbscan$cluster)
)


dd
##           country   hourlyRate jobSuccess               locality
## 1           Egypt  -9.96174242       high                   Giza
## 2          Serbia -19.96174242       high               Belgrade
## 3     Philippines  25.03825758       high                 Manila
## 4   United States 465.03825758       high                Wyckoff
## 5        Pakistan  -9.96174242        low                Karachi
## 6           India -14.96174242        low                 Bhopal
## 7           India   0.03825758       high                  Katch
## 8        Pakistan  25.02825758       high                Karachi
## 9      Bangladesh  -4.96174242       high                  Dhaka
## 10  United States  10.03825758       <NA>             Las Cruces
## 11       Pakistan -19.96174242       high                Karachi
## 12          Egypt  25.03825758        low                  Aswan
## 13          India  -9.96174242        low              New Delhi
## 14  United States  65.03825758       high                 Edison
## 15       Ethiopia  -9.96174242       <NA>            Addis Ababa
## 16       Pakistan -14.96174242       high                 Gujrat
## 17          India  -9.96174242        low              New Delhi
## 18        Nigeria -19.97174242        low        Lekki Peninsula
## 19        Finland   5.03825758        low                   Oulu
## 20       Pakistan  10.03825758        low               Peshawar
## 21          India  -4.96174242        low                  Delhi
## 22          India   5.03825758       high                  Noida
## 23         Cyprus  30.03825758       high               Limassol
## 24          India -26.97174242       high                 Mohali
## 25          India   0.03825758        low              New Delhi
## 26       Pakistan -24.96174242        low                 Lahore
## 27         Turkey 115.03825758        low               Istanbul
## 28          India -24.96174242       high              New Delhi
## 29       Pakistan -29.46174242       high             Bahawalpur
## 30           <NA> -16.96174242       high      Elk Grove Village
## 31       Pakistan  -4.96174242       <NA>                Karachi
## 32          Egypt  -4.96174242       high                  Cairo
## 33          India -19.96174242       high                 Indore
## 34       Pakistan  -4.96174242       high             Bahawalpur
## 35       Pakistan  -4.96174242       high                 Lahore
## 36       Pakistan   3.03825758       high                Karachi
## 37          Egypt   5.03825758       high                Zagazig
## 38     Bangladesh -28.97174242       high                 Sylhet
## 39          India -12.46174242       high                 Bhutli
## 40       Pakistan -14.96174242       <NA>                Karachi
## 41           <NA> -14.96174242       high         Stone Mountain
## 42       Pakistan -24.96174242       high             Faisalabad
## 43       Pakistan -30.96174242        low               Rajanpur
## 44     Bangladesh -14.96174242       high                  Dhaka
## 45          India  -9.96174242       high                Gurgaon
## 46       Pakistan  -4.96174242       high                Karachi
## 47          India  -9.96174242       high                 Mohali
## 48          India  -4.96174242       high               Vadodara
## 49       Pakistan   1.03825758       high              Islamabad
## 50          India  -6.96174242       high              Ghaziabad
## 51       Pakistan -24.96174242       high              Islamabad
## 52          India  -5.96174242       high                 Jaipur
## 53     Bangladesh -29.96174242       high                 Khulna
## 54        Germany   5.03825758       high              Igersheim
## 55      Australia  -4.96174242        low                Holroyd
## 56     Bangladesh   0.03825758       high                  Dhaka
## 57   Saudi Arabia -14.96174242       high                 Riyadh
## 58     Bangladesh -28.46174242        low               Rajshahi
## 59       Pakistan  35.03825758       <NA>                 Lahore
## 60        Nigeria -19.96174242       high          Port Harcourt
## 61     Bangladesh -24.96174242       <NA>           Brahmanbaria
## 62       Pakistan -14.96174242       high                 Quetta
## 63         Latvia -14.96174242       <NA>                   Riga
## 64          India  10.03825758       high              Bengaluru
## 65          India -30.52174242       high Coimbatore, Tamil Nadu
## 66           <NA>  90.03825758       high                 Clovis
## 67    Philippines -27.96174242       high       Mandaluyong City
## 68       Pakistan -14.96174242       high                 Lahore
## 69       Pakistan -14.96174242        low               Peshawar
## 70       Pakistan  -9.96174242       high                 Lahore
## 71       Pakistan   0.03825758       high                 Quetta
## 72       Pakistan  -9.96174242       high              Islamabad
## 73     Bangladesh  25.03825758       high                  Dhaka
## 74       Pakistan -27.96174242       high             Allah Abad
## 75         Israel -14.96174242       <NA>         Qiryat Motsqin
## 76          Egypt  -9.96174242       <NA>             Alexandria
## 77          Egypt -29.96174242       <NA>      Shubra al Khaymah
## 78        Algeria   3.03825758        low               Bourkika
## 79       Pakistan  -4.96174242       high      Chashma, Mianwali
## 80          India  15.03825758       high                  Delhi
## 81          India  -9.96174242       high              New Delhi
## 82         France -14.96174242        low    Velizy-Villacoublay
## 83          India   0.03825758       high               Bathinda
## 84       Pakistan -22.96174242        low             Rawalpindi
## 85        Morocco  15.03825758        low             Casablanca
## 86       Pakistan -27.96174242       high     Haveli Lakha Okara
## 87       Pakistan -19.96174242       high                 Lahore
## 88          Egypt  -4.96174242       high                  Cairo
## 89        Algeria  15.03825758        low                Algiers
## 90  United States  10.03825758       high               Leesburg
## 91         Brazil  20.03825758       high              Sao Paulo
## 92          India  -9.96174242        low             Chandigarh
## 93         Brazil -16.96174242       <NA>                 Recife
## 94          Egypt  -9.96174242       high                  Cairo
## 95       Pakistan   0.03825758        low                Karachi
## 96          India  15.03825758       high              Hyderabad
## 97          Yemen -18.96174242       high                   Taiz
## 98  United States  90.03825758       high                Chicago
## 99       Pakistan -14.96174242       high                Karachi
## 100    Bangladesh  -4.96174242        low             Mymensingh
## 101   Philippines  -9.96174242        low    Cagayan de Oro City
## 102 United States  85.03825758       high                Chicago
## 103 United States  40.03825758       high               Berkeley
## 104    Bangladesh  10.03825758       high                  Dhaka
## 105 United States  15.03825758       high          Lagrangeville
## 106         Egypt  15.03825758       high                  Cairo
## 107       Ukraine  25.03825758       high                Kharkiv
## 108      Pakistan   5.03825758        low                Sialkot
## 109         Nepal  -9.96174242        low              Kathmandu
## 110         Egypt -24.96174242       high             Alexandria
## 111    Bangladesh -20.01174242       high                 Khulna
## 112         Egypt -14.96174242        low                  Cairo
## 113      Pakistan -23.85174242        low             Bahawalpur
## 114      Pakistan -17.96174242       <NA>                 Lahore
## 115         India -19.96174242        low              Bengaluru
## 116          <NA>  -1.96174242       high            Fort Pierce
## 117    Bangladesh -16.96174242       high                  Bogra
## 118        Canada  42.80825758       high              Vancouver
## 119      Pakistan  -9.96174242        low                 Multan
## 120      Pakistan  -9.96174242       high                 Lahore
## 121        Mexico -12.74174242       <NA>                Tijuana
## 122      Pakistan -31.96174242        low                 Quetta
## 123       Georgia  -4.96174242       high                Tbilisi
## 124       Nigeria  -4.96174242       high              Okitipupa
## 125      Pakistan -13.96174242       high                 Lahore
## 126         Kenya   0.03825758       high                Nairobi
## 127         India  -4.96174242       high                  Noida
## 128      Pakistan -24.96174242        low                Lodhran
## 129     Argentina -19.96174242       high           Buenos Aires
## 130    Bangladesh -28.96174242       high                Jessore
## 131       Morocco  15.03825758       high             Casablanca
## 132         India -14.96174242       high               Dehradun
##                              name
## 1                        Abeer K.
## 2                       Nikola M.
## 3                   Ramil, Jr. A.
## 4                        Jason K.
## 5               Muhammad Noman B.
## 6                     Pragyesh S.
## 7                     Devendra R.
## 8               Dr . M Mansoor M.
## 9                    Mahabubur R.
## 10                     Trenton S.
## 11                       Fasih U.
## 12                       Islam A.
## 13                        Ajay B.
## 14                     Shankar M.
## 15                     Yabebal F.
## 16                        Wasi U.
## 17                       Uttam A.
## 18                    Emmanuel O.
## 19                      Doreen B.
## 20              Muhammad Ameer H.
## 21                        Mani V.
## 22                       Nidhi B.
## 23                        Nick V.
## 24                      Sakshi B.
## 25                     Rajveer S.
## 26                    Muhammad N.
## 27                       Pavel A.
## 28                      Snehit D.
## 29            Muhammad Gulraiz H.
## 30                    Muhammad U.
## 31                       Aveen F.
## 32                     Mahmoud D.
## 33                        Parv Y.
## 34                      Junaid U.
## 35                     Zeeshan A.
## 36                        Sana S.
## 37                       Zeyad M.
## 38                      Bashir A.
## 39                        Ajit T.
## 40                        Zain A.
## 41                   Mfonabasi I.
## 42                       Ahmad I.
## 43                       Rajab S.
## 44               Md Omor Faruk B.
## 45                     Noushad A.
## 46                      Nabeel F.
## 47                       vikas k.
## 48                      Rutvik P.
## 49                Malik Haseeb H.
## 50                       Varun D.
## 51                    Muhammad M.
## 52                      Pratik D.
## 53                     Eishwar c.
## 54                      Marvin S.
## 55                       Fahad A.
## 56  MD. Shahriar Al Kasib Khan S.
## 57                       Ahmed o.
## 58                        Sumy A.
## 59                       Aamir I.
## 60                      Ugonna B.
## 61              Ashraful Islam E.
## 62                       Bakht M.
## 63                     Ebrahim A.
## 64                       Arjun V.
## 65                      Naveen K.
## 66                     Michael E.
## 67                      Marlon B.
## 68                   Ishtiaque A.
## 69                       Syeda F.
## 70                         Ali A.
## 71                       Abdul R.
## 72                Khan Bahadar K.
## 73       Muhammad Sarwar Jahan M.
## 74                       Abdul W.
## 75                     Aleksei Z.
## 76                       Ahmed S.
## 77                      Yousef W.
## 78                      Madjid E.
## 79             Muhammad Faheem S.
## 80                      Ishaan N.
## 81                    Priyanka R.
## 82                     yassine g.
## 83                       Rohit K.
## 84                      Faizan Z.
## 85                      Ismail H.
## 86                      Zahida A.
## 87                       Soban A.
## 88                       Salma H.
## 89                       Ahlem K.
## 90                        Dave C.
## 91                       Paulo O.
## 92                     Shubham S.
## 93                      Sergio P.
## 94                     Mohamed A.
## 95                        Arif S.
## 96                           K R.
## 97                    Mohammed A.
## 98                       James S.
## 99                        Faiz R.
## 100             Musabbir Ahmed A.
## 101                        Roy L.
## 102                      Chris H.
## 103                    Matthew S.
## 104                       Khan M.
## 105                   Terrence C.
## 106                       Ehab K.
## 107                 Konstantin B.
## 108                   Ehtisham R.
## 109                     Manish A.
## 110                     Hossam H.
## 111               Sk. Shahidul I.
## 112                      Nesma A.
## 113                      Abdul M.
## 114                      AILYAH .
## 115                      Sahaj G.
## 116                      Misty M.
## 117                Md Mujammel H.
## 118                     Robert O.
## 119              Muhammad Umer F.
## 120            Muhammad Junaid A.
## 121                     Amaury H.
## 122                     Faizan S.
## 123                       Luka S.
## 124                   Olusegun F.
## 125                  Meer Naib K.
## 126                     Dennis M.
## 127                   Virender B.
## 128                        Ali Y.
## 129                     Matias W.
## 130                       Antu C.
## 131                     Adnane R.
## 132                Parthsarthi D.
##                                                                                                                                                                                                                                                                          skill1
## 1                                                                                                                                                                                                                                                                        Python
## 2                                                                                                                                                                                                                                                                    Data Entry
## 3                                                                                                                                                                                                                                                               Microsoft Excel
## 4                                                                                                                                                                                                                                                                 Deep Learning
## 5                                                                                                                                                                                                                                                                  Data Science
## 6                                                                                                                                                                                                                                                             Adobe Illustrator
## 7                                                                                                                                                                                                                                                                        Python
## 8                                                                                                                                                                                                                                                        Research Paper Writing
## 9                                                                                                                                                                                                                                                            Microsoft Power BI
## 10                                                                                                                                                                                                                                                         Microsoft SQL Server
## 11                                                                                                                                                                                                                                                                 Apache Spark
## 12                                                                                                                                                                                                                                                           Microsoft Power BI
## 13                                                                                                                                                                                                                                                                       Python
## 14                                                                                                                                                                                                                                                                       Python
## 15                                                                                                                                                                                                                                                                       Python
## 16                                                                                                                                                                                                                                                                       Scrapy
## 17                                                                                                                                                                                                                                                                       Python
## 18                                                                                                                                                                                                                                                                   Data Entry
## 19                                                                                                                                                                                                                                                                   Accounting
## 20                                                                                                                                                                                                                                                              Microsoft Excel
## 21                                                                                                                                                                                                                                                                       Python
## 22                                                                                                                                                                                                                                                                       Python
## 23                                                                                                                                                                                                                                                                      Tableau
## 24                                                                                                                                                                                                                                                                  SEO Writing
## 25                                                                                                                                                                                                                                                           Data Visualization
## 26                                                                                                                                                                                                                                                                       Python
## 27                                                                                                                                                                                                                                                                 Data Science
## 28                                                                                                                                                                                                                                                                       Python
## 29                                                                                                                                                                                                                                                                Data Cleaning
## 30                                                                                                                                                                                                                                                              Market Research
## 31                                                                                                                                                                                                                                                                       pandas
## 32                                                                                                                                                                                                                                                           Microsoft Power BI
## 33                                                                                                                                                                                                                                                                       Python
## 34                                                                                                                                                                                                                                                             Data Engineering
## 35                                                                                                                                                                                                                                                                       Python
## 36                                                                                                                                                                                                                                                          Presentation Design
## 37                                                                                                                                                                                                                                                                       OpenCV
## 38                                                                                                                                                                                                                                                              Data Extraction
## 39                                                                                                                                                                                                                                                                Google Sheets
## 40                                                                                                                                                                                                                                                                 Apache Spark
## 41                                                                                                                                                                                                                                                               Microsoft Word
## 42                                                                                                                                                                                                                                                                       Python
## 43                                                                                                                                                                                                                                                              Email Marketing
## 44                                                                                                                                                                                                                                                                 Web Scraping
## 45                                                                                                                                                                                                                                                                  Data Mining
## 46                                                                                                                                                                                                                                                           Data Visualization
## 47                                                                                                                                                                                                                                                                         Java
## 48                                                                                                                                                                                                                                                                      Tableau
## 49                                                                                                                                                                                                                                                    Artificial Neural Network
## 50                                                                                                                                                                                                                                                                          SQL
## 51                                                                                                                                                                                                                                                             Microsoft Office
## 52                                                                                                                                                                                                                                                                        Neo4j
## 53                                                                                                                                                                                                                                                                List Building
## 54                                                                                                                                                                                                                                                                       Python
## 55                                                                                                                                                                                                                                                                    WordPress
## 56                                                                                                                                                                                                                                                             Machine Learning
## 57                                                                                                                                                                                                                                                          Synthetic Chemistry
## 58                                                                                                                                                                                                                                                                Data Scraping
## 59                                                                                                                                                                                                                                                                      RStudio
## 60                                                                                                                                                                                                                                                             Microsoft Office
## 61                                                                                                                                                                                                                                                   Microsoft Excel PowerPivot
## 62                                                                                                                                                                                                                                                                Coding Lesson
## 63                                                                                                                                                                                                                                                          Databricks Platform
## 64                                                                                                                                                                                                                                                                       Looker
## 65                                                                                                                                                                                                                                                                  Real Estate
## 66                                                                                                                                                                                                                                                         Microsoft PowerPoint
## 67                                                                                                                                                                                                                                                                   Data Entry
## 68                                                                                                                                                                                                                                                                      Tableau
## 69  Search Engine Optimization Close the tooltip  \n  Search engine optimization (SEO) is the process of improving the visibility of a website or a web page in search engines via the "natural" or un-paid ("organic" or "algorithmic") search results.\n  \n    wikipedia.org
## 70                                                                                                                                                                                                                                                             Machine Learning
## 71                                                                                                                                                                                                                                                             Machine Learning
## 72                                                                                                                                                                                                                                                                       Python
## 73                                                                                                                                                                                                                                                                       Python
## 74                                                                                                                                                                                                                                                                   Data Entry
## 75                                                                                                                                                                                                                                                                            R
## 76                                                                                                                                                                                                                                                           Data Visualization
## 77                                                                                                                                                                                                                                                           Microsoft Power BI
## 78                                                                                                                                                                                                                                                              Data Processing
## 79                                                                                                                                                                                                                                                                       Python
## 80                                                                                                                                                                                                                                                                       Python
## 81                                                                                                                                                                                                                                                             Machine Learning
## 82                                                                                                                                                                                                                                                           Microsoft Power BI
## 83                                                                                                                                                                                                                                                                   PostgreSQL
## 84                                                                                                                                                                                                                                                                    WordPress
## 85                                                                                                                                                                                                                                                                        Keras
## 86                                                                                                                                                                                                                                                 Social Media Lead Generation
## 87                                                                                                                                                                                                                                                          Deep Neural Network
## 88                                                                                                                                                                                                                                                                       Python
## 89                                                                                                                                                                                                                                                     Agile Project Management
## 90                                                                                                                                                                                                                                                                       Python
## 91                                                                                                                                                                                                                                                                       Python
## 92                                                                                                                                                                                                                                                                       pandas
## 93                                                                                                                                                                                                                                                           Microsoft Power BI
## 94                                                                                                                                                                                                                                                                 Data Science
## 95                                                                                                                                                                                                                                                                 Data Science
## 96                                                                                                                                                                                                                                                                        Scala
## 97                                                                                                                                                                                                                                                                Data Analysis
## 98                                                                                                                                                                                                                                                                Data Analysis
## 99                                                                                                                                                                                                                                                                       Python
## 100                                                                                                                                                                                                                                                               Deep Learning
## 101                                                                                                                                                                                                                                                        Microsoft SQL Server
## 102                                                                                                                                                                                                                                                          Product Management
## 103                                                                                                                                                                                                                                                                           R
## 104                                                                                                                                                                                                                                                             Data Extraction
## 105                                                                                                                                                                                                                                                           Linear Regression
## 106                                                                                                                                                                                                                                                         Python Scikit-Learn
## 107                                                                                                                                                                                                                                                         Amazon Web Services
## 108                                                                                                                                                                                                                                                             Microsoft Excel
## 109                                                                                                                                                                                                                                                               Project Plans
## 110                                                                                                                                                                                                                                                          Microsoft Power BI
## 111                                                                                                                                                                                                                                                                      Amazon
## 112                                                                                                                                                                                                                                                         Deep Neural Network
## 113                                                                                                                                                                                                                                                                      Python
## 114                                                                                                                                                                                                                                                               Google Sheets
## 115                                                                                                                                                                                                                                                                Data Science
## 116                                                                                                                                                                                                                                                                  Data Entry
## 117                                                                                                                                                                                                                                                        Influencer Marketing
## 118                                                                                                                                                                                                                                                  Search Engine Optimization
## 119                                                                                                                                                                                                                                                   Exploratory Data Analysis
## 120                                                                                                                                                                                                                                                                  TypeScript
## 121                                                                                                                                                                                                                                                                Data Science
## 122                                                                                                                                                                                                                                                               List Building
## 123                                                                                                                                                                                                                                                                      Python
## 124                                                                                                                                                                                                                                                                      Python
## 125                                                                                                                                                                                                                                                               Python Script
## 126                                                                                                                                                                                                                                                                Data Science
## 127                                                                                                                                                                                                                                                                         SQL
## 128                                                                                                                                                                                                                                                             Online Research
## 129                                                                                                                                                                                                                                                                      Python
## 130                                                                                                                                                                                                                                                                  Data Entry
## 131                                                                                                                                                                                                                                                                      Python
## 132                                                                                                                                                                                                                                                              Graphic Design
##                                                                                                                                                                                                                            skill2
## 1                                                                                                                                                                                                                Machine Learning
## 2                                                                                                                                                                                                                     Google Docs
## 3                                                                                                                                                                                                                   Excel Formula
## 4                                                                                                                                                                                                                          Python
## 5                                                                                                                                                                                                                Machine Learning
## 6                                                                                                                                                                                                                          MATLAB
## 7                                                                                                                                                                                                                Odoo Development
## 8                                                                                                                                                                                                                 Medical Writing
## 9                                                                                                                                                                                                                       Dashboard
## 10                                                                                                                                                                                                                         Python
## 11                                                                                                                                                                                                                   Data Science
## 12                                                                                                                                                                                                                         Python
## 13                                                                                                                                                                                                         Full-Stack Development
## 14                                                                                                                                                                                                                   Data Science
## 15                                                                                                                                                                                                                            SQL
## 16                                                                                                                                                                                                                       Selenium
## 17                                                                                                                                                                                                                     JavaScript
## 18                                                                                                                                                                                                                Microsoft Excel
## 19                                                                                                                                                                                                              Academic Research
## 20                                                                                                                                                                                                             Microsoft Power BI
## 21                                                                                                                                                                                                     Machine Learning Framework
## 22                                                                                                                                                                                                                Computer Vision
## 23                                                                                                                                                                                                                  Looker Studio
## 24                                                                                                                                                                                                     Search Engine Optimization
## 25                                                                                                                                                                                                                  Looker Studio
## 26                                                                                                                                                                                                                  Data Scraping
## 27                                                                                                                                                                                                                  Deep Learning
## 28                                                                                                                                                                                                                         Scrapy
## 29                                                                                                                                                                                                       Lead Generation Strategy
## 30                                                                                                                                                                                                                Lead Generation
## 31                                                                                                                                                                                                               Machine Learning
## 32                                                                                                                                                                                                                Microsoft Excel
## 33                                                                                                                                                                                                                Lead Generation
## 34                                                                                                                                                                                                                         Python
## 35                                                                                                                                                                                                                       Selenium
## 36                                                                                                                                                                                                                     Data Entry
## 37                                                                                                                                                                                                                              R
## 38                                                                                                                                                                                                                     Email List
## 39                                                                                                                                                                                                                  Presentations
## 40                                                                                                                                                                                                                    Informatica
## 41                                                                                                                                                                                                                Microsoft Excel
## 42                                                                                                                                                                                                                     TensorFlow
## 43                                                                                                                                                                                                                  Data Cleaning
## 44                                                                                                                                                                                                                       Selenium
## 45                                                                                                                                                                                                                         Scrapy
## 46                                                                                                                                                                                                                            SQL
## 47                                                                                                                                                                                                                  Ruby on Rails
## 48                                                                                                                                                                                                                              R
## 49                                                                                                                                                                                                                         Python
## 50                                                                                                                                                                                                                         Looker
## 51                                                                                                                                                                                                   Social Media Lead Generation
## 52                                                                                                                                                                                                                   Oracle PLSQL
## 53                                                                                                                                                                                                                Lead Generation
## 54                                                                                                                                                                                                                     Matplotlib
## 55                                                                                                                                                                                                                Microsoft Visio
## 56                                                                                                                                                                                                             Microsoft Power BI
## 57                                                                                                                                                                                                            Product Formulation
## 58                                                                                                                                                                                                       LinkedIn Sales Navigator
## 59                                                                                                                                                                                                                        Tableau
## 60                                                                                                                                                                                                                    Google Docs
## 61                                                                                                                                                                                                                   Data Science
## 62                                                                                                                                                                                                           Software Development
## 63                                                                                                                                                                                                                 Classification
## 64                                                                                                                                                                                                             Data Visualization
## 65                                                                                                                                                                                                                  List Building
## 66                                                                                                                                                                                                                      Dashboard
## 67                                                                                                                                                                                                                  Data Scraping
## 68                                                                                                                                                                                                                              R
## 69  Biology Close the tooltip  \n  Biology is a natural science concerned with the study of life and living organisms, including their structure, function, growth, evolution, distribution, and taxonomy.\n  \n    wikipedia.org
## 70                                                                                                                                                                                                                         Scrapy
## 71                                                                                                                                                                                                                  Deep Learning
## 72                                                                                                                                                                                                              Technical Writing
## 73                                                                                                                                                                                                             Feature Extraction
## 74                                                                                                                                                                                                                Lead Generation
## 75                                                                                                                                                                                                                         Python
## 76                                                                                                                                                                                                                         Python
## 77                                                                                                                                                                                                                         Python
## 78                                                                                                                                                                                                                            SQL
## 79                                                                                                                                                                                                                   Apache Spark
## 80                                                                                                                                                                                                                Microsoft Excel
## 81                                                                                                                                                                                                                  Elasticsearch
## 82                                                                                                                                                                                                                   Data Science
## 83                                                                                                                                                                                                                     TensorFlow
## 84                                                                                                                                                                                                                     Blockchain
## 85                                                                                                                                                                                                               Machine Learning
## 86                                                                                                                                                                                                                  Google Sheets
## 87                                                                                                                                                                                                                Computer Vision
## 88                                                                                                                                                                                                               Machine Learning
## 89                                                                                                                                                                                                               Machine Learning
## 90                                                                                                                                                                                                                          React
## 91                                                                                                                                                                                                                            SQL
## 92                                                                                                                                                                                                                            SQL
## 93                                                                                                                                                                                                                  Data Analysis
## 94                                                                                                                                                                                                                         Python
## 95                                                                                                                                                                                                                        ChatGPT
## 96                                                                                                                                                                                                                         Python
## 97                                                                                                                                                                                                                  Data Scraping
## 98                                                                                                                                                                                                                              R
## 99                                                                                                                                                                                                                            SQL
## 100                                                                                                                                                                                                            Model Optimization
## 101                                                                                                                                                                                                               Database Design
## 102                                                                                                                                                                                                            Project Management
## 103                                                                                                                                                                                                            Data Visualization
## 104                                                                                                                                                                                                                        Python
## 105                                                                                                                                                                                                                      IBM SPSS
## 106                                                                                                                                                                                                                         NumPy
## 107                                                                                                                                                                                                         Google Cloud Platform
## 108                                                                                                                                                                                                                   AppleScript
## 109                                                                                                                                                                                                             Budget Management
## 110                                                                                                                                                                                                                       Seaborn
## 111                                                                                                                                                                                                                    Amazon FBA
## 112                                                                                                                                                                                                                  Apache Spark
## 113                                                                                                                                                                                                                           SQL
## 114                                                                                                                                                                                                                        Python
## 115                                                                                                                                                                                                                           SQL
## 116                                                                                                                                                                                                                 List Building
## 117                                                                                                                                                                                                               Online Research
## 118                                                                                                                                                                                                                         Sales
## 119                                                                                                                                                                                                              Machine Learning
## 120                                                                                                                                                                                                                    JavaScript
## 121                                                                                                                                                                                                                        Python
## 122                                                                                                                                                                                                                    Data Entry
## 123                                                                                                                                                                                                                         NumPy
## 124                                                                                                                                                                                                             Linear Regression
## 125                                                                                                                                                                                                                 Data Analysis
## 126                                                                                                                                                                                                                        Python
## 127                                                                                                                                                                                                                        Python
## 128                                                                                                                                                                                                         Quantitative Research
## 129                                                                                                                                                                                                                    Matplotlib
## 130                                                                                                                                                                                                               Data Extraction
## 131                                                                                                                                                                                                   Business Process Automation
## 132                                                                                                                                                                                                             YouTube Marketing
##                              skill3                          skill4 totalHours
## 1                            Scrapy                    Data Science   98.29268
## 2                   Online Research                 Lead Generation   47.29268
## 3                      Excel Macros   Visual Basic for Applications -291.70732
## 4                      Data Science                Machine Learning  127.29268
## 5                            pandas              Microsoft Power BI -302.70732
## 6                 Political Science                          Python -292.70732
## 7           Artificial Intelligence                            Odoo  610.29268
## 8                   Medical Editing               Academic Research  -94.70732
## 9                               SQL                         Tableau   40.29268
## 10                              SQL                   Data Analysis -246.70732
## 11               Data Visualization                          Python         NA
## 12                              SQL                   Data Scraping -256.70732
## 13             Back-End Development           Front-End Development  595.29268
## 14                  Chief Architect           Google Cloud Platform  213.29268
## 15              Amazon Web Services                          GitHub -209.70732
## 16                     Web Scraping                          Python  375.29268
## 17                        Dashboard                   Looker Studio  514.29268
## 18                  Problem Solving                Google Analytics -227.70732
## 19                   Health Science                      Psychology -237.70732
## 20               Data Visualization                   Looker Studio  649.29268
## 21          Artificial Intelligence                           React  399.29268
## 22      Natural Language Processing             Python Scikit-Learn  -97.70732
## 23                              SQL           Business Intelligence  -96.70732
## 24                    Meeting Notes                             PHP -128.70732
## 25                    Google Sheets                      JavaScript  402.29268
## 26                    List Building                 Lead Generation         NA
## 27      Natural Language Generation     Natural Language Processing  610.29268
## 28                            MySQL                   Python Script  -79.70732
## 29                  Lead Generation                      Data Entry  689.29268
## 30                       Data Entry                   List Building  190.29268
## 31                           Python                    Data Science -284.70732
## 32                Business Analysis                    Excel Macros -218.70732
## 33                    Looker Studio                    Data Science -235.70732
## 34                 Data Integration                             SQL  135.29268
## 35                  Bot Development                    Data Science -298.70732
## 36                 Microsoft Office                     Infographic  456.29268
## 37              Python Scikit-Learn              Feature Extraction -224.70732
## 38                       Data Entry                   List Building -199.70732
## 39                  Microsoft Excel              Data Visualization  106.29268
## 40                              SQL                          Python -270.70732
## 41                       Data Entry                     Google Docs  491.29268
## 42              Python Scikit-Learn                   Deep Learning  -94.70732
## 43                  Lead Generation                  Salesforce CRM  -85.70732
## 44                    Data Scraping                  Beautiful Soup  300.29268
## 45                           Python                 Data Extraction  430.29268
## 46                      Apache Hive                        BigQuery -124.70732
## 47                          Node.js                          Vue.js  198.29268
## 48                           Python              Microsoft Power BI  678.29268
## 49                 Machine Learning                   Deep Learning  -32.70732
## 50               Microsoft Power BI           Business Intelligence -247.70732
## 51                       Data Entry                 Lead Generation   54.29268
## 52                  Content Writing            Jakarta Server Pages  625.29268
## 53                     Copy & Paste                      Data Entry  634.29268
## 54                           Plotly                   Data Analysis -226.70732
## 55                 Microsoft Office       Business Process Modeling -119.70732
## 56                              SQL                          Python -250.70732
## 57              Inorganic Chemistry                       Cosmetics  683.29268
## 58                  Data Collection             Database Management  -90.70732
## 59                    Data Analysis                 Microsoft Excel         NA
## 60                       Data Entry                 Online Research -277.70732
## 61                    Data Analysis              Financial Analysis -305.70732
## 62              Technology Tutoring                 Web Application -113.70732
## 63                             Java                    Apache Spark         NA
## 64           Machine Learning Model                            Domo  126.29268
## 65                       Data Entry                 Lead Generation -215.70732
## 66               Data Visualization                         Tableau  673.29268
## 67                      Bookkeeping                 Cost Accounting  -34.70732
## 68               Data Visualization                             SQL -254.70732
## 69           Editing & Proofreading                  Citation Style  216.29268
## 70                              SQL                          Python -281.70732
## 71                           Python                    Data Science -118.70732
## 72                Academic Research              Research Proposals -305.70732
## 73                    Deep Learning     Natural Language Processing -283.70732
## 74                    List Building                     Google Docs  514.29268
## 75               Microsoft Power BI                   Data Analysis         NA
## 76                              SQL                         Tableau -289.70732
## 77                  Microsoft Excel                   Data Analysis         NA
## 78                           Python                         Tableau -260.70732
## 79                 Machine Learning                    Data Science  -42.70732
## 80                       Statistics                    Data Science  -34.70732
## 81                          MongoDB                          Kibana  601.29268
## 82                  Microsoft Excel                     Data Mining   11.29268
## 83                           Django                           NumPy  554.29268
## 84                    Data Scraping                     Data Mining -185.70732
## 85                           Python                    Apache Spark -201.70732
## 86                           Report          Social Media Marketing   76.29268
## 87                           Python                         PyTorch  170.29268
## 88                           pandas             Python Scikit-Learn -300.70732
## 89                    Data Analysis                           Scrum -278.70732
## 90                          Node.js          Mobile App Development -240.70732
## 91               Microsoft Power BI                   Data Analysis -103.70732
## 92                           Python                      Matplotlib   73.29268
## 93                 Machine Learning                      Matplotlib         NA
## 94                  Climate Science                   Data Scraping -297.70732
## 95                          Angular                       .NET Core -226.70732
## 96                     Data Science                Machine Learning -280.70732
## 97               Browser Automation                          Python -218.70732
## 98                            Stata                      Statistics  221.29268
## 99                 Amazon SageMaker     Natural Language Processing -263.70732
## 100                 Computer Vision                    Data Science         NA
## 101         Database Administration SQL Server Integration Services   86.29268
## 102                Machine Learning                        Big Data -128.70732
## 103                          Python             Scientific Research -284.70732
## 104                   Google Sheets                      Automation -302.70732
## 105                Machine Learning              Data Visualization  499.29268
## 106                   Deep Learning     Natural Language Processing -277.70732
## 107                          Docker                      Kubernetes -274.70732
## 108                          Zapier                   Google Sheets   39.29268
## 109        Agile Project Management    Technical Project Management -157.70732
## 110                 Microsoft Excel                Machine Learning         NA
## 111           Amazon Seller Central                Amazon Wholesale -118.70732
## 112                            Java                          Python -304.70732
## 113                   Data Analysis                  Data Analytics -275.70732
## 114                     Data Mining                          Django -139.70732
## 115          Machine Learning Model                          Python -269.70732
## 116                Company Research             Database Management  129.29268
## 117           Twitter/X Ads Manager                       WordPress  161.29268
## 118                     Copywriting    Conversion Rate Optimization  175.29268
## 119                     WooCommerce  Interactive Data Visualization -302.70732
## 120                          Django                         Angular -148.70732
## 121                Machine Learning                   Deep Learning  687.29268
## 122                 Lead Generation             LinkedIn Recruiting -281.70732
## 123                          pandas                           MySQL -284.70732
## 124                     Mathematics                         Tableau -304.70732
## 125                Jupyter Notebook                           MySQL -294.70732
## 126              Model Optimization                    Apache Spark   46.29268
## 127                      JavaScript                   Elasticsearch -161.70732
## 128                 Market Research            Competitive Analysis  209.29268
## 129                          Plotly                             SQL -289.70732
## 130                   List Building                   Data Scraping  224.29268
## 131                          UiPath                      JavaScript -206.70732
## 132 Social Media Marketing Strategy                    Data Science  -53.70732
##       totalJobs          pca1        pca2 cluster
## 1    -5.7121212 -0.1904780785  0.11902678       2
## 2    -6.7121212 -0.0222205774  0.32776170       2
## 3   276.2878788 -1.2609284683  0.74672602       0
## 4     5.2878788 -2.2092244335 -8.91026876       0
## 5   -63.7121212  1.1536535877  0.05999681       2
## 6    -0.7121212  0.7325519907  0.40289790       2
## 7   -20.7121212 -1.3568555211 -0.35234145       1
## 8    54.2878788 -0.2635653195 -0.22539945       0
## 9   -53.7121212  0.2463103139 -0.14322740       2
## 10  -72.7121212  1.0000923627 -0.38161966       2
## 11  -62.7121212  0.9752671607  0.21672641       2
## 12  -50.7121212  0.8191959657 -0.57549742       2
## 13   85.2878788 -1.9828896482  0.26859717       0
## 14   63.2878788 -1.2187814053 -1.08536355       0
## 15  -72.7121212  0.9904956137 -0.01605936       2
## 16   -7.7121212 -0.8208487469  0.08703093       1
## 17  -29.7121212 -1.0278917203 -0.15619199       1
## 18  -42.7121212  0.8746199792  0.30256287       2
## 19   84.2878788 -0.0406651275  0.33746507       0
## 20  -37.7121212 -1.3772593820 -0.62785742       1
## 21    1.2878788 -0.9773872428 -0.07808896       1
## 22  -46.7121212  0.4909638748 -0.24652552       2
## 23  -45.7121212  0.3831881443 -0.71984167       2
## 24  289.2878788 -1.5318411633  1.72055081       0
## 25   90.2878788 -1.5932871918  0.18100746       0
## 26  -48.7121212  0.9023739883  0.36806608       2
## 27  -14.7121212 -1.8508724051 -2.52201699       0
## 28  -35.7121212  0.4935804319  0.36194722       2
## 29  134.2878788 -2.4552254966  0.79592582       0
## 30   21.2878788 -0.5618196906  0.32083397       2
## 31  -71.7121212  1.1437350708 -0.07511604       2
## 32  411.2878788 -2.2106031904  1.82716936       0
## 33  -43.7121212  0.9003564016  0.30182248       2
## 34  -59.7121212  0.0585100666 -0.20815340       2
## 35  -23.7121212  0.8596193084  0.12279119       2
## 36   49.2878788 -1.4631377939 -0.06338274       1
## 37  -62.7121212  0.9009839225 -0.25575072       2
## 38   83.2878788  0.0093078830  0.96584202       2
## 39  -25.7121212 -0.0674088978  0.08331993       2
## 40  -71.7121212  1.1497132779  0.10960345       2
## 41  278.2878788 -2.9912673804  1.18028635       0
## 42  -56.7121212  0.6684711413  0.28446697       2
## 43  -29.7121212  0.4919470539  0.50296904       2
## 44  209.2878788 -2.0772496876  0.98678244       0
## 45  -10.7121212 -0.9524625147 -0.04404083       1
## 46  -19.7121212  0.4164545797  0.06379255       2
## 47  -36.7121212 -0.2248132923 -0.04798827       2
## 48  -63.7121212 -1.2153938650 -0.45815750       1
## 49  -30.7121212  0.2452233129 -0.13427920       2
## 50  -60.7121212  0.9902355267 -0.00894013       2
## 51  101.2878788 -0.7339280102  0.85183729       2
## 52  -23.7121212 -1.3492215682 -0.25634623       1
## 53   71.2878788 -1.9046301417  0.57733237       0
## 54  -58.7121212  0.8793032722 -0.23889931       2
## 55  169.2878788 -0.8462383383  0.81714044       0
## 56  -20.7121212  0.7050637488  0.01872041       2
## 57  -30.7121212 -1.4062419921 -0.13764613       1
## 58  323.2878788 -1.8419541681  1.86850683       0
## 59  -73.7121212  0.7966515607 -0.88252817       2
## 60  -72.7121212  1.1928470845  0.20399936       2
## 61  -70.7121212  1.2664189235  0.31943766       2
## 62  -55.7121212  0.6678493532  0.10589967       2
## 63  -73.7121212  1.0283076517  0.07737884       2
## 64  -56.7121212  0.0009525236 -0.47841213       2
## 65  333.2878788 -1.6006421838  2.00164641       0
## 66   -8.7121212 -1.9426865359 -2.04830109       0
## 67  -68.7121212  0.6160457185  0.26786648       2
## 68  -68.7121212  1.0915438619  0.11470010       2
## 69   24.2878788 -0.6518381355  0.28347035       2
## 70  -22.7121212  0.8320428924  0.21483864       2
## 71  -52.7121212  0.6007126267 -0.16608293       2
## 72  -37.7121212  0.9887814018  0.16522130       2
## 73  -55.7121212  0.9169432577 -0.58384818       2
## 74   61.2878788 -1.5589800984  0.55092402       0
## 75  -72.7121212  1.0216900911  0.08137621       2
## 76  -72.7121212  1.1820792857  0.01841793       2
## 77  -72.7121212  1.0809481156  0.36750574       2
## 78  -58.7121212  0.9686274027 -0.18609586       2
## 79  -44.7121212  0.3855203290 -0.07148089       2
## 80  -68.7121212  0.4461727150 -0.55237152       2
## 81  -36.7121212 -1.1899160400 -0.22166763       1
## 82  -62.7121212  0.4148227894  0.02404732       2
## 83  -48.7121212 -1.0374552561 -0.44013367       1
## 84   25.2878788  0.3358565355  0.61331853       0
## 85  -61.7121212  0.7997807066 -0.45241859       2
## 86  120.2878788 -0.9004955650  0.97553195       2
## 87  -20.7121212 -0.2241346258  0.21878970       2
## 88  -68.7121212  1.1621991237 -0.05622848       2
## 89  -63.7121212  0.9974151120 -0.42722894       2
## 90  -62.7121212  0.9195479821 -0.34423177       2
## 91  -10.7121212  0.2078424470 -0.38616397       2
## 92   17.2878788 -0.2828120730  0.22174042       2
## 93  -73.7121212  1.0362087216  0.11552944       2
## 94  -60.7121212  1.1218269266  0.06983408       2
## 95  -64.7121212  0.9387613102 -0.16750701       2
## 96  -70.7121212  1.0485276274 -0.45434859       2
## 97  -66.7121212  1.0078982283  0.18348127       2
## 98    5.2878788 -0.9528846364 -1.79754123       0
## 99  -69.7121212  1.1197145855  0.11458143       2
## 100 -45.7121212  0.4707100026 -0.06133847       2
## 101  73.2878788 -0.6845278089  0.43999051       2
## 102 -67.7121212  0.3881285215 -1.84313449       0
## 103 -71.7121212  0.9659609974 -0.93350465       2
## 104 -45.7121212  0.9555267990 -0.24955660       2
## 105  73.2878788 -1.7723418897 -0.21488106       0
## 106 -41.7121212  0.8494339846 -0.33971779       2
## 107 -32.7121212  0.7431862026 -0.49578739       2
## 108 349.2878788 -2.4576771300  1.27739020       0
## 109 -52.7121212  0.7336150164  0.04147777       2
## 110 -65.7121212  1.0148725171  0.30011081       2
## 111 195.2878788 -0.9612341566  1.20772437       0
## 112 -65.7121212  1.1914309753  0.14824051       2
## 113 -46.7121212  1.0313685002  0.38127194       2
## 114 -67.7121212  0.8213763778  0.12636227       2
## 115 -53.7121212  1.0479550674  0.27650164       2
## 116  -1.7121212 -0.3227912732 -0.03094612       2
## 117  14.2878788 -0.4460476858  0.30535040       2
## 118 186.2878788 -1.8539187130 -0.15326649       0
## 119 -68.7121212  1.1867413903  0.04000996       2
## 120 -58.7121212  0.7517672165  0.01361487       2
## 121 -71.7121212 -1.1532713819 -0.34560929       1
## 122 117.2878788 -0.0075038114  1.19412694       0
## 123 -61.7121212  1.0775594656 -0.03514236       2
## 124 -46.7121212  1.0261919758  0.03343750       2
## 125 -71.7121212  1.2032378445  0.10087134       2
## 126  13.2878788 -0.2311876912  0.02663400       2
## 127 -63.7121212  0.7962346910 -0.09614593       2
## 128  54.2878788 -0.7940960302  0.59716120       2
## 129 -69.7121212  1.2017319538  0.22116306       2
## 130 144.2878788 -1.4097962758  1.02676112       0
## 131 -50.7121212  0.7389615204 -0.40629270       2
## 132  85.2878788 -0.4089144343  0.64367071       2
# Scatter plot of `dd`: `pca1` on the x-axis, `pca2` on the y-axis, with
# each point coloured by its `cluster` value. Left as a bare top-level
# expression so the plot is printed when the chunk runs.
ggplot(
  data = dd,
  mapping = aes(x = pca1, y = pca2, colour = cluster)
) +
  geom_point(size = 1.5, shape = "circle") +
  scale_colour_hue(direction = 1) +
  theme_minimal()