Step 1. Data collection

Step 2. Read all of the data

# Load the vacancy export from the working directory and print the
# structure (column names, types, first values) of the resulting data frame.
vacancy <- read.csv(file = "vacancy.csv")
str(object = vacancy)
## 'data.frame':    16215 obs. of  24 variables:
##  $ X                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ specialization_id: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ vacancy_name     : Factor w/ 6084 levels "'Менеджер по консолидированным авто отправкам",..: 4882 4882 984 3309 4695 3845 3116 6015 221 2108 ...
##  $ salary_to        : int  NA NA NA 150000 300000 NA 500000 NA NA NA ...
##  $ salary_from      : int  NA NA NA 70000 150000 NA 120000 NA NA 100000 ...
##  $ salary_currency  : Factor w/ 6 levels "EUR","KGS","KZT",..: NA NA NA 3 3 NA 3 NA NA 3 ...
##  $ requirement      : Factor w/ 10639 levels "1- знание английского и русского языков, умение писать официальные письма, креатив.",..: 3598 3598 4591 1112 10081 5996 1638 3181 398 9275 ...
##  $ responsibility   : Factor w/ 10133 levels "# установи_ контакт с покупателем.",..: 2064 2064 7041 7835 8476 6074 7123 6994 489 8723 ...
##  $ area_id          : int  159 159 160 160 159 174 160 160 160 160 ...
##  $ area_name        : Factor w/ 54 levels "Акколь","Аксай (Казахстан)",..: 9 9 7 7 9 29 7 7 7 7 ...
##  $ created_at       : Factor w/ 11966 levels "2016-11-17T01:03:52+0300",..: 11941 11941 11920 11916 11889 11886 11886 11883 11878 11871 ...
##  $ vacancy_url      : Factor w/ 12384 levels "https://hh.ru/applicant/vacancy_response?vacancyId=11617814",..: 9546 9546 7796 12367 5790 9423 6988 7708 6972 566 ...
##  $ employer_id      : int  942300 942300 984620 1491586 2271389 60607 1288140 60607 39474 1065869 ...
##  $ employer_name    : Factor w/ 5399 levels "(1 st) ТМ Aviator",..: 1611 1611 3886 4332 1338 3832 1797 3832 1475 1896 ...
##  $ employer_url     : Factor w/ 5399 levels "https://hh.ru/employer/1000042",..: 5277 5277 5371 1111 2935 4615 652 4615 4446 165 ...
##  $ employer_checked : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ address_city     : Factor w/ 95 levels "Абай","Аксай",..: 8 8 7 7 NA NA NA NA NA 7 ...
##  $ address_building : Factor w/ 694 levels " 2/14","1","1 ",..: 183 183 445 584 NA NA NA NA NA 108 ...
##  $ address_street   : Factor w/ 762 levels "1-й микрорайон",..: 472 472 421 264 NA NA NA NA NA 626 ...
##  $ address_lat      : num  51.1 51.1 43.2 43.2 NA ...
##  $ address_long     : num  71.4 71.4 76.9 76.9 NA ...
##  $ vacancy_id       : int  19161263 19161263 19125118 19224792 19066750 19157842 19109433 19123608 19109088 18545370 ...
##  $ vacancy_type     : Factor w/ 3 levels "anonymous","direct",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ vacancy_alter_url: Factor w/ 12384 levels "https://career.ru/vacancy/11617814",..: 9851 9851 8242 12372 6398 9736 7505 8160 7489 1703 ...

Step 3. Start the analysis

# Keep only the rows with area_id 160 (stored as `almaty`), then split them
# into one data frame per specialization_id of interest. subset() drops rows
# where the condition is NA.
almaty <- subset(vacancy, area_id == 160)

itTelecom <- subset(almaty, specialization_id == 1)
bugalteria <- subset(almaty, specialization_id == 2)
marketing <- subset(almaty, specialization_id == 3)
adminpersonal <- subset(almaty, specialization_id == 4)
bank <- subset(almaty, specialization_id == 5)
build <- subset(almaty, specialization_id == 20)
transport <- subset(almaty, specialization_id == 21)

* This is the average minimum salary offered for each specialization

# Average advertised minimum salary (salary_from) per specialization,
# rounded to whole units.
#
# salary_currency has several levels (EUR, KGS, KZT, ...), so averaging
# salary_from over every row would mix amounts quoted in different
# currencies. Only offers quoted in KZT are kept before averaging; offers
# without a minimum salary are dropped, as before. Means recorded before
# this filter was introduced were computed over all currencies and will
# differ from the values produced here.

# Return the non-missing salary_from values of `vacancies` that are quoted
# in `currency`. Rows with a missing currency are excluded.
salaryFromIn <- function(vacancies, currency = "KZT") {
  sameCurrency <- !is.na(vacancies$salary_currency) &
    vacancies$salary_currency == currency
  na.omit(vacancies$salary_from[sameCurrency])
}

salaryIt <- salaryFromIn(itTelecom)
meanOfSalaryIt <- round(mean(salaryIt), 0)

salaryMarket <- salaryFromIn(marketing)
meanOfSalaryMarket <- round(mean(salaryMarket), 0)

salarybugalteria <- salaryFromIn(bugalteria)
meanOfSalarybugalteria <- round(mean(salarybugalteria), 0)

salaryAdmin <- salaryFromIn(adminpersonal)
meanOfSalaryAdmin <- round(mean(salaryAdmin), 0)

salarybank <- salaryFromIn(bank)
meanOfSalarybank <- round(mean(salarybank), 0)

salarybuild <- salaryFromIn(build)
meanOfSalarybuild <- round(mean(salarybuild), 0)

salaryTransport <- salaryFromIn(transport)
meanOfSalaryTransport <- round(mean(salaryTransport), 0)

# Gather the per-specialization means and their display labels. Both vectors
# are listed in the same order, so position i of namesSalary labels
# position i of meanOfSalary.
meanOfSalary <- c(
  meanOfSalarybank,
  meanOfSalaryAdmin,
  meanOfSalarybugalteria,
  meanOfSalaryMarket,
  meanOfSalaryIt,
  meanOfSalaryTransport,
  meanOfSalarybuild
)
namesSalary <- c("банк", "управ", "бугал", "продажи", "ит", "транс", "строит")
print(meanOfSalary)
## [1] 118473  89441 119957 106611 135691 109817 157345
a <- data.frame(namesSalary, meanOfSalary)
# plot(a) only works when the label column is a factor (the data.frame()
# default before R 4.0). With a character column it fails with
# "need finite 'xlim' values". barplot() draws one bar per specialization,
# works with either column type, and keeps the order defined above.
barplot(a$meanOfSalary, names.arg = as.character(a$namesSalary))

* Step 4. Further analysis will explore these questions: * The highest and the lowest wages in selected cities * The most popular profession in Kazakhstan overall and in each city separately * Which specialization offers the highest wage * And so on * To be continued…