Group: A1 & A2

This document presents the top 5-10 data scientist skills in the retail trade industry, based on findings from nine top retail companies in the US. It is part of team project 3.

Finding for “Which are the most valued data science skills?”

Ans: Teamwork, Machine Learning, Statistics, R

Method:

I used web scraping to collect data scientist job descriptions from six top retail trade companies. I read in the raw data as strings, then compared and matched the skills found in each string against a list of in-demand general data science skills. After the skill matching, I summed up the demand from the six top retail companies. My retail trade ranking is based on the skills with the highest demand among the researched companies.

R

library(XML)
library(rvest)
## Warning: package 'rvest' was built under R version 3.4.2
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
## 
##     xml
library(RCurl)
## Loading required package: bitops
library(xlsx)
## Loading required package: rJava
## 
## Attaching package: 'rJava'
## The following object is masked from 'package:RCurl':
## 
##     clone
## Loading required package: xlsxjars
library(stringr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(urltools)
## Warning: package 'urltools' was built under R version 3.4.2
## 
## Attaching package: 'urltools'
## The following object is masked from 'package:xml2':
## 
##     url_parse
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.2
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)

Part 1: Read in raw data from HTML web pages

Job 1

# Job 1 (careers.walmart.com): download the posting and keep the text of the
# first node matching the qualifications selector.
link1 <- "https://careers.walmart.com/us/jobs/883210BR-data-scientist-sunnyvale-ca"
web1 <- read_html(link1)
job1 <- html_text(html_node(web1, ".qualification__value "))

Job 2

# Job 2 (careers.homedepot.com): download the posting and keep the text of the
# job-detail description element.
link2 <- "https://careers.homedepot.com/job/7720425/data-scientist-space-optimization-atlanta-ga/"
web2 <- read_html(link2)
job2 <- html_text(html_node(web2, "#gtm-jobdetail-desc"))

Job 3

# Job 3 (jobs.cvshealth.com): download the posting and keep the text of the
# first node with class "ats-description".
link3 <- "https://jobs.cvshealth.com/job/woonsocket/senior-manager-data-science/5770/5560710"
web3 <- read_html(link3)
job3 <- html_text(html_node(web3, ".ats-description"))

Job 4

# Job 4 (Target posting listed on glassdoor.com): download the listing and
# keep the text of the first node with class "desc".
link4 <- "https://www.glassdoor.com/job-listing/principal-data-scientist-target-JV_IC2940587_KO0,24_KE25,31.htm?jl=2552587569&ctt=1508462573479"
web4 <- read_html(link4)
job4 <- html_text(html_node(web4, ".desc"))

Job 5

# Job 5 (careers.lb.com): download the posting and keep the text of the
# job-detail description element.
link5 <- "http://careers.lb.com/job/7671684/data-scientist-bangalore-in/"
web5 <- read_html(link5)
job5 <- html_text(html_node(web5, "#gtm-jobdetail-desc"))

Job 6

# Job 6 (J. C. Penney posting listed on ziprecruiter.com): download the listing
# and keep the text of the first <ul> element on the page.
link6 <- "https://www.ziprecruiter.com/jobs/j-c-penney-corporation-inc-da33e2ae/senior-data-scientist-big-data-9f3e9a31"
web6 <- read_html(link6)
job6 <- html_text(html_node(web6, "ul"))

Part 2: Build a data frame

Extract each company’s name from the URL text

# Build one short company label per posting, in the same order as link1..link6.
Company <- c(link1, link2, link3, link4, link5, link6)
# Reduce each URL to its host name, e.g. "careers.walmart.com".
Company <- urltools::domain(Company)
# Keep only the label between the last two dots of the host,
# e.g. "careers.walmart.com" -> "walmart".
Company <- gsub(".*\\.(.*)\\..*", "\\1", Company, perl = TRUE)
# Postings 4 and 6 live on third-party job boards (glassdoor, ziprecruiter),
# so the host name is not the employer; set the employer by hand.
Company[4] <- "Target"
# link5 is careers.lb.com (L Brands); spell the label out instead of "lb".
# Previously this slot was mislabeled "JCPenny".
Company[5] <- "LBrands"
# link6 is the J. C. Penney posting hosted on ziprecruiter; without this
# override the row would be labeled "ziprecruiter".
Company[6] <- "JCPenny"

Create a table t

# Collect the six scraped descriptions in the same order as Company so that
# row i of the table pairs company i with its job text.
requirement <- c(job1, job2, job3, job4, job5, job6)
# stringsAsFactors = FALSE keeps both columns as character vectors; on R < 4.0
# the default would turn the long free-text descriptions into factors, which
# is the wrong type for the later string matching.
t <- data.frame(Company, requirement, stringsAsFactors = FALSE)
# Print the table for inspection.
t
##        Company
## 1      walmart
## 2    homedepot
## 3    cvshealth
## 4       Target
## 5      JCPenny
## 6 ziprecruiter
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      requirement
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                       Ph.D. in computer science or similar field or MS with 2+ years of related experienceDeep knowledge of machine learning, information retrieval, data mining, statistics, NLP or related field.Good functional coding skills in C++, Java, Scala in addition to good knowledge of one of the scripting languages such as Python or Perl.Experience working with large data sets and distributed computing tools a plus (Map/Reduce, Hadoop, Hive, Spark etc.)Superior ability to analyze and interpret the results of product experiments.Proven experience working with statistical languages such as R.Strong communication skills both written and verbal
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                    Position Purpose: The Data Scientist will possess a broad skill set with great depth associated with any of the following skills: data mining, optimization, statistics, machine learning, programming, data modeling, database design, retail domain knowledge, and reporting. Specifically, the person will assist in the development of the Home Depot advanced analytical infrastructure, process, and capabilities that informs decision making. The Data Scientist will also serve as a mentor for others and provide on-the-job training and/or guidance. Additionally, the Data Scientist will develop and reinforce partnerships with various business units and lead large-scope data projects that will enable advanced analytics projects to be successful in a variety of implementation scenarios. This role will use a variety of technologies to utilize newer data platforms that allow for the storage and analysis of large volumes of data. This person will assist the team in leveraging these platforms across a variety of analytical applications to serve the business. In addition to utilizing standard Descriptive and Predictive methodologies, the scope of the job will also include the design and development of technology solution and applications for space optimization, including predictive modeling, cluster analysis and anomaly detection.\n Major Tasks, Responsibilities & Key Accountabilities: \n30%-Design, develop, and implement advanced analytical architectures to enable business insights\n20%-Codes, Analyzes and Interprets advanced analytical methods applied to data from various sources. 
Compiles reports, charts, and tables based on business requirements.\n20%-Utilize, enhance, and support the Home Depot Advanced Analytics Process Framework in projects. Assist in the evaluation of software, technologies, and platforms to optimize the advanced analytical environment. Function as a technical liaison between the business and various I.T. functional leads for advanced analytical project\n20%-Consult with business teams on the appropriate selection, utilization, and interpretation of advanced statistical methodologies. 5%-Assist project teams with the creation and review of relevant project artifacts, particularly in respect to I.T. Review Board tollgates.\n5%-Communicate and educate technical and non-technical employees on advanced analytics.\n Nature and Scope: This position reports to Sr Manager, Space OptimizationThis position has no Direct Reports \n Environmental Job Requirements: Located in a comfortable indoor area. Any unpleasant conditions would be infrequent and not objectionable. \n Travel: Typically requires overnight travel less than 10% of the time.Standard Minimum Qualifications:  Must be eighteen years of age or older. Must be able to legally work in the U.S. \n Education Required: The knowledge, skills and abilities typically acquired through the completion of a Master<U+0092>s Degree Program or equivalent degree in a field of study related to the job. \n Work Experience Required: 3 years\n Physical Requirements: Most of the time is spent sitting in a comfortable position and there is frequent opportunity to move about. On rare occasions, there may be a need to move or lift light articles. \n Additional Qualifications:  Extensive work experience with large database platforms (e.g., Teradata, ParAccel, Hadoop, Mahout, Cassandra, etc.) 
or other similar platforms Broad Experience in Data Modeling Broad Experience with Business Requirements Analysis Broad Experience with Business Intelligence reporting Broad Experience with analytical application design \n Preferred Qualifications: \nMaster<U+0092>s and/or Ph.D. Degree in Data Science, Analytics, Statistics, Math or related quantitative field\nExtensive work experience in Application Development, Optimization, Data Mining, Data Modeling, and Statistical Analysis\nPrevious work experience in Retail\nBroad experience with large-scale data analysis and a demonstrated ability to identify key insights from data to solve business problems\nExtensive programming experience in R, Python, CPLEX, SQL, Java\n Knowledge, Skills, Abilities and Competencies: Ability to design and implement advanced analytical applications Programming skills Statistical Analysis skills Data Visualization skills Strong communication and data presentation skills Ability to quickly adapt to new technologies, tools and techniques Flexible and responsive; able to perform in a fast paced, dynamic work environment and meet aggressive deadlines Ability to work with technical and non-technical team members
## 3 \r\n\r\n\r\n  Job ID:\r\n    685303BR Business Area:\r\n    CVS HealthPrimary Location:\r\n    RI <U+2012> Woonsocket Job Type:\r\n    Full Time  Job Category:\r\n    \r\nCustomer Service, Corporate\r\n      Clinical Licensure Required:\r\n    N/A Location Code OF001         \r\n        Position Summary:Leverage data and analytics to identify and develop innovative and out-of-the-box analytical solutions to support personalization strategies across customer cohorts, this may include targeting, segmentation, offer optimization, multi-channel contact cadence, and messaging.  Lead development of advanced analytical solutions using applied statistical techniques, such as: linear regression, factor analysis, logistic regression, generalized linear models, tree models (CHAID, CART, MART), correspondence and cluster analysis, survival analysis, and, attrition/retention modeling. Partner with cross-functional leads on customer growth and execution teams to launch these strategies and close loop performance management.  Leverage analytics to support next generation loyalty program design.  Own execution and delivery of analytical projects and insights to internal business partners and present to the leadership.  Utilize modeling software, data management and system requirements, scoring process, and predictive model output integration into business systems.  Evaluate/develop tools, methodologies or infrastructure building capability and foster innovation. Help and direct others on projects, including project management, business problem solving, guidance on methodologies and quality assurance of results.  Access database tables. Write simple queries by performing different types of Joins.  Manage different data types , Perform aggregations joining different tables. Create new tables from existing tables & copy data & table attributes from the source table.  
Use advanced ranking functions, Ordered Analytic Functions for Group, Sum, Cumulative & Remaining Window Aggregations. Create volatile & global temporary tables. Code correlated sub queries, which reduce the need for temporary tables. Code complex queries which need either sub-query o sequential queries to support complex analysis. Implement column compression for performance & reduced storage costs using single or multi-value compression. Tune queries with execution without screwing up performance.  Access, create new, manipulate and manage datasets. Use SAS functions, Loops and arrays and handle errors. Generate reports using proc report procedure and enhance using SAS reporting options. Access data using PROC SQL, connect to different databases using SAS ACCESS, transfer data between environments.  Automate programs by defining and calling macros using the SAS Macro Language. Create re-usable SAS macro codes. Use statistical procedures SAS STAT such as PROC MEANS, PROC FREQ, PROC REG, and PROC ANOVA.  Perform Descriptive statistics, frequency counts, cross tabulations, correlations, and t-tests. Use the techniques Regression Analysis, Multivariate Regression Analysis, Linear/Logistic Regression, Classification & Regression Trees, Factor analysis, Cluster analysis, and CHAID. Direct a team of 1 to 2 data scientists and analytical vendor/partner resources, guiding them in the analytical process and solution development and integration with personalization plans, as well as merchandising strategies and decisions. Required Qualifications:A Bachelor<U+0092>s degree or foreign equivalent in Math, Statistics, Economics, Operations Research, Computer Science, Engineering, or related field and 5 years of  experience in customer analysis & insight development, customer behavioral analytics, and database marketing.  
Experience must include 5 years in Advanced SQL programming, Advanced SAS programming, Predictive Model Development, Advanced Statistical Analysis, and, Marketing & Customer Analysis. Must also have 3 years of analytic team management/leadership experience.  Must apply by 11/05/17. Preferred Qualifications:Please see required qualifications. Education:A Bachelor<U+0092>s degree or foreign equivalent in Math, Statistics, Economics, Operations Research, Computer Science, Engineering, or related field.  Please see required qualifications. Business Overview:CVS Health, through our unmatched breadth of service offerings, is transforming the delivery of health care services in the U.S. We are an innovative, fast-growing company guided by values that focus on teamwork, integrity and respect for our colleagues and customers. What are we looking for in our colleagues? We seek fresh ideas, new perspectives, a diversity of experiences, and a dedication to service that will help us better meet the needs of the many people and businesses that rely on us each day. As the nation<U+0092>s largest pharmacy health care provider, we offer a wide range of exciting and fulfilling career opportunities across our three business units <U+0096> MinuteClinic, pharmacy benefit management (PBM) and retail pharmacy. Our energetic and service-oriented colleagues work hard every day to make a positive difference in the lives of our customers.CVS Health is an equal opportunity employer. We do not discriminate in hiring or employment against any individual on the basis of race, ethnicity, ancestry, color, religion, sex/gender (including pregnancy), national origin, sexual orientation, gender identity or expression, physical or mental disability, medical condition, age, veteran status, military status, marital status, genetic information, citizenship status, unemployment status, political affiliation, or on any other basis or characteristic prohibited by applicable federal, state or local law. 
CVS Health will consider qualified job candidates with criminal histories in a manner consistent with federal, state and local laws. CVS Health will not discharge or in any other manner discriminate against any Colleague or applicant for employment because such Colleague or applicant has inquired about, discussed, or disclosed the compensation of the Colleague or applicant or another Colleague or applicant. Furthermore, we comply with the laws and regulations set forth in the following EEO is the Law Poster: EEO IS THE LAW and EEO IS THE LAW SUPPLEMENTFederal law requires employers to provide reasonable accommodation to qualified individuals with disabilities. If you require assistance to apply for this job, please contact us by clicking AA EEO CVS HealthFor inquiries related to the application process or technical issues please contact the Kenexa Helpdesk at 1-855-338-5609. For technical issues with the Virtual Job Tryout assessment, contact the Shaker Help Desk at 1-877-987-5352. Please note that we only accept resumes via our corporate website: https://jobs.cvshealth.com/\r\n       \r\n         \r\n         \r\n        
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                               JOIN US AS A PRINCIPAL LEAD DATA SCIENTISTAbout this Opportunity Support all business areas of Target with critical data analysis that helps team members make profitable decisions. Become a forecast expert, business analyst or team lead and utilize tools like decision trees, clustering, regression, time series, structural equation modeling, linear programming, genetic algorithms, SAS, SQL, VBA and OLAP  Use your skills, experience and talents to be a part of groundbreaking thinking and visionary goals. As a Principal Lead Data Scientist, you'll take the lead as you Interprets problems and provide solutions to business problems using data analysis, data mining, optimization tools, and machine learning techniques and statisticsDesign and build data-science and technology based algorithmic solutions to address business needs of TargetInterprets problems and provide solutions to business problems using data analysis, data mining, optimization tools, and machine learning techniques and statisticsDesign large scale models using Logistic Regression, Linear Models Family (Poisson models, Survival models, Hierarchical models, Naïve-Bayesian estimators), Conjoint Analysis, Spatial models, Time-series modelsApplies a global approach to analytical solutions-both within a business area and across the enterpriseUnderstands interrelationships and impacts of data and technology upon the Target environmentResponsible for project prioritization, planning, documentation (objective, requirements, analysis plan, modeling, results, support, training, etc.) 
and execution.Develops strategic plans and provides analytical leadership across multiple projects end to endMaintains a current understanding of the retail industry and provides an analytical vision for determining the impact of retail trends for BI&ADrives Research & Development initiatives to develop standards for new analytical areasPrepares and conducts trainings and mentors team members on latest analytical methodologiesEngages & inspires team members to deep dive into analytical problems.Partners with technology teams to drive prototyping efforts of analytical solutionsParticipates in internal & external technology & analytical forums and discussionsRequirementsM.Tech, M.Sc. (+10 years of relevant exp), Ph.D. (+7 years of relevant experience) in Engineering, Operation Research, Mathematics, Engineering, Statistics preferred5+ years of experience deploying algorithms in a production environmentExperience designing algorithms on Hadoop ecosystemThorough knowledge in 2 or more open source softwares, like Python, Pig, hive, Mahout, R etcA strong passion for empirical research and for answering complex questions with dataExcellent written and verbal communication skillsDemonstrated ability to work with ambiguous problem definitions, recognize dependencies and deliver impactful solutions through logical problem solving and technical ideationsAbility to learn new analytical methods and technologies and apply in practical business problemsAbility to independently drive innovative programs.Must be able to handle multiple strategic projects and handle Global cross functional teams.Ability to work with voluminous data from various sources.Benefits Eligible team members will receive market competitive package including competitive pay, health, accidental and life insurance coverage, gratuity and provident fund, training and development and other perks and benefits. Target is an Equal Employment Opportunity Employer and is a drug-free workplace. 
About Target® Expect the Best. At Target, we have a vision: to become the best - the best culture and brand, the best place for growth and the company with the best reputation. We offer an inclusive, collaborative and energetic work environment that rewards those who perform. We deliver engaging, innovative and on-trend experiences for our team members and our guests. We invest in our team members' futures by developing leaders and providing a breadth of opportunities for professional development. It takes the best to become the best, and we are committed to building a team that does the right thing for our guests, shareholders, team members and communities.  Minneapolis-based Target Corporation serves guests at stores nationwide and at Target.com. Target is committed to providing a fun and convenient shopping experience with access to unique and highly differentiated products at affordable prices. Since 1946, the corporation has given 5 percent of its income through community grants and programs like Take Charge of Education®.
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Description\n\nPurpose\nEnterprise Analytics group is responsible for managing customer data, statistical modeling and advanced analytics requests from all the brands <U+0096> Victoria<U+0092>s Secret, Bath & Body Works, Henri Bendel and La Senza. The Data Scientist will be responsible to analyze and model structured data using advanced statistical methods and implement algorithms and software needed to develop solutions to business questions and present findings with actionable recommendations. 
Responsibilities\n<U+0095>\tAnalyze and model structured data using advanced statistical methods and implement algorithms and software needed to perform analysis<U+0095>\tDeliver projects around customer & marketing analytics including ad-hoc requests originating from enterprise wide brands.<U+0095>\tDetermine the significant relevance of new data sets to a business strategy and how to potentially integrate into existing investment strategies.<U+0095>\tContribute to in house product development based on analytical methodologies.<U+0095>\tQuery structured & unstructured data (from hybrid sources & platforms) and perform exploratory data analysis for further advanced modeling <U+0095>\tPresent ideas and findings with actionable recommendations in an easily consumable manner.<U+0095>\tParticipate in peer reviews of projects & assist other analysts to process & synthesize multiple & large data sets\nQualifications\nEducation\n<U+0095>\tB.S. degree in Computer Science, Mathematics, Statistics<U+0095>\tM.S.  in Computer Science,  applied mathematics, statistics, operations research, quantitative social sciences will be preferred\nSkills / Experience\n<U+0095>\t3-5 years of relevant experience in customer/marketing/merchandising analytics role preferably in retail domain<U+0095>\tStrong understanding of computer technology with strong programming skills. Expert level proficiency R, Python. 
<U+0095>\tExperience in working with relational databases (Teradata, Oracle) with advanced SQL programming skills<U+0095>\tIn-depth knowledge of statistical procedures that are applied in Supervised / Unsupervised problems<U+0095>\tBasic-Intermediate level proficiency in SAS (Base SAS, Enterprise Guide, Enterprise Miner) & in UNIX<U+0095>\tTrack record of applying machine learning techniques to marketing and merchandizing ideas<U+0095>\tExperience in Big Data platforms like Hadoop platforms (Map-R, Hortonworks & others) , Aster and Graph Databases<U+0095>\tStrong written & verbal communication skills. Ability to communicate complicated statistical analytical concepts & solutions to business stakeholders in a simplified comprehendible manner<U+0095>\tExperience in operations research / optimization will be good to have.\nPersonal Characteristics<U+0095>\tUnique individuals that are innovators<U+0095>\tHighly energized personality with a positive attitude and ability to work with minimal supervision, prioritize, multi task and work under tight timelines<U+0095>\tOrganized and able to present ideas<U+0095>\tStrong Work Ethic. Assertive yet willing to work within a team and take on any task<U+0095>\tProfessional demeanoro\tAble to present, communicate, and is approachable by business executiveso\tHas self-reliance and uses good common sense o\tIs able to build strong relationships and build out network with peers and data providers<U+0095>\tIntellectually curiouso\tIs able to identify and present obscure data that is unique and usefulo\tGenuinely interested in data and how data can drive returnso\tStays current with business results, strategies, industry standards and best practices especially in the field of data sciences & machine learning
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                               Apply your expertise in machine learning/data mining techniques and build forecasting, prediction, segmentation, recommendation and fraud detection systems in retail domainCollaborate with product and engineering teams to solve business problems and identify opportunitiesBuild ML algorithms to drive personalized and relevant customer experienceExtend/augment company data with third party data, enhance data collection procedures to include information required to build analytic systemsAssist with designing and building infrastructure to facilitate analytics and experimentationPresent results and make recommendations in a clear manner to leadershipProvide thought leadership and mentor data science community across the organization

Create a function to check for a keyword in a string

x <- ""  # placeholder global kept from the original script; y() does not read it

# Flag which job postings mention a keyword.
#
# Args:
#   x:           regular expression (grepl syntax) to search for.
#   ignore_case: if TRUE, match regardless of letter case. Defaults to FALSE,
#                i.e. case-sensitive matching.
#
# Returns:
#   A numeric vector with one entry per row of the global data frame `t`:
#   1 where `t$requirement` matches `x`, 0 otherwise (missing text counts as 0).
#
# Note: this reads the global data frame `t` and its `requirement` column
# rather than taking the data as an argument.
y <- function(x, ignore_case = FALSE) {
  # grepl is vectorized over the requirement column, so the result is sized
  # by the data itself instead of a hard-coded number of postings.
  as.numeric(grepl(x, t$requirement, ignore.case = ignore_case))
}

Create the binary skill data set

# Build one 0/1 indicator vector per skill (one entry per job posting) by
# searching the posting text with y(), then combine them into data frame `k`.
# NOTE(review): y() calls grepl with its default case-sensitive matching, so
# lower-case patterns such as "java" or "teradata" will not match capitalised
# mentions ("Java", "Teradata") -- confirm whether that is intended.

# Analytics concepts
MachineLearning <- y("machine learning")
Model <- y("model")
Statistic <- y("statistic")
BA <- y("business analytic")  # pattern was misspelled "businese analytic"

# "big data" and "large data" count as the same skill; pmax keeps the
# indicator at 0/1 even when a posting uses both phrases (a sum would give 2).
BigD <- y("big data")
LargeD <- y("large data")
BigData <- pmax(BigD, LargeD)

# Analysis tools
# Word boundaries so only the standalone language name "R" matches, not every
# capital R inside words such as "Responsibilities" or "Regression".
R <- y("\\bR\\b")
DataMining <- y("data mining")
Py <- y("Python")
Excel <- y("Excel")
VBA <- y("VBA")
SAS <- y("SAS")
SPSS <- y("SPSS")
# Variable/column name kept as-is for downstream code; the search pattern
# now uses the product's actual spelling.
Mathlab <- y("Matlab")

# Programming languages and data platforms
SQL <- y("SQL")
Java <- y("java")
Hadoop <- y("Hadoop")
c <- y("C\\+\\+")  # "+" is a regex metacharacter, hence the escapes
Scala <- y("scala")
Hive <- y("Hive")
Perl <- y("Perl")
Spark <- y("Spark")
Tableau <- y("Tableau")
Teradata <- y("teradata")

# Soft skills
Speak <- y("Communication")
Write <- y("write")
Leadership <- y("leadership")
TeamWork <- y("team")


# One row per posting, one 0/1 column per skill.
k <- data.frame(
  MachineLearning, Model, Statistic, BA, BigData, R, DataMining, Py, Excel,
  VBA, SAS, SPSS, Mathlab, SQL, Java, Hadoop, c, Scala, Hive, Perl, Spark,
  Tableau, Teradata, Speak, Write, Leadership, TeamWork
)

k
##   MachineLearning Model Statistic BA BigData R DataMining Py Excel VBA SAS
## 1               1     0         1  0       1 1          1  1     0   0   0
## 2               1     1         1  0       1 1          1  1     0   0   0
## 3               0     1         1  0       0 1          0  0     0   0   1
## 4               1     1         1  0       0 1          1  1     1   1   1
## 5               1     1         1  0       1 1          0  1     0   0   1
## 6               1     0         0  0       0 0          1  0     0   0   0
##   SPSS Mathlab SQL Java Hadoop c Scala Hive Perl Spark Tableau Teradata
## 1    0       0   0    0      1 1     0    1    1     1       0        0
## 2    0       0   1    0      1 0     0    0    0     0       0        0
## 3    0       0   1    0      0 0     0    0    0     0       0        0
## 4    0       0   1    0      1 0     0    0    0     0       0        0
## 5    0       0   1    0      1 0     0    0    0     0       0        0
## 6    0       0   0    0      0 0     0    0    0     0       0        0
##   Speak Write Leadership TeamWork
## 1     0     0          0        0
## 2     0     0          0        1
## 3     0     0          1        1
## 4     0     0          1        1
## 5     0     0          0        1
## 6     0     0          1        1

Part 3: Statistical Results

Find top 9 skills based on job demand

# Total demand per skill: number of postings (rows of k) that mention it.
sumCol <- colSums(k, na.rm = FALSE)
# Take the skill names from k itself so they cannot drift out of sync with
# the columns actually present.
skill <- names(sumCol)

# Two-column summary table (skill name, demand count), built directly so the
# counts stay numeric instead of passing through a character matrix.
a <- data.frame(skill = skill, sumCol = as.numeric(sumCol), row.names = skill)

# Sort ascending by demand; covers every row, however many skills there are.
a <- a[order(a$sumCol), ]
plot_ly(a, x = ~sumCol, y = ~skill, type = 'bar', orientation = 'h', name = 'top skils for Data Scientist in retail trade')

# Nine most demanded skills; head() returns fewer rows (rather than NA rows)
# when fewer than nine skills exist.
topmost <- head(a[order(a$sumCol, decreasing = TRUE), ], 9)

# Refer to columns by bare name inside aes(); ggplot2 resolves them in
# `topmost` via the data argument.
ggplot(topmost, aes(x = skill, y = sumCol, fill = sumCol)) +
  geom_bar(stat = "identity") +
  xlab("Skills") +
  ylab("Frequency") +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 65, hjust = 1)) +
  ggtitle("Topmost Skills of Data Scientist in Retail Trade")