Load Database

Connect to the MySQL server, join the normalized tables into a single data frame of job postings, and preview the first few rows.

# Open a connection to the MySQL database. All credentials and the host are
# read from `params` (presumably the R Markdown document parameters -- TODO
# confirm) so that no secrets are hard-coded in this file.
azuredb <- dbConnect(
  MySQL(),
  user = params$dbuser,
  password = params$dbpass,
  dbname = params$dbname,
  host = params$dbhost
)

# Pull one row per job posting, replacing the foreign keys stored in `jobs`
# (title, cid, orid) with the values looked up from the job_title, company
# and onsite_remote tables. The comma-separated FROM list combined with the
# WHERE equalities acts as an inner join, so a job whose key has no match in
# any of the lookup tables is not returned.
jobs <- dbGetQuery(azuredb, 
  "SELECT 
      j.id as id, t.title as title, c.cname as company, o.val as onsite_remote, 
        j.descr as `description`, j.salary as salary, j.location as location, j.criteria as criteria, 
        j.posted as posted, j.link as link
   FROM 
      jobs j, job_title t, company c, onsite_remote o
   WHERE 
      j.title = t.id AND j.cid = c.id AND j.orid = o.id;")

# Preview the first six rows of the result.
head(jobs)
##   id                          title company onsite_remote
## 1  1                   Data Analyst  PayPal        onsite
## 2  2                   Data Analyst  PayPal        onsite
## 3  3                   Data Analyst  PayPal        onsite
## 4  4 Data Analyst - Recent Graduate  PayPal        onsite
## 5  5                   Data Analyst  PayPal        onsite
## 6  6                   Data Analyst  PayPal        onsite
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         description
## 1 At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:What you need to know about the role - We’re looking for a Risk Analyst to join the team and help in our efforts to manage our global seller risk strategy. You will work on the complex task of keeping fraudsters and bad actors away from PayPal’s platform. If you’re looking to make an impact at the red-hot intersection of e-Commerce, online payment and financial risk management, you’re at the right place. Meet our team The PayPal Seller Risk team is responsible for keeping the PayPal platform safe and secure and delivering a satisfactory experience for both the sellers and buyers. This is an exciting, fast paced team where the contribution by team members can significantly impact PayPal’s bottom line as well as our customers’ experience.Job Description:Your way to impactRisk Data Analysts apply their analytical and technical skills into researching data, monitoring and creating logics that stop fraudulent activity on PayPal’s platform. They analyze existing loss trends and define logics, solutions and operational processes that prevent them - while delivering best-in-class customer experience for PayPal users.Risk Data Analysts work closely with one another, and with Risk Operations in order to keep learning about current fraud. They also partner closely with PayPal Product teams to define features and integrate risk prevention tools to support business goals.Our team members are excited about solving analytical problems. They are bright, they are responsible, and they know how to follow through. 
If you’re also like that, we’d love to hear from you.Your day to dayFight fraudulent activity in one’s domain of ownership by analyzing existing loss trends through review of specific cases, research on our payments database and incorporation of feedback from operations teams.Based on data-driven analysis, formulating risk solutions to ensure optimal balance between user experience, business enablement, operational expense and loss exposure related to proposed solutionsWork closely with product teams on development of new products to identify potential gaps that might create loss, and define requirements to mitigate those risks. Have a good understanding of general business trends and directions to be able to put own work in the broad business context.Work with various partners within PayPal to coordinate the execution of fraud mitigation solutions.Demonstrate flexibility that is needed to succeed within a matrix organization while being proactive and comfortable with working alongside stakeholders from different domains.Develop controls and monitoring dashboards to ensure performance against business goal, regulatory requirements and business priorities.What Do You Need To Bring-Proficiency in SQL and ExcelProficiency in at least one statistical analysis tool: SAS / R / Python/ HadoopStrong analytical skills: ability to build quick estimates using back-of-the-envelope analysis, structure (and, if needed, execute) more complex analyses, pull together business cases, navigate multidimensional sets of tradeoffs. Above all, the job calls for comfort with data – ability to manipulate it, question its validity, interpret it, and develop recommendations based on itStrong written, oral, and interpersonal skills, including the ability to explain and/or present analysisDedicated, proactive, curious and eager to learn new approaches / methodologies – a mustOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. 
And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. 
Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0096181
## 2 At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:What you need to know about the role - We’re looking for a Risk Analyst to join the team and help in our efforts to manage our global seller risk strategy. You will work on the complex task of keeping fraudsters and bad actors away from PayPal’s platform. If you’re looking to make an impact at the red-hot intersection of e-Commerce, online payment and financial risk management, you’re at the right place. Meet our team The PayPal Seller Risk team is responsible for keeping the PayPal platform safe and secure and delivering a satisfactory experience for both the sellers and buyers. This is an exciting, fast paced team where the contribution by team members can significantly impact PayPal’s bottom line as well as our customers’ experience.Job Description:Your way to impactRisk Data Analysts apply their analytical and technical skills into researching data, monitoring and creating logics that stop fraudulent activity on PayPal’s platform. They analyze existing loss trends and define logics, solutions and operational processes that prevent them - while delivering best-in-class customer experience for PayPal users.Risk Data Analysts work closely with one another, and with Risk Operations in order to keep learning about current fraud. They also partner closely with PayPal Product teams to define features and integrate risk prevention tools to support business goals.Our team members are excited about solving analytical problems. They are bright, they are responsible, and they know how to follow through. 
If you’re also like that, we’d love to hear from you.Your day to dayFight fraudulent activity in one’s domain of ownership by analyzing existing loss trends through review of specific cases, research on our payments database and incorporation of feedback from operations teams.Based on data-driven analysis, formulating risk solutions to ensure optimal balance between user experience, business enablement, operational expense and loss exposure related to proposed solutionsWork closely with product teams on development of new products to identify potential gaps that might create loss, and define requirements to mitigate those risks. Have a good understanding of general business trends and directions to be able to put own work in the broad business context.Work with various partners within PayPal to coordinate the execution of fraud mitigation solutions.Demonstrate flexibility that is needed to succeed within a matrix organization while being proactive and comfortable with working alongside stakeholders from different domains.Develop controls and monitoring dashboards to ensure performance against business goal, regulatory requirements and business priorities.What Do You Need To Bring-Proficiency in SQL and ExcelProficiency in at least one statistical analysis tool: SAS / R / Python/ HadoopStrong analytical skills: ability to build quick estimates using back-of-the-envelope analysis, structure (and, if needed, execute) more complex analyses, pull together business cases, navigate multidimensional sets of tradeoffs. Above all, the job calls for comfort with data – ability to manipulate it, question its validity, interpret it, and develop recommendations based on itStrong written, oral, and interpersonal skills, including the ability to explain and/or present analysisDedicated, proactive, curious and eager to learn new approaches / methodologies – a mustOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. 
And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. 
Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0096181
## 3 At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:What you need to know about the role - We’re looking for a Risk Analyst to join the team and help in our efforts to manage our global seller risk strategy. You will work on the complex task of keeping fraudsters and bad actors away from PayPal’s platform. If you’re looking to make an impact at the red-hot intersection of e-Commerce, online payment and financial risk management, you’re at the right place. Meet our team The PayPal Seller Risk team is responsible for keeping the PayPal platform safe and secure and delivering a satisfactory experience for both the sellers and buyers. This is an exciting, fast paced team where the contribution by team members can significantly impact PayPal’s bottom line as well as our customers’ experience.Job Description:Your way to impactRisk Data Analysts apply their analytical and technical skills into researching data, monitoring and creating logics that stop fraudulent activity on PayPal’s platform. They analyze existing loss trends and define logics, solutions and operational processes that prevent them - while delivering best-in-class customer experience for PayPal users.Risk Data Analysts work closely with one another, and with Risk Operations in order to keep learning about current fraud. They also partner closely with PayPal Product teams to define features and integrate risk prevention tools to support business goals.Our team members are excited about solving analytical problems. They are bright, they are responsible, and they know how to follow through. 
If you’re also like that, we’d love to hear from you.Your day to dayFight fraudulent activity in one’s domain of ownership by analyzing existing loss trends through review of specific cases, research on our payments database and incorporation of feedback from operations teams.Based on data-driven analysis, formulating risk solutions to ensure optimal balance between user experience, business enablement, operational expense and loss exposure related to proposed solutionsWork closely with product teams on development of new products to identify potential gaps that might create loss, and define requirements to mitigate those risks. Have a good understanding of general business trends and directions to be able to put own work in the broad business context.Work with various partners within PayPal to coordinate the execution of fraud mitigation solutions.Demonstrate flexibility that is needed to succeed within a matrix organization while being proactive and comfortable with working alongside stakeholders from different domains.Develop controls and monitoring dashboards to ensure performance against business goal, regulatory requirements and business priorities.What Do You Need To Bring-Proficiency in SQL and ExcelProficiency in at least one statistical analysis tool: SAS / R / Python/ HadoopStrong analytical skills: ability to build quick estimates using back-of-the-envelope analysis, structure (and, if needed, execute) more complex analyses, pull together business cases, navigate multidimensional sets of tradeoffs. Above all, the job calls for comfort with data – ability to manipulate it, question its validity, interpret it, and develop recommendations based on itStrong written, oral, and interpersonal skills, including the ability to explain and/or present analysisDedicated, proactive, curious and eager to learn new approaches / methodologies – a mustOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. 
And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. 
Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0096181
## 4                                                                                                                                                                                                                                                                                                                                                                                                                              At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:We are seeking world-class problem solvers who have a passion for data and a relentless focus on execution and delivery. You will be most successful with a healthy combination of both, technical skills and business acumen. As a Data Analyst you will generate insights by conducting extensive analyses of PayPal’s rich data. 
In the process, you will develop a deep understanding of the payments business, our site functionality, further strengthen your analytic, leadership and presentation skills and gain exposure to a wide variety of functional teams within PayPal.Job Description:Key ResponsibilitiesUnderstand how to make data visually appealing and simple to both navigate and comprehend for end-users  Aggregate data from various sources to construct streamlined data pipelines and integrate data from multiple PayPal systems  Identify key metrics and build exec-facing dashboards to track the progress of the business and its highest priority initiatives Identify key business levers, establish cause & effect, perform analyses, and communicate key findings to various stakeholders to facilitate data driven decision-making Work closely across the matrix with teams like Finance, Marketing, Product, Engineering and senior executives Lead and participate in special projects/initiatives: innovate and implement large-scale quality improvements to processes and/or systems by conducting data analysis and making recommendations, troubleshooting technical issues, and refining processes around customer support  Basic Requirements:Experience with any or multiple of the following: Python, Java, Tableau, Jupyter Notebooks, Teradata, Hadoop/Hive, Oracle, JavaScript, SQL, Airflow, Linux, Perl, PHP Excellent understanding of computer science fundamentals, data structures, and algorithms Demonstrated experience, familiarity and ease with handling large data sets and crunching numbers Information Retrieval (search/recommendation/classification) experience or Human Judgment/User Interface experience Strong written and verbal communication skills with the ability to translate complex problems into simpler terms, and effectively influence both peers and senior leadership Recent Graduate Position Information and Requirements:This is a Recent Graduate Full-Time position. 
Must have graduated within the past 12 months, or will be graduating by Spring 2023, with a Bachelor’s or Master’s degree in Computer Science, Mathematics, Statistics, or related field from an accredited college or university. Must reside in the U.S. Must be able to obtain authorization to work in the U.S. Application ProcessNote - This job posting is a general advertisement for all PayPal Data Analyst opportunities for recent graduates. Applicants will be asked to complete a skills assessment via HackerRank Following the assessment, applicants will hear back from a University Recruiter with next steps.Position Location: Varies within U.S. PayPal Office LocationsglobaluniversitygraduatedataanalyticsOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. 
For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0092223
## 5 At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:What you need to know about the role - We’re looking for a Risk Analyst to join the team and help in our efforts to manage our global seller risk strategy. You will work on the complex task of keeping fraudsters and bad actors away from PayPal’s platform. If you’re looking to make an impact at the red-hot intersection of e-Commerce, online payment and financial risk management, you’re at the right place. Meet our team The PayPal Seller Risk team is responsible for keeping the PayPal platform safe and secure and delivering a satisfactory experience for both the sellers and buyers. This is an exciting, fast paced team where the contribution by team members can significantly impact PayPal’s bottom line as well as our customers’ experience.Job Description:Your way to impactRisk Data Analysts apply their analytical and technical skills into researching data, monitoring and creating logics that stop fraudulent activity on PayPal’s platform. They analyze existing loss trends and define logics, solutions and operational processes that prevent them - while delivering best-in-class customer experience for PayPal users.Risk Data Analysts work closely with one another, and with Risk Operations in order to keep learning about current fraud. They also partner closely with PayPal Product teams to define features and integrate risk prevention tools to support business goals.Our team members are excited about solving analytical problems. They are bright, they are responsible, and they know how to follow through. 
If you’re also like that, we’d love to hear from you.Your day to dayFight fraudulent activity in one’s domain of ownership by analyzing existing loss trends through review of specific cases, research on our payments database and incorporation of feedback from operations teams.Based on data-driven analysis, formulating risk solutions to ensure optimal balance between user experience, business enablement, operational expense and loss exposure related to proposed solutionsWork closely with product teams on development of new products to identify potential gaps that might create loss, and define requirements to mitigate those risks. Have a good understanding of general business trends and directions to be able to put own work in the broad business context.Work with various partners within PayPal to coordinate the execution of fraud mitigation solutions.Demonstrate flexibility that is needed to succeed within a matrix organization while being proactive and comfortable with working alongside stakeholders from different domains.Develop controls and monitoring dashboards to ensure performance against business goal, regulatory requirements and business priorities.What Do You Need To Bring-Proficiency in SQL and ExcelProficiency in at least one statistical analysis tool: SAS / R / Python/ HadoopStrong analytical skills: ability to build quick estimates using back-of-the-envelope analysis, structure (and, if needed, execute) more complex analyses, pull together business cases, navigate multidimensional sets of tradeoffs. Above all, the job calls for comfort with data – ability to manipulate it, question its validity, interpret it, and develop recommendations based on itStrong written, oral, and interpersonal skills, including the ability to explain and/or present analysisDedicated, proactive, curious and eager to learn new approaches / methodologies – a mustOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. 
And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. 
Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0096181
## 6 At PayPal (NASDAQ: PYPL), we believe that every person has the right to participate fully in the global economy. Our mission is to democratize financial services to ensure that everyone, regardless of background or economic standing, has access to affordable, convenient, and secure products and services to take control of their financial lives.Job Description Summary:What you need to know about the role - We’re looking for a Risk Analyst to join the team and help in our efforts to manage our global seller risk strategy. You will work on the complex task of keeping fraudsters and bad actors away from PayPal’s platform. If you’re looking to make an impact at the red-hot intersection of e-Commerce, online payment and financial risk management, you’re at the right place. Meet our team The PayPal Seller Risk team is responsible for keeping the PayPal platform safe and secure and delivering a satisfactory experience for both the sellers and buyers. This is an exciting, fast paced team where the contribution by team members can significantly impact PayPal’s bottom line as well as our customers’ experience.Job Description:Your way to impactRisk Data Analysts apply their analytical and technical skills into researching data, monitoring and creating logics that stop fraudulent activity on PayPal’s platform. They analyze existing loss trends and define logics, solutions and operational processes that prevent them - while delivering best-in-class customer experience for PayPal users.Risk Data Analysts work closely with one another, and with Risk Operations in order to keep learning about current fraud. They also partner closely with PayPal Product teams to define features and integrate risk prevention tools to support business goals.Our team members are excited about solving analytical problems. They are bright, they are responsible, and they know how to follow through. 
If you’re also like that, we’d love to hear from you.Your day to dayFight fraudulent activity in one’s domain of ownership by analyzing existing loss trends through review of specific cases, research on our payments database and incorporation of feedback from operations teams.Based on data-driven analysis, formulating risk solutions to ensure optimal balance between user experience, business enablement, operational expense and loss exposure related to proposed solutionsWork closely with product teams on development of new products to identify potential gaps that might create loss, and define requirements to mitigate those risks. Have a good understanding of general business trends and directions to be able to put own work in the broad business context.Work with various partners within PayPal to coordinate the execution of fraud mitigation solutions.Demonstrate flexibility that is needed to succeed within a matrix organization while being proactive and comfortable with working alongside stakeholders from different domains.Develop controls and monitoring dashboards to ensure performance against business goal, regulatory requirements and business priorities.What Do You Need To Bring-Proficiency in SQL and ExcelProficiency in at least one statistical analysis tool: SAS / R / Python/ HadoopStrong analytical skills: ability to build quick estimates using back-of-the-envelope analysis, structure (and, if needed, execute) more complex analyses, pull together business cases, navigate multidimensional sets of tradeoffs. Above all, the job calls for comfort with data – ability to manipulate it, question its validity, interpret it, and develop recommendations based on itStrong written, oral, and interpersonal skills, including the ability to explain and/or present analysisDedicated, proactive, curious and eager to learn new approaches / methodologies – a mustOur Benefits:At PayPal, we’re committed to building an equitable and inclusive global economy. 
And we can’t do this without our most important asset—you. That’s why we offer benefits to help you thrive in every stage of life. We champion your financial, physical, and mental health by offering valuable benefits and resources to help you care for the whole you.We have great benefits including a flexible work environment, employee shares options, health and life insurance and more. To learn more about our benefits please visit https://www.paypalbenefits.comWho We Are:To learn more about our culture and community visit https://about.pypl.com/who-we-are/default.aspxPayPal has remained at the forefront of the digital payment revolution for more than 20 years. By leveraging technology to make financial services and commerce more convenient, affordable, and secure, the PayPal platform is empowering more than 400 million consumers and merchants in more than 200 markets to join and thrive in the global economy. For more information, visit paypal.com.PayPal provides equal employment opportunity (EEO) to all persons regardless of age, color, national origin, citizenship status, physical or mental disability, race, religion, creed, gender, sex, pregnancy, sexual orientation, gender identity and/or expression, genetic information, marital status, status with regard to public assistance, veteran status, or any other characteristic protected by federal, state or local law. In addition, PayPal will provide reasonable accommodations for qualified individuals with disabilities. If you are unable to submit an application because of incompatible assistive technology or a disability, please contact us at paypalglobaltalentacquisition@paypal.com.As part of PayPal’s commitment to employees’ health and safety, we have established in-office Covid-19 protocols and requirements, based on expert guidance. Depending on location, this might include a Covid-19 vaccination requirement for any employee whose role requires them to work onsite. 
Employees may request reasonable accommodation based on a medical condition or religious belief that prevents them from being vaccinated.REQ ID R0096181
##   salary                 location
## 1    NaN             Timonium, MD
## 2    NaN            Annapolis, MD
## 3    NaN           Wilmington, DE
## 4    NaN                Omaha, NE
## 5    NaN  Virginia, United States
## 6    NaN Wisconsin, United States
##                                                                                                                                                                                                                      criteria
## 1 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
## 2 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
## 3 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
## 4 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
## 5 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
## 6 [{'Seniority level': 'Not Applicable'}, {'Employment type': 'Full-time'}, {'Job function': 'Information Technology'}, {'Industries': 'Software Development, Technology, Information and Internet, and Financial Services'}]
##       posted
## 1 2022-11-16
## 2 2022-11-16
## 3 2022-11-17
## 4 2022-11-22
## 5 2022-11-17
## 6 2022-11-17
##                                                                                                                                                                                                                               link
## 1                 https://www.linkedin.com/jobs/view/data-analyst-at-paypal-3357036349?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=tIAR5tcEmVVvSHvcheIFXg%3D%3D&position=17&pageNum=0&trk=public_jobs_jserp-result_search-card\r
## 2                 https://www.linkedin.com/jobs/view/data-analyst-at-paypal-3357037326?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=wDBwHWDipnSAb2ovc5P7jQ%3D%3D&position=16&pageNum=0&trk=public_jobs_jserp-result_search-card\r
## 3               https://www.linkedin.com/jobs/view/data-analyst-at-paypal-3359606410?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=RrdwYqLhdOH%2BDureLOwswg%3D%3D&position=15&pageNum=0&trk=public_jobs_jserp-result_search-card\r
## 4 https://www.linkedin.com/jobs/view/data-analyst-recent-graduate-at-paypal-3364469255?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=yEM49KphKpecOROIzc0HFA%3D%3D&position=14&pageNum=0&trk=public_jobs_jserp-result_search-card\r
## 5               https://www.linkedin.com/jobs/view/data-analyst-at-paypal-3359604765?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=Y0kNBfvsvXi%2BEeV7crgdJQ%3D%3D&position=13&pageNum=0&trk=public_jobs_jserp-result_search-card\r
## 6               https://www.linkedin.com/jobs/view/data-analyst-at-paypal-3359605653?refId=pqDQ2g6BkMrTWMjuTeGHXw%3D%3D&trackingId=d6nQ5raH5urS2vddF5%2FJMQ%3D%3D&position=12&pageNum=0&trk=public_jobs_jserp-result_search-card\r

Parse Description

Prepare to find the skills listed in skills_list_str within the description variable. This list of data science skills was collected from a few places across the web.

# Comma-separated catalogue of skill keywords to look for in job descriptions.
skills_list_str <- 'python,r,sql,machine learning,data mining,data visualization,big data,sql,nosql,hadoop,spark,statistics,a/b,cleaning,data warehouse,etl,data lake,communication,teamwork,sklearn,scikit,pandas,numpy,tensorflow,keras,pytorch,database,mysql,postgresql,oracle,mongo,cloud,aws,azure,google cloud,git,deep learning,dnn,neural network,powerbi,tableau,teradata,javascript,airflow,linux,perl,java,php,bachelors,masters,phd,doctorate'
# One keyword per element. unique() drops the repeated "sql" entry so each
# keyword is checked (and recorded) only once per description.
skills_list <- unique(strsplit(skills_list_str, split = ",", fixed = TRUE)[[1]])
# Every job starts with an empty skills string.
jobs[, "skills"] <- ""

Iterate over job descriptions and check for skill.

# Normalise descriptions once: lower-case them and drop apostrophes.
jobs$description <- str_remove_all(tolower(jobs$description), "'")

# Test one skill at a time against the whole description column instead of
# looping row by row. This also works for a zero-row table, where `1:nrow(jobs)`
# would iterate over c(1, 0).
for (skill in skills_list) {
  # whole-word match: the keyword must sit between word boundaries
  has_skill <- str_detect(jobs$description, paste0("\\b", skill, "\\b"))
  # a missing description simply has no skills (instead of failing in `if`)
  has_skill[is.na(has_skill)] <- FALSE
  if (any(has_skill)) {
    # same "<previous> <skill> ," format as before, so the later split on ","
    # sees identical tokens
    jobs$skills[has_skill] <- paste(jobs$skills[has_skill], skill, ",")
  }
}

Create normalized table of skills mapped to unique job ID in jobs data frame.

# Long table of (job id, skill): split each job's skills string on commas, give
# every piece its own row, then keep one row per id/skill pair.
skill_rows <- jobs %>%
  pivot_longer(skills) %>%
  mutate(value = strsplit(as.character(value), ",")) %>%
  unnest(value)
jobs.skills <- as.data.frame(
  summarize(
    group_by(skill_rows, id, value),
    value = unique(value),
    .groups = "drop"
  )
)
head(jobs.skills)
##   id      value
## 1  1  database 
## 2  1    python 
## 3  1         r 
## 4  1       sql 
## 5  2  database 
## 6  2    python

Tidy Criteria

Split up criteria variable to create separate variables for seniority, employment type, job function, and industry.

# Pull each criteria entry into its own column. Every pattern captures the text
# between "'<key>': '" and the next "'}" (lazy match, so it stops at the first one).
jobs <- mutate(
  jobs,
  seniority = str_extract(criteria, "(?<='Seniority level': ')(.*?)(?='\\})"),
  etype = str_extract(criteria, "(?<='Employment type': ')(.*?)(?='\\})"),
  jfunction = str_extract(criteria, "(?<='Job function': ')(.*?)(?='\\})"),
  industries = str_extract(criteria, "(?<='Industries': ')(.*?)(?='\\})")
)

Normalized table of industries mapped to unique job ID in jobs data frame.

# Long table of (job id, industry): one row per distinct industry for each job.
jobs.industries <- jobs %>%
  pivot_longer(industries) %>%
  # split the comma-separated industries string into one piece per row
  mutate(value = strsplit(as.character(value), ",")) %>%
  unnest(value) %>%
  mutate(
    value = value %>%
      str_trim() %>%
      # strip only a leading "and" (from "..., and Financial Services"); an
      # "and" inside a name such as "Information and Internet" is kept
      str_remove("^and\\s+") %>%
      str_trim()
  ) %>%
  # de-duplicate after cleaning so equal names collapse to one row
  distinct(id, value) %>%
  arrange(id, value) %>%
  as.data.frame()
# NOTE(review): splitting on every comma also breaks up industry names that
# themselves contain a comma - confirm whether that matters downstream.

Tidy Title

List of unique raw titles below. All are data analysts. Some job titles have other information included in the field (e.g., remote/hybrid information).

# Show each distinct raw job title once, in order of first appearance.
jobs |> pull(title) |> unique()
##  [1] "Data Analyst"                                       
##  [2] "Data Analyst - Recent Graduate"                     
##  [3] "Entry-Level Data Analyst"                           
##  [4] "Analyst (Global Data and Analytics)"                
##  [5] "Data Analyst/Collector"                             
##  [6] "Data Analytics Analyst"                             
##  [7] "Data Analyst, Money"                                
##  [8] "Data Analyst I"                                     
##  [9] "Entry Level Data Analyst"                           
## [10] "Data Analyst - Remote"                              
## [11] "Data Analyst III - Remote"                          
## [12] "WFH//Data Analyst"                                  
## [13] "Data Analyst (SQL)"                                 
## [14] "Data Analyst - remote!"                             
## [15] "SQL Data Analyst"                                   
## [16] "Data Analyst (Global Remote)"                       
## [17] "Data Analyst - (Remote - US)"                       
## [18] "Data Analyst (Remote)"                              
## [19] "REMOTE Data Analyst (SQL, Operations)"              
## [20] "Data Visualization Analyst"                         
## [21] "Analyst - Data Visualization"                       
## [22] "Senior Data Analyst"                                
## [23] "Data Analyst I (entry level)"                       
## [24] "Analyst, Data and Analytics"                        
## [25] "Marketing Data Analyst"                             
## [26] "Data Analyst (SQL, Teraform, Tableau) III - Remote" 
## [27] "Junior Data Analyst"                                
## [28] "Business Data Analyst"                              
## [29] "Data Analyst (Hybrid)"                              
## [30] "Data Analyst - Weekly Hybrid Remote/Onsite Schedule"
## [31] "Consultant/Data Analyst"                            
## [32] "Junior Data Analyst-Entry Level"                    
## [33] "Data Analyst/ $100M Valuation/ Hybrid"              
## [34] "Associate Data Analyst"                             
## [35] "Data Analyst - Energy"                              
## [36] "Group Data Analyst"

To be tidy, every cell can only have one piece of information. To this end, pull out remote/hybrid details from “title”. As a check, we verify that this info matches with the existing “onsite_remote” column.

jobs <- jobs |>
  mutate(
    # extra detail from "title": what remains once the base job name and the
    # first " - " separator are removed
    analyst_detail = str_trim(
      str_remove(
        str_remove(str_remove(title, "Data Analyst"), "Data analyst"),
        " - "
      )
    ),
    # QC flag: TRUE when the leftover detail mentions any work-mode qualifier
    analyst_detail_onsite_remote = Reduce(
      `|`,
      lapply(
        c("Remote", "remote", "Hybrid", "REMOTE", "WFH", "Onsite"),
        function(word) str_detect(analyst_detail, word)
      )
    ),
    # cleaned title: strip the patterns below one after another, in this order
    # (each call removes only its first match), then tidy the whitespace
    title_clean = Reduce(
      function(text, pattern) str_remove(text, pattern),
      c(
        "Remote", "remote", "Hybrid", "REMOTE", "WFH", "Onsite", # remote/onsite qualifiers
        "Weekly.*Schedule",
        "\\(.*\\)", # parentheticals
        "\\/.*\\/", # content within slashes
        "//", "!", "- $" # other symbols, to standardize
      ),
      str_replace(title, "analyst", "Analyst") # standardize capitalization
    ) |>
      str_replace("  ", " ") |>
      str_trim() |>
      as.factor()
  )

Here are all the values in the title_clean column:

# List the distinct cleaned titles (factor levels of title_clean).
jobs |> pull(title_clean) |> levels()
##  [1] "Analyst"                         "Analyst - Data Visualization"   
##  [3] "Analyst, Data and Analytics"     "Associate Data Analyst"         
##  [5] "Business Data Analyst"           "Consultant/Data Analyst"        
##  [7] "Data Analyst"                    "Data Analyst - Energy"          
##  [9] "Data Analyst - Recent Graduate"  "Data Analyst I"                 
## [11] "Data Analyst III"                "Data Analyst, Money"            
## [13] "Data Analyst/Collector"          "Data Analytics Analyst"         
## [15] "Data Visualization Analyst"      "Entry Level Data Analyst"       
## [17] "Entry-Level Data Analyst"        "Group Data Analyst"             
## [19] "Junior Data Analyst"             "Junior Data Analyst-Entry Level"
## [21] "Marketing Data Analyst"          "Senior Data Analyst"            
## [23] "SQL Data Analyst"

As a check, we browsed the data to verify that the remote/onsite qualifiers in the job titles match the values in the onsite_remote column. Everything looks consistent:

# Rows whose title carried a work-mode qualifier, reduced to the distinct
# title / onsite_remote pairs.
jobs_onsite_check <- unique(
  select(
    filter(jobs, analyst_detail_onsite_remote),
    title, onsite_remote
  )
)

# Print the whole check table rather than only the first six rows.
head(jobs_onsite_check, n = nrow(jobs_onsite_check))
##                                                  title onsite_remote
## 1                                Data Analyst - Remote        remote
## 3                            Data Analyst III - Remote        remote
## 4                                    WFH//Data Analyst        remote
## 5                               Data Analyst - remote!        remote
## 19                        Data Analyst (Global Remote)        remote
## 21                        Data Analyst - (Remote - US)        remote
## 83                               Data Analyst (Remote)        remote
## 85               REMOTE Data Analyst (SQL, Operations)        remote
## 88  Data Analyst (SQL, Teraform, Tableau) III - Remote        remote
## 89                               Data Analyst (Hybrid)        hybrid
## 90 Data Analyst - Weekly Hybrid Remote/Onsite Schedule        hybrid
## 91               Data Analyst/ $100M Valuation/ Hybrid        hybrid

Clean Location

Next we work with location, which contains data at varying levels of granularity (e.g., some at the state level, some at the city level). To make the data consistent, we split the location column into two separate columns below, one for the part before the comma (typically the city) and one for the part after it (typically the state):

jobs <- jobs |>
  # first run of non-comma characters: the part before the comma, typically city
  mutate(location_1 = str_extract(location, "[^,]+")) |>
  # everything after the first comma-plus-whitespace, typically state
  mutate(location_2 = str_extract(location, "(?<=,\\s).+"))

We use the built-in R data set of state names to ensure the state variable is consistent; we use state abbreviations throughout. We also manually fix one city-state combination (Columbus, South Carolina Metropolitan Area to Columbus, SC).

# Lookup of state names and abbreviations from R's built-in vectors; the name
# column is called location_1 so it can be joined onto jobs.
states <- data.frame(state.abb, state.name) |>
  rename(location_1 = state.name)

jobs <- jobs |>
  # Explicit key: the join always matches on location_1 only and no longer
  # relies on (or prints a message about) inferring shared column names.
  left_join(states, by = "location_1") |>
  mutate(
    # "<State name>, United States" rows take the looked-up abbreviation;
    # all other rows keep whatever followed the comma
    state = if_else(location_2 == "United States", state.abb, location_2),
    # fix South Carolina Metropolitan Area incorrect state
    state = if_else(state == "South Carolina Metropolitan Area", "SC", state) |>
      as.factor(),
    # location_1 is a city unless it matched a state name in a
    # "<State name>, United States" location
    city = if_else(
      is.na(state.abb) | location_2 != "United States",
      location_1,
      NA_character_
    ) |>
      as.factor()
  )
# NOTE(review): state.name covers the 50 states only, so a location such as
# "District of Columbia, United States" gets no abbreviation and is kept as a
# city - confirm that is intended.

Here are the unique cities and states in which jobs were posted:

# List the distinct city values (factor levels of city).
jobs |> pull(city) |> levels()
##  [1] "Alpharetta"                                
##  [2] "Ann Arbor"                                 
##  [3] "Annapolis"                                 
##  [4] "Atlanta"                                   
##  [5] "Atlanta Metropolitan Area"                 
##  [6] "Austin"                                    
##  [7] "Bellevue"                                  
##  [8] "Boston"                                    
##  [9] "Bridgewater"                               
## [10] "Buffalo-Niagara Falls Area"                
## [11] "Burbank"                                   
## [12] "Camden"                                    
## [13] "Carlsbad"                                  
## [14] "Cedar Rapids"                              
## [15] "Charlotte"                                 
## [16] "Charlotte Metro"                           
## [17] "Chicago"                                   
## [18] "Cincinnati"                                
## [19] "Cincinnati Metropolitan Area"              
## [20] "Cleveland"                                 
## [21] "Columbia"                                  
## [22] "Columbus"                                  
## [23] "Conshohocken"                              
## [24] "Crystal City"                              
## [25] "Dallas"                                    
## [26] "Dallas-Fort Worth Metroplex"               
## [27] "Delray Beach"                              
## [28] "Denver Metropolitan Area"                  
## [29] "Des Moines"                                
## [30] "Detroit"                                   
## [31] "District of Columbia"                      
## [32] "Dublin"                                    
## [33] "Eden Prairie"                              
## [34] "Fort Lauderdale"                           
## [35] "Franklin"                                  
## [36] "Georgia"                                   
## [37] "Greater Sacramento"                        
## [38] "Greater Scranton Area"                     
## [39] "Greater Tampa Bay Area"                    
## [40] "Greensboro--Winston-Salem--High Point Area"
## [41] "Hartford"                                  
## [42] "Hoffman Estates"                           
## [43] "Home"                                      
## [44] "Houston"                                   
## [45] "Huntersville"                              
## [46] "Irvine"                                    
## [47] "Irving"                                    
## [48] "Kansas City Metropolitan Area"             
## [49] "Knoxville"                                 
## [50] "Los Angeles"                               
## [51] "Los Angeles Metropolitan Area"             
## [52] "Manassas"                                  
## [53] "Miami"                                     
## [54] "Minneapolis"                               
## [55] "Miramar"                                   
## [56] "Mishawaka"                                 
## [57] "Morgan Hill"                               
## [58] "Morris Plains"                             
## [59] "Mount Pleasant"                            
## [60] "Nashville"                                 
## [61] "Nashville Metropolitan Area"               
## [62] "New Haven"                                 
## [63] "New York"                                  
## [64] "New York City Metropolitan Area"           
## [65] "Novato"                                    
## [66] "O'Fallon"                                  
## [67] "Oakbrook Terrace"                          
## [68] "Oklahoma City"                             
## [69] "Omaha"                                     
## [70] "Ontario"                                   
## [71] "Orlando"                                   
## [72] "Palo Alto"                                 
## [73] "Philadelphia"                              
## [74] "Phoenix"                                   
## [75] "Pittsburgh"                                
## [76] "Plano"                                     
## [77] "Pleasanton"                                
## [78] "Queens"                                    
## [79] "Quincy"                                    
## [80] "Raleigh"                                   
## [81] "Richfield"                                 
## [82] "San Antonio"                               
## [83] "San Diego"                                 
## [84] "San Francisco"                             
## [85] "San Francisco Bay Area"                    
## [86] "San Jose"                                  
## [87] "Santa Clara"                               
## [88] "Scottsdale"                                
## [89] "Seattle"                                   
## [90] "South Barrington"                          
## [91] "Southaven"                                 
## [92] "Sun City"                                  
## [93] "Tampa"                                     
## [94] "Tempe"                                     
## [95] "Timonium"                                  
## [96] "United States"                             
## [97] "Washington"                                
## [98] "Wilmington"                                
## [99] "Woodcliff Lake"

And here are the unique states:

# List the distinct state abbreviations (factor levels of state).
jobs |> pull(state) |> levels()
##  [1] "AL" "AZ" "CA" "CT" "DC" "DE" "FL" "GA" "IA" "IL" "IN" "KS" "MA" "MD" "MI"
## [16] "MN" "MO" "MS" "NC" "NE" "NJ" "NY" "OH" "OK" "PA" "SC" "TN" "TX" "VA" "WA"
## [31] "WI"

Check Company

Browsing companies for opportunities for standardization. It appears that this column is OK as is.

# Preview the first few distinct company names.
jobs |>
  select(company) |>
  unique() |>
  head()
##                           company
## 1                          PayPal
## 29       The Federal Savings Bank
## 31                London Approach
## 33                    Motley Rice
## 34 Chicago Teachers' Pension Fund
## 36                    FreightPlus

Clean Onsite/Remote

Browsing onsite_remote column for opportunities for standardization. The values are OK and converted to a factor variable.

# Store onsite_remote as a factor, then list its categories.
jobs$onsite_remote <- as.factor(jobs$onsite_remote)

jobs |> pull(onsite_remote) |> levels()
## [1] "hybrid" "onsite" "remote"

Clean and Tidy Salaries

Summary of salary adjustment steps:

1- find what rows are missing salary ranges

2- find what fields have salary information

3- split the salary column into 2 columns for range

4- move salary information for the rows that contain the information in other rows

5- reformat the hourly wages to salary numbers

1: Find what rows are missing salary information

# only missing salaries
# `salary == NaN` matched rows only because NaN was coerced to the text "NaN";
# against a true NA/NaN value `==` yields NA and filter() drops the row.
# Test for both spellings of "missing" explicitly.
# NOTE(review): assumes missing salaries are stored as the text "NaN" - confirm.
salary_finder_02 <- jobs |>
  filter(is.na(salary) | salary == "NaN")

2: Find what fields have salary ranges

It is helpful to know which fields contain salary information. Since the new columns were derived by splitting the original columns, we only search the original columns.

#title
# Count missing-salary rows whose title mentions "salary". str_detect() gives
# one TRUE/FALSE per row, so titles with repeated mentions are counted too
# (comparing the str_extract_all() list to "salary" only caught single matches).
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(title, "salary")) |>
  count()
##   n
## 1 0
# Count missing-salary rows whose title contains a dollar sign. fixed() makes
# "$" a literal character; as a bare regex it is the end-of-string anchor and
# can never yield the text "$".
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(title, fixed("$"))) |>
  count()
##   n
## 1 0
#company
# Count missing-salary rows whose company name mentions "salary". str_detect()
# gives one TRUE/FALSE per row, so repeated mentions are counted too.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(company, "salary")) |>
  count()
##   n
## 1 0
# Count missing-salary rows whose company name contains a dollar sign. fixed()
# makes "$" a literal character; as a bare regex it is the end-of-string anchor.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(company, fixed("$"))) |>
  count()
##   n
## 1 0
#description
# Count missing-salary rows whose description mentions "salary" at least once.
# Comparing the str_extract_all() list to "salary" only caught descriptions
# with exactly one mention; str_detect() counts every row that has a match.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(description, "salary")) |>
  count()
##     n
## 1 147
# Count missing-salary rows whose description contains a dollar sign. fixed()
# makes "$" a literal character; as a bare regex it is the end-of-string anchor,
# which is why this check could never find anything.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(description, fixed("$"))) |>
  count()
##   n
## 1 0
#location
# Count missing-salary rows whose location mentions "salary". str_detect()
# gives one TRUE/FALSE per row, so repeated mentions are counted too.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(location, "salary")) |>
  count()
##   n
## 1 0
# Count missing-salary rows whose location contains a dollar sign. fixed()
# makes "$" a literal character; as a bare regex it is the end-of-string anchor.
# NOTE(review): the count printed below predates this change; re-knit to refresh.
salary_finder_02 |>
  filter(str_detect(location, fixed("$"))) |>
  count()
##   n
## 1 0
#criteria
# Number of salary-less rows whose criteria mentions "salary" at least once.
# str_detect() is used because comparing the list returned by
# str_extract_all() with a string only matched rows with exactly one hit.
# The printed count needs a re-run to reflect this.
salary_finder_02 |>
  filter(str_detect(criteria, fixed("salary"))) |>
  count()
##   n
## 1 0
# Number of salary-less rows whose criteria contains a literal dollar sign.
# fixed() is required: as a regex, "$" is the end-of-string anchor and never
# yields the text "$", so the previous count was always 0.
# The printed count needs a re-run to reflect this.
salary_finder_02 |>
  filter(str_detect(criteria, fixed("$"))) |>
  count()
##   n
## 1 0

Based on these counts, the only salary information appears to be in the description column, in postings that mention the word "salary".

3: Split the salary column into 2 columns for range

# Derive the two ends of the salary range from the text on either side of the
# dash: sal_low is everything before it, sal_high everything after it.
# Both new columns are placed directly after `salary`, low end first.
jobs <- mutate(
  jobs,
  sal_low = str_extract(salary, ".*(?=-)"),
  sal_high = str_extract(salary, "(?<=-).*"),
  .after = salary
)

4: Fill in salary information for rows where it appears in other fields

# Split the postings into two complementary partitions on the salary text.
# A missing salary is stored as the string "NaN"; comparing against the quoted
# string avoids relying on R coercing the bare NaN constant to text, and the
# is.na() checks make sure a true NA lands in the "missing" partition instead
# of being dropped from both.

#rows where salary information is already available
salary_exist <- jobs |>
  filter(!is.na(salary), salary != "NaN")

#rows where there is no salary information
salary_finder_01 <- jobs |>
  filter(is.na(salary) | salary == "NaN")

#filling all available salaries from description
# For rows without a salary, look for a "Salary ... 12,345" / "salary ... 12,345"
# mention in the description and rebuild salary, sal_low and sal_high from it.
salary_finder_03 <- salary_finder_01 |>
  mutate(
    # list column: every salary-like match found in the description
    description_02 = str_extract_all(description, "(Salary.*\\d+[,]\\d+)|(salary.*\\d+[,]\\d+)"),
    .before = "description"
  ) |>
      # keep rows with at least one match; this relies on an empty match
      # being compared as the text "character(0)"
      filter(
        description_02 != "character(0)"
      ) |> 
          mutate(
            # comma-formatted numbers found in the matched text, flattened by
            # as.character() into one string per row
            description_03 = as.character(str_extract_all(description_02, "\\d+[,]\\d+")),
            .before = "description"
          ) |>
              mutate(
                # low end: the first comma-formatted number
                sal_low = str_extract(description_03, "\\d+[,]\\d+"),
                # high end: the first comma-formatted number after a whitespace
                sal_high = str_extract(description_03, "(?<=(\\s)).*"),
                sal_high = str_extract(sal_high, "\\d+[,]\\d+"),
                # only one number found: use it for both ends of the range
                sal_high = ifelse(is.na(sal_high), sal_low, sal_high),
                # rebuild the salary text from the two ends
                salary = paste(sal_low, " - ", sal_high),
                # drop the helper columns
                description_02 = NULL,
                description_03 = NULL
              )

# Rows without a salary whose description contains no salary-like mention
# either: an empty match list means the pattern was not found.
salary_finder_02 <- salary_finder_01 |>
  filter(
    lengths(
      str_extract_all(description, "(Salary.*\\d+[,]\\d+)|(salary.*\\d+[,]\\d+)")
    ) == 0
  )

# Stack the three partitions back into one data frame and sort it by id.
jobs_salary <- do.call(
  rbind,
  list(salary_exist, salary_finder_02, salary_finder_03)
)
jobs_salary <- jobs_salary[order(jobs_salary[["id"]]), ]

5: Reformat the hourly wages to salary numbers

#new data frame for exploring
jobs_clean <- jobs_salary

# Make sal_low and sal_high numeric: strip "$" and "," and convert.
# as.numeric() is already vectorised, so it is applied to the whole column
# instead of going through sapply(), which attaches the input strings as
# names and does not guarantee a numeric vector (it returns list() for
# zero-length input).
jobs_clean$sal_low <- as.numeric(gsub("[$,]", "", jobs_clean$sal_low))
jobs_clean$sal_high <- as.numeric(gsub("[$,]", "", jobs_clean$sal_high))

# Keep only the postings that have a numeric low end, for exploration.
no_na_low <- jobs_clean[!is.na(jobs_clean[["sal_low"]]), , drop = FALSE]

# Histogram of every low-end value.
ggplot(data = no_na_low, mapping = aes(x = sal_low)) +
  geom_histogram(bins = 50)

# Isolate the lowest group: every low-end value under 25000.
sal_to_adjust <- filter(no_na_low, sal_low < 25000)

# Histogram of that lowest group only.
ggplot(data = sal_to_adjust, mapping = aes(x = sal_low)) +
  geom_histogram(bins = 8)

The distribution shows clear groups: values below 1,000 are hourly wages, and values between 5,000 and 7,000 are monthly wages.

# Conversion rules, keyed on sal_low:
#   greater than 5000 and less than 7000 -> monthly wage, x12
#   less than 1000                       -> hourly wage, x2080
#                                           (presumably 40 hours x 52 weeks)
#   any other value                      -> kept unchanged
#   NA                                   -> kept unchanged

#monthly conversion
sal_monthly <- jobs_clean |>
  filter(sal_low > 5000, sal_low < 7000) |>
  mutate(
    sal_low = sal_low * 12,
    sal_high = sal_high * 12
  )

#hourly conversion
sal_hourly <- jobs_clean |>
  filter(sal_low < 1000) |>
  mutate(
    sal_low = sal_low * 2080,
    sal_high = sal_high * 2080
  )

#non converted df
# Every remaining row that has a salary. Filtering on `sal_low > 7000` alone
# silently dropped rows with sal_low in 1000..5000 or exactly 7000, because
# they matched none of the partitions.
sal_correct <- jobs_clean |>
  filter(sal_low >= 1000, !(sal_low > 5000 & sal_low < 7000))

#rows without any salary value
sal_na <- jobs_clean |>
  filter(is.na(sal_low))

#stick together the fixed salaries data frame
jobs <- rbind(sal_monthly, sal_hourly, sal_correct, sal_na)
jobs <- jobs[order(jobs$id), ]

#peek at the data
glimpse(jobs)
## Rows: 2,845
## Columns: 25
## $ id                           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
## $ title                        <chr> "Data Analyst", "Data Analyst", "Data Ana…
## $ company                      <chr> "PayPal", "PayPal", "PayPal", "PayPal", "…
## $ onsite_remote                <fct> onsite, onsite, onsite, onsite, onsite, o…
## $ description                  <chr> "at paypal (nasdaq: pypl), we believe tha…
## $ salary                       <chr> "NaN", "NaN", "NaN", "NaN", "NaN", "NaN",…
## $ sal_low                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ sal_high                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ location                     <chr> "Timonium, MD", "Annapolis, MD", "Wilming…
## $ criteria                     <chr> "[{'Seniority level': 'Not Applicable'}, …
## $ posted                       <chr> "2022-11-16", "2022-11-16", "2022-11-17",…
## $ link                         <chr> "https://www.linkedin.com/jobs/view/data-…
## $ skills                       <chr> " python , r , sql , sql , database ,", "…
## $ seniority                    <chr> "Not Applicable", "Not Applicable", "Not …
## $ etype                        <chr> "Full-time", "Full-time", "Full-time", "F…
## $ jfunction                    <chr> "Information Technology", "Information Te…
## $ industries                   <chr> "Software Development, Technology, Inform…
## $ analyst_detail               <chr> "", "", "", "Recent Graduate", "", "", ""…
## $ analyst_detail_onsite_remote <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
## $ title_clean                  <fct> "Data Analyst", "Data Analyst", "Data Ana…
## $ location_1                   <chr> "Timonium", "Annapolis", "Wilmington", "O…
## $ location_2                   <chr> "MD", "MD", "DE", "NE", "United States", …
## $ state.abb                    <chr> NA, NA, NA, NA, "VA", "WI", "IN", "TN", N…
## $ state                        <fct> MD, MD, DE, NE, VA, WI, IN, TN, NA, MD, I…
## $ city                         <fct> Timonium, Annapolis, Wilmington, Omaha, N…

Visualizations for analysis and targeted tidying

Most requested skills

Here we determine the most requested skills (regardless of associated salary). Below is a list of the top 10 requested skills in our sample of job postings, where count indicates the number of job postings in our sample that mentioned this skill, and perc is the percentage of postings in our sample that mentioned this skill.

# Tally the rows of jobs.skills per skill value, express each tally as a
# share of the number of rows in jobs, and keep the ten largest.
top_10_skills <- jobs.skills |>
  group_by(value) |>
  summarise(
    count = n(),
    perc = count / nrow(jobs)
  ) |>
  arrange(desc(count)) |>
  slice_head(n = 10)

top_10_skills
## # A tibble: 10 × 3
##    value                  count  perc
##    <chr>                  <int> <dbl>
##  1 " sql "                 2002 0.704
##  2 " communication "       1112 0.391
##  3 " python "              1089 0.383
##  4 " tableau "             1030 0.362
##  5 " statistics "           896 0.315
##  6 " r "                    785 0.276
##  7 " database "             619 0.218
##  8 " data visualization "   454 0.160
##  9 " etl "                  413 0.145
## 10 " oracle "               358 0.126

The most requested skill is SQL, appearing in about 70 percent of our job listings.

# Bar chart with one bar per skill value, ordered by frequency and flipped so
# the skill names sit on the vertical axis.
fig2 <- ggplot(data = jobs.skills, mapping = aes(x = fct_infreq(value))) +
  geom_bar() +
  labs(x = "skill") +
  coord_flip()

fig2

Above is a comprehensive account of skills we found and their respective number of mentions.

Compare SQL, R, Python salaries

# Only postings that carry a salary, plus the midpoint of their range.
# A missing salary is stored as the string "NaN"; it is compared against the
# quoted string (with an explicit NA check) instead of the bare NaN constant,
# which only worked through implicit coercion to text.
salary_exist_02 <- jobs |>
  filter(!is.na(salary), salary != "NaN") |>
  mutate(
    average_sal = (sal_low + sal_high) / 2,
    .after = sal_low
  )

# Flag which of sql / python / r each salaried posting lists.
# str_extract() returns the matched text or NA, and paste() turns NA into the
# string "NA", so every posting gets a three-part "x , y , z" label.
# "\\br " anchors r at a word boundary: the bare "r " pattern also matched the
# tail of any word ending in r (e.g. "server ", "power ").
# NOTE(review): "sql " still matches inside longer tokens such as "mysql ";
# left as-is - confirm whether that is intended.
sql_py_r <- salary_exist_02 |>
  mutate(
    sql_py_r_skill = paste(
      str_extract(skills, "sql "), ",",
      str_extract(skills, "python "), ",",
      str_extract(skills, "\\br ")
    ),
    .after = skills
  )

# Split the label into one column per skill.
wide_sql_py_r <- sql_py_r |>
  separate_wider_delim(
    sql_py_r_skill,
    delim = " , ",
    names = c("sql", "python", "r")
  )

# One row per posting and skill; the column-name helper is dropped right away.
tidy_sql_py_r <- wide_sql_py_r |>
  pivot_longer(
    cols = c(sql, python, r),
    names_to = "name_drop",
    values_to = "sql_py_r"
  ) |>
  mutate(name_drop = NULL)

# Average salary per skill; "NA" collects the postings lacking that skill.
ggplot(tidy_sql_py_r, aes(x = sql_py_r, y = average_sal)) +
  geom_boxplot(color = "purple", fill = "lavender")

Compare Bachelor, Master, PHD salaries

# Flag which of bachelors / masters / phd each salaried posting lists.
# str_extract() returns the matched text or NA, and paste() turns NA into the
# string "NA", so every posting gets a three-part "x , y , z" label.
edu_level <- salary_exist_02 |>
  mutate(
    edu_skill = paste(
      str_extract(skills, "bachelors "), ",",
      str_extract(skills, "masters "), ",",
      str_extract(skills, "phd ")
    ),
    .after = skills
  )

# Split the label into one column per education level.
wide_edu <- edu_level |>
  separate_wider_delim(
    edu_skill,
    delim = " , ",
    names = c("bachelor", "master", "phd")
  )

# One row per posting and education level; the column-name helper is dropped.
tidy_edu <- wide_edu |>
  pivot_longer(
    cols = c(bachelor, master, phd),
    names_to = "name_drop",
    values_to = "education"
  ) |>
  mutate(name_drop = NULL)

# Average salary per education level. The mapping is declared once at plot
# level: previously the plot mapped y = sal_low while the boxplot layer
# overrode it with average_sal, so the y axis was labelled sal_low although
# average_sal was drawn.
ggplot(tidy_edu, aes(x = education, y = average_sal)) +
  geom_boxplot(color = "blue", fill = "lightblue")

In the above graph, the blue boxplots show the average of the low and high ends of the listed salary range for each education level.

Salary by Skills

# Flag which of the six most requested skills (sql, communication, python,
# tableau, statistics, r) each salaried posting lists.
# str_extract() returns the matched text or NA, and paste() turns NA into the
# string "NA", so every posting gets a six-part "a , b , ..." label.
# "\\br " anchors r at a word boundary: the bare "r " pattern also matched the
# tail of any word ending in r (e.g. "server ", "power ").
top_skills_viz <- salary_exist_02 |>
  mutate(
    top_skill = paste(
      str_extract(skills, "sql "), ",",
      str_extract(skills, "communication "), ",",
      str_extract(skills, "python "), ",",
      str_extract(skills, "tableau "), ",",
      str_extract(skills, "statistics "), ",",
      str_extract(skills, "\\br ")
    ),
    .after = skills
  )

# Split the label into one column per skill.
wide_top_skills_viz <- top_skills_viz |>
  separate_wider_delim(
    top_skill,
    delim = " , ",
    names = c("sql", "communication", "python", "tableau", "statistics", "r")
  )

# One row per posting and skill; the column-name helper is dropped right away.
tidy_top_skills_viz <- wide_top_skills_viz |>
  pivot_longer(
    cols = c(sql, communication, python, tableau, statistics, r),
    names_to = "name_drop",
    values_to = "top_skill"
  ) |>
  mutate(name_drop = NULL)

# Average salary per skill; "NA" collects the postings lacking that skill.
ggplot(tidy_top_skills_viz, aes(x = top_skill, y = average_sal)) +
  geom_boxplot(color = "red", fill = "pink")

Salary by Skills in New York State

# Restrict the skill/salary rows to postings whose state is "NY".
ny_top_skills <- filter(tidy_top_skills_viz, state == "NY")

# Average salary per top skill, New York State only.
ggplot(data = ny_top_skills, mapping = aes(x = top_skill, y = average_sal)) +
  geom_boxplot(color = "darkgreen", fill = "palegreen")

Analysis:

The top skills seen in the frequency graphs are SQL, communication, Python, Tableau, statistics, and R. Because of this, we know the most requested skills for the data science/analyst career.

From these top skills, we can see that Python, R and statistics are the skills associated with the highest average salaries in their job postings. The lowest salaries, as well as the lowest interquartile range (IQR) values, belong to job postings that do not contain any of the top skills.