library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.7
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tm)
## Warning: package 'tm' was built under R version 3.5.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.2
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.5.3
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.5.3
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 3.5.2
library(RColorBrewer)

WRKSHP

https://www.glassdoor.com/Job/san-francisco-ca-data-analysis-intern-jobs-SRCH_IL.0,16_IC1147401_KO17,37.htm?src=GD_JOB_AD&rdserp=true&srs=EI_JOBS&jl=3323583664&ao=389273&s=21&guid=0000016cf9a616d1b98f722309893ea0&pos=102&t=EMPLOYER_SEARCH_RESULTS

wrkshp <- c("At WRKSHP, we are a music technology company. Our main app, Next Music, is a platform for interactive music. We create experiences that are simple, beautiful, and get better with time. We believe in teamwork, intellectual honesty and excellence. We take ownership of our work, constantly question our assumptions, and only ship products we are proud to put our names on. 

We feature and work with some of the biggest artists like Steve Aoki, Dimitri Vegas & Like Mike, Hardwell, R3HAB, 3LAU, and more. We help promote and market the release of new songs and albums for artists every day. 

Responsibilities:
Work closely with product and assist them in investigations, deep dives and consuming game performance metrics on a regular basis.
Devise and run A/B tests and apply statistical techniques for analysis and measurement.
Write SQL for exploratory analysis, data preparation and data movement.
Document analysis findings and communicate these effectively via written, oral and visual methods.
Evangelize data knowledge and insights: manage communications with your stakeholders and other teams, collaborate with both technical and non-technical colleagues to complete data projects and ensure all parties can use the insights to further improve
Maintain a customer-centric focus: strive to be a domain and product expert through data, develop trust among your peers and stakeholders, and ensure that your team has access to data to make decisions
Prioritize and execute in the face of ambiguity: work with stakeholders and mentors to distill the problem, adapt your tools to answer complicated questions, and identify the trade-offs between speed and quality of different approaches
Perform data verification and validation to ensure that instrumentation, aggregation, reporting, and analyses are accurate and complete. Devise and implement solutions when gaps or errors are found.
Requirements:
Enthusiasm about music!
1+ years of experience as an analyst or in highly analytical roles
BA/BS degree in Business, Economics, Statistics, Computer Science or related quantitative discipline
Experience working with business teams to understand and define problems for analysis; experience dealing with ambiguous business problems and translating them into feasible analytic exercises to provide answers.
Experience using at least one programming language to move and parse data sets.
A self-starter, detail-oriented, and believes in continuous learning and improvement
Strong communication skills and ability to build relationships with technical and non-technical colleagues
Relentless curiosity
Able to identify stakeholders, build relationships, and influence others to get work done
Is comfortable in a fast-paced environment, and ideally has experience in a high-growth startup
Able to take complex problems and break them down into their component parts
Strong knowledge and proven abilities with SQL.
Experience with cloud-based computing (especially Amazon Web Services), procedural programming (especially PHP or Python), EMR frameworks such as Spark or Hadoop, and data science and machine learning methods such as k-means clustering, random forest, decision trees, etc. are all a plus")

Google

https://www.glassdoor.com/Job/san-francisco-data-analyst-intern-jobs-SRCH_IL.0,13_IC1147401_KO14,33.htm?fromAge=7&jl=3056865646&ja=104426064&guid=0000016cd50240758653c3016546740c&pos=102&srs=EMAIL_JOB_ALERT&s=224&ao=37049&utm_source=jobalert&utm_medium=email&utm_campaign=jobAlertAlert&utm_content=ja-jobtitle&utm_term=

google <- c("Must be currently enrolled in a BS, MS or PhD degree program in Computer Science, Linguistics, Computational Linguistics, Statistics, Biostatistics, Applied Mathematics, Operations Research, Economics, Natural Sciences including Biomedical, Chemistry, Materials Science, Physical Modeling, Physics and Scientific Computing or a related technical field or equivalent practical experience.
Experience (classroom or work related) in one or more areas of computer science, such as Natural Language Understanding, Neural Networks, Computer Vision, Machine Learning, Deep Learning, Algorithmic Foundations of Optimization, Data Science, Privacy, Trust & Safety, Software Engineering, Programming Languages, Distributed Systems, Human Computer Interaction, Networking,Operating Systems, Computer Architecture, Data Mining and/or Machine Intelligence (Artificial Intelligence).
Experience with one or more general purpose programming languages including: C/C++, Java, MATLAB, Go or Python.
Preferred qualifications:
Returning to a degree program after completion of the internship.
Relevant work experience, including internships, full time industry experience or as a researcher in a lab.
Ability to design and execute a research agenda.
Contribution to research communities and/or efforts, including publishing papers (i.e. being listed as author) in major conferences or journals.")

Quest Analytics

https://www.glassdoor.com/Job/san-francisco-data-analyst-intern-jobs-SRCH_IL.0,13_IC1147401_KO14,33.htm?fromAge=7&jl=3330628024&ja=104426064&guid=0000016cd50240758653c3016546740c&pos=103&srs=EMAIL_JOB_ALERT&s=224&ao=148364&utm_source=jobalert&utm_medium=email&utm_campaign=jobAlertAlert&utm_content=ja-jobtitle&utm_term=

quest <- c("Measuring the quality of different data sources
Relating external public or licensed data to our internal entity representations
Measuring the quality of our algorithms
Contributing to the development of our algorithms
Big data processing tools
Apache Spark
Databricks
Business intelligence platforms and tools
Looker
AWS Redshift
Automation and data pipelines tool
Kafka
Jenkins
Docker
Airflow
Here's how you will make an impact:
Collaborate with a team of product managers, analysts and other developers to define and complete data projects from data ingestion, to analysis to recommendations.
Develop methodology to gain insights into our proprietary and external data sets
Support data science team in evaluating quality of our entity creation and mapping algorithms
Understand and perform quality checks of our business intelligence, like discovering inconsistencies in our datasets
Demonstrate ability to work well independently, collaborate with teammates, and influence across organizational boundaries
Play a key role in creating both product and client-facing analytics.
Perform analysis, develop reports, visualizations, statistical analysis and computations using Python, R, or Scala
What you'll need for this role?
Experience working with large datasets
Working knowledge of statistics and visualization.
Expert SQL skills. Able to create and to evaluate complex SQL statements involving numerous tables and complex relationships.
Strong skills working with one or more of the following languages:
Python
R
Scala
Experience working with data frames and data frame modules or tools such as Pandas, Jupyter, Zeppelin, Databricks, or similar.
Fast learner; curiosity about and passion for data.

Bonus Points
Experience with Apache Spark
Practical experience with Databricks
Experience with Healthcare Provider Data


Qualifications and Requirements
Bachelors or Masters degree in a technical field
2 Years of relevant work experience")

Sojern (Travel Insights)

https://www.glassdoor.com/Job/san-francisco-data-analyst-intern-jobs-SRCH_IL.0,13_IC1147401_KO14,33.htm?fromAge=7&jl=3332129299&ja=104426064&guid=0000016cd50240758653c3016546740c&pos=104&srs=EMAIL_JOB_ALERT&s=224&ao=654525&utm_source=jobalert&utm_medium=email&utm_campaign=jobAlertAlert&utm_content=ja-jobtitle&utm_term=

travel <- c("Extend Sojern's technical expertise in travel insights and identifying opportunities for improving either our data footprint or our data use/interpretation.
Work closely with business stakeholders to understand and translate their needs into data insights that drive client relationships and help stakeholders and, by extension, their clients do their jobs more effectively,
Translate our complex data into easily consumable reports that facilitate the garnering of actionable take-aways.
Visualize key insights into Tableau dashboards, MS Excel, MS PowerPoint, and deliverables, express ideas and absorb/understand stakeholder's needs.
Cooperate with global teams on projects that deliver client and partner value.
The Expertise We're Looking For:

Experience in producing actionable insights that are based on rigorous analysis of complex and large data sets.
Extensive hands-on experience in data analytics, reporting, and software engineering technologies and processes often used to process, distill, and present large data sets.
Experience in collaborating with Engineering, Data Science, and Product teams on data, analytics, and reporting projects/products.
Experience in working across an organization, championing the collaboration of all stakeholders toward the common goal of maximizing the value delivered.
High-level of comfort in communicating effectively across internal teams and with external stakeholders, e.g., clients, partners.
Knowledge of advertising technologies, techniques, pitfalls, opportunities; e.g., experience with ad delivery platforms such as AppNexus, MediaMath, DV360, and FB.
The Skills You Bring:

1+ years of experience in extracting insights from data by leveraging data retrieval (e.g., SQL, HQL or BigQuery SQL), data manipulation (e.g., Excel, R, Matlab, SAS, Python notebooks), and data visualization (e.g., Tableau, Looker) technologies.
Ability to work independently or in a group in a fast-paced environment, balance multiple priorities, work with remote offices around the world.
Excellent oral and written communication skills; comfortable presenting to clients and to all levels of organization.
BS in Computer Science, Mathematics, Statistics, Physics, or related field is required; MS/PhD in STEM field is a plus.
Proficient in Spanish (reading and writing) is a plus in order to better serve our growing LATAM clients.
The Value You Deliver:
Maintain the focus on core initiatives, such as thought-leadership reporting, client and partner actionable insights, insights product evolution, and data need specification and evolution.
Support our efforts of packaging our extensive travel insights into products that guide our clients through their media planning efforts.
Through close collaboration with your team and the Product team, contribute in the ideation, design, prototyping, and productization of actionable Insights reports that leverage our vast travel data to deliver best-in-class and scalable travel insights.
Take on an active role in our data-driven culture and foster/cultivate relationships with other teams to develop the tools and processes needed to maximize the inherent value of our data.")

Addastaff

https://www.glassdoor.com/Job/san-francisco-data-analyst-intern-jobs-SRCH_IL.0,13_IC1147401_KO14,33.htm?fromAge=7&jl=3329077654&ja=104426064&guid=0000016cd50240758653c3016546740c&pos=105&srs=EMAIL_JOB_ALERT&s=224&ao=735722&utm_source=jobalert&utm_medium=email&utm_campaign=jobAlertAlert&utm_content=ja-jobtitle&utm_term=

addastaff <- c("BA or BS required
5+ years of experience in data analysis and/or data management
3+ years of experience with background in wealth management and/or client reporting/portal
Experience working with Agile and/or Scrum methodologies
Expert in writing SQL
Excellent attention to detail and experience debugging code
Experience with Canonical Data Models (nice to have)
Knowledge of Private Wealth Management/Investment Management or Financials - Management business and systems - Preferred
Salesforce platform and data model principles - Preferred
TFS/JIRA or other issue tracking application - Required")

The Concord Group

https://www.glassdoor.com/Job/san-francisco-data-analyst-intern-jobs-SRCH_IL.0,13_IC1147401_KO14,33.htm?fromAge=7&jl=3308921916&ja=104426064&guid=0000016cd50240758653c3016546740c&pos=101&srs=EMAIL_JOB_ALERT&s=224&ao=14295&utm_source=jobalert&utm_medium=email&utm_campaign=jobAlertAlert&utm_content=ja-jobtitle&utm_term=

concord <- c("Primary research on real estate subjects and entities around the globe
Interviews with Real Estate professionals to gain insight into markets
Creating reports and maps in Excel suitable for professional presentation
Interacting with team members, principals and clients
Currently pursuing a bachelor's degree
Excellent quantitative, communication, and writing skills
Outgoing and personable
Ability to balance multiple assignments in a fast-paced environment
Strong interest in real estate economics and development is preferred
To apply, please complete the linked application form with your resume and cover letter. 

The Concord Group provides equal employment opportunities (EEO) to all applicants without regard to race, color, religion, sex, national origin, age, disability or genetics. Must be authorized to work in the United States without sponsorship. 

Requirements 

Successful candidates will have the following qualifications:
Have or currently pursuing a bachelor's degree
Excellent quantitative skills
Excellent communication, both verbal and writing, skills
Outgoing and personable - Comfortable speaking with professionals over the phone and interacting with team members internally
Ability to balance multiple assignments in a fast-paced environment
Strong interest in real estate economics and development is preferred
Ability to work in our San Francisco office full days (9:00 am - 5:00 pm) 2 to 5 days/week for a minimum of six weeks. Candidates able to stay on longer will be given preference.")

Test the wordcloud() function on each posting

wordcloud(wrkshp, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
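
The default call plots every term the package can fit. As a hedged sketch of how the same plot could be tuned (max.words, min.freq, and scale are standard wordcloud() arguments; the specific values here are illustrative, not ones used elsewhere in this document):

# Cap the number of words shown, require a minimum frequency,
# and set the size range of the text. Values are illustrative only.
wordcloud(wrkshp, random.order = FALSE, colors = brewer.pal(9, "Reds"),
          max.words = 100, min.freq = 2, scale = c(3, 0.5))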

wordcloud(google, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(addastaff, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(concord, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(quest, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(travel, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

Combining all of the postings

word_summary <- c(addastaff, concord, google, quest, travel, wrkshp)

wordcloud(word_summary, random.order = FALSE)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
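
Since tidytext and dplyr are already loaded, a quick frequency table is a useful companion to the cloud. A minimal sketch, assuming a tidytext tokenization of the combined postings (word_counts is an illustrative name; stop_words is the stop-word data frame shipped with tidytext):

# Tokenize the combined postings, drop stop words, and count terms.
word_counts <- tibble(text = word_summary) %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)
head(word_counts, 10)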

Clean the text

clean <- function(x) {
  x <- tolower(x)                        # lowercase everything
  x <- removeWords(x, stopwords("en"))   # drop common English stop words (tm)
  x <- removePunctuation(x)              # strip punctuation (tm)
  x <- stripWhitespace(x)                # collapse repeated whitespace (tm)
  return(x)
}
cleaned_wrkshp <- clean(wrkshp)
cleaned_google <- clean(google)
cleaned_addastaff <- clean(addastaff)
cleaned_concord <- clean(concord)
cleaned_quest <- clean(quest)
cleaned_travel <- clean(travel)
cleaned_sum <- clean(word_summary)
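
A quick sanity check that cleaning actually shrank the text, using whitespace-separated token counts as a rough proxy (str_count() comes from stringr, which tidyverse attaches; this check is illustrative, not part of the analysis above):

# Rough token counts before and after cleaning (whitespace-separated words).
str_count(wrkshp, "\\S+")
str_count(cleaned_wrkshp, "\\S+")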

Re-do the word clouds with the cleaned text

wordcloud(cleaned_wrkshp, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_google, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_addastaff, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_concord, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_quest, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_travel, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

wordcloud(cleaned_sum, random.order = FALSE, colors = brewer.pal(9, "Reds"))
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
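
One possible next step, sketched here under the assumption that contrasting the postings against each other is of interest: build a term-document matrix over the cleaned postings with tm and draw a comparison cloud (VCorpus, VectorSource, and TermDocumentMatrix are tm functions; comparison.cloud is from the wordcloud package; the docs and tdm object names are illustrative).

# Contrast the six postings in a single comparison cloud.
docs <- c(WRKSHP = cleaned_wrkshp, Google = cleaned_google,
          Quest = cleaned_quest, Travel = cleaned_travel,
          Addastaff = cleaned_addastaff, Concord = cleaned_concord)
tdm <- as.matrix(TermDocumentMatrix(VCorpus(VectorSource(docs))))
colnames(tdm) <- names(docs)   # one column per posting
comparison.cloud(tdm, max.words = 80, colors = brewer.pal(6, "Dark2"))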