# Import data

# Load the languages data set from its CSV file into `data`.
data <- read_csv(file = "../00_data/languages.csv")
# Echo the result to show its dimensions and leading rows.
data
## # A tibble: 4,303 × 49
##    pldb_id    title      description type  appeared creators website domain_name
##    <chr>      <chr>      <chr>       <chr>    <dbl> <chr>    <chr>   <chr>      
##  1 java       Java       <NA>        pl        1995 James G… https:… <NA>       
##  2 javascript JavaScript <NA>        pl        1995 Brendan… <NA>    <NA>       
##  3 c          C          <NA>        pl        1972 Dennis … <NA>    <NA>       
##  4 python     Python     <NA>        pl        1991 Guido v… https:… python.org 
##  5 sql        SQL        <NA>        quer…     1974 Donald … <NA>    <NA>       
##  6 cpp        C++        <NA>        pl        1985 Bjarne … http:/… isocpp.org 
##  7 html       HTML       <NA>        text…     1991 Tim Ber… <NA>    <NA>       
##  8 xml        XML        <NA>        data…     1996 <NA>     <NA>    <NA>       
##  9 php        PHP        <NA>        pl        1995 Rasmus … https:… php.net    
## 10 perl       Perl       <NA>        pl        1987 Larry W… https:… perl.org   
## # ℹ 4,293 more rows
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## #   isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## #   language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## #   github_repo_forks <dbl>, github_repo_updated <dbl>,
## #   github_repo_subscribers <dbl>, github_repo_created <dbl>,
## #   github_repo_description <chr>, github_repo_issues <dbl>, …
# State one question
# Does a programming language with more jobs have more users?

# Plot data
# Scatter plot of `number_of_jobs` against `number_of_users`, one point per
# row of `data`, overlaid with a quadratic least-squares trend line.
ggplot(data = data, aes(x = number_of_users, y = number_of_jobs)) +
  geom_point(color = "cornflowerblue", alpha = 0.6) +
  # poly(x, 2) fits a second-degree polynomial, so the trend line may curve.
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    color = "indianred3"
  ) +
  # The argument is `labels`; spell it out rather than relying on partial
  # matching of `label`. scales::comma adds thousands separators to the ticks.
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    x = "Number of Users",
    y = "Number of Jobs",
    title = "Programming Languages - Users vs. Jobs"
  )

# Interpret
# Yes. The plot shows a clear positive relationship between the number of
# users and the number of jobs: programming languages with more jobs tend
# to have more users.