# Import data
# Load the programming-languages dataset from the CSV in the shared data folder.
# NOTE(review): read_csv() is presumably readr's (attached via tidyverse/readr
# earlier in the document) -- confirm the library() call exists upstream.
data <- read_csv(file = "../00_data/languages.csv")

# Auto-print the resulting tibble to preview its columns and first rows.
data
## # A tibble: 4,303 × 49
## pldb_id title description type appeared creators website domain_name
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 java Java <NA> pl 1995 James G… https:… <NA>
## 2 javascript JavaScript <NA> pl 1995 Brendan… <NA> <NA>
## 3 c C <NA> pl 1972 Dennis … <NA> <NA>
## 4 python Python <NA> pl 1991 Guido v… https:… python.org
## 5 sql SQL <NA> quer… 1974 Donald … <NA> <NA>
## 6 cpp C++ <NA> pl 1985 Bjarne … http:/… isocpp.org
## 7 html HTML <NA> text… 1991 Tim Ber… <NA> <NA>
## 8 xml XML <NA> data… 1996 <NA> <NA> <NA>
## 9 php PHP <NA> pl 1995 Rasmus … https:… php.net
## 10 perl Perl <NA> pl 1987 Larry W… https:… perl.org
## # ℹ 4,293 more rows
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## # isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## # language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## # github_repo_forks <dbl>, github_repo_updated <dbl>,
## # github_repo_subscribers <dbl>, github_repo_created <dbl>,
## # github_repo_description <chr>, github_repo_issues <dbl>, …
# State one question
# Do programming languages with more users also have more jobs?
# Plot data
# Scatter plot of number of jobs (y) against number of users (x), one point
# per row of `data`, overlaid with a fitted quadratic trend line.
ggplot(data = data, mapping = aes(x = number_of_users, y = number_of_jobs)) +
  # Semi-transparent points so that overlapping observations stay visible.
  geom_point(color = "cornflowerblue", alpha = 0.6) +
  # Linear-model smoother using a second-degree polynomial in x.
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    color = "indianred3"
  ) +
  # Format axis tick labels with thousands separators. The argument is
  # spelled `labels` in full rather than relying on partial matching.
  scale_y_continuous(labels = scales::comma) +
  scale_x_continuous(labels = scales::comma) +
  labs(
    x = "Number of Users",
    y = "Number of Jobs",
    title = "Programming Languages - Users vs. Jobs"
  )

# Interpret
# There is a clear positive relationship between the number of users and the number of jobs a programming language has.