Import data
# Read the languages dataset from a CSV file (the path is relative to the
# current working directory), then display the resulting tibble.
data <- read_csv(file = "../00_data/languages.csv")
data
## # A tibble: 4,303 × 49
## pldb_id title description type appeared creators website domain_name
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 java Java <NA> pl 1995 James G… https:… <NA>
## 2 javascript JavaScript <NA> pl 1995 Brendan… <NA> <NA>
## 3 c C <NA> pl 1972 Dennis … <NA> <NA>
## 4 python Python <NA> pl 1991 Guido v… https:… python.org
## 5 sql SQL <NA> quer… 1974 Donald … <NA> <NA>
## 6 cpp C++ <NA> pl 1985 Bjarne … http:/… isocpp.org
## 7 html HTML <NA> text… 1991 Tim Ber… <NA> <NA>
## 8 xml XML <NA> data… 1996 <NA> <NA> <NA>
## 9 php PHP <NA> pl 1995 Rasmus … https:… php.net
## 10 perl Perl <NA> pl 1987 Larry W… https:… perl.org
## # ℹ 4,293 more rows
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## # isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## # language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## # github_repo_forks <dbl>, github_repo_updated <dbl>,
## # github_repo_subscribers <dbl>, github_repo_created <dbl>,
## # github_repo_description <chr>, github_repo_issues <dbl>, …
Apply the following dplyr verbs to your data
Filter rows
# Keep only the languages with at least 1 million users
# (number_of_users >= 1e6), then display the result.
filtered_data <- data %>%
  filter(number_of_users >= 1e6)
filtered_data
## # A tibble: 13 × 49
## pldb_id title description type appeared creators website domain_name
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 java Java <NA> pl 1995 James G… https:… <NA>
## 2 javascript JavaScript <NA> pl 1995 Brendan… <NA> <NA>
## 3 c C <NA> pl 1972 Dennis … <NA> <NA>
## 4 python Python <NA> pl 1991 Guido v… https:… python.org
## 5 sql SQL <NA> quer… 1974 Donald … <NA> <NA>
## 6 cpp C++ <NA> pl 1985 Bjarne … http:/… isocpp.org
## 7 html HTML <NA> text… 1991 Tim Ber… <NA> <NA>
## 8 xml XML <NA> data… 1996 <NA> <NA> <NA>
## 9 php PHP <NA> pl 1995 Rasmus … https:… php.net
## 10 matlab MATLAB <NA> pl 1984 Cleve M… http:/… <NA>
## 11 mysql MySQL <NA> quer… 1995 David A… http:/… mysql.com
## 12 css CSS <NA> styl… 1996 Håkon W… <NA> <NA>
## 13 r R <NA> pl 1993 Ross Ih… https:… r-project.…
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## # isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## # language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## # github_repo_forks <dbl>, github_repo_updated <dbl>,
## # github_repo_subscribers <dbl>, github_repo_created <dbl>,
## # github_repo_description <chr>, github_repo_issues <dbl>,
## # github_repo_first_commit <dbl>, github_language <chr>, …
Arrange rows
# Order the rows of filtered_data by year of appearance, newest first
filtered_data %>%
  arrange(desc(appeared))
## # A tibble: 13 × 49
## pldb_id title description type appeared creators website domain_name
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 xml XML <NA> data… 1996 <NA> <NA> <NA>
## 2 css CSS <NA> styl… 1996 Håkon W… <NA> <NA>
## 3 java Java <NA> pl 1995 James G… https:… <NA>
## 4 javascript JavaScript <NA> pl 1995 Brendan… <NA> <NA>
## 5 php PHP <NA> pl 1995 Rasmus … https:… php.net
## 6 mysql MySQL <NA> quer… 1995 David A… http:/… mysql.com
## 7 r R <NA> pl 1993 Ross Ih… https:… r-project.…
## 8 python Python <NA> pl 1991 Guido v… https:… python.org
## 9 html HTML <NA> text… 1991 Tim Ber… <NA> <NA>
## 10 cpp C++ <NA> pl 1985 Bjarne … http:/… isocpp.org
## 11 matlab MATLAB <NA> pl 1984 Cleve M… http:/… <NA>
## 12 sql SQL <NA> quer… 1974 Donald … <NA> <NA>
## 13 c C <NA> pl 1972 Dennis … <NA> <NA>
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## # isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## # language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## # github_repo_forks <dbl>, github_repo_updated <dbl>,
## # github_repo_subscribers <dbl>, github_repo_created <dbl>,
## # github_repo_description <chr>, github_repo_issues <dbl>,
## # github_repo_first_commit <dbl>, github_language <chr>, …
Select columns
# Show only the title and type columns of filtered_data
filtered_data %>%
  select(title, type)
## # A tibble: 13 × 2
## title type
## <chr> <chr>
## 1 Java pl
## 2 JavaScript pl
## 3 C pl
## 4 Python pl
## 5 SQL queryLanguage
## 6 C++ pl
## 7 HTML textMarkup
## 8 XML dataNotation
## 9 PHP pl
## 10 MATLAB pl
## 11 MySQL queryLanguage
## 12 CSS stylesheetLanguage
## 13 R pl
Add columns
# Add a column giving the number of years since each language first appeared.
# The current year is taken from the system date rather than hard-coded, so
# the result stays correct whenever the script is re-run.
mutated_data <- filtered_data %>%
  mutate(
    `Years Since Appearance` =
      as.integer(format(Sys.Date(), "%Y")) - appeared
  )
# Display only the newly added column
mutated_data %>%
  select(`Years Since Appearance`)
## # A tibble: 13 × 1
## `Years Since Appearance`
## <dbl>
## 1 31
## 2 31
## 3 54
## 4 35
## 5 52
## 6 41
## 7 35
## 8 30
## 9 31
## 10 42
## 11 31
## 12 30
## 13 33
Summarize by groups
# Compute the mean year of appearance over all rows of filtered_data,
# ignoring missing values.
# NOTE(review): the section heading says "by groups", but no grouping is
# applied here -- confirm whether a per-group summary (e.g. by type) was
# intended.
filtered_data %>%
  summarize(average_year = mean(appeared, na.rm = TRUE))
## # A tibble: 1 × 1
## average_year
## <dbl>
## 1 1989.