Import data

# Read the languages dataset from its CSV file into a tibble, then display it
data <- read_csv(file = "../00_data/languages.csv")
data
## # A tibble: 4,303 × 49
##    pldb_id    title      description type  appeared creators website domain_name
##    <chr>      <chr>      <chr>       <chr>    <dbl> <chr>    <chr>   <chr>      
##  1 java       Java       <NA>        pl        1995 James G… https:… <NA>       
##  2 javascript JavaScript <NA>        pl        1995 Brendan… <NA>    <NA>       
##  3 c          C          <NA>        pl        1972 Dennis … <NA>    <NA>       
##  4 python     Python     <NA>        pl        1991 Guido v… https:… python.org 
##  5 sql        SQL        <NA>        quer…     1974 Donald … <NA>    <NA>       
##  6 cpp        C++        <NA>        pl        1985 Bjarne … http:/… isocpp.org 
##  7 html       HTML       <NA>        text…     1991 Tim Ber… <NA>    <NA>       
##  8 xml        XML        <NA>        data…     1996 <NA>     <NA>    <NA>       
##  9 php        PHP        <NA>        pl        1995 Rasmus … https:… php.net    
## 10 perl       Perl       <NA>        pl        1987 Larry W… https:… perl.org   
## # ℹ 4,293 more rows
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## #   isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## #   language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## #   github_repo_forks <dbl>, github_repo_updated <dbl>,
## #   github_repo_subscribers <dbl>, github_repo_created <dbl>,
## #   github_repo_description <chr>, github_repo_issues <dbl>, …

Apply the following dplyr verbs to your data

Filter rows

# Keep only the languages with at least 1 million users (threshold inclusive)
filtered_data <- data %>%
    filter(number_of_users >= 1e6)
filtered_data
## # A tibble: 13 × 49
##    pldb_id    title      description type  appeared creators website domain_name
##    <chr>      <chr>      <chr>       <chr>    <dbl> <chr>    <chr>   <chr>      
##  1 java       Java       <NA>        pl        1995 James G… https:… <NA>       
##  2 javascript JavaScript <NA>        pl        1995 Brendan… <NA>    <NA>       
##  3 c          C          <NA>        pl        1972 Dennis … <NA>    <NA>       
##  4 python     Python     <NA>        pl        1991 Guido v… https:… python.org 
##  5 sql        SQL        <NA>        quer…     1974 Donald … <NA>    <NA>       
##  6 cpp        C++        <NA>        pl        1985 Bjarne … http:/… isocpp.org 
##  7 html       HTML       <NA>        text…     1991 Tim Ber… <NA>    <NA>       
##  8 xml        XML        <NA>        data…     1996 <NA>     <NA>    <NA>       
##  9 php        PHP        <NA>        pl        1995 Rasmus … https:… php.net    
## 10 matlab     MATLAB     <NA>        pl        1984 Cleve M… http:/… <NA>       
## 11 mysql      MySQL      <NA>        quer…     1995 David A… http:/… mysql.com  
## 12 css        CSS        <NA>        styl…     1996 Håkon W… <NA>    <NA>       
## 13 r          R          <NA>        pl        1993 Ross Ih… https:… r-project.…
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## #   isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## #   language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## #   github_repo_forks <dbl>, github_repo_updated <dbl>,
## #   github_repo_subscribers <dbl>, github_repo_created <dbl>,
## #   github_repo_description <chr>, github_repo_issues <dbl>,
## #   github_repo_first_commit <dbl>, github_language <chr>, …

Arrange rows

# Order the filtered languages by year of appearance, newest first
filtered_data %>%
    arrange(desc(appeared))
## # A tibble: 13 × 49
##    pldb_id    title      description type  appeared creators website domain_name
##    <chr>      <chr>      <chr>       <chr>    <dbl> <chr>    <chr>   <chr>      
##  1 xml        XML        <NA>        data…     1996 <NA>     <NA>    <NA>       
##  2 css        CSS        <NA>        styl…     1996 Håkon W… <NA>    <NA>       
##  3 java       Java       <NA>        pl        1995 James G… https:… <NA>       
##  4 javascript JavaScript <NA>        pl        1995 Brendan… <NA>    <NA>       
##  5 php        PHP        <NA>        pl        1995 Rasmus … https:… php.net    
##  6 mysql      MySQL      <NA>        quer…     1995 David A… http:/… mysql.com  
##  7 r          R          <NA>        pl        1993 Ross Ih… https:… r-project.…
##  8 python     Python     <NA>        pl        1991 Guido v… https:… python.org 
##  9 html       HTML       <NA>        text…     1991 Tim Ber… <NA>    <NA>       
## 10 cpp        C++        <NA>        pl        1985 Bjarne … http:/… isocpp.org 
## 11 matlab     MATLAB     <NA>        pl        1984 Cleve M… http:/… <NA>       
## 12 sql        SQL        <NA>        quer…     1974 Donald … <NA>    <NA>       
## 13 c          C          <NA>        pl        1972 Dennis … <NA>    <NA>       
## # ℹ 41 more variables: domain_name_registered <dbl>, reference <chr>,
## #   isbndb <dbl>, book_count <dbl>, semantic_scholar <dbl>,
## #   language_rank <dbl>, github_repo <chr>, github_repo_stars <dbl>,
## #   github_repo_forks <dbl>, github_repo_updated <dbl>,
## #   github_repo_subscribers <dbl>, github_repo_created <dbl>,
## #   github_repo_description <chr>, github_repo_issues <dbl>,
## #   github_repo_first_commit <dbl>, github_language <chr>, …

Select columns

# Show only the name (title) and type columns of the filtered languages
filtered_data %>%
    select(title, type)
## # A tibble: 13 × 2
##    title      type              
##    <chr>      <chr>             
##  1 Java       pl                
##  2 JavaScript pl                
##  3 C          pl                
##  4 Python     pl                
##  5 SQL        queryLanguage     
##  6 C++        pl                
##  7 HTML       textMarkup        
##  8 XML        dataNotation      
##  9 PHP        pl                
## 10 MATLAB     pl                
## 11 MySQL      queryLanguage     
## 12 CSS        stylesheetLanguage
## 13 R          pl

Add columns

# Adds a column that shows how many years ago each language first appeared.
# The reference year is read from the system clock rather than hard-coded, so
# the ages stay correct whenever the analysis is re-run.
mutated_data <- filtered_data %>% 
    mutate(
        "Years Since Appearance" =
            as.numeric(format(Sys.Date(), "%Y")) - appeared
    )
select(mutated_data, "Years Since Appearance")
## # A tibble: 13 × 1
##    `Years Since Appearance`
##                       <dbl>
##  1                       31
##  2                       31
##  3                       54
##  4                       35
##  5                       52
##  6                       41
##  7                       35
##  8                       30
##  9                       31
## 10                       42
## 11                       31
## 12                       30
## 13                       33

Summarize by groups

# Computes the mean year of first appearance across the filtered languages,
# ignoring any missing years
filtered_data %>%
    summarize(average_year = mean(appeared, na.rm = TRUE))
## # A tibble: 1 × 1
##   average_year
##          <dbl>
## 1        1989.