Speed Up Your Data Analysis with data.table

Gentle introduction

Author

DSA_406_001_SP25_wk4_vmorale2

Today’s Lesson: Efficient Big Data Analysis with data.table

Objectives

  • Understand why data.table is crucial for big data analysis

  • Learn the syntax and key features of data.table

  • Practice with real-world Spotify data

  • Compare performance with traditional methods

Why data.table?

data.table is designed for handling large datasets (1GB to 100GB) with:

  • Lightning-fast operations

  • Memory efficiency

  • Concise syntax

  • Enhanced features for data manipulation

Setting Up

First, let’s install and load our required packages:

# Install and load required packages
required_packages <- c("data.table", "tidyverse", "microbenchmark", "pryr")

# Install missing packages
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load libraries individually
#library(data.table)
#library(tidyverse)
#library(microbenchmark) # For precise timing
#library(pryr) # For memory tracking

#Load libraries
lapply(required_packages, library, character.only = TRUE)
[[1]]
[1] "data.table" "stats"      "graphics"   "grDevices"  "utils"     
[6] "datasets"   "methods"    "base"      

[[2]]
 [1] "lubridate"  "forcats"    "stringr"    "dplyr"      "purrr"     
 [6] "readr"      "tidyr"      "tibble"     "ggplot2"    "tidyverse" 
[11] "data.table" "stats"      "graphics"   "grDevices"  "utils"     
[16] "datasets"   "methods"    "base"      

[[3]]
 [1] "microbenchmark" "lubridate"      "forcats"        "stringr"       
 [5] "dplyr"          "purrr"          "readr"          "tidyr"         
 [9] "tibble"         "ggplot2"        "tidyverse"      "data.table"    
[13] "stats"          "graphics"       "grDevices"      "utils"         
[17] "datasets"       "methods"        "base"          

[[4]]
 [1] "pryr"           "microbenchmark" "lubridate"      "forcats"       
 [5] "stringr"        "dplyr"          "purrr"          "readr"         
 [9] "tidyr"          "tibble"         "ggplot2"        "tidyverse"     
[13] "data.table"     "stats"          "graphics"       "grDevices"     
[17] "utils"          "datasets"       "methods"        "base"          

Let’s read our Spotify data using data.table:

# Read the dataset using fread (much faster than read.csv() or read_csv())
#spotify_dt <- fread("https://docs.google.com/spreadsheets/d/e/2PACX-1vT9iNNoFknWSmgIglkiEXnmXkJplHHeEf9wPn07tvtOV-5eTh-MdYnLS3eyST_Bx2OXpd0GC9dw5gBt/pub?gid=1801845240&single=true&output=csv")

#read in teh data from data folder
spotify_dt <- fread("week_4/data/spotify_clean.csv")


spotify_csv <- read_csv("week_4/data/spotify_clean.csv")

Lets test it out with the fread function

# Let's measure memory usage
mem_before <- pryr::mem_used()
system.time(spotify_dt <- data.table::fread("week_4/data/spotify_clean.csv"))
   user  system elapsed 
  0.034   0.006   0.015 
mem_after_fread <- pryr::mem_used()
fread_memory <- mem_after_fread - mem_before

✅Now you try:

Now your turn try it out with the readr function read_csv

#comment and write your code below:
mem_before <- pryr::mem_used()
system.time(spotify_dt <- readr::read_csv("week_4/data/spotify_clean.csv"))
Rows: 9999 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (11): track_name, artist_name_s, album_name, album_artist_name_s, album...
dbl  (16): disc_number, track_number, track_duration_ms, popularity, danceab...
lgl   (2): explicit, album_genres
dttm  (1): added_at

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
   user  system elapsed 
  0.147   0.016   0.047 
mem_after_fread <- pryr::mem_used()
fread_memory <- mem_after_fread - mem_before

When you have an existing data frame (like one from tidyverse or base R like spotify_csv), you can convert it to a data.table using setDT().

# Option 1: setDT() - converts in place (memory efficient)
#setDT(spotify_csv)  # Converts the existing object to data.table

# Option 2: as.data.table() - creates a copy (uses more memory)
#spotify_dt <- as.data.table(spotify_csv)
  1. setDT() is more memory efficient because it modifies the existing object
  2. as.data.table() creates a copy, using twice the memory
  3. For big data, it’s best to read directly with fread() rather than converting
  4. The conversion only needs to happen if you start with a regular data frame

Here is a breakdown for comparison:

compare_methods <- function(file_path) {
 # Print initial memory state
 cat("Initial memory state:\n")
 print(gc())
 
 # Create container for results
 results <- list()
 
 # 1. fread method
 cat("\nTesting fread method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 dt1 <- data.table::fread(file_path)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$fread <- list(
   memory = (mem_after - mem_before) / 1024^2,  # Convert to MB
   time = end_time - start_time,
   size = format(object.size(dt1), units = "MB")
 )
 cat("fread object size:", results$fread$size, "\n")
 
 # 2. read_csv method
 cat("\nTesting read_csv method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 csv <- readr::read_csv(file_path)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$read_csv <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(csv), units = "MB")
 )
 cat("read_csv object size:", results$read_csv$size, "\n")
 
 # 3. setDT method
 cat("\nTesting setDT method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 csv_copy <- data.table::copy(csv)
 data.table::setDT(csv_copy)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$setDT <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(csv_copy), units = "MB")
 )
 cat("setDT object size:", results$setDT$size, "\n")
 
 # 4. as.data.table method
 cat("\nTesting as.data.table method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 dt2 <- data.table::as.data.table(csv)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$as_data_table <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(dt2), units = "MB")
 )
 cat("as.data.table object size:", results$as_data_table$size, "\n")
 
 # Final memory state
 cat("\nFinal memory state:\n")
 print(gc())
 
 # Create summary data.frame
 summary_df <- data.frame(
   method = names(results),
   memory_mb = sapply(results, function(x) round(x$memory, 2)),
   time_secs = sapply(results, function(x) round(as.numeric(x$time), 4)),
   final_size = sapply(results, function(x) x$size)
 )
 
 return(summary_df)
}

# Use the function
results <- compare_methods("week_4/data/spotify_clean.csv")
Initial memory state:
          used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
Ncells 1170717 62.6    2366067 126.4         NA  2366067 126.4
Vcells 2763002 21.1    8388608  64.0      16384  3651914  27.9

Testing fread method...
fread object size: 5.5 Mb 

Testing read_csv method...
Rows: 9999 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (11): track_name, artist_name_s, album_name, album_artist_name_s, album...
dbl  (16): disc_number, track_number, track_duration_ms, popularity, danceab...
lgl   (2): explicit, album_genres
dttm  (1): added_at

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
read_csv object size: 5.8 Mb 

Testing setDT method...
setDT object size: 5.8 Mb 

Testing as.data.table method...
as.data.table object size: 5.8 Mb 

Final memory state:
          used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
Ncells 1179062 63.0    2366067 126.4         NA  2366067 126.4
Vcells 4167084 31.8    8388608  64.0      16384  4403103  33.6
print("\nFinal Results:")
[1] "\nFinal Results:"
print(results)
                     method memory_mb time_secs final_size
fread                 fread      4.13    0.0127     5.5 Mb
read_csv           read_csv      2.23    0.0441     5.8 Mb
setDT                 setDT      2.38    0.0008     5.8 Mb
as_data_table as_data_table      2.32    0.0006     5.8 Mb

🔍 Investigation Exercise:

Without using any external resources, what can you discover about this dataset using data.table methods?

💡Think about….

  • How many observations and variables are there?

  • What types of data are present?

# Use these data.table commands to gather clues
colnames(spotify_dt)
 [1] "track_name"          "artist_name_s"       "album_name"         
 [4] "album_artist_name_s" "album_release_date"  "disc_number"        
 [7] "track_number"        "track_duration_ms"   "explicit"           
[10] "popularity"          "isrc"                "added_by"           
[13] "added_at"            "artist_genres"       "danceability"       
[16] "energy"              "key"                 "loudness"           
[19] "mode"                "speechiness"         "acousticness"       
[22] "instrumentalness"    "liveness"            "valence"            
[25] "tempo"               "time_signature"      "album_genres"       
[28] "label"               "copyrights"          "main_genre"         
str(spotify_dt)
spc_tbl_ [9,999 × 30] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ track_name         : chr [1:9999] "Justified & Ancient - Stand by the Jams" "I Know You Want Me (Calle Ocho)" "From the Bottom of My Broken Heart" "Apeman - 2014 Remastered Version" ...
 $ artist_name_s      : chr [1:9999] "The KLF" "Pitbull" "Britney Spears" "The Kinks" ...
 $ album_name         : chr [1:9999] "Songs Collection" "Pitbull Starring In Rebelution" "...Baby One More Time (Digital Deluxe Version)" "Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)" ...
 $ album_artist_name_s: chr [1:9999] "The KLF" "Pitbull" "Britney Spears" "The Kinks" ...
 $ album_release_date : chr [1:9999] "1992-08-03" "2009-10-23" "1999-01-12" "2014-10-20" ...
 $ disc_number        : num [1:9999] 1 1 1 1 1 1 1 1 1 1 ...
 $ track_number       : num [1:9999] 3 3 6 11 9 4 1 1 2 2 ...
 $ track_duration_ms  : num [1:9999] 216270 237120 312533 233400 448720 ...
 $ explicit           : logi [1:9999] FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ popularity         : num [1:9999] 0 64 56 42 0 79 78 61 74 0 ...
 $ isrc               : chr [1:9999] "QMARG1760056" "USJAY0900144" "USJI19910455" "GB5KW1499822" ...
 $ added_by           : chr [1:9999] "spotify:user:bradnumber1" "spotify:user:bradnumber1" "spotify:user:bradnumber1" "spotify:user:bradnumber1" ...
 $ added_at           : POSIXct[1:9999], format: "2020-03-05 09:20:39" "2021-08-08 09:26:31" ...
 $ artist_genres      : chr [1:9999] "acid house,ambient house,big beat,hip house" "dance pop,miami hip hop,pop" "dance pop,pop" "album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter" ...
 $ danceability       : num [1:9999] 0.617 0.825 0.677 0.683 0.319 0.671 0.56 0.48 0.357 0.562 ...
 $ energy             : num [1:9999] 0.872 0.743 0.665 0.728 0.627 0.71 0.68 0.628 0.653 0.681 ...
 $ key                : num [1:9999] 8 2 7 9 0 9 6 6 9 11 ...
 $ loudness           : num [1:9999] -12.3 -6 -5.17 -8.92 -9.61 ...
 $ mode               : num [1:9999] 1 1 1 1 1 1 0 1 1 0 ...
 $ speechiness        : num [1:9999] 0.048 0.149 0.0305 0.259 0.0687 0.0356 0.321 0.0262 0.0654 0.0871 ...
 $ acousticness       : num [1:9999] 0.0158 0.0142 0.56 0.568 0.675 0.0393 0.555 0.174 0.0828 0.113 ...
 $ instrumentalness   : num [1:9999] 1.12e-01 2.12e-05 1.01e-06 5.08e-05 7.29e-05 1.12e-05 0.00 3.28e-05 0.00 0.00 ...
 $ liveness           : num [1:9999] 0.408 0.237 0.338 0.0384 0.289 0.0387 0.116 0.0753 0.0844 0.11 ...
 $ valence            : num [1:9999] 0.504 0.8 0.706 0.833 0.497 0.834 0.319 0.541 0.522 0.357 ...
 $ tempo              : num [1:9999] 111.5 127 75 75.3 85.8 ...
 $ time_signature     : num [1:9999] 4 4 4 4 4 4 4 4 4 4 ...
 $ album_genres       : logi [1:9999] NA NA NA NA NA NA ...
 $ label              : chr [1:9999] "Jams Communications" "Mr.305/Polo Grounds Music/J Records" "Jive" "Sanctuary Records" ...
 $ copyrights         : chr [1:9999] "C 1992 Copyright Control, P 1992 Jams Communications" "P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment" "P (P) 1999 Zomba Recording LLC" "C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company" ...
 $ main_genre         : chr [1:9999] "Dance/Electronic" "Pop" "Pop" "Rock" ...
 - attr(*, "spec")=
  .. cols(
  ..   track_name = col_character(),
  ..   artist_name_s = col_character(),
  ..   album_name = col_character(),
  ..   album_artist_name_s = col_character(),
  ..   album_release_date = col_character(),
  ..   disc_number = col_double(),
  ..   track_number = col_double(),
  ..   track_duration_ms = col_double(),
  ..   explicit = col_logical(),
  ..   popularity = col_double(),
  ..   isrc = col_character(),
  ..   added_by = col_character(),
  ..   added_at = col_datetime(format = ""),
  ..   artist_genres = col_character(),
  ..   danceability = col_double(),
  ..   energy = col_double(),
  ..   key = col_double(),
  ..   loudness = col_double(),
  ..   mode = col_double(),
  ..   speechiness = col_double(),
  ..   acousticness = col_double(),
  ..   instrumentalness = col_double(),
  ..   liveness = col_double(),
  ..   valence = col_double(),
  ..   tempo = col_double(),
  ..   time_signature = col_double(),
  ..   album_genres = col_logical(),
  ..   label = col_character(),
  ..   copyrights = col_character(),
  ..   main_genre = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
spotify_dt <- fread("week_4/data/spotify_clean.csv")

if (nrow(spotify_dt) == 0) {
  stop("Error: spotify_clean.csv did not load correctly. Check the file path!")
}

# Then apply your filter
popular_tracks <- spotify_dt[popularity > 75]

# Clue 1: Basic structure and dimensions
spotify_dt[, .N]  # Number of rows
[1] 9999
ncol(spotify_dt)  # Number of columns
[1] 30
# Clue 2: Column names and types
spotify_dt[, lapply(.SD, class)]  # Shows class of each column
   track_name artist_name_s album_name album_artist_name_s album_release_date
       <char>        <char>     <char>              <char>             <char>
1:  character     character  character           character          character
2:  character     character  character           character          character
   disc_number track_number track_duration_ms explicit popularity      isrc
        <char>       <char>            <char>   <char>     <char>    <char>
1:     integer      integer           integer  logical    integer character
2:     integer      integer           integer  logical    integer character
    added_by added_at artist_genres danceability  energy     key loudness
      <char>   <char>        <char>       <char>  <char>  <char>   <char>
1: character  POSIXct     character      numeric numeric integer  numeric
2: character   POSIXt     character      numeric numeric integer  numeric
      mode speechiness acousticness instrumentalness liveness valence   tempo
    <char>      <char>       <char>           <char>   <char>  <char>  <char>
1: integer     numeric      numeric          numeric  numeric numeric numeric
2: integer     numeric      numeric          numeric  numeric numeric numeric
   time_signature album_genres     label copyrights main_genre
           <char>       <char>    <char>     <char>     <char>
1:        integer      logical character  character  character
2:        integer      logical character  character  character
#Clure 3: look at data content
spotify_dt[1:5]
                                track_name      artist_name_s
                                    <char>             <char>
1: Justified & Ancient - Stand by the Jams            The KLF
2:         I Know You Want Me (Calle Ocho)            Pitbull
3:      From the Bottom of My Broken Heart     Britney Spears
4:        Apeman - 2014 Remastered Version          The Kinks
5:      You Can't Always Get What You Want The Rolling Stones
                                                               album_name
                                                                   <char>
1:                                                       Songs Collection
2:                                         Pitbull Starring In Rebelution
3:                         ...Baby One More Time (Digital Deluxe Version)
4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
5:                                                           Let It Bleed
   album_artist_name_s album_release_date disc_number track_number
                <char>             <char>       <int>        <int>
1:             The KLF         1992-08-03           1            3
2:             Pitbull         2009-10-23           1            3
3:      Britney Spears         1999-01-12           1            6
4:           The Kinks         2014-10-20           1           11
5:  The Rolling Stones         1969-12-05           1            9
   track_duration_ms explicit popularity         isrc                 added_by
               <int>   <lgcl>      <int>       <char>                   <char>
1:            216270    FALSE          0 QMARG1760056 spotify:user:bradnumber1
2:            237120    FALSE         64 USJAY0900144 spotify:user:bradnumber1
3:            312533    FALSE         56 USJI19910455 spotify:user:bradnumber1
4:            233400    FALSE         42 GB5KW1499822 spotify:user:bradnumber1
5:            448720    FALSE          0 USA176910100 spotify:user:bradnumber1
              added_at
                <POSc>
1: 2020-03-05 09:20:39
2: 2021-08-08 09:26:31
3: 2021-08-08 09:26:31
4: 2021-08-08 09:26:31
5: 2021-08-08 09:26:31
                                                                                                             artist_genres
                                                                                                                    <char>
1:                                                                             acid house,ambient house,big beat,hip house
2:                                                                                             dance pop,miami hip hop,pop
3:                                                                                                           dance pop,pop
4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
5:                                                                           album rock,british invasion,classic rock,rock
   danceability energy   key loudness  mode speechiness acousticness
          <num>  <num> <int>    <num> <int>       <num>        <num>
1:        0.617  0.872     8  -12.305     1      0.0480       0.0158
2:        0.825  0.743     2   -5.995     1      0.1490       0.0142
3:        0.677  0.665     7   -5.171     1      0.0305       0.5600
4:        0.683  0.728     9   -8.920     1      0.2590       0.5680
5:        0.319  0.627     0   -9.611     1      0.0687       0.6750
   instrumentalness liveness valence   tempo time_signature album_genres
              <num>    <num>   <num>   <num>          <int>       <lgcl>
1:         1.12e-01   0.4080   0.504 111.458              4           NA
2:         2.12e-05   0.2370   0.800 127.045              4           NA
3:         1.01e-06   0.3380   0.706  74.981              4           NA
4:         5.08e-05   0.0384   0.833  75.311              4           NA
5:         7.29e-05   0.2890   0.497  85.818              4           NA
                                 label
                                <char>
1:                 Jams Communications
2: Mr.305/Polo Grounds Music/J Records
3:                                Jive
4:                   Sanctuary Records
5:               Universal Music Group
                                                                                                   copyrights
                                                                                                       <char>
1:                                                       C 1992 Copyright Control, P 1992 Jams Communications
2:                                        P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
3:                                                                             P (P) 1999 Zomba Recording LLC
4: C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
5:                                   C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
         main_genre
             <char>
1: Dance/Electronic
2:              Pop
3:              Pop
4:             Rock
5:             Rock
# Clue 4: Unique values in categorical columns
spotify_dt[, lapply(.SD, function(x) uniqueN(x)), .SDcols = is.character]
   track_name artist_name_s album_name album_artist_name_s album_release_date
        <int>         <int>      <int>               <int>              <int>
1:       8259          4130       6636                3295               3333
    isrc added_by artist_genres label copyrights main_genre
   <int>    <int>         <int> <int>      <int>      <int>
1:  8949        1          2816  1467       5379         10

Data.table Operations - data.table Syntax: [ i, j, by ]

Now let’s explore data.table’s powerful syntax for data manipulation. Think of these operations as your detective’s toolkit:

  • i (WHERE): Your filter 🔍 (selects specific rows)(like filter() & selec())

  • j (SELECT): Your magnifying glass 🔬 (focuses on specific columns or computations) (like select() or summarize())

  • by (GROUP BY): Your filing system 📁 (organizes findings)(like group_by())

1.) Basic Operations

  1. Selecting Rows (i)

When you only specify i, data.table returns a data.table (like a data frame), not a vector.

str(spotify_dt)
Classes 'data.table' and 'data.frame':  9999 obs. of  30 variables:
 $ track_name         : chr  "Justified & Ancient - Stand by the Jams" "I Know You Want Me (Calle Ocho)" "From the Bottom of My Broken Heart" "Apeman - 2014 Remastered Version" ...
 $ artist_name_s      : chr  "The KLF" "Pitbull" "Britney Spears" "The Kinks" ...
 $ album_name         : chr  "Songs Collection" "Pitbull Starring In Rebelution" "...Baby One More Time (Digital Deluxe Version)" "Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)" ...
 $ album_artist_name_s: chr  "The KLF" "Pitbull" "Britney Spears" "The Kinks" ...
 $ album_release_date : chr  "1992-08-03" "2009-10-23" "1999-01-12" "2014-10-20" ...
 $ disc_number        : int  1 1 1 1 1 1 1 1 1 1 ...
 $ track_number       : int  3 3 6 11 9 4 1 1 2 2 ...
 $ track_duration_ms  : int  216270 237120 312533 233400 448720 193346 173799 240546 259800 208733 ...
 $ explicit           : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
 $ popularity         : int  0 64 56 42 0 79 78 61 74 0 ...
 $ isrc               : chr  "QMARG1760056" "USJAY0900144" "USJI19910455" "GB5KW1499822" ...
 $ added_by           : chr  "spotify:user:bradnumber1" "spotify:user:bradnumber1" "spotify:user:bradnumber1" "spotify:user:bradnumber1" ...
 $ added_at           : POSIXct, format: "2020-03-05 09:20:39" "2021-08-08 09:26:31" ...
 $ artist_genres      : chr  "acid house,ambient house,big beat,hip house" "dance pop,miami hip hop,pop" "dance pop,pop" "album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter" ...
 $ danceability       : num  0.617 0.825 0.677 0.683 0.319 0.671 0.56 0.48 0.357 0.562 ...
 $ energy             : num  0.872 0.743 0.665 0.728 0.627 0.71 0.68 0.628 0.653 0.681 ...
 $ key                : int  8 2 7 9 0 9 6 6 9 11 ...
 $ loudness           : num  -12.3 -6 -5.17 -8.92 -9.61 ...
 $ mode               : int  1 1 1 1 1 1 0 1 1 0 ...
 $ speechiness        : num  0.048 0.149 0.0305 0.259 0.0687 0.0356 0.321 0.0262 0.0654 0.0871 ...
 $ acousticness       : num  0.0158 0.0142 0.56 0.568 0.675 0.0393 0.555 0.174 0.0828 0.113 ...
 $ instrumentalness   : num  1.12e-01 2.12e-05 1.01e-06 5.08e-05 7.29e-05 1.12e-05 0.00 3.28e-05 0.00 0.00 ...
 $ liveness           : num  0.408 0.237 0.338 0.0384 0.289 0.0387 0.116 0.0753 0.0844 0.11 ...
 $ valence            : num  0.504 0.8 0.706 0.833 0.497 0.834 0.319 0.541 0.522 0.357 ...
 $ tempo              : num  111.5 127 75 75.3 85.8 ...
 $ time_signature     : int  4 4 4 4 4 4 4 4 4 4 ...
 $ album_genres       : logi  NA NA NA NA NA NA ...
 $ label              : chr  "Jams Communications" "Mr.305/Polo Grounds Music/J Records" "Jive" "Sanctuary Records" ...
 $ copyrights         : chr  "C 1992 Copyright Control, P 1992 Jams Communications" "P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment" "P (P) 1999 Zomba Recording LLC" "C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company" ...
 $ main_genre         : chr  "Dance/Electronic" "Pop" "Pop" "Rock" ...
 - attr(*, ".internal.selfref")=<externalptr> 
spotify_dt <- fread("week_4/data/spotify_clean.csv")


# Select first 5 rows
spotify_dt[1:5]
                                track_name      artist_name_s
                                    <char>             <char>
1: Justified & Ancient - Stand by the Jams            The KLF
2:         I Know You Want Me (Calle Ocho)            Pitbull
3:      From the Bottom of My Broken Heart     Britney Spears
4:        Apeman - 2014 Remastered Version          The Kinks
5:      You Can't Always Get What You Want The Rolling Stones
                                                               album_name
                                                                   <char>
1:                                                       Songs Collection
2:                                         Pitbull Starring In Rebelution
3:                         ...Baby One More Time (Digital Deluxe Version)
4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
5:                                                           Let It Bleed
   album_artist_name_s album_release_date disc_number track_number
                <char>             <char>       <int>        <int>
1:             The KLF         1992-08-03           1            3
2:             Pitbull         2009-10-23           1            3
3:      Britney Spears         1999-01-12           1            6
4:           The Kinks         2014-10-20           1           11
5:  The Rolling Stones         1969-12-05           1            9
   track_duration_ms explicit popularity         isrc                 added_by
               <int>   <lgcl>      <int>       <char>                   <char>
1:            216270    FALSE          0 QMARG1760056 spotify:user:bradnumber1
2:            237120    FALSE         64 USJAY0900144 spotify:user:bradnumber1
3:            312533    FALSE         56 USJI19910455 spotify:user:bradnumber1
4:            233400    FALSE         42 GB5KW1499822 spotify:user:bradnumber1
5:            448720    FALSE          0 USA176910100 spotify:user:bradnumber1
              added_at
                <POSc>
1: 2020-03-05 09:20:39
2: 2021-08-08 09:26:31
3: 2021-08-08 09:26:31
4: 2021-08-08 09:26:31
5: 2021-08-08 09:26:31
                                                                                                             artist_genres
                                                                                                                    <char>
1:                                                                             acid house,ambient house,big beat,hip house
2:                                                                                             dance pop,miami hip hop,pop
3:                                                                                                           dance pop,pop
4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
5:                                                                           album rock,british invasion,classic rock,rock
   danceability energy   key loudness  mode speechiness acousticness
          <num>  <num> <int>    <num> <int>       <num>        <num>
1:        0.617  0.872     8  -12.305     1      0.0480       0.0158
2:        0.825  0.743     2   -5.995     1      0.1490       0.0142
3:        0.677  0.665     7   -5.171     1      0.0305       0.5600
4:        0.683  0.728     9   -8.920     1      0.2590       0.5680
5:        0.319  0.627     0   -9.611     1      0.0687       0.6750
   instrumentalness liveness valence   tempo time_signature album_genres
              <num>    <num>   <num>   <num>          <int>       <lgcl>
1:         1.12e-01   0.4080   0.504 111.458              4           NA
2:         2.12e-05   0.2370   0.800 127.045              4           NA
3:         1.01e-06   0.3380   0.706  74.981              4           NA
4:         5.08e-05   0.0384   0.833  75.311              4           NA
5:         7.29e-05   0.2890   0.497  85.818              4           NA
                                 label
                                <char>
1:                 Jams Communications
2: Mr.305/Polo Grounds Music/J Records
3:                                Jive
4:                   Sanctuary Records
5:               Universal Music Group
                                                                                                   copyrights
                                                                                                       <char>
1:                                                       C 1992 Copyright Control, P 1992 Jams Communications
2:                                        P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
3:                                                                             P (P) 1999 Zomba Recording LLC
4: C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
5:                                   C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
         main_genre
             <char>
1: Dance/Electronic
2:              Pop
3:              Pop
4:             Rock
5:             Rock
# Filter by popularity
spotify_dt[popularity > 75]
                                 track_name                       artist_name_s
                                     <char>                              <char>
   1:            Don't Stop - 2004 Remaster                       Fleetwood Mac
   2:       Eastside (with Halsey & Khalid)        benny blanco, Halsey, Khalid
   3:                      Here Without You                        3 Doors Down
   4:    We Will Rock You - Remastered 2011                               Queen
   5:                  Too Good At Goodbyes                           Sam Smith
  ---                                                                          
1013: B.O.T.A. (Baddest Of Them All) - Edit Eliza Rose, Interplanetary Criminal
1014:                   Push Up - Main Edit                              Creeds
1015:                    Baby Don't Hurt Me David Guetta, Anne-Marie, Coi Leray
1016:          Kernkraft 400 (A Better Day)                          Topic, A7S
1017:                               Lay Low                              Tiësto
                                  album_name
                                      <char>
   1:                                Rumours
   2:        Eastside (with Halsey & Khalid)
   3:                      Away From The Sun
   4:      News Of The World (2011 Remaster)
   5: The Thrill Of It All (Special Edition)
  ---                                       
1013:         B.O.T.A. (Baddest Of Them All)
1014:                    Push Up (Main Edit)
1015:                     Baby Don't Hurt Me
1016:           Kernkraft 400 (A Better Day)
1017:                                Lay Low
                      album_artist_name_s album_release_date disc_number
                                   <char>             <char>       <int>
   1:                       Fleetwood Mac         1977-02-04           1
   2:        benny blanco, Halsey, Khalid         2018-07-12           1
   3:                        3 Doors Down         2002-11-12           1
   4:                               Queen         1977-10-28           1
   5:                           Sam Smith         2017-11-03           1
  ---                                                                   
1013: Eliza Rose, Interplanetary Criminal         2022-08-12           1
1014:                              Creeds         2023-03-31           1
1015: David Guetta, Anne-Marie, Coi Leray         2023-04-06           1
1016:                          Topic, A7S         2022-06-17           1
1017:                              Tiësto         2023-01-06           1
      track_number track_duration_ms explicit popularity         isrc
             <int>             <int>   <lgcl>      <int>       <char>
   1:            4            193346    FALSE         79 USWB10400049
   2:            1            173799    FALSE         78 USUM71809132
   3:            6            238733    FALSE         80 USUR10200764
   4:            1            122066    FALSE         82 GBUM71029618
   5:            1            201000    FALSE         87 GBUM71704089
  ---                                                                
1013:            1            226626    FALSE         82 QZAKB2166405
1014:            1            139300    FALSE         86 DEE862300564
1015:            1            140017    FALSE         94 UKWLG2300016
1016:            1            165800    FALSE         79 DECE72201091
1017:            1            153442    FALSE         87 NLZ542202348
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2022-08-31 00:08:18
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
1013: spotify:user:bradnumber1 2023-07-09 01:59:43
1014: spotify:user:bradnumber1 2023-07-11 01:16:38
1015: spotify:user:bradnumber1 2023-07-11 05:37:45
1016: spotify:user:bradnumber1 2023-07-11 10:57:25
1017: spotify:user:bradnumber1 2023-07-18 22:06:36
                                                                       artist_genres
                                                                              <char>
   1:                              album rock,classic rock,rock,soft rock,yacht rock
   2:                        pop,electropop,etherpop,indie poptimism,pop,pop,pop r&b
   3:                                    alternative metal,nu metal,post-grunge,rock
   4:                                                    classic rock,glam rock,rock
   5:                                                                     pop,uk pop
  ---                                                                               
1013:                                                house,breaks,experimental house
1014:                                                                           <NA>
1015: big room,dance pop,edm,pop,pop dance,pop,new jersey underground rap,trap queen
1016:           german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
1017:               big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.671  0.710     9   -7.724     1      0.0356      0.03930
   2:        0.560  0.680     6   -7.648     0      0.3210      0.55500
   3:        0.557  0.533    10   -6.817     0      0.0252      0.04920
   4:        0.692  0.497     2   -7.316     1      0.1190      0.67600
   5:        0.681  0.372     5   -8.237     1      0.0432      0.64000
  ---                                                                  
1013:        0.736  0.906     0   -7.589     1      0.0480      0.16400
1014:        0.767  0.830     7   -8.780     1      0.2060      0.20900
1015:        0.602  0.910     7   -3.404     1      0.0308      0.00126
1016:        0.623  0.727    11   -5.570     0      0.0562      0.18400
1017:        0.534  0.855     1   -4.923     0      0.1830      0.06070
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-05   0.0387   0.834 118.745              4           NA
   2:         0.00e+00   0.1160   0.319  89.391              4           NA
   3:         0.00e+00   0.2050   0.233 143.994              4           NA
   4:         0.00e+00   0.2590   0.475  81.308              4           NA
   5:         0.00e+00   0.1690   0.476  91.873              4           NA
  ---                                                                      
1013:         5.85e-01   0.1060   0.698 137.001              4           NA
1014:         8.36e-01   0.0582   0.187  75.023              4           NA
1015:         1.74e-04   0.1200   0.228 127.944              4           NA
1016:         2.02e-05   0.3090   0.400 125.975              4           NA
1017:         2.63e-04   0.3460   0.420 122.060              4           NA
                           label
                          <char>
   1:       Rhino/Warner Records
   2: Benny Blanco Solo Album PS
   3:          Universal Records
   4:                        EMI
   5:                    Capitol
  ---                           
1013:             Warner Records
1014:     Columbia/B1 Recordings
1015:              Parlophone UK
1016:                     Virgin
1017:            Musical Freedom
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                               C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
   2:                                                                                       C © 2018 Friends Keep Secrets/Interscope Records, P ℗ 2018 Friends Keep Secrets/Interscope Records
   3:                                                    C © 2002 Universal Records, a Division of UMG Recordings, Inc., P ℗ 2002 Universal Motown Records, a division of UMG Recordings, Inc.
   4: C © 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV, P ℗ 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV
   5:                                                                                                 C © 2017 Universal Music Operations Limited, P ℗ 2017 Universal Music Operations Limited
  ---                                                                                                                                                                                         
1013:                       C Under exclusive licence to Warner Music UK Limited, © 2022 One House X Limited, P Under exclusive licence to Warner Music UK Limited, ℗ 2022 One House X Limited
1014:                                       P (P) 2023 Rave Alert Records, under exclusive license to Ministry of Sound/Arista Records/B1 Recordings GmbH, a Sony Music Entertainment company.
1015:                                                       C Under license to Warner Music UK Limited, © 2023 What A DJ Ltd, P Under license to Warner Music UK Limited, ℗ 2023 What A DJ Ltd
1016:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
1017:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
            main_genre
                <char>
   1:             Rock
   2:              Pop
   3:             Rock
   4:             Rock
   5:              Pop
  ---                 
1013: Dance/Electronic
1014:            Other
1015:              Pop
1016:              Pop
1017:              Pop
# Multiple conditions
spotify_dt[popularity > 75 & main_genre == "Rock"]
                             track_name               artist_name_s
                                 <char>                      <char>
  1:         Don't Stop - 2004 Remaster               Fleetwood Mac
  2:                   Here Without You                3 Doors Down
  3: We Will Rock You - Remastered 2011                       Queen
  4:                            My Hero                Foo Fighters
  5:                I Love Rock 'N Roll Joan Jett & the Blackhearts
 ---                                                               
230:                    One Step Closer                 Linkin Park
231:  Good Riddance (Time of Your Life)                   Green Day
232:                 When I Come Around                   Green Day
233:        Rebel Rebel - 2016 Remaster                 David Bowie
234:                           Rhiannon               Fleetwood Mac
                                  album_name         album_artist_name_s
                                      <char>                      <char>
  1:                                 Rumours               Fleetwood Mac
  2:                       Away From The Sun                3 Doors Down
  3:       News Of The World (2011 Remaster)                       Queen
  4:                The Colour And The Shape                Foo Fighters
  5: I Love Rock 'N' Roll (Expanded Edition) Joan Jett & the Blackhearts
 ---                                                                    
230:           Hybrid Theory (Bonus Edition)                 Linkin Park
231:                                  Nimrod                   Green Day
232:                                  Dookie                   Green Day
233:            Diamond Dogs (2016 Remaster)                 David Bowie
234:                           Fleetwood Mac               Fleetwood Mac
     album_release_date disc_number track_number track_duration_ms explicit
                 <char>       <int>        <int>             <int>   <lgcl>
  1:         1977-02-04           1            4            193346    FALSE
  2:         2002-11-12           1            6            238733    FALSE
  3:         1977-10-28           1            1            122066    FALSE
  4:         1997-05-20           1            7            260026    FALSE
  5:         1981-11-18           1            1            175173    FALSE
 ---                                                                       
230:         2000-10-24           1            2            157333    FALSE
231:         1997-10-14           1           17            153466     TRUE
232:         1994-02-01           1           10            178000    FALSE
233:         1974-05-24           1            6            274746    FALSE
234:         1975-07-11           1            4            252773    FALSE
     popularity         isrc                 added_by            added_at
          <int>       <char>                   <char>              <POSc>
  1:         79 USWB10400049 spotify:user:bradnumber1 2022-08-31 00:08:18
  2:         80 USUR10200764 spotify:user:bradnumber1 2021-08-08 09:26:31
  3:         82 GBUM71029618 spotify:user:bradnumber1 2021-08-08 09:26:31
  4:         79 USRW29600007 spotify:user:bradnumber1 2021-08-08 09:26:31
  5:         81 USBH18100118 spotify:user:bradnumber1 2021-08-08 09:26:31
 ---                                                                     
230:         83 USWB10002399 spotify:user:bradnumber1 2021-08-08 09:26:31
231:         82 USRE19700545 spotify:user:bradnumber1 2021-08-08 09:26:31
232:         80 USRE19900154 spotify:user:bradnumber1 2021-08-08 09:26:31
233:         76 USJT11600006 spotify:user:bradnumber1 2021-08-08 09:26:31
234:         77 USWB19900188 spotify:user:bradnumber1 2021-08-08 09:26:31
                                                                      artist_genres
                                                                             <char>
  1:                              album rock,classic rock,rock,soft rock,yacht rock
  2:                                    alternative metal,nu metal,post-grunge,rock
  3:                                                    classic rock,glam rock,rock
  4: alternative metal,alternative rock,modern rock,permanent wave,post-grunge,rock
  5:                                                                 glam punk,rock
 ---                                                                               
230:                          alternative metal,nu metal,post-grunge,rap metal,rock
231:                                           modern rock,permanent wave,punk,rock
232:                                           modern rock,permanent wave,punk,rock
233:                            art rock,classic rock,glam rock,permanent wave,rock
234:                              album rock,classic rock,rock,soft rock,yacht rock
     danceability energy   key loudness  mode speechiness acousticness
            <num>  <num> <int>    <num> <int>       <num>        <num>
  1:        0.671  0.710     9   -7.724     1      0.0356     0.039300
  2:        0.557  0.533    10   -6.817     0      0.0252     0.049200
  3:        0.692  0.497     2   -7.316     1      0.1190     0.676000
  4:        0.398  0.927     9   -4.212     1      0.0567     0.000062
  5:        0.535  0.716     4   -5.025     1      0.0431     0.326000
 ---                                                                  
230:        0.492  0.969     9   -4.419     1      0.0491     0.001400
231:        0.475  0.432     7   -7.844     1      0.0302     0.183000
232:        0.531  0.909    11   -4.236     1      0.0340     0.017500
233:        0.635  0.686     9  -15.648     1      0.0530     0.209000
234:        0.723  0.490     9  -14.745     0      0.0296     0.111000
     instrumentalness liveness valence   tempo time_signature album_genres
                <num>    <num>   <num>   <num>          <int>       <lgcl>
  1:         1.12e-05   0.0387   0.834 118.745              4           NA
  2:         0.00e+00   0.2050   0.233 143.994              4           NA
  3:         0.00e+00   0.2590   0.475  81.308              4           NA
  4:         1.77e-04   0.0575   0.311  77.106              4           NA
  5:         0.00e+00   0.6590   0.901  94.379              4           NA
 ---                                                                      
230:         1.32e-06   0.0787   0.538  95.136              4           NA
231:         0.00e+00   0.1610   0.648  94.605              4           NA
232:         0.00e+00   0.2310   0.814  97.546              4           NA
233:         1.76e-02   0.2820   0.464 125.988              4           NA
234:         9.46e-02   0.0923   0.795 129.012              4           NA
                    label
                   <char>
  1: Rhino/Warner Records
  2:    Universal Records
  3:                  EMI
  4:    RCA Records Label
  5:    Legacy Recordings
 ---                     
230:       Warner Records
231:              Reprise
232:              Reprise
233:        Parlophone UK
234:       Warner Records
                                                                                                                                                                                                 copyrights
                                                                                                                                                                                                     <char>
  1:                                                                                                                                             C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
  2:                                                                  C © 2002 Universal Records, a Division of UMG Recordings, Inc., P ℗ 2002 Universal Motown Records, a division of UMG Recordings, Inc.
  3:               C © 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV, P ℗ 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV
  4:                                                                                                                                                                       P (P) 1997 Roswell Records, Inc.
  5:                                                                                                               P (P) 1981 Blackheart Records Group, under exclusive license to Sony Music Entertainment
 ---                                                                                                                                                                                                       
230:                                                                                                                                             C © 2000 Warner Records Inc., P ℗ 2000 Warner Records Inc.
231: C © 1997 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S., P ℗ 1997 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S.
232: C © 1994 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S., P ℗ 1994 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S.
233:               C © 2016 Jones/Tintoretto Entertainment Co. LLC., P ℗ 1974, 2016 Jones/Tintoretto Entertainment Co., LLC under exclusive license to Parlophone Records Ltd, a Warner Music Group Company
234:                                                                                                                                             C © 1975 Warner Records Inc., P ℗ 1975 Warner Records Inc.
     main_genre
         <char>
  1:       Rock
  2:       Rock
  3:       Rock
  4:       Rock
  5:       Rock
 ---           
230:       Rock
231:       Rock
232:       Rock
233:       Rock
234:       Rock
# Excluding certain values
spotify_dt[key != 10]
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9375:                               Kernkraft 400 (A Better Day)
9376:                               Never Say Never - Radio Edit
9377: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9378:                                                    Lay Low
9379:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9375:                   Topic, A7S
9376:                    Vandalism
9377: Spiller, Sophie Ellis-Bextor
9378:                       Tiësto
9379:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9375:                                           Kernkraft 400 (A Better Day)
9376:                                                        Never Say Never
9377:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9378:                                                                Lay Low
9379:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9375:          Topic, A7S         2022-06-17           1            1
9376:           Vandalism         2005-10-24           1            1
9377:             Spiller         2000-08-14           1            1
9378:              Tiësto         2023-01-06           1            1
9379:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9375:            165800    FALSE         79 DECE72201091
9376:            176640    FALSE         17 AUVC00503711
9377:            227619    FALSE         62 GBCPZ0019728
9378:            153442    FALSE         87 NLZ542202348
9379:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9375: spotify:user:bradnumber1 2023-07-11 10:57:25
9376: spotify:user:bradnumber1 2023-07-16 09:38:19
9377: spotify:user:bradnumber1 2023-07-16 09:39:17
9378: spotify:user:bradnumber1 2023-07-18 22:06:36
9379: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9375:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9376:                                                                                       australian dance,melbourne bounce
9377:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9378:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9379:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9375:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9376:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9377:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9378:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9379:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9375:         2.02e-05   0.3090   0.400 125.975              4           NA
9376:         1.12e-02   0.3380   0.767 130.978              4           NA
9377:         8.89e-02   0.3610   0.626 123.037              4           NA
9378:         2.63e-04   0.3460   0.420 122.060              4           NA
9379:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9375:                              Virgin
9376:                             Vicious
9377:                    Defected Records
9378:                     Musical Freedom
9379:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9375:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9376:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9377:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9378:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9379: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre
                <char>
   1: Dance/Electronic
   2:              Pop
   3:              Pop
   4:             Rock
   5:             Rock
  ---                 
9375:              Pop
9376: Dance/Electronic
9377:              Pop
9378:              Pop
9379:              Pop
# Select tracks by genre
spotify_dt[main_genre %in% c("Rock", "Pop")]
                                                      track_name
                                                          <char>
   1:                            I Know You Want Me (Calle Ocho)
   2:                         From the Bottom of My Broken Heart
   3:                           Apeman - 2014 Remastered Version
   4:                         You Can't Always Get What You Want
   5:                                 Don't Stop - 2004 Remaster
  ---                                                           
8103:                                         Baby Don't Hurt Me
8104:                               Kernkraft 400 (A Better Day)
8105: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
8106:                                                    Lay Low
8107:                                                Padam Padam
                            artist_name_s
                                   <char>
   1:                             Pitbull
   2:                      Britney Spears
   3:                           The Kinks
   4:                  The Rolling Stones
   5:                       Fleetwood Mac
  ---                                    
8103: David Guetta, Anne-Marie, Coi Leray
8104:                          Topic, A7S
8105:        Spiller, Sophie Ellis-Bextor
8106:                              Tiësto
8107:                       Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                         Pitbull Starring In Rebelution
   2:                         ...Baby One More Time (Digital Deluxe Version)
   3: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   4:                                                           Let It Bleed
   5:                                                                Rumours
  ---                                                                       
8103:                                                     Baby Don't Hurt Me
8104:                                           Kernkraft 400 (A Better Day)
8105:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
8106:                                                                Lay Low
8107:                                                            Padam Padam
                      album_artist_name_s album_release_date disc_number
                                   <char>             <char>       <int>
   1:                             Pitbull         2009-10-23           1
   2:                      Britney Spears         1999-01-12           1
   3:                           The Kinks         2014-10-20           1
   4:                  The Rolling Stones         1969-12-05           1
   5:                       Fleetwood Mac         1977-02-04           1
  ---                                                                   
8103: David Guetta, Anne-Marie, Coi Leray         2023-04-06           1
8104:                          Topic, A7S         2022-06-17           1
8105:                             Spiller         2000-08-14           1
8106:                              Tiësto         2023-01-06           1
8107:                       Kylie Minogue         2023-05-19           1
      track_number track_duration_ms explicit popularity         isrc
             <int>             <int>   <lgcl>      <int>       <char>
   1:            3            237120    FALSE         64 USJAY0900144
   2:            6            312533    FALSE         56 USJI19910455
   3:           11            233400    FALSE         42 GB5KW1499822
   4:            9            448720    FALSE          0 USA176910100
   5:            4            193346    FALSE         79 USWB10400049
  ---                                                                
8103:            1            140017    FALSE         94 UKWLG2300016
8104:            1            165800    FALSE         79 DECE72201091
8105:            1            227619    FALSE         62 GBCPZ0019728
8106:            1            153442    FALSE         87 NLZ542202348
8107:            1            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2021-08-08 09:26:31
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2022-08-31 00:08:18
  ---                                             
8103: spotify:user:bradnumber1 2023-07-11 05:37:45
8104: spotify:user:bradnumber1 2023-07-11 10:57:25
8105: spotify:user:bradnumber1 2023-07-16 09:39:17
8106: spotify:user:bradnumber1 2023-07-18 22:06:36
8107: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                                             dance pop,miami hip hop,pop
   2:                                                                                                           dance pop,pop
   3: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   4:                                                                           album rock,british invasion,classic rock,rock
   5:                                                                       album rock,classic rock,rock,soft rock,yacht rock
  ---                                                                                                                        
8103:                                          big room,dance pop,edm,pop,pop dance,pop,new jersey underground rap,trap queen
8104:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
8105:                                                                  disco house,vocal house,dance pop,europop,new wave pop
8106:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
8107:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   2:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   3:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   4:        0.319  0.627     0   -9.611     1      0.0687     0.675000
   5:        0.671  0.710     9   -7.724     1      0.0356     0.039300
  ---                                                                  
8103:        0.602  0.910     7   -3.404     1      0.0308     0.001260
8104:        0.623  0.727    11   -5.570     0      0.0562     0.184000
8105:        0.719  0.806     9   -6.802     0      0.0389     0.000132
8106:        0.534  0.855     1   -4.923     0      0.1830     0.060700
8107:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         2.12e-05   0.2370   0.800 127.045              4           NA
   2:         1.01e-06   0.3380   0.706  74.981              4           NA
   3:         5.08e-05   0.0384   0.833  75.311              4           NA
   4:         7.29e-05   0.2890   0.497  85.818              4           NA
   5:         1.12e-05   0.0387   0.834 118.745              4           NA
  ---                                                                      
8103:         1.74e-04   0.1200   0.228 127.944              4           NA
8104:         2.02e-05   0.3090   0.400 125.975              4           NA
8105:         8.89e-02   0.3610   0.626 123.037              4           NA
8106:         2.63e-04   0.3460   0.420 122.060              4           NA
8107:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1: Mr.305/Polo Grounds Music/J Records
   2:                                Jive
   3:                   Sanctuary Records
   4:               Universal Music Group
   5:                Rhino/Warner Records
  ---                                    
8103:                       Parlophone UK
8104:                              Virgin
8105:                    Defected Records
8106:                     Musical Freedom
8107:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   2:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   3:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   4:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
   5:                                                                                                                               C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
  ---                                                                                                                                                                                         
8103:                                                       C Under license to Warner Music UK Limited, © 2023 What A DJ Ltd, P Under license to Warner Music UK Limited, ℗ 2023 What A DJ Ltd
8104:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
8105:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
8106:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
8107: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
      main_genre
          <char>
   1:        Pop
   2:        Pop
   3:       Rock
   4:       Rock
   5:       Rock
  ---           
8103:        Pop
8104:        Pop
8105:        Pop
8106:        Pop
8107:        Pop

✅Now you try:

Selecting Rows (i)

#comment and write your code below:
spotify_dt[danceability > 0.75 & main_genre == "Other"] 
                                track_name       artist_name_s
                                    <char>              <char>
  1:             Together We Are Beautiful         Fern Kinney
  2:                          Ghostbusters      Ray Parker Jr.
  3:                       Don't You Worry             Madasun
  4:                           We Are Done The Madden Brothers
  5:  A Little Bit (Double M’s Radio Edit)             Pandora
 ---                                                          
145:                           Aerial Love        Daniel Johns
146: (Girls Girls Girls Were) Made To Love        Eddie Hodges
147:         Lambada - Original Radio Edit               Kaoma
148:                   Push Up - Main Edit              Creeds
149:                         Kernkraft 400       Zombie Nation
                           album_name album_artist_name_s album_release_date
                               <char>              <char>             <char>
  1:                        Groove Me         Fern Kinney         1979-03-15
  2:                    Greatest Hits      Ray Parker Jr.         1993-10-12
  3:                    The Way It Is             Madasun         2000-01-01
  4:                      We Are Done The Madden Brothers         2014-01-01
  5: Skitzmix 2 (Mixed by Nick Skitz)          Nick Skitz         2014-12-12
 ---                                                                        
145:                             Talk        Daniel Johns         2015-05-22
146:          Rare Oldies But Goodies     Various Artists         2011-01-01
147:                       World Beat               Kaoma               1989
148:              Push Up (Main Edit)              Creeds         2023-03-31
149:       Kernkraft 400 Single Mixes       Zombie Nation         2006-03-07
     disc_number track_number track_duration_ms explicit popularity
           <int>        <int>             <int>   <lgcl>      <int>
  1:           1            5            257213    FALSE         42
  2:           1            1            239133    FALSE         62
  3:           1            1            241720    FALSE         29
  4:           1            1            216467    FALSE          0
  5:           1            2            214306    FALSE         36
 ---                                                               
145:           1            1            214947    FALSE         25
146:           1           25            146893    FALSE          0
147:           1            1            208840    FALSE         62
148:           1            1            139300    FALSE         86
149:           1            1            285173    FALSE         57
                isrc                 added_by            added_at
              <char>                   <char>              <POSc>
  1:    USMR50671008 spotify:user:bradnumber1 2021-08-08 09:26:31
  2:    USAR18400117 spotify:user:bradnumber1 2021-08-08 09:26:31
  3:    GBUM72005603 spotify:user:bradnumber1 2021-08-08 09:26:31
  4:    USUM71406586 spotify:user:bradnumber1 2021-08-08 09:26:31
  5:    AUXL31400786 spotify:user:bradnumber1 2021-08-08 09:26:31
 ---                                                             
145:    AUEL01400059 spotify:user:bradnumber1 2021-08-08 09:26:31
146:    USA371175652 spotify:user:bradnumber1 2021-08-08 09:26:31
147:    FR6V80939969 spotify:user:bradnumber1 2021-08-08 09:26:31
148:    DEE862300564 spotify:user:bradnumber1 2023-07-11 01:16:38
149: DE-Z20-06-00038 spotify:user:bradnumber1 2023-07-11 10:57:16
            artist_genres danceability energy   key loudness  mode speechiness
                   <char>        <num>  <num> <int>    <num> <int>       <num>
  1:                 <NA>        0.761  0.484    10  -14.976     1      0.0381
  2:                 <NA>        0.778  0.719     4   -9.698     1      0.0345
  3:                 <NA>        0.818  0.493     6   -5.701     0      0.0309
  4:                 <NA>        0.780  0.706     1   -8.346     0      0.0321
  5:                 <NA>        0.870  0.817     9   -9.020     1      0.0537
 ---                                                                          
145:                 <NA>        0.763  0.247     7   -9.163     0      0.0421
146: deep adult standards        0.751  0.348     9  -10.542     1      0.0583
147:                 zouk        0.753  0.588     5  -12.454     1      0.0331
148:                 <NA>        0.767  0.830     7   -8.780     1      0.2060
149:        german techno        0.798  0.430     8   -7.839     0      0.0868
     acousticness instrumentalness liveness valence   tempo time_signature
            <num>            <num>    <num>   <num>   <num>          <int>
  1:       0.3660         2.81e-05   0.1890   0.826 110.450              4
  2:       0.0123         2.62e-02   0.2970   0.729 115.400              4
  3:       0.0611         0.00e+00   0.0994   0.840 103.635              4
  4:       0.1390         1.84e-04   0.1060   0.834 121.155              4
  5:       0.1360         1.64e-02   0.0790   0.847 131.994              4
 ---                                                                      
145:       0.4890         3.97e-02   0.1070   0.263  91.999              4
146:       0.2160         0.00e+00   0.0510   0.513  88.941              4
147:       0.2280         1.03e-03   0.0449   0.970 118.915              4
148:       0.2090         8.36e-01   0.0582   0.187  75.023              4
149:       0.0055         9.01e-01   0.1460   0.487 140.064              4
     album_genres                                label
           <lgcl>                               <char>
  1:           NA                       Malaco Records
  2:           NA                        Arista/Legacy
  3:           NA      UMC (Universal Music Catalogue)
  4:           NA EMI Recorded Music Australia Pty Ltd
  5:           NA                            LNG Music
 ---                                                  
145:           NA                               Eleven
146:           NA              Master Classics Records
147:           NA                              Lambada
148:           NA               Columbia/B1 Recordings
149:           NA                          UKW Records
                                                                                                                                             copyrights
                                                                                                                                                 <char>
  1:                                                                                           C 1979 Malaco Records, Inc., P 1979 Malaco Records, Inc.
  2:                                                                                                     P This compilation (P) 2019 Arista Records LLC
  3:                                                                                               C © 2000 V2 Music Limited, P ℗ 2000 V2 Music Limited
  4:                                                                                       C © 2014 Capitol Records, LLC, P ℗ 2014 Capitol Records, LLC
  5:                                                                                                                 C 2014 LNG Music, P 2014 LNG Music
 ---                                                                                                                                                   
145:                                                                 C © 2015 Eleven: A Music Company Pty Ltd, P ℗ 2015 Eleven: A Music Company Pty Ltd
146:                                                                             C (C) 2011 Master Classics Records, P (P) 2011 Master Classics Records
147:                                                                                                       C 1989 KP&P – Lambada, P 1989 KP&P – Lambada
148: P (P) 2023 Rave Alert Records, under exclusive license to Ministry of Sound/Arista Records/B1 Recordings GmbH, a Sony Music Entertainment company.
149:                                                                                                 C 2006 Copyright Control, P 2006 Copyright Control
     main_genre
         <char>
  1:      Other
  2:      Other
  3:      Other
  4:      Other
  5:      Other
 ---           
145:      Other
146:      Other
147:      Other
148:      Other
149:      Other

AS a reminder

Comparison Operators

  • == means “exactly equal to”
  • != means “not equal to”
  • > means “greater than”
  • < means “less than”
  • >= means “greater than or equal to”
  • <= means “less than or equal to”

Logical Operators

  • & means “AND” (both conditions must be true)
  • | means “OR” (at least one condition must be true)
  • ! means “NOT” (reverses the condition)

2.) Column Operations (j)

The j argument lets you select columns and perform computations:

# Select specific columns
spotify_dt[, .(track_name, artist_name_s, popularity)]
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s popularity
                            <char>      <int>
   1:                      The KLF          0
   2:                      Pitbull         64
   3:               Britney Spears         56
   4:                    The Kinks         42
   5:           The Rolling Stones          0
  ---                                        
9995:                   Topic, A7S         79
9996:                    Vandalism         17
9997: Spiller, Sophie Ellis-Bextor         62
9998:                       Tiësto         87
9999:                Kylie Minogue         69
# Compute summary statistics
spotify_dt[, .(avg_popularity = mean(popularity),
              max_energy = max(energy))]
   avg_popularity max_energy
            <num>      <num>
1:       37.62466         NA
# Multiple computations
spotify_dt[, .(
  track_count = .N,
  avg_duration = mean(track_duration_ms)/1000,
  avg_popularity = mean(popularity)
)]
   track_count avg_duration avg_popularity
         <int>        <num>          <num>
1:        9999      224.815       37.62466
# Select specific columns
spotify_dt[, .(artist_name_s, track_name)]
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
# Create new columns in output
spotify_dt[, .(
  song = track_name,
  artist = artist_name_s,
  popularity_score = popularity/100
)]
                                                            song
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                            artist popularity_score
                            <char>            <num>
   1:                      The KLF             0.00
   2:                      Pitbull             0.64
   3:               Britney Spears             0.56
   4:                    The Kinks             0.42
   5:           The Rolling Stones             0.00
  ---                                              
9995:                   Topic, A7S             0.79
9996:                    Vandalism             0.17
9997: Spiller, Sophie Ellis-Bextor             0.62
9998:                       Tiësto             0.87
9999:                Kylie Minogue             0.69
# Calculate summary statistics
spotify_dt[, .(
  avg_popularity = mean(popularity),
  total_tracks = .N,
  avg_duration = mean(track_duration_ms)/1000  # in seconds
)]
   avg_popularity total_tracks avg_duration
            <num>        <int>        <num>
1:       37.62466         9999      224.815

✅Now you try:

Selecting and computing (i)

#comment and write your code below:
  1. Grouping Operations (by)
# Group by genre
spotify_dt[, .(
  track_count = .N,
  avg_popularity = mean(popularity),
  avg_danceability = mean(danceability)
), by = main_genre]
                main_genre track_count avg_popularity avg_danceability
                    <char>       <int>          <num>            <num>
 1:       Dance/Electronic         254       32.76378        0.6779331
 2:                    Pop        5694       40.27116        0.6243319
 3:                   Rock        2413       37.57563        0.5480435
 4:                Hip Hop         214       32.50935        0.7607477
 5:                  Other         946       26.09831               NA
 6:               R&B/Soul         302       34.93709        0.6557980
 7:             Jazz/Blues          54       22.24074        0.5241852
 8:           Folk/Country          91       34.65934        0.5683407
 9:       Reggae/Caribbean          27       48.22222        0.7564815
10: Classical/Instrumental           4       14.75000        0.5580000
# Multiple grouping variables
spotify_dt[, .(
  track_count = .N,
  avg_popularity = mean(popularity)
), by = .(main_genre, explicit)]
                main_genre explicit track_count avg_popularity
                    <char>   <lgcl>       <int>          <num>
 1:       Dance/Electronic    FALSE         252       32.81746
 2:                    Pop    FALSE        5345       39.74275
 3:                   Rock    FALSE        2363       37.50994
 4:                Hip Hop    FALSE         132       31.15152
 5:                  Other    FALSE         924       25.98377
 6:               R&B/Soul    FALSE         301       35.05316
 7:                    Pop     TRUE         349       48.36390
 8:             Jazz/Blues    FALSE          54       22.24074
 9:                Hip Hop     TRUE          82       34.69512
10:           Folk/Country    FALSE          89       33.40449
11:       Reggae/Caribbean    FALSE          27       48.22222
12:                   Rock     TRUE          50       40.68000
13:                  Other     TRUE          22       30.90909
14: Classical/Instrumental     TRUE           1        0.00000
15:       Dance/Electronic     TRUE           2       26.00000
16: Classical/Instrumental    FALSE           3       19.66667
17:           Folk/Country     TRUE           2       90.50000
18:               R&B/Soul     TRUE           1        0.00000
# Group by genre for popularity analysis
genre_stats <- spotify_dt[, .(
  avg_popularity = mean(popularity, na.rm = TRUE),
  track_count = .N,
  avg_duration = mean(track_duration_ms)/1000
), by = main_genre][]

✅Now you try:

Grouping (i)

#comment and write your code below:

Think of it like a sentence:

i = “Which rows do you want?”

j = “What do you want to do with them?”

by = “How should they be grouped?”

5. Advanced Features

  1. Adding/Updating Columns (:=)

We use the empty brackets at the end to print but you could have also

# Add duration in seconds
spotify_dt[, duration_sec := track_duration_ms/1000][]
Index: <main_genre>
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9995:                                           Kernkraft 400 (A Better Day)
9996:                                                        Never Say Never
9997:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                                Lay Low
9999:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9995:          Topic, A7S         2022-06-17           1            1
9996:           Vandalism         2005-10-24           1            1
9997:             Spiller         2000-08-14           1            1
9998:              Tiësto         2023-01-06           1            1
9999:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9995:            165800    FALSE         79 DECE72201091
9996:            176640    FALSE         17 AUVC00503711
9997:            227619    FALSE         62 GBCPZ0019728
9998:            153442    FALSE         87 NLZ542202348
9999:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9995: spotify:user:bradnumber1 2023-07-11 10:57:25
9996: spotify:user:bradnumber1 2023-07-16 09:38:19
9997: spotify:user:bradnumber1 2023-07-16 09:39:17
9998: spotify:user:bradnumber1 2023-07-18 22:06:36
9999: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9995:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9996:                                                                                       australian dance,melbourne bounce
9997:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9998:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9999:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9995:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9996:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9997:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9998:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9999:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9995:         2.02e-05   0.3090   0.400 125.975              4           NA
9996:         1.12e-02   0.3380   0.767 130.978              4           NA
9997:         8.89e-02   0.3610   0.626 123.037              4           NA
9998:         2.63e-04   0.3460   0.420 122.060              4           NA
9999:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9995:                              Virgin
9996:                             Vicious
9997:                    Defected Records
9998:                     Musical Freedom
9999:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9995:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9996:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9997:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9998:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9999: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre duration_sec
                <char>        <num>
   1: Dance/Electronic      216.270
   2:              Pop      237.120
   3:              Pop      312.533
   4:             Rock      233.400
   5:             Rock      448.720
  ---                              
9995:              Pop      165.800
9996: Dance/Electronic      176.640
9997:              Pop      227.619
9998:              Pop      153.442
9999:              Pop      166.266
# Multiple column updates
spotify_dt[, ':='(
  popularity_scaled = popularity/100,
  is_popular = popularity > 75
)]

spotify_dt
Index: <main_genre>
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9995:                                           Kernkraft 400 (A Better Day)
9996:                                                        Never Say Never
9997:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                                Lay Low
9999:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9995:          Topic, A7S         2022-06-17           1            1
9996:           Vandalism         2005-10-24           1            1
9997:             Spiller         2000-08-14           1            1
9998:              Tiësto         2023-01-06           1            1
9999:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9995:            165800    FALSE         79 DECE72201091
9996:            176640    FALSE         17 AUVC00503711
9997:            227619    FALSE         62 GBCPZ0019728
9998:            153442    FALSE         87 NLZ542202348
9999:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9995: spotify:user:bradnumber1 2023-07-11 10:57:25
9996: spotify:user:bradnumber1 2023-07-16 09:38:19
9997: spotify:user:bradnumber1 2023-07-16 09:39:17
9998: spotify:user:bradnumber1 2023-07-18 22:06:36
9999: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9995:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9996:                                                                                       australian dance,melbourne bounce
9997:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9998:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9999:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9995:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9996:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9997:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9998:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9999:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9995:         2.02e-05   0.3090   0.400 125.975              4           NA
9996:         1.12e-02   0.3380   0.767 130.978              4           NA
9997:         8.89e-02   0.3610   0.626 123.037              4           NA
9998:         2.63e-04   0.3460   0.420 122.060              4           NA
9999:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9995:                              Virgin
9996:                             Vicious
9997:                    Defected Records
9998:                     Musical Freedom
9999:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9995:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9996:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9997:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9998:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9999: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre duration_sec popularity_scaled is_popular
                <char>        <num>             <num>     <lgcl>
   1: Dance/Electronic      216.270              0.00      FALSE
   2:              Pop      237.120              0.64      FALSE
   3:              Pop      312.533              0.56      FALSE
   4:             Rock      233.400              0.42      FALSE
   5:             Rock      448.720              0.00      FALSE
  ---                                                           
9995:              Pop      165.800              0.79       TRUE
9996: Dance/Electronic      176.640              0.17      FALSE
9997:              Pop      227.619              0.62      FALSE
9998:              Pop      153.442              0.87       TRUE
9999:              Pop      166.266              0.69      FALSE

✅Now you try:

#comment and write your code below:

# Create new columns
spotify_dt[, `:=`(
  is_popular = popularity > 75,
  loudness_energy_ratio = loudness / energy
)]

# Verify new columns
names(spotify_dt) 
 [1] "track_name"            "artist_name_s"         "album_name"           
 [4] "album_artist_name_s"   "album_release_date"    "disc_number"          
 [7] "track_number"          "track_duration_ms"     "explicit"             
[10] "popularity"            "isrc"                  "added_by"             
[13] "added_at"              "artist_genres"         "danceability"         
[16] "energy"                "key"                   "loudness"             
[19] "mode"                  "speechiness"           "acousticness"         
[22] "instrumentalness"      "liveness"              "valence"              
[25] "tempo"                 "time_signature"        "album_genres"         
[28] "label"                 "copyrights"            "main_genre"           
[31] "duration_sec"          "popularity_scaled"     "is_popular"           
[34] "loudness_energy_ratio"
#from data.table
spotify_dt[, names(.SD)]
 [1] "track_name"            "artist_name_s"         "album_name"           
 [4] "album_artist_name_s"   "album_release_date"    "disc_number"          
 [7] "track_number"          "track_duration_ms"     "explicit"             
[10] "popularity"            "isrc"                  "added_by"             
[13] "added_at"              "artist_genres"         "danceability"         
[16] "energy"                "key"                   "loudness"             
[19] "mode"                  "speechiness"           "acousticness"         
[22] "instrumentalness"      "liveness"              "valence"              
[25] "tempo"                 "time_signature"        "album_genres"         
[28] "label"                 "copyrights"            "main_genre"           
[31] "duration_sec"          "popularity_scaled"     "is_popular"           
[34] "loudness_energy_ratio"

Working with Dates

Here’s how to handle dates in data.table:

# Load lubridate
library(lubridate)

# Convert and extract date components
spotify_dt <- spotify_dt[, `:=`(
  release_date = as.Date(album_release_date),
  release_year = year(as.Date(album_release_date)),
  release_month = month(as.Date(album_release_date)),
  release_day = day(as.Date(album_release_date))
)]

# View the new date columns
spotify_dt[, .(release_date, release_year, release_month, release_day)][1:5]
   release_date release_year release_month release_day
         <Date>        <num>         <num>       <int>
1:   1992-08-03         1992             8           3
2:   2009-10-23         2009            10          23
3:   1999-01-12         1999             1          12
4:   2014-10-20         2014            10          20
5:   1969-12-05         1969            12           5
class(spotify_dt$album_release_date)
[1] "character"

Ordering Results

Data.table provides efficient ways to sort:

# Sort genres by popularity (descending)
genre_stats[order(-avg_popularity)]
                main_genre avg_popularity track_count avg_duration
                    <char>          <num>       <int>        <num>
 1:       Reggae/Caribbean       48.22222          27     210.8963
 2:                    Pop       40.27116        5694     223.3444
 3:                   Rock       37.57563        2413     233.6300
 4:               R&B/Soul       34.93709         302     227.6889
 5:           Folk/Country       34.65934          91     205.5994
 6:       Dance/Electronic       32.76378         254     246.2988
 7:                Hip Hop       32.50935         214     232.4100
 8:                  Other       26.09831         946     208.5390
 9:             Jazz/Blues       22.24074          54     168.0612
10: Classical/Instrumental       14.75000           4     159.5265
# Get top 5 most popular genres
genre_stats[order(-avg_popularity)][1:5]
         main_genre avg_popularity track_count avg_duration
             <char>          <num>       <int>        <num>
1: Reggae/Caribbean       48.22222          27     210.8963
2:              Pop       40.27116        5694     223.3444
3:             Rock       37.57563        2413     233.6300
4:         R&B/Soul       34.93709         302     227.6889
5:     Folk/Country       34.65934          91     205.5994
year_genre_stats <- spotify_dt[, .(
    avg_popularity = mean(popularity),
    track_count = .N,
    avg_energy = mean(energy)
), by = .(release_year)]

# Sort by multiple columns
year_genre_stats[order(release_year, -avg_popularity)]
    release_year avg_popularity track_count avg_energy
           <num>          <num>       <int>      <num>
 1:         1956       68.00000           1  0.6600000
 2:         1958       55.33333           6  0.5676667
 3:         1959       35.00000           2  0.2385000
 4:         1960       61.00000           2  0.8260000
 5:         1961       24.16667           6  0.6656667
 6:         1962       33.57895          19  0.5368947
 7:         1963       32.13333          45  0.5427844
 8:         1964       40.00000          46  0.6658696
 9:         1965       52.51613          31  0.5777097
10:         1966       33.30233          43  0.6424419
11:         1967       44.56250          48  0.5366667
12:         1968       43.87805          41  0.5942927
13:         1969       48.48889          45  0.5497111
14:         1970       50.62500          56  0.5608393
15:         1971       42.90909          44  0.6505000
16:         1972       42.32500          40  0.5486250
17:         1973       49.07273          55  0.5865455
18:         1974       38.13333          30  0.6115667
19:         1975       43.42500          40  0.6191250
20:         1976       50.14634          41  0.6497317
21:         1977       50.55172          58  0.6022759
22:         1978       48.56604          53  0.6504528
23:         1979       44.45283          53  0.6116604
24:         1980       51.33333          45  0.6395556
25:         1981       51.20000          45  0.6677111
26:         1982       47.80851          47  0.6705745
27:         1983       42.55357          56  0.7012679
28:         1984       48.64815          54  0.6755185
29:         1985       40.46154          65  0.7196308
30:         1986       50.05556          72  0.6955417
31:         1987       36.12844         109  0.6350550
32:         1988       40.35294          85  0.6454235
33:         1989       44.08974          78  0.6550231
34:         1990       42.80247          81  0.6516914
35:         1991       38.70787          89  0.6743933
36:         1992       37.28409          88  0.6474659
37:         1993       38.81609          87  0.6679770
38:         1994       39.17073          82  0.6669024
39:         1995       37.00781         128  0.6582813
40:         1996       37.84211         114  0.6303070
41:         1997       39.68217         129  0.7040884
42:         1998       36.08088         136  0.7423088
43:         1999       43.38400         125  0.7390288
44:         2000       36.28859         149  0.7102148
45:         2001       37.68280         186  0.7078441
46:         2002       37.61765         170  0.6864525
47:         2003       36.37500         176  0.7010170
48:         2004       38.66857         175  0.7082629
49:         2005       30.91250         240  0.7182292
50:         2006       33.20321         187  0.6895882
51:         2007       32.03167         221  0.7046335
52:         2008       32.25574         305  0.7465705
53:         2009       31.95455         374  0.7561578
54:         2010       32.20896         335  0.7757433
55:         2011       33.42559         383  0.7296240
56:         2012       35.45299         351  0.7361425
57:         2013       31.26780         295  0.7019861
58:         2014       32.59850         401  0.6773738
59:         2015       32.12250         400  0.6740375
60:         2016       33.14550         378  0.6490534
61:         2017       36.43391         348  0.6564569
62:         2018       43.81609         261  0.6452682
63:         2019       48.44550         211  0.6600806
64:         2020       49.54688         256  0.6631406
65:         2021       41.00966         207  0.6741739
66:         2022       68.91667          60  0.7245000
67:         2023       76.78788          33  0.6893333
68:           NA       36.23457        1377         NA
    release_year avg_popularity track_count avg_energy

🔍 Practice Exercise:

Using the Spotify dataset, create an analysis that:

  1. Filters for songs after 2010

  2. Calculates average audio features by genre

  3. Identifies the top 3 genres by popularity

# solution here
spotify_dt[year(as.Date(album_release_date)) >= 2010,
          .(avg_popularity = mean(popularity),
            avg_danceability = mean(danceability),
            avg_energy = mean(energy),
            track_count = .N),
          by = main_genre
          ][order(-avg_popularity)
          ][1:3]
         main_genre avg_popularity avg_danceability avg_energy track_count
             <char>          <num>            <num>      <num>       <int>
1: Reggae/Caribbean       43.57143        0.7291429  0.7142857           7
2:              Pop       42.13373        0.6329859  0.6930078        2834
3:          Hip Hop       37.21154        0.7300096  0.7036635         104

Ordering Results

Data.table provides efficient ways to sort:

# Sort genres by popularity (descending)
genre_stats[order(-avg_popularity)]
                main_genre avg_popularity track_count avg_duration
                    <char>          <num>       <int>        <num>
 1:       Reggae/Caribbean       48.22222          27     210.8963
 2:                    Pop       40.27116        5694     223.3444
 3:                   Rock       37.57563        2413     233.6300
 4:               R&B/Soul       34.93709         302     227.6889
 5:           Folk/Country       34.65934          91     205.5994
 6:       Dance/Electronic       32.76378         254     246.2988
 7:                Hip Hop       32.50935         214     232.4100
 8:                  Other       26.09831         946     208.5390
 9:             Jazz/Blues       22.24074          54     168.0612
10: Classical/Instrumental       14.75000           4     159.5265
# Sort by multiple columns
year_genre_stats[order(release_year, -avg_popularity)]
    release_year avg_popularity track_count avg_energy
           <num>          <num>       <int>      <num>
 1:         1956       68.00000           1  0.6600000
 2:         1958       55.33333           6  0.5676667
 3:         1959       35.00000           2  0.2385000
 4:         1960       61.00000           2  0.8260000
 5:         1961       24.16667           6  0.6656667
 6:         1962       33.57895          19  0.5368947
 7:         1963       32.13333          45  0.5427844
 8:         1964       40.00000          46  0.6658696
 9:         1965       52.51613          31  0.5777097
10:         1966       33.30233          43  0.6424419
11:         1967       44.56250          48  0.5366667
12:         1968       43.87805          41  0.5942927
13:         1969       48.48889          45  0.5497111
14:         1970       50.62500          56  0.5608393
15:         1971       42.90909          44  0.6505000
16:         1972       42.32500          40  0.5486250
17:         1973       49.07273          55  0.5865455
18:         1974       38.13333          30  0.6115667
19:         1975       43.42500          40  0.6191250
20:         1976       50.14634          41  0.6497317
21:         1977       50.55172          58  0.6022759
22:         1978       48.56604          53  0.6504528
23:         1979       44.45283          53  0.6116604
24:         1980       51.33333          45  0.6395556
25:         1981       51.20000          45  0.6677111
26:         1982       47.80851          47  0.6705745
27:         1983       42.55357          56  0.7012679
28:         1984       48.64815          54  0.6755185
29:         1985       40.46154          65  0.7196308
30:         1986       50.05556          72  0.6955417
31:         1987       36.12844         109  0.6350550
32:         1988       40.35294          85  0.6454235
33:         1989       44.08974          78  0.6550231
34:         1990       42.80247          81  0.6516914
35:         1991       38.70787          89  0.6743933
36:         1992       37.28409          88  0.6474659
37:         1993       38.81609          87  0.6679770
38:         1994       39.17073          82  0.6669024
39:         1995       37.00781         128  0.6582813
40:         1996       37.84211         114  0.6303070
41:         1997       39.68217         129  0.7040884
42:         1998       36.08088         136  0.7423088
43:         1999       43.38400         125  0.7390288
44:         2000       36.28859         149  0.7102148
45:         2001       37.68280         186  0.7078441
46:         2002       37.61765         170  0.6864525
47:         2003       36.37500         176  0.7010170
48:         2004       38.66857         175  0.7082629
49:         2005       30.91250         240  0.7182292
50:         2006       33.20321         187  0.6895882
51:         2007       32.03167         221  0.7046335
52:         2008       32.25574         305  0.7465705
53:         2009       31.95455         374  0.7561578
54:         2010       32.20896         335  0.7757433
55:         2011       33.42559         383  0.7296240
56:         2012       35.45299         351  0.7361425
57:         2013       31.26780         295  0.7019861
58:         2014       32.59850         401  0.6773738
59:         2015       32.12250         400  0.6740375
60:         2016       33.14550         378  0.6490534
61:         2017       36.43391         348  0.6564569
62:         2018       43.81609         261  0.6452682
63:         2019       48.44550         211  0.6600806
64:         2020       49.54688         256  0.6631406
65:         2021       41.00966         207  0.6741739
66:         2022       68.91667          60  0.7245000
67:         2023       76.78788          33  0.6893333
68:           NA       36.23457        1377         NA
    release_year avg_popularity track_count avg_energy
# Get top 5 most popular genres
genre_stats[order(-avg_popularity)][1:5]
         main_genre avg_popularity track_count avg_duration
             <char>          <num>       <int>        <num>
1: Reggae/Caribbean       48.22222          27     210.8963
2:              Pop       40.27116        5694     223.3444
3:             Rock       37.57563        2413     233.6300
4:         R&B/Soul       34.93709         302     227.6889
5:     Folk/Country       34.65934          91     205.5994

Chaining Operations

Data.table allows you to chain operations efficiently:

# Complex analysis chain
spotify_dt[popularity > 70
          ][, .(avg_energy = mean(energy),
                track_count = .N),
            by = main_genre
          ][order(-avg_energy)]
         main_genre avg_energy track_count
             <char>      <num>       <int>
1: Dance/Electronic  0.7640625          16
2:              Pop  0.6937229        1065
3:             Rock  0.6842514         395
4: Reggae/Caribbean  0.6568333           6
5:          Hip Hop  0.6486471          34
6:         R&B/Soul  0.6468684          38
7:            Other  0.6176429          28
8:     Folk/Country  0.5845833          12
  1. Special Symbols
  • .N: Counts rows (like n() in tidyverse)
  • .SD: Subset of data (useful for applying functions to multiple columns)
  • .(): Create a list (like select() or for multiple aggregations)
# Use .SD for multiple column operations
spotify_dt[, lapply(.SD, mean), 
          .SDcols = c("danceability", "energy", "valence"),
          by = main_genre]
                main_genre danceability    energy   valence
                    <char>        <num>     <num>     <num>
 1:       Dance/Electronic    0.6779331 0.8231969 0.5845252
 2:                    Pop    0.6243319 0.6954419 0.5665183
 3:                   Rock    0.5480435 0.6759753 0.5957726
 4:                Hip Hop    0.7607477 0.7146449 0.6088201
 5:                  Other           NA        NA        NA
 6:               R&B/Soul    0.6557980 0.6104570 0.6947848
 7:             Jazz/Blues    0.5241852 0.5895185 0.7212222
 8:           Folk/Country    0.5683407 0.5402066 0.5439121
 9:       Reggae/Caribbean    0.7564815 0.6411481 0.7690000
10: Classical/Instrumental    0.5580000 0.5570000 0.6500000

Performance Tips (Extra)

  1. set key columns for faster subsetting:

# Set key for faster lookups setkey(spotify_dt, main_genre) # Fast subset using key spotify_dt["Rock"]

# Set key for faster lookups 

setkey(spotify_dt, main_genre)  

# Fast subset using key 

spotify_dt["Rock"]
Key: <main_genre>
                                                   track_name
                                                       <char>
   1:                        Apeman - 2014 Remastered Version
   2:                      You Can't Always Get What You Want
   3:                              Don't Stop - 2004 Remaster
   4: Something About The Way You Look Tonight - Edit Version
   5:                                           Juke Box Hero
  ---                                                        
2409:                                     Blowin' in the Wind
2410:                                           She's My Baby
2411:                      Yellow Submarine - Remastered 2009
2412:                                     When You Were Young
2413:                                     Prisoner of Society
           artist_name_s
                  <char>
   1:          The Kinks
   2: The Rolling Stones
   3:      Fleetwood Mac
   4:         Elton John
   5:          Foreigner
  ---                   
2409:          Bob Dylan
2410:     Johnny O'Keefe
2411:        The Beatles
2412:        The Killers
2413:     The Living End
                                                                  album_name
                                                                      <char>
   1: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   2:                                                           Let It Bleed
   3:                                                                Rumours
   4:                          Candle In The Wind 1997 / Something About ...
   5:                                                           4 (Expanded)
  ---                                                                       
2409:                                             The Freewheelin' Bob Dylan
2410:                                            The Very Best Of (Standard)
2411:                                                  Revolver (Remastered)
2412:                                                             Sam's Town
2413:                                                         The Living End
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:           The Kinks         2014-10-20           1           11
   2:  The Rolling Stones         1969-12-05           1            9
   3:       Fleetwood Mac         1977-02-04           1            4
   4:          Elton John         1997-01-01           1            1
   5:           Foreigner               1981           1            2
  ---                                                                
2409:           Bob Dylan         1963-05-27           1            1
2410:      Johnny O'Keefe         2008-08-23           1            4
2411:         The Beatles         1966-08-05           1            6
2412:         The Killers         2006-10-03           1            3
2413:      The Living End         1999-01-15           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            233400    FALSE         42 GB5KW1499822
   2:            448720    FALSE          0 USA176910100
   3:            193346    FALSE         79 USWB10400049
   4:            240546    FALSE         61 GBAMS9700013
   5:            259800    FALSE         74 USAT20803006
  ---                                                   
2409:            165426    FALSE         71 USSM19900534
2410:            125226    FALSE         16 AUWA00800460
2411:            158880    FALSE         61 GBAYE0601498
2412:            220426    FALSE          1 USUM70605164
2413:            229066    FALSE         47 AUMPO9800004
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2021-08-08 09:26:31
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2022-08-31 00:08:18
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
2409: spotify:user:bradnumber1 2021-08-08 09:26:31
2410: spotify:user:bradnumber1 2021-08-08 09:26:31
2411: spotify:user:bradnumber1 2021-08-08 09:26:31
2412: spotify:user:bradnumber1 2021-08-08 09:26:31
2413: spotify:user:bradnumber1 2021-08-08 09:26:31
                                                                                                                artist_genres
                                                                                                                       <char>
   1: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   2:                                                                           album rock,british invasion,classic rock,rock
   3:                                                                       album rock,classic rock,rock,soft rock,yacht rock
   4:                                                                                   glam rock,mellow gold,piano rock,rock
   5:                                  album rock,classic rock,glam metal,hard rock,heartland rock,mellow gold,rock,soft rock
  ---                                                                                                                        
2409:                                              classic rock,country rock,folk,folk rock,rock,roots rock,singer-songwriter
2410:                                                                                                         australian rock
2411:                                               beatlesque,british invasion,classic rock,merseybeat,psychedelic rock,rock
2412:                                                             alternative rock,dance rock,modern rock,permanent wave,rock
2413:                                                              australian alternative rock,australian rock,australian ska
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.683 0.7280     9   -8.920     1      0.2590     0.568000
   2:        0.319 0.6270     0   -9.611     1      0.0687     0.675000
   3:        0.671 0.7100     9   -7.724     1      0.0356     0.039300
   4:        0.480 0.6280     6   -7.643     1      0.0262     0.174000
   5:        0.357 0.6530     9   -5.554     1      0.0654     0.082800
  ---                                                                  
2409:        0.380 0.0993     2  -20.567     1      0.0509     0.914000
2410:        0.582 0.7720     2   -5.028     1      0.0332     0.691000
2411:        0.604 0.5490     1   -9.873     1      0.0389     0.531000
2412:        0.467 0.9880    11   -3.313     1      0.1120     0.000152
2413:        0.275 0.9930     9   -5.474     0      0.1380     0.000186
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         5.08e-05   0.0384   0.833  75.311              4           NA
   2:         7.29e-05   0.2890   0.497  85.818              4           NA
   3:         1.12e-05   0.0387   0.834 118.745              4           NA
   4:         3.28e-05   0.0753   0.541 143.412              4           NA
   5:         0.00e+00   0.0844   0.522 176.647              4           NA
  ---                                                                      
2409:         2.32e-05   0.0605   0.440 174.874              4           NA
2410:         1.75e-04   0.1240   0.731 131.613              4           NA
2411:         0.00e+00   0.4380   0.696 111.398              4           NA
2412:         4.84e-02   0.2800   0.321 130.435              4           NA
2413:         0.00e+00   0.3020   0.190 148.725              4           NA
                      label
                     <char>
   1:     Sanctuary Records
   2: Universal Music Group
   3:  Rhino/Warner Records
   4:                   EMI
   5:        Rhino Atlantic
  ---                      
2409:              Columbia
2410:          WM Australia
2411:         EMI Catalogue
2412: Universal Music Group
2413:               Reprise
                                                                                                                                       copyrights
                                                                                                                                           <char>
   1:                                  C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   2:                                                                    C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
   3:                                                                                  C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
   4:                                                         C © 1997 Mercury Records Limited, P This Compilation ℗ 1997 Mercury Records Limited
   5: C © 2002 Atlantic Recording Corp., marketed by Rhino Entertainment Company, a Warner Music Group company, P ℗ 2002 Atlantic Recording Corp.
  ---                                                                                                                                            
2409:                                                                                    P Originally Released 1963 Sony Music Entertainment Inc.
2410:                                                    C © 2008 Warner Music Australia Pty Limited, P ℗ 2008 Warner Music Australia Pty Limited
2411:                                   C © 2015 Apple Corps Ltd., P ℗ 2015 Calderstone Productions Limited (a division of Universal Music Group)
2412:                                                            C © 2006 The Island Def Jam Music Group, P ℗ 2006 The Island Def Jam Music Group
2413:                                                                                    C © 1999 Reprise Records, P ℗ 1998, 1999 Reprise Records
      main_genre duration_sec popularity_scaled is_popular
          <char>        <num>             <num>     <lgcl>
   1:       Rock      233.400              0.42      FALSE
   2:       Rock      448.720              0.00      FALSE
   3:       Rock      193.346              0.79       TRUE
   4:       Rock      240.546              0.61      FALSE
   5:       Rock      259.800              0.74      FALSE
  ---                                                     
2409:       Rock      165.426              0.71      FALSE
2410:       Rock      125.226              0.16      FALSE
2411:       Rock      158.880              0.61      FALSE
2412:       Rock      220.426              0.01      FALSE
2413:       Rock      229.066              0.47      FALSE
      loudness_energy_ratio release_date release_year release_month release_day
                      <num>       <Date>        <num>         <num>       <int>
   1:            -12.252747   2014-10-20         2014            10          20
   2:            -15.328549   1969-12-05         1969            12           5
   3:            -10.878873   1977-02-04         1977             2           4
   4:            -12.170382   1997-01-01         1997             1           1
   5:             -8.505360         <NA>           NA            NA          NA
  ---                                                                          
2409:           -207.119839   1963-05-27         1963             5          27
2410:             -6.512953   2008-08-23         2008             8          23
2411:            -17.983607   1966-08-05         1966             8           5
2412:             -3.353239   2006-10-03         2006            10           3
2413:             -5.512588   1999-01-15         1999             1          15
  1. Avoid copies with :=:
# More efficient than creating new objects 
spotify_dt[, efficiency_ratio := energy/danceability]
  1. Use .SDcols for column operations:
# Efficient multiple column operations 
audio_features <- c("danceability", "energy", "valence") 

spotify_dt[, lapply(.SD, mean), .SDcols = audio_features, by = main_genre]
Key: <main_genre>
                main_genre danceability    energy   valence
                    <char>        <num>     <num>     <num>
 1: Classical/Instrumental    0.5580000 0.5570000 0.6500000
 2:       Dance/Electronic    0.6779331 0.8231969 0.5845252
 3:           Folk/Country    0.5683407 0.5402066 0.5439121
 4:                Hip Hop    0.7607477 0.7146449 0.6088201
 5:             Jazz/Blues    0.5241852 0.5895185 0.7212222
 6:                  Other           NA        NA        NA
 7:                    Pop    0.6243319 0.6954419 0.5665183
 8:               R&B/Soul    0.6557980 0.6104570 0.6947848
 9:       Reggae/Caribbean    0.7564815 0.6411481 0.7690000
10:                   Rock    0.5480435 0.6759753 0.5957726

Watch out for these common mistakes:

  • Forgetting the .() when selecting multiple columns
  • Using $ instead of := for assignments
  • Forgetting to add [] at the end when you want to see results

Remember:

  • tidyverse is great for readability and smaller datasets
  • data.table shines with big data and performance needs
  • Both have their place in your R toolkit

🎊🎉 Congratulations you finished week 4 and now on to a mini project analysis using data.table functions.

💡 Mini-Project Guidelines:

Create a comprehensive analysis using data.table that:

Demonstrates at least three different data.table operations Includes performance comparisons with equivalent dplyr operations Creates insightful visualizations from the results

Remember to:

  • Document your code

  • Explain your analysis choices

  • Consider performance implications

  • Think about scalability

Want more….

  • Explore more advanced data.table features

Prompt for Chat GPT/Claude/Gemini

You are a data scientist using exploring a large data set using exploratory data analysis principles. Your task is to create code in R using data.table functions to …. {Add what you want it to do}

Analysis: Who are the most popular artists (top 5) in their respective categories - such as Pop, Rock, and Other

Prompt for ChatGPT: You are a data scientist using exploring a large data set using exploratory data analysis principles. Your task is to create code in R using data.table functions to find out which artists have the most danceability.

#load the dataset
spotify_dt <- fread("week_4/data/spotify_clean.csv")

#compute average danceability by artist, trach count is more than 2
artist_dance <- spotify_dt[, .(
  track_count = .N,
  avg_danceability = mean(danceability)
), by = artist_name_s] [track_count > 10]

#filter out artist with more than 0.75 danceability
artist_dance[avg_danceability >= 0.80]
Empty data.table (0 rows and 3 cols): artist_name_s,track_count,avg_danceability
#sort the artists in descending order to get moset danceable first
artist_danceability <- artist_dance[order(-avg_danceability)]

#display the top 10 artists
print(head(artist_danceability, 10))
        artist_name_s track_count avg_danceability
               <char>       <int>            <num>
 1:            Eminem          23        0.7915217
 2:            Prince          15        0.7909333
 3:       Nicki Minaj          11        0.7731818
 4:   Michael Jackson          21        0.7664286
 5:   Black Eyed Peas          28        0.7599286
 6:     Janet Jackson          14        0.7599286
 7:    Jennifer Lopez          13        0.7559231
 8: Justin Timberlake          13        0.7335385
 9:    Britney Spears          27        0.7272963
10:    Meghan Trainor          12        0.7260000
#find which genre is most danceable by grouping
dance_genre_dt <- spotify_dt[, .(
  track_count = .N,
  avg_danceability_genre = mean(danceability)
), by = main_genre]

# create a visualization for genre and average danceability
ggplot(dance_genre_dt, aes(x = main_genre, y = avg_danceability_genre)) +
  geom_col(fill = "plum") +
  coord_flip() +
  labs(title = "Average Danceability By Genre", x = "Genre", y = "Average Danceability") +
  theme_minimal()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_col()`).