Speed Up Your Data Analysis with data.table

Gentle introduction

Author

DSA_406_001_SP25_wk4_dlcoutts

Today’s Lesson: Efficient Big Data Analysis with data.table

Objectives

Understand why data.table is crucial for big data analysis
Learn the syntax and key features of data.table
Practice with real-world Spotify data
Compare performance with traditional methods

Why data.table?

data.table is designed for handling large datasets (1GB to 100GB) with:

Lightning-fast operations
Memory efficiency
Concise syntax
Enhanced features for data manipulation

Setting Up

First, let’s install and load our required packages:

# Install and load required packages
required_packages <- c("data.table", "tidyverse", "microbenchmark", "pryr")

# Install missing packages
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load libraries individually
#library(data.table)
#library(tidyverse)
#library(microbenchmark) # For precise timing
#library(pryr) # For memory tracking

#Load libraries
lapply(required_packages, library, character.only = TRUE)

[[1]]
[1] "data.table" "stats"      "graphics"   "grDevices"  "utils"     
[6] "datasets"   "methods"    "base"      

[[2]]
 [1] "lubridate"  "forcats"    "stringr"    "dplyr"      "purrr"     
 [6] "readr"      "tidyr"      "tibble"     "ggplot2"    "tidyverse" 
[11] "data.table" "stats"      "graphics"   "grDevices"  "utils"     
[16] "datasets"   "methods"    "base"      

[[3]]
 [1] "microbenchmark" "lubridate"      "forcats"        "stringr"       
 [5] "dplyr"          "purrr"          "readr"          "tidyr"         
 [9] "tibble"         "ggplot2"        "tidyverse"      "data.table"    
[13] "stats"          "graphics"       "grDevices"      "utils"         
[17] "datasets"       "methods"        "base"          

[[4]]
 [1] "pryr"           "microbenchmark" "lubridate"      "forcats"       
 [5] "stringr"        "dplyr"          "purrr"          "readr"         
 [9] "tidyr"          "tibble"         "ggplot2"        "tidyverse"     
[13] "data.table"     "stats"          "graphics"       "grDevices"     
[17] "utils"          "datasets"       "methods"        "base"

Let’s read our Spotify data using data.table:

# Read the dataset using fread (much faster than read.csv() or read_csv())
#spotify_dt <- fread("https://docs.google.com/spreadsheets/d/e/2PACX-1vT9iNNoFknWSmgIglkiEXnmXkJplHHeEf9wPn07tvtOV-5eTh-MdYnLS3eyST_Bx2OXpd0GC9dw5gBt/pub?gid=1801845240&single=true&output=csv")

#read in teh data from data folder
spotify_dt <- fread("spotify_clean.csv")


spotify_csv <- read_csv("spotify_clean.csv")

Lets test it out with the fread function

# Let's measure memory usage
mem_before <- pryr::mem_used()
system.time(spotify_dt <- data.table::fread("spotify_clean.csv"))

   user  system elapsed 
   0.00    0.00    0.03

mem_after_fread <- pryr::mem_used()
fread_memory <- mem_after_fread - mem_before

✅Now you try:

Now your turn try it out with the readr function read_csv

#comment and write your code below:

When you have an existing data frame (like one from tidyverse or base R like spotify_csv), you can convert it to a data.table using setDT().

# Option 1: setDT() - converts in place (memory efficient)
#setDT(spotify_csv)  # Converts the existing object to data.table

# Option 2: as.data.table() - creates a copy (uses more memory)
#spotify_dt <- as.data.table(spotify_csv)

setDT() is more memory efficient because it modifies the existing object
as.data.table() creates a copy, using twice the memory
For big data, it’s best to read directly with fread() rather than converting
The conversion only needs to happen if you start with a regular data frame

Here is a breakdown for comparison:

compare_methods <- function(file_path) {
 # Print initial memory state
 cat("Initial memory state:\n")
 print(gc())
 
 # Create container for results
 results <- list()
 
 # 1. fread method
 cat("\nTesting fread method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 dt1 <- data.table::fread(file_path)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$fread <- list(
   memory = (mem_after - mem_before) / 1024^2,  # Convert to MB
   time = end_time - start_time,
   size = format(object.size(dt1), units = "MB")
 )
 cat("fread object size:", results$fread$size, "\n")
 
 # 2. read_csv method
 cat("\nTesting read_csv method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 csv <- readr::read_csv(file_path)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$read_csv <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(csv), units = "MB")
 )
 cat("read_csv object size:", results$read_csv$size, "\n")
 
 # 3. setDT method
 cat("\nTesting setDT method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 csv_copy <- data.table::copy(csv)
 data.table::setDT(csv_copy)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$setDT <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(csv_copy), units = "MB")
 )
 cat("setDT object size:", results$setDT$size, "\n")
 
 # 4. as.data.table method
 cat("\nTesting as.data.table method...\n")
 mem_before <- pryr::mem_used()
 start_time <- Sys.time()
 dt2 <- data.table::as.data.table(csv)
 end_time <- Sys.time()
 mem_after <- pryr::mem_used()
 results$as_data_table <- list(
   memory = (mem_after - mem_before) / 1024^2,
   time = end_time - start_time,
   size = format(object.size(dt2), units = "MB")
 )
 cat("as.data.table object size:", results$as_data_table$size, "\n")
 
 # Final memory state
 cat("\nFinal memory state:\n")
 print(gc())
 
 # Create summary data.frame
 summary_df <- data.frame(
   method = names(results),
   memory_mb = sapply(results, function(x) round(x$memory, 2)),
   time_secs = sapply(results, function(x) round(as.numeric(x$time), 4)),
   final_size = sapply(results, function(x) x$size)
 )
 
 return(summary_df)
}

# Use the function
results <- compare_methods("spotify_clean.csv")

Initial memory state:
          used (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 1166594 62.4    2428697 129.8  2428697 129.8
Vcells 2958005 22.6    8388608  64.0  3498290  26.7

Testing fread method...
fread object size: 5.5 Mb 

Testing read_csv method...

Rows: 9999 Columns: 30
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (11): track_name, artist_name_s, album_name, album_artist_name_s, album...
dbl  (16): disc_number, track_number, track_duration_ms, popularity, danceab...
lgl   (2): explicit, album_genres
dttm  (1): added_at

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

read_csv object size: 5.8 Mb 

Testing setDT method...
setDT object size: 5.8 Mb 

Testing as.data.table method...
as.data.table object size: 5.8 Mb 

Final memory state:
          used (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 1171794 62.6    2428697 129.8  2428697 129.8
Vcells 4308790 32.9   10146329  77.5  4544803  34.7

print("\nFinal Results:")

[1] "\nFinal Results:"

print(results)

                     method memory_mb time_secs final_size
fread                 fread      3.55    0.0383     5.5 Mb
read_csv           read_csv      2.23    0.4564     5.8 Mb
setDT                 setDT      2.39    0.0027     5.8 Mb
as_data_table as_data_table      2.32    0.0023     5.8 Mb

🔍 Investigation Exercise:

Without using any external resources, what can you discover about this dataset using data.table methods?

💡Think about….

How many observations and variables are there?
What types of data are present?

# Use these data.table commands to gather clues:

# Clue 1: Basic structure and dimensions
spotify_dt[, .N]  # Number of rows

[1] 9999

ncol(spotify_dt)  # Number of columns

[1] 30

# Clue 2: Column names and types
spotify_dt[, lapply(.SD, class)]  # Shows class of each column

   track_name artist_name_s album_name album_artist_name_s album_release_date
       <char>        <char>     <char>              <char>             <char>
1:  character     character  character           character          character
2:  character     character  character           character          character
   disc_number track_number track_duration_ms explicit popularity      isrc
        <char>       <char>            <char>   <char>     <char>    <char>
1:     integer      integer           integer  logical    integer character
2:     integer      integer           integer  logical    integer character
    added_by added_at artist_genres danceability  energy     key loudness
      <char>   <char>        <char>       <char>  <char>  <char>   <char>
1: character  POSIXct     character      numeric numeric integer  numeric
2: character   POSIXt     character      numeric numeric integer  numeric
      mode speechiness acousticness instrumentalness liveness valence   tempo
    <char>      <char>       <char>           <char>   <char>  <char>  <char>
1: integer     numeric      numeric          numeric  numeric numeric numeric
2: integer     numeric      numeric          numeric  numeric numeric numeric
   time_signature album_genres     label copyrights main_genre
           <char>       <char>    <char>     <char>     <char>
1:        integer      logical character  character  character
2:        integer      logical character  character  character

#Clure 3: look at data content
spotify_dt[1:5]

                                track_name      artist_name_s
                                    <char>             <char>
1: Justified & Ancient - Stand by the Jams            The KLF
2:         I Know You Want Me (Calle Ocho)            Pitbull
3:      From the Bottom of My Broken Heart     Britney Spears
4:        Apeman - 2014 Remastered Version          The Kinks
5:      You Can't Always Get What You Want The Rolling Stones
                                                               album_name
                                                                   <char>
1:                                                       Songs Collection
2:                                         Pitbull Starring In Rebelution
3:                         ...Baby One More Time (Digital Deluxe Version)
4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
5:                                                           Let It Bleed
   album_artist_name_s album_release_date disc_number track_number
                <char>             <char>       <int>        <int>
1:             The KLF         1992-08-03           1            3
2:             Pitbull         2009-10-23           1            3
3:      Britney Spears         1999-01-12           1            6
4:           The Kinks         2014-10-20           1           11
5:  The Rolling Stones         1969-12-05           1            9
   track_duration_ms explicit popularity         isrc                 added_by
               <int>   <lgcl>      <int>       <char>                   <char>
1:            216270    FALSE          0 QMARG1760056 spotify:user:bradnumber1
2:            237120    FALSE         64 USJAY0900144 spotify:user:bradnumber1
3:            312533    FALSE         56 USJI19910455 spotify:user:bradnumber1
4:            233400    FALSE         42 GB5KW1499822 spotify:user:bradnumber1
5:            448720    FALSE          0 USA176910100 spotify:user:bradnumber1
              added_at
                <POSc>
1: 2020-03-05 09:20:39
2: 2021-08-08 09:26:31
3: 2021-08-08 09:26:31
4: 2021-08-08 09:26:31
5: 2021-08-08 09:26:31
                                                                                                             artist_genres
                                                                                                                    <char>
1:                                                                             acid house,ambient house,big beat,hip house
2:                                                                                             dance pop,miami hip hop,pop
3:                                                                                                           dance pop,pop
4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
5:                                                                           album rock,british invasion,classic rock,rock
   danceability energy   key loudness  mode speechiness acousticness
          <num>  <num> <int>    <num> <int>       <num>        <num>
1:        0.617  0.872     8  -12.305     1      0.0480       0.0158
2:        0.825  0.743     2   -5.995     1      0.1490       0.0142
3:        0.677  0.665     7   -5.171     1      0.0305       0.5600
4:        0.683  0.728     9   -8.920     1      0.2590       0.5680
5:        0.319  0.627     0   -9.611     1      0.0687       0.6750
   instrumentalness liveness valence   tempo time_signature album_genres
              <num>    <num>   <num>   <num>          <int>       <lgcl>
1:         1.12e-01   0.4080   0.504 111.458              4           NA
2:         2.12e-05   0.2370   0.800 127.045              4           NA
3:         1.01e-06   0.3380   0.706  74.981              4           NA
4:         5.08e-05   0.0384   0.833  75.311              4           NA
5:         7.29e-05   0.2890   0.497  85.818              4           NA
                                 label
                                <char>
1:                 Jams Communications
2: Mr.305/Polo Grounds Music/J Records
3:                                Jive
4:                   Sanctuary Records
5:               Universal Music Group
                                                                                                   copyrights
                                                                                                       <char>
1:                                                       C 1992 Copyright Control, P 1992 Jams Communications
2:                                        P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
3:                                                                             P (P) 1999 Zomba Recording LLC
4: C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
5:                                   C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
         main_genre
             <char>
1: Dance/Electronic
2:              Pop
3:              Pop
4:             Rock
5:             Rock

# Clue 4: Unique values in categorical columns
spotify_dt[, lapply(.SD, function(x) uniqueN(x)), .SDcols = is.character]

   track_name artist_name_s album_name album_artist_name_s album_release_date
        <int>         <int>      <int>               <int>              <int>
1:       8259          4130       6636                3295               3333
    isrc added_by artist_genres label copyrights main_genre
   <int>    <int>         <int> <int>      <int>      <int>
1:  8949        1          2816  1467       5379         10

Data.table Operations - data.table Syntax: [ i, j, by ]

Now let’s explore data.table’s powerful syntax for data manipulation. Think of these operations as your detective’s toolkit:

i (WHERE): Your filter 🔍 (selects specific rows)(like filter() & selec())
j (SELECT): Your magnifying glass 🔬 (focuses on specific columns or computations) (like select() or summarize())
by (GROUP BY): Your filing system 📁 (organizes findings)(like group_by())

1.) Basic Operations

Selecting Rows (i)

When you only specify i, data.table returns a data.table (like a data frame), not a vector.

# Select first 5 rows
spotify_dt[1:5]

                                track_name      artist_name_s
                                    <char>             <char>
1: Justified & Ancient - Stand by the Jams            The KLF
2:         I Know You Want Me (Calle Ocho)            Pitbull
3:      From the Bottom of My Broken Heart     Britney Spears
4:        Apeman - 2014 Remastered Version          The Kinks
5:      You Can't Always Get What You Want The Rolling Stones
                                                               album_name
                                                                   <char>
1:                                                       Songs Collection
2:                                         Pitbull Starring In Rebelution
3:                         ...Baby One More Time (Digital Deluxe Version)
4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
5:                                                           Let It Bleed
   album_artist_name_s album_release_date disc_number track_number
                <char>             <char>       <int>        <int>
1:             The KLF         1992-08-03           1            3
2:             Pitbull         2009-10-23           1            3
3:      Britney Spears         1999-01-12           1            6
4:           The Kinks         2014-10-20           1           11
5:  The Rolling Stones         1969-12-05           1            9
   track_duration_ms explicit popularity         isrc                 added_by
               <int>   <lgcl>      <int>       <char>                   <char>
1:            216270    FALSE          0 QMARG1760056 spotify:user:bradnumber1
2:            237120    FALSE         64 USJAY0900144 spotify:user:bradnumber1
3:            312533    FALSE         56 USJI19910455 spotify:user:bradnumber1
4:            233400    FALSE         42 GB5KW1499822 spotify:user:bradnumber1
5:            448720    FALSE          0 USA176910100 spotify:user:bradnumber1
              added_at
                <POSc>
1: 2020-03-05 09:20:39
2: 2021-08-08 09:26:31
3: 2021-08-08 09:26:31
4: 2021-08-08 09:26:31
5: 2021-08-08 09:26:31
                                                                                                             artist_genres
                                                                                                                    <char>
1:                                                                             acid house,ambient house,big beat,hip house
2:                                                                                             dance pop,miami hip hop,pop
3:                                                                                                           dance pop,pop
4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
5:                                                                           album rock,british invasion,classic rock,rock
   danceability energy   key loudness  mode speechiness acousticness
          <num>  <num> <int>    <num> <int>       <num>        <num>
1:        0.617  0.872     8  -12.305     1      0.0480       0.0158
2:        0.825  0.743     2   -5.995     1      0.1490       0.0142
3:        0.677  0.665     7   -5.171     1      0.0305       0.5600
4:        0.683  0.728     9   -8.920     1      0.2590       0.5680
5:        0.319  0.627     0   -9.611     1      0.0687       0.6750
   instrumentalness liveness valence   tempo time_signature album_genres
              <num>    <num>   <num>   <num>          <int>       <lgcl>
1:         1.12e-01   0.4080   0.504 111.458              4           NA
2:         2.12e-05   0.2370   0.800 127.045              4           NA
3:         1.01e-06   0.3380   0.706  74.981              4           NA
4:         5.08e-05   0.0384   0.833  75.311              4           NA
5:         7.29e-05   0.2890   0.497  85.818              4           NA
                                 label
                                <char>
1:                 Jams Communications
2: Mr.305/Polo Grounds Music/J Records
3:                                Jive
4:                   Sanctuary Records
5:               Universal Music Group
                                                                                                   copyrights
                                                                                                       <char>
1:                                                       C 1992 Copyright Control, P 1992 Jams Communications
2:                                        P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
3:                                                                             P (P) 1999 Zomba Recording LLC
4: C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
5:                                   C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
         main_genre
             <char>
1: Dance/Electronic
2:              Pop
3:              Pop
4:             Rock
5:             Rock

# Filter by popularity
spotify_dt[popularity > 75]

                                 track_name                       artist_name_s
                                     <char>                              <char>
   1:            Don't Stop - 2004 Remaster                       Fleetwood Mac
   2:       Eastside (with Halsey & Khalid)        benny blanco, Halsey, Khalid
   3:                      Here Without You                        3 Doors Down
   4:    We Will Rock You - Remastered 2011                               Queen
   5:                  Too Good At Goodbyes                           Sam Smith
  ---                                                                          
1013: B.O.T.A. (Baddest Of Them All) - Edit Eliza Rose, Interplanetary Criminal
1014:                   Push Up - Main Edit                              Creeds
1015:                    Baby Don't Hurt Me David Guetta, Anne-Marie, Coi Leray
1016:          Kernkraft 400 (A Better Day)                          Topic, A7S
1017:                               Lay Low                              Tiësto
                                  album_name
                                      <char>
   1:                                Rumours
   2:        Eastside (with Halsey & Khalid)
   3:                      Away From The Sun
   4:      News Of The World (2011 Remaster)
   5: The Thrill Of It All (Special Edition)
  ---                                       
1013:         B.O.T.A. (Baddest Of Them All)
1014:                    Push Up (Main Edit)
1015:                     Baby Don't Hurt Me
1016:           Kernkraft 400 (A Better Day)
1017:                                Lay Low
                      album_artist_name_s album_release_date disc_number
                                   <char>             <char>       <int>
   1:                       Fleetwood Mac         1977-02-04           1
   2:        benny blanco, Halsey, Khalid         2018-07-12           1
   3:                        3 Doors Down         2002-11-12           1
   4:                               Queen         1977-10-28           1
   5:                           Sam Smith         2017-11-03           1
  ---                                                                   
1013: Eliza Rose, Interplanetary Criminal         2022-08-12           1
1014:                              Creeds         2023-03-31           1
1015: David Guetta, Anne-Marie, Coi Leray         2023-04-06           1
1016:                          Topic, A7S         2022-06-17           1
1017:                              Tiësto         2023-01-06           1
      track_number track_duration_ms explicit popularity         isrc
             <int>             <int>   <lgcl>      <int>       <char>
   1:            4            193346    FALSE         79 USWB10400049
   2:            1            173799    FALSE         78 USUM71809132
   3:            6            238733    FALSE         80 USUR10200764
   4:            1            122066    FALSE         82 GBUM71029618
   5:            1            201000    FALSE         87 GBUM71704089
  ---                                                                
1013:            1            226626    FALSE         82 QZAKB2166405
1014:            1            139300    FALSE         86 DEE862300564
1015:            1            140017    FALSE         94 UKWLG2300016
1016:            1            165800    FALSE         79 DECE72201091
1017:            1            153442    FALSE         87 NLZ542202348
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2022-08-31 00:08:18
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
1013: spotify:user:bradnumber1 2023-07-09 01:59:43
1014: spotify:user:bradnumber1 2023-07-11 01:16:38
1015: spotify:user:bradnumber1 2023-07-11 05:37:45
1016: spotify:user:bradnumber1 2023-07-11 10:57:25
1017: spotify:user:bradnumber1 2023-07-18 22:06:36
                                                                       artist_genres
                                                                              <char>
   1:                              album rock,classic rock,rock,soft rock,yacht rock
   2:                        pop,electropop,etherpop,indie poptimism,pop,pop,pop r&b
   3:                                    alternative metal,nu metal,post-grunge,rock
   4:                                                    classic rock,glam rock,rock
   5:                                                                     pop,uk pop
  ---                                                                               
1013:                                                house,breaks,experimental house
1014:                                                                           <NA>
1015: big room,dance pop,edm,pop,pop dance,pop,new jersey underground rap,trap queen
1016:           german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
1017:               big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.671  0.710     9   -7.724     1      0.0356      0.03930
   2:        0.560  0.680     6   -7.648     0      0.3210      0.55500
   3:        0.557  0.533    10   -6.817     0      0.0252      0.04920
   4:        0.692  0.497     2   -7.316     1      0.1190      0.67600
   5:        0.681  0.372     5   -8.237     1      0.0432      0.64000
  ---                                                                  
1013:        0.736  0.906     0   -7.589     1      0.0480      0.16400
1014:        0.767  0.830     7   -8.780     1      0.2060      0.20900
1015:        0.602  0.910     7   -3.404     1      0.0308      0.00126
1016:        0.623  0.727    11   -5.570     0      0.0562      0.18400
1017:        0.534  0.855     1   -4.923     0      0.1830      0.06070
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-05   0.0387   0.834 118.745              4           NA
   2:         0.00e+00   0.1160   0.319  89.391              4           NA
   3:         0.00e+00   0.2050   0.233 143.994              4           NA
   4:         0.00e+00   0.2590   0.475  81.308              4           NA
   5:         0.00e+00   0.1690   0.476  91.873              4           NA
  ---                                                                      
1013:         5.85e-01   0.1060   0.698 137.001              4           NA
1014:         8.36e-01   0.0582   0.187  75.023              4           NA
1015:         1.74e-04   0.1200   0.228 127.944              4           NA
1016:         2.02e-05   0.3090   0.400 125.975              4           NA
1017:         2.63e-04   0.3460   0.420 122.060              4           NA
                           label
                          <char>
   1:       Rhino/Warner Records
   2: Benny Blanco Solo Album PS
   3:          Universal Records
   4:                        EMI
   5:                    Capitol
  ---                           
1013:             Warner Records
1014:     Columbia/B1 Recordings
1015:              Parlophone UK
1016:                     Virgin
1017:            Musical Freedom
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                               C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
   2:                                                                                       C © 2018 Friends Keep Secrets/Interscope Records, P ℗ 2018 Friends Keep Secrets/Interscope Records
   3:                                                    C © 2002 Universal Records, a Division of UMG Recordings, Inc., P ℗ 2002 Universal Motown Records, a division of UMG Recordings, Inc.
   4: C © 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV, P ℗ 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV
   5:                                                                                                 C © 2017 Universal Music Operations Limited, P ℗ 2017 Universal Music Operations Limited
  ---                                                                                                                                                                                         
1013:                       C Under exclusive licence to Warner Music UK Limited, © 2022 One House X Limited, P Under exclusive licence to Warner Music UK Limited, ℗ 2022 One House X Limited
1014:                                       P (P) 2023 Rave Alert Records, under exclusive license to Ministry of Sound/Arista Records/B1 Recordings GmbH, a Sony Music Entertainment company.
1015:                                                       C Under license to Warner Music UK Limited, © 2023 What A DJ Ltd, P Under license to Warner Music UK Limited, ℗ 2023 What A DJ Ltd
1016:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
1017:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
            main_genre
                <char>
   1:             Rock
   2:              Pop
   3:             Rock
   4:             Rock
   5:              Pop
  ---                 
1013: Dance/Electronic
1014:            Other
1015:              Pop
1016:              Pop
1017:              Pop

# Multiple conditions
spotify_dt[popularity > 75 & main_genre == "Rock"]

                             track_name               artist_name_s
                                 <char>                      <char>
  1:         Don't Stop - 2004 Remaster               Fleetwood Mac
  2:                   Here Without You                3 Doors Down
  3: We Will Rock You - Remastered 2011                       Queen
  4:                            My Hero                Foo Fighters
  5:                I Love Rock 'N Roll Joan Jett & the Blackhearts
 ---                                                               
230:                    One Step Closer                 Linkin Park
231:  Good Riddance (Time of Your Life)                   Green Day
232:                 When I Come Around                   Green Day
233:        Rebel Rebel - 2016 Remaster                 David Bowie
234:                           Rhiannon               Fleetwood Mac
                                  album_name         album_artist_name_s
                                      <char>                      <char>
  1:                                 Rumours               Fleetwood Mac
  2:                       Away From The Sun                3 Doors Down
  3:       News Of The World (2011 Remaster)                       Queen
  4:                The Colour And The Shape                Foo Fighters
  5: I Love Rock 'N' Roll (Expanded Edition) Joan Jett & the Blackhearts
 ---                                                                    
230:           Hybrid Theory (Bonus Edition)                 Linkin Park
231:                                  Nimrod                   Green Day
232:                                  Dookie                   Green Day
233:            Diamond Dogs (2016 Remaster)                 David Bowie
234:                           Fleetwood Mac               Fleetwood Mac
     album_release_date disc_number track_number track_duration_ms explicit
                 <char>       <int>        <int>             <int>   <lgcl>
  1:         1977-02-04           1            4            193346    FALSE
  2:         2002-11-12           1            6            238733    FALSE
  3:         1977-10-28           1            1            122066    FALSE
  4:         1997-05-20           1            7            260026    FALSE
  5:         1981-11-18           1            1            175173    FALSE
 ---                                                                       
230:         2000-10-24           1            2            157333    FALSE
231:         1997-10-14           1           17            153466     TRUE
232:         1994-02-01           1           10            178000    FALSE
233:         1974-05-24           1            6            274746    FALSE
234:         1975-07-11           1            4            252773    FALSE
     popularity         isrc                 added_by            added_at
          <int>       <char>                   <char>              <POSc>
  1:         79 USWB10400049 spotify:user:bradnumber1 2022-08-31 00:08:18
  2:         80 USUR10200764 spotify:user:bradnumber1 2021-08-08 09:26:31
  3:         82 GBUM71029618 spotify:user:bradnumber1 2021-08-08 09:26:31
  4:         79 USRW29600007 spotify:user:bradnumber1 2021-08-08 09:26:31
  5:         81 USBH18100118 spotify:user:bradnumber1 2021-08-08 09:26:31
 ---                                                                     
230:         83 USWB10002399 spotify:user:bradnumber1 2021-08-08 09:26:31
231:         82 USRE19700545 spotify:user:bradnumber1 2021-08-08 09:26:31
232:         80 USRE19900154 spotify:user:bradnumber1 2021-08-08 09:26:31
233:         76 USJT11600006 spotify:user:bradnumber1 2021-08-08 09:26:31
234:         77 USWB19900188 spotify:user:bradnumber1 2021-08-08 09:26:31
                                                                      artist_genres
                                                                             <char>
  1:                              album rock,classic rock,rock,soft rock,yacht rock
  2:                                    alternative metal,nu metal,post-grunge,rock
  3:                                                    classic rock,glam rock,rock
  4: alternative metal,alternative rock,modern rock,permanent wave,post-grunge,rock
  5:                                                                 glam punk,rock
 ---                                                                               
230:                          alternative metal,nu metal,post-grunge,rap metal,rock
231:                                           modern rock,permanent wave,punk,rock
232:                                           modern rock,permanent wave,punk,rock
233:                            art rock,classic rock,glam rock,permanent wave,rock
234:                              album rock,classic rock,rock,soft rock,yacht rock
     danceability energy   key loudness  mode speechiness acousticness
            <num>  <num> <int>    <num> <int>       <num>        <num>
  1:        0.671  0.710     9   -7.724     1      0.0356     0.039300
  2:        0.557  0.533    10   -6.817     0      0.0252     0.049200
  3:        0.692  0.497     2   -7.316     1      0.1190     0.676000
  4:        0.398  0.927     9   -4.212     1      0.0567     0.000062
  5:        0.535  0.716     4   -5.025     1      0.0431     0.326000
 ---                                                                  
230:        0.492  0.969     9   -4.419     1      0.0491     0.001400
231:        0.475  0.432     7   -7.844     1      0.0302     0.183000
232:        0.531  0.909    11   -4.236     1      0.0340     0.017500
233:        0.635  0.686     9  -15.648     1      0.0530     0.209000
234:        0.723  0.490     9  -14.745     0      0.0296     0.111000
     instrumentalness liveness valence   tempo time_signature album_genres
                <num>    <num>   <num>   <num>          <int>       <lgcl>
  1:         1.12e-05   0.0387   0.834 118.745              4           NA
  2:         0.00e+00   0.2050   0.233 143.994              4           NA
  3:         0.00e+00   0.2590   0.475  81.308              4           NA
  4:         1.77e-04   0.0575   0.311  77.106              4           NA
  5:         0.00e+00   0.6590   0.901  94.379              4           NA
 ---                                                                      
230:         1.32e-06   0.0787   0.538  95.136              4           NA
231:         0.00e+00   0.1610   0.648  94.605              4           NA
232:         0.00e+00   0.2310   0.814  97.546              4           NA
233:         1.76e-02   0.2820   0.464 125.988              4           NA
234:         9.46e-02   0.0923   0.795 129.012              4           NA
                    label
                   <char>
  1: Rhino/Warner Records
  2:    Universal Records
  3:                  EMI
  4:    RCA Records Label
  5:    Legacy Recordings
 ---                     
230:       Warner Records
231:              Reprise
232:              Reprise
233:        Parlophone UK
234:       Warner Records
                                                                                                                                                                                                 copyrights
                                                                                                                                                                                                     <char>
  1:                                                                                                                                             C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
  2:                                                                  C © 2002 Universal Records, a Division of UMG Recordings, Inc., P ℗ 2002 Universal Motown Records, a division of UMG Recordings, Inc.
  3:               C © 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV, P ℗ 2011 Queen Productions Ltd. under exclusive licence to Universal International Music BV
  4:                                                                                                                                                                       P (P) 1997 Roswell Records, Inc.
  5:                                                                                                               P (P) 1981 Blackheart Records Group, under exclusive license to Sony Music Entertainment
 ---                                                                                                                                                                                                       
230:                                                                                                                                             C © 2000 Warner Records Inc., P ℗ 2000 Warner Records Inc.
231: C © 1997 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S., P ℗ 1997 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S.
232: C © 1994 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S., P ℗ 1994 Reprise Records for the U.S. and WEA International Inc. for the world outside of the U.S.
233:               C © 2016 Jones/Tintoretto Entertainment Co. LLC., P ℗ 1974, 2016 Jones/Tintoretto Entertainment Co., LLC under exclusive license to Parlophone Records Ltd, a Warner Music Group Company
234:                                                                                                                                             C © 1975 Warner Records Inc., P ℗ 1975 Warner Records Inc.
     main_genre
         <char>
  1:       Rock
  2:       Rock
  3:       Rock
  4:       Rock
  5:       Rock
 ---           
230:       Rock
231:       Rock
232:       Rock
233:       Rock
234:       Rock

# Excluding certain values
spotify_dt[key != 10]

                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9375:                               Kernkraft 400 (A Better Day)
9376:                               Never Say Never - Radio Edit
9377: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9378:                                                    Lay Low
9379:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9375:                   Topic, A7S
9376:                    Vandalism
9377: Spiller, Sophie Ellis-Bextor
9378:                       Tiësto
9379:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9375:                                           Kernkraft 400 (A Better Day)
9376:                                                        Never Say Never
9377:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9378:                                                                Lay Low
9379:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9375:          Topic, A7S         2022-06-17           1            1
9376:           Vandalism         2005-10-24           1            1
9377:             Spiller         2000-08-14           1            1
9378:              Tiësto         2023-01-06           1            1
9379:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9375:            165800    FALSE         79 DECE72201091
9376:            176640    FALSE         17 AUVC00503711
9377:            227619    FALSE         62 GBCPZ0019728
9378:            153442    FALSE         87 NLZ542202348
9379:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9375: spotify:user:bradnumber1 2023-07-11 10:57:25
9376: spotify:user:bradnumber1 2023-07-16 09:38:19
9377: spotify:user:bradnumber1 2023-07-16 09:39:17
9378: spotify:user:bradnumber1 2023-07-18 22:06:36
9379: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9375:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9376:                                                                                       australian dance,melbourne bounce
9377:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9378:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9379:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9375:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9376:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9377:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9378:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9379:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9375:         2.02e-05   0.3090   0.400 125.975              4           NA
9376:         1.12e-02   0.3380   0.767 130.978              4           NA
9377:         8.89e-02   0.3610   0.626 123.037              4           NA
9378:         2.63e-04   0.3460   0.420 122.060              4           NA
9379:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9375:                              Virgin
9376:                             Vicious
9377:                    Defected Records
9378:                     Musical Freedom
9379:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9375:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9376:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9377:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9378:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9379: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre
                <char>
   1: Dance/Electronic
   2:              Pop
   3:              Pop
   4:             Rock
   5:             Rock
  ---                 
9375:              Pop
9376: Dance/Electronic
9377:              Pop
9378:              Pop
9379:              Pop

# Select tracks by genre
spotify_dt[main_genre %in% c("Rock", "Pop")]

                                                      track_name
                                                          <char>
   1:                            I Know You Want Me (Calle Ocho)
   2:                         From the Bottom of My Broken Heart
   3:                           Apeman - 2014 Remastered Version
   4:                         You Can't Always Get What You Want
   5:                                 Don't Stop - 2004 Remaster
  ---                                                           
8103:                                         Baby Don't Hurt Me
8104:                               Kernkraft 400 (A Better Day)
8105: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
8106:                                                    Lay Low
8107:                                                Padam Padam
                            artist_name_s
                                   <char>
   1:                             Pitbull
   2:                      Britney Spears
   3:                           The Kinks
   4:                  The Rolling Stones
   5:                       Fleetwood Mac
  ---                                    
8103: David Guetta, Anne-Marie, Coi Leray
8104:                          Topic, A7S
8105:        Spiller, Sophie Ellis-Bextor
8106:                              Tiësto
8107:                       Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                         Pitbull Starring In Rebelution
   2:                         ...Baby One More Time (Digital Deluxe Version)
   3: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   4:                                                           Let It Bleed
   5:                                                                Rumours
  ---                                                                       
8103:                                                     Baby Don't Hurt Me
8104:                                           Kernkraft 400 (A Better Day)
8105:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
8106:                                                                Lay Low
8107:                                                            Padam Padam
                      album_artist_name_s album_release_date disc_number
                                   <char>             <char>       <int>
   1:                             Pitbull         2009-10-23           1
   2:                      Britney Spears         1999-01-12           1
   3:                           The Kinks         2014-10-20           1
   4:                  The Rolling Stones         1969-12-05           1
   5:                       Fleetwood Mac         1977-02-04           1
  ---                                                                   
8103: David Guetta, Anne-Marie, Coi Leray         2023-04-06           1
8104:                          Topic, A7S         2022-06-17           1
8105:                             Spiller         2000-08-14           1
8106:                              Tiësto         2023-01-06           1
8107:                       Kylie Minogue         2023-05-19           1
      track_number track_duration_ms explicit popularity         isrc
             <int>             <int>   <lgcl>      <int>       <char>
   1:            3            237120    FALSE         64 USJAY0900144
   2:            6            312533    FALSE         56 USJI19910455
   3:           11            233400    FALSE         42 GB5KW1499822
   4:            9            448720    FALSE          0 USA176910100
   5:            4            193346    FALSE         79 USWB10400049
  ---                                                                
8103:            1            140017    FALSE         94 UKWLG2300016
8104:            1            165800    FALSE         79 DECE72201091
8105:            1            227619    FALSE         62 GBCPZ0019728
8106:            1            153442    FALSE         87 NLZ542202348
8107:            1            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2021-08-08 09:26:31
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2022-08-31 00:08:18
  ---                                             
8103: spotify:user:bradnumber1 2023-07-11 05:37:45
8104: spotify:user:bradnumber1 2023-07-11 10:57:25
8105: spotify:user:bradnumber1 2023-07-16 09:39:17
8106: spotify:user:bradnumber1 2023-07-18 22:06:36
8107: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                                             dance pop,miami hip hop,pop
   2:                                                                                                           dance pop,pop
   3: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   4:                                                                           album rock,british invasion,classic rock,rock
   5:                                                                       album rock,classic rock,rock,soft rock,yacht rock
  ---                                                                                                                        
8103:                                          big room,dance pop,edm,pop,pop dance,pop,new jersey underground rap,trap queen
8104:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
8105:                                                                  disco house,vocal house,dance pop,europop,new wave pop
8106:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
8107:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   2:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   3:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   4:        0.319  0.627     0   -9.611     1      0.0687     0.675000
   5:        0.671  0.710     9   -7.724     1      0.0356     0.039300
  ---                                                                  
8103:        0.602  0.910     7   -3.404     1      0.0308     0.001260
8104:        0.623  0.727    11   -5.570     0      0.0562     0.184000
8105:        0.719  0.806     9   -6.802     0      0.0389     0.000132
8106:        0.534  0.855     1   -4.923     0      0.1830     0.060700
8107:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         2.12e-05   0.2370   0.800 127.045              4           NA
   2:         1.01e-06   0.3380   0.706  74.981              4           NA
   3:         5.08e-05   0.0384   0.833  75.311              4           NA
   4:         7.29e-05   0.2890   0.497  85.818              4           NA
   5:         1.12e-05   0.0387   0.834 118.745              4           NA
  ---                                                                      
8103:         1.74e-04   0.1200   0.228 127.944              4           NA
8104:         2.02e-05   0.3090   0.400 125.975              4           NA
8105:         8.89e-02   0.3610   0.626 123.037              4           NA
8106:         2.63e-04   0.3460   0.420 122.060              4           NA
8107:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1: Mr.305/Polo Grounds Music/J Records
   2:                                Jive
   3:                   Sanctuary Records
   4:               Universal Music Group
   5:                Rhino/Warner Records
  ---                                    
8103:                       Parlophone UK
8104:                              Virgin
8105:                    Defected Records
8106:                     Musical Freedom
8107:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   2:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   3:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   4:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
   5:                                                                                                                               C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
  ---                                                                                                                                                                                         
8103:                                                       C Under license to Warner Music UK Limited, © 2023 What A DJ Ltd, P Under license to Warner Music UK Limited, ℗ 2023 What A DJ Ltd
8104:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
8105:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
8106:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
8107: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
      main_genre
          <char>
   1:        Pop
   2:        Pop
   3:       Rock
   4:       Rock
   5:       Rock
  ---           
8103:        Pop
8104:        Pop
8105:        Pop
8106:        Pop
8107:        Pop

✅Now you try:

Selecting Rows (i)

#comment and write your code below:
spotify_dt[popularity < 50]

                                   track_name         artist_name_s
                                       <char>                <char>
   1: Justified & Ancient - Stand by the Jams               The KLF
   2:        Apeman - 2014 Remastered Version             The Kinks
   3:      You Can't Always Get What You Want    The Rolling Stones
   4:                                   Mercy          Shawn Mendes
   5:     Listen to the Band - Single Version           The Monkees
  ---                                                              
5767:                     When You Were Young           The Killers
5768:                     Prisoner of Society        The Living End
5769:                         Bridge of Light The Future Hit Makers
5770:                              Get A Life           Freestylers
5771:            Never Say Never - Radio Edit             Vandalism
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   3:                                                           Let It Bleed
   4:                                                    Illuminate (Deluxe)
   5:                                                The Best of The Monkees
  ---                                                                       
5767:                                                             Sam's Town
5768:                                                         The Living End
5769:                                           The Greatest Tribute to Pink
5770:                                                            Raw As F**k
5771:                                                        Never Say Never
        album_artist_name_s album_release_date disc_number track_number
                     <char>             <char>       <int>        <int>
   1:               The KLF         1992-08-03           1            3
   2:             The Kinks         2014-10-20           1           11
   3:    The Rolling Stones         1969-12-05           1            9
   4:          Shawn Mendes         2016-09-23           1            2
   5:           The Monkees               2003           1           25
  ---                                                                  
5767:           The Killers         2006-10-03           1            3
5768:        The Living End         1999-01-15           1            1
5769: The Future Hit Makers         2011-12-01           1            1
5770:           Freestylers         2004-03-26           1            4
5771:             Vandalism         2005-10-24           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            233400    FALSE         42 GB5KW1499822
   3:            448720    FALSE          0 USA176910100
   4:            208733    FALSE          0 USUM71603531
   5:            149106    FALSE         31 USRH10281208
  ---                                                   
5767:            220426    FALSE          1 USUM70605164
5768:            229066    FALSE         47 AUMPO9800004
5769:            243696    FALSE         11 USA371570720
5770:            312720    FALSE         22 GBFNF0300017
5771:            176640    FALSE         17 AUVC00503711
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
5767: spotify:user:bradnumber1 2021-08-08 09:26:31
5768: spotify:user:bradnumber1 2021-08-08 09:26:31
5769: spotify:user:bradnumber1 2023-07-07 00:45:05
5770: spotify:user:bradnumber1 2023-07-10 04:13:07
5771: spotify:user:bradnumber1 2023-07-16 09:38:19
                                                                                                                                                      artist_genres
                                                                                                                                                             <char>
   1:                                                                                                                   acid house,ambient house,big beat,hip house
   2:                                       album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   3:                                                                                                                 album rock,british invasion,classic rock,rock
   4:                                                                                                                                    canadian pop,pop,viral pop
   5: album rock,beatlesque,bubblegum pop,classic rock,folk rock,heartland rock,mellow gold,psychedelic rock,rock-and-roll,singer-songwriter,soft rock,sunshine pop
  ---                                                                                                                                                              
5767:                                                                                                   alternative rock,dance rock,modern rock,permanent wave,rock
5768:                                                                                                    australian alternative rock,australian rock,australian ska
5769:                                                                                                                                                          <NA>
5770:                                                                                                                                            big beat,breakbeat
5771:                                                                                                                             australian dance,melbourne bounce
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   3:        0.319  0.627     0   -9.611     1      0.0687     0.675000
   4:        0.562  0.681    11   -4.934     0      0.0871     0.113000
   5:        0.260  0.659     2   -8.980     1      0.0625     0.114000
  ---                                                                  
5767:        0.467  0.988    11   -3.313     1      0.1120     0.000152
5768:        0.275  0.993     9   -5.474     0      0.1380     0.000186
5769:        0.551  0.338     5   -8.587     1      0.0310     0.288000
5770:        0.710  0.952    11   -6.286     0      0.0557     0.023200
5771:        0.720  0.841     9   -6.373     1      0.0340     0.000354
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         5.08e-05   0.0384   0.833  75.311              4           NA
   3:         7.29e-05   0.2890   0.497  85.818              4           NA
   4:         0.00e+00   0.1100   0.357 148.064              4           NA
   5:         2.95e-05   0.4990   0.537 184.380              4           NA
  ---                                                                      
5767:         4.84e-02   0.2800   0.321 130.435              4           NA
5768:         0.00e+00   0.3020   0.190 148.725              4           NA
5769:         0.00e+00   0.1050   0.162 143.669              4           NA
5770:         1.21e-04   0.2860   0.889 137.976              4           NA
5771:         1.12e-02   0.3380   0.767 130.978              4           NA
                      label
                     <char>
   1:   Jams Communications
   2:     Sanctuary Records
   3: Universal Music Group
   4: Universal Music Group
   5:                 Rhino
  ---                      
5767: Universal Music Group
5768:               Reprise
5769:         Big Eye Music
5770:      Altra Moda Music
5771:               Vicious
                                                                                                                                                            copyrights
                                                                                                                                                                <char>
   1:                                                                                                             C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                       C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   3:                                                                                         C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
   4:                                         C © 2016 Island Records, a division of UMG Recordings, Inc., P ℗ 2016 Island Records, a division of UMG Recordings, Inc.
   5: C © 2003 Warner Strategic Marketing. Warner Music Group, an AOL Time Warner Co., P ℗ 2003 Warner Strategic Marketing. Warner Music Group, an AOL Time Warner Co.
  ---                                                                                                                                                                 
5767:                                                                                 C © 2006 The Island Def Jam Music Group, P ℗ 2006 The Island Def Jam Music Group
5768:                                                                                                         C © 1999 Reprise Records, P ℗ 1998, 1999 Reprise Records
5769:                                                                                                                                             C 2011 Big Eye Music
5770:                                                                                                                 C 2004 Altra Moda Music, P 2004 Altra Moda Music
5771:                                               C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
            main_genre
                <char>
   1: Dance/Electronic
   2:             Rock
   3:             Rock
   4:              Pop
   5:              Pop
  ---                 
5767:             Rock
5768:             Rock
5769:            Other
5770:            Other
5771: Dance/Electronic

AS a reminder

Comparison Operators

== means “exactly equal to”
!= means “not equal to”
> means “greater than”
< means “less than”
>= means “greater than or equal to”
<= means “less than or equal to”

Logical Operators

& means “AND” (both conditions must be true)
| means “OR” (at least one condition must be true)
! means “NOT” (reverses the condition)

2.) Column Operations (j)

The j argument lets you select columns and perform computations:

# Select specific columns
spotify_dt[, .(track_name, artist_name_s, popularity)]

                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s popularity
                            <char>      <int>
   1:                      The KLF          0
   2:                      Pitbull         64
   3:               Britney Spears         56
   4:                    The Kinks         42
   5:           The Rolling Stones          0
  ---                                        
9995:                   Topic, A7S         79
9996:                    Vandalism         17
9997: Spiller, Sophie Ellis-Bextor         62
9998:                       Tiësto         87
9999:                Kylie Minogue         69

# Compute summary statistics
spotify_dt[, .(avg_popularity = mean(popularity),
              max_energy = max(energy))]

   avg_popularity max_energy
            <num>      <num>
1:       37.62466         NA

# Multiple computations
spotify_dt[, .(
  track_count = .N,
  avg_duration = mean(track_duration_ms)/1000,
  avg_popularity = mean(popularity)
)]

   track_count avg_duration avg_popularity
         <int>        <num>          <num>
1:        9999      224.815       37.62466

# Select specific columns
spotify_dt[, .(artist_name_s, track_name)]

                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam

# Create new columns in output
spotify_dt[, .(
  song = track_name,
  artist = artist_name_s,
  popularity_score = popularity/100
)]

                                                            song
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                            artist popularity_score
                            <char>            <num>
   1:                      The KLF             0.00
   2:                      Pitbull             0.64
   3:               Britney Spears             0.56
   4:                    The Kinks             0.42
   5:           The Rolling Stones             0.00
  ---                                              
9995:                   Topic, A7S             0.79
9996:                    Vandalism             0.17
9997: Spiller, Sophie Ellis-Bextor             0.62
9998:                       Tiësto             0.87
9999:                Kylie Minogue             0.69

# Calculate summary statistics
spotify_dt[, .(
  avg_popularity = mean(popularity),
  total_tracks = .N,
  avg_duration = mean(track_duration_ms)/1000  # in seconds
)]

   avg_popularity total_tracks avg_duration
            <num>        <int>        <num>
1:       37.62466         9999      224.815

✅Now you try:

Selecting and computing (i)

#comment and write your code below:

Grouping Operations (by)

# Group by genre
spotify_dt[, .(
  track_count = .N,
  avg_popularity = mean(popularity),
  avg_danceability = mean(danceability)
), by = main_genre]

                main_genre track_count avg_popularity avg_danceability
                    <char>       <int>          <num>            <num>
 1:       Dance/Electronic         254       32.76378        0.6779331
 2:                    Pop        5694       40.27116        0.6243319
 3:                   Rock        2413       37.57563        0.5480435
 4:                Hip Hop         214       32.50935        0.7607477
 5:                  Other         946       26.09831               NA
 6:               R&B/Soul         302       34.93709        0.6557980
 7:             Jazz/Blues          54       22.24074        0.5241852
 8:           Folk/Country          91       34.65934        0.5683407
 9:       Reggae/Caribbean          27       48.22222        0.7564815
10: Classical/Instrumental           4       14.75000        0.5580000

# Multiple grouping variables
spotify_dt[, .(
  track_count = .N,
  avg_popularity = mean(popularity)
), by = .(main_genre, explicit)]

                main_genre explicit track_count avg_popularity
                    <char>   <lgcl>       <int>          <num>
 1:       Dance/Electronic    FALSE         252       32.81746
 2:                    Pop    FALSE        5345       39.74275
 3:                   Rock    FALSE        2363       37.50994
 4:                Hip Hop    FALSE         132       31.15152
 5:                  Other    FALSE         924       25.98377
 6:               R&B/Soul    FALSE         301       35.05316
 7:                    Pop     TRUE         349       48.36390
 8:             Jazz/Blues    FALSE          54       22.24074
 9:                Hip Hop     TRUE          82       34.69512
10:           Folk/Country    FALSE          89       33.40449
11:       Reggae/Caribbean    FALSE          27       48.22222
12:                   Rock     TRUE          50       40.68000
13:                  Other     TRUE          22       30.90909
14: Classical/Instrumental     TRUE           1        0.00000
15:       Dance/Electronic     TRUE           2       26.00000
16: Classical/Instrumental    FALSE           3       19.66667
17:           Folk/Country     TRUE           2       90.50000
18:               R&B/Soul     TRUE           1        0.00000

# Group by genre for popularity analysis
genre_stats <- spotify_dt[, .(
  avg_popularity = mean(popularity, na.rm = TRUE),
  track_count = .N,
  avg_duration = mean(track_duration_ms)/1000
), by = main_genre][]

✅Now you try:

Grouping (i)

#comment and write your code below:

Think of it like a sentence:

i = “Which rows do you want?”

j = “What do you want to do with them?”

by = “How should they be grouped?”

5. Advanced Features

Adding/Updating Columns (:=)

We use the empty brackets at the end to print but you could have also

# Add duration in seconds
spotify_dt[, duration_sec := track_duration_ms/1000][]

Index: <main_genre>
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9995:                                           Kernkraft 400 (A Better Day)
9996:                                                        Never Say Never
9997:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                                Lay Low
9999:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9995:          Topic, A7S         2022-06-17           1            1
9996:           Vandalism         2005-10-24           1            1
9997:             Spiller         2000-08-14           1            1
9998:              Tiësto         2023-01-06           1            1
9999:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9995:            165800    FALSE         79 DECE72201091
9996:            176640    FALSE         17 AUVC00503711
9997:            227619    FALSE         62 GBCPZ0019728
9998:            153442    FALSE         87 NLZ542202348
9999:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9995: spotify:user:bradnumber1 2023-07-11 10:57:25
9996: spotify:user:bradnumber1 2023-07-16 09:38:19
9997: spotify:user:bradnumber1 2023-07-16 09:39:17
9998: spotify:user:bradnumber1 2023-07-18 22:06:36
9999: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9995:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9996:                                                                                       australian dance,melbourne bounce
9997:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9998:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9999:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9995:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9996:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9997:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9998:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9999:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9995:         2.02e-05   0.3090   0.400 125.975              4           NA
9996:         1.12e-02   0.3380   0.767 130.978              4           NA
9997:         8.89e-02   0.3610   0.626 123.037              4           NA
9998:         2.63e-04   0.3460   0.420 122.060              4           NA
9999:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9995:                              Virgin
9996:                             Vicious
9997:                    Defected Records
9998:                     Musical Freedom
9999:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9995:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9996:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9997:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9998:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9999: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre duration_sec
                <char>        <num>
   1: Dance/Electronic      216.270
   2:              Pop      237.120
   3:              Pop      312.533
   4:             Rock      233.400
   5:             Rock      448.720
  ---                              
9995:              Pop      165.800
9996: Dance/Electronic      176.640
9997:              Pop      227.619
9998:              Pop      153.442
9999:              Pop      166.266

# Multiple column updates
spotify_dt[, ':='(
  popularity_scaled = popularity/100,
  is_popular = popularity > 75
)]

spotify_dt

Index: <main_genre>
                                                      track_name
                                                          <char>
   1:                    Justified & Ancient - Stand by the Jams
   2:                            I Know You Want Me (Calle Ocho)
   3:                         From the Bottom of My Broken Heart
   4:                           Apeman - 2014 Remastered Version
   5:                         You Can't Always Get What You Want
  ---                                                           
9995:                               Kernkraft 400 (A Better Day)
9996:                               Never Say Never - Radio Edit
9997: Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                    Lay Low
9999:                                                Padam Padam
                     artist_name_s
                            <char>
   1:                      The KLF
   2:                      Pitbull
   3:               Britney Spears
   4:                    The Kinks
   5:           The Rolling Stones
  ---                             
9995:                   Topic, A7S
9996:                    Vandalism
9997: Spiller, Sophie Ellis-Bextor
9998:                       Tiësto
9999:                Kylie Minogue
                                                                  album_name
                                                                      <char>
   1:                                                       Songs Collection
   2:                                         Pitbull Starring In Rebelution
   3:                         ...Baby One More Time (Digital Deluxe Version)
   4: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   5:                                                           Let It Bleed
  ---                                                                       
9995:                                           Kernkraft 400 (A Better Day)
9996:                                                        Never Say Never
9997:             Groovejet (If This Ain't Love) [feat. Sophie Ellis-Bextor]
9998:                                                                Lay Low
9999:                                                            Padam Padam
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:             The KLF         1992-08-03           1            3
   2:             Pitbull         2009-10-23           1            3
   3:      Britney Spears         1999-01-12           1            6
   4:           The Kinks         2014-10-20           1           11
   5:  The Rolling Stones         1969-12-05           1            9
  ---                                                                
9995:          Topic, A7S         2022-06-17           1            1
9996:           Vandalism         2005-10-24           1            1
9997:             Spiller         2000-08-14           1            1
9998:              Tiësto         2023-01-06           1            1
9999:       Kylie Minogue         2023-05-19           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            216270    FALSE          0 QMARG1760056
   2:            237120    FALSE         64 USJAY0900144
   3:            312533    FALSE         56 USJI19910455
   4:            233400    FALSE         42 GB5KW1499822
   5:            448720    FALSE          0 USA176910100
  ---                                                   
9995:            165800    FALSE         79 DECE72201091
9996:            176640    FALSE         17 AUVC00503711
9997:            227619    FALSE         62 GBCPZ0019728
9998:            153442    FALSE         87 NLZ542202348
9999:            166266    FALSE         69 GB5KW2301017
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2020-03-05 09:20:39
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2021-08-08 09:26:31
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
9995: spotify:user:bradnumber1 2023-07-11 10:57:25
9996: spotify:user:bradnumber1 2023-07-16 09:38:19
9997: spotify:user:bradnumber1 2023-07-16 09:39:17
9998: spotify:user:bradnumber1 2023-07-18 22:06:36
9999: spotify:user:bradnumber1 2023-07-25 11:57:02
                                                                                                                artist_genres
                                                                                                                       <char>
   1:                                                                             acid house,ambient house,big beat,hip house
   2:                                                                                             dance pop,miami hip hop,pop
   3:                                                                                                           dance pop,pop
   4: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   5:                                                                           album rock,british invasion,classic rock,rock
  ---                                                                                                                        
9995:                                                    german dance,pop dance,pop edm,uk dance,pop dance,scandipop,uk dance
9996:                                                                                       australian dance,melbourne bounce
9997:                                                                  disco house,vocal house,dance pop,europop,new wave pop
9998:                                                        big room,brostep,dutch edm,edm,house,pop dance,slap house,trance
9999:                                                        australian dance,australian pop,dance pop,eurodance,new wave pop
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.617  0.872     8  -12.305     1      0.0480     0.015800
   2:        0.825  0.743     2   -5.995     1      0.1490     0.014200
   3:        0.677  0.665     7   -5.171     1      0.0305     0.560000
   4:        0.683  0.728     9   -8.920     1      0.2590     0.568000
   5:        0.319  0.627     0   -9.611     1      0.0687     0.675000
  ---                                                                  
9995:        0.623  0.727    11   -5.570     0      0.0562     0.184000
9996:        0.720  0.841     9   -6.373     1      0.0340     0.000354
9997:        0.719  0.806     9   -6.802     0      0.0389     0.000132
9998:        0.534  0.855     1   -4.923     0      0.1830     0.060700
9999:        0.744  0.620     5   -7.930     1      0.2460     0.214000
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         1.12e-01   0.4080   0.504 111.458              4           NA
   2:         2.12e-05   0.2370   0.800 127.045              4           NA
   3:         1.01e-06   0.3380   0.706  74.981              4           NA
   4:         5.08e-05   0.0384   0.833  75.311              4           NA
   5:         7.29e-05   0.2890   0.497  85.818              4           NA
  ---                                                                      
9995:         2.02e-05   0.3090   0.400 125.975              4           NA
9996:         1.12e-02   0.3380   0.767 130.978              4           NA
9997:         8.89e-02   0.3610   0.626 123.037              4           NA
9998:         2.63e-04   0.3460   0.420 122.060              4           NA
9999:         1.16e-03   0.1030   0.711 128.103              4           NA
                                    label
                                   <char>
   1:                 Jams Communications
   2: Mr.305/Polo Grounds Music/J Records
   3:                                Jive
   4:                   Sanctuary Records
   5:               Universal Music Group
  ---                                    
9995:                              Virgin
9996:                             Vicious
9997:                    Defected Records
9998:                     Musical Freedom
9999:                     Liberator Music
                                                                                                                                                                                    copyrights
                                                                                                                                                                                        <char>
   1:                                                                                                                                     C 1992 Copyright Control, P 1992 Jams Communications
   2:                                                                                                                      P (P) 2009 RCA/JIVE Label Group, a unit of Sony Music Entertainment
   3:                                                                                                                                                           P (P) 1999 Zomba Recording LLC
   4:                                                                               C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   5:                                                                                                                 C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
  ---                                                                                                                                                                                         
9995:                                                         C © 2022 Topic, under exclusive license to Universal Music GmbH, P ℗ 2022 Topic, under exclusive license to Universal Music GmbH
9996:                                                                       C 2005 Vicious, a division of Vicious Recordings Pty Ltd, P 2005 Vicious, a division of Vicious Recordings Pty Ltd
9997:                                                                                                                     C © 2021 Defected Records Limited, P ℗ 2021 Defected Records Limited
9998:                                                                                                                 C © 2023 Musical Freedom Label Ltd., P ℗ 2023 Musical Freedom Label Ltd.
9999: C 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited, P 2023 Kylie Minogue/Darenote under exclusive license to BMG Rights Management (UK) Limited
            main_genre duration_sec popularity_scaled is_popular
                <char>        <num>             <num>     <lgcl>
   1: Dance/Electronic      216.270              0.00      FALSE
   2:              Pop      237.120              0.64      FALSE
   3:              Pop      312.533              0.56      FALSE
   4:             Rock      233.400              0.42      FALSE
   5:             Rock      448.720              0.00      FALSE
  ---                                                           
9995:              Pop      165.800              0.79       TRUE
9996: Dance/Electronic      176.640              0.17      FALSE
9997:              Pop      227.619              0.62      FALSE
9998:              Pop      153.442              0.87       TRUE
9999:              Pop      166.266              0.69      FALSE

✅Now you try:

#comment and write your code below:

# Create new columns
spotify_dt[, `:=`(
  is_popular = popularity > 75,
  loudness_energy_ratio = loudness / energy
)]

# Verify new columns
names(spotify_dt)

 [1] "track_name"            "artist_name_s"         "album_name"           
 [4] "album_artist_name_s"   "album_release_date"    "disc_number"          
 [7] "track_number"          "track_duration_ms"     "explicit"             
[10] "popularity"            "isrc"                  "added_by"             
[13] "added_at"              "artist_genres"         "danceability"         
[16] "energy"                "key"                   "loudness"             
[19] "mode"                  "speechiness"           "acousticness"         
[22] "instrumentalness"      "liveness"              "valence"              
[25] "tempo"                 "time_signature"        "album_genres"         
[28] "label"                 "copyrights"            "main_genre"           
[31] "duration_sec"          "popularity_scaled"     "is_popular"           
[34] "loudness_energy_ratio"

#from data.table
spotify_dt[, names(.SD)]

 [1] "track_name"            "artist_name_s"         "album_name"           
 [4] "album_artist_name_s"   "album_release_date"    "disc_number"          
 [7] "track_number"          "track_duration_ms"     "explicit"             
[10] "popularity"            "isrc"                  "added_by"             
[13] "added_at"              "artist_genres"         "danceability"         
[16] "energy"                "key"                   "loudness"             
[19] "mode"                  "speechiness"           "acousticness"         
[22] "instrumentalness"      "liveness"              "valence"              
[25] "tempo"                 "time_signature"        "album_genres"         
[28] "label"                 "copyrights"            "main_genre"           
[31] "duration_sec"          "popularity_scaled"     "is_popular"           
[34] "loudness_energy_ratio"

Working with Dates

Here’s how to handle dates in data.table:

# Load lubridate
library(lubridate)

# Convert and extract date components
spotify_dt <- spotify_dt[, `:=`(
  release_date = as.Date(album_release_date),
  release_year = year(as.Date(album_release_date)),
  release_month = month(as.Date(album_release_date)),
  release_day = day(as.Date(album_release_date))
)]

# View the new date columns
spotify_dt[, .(release_date, release_year, release_month, release_day)][1:5]

   release_date release_year release_month release_day
         <Date>        <num>         <num>       <int>
1:   1992-08-03         1992             8           3
2:   2009-10-23         2009            10          23
3:   1999-01-12         1999             1          12
4:   2014-10-20         2014            10          20
5:   1969-12-05         1969            12           5

class(spotify_dt$album_release_date)

[1] "character"

Ordering Results

Data.table provides efficient ways to sort:

# Sort genres by popularity (descending)
genre_stats[order(-avg_popularity)]

                main_genre avg_popularity track_count avg_duration
                    <char>          <num>       <int>        <num>
 1:       Reggae/Caribbean       48.22222          27     210.8963
 2:                    Pop       40.27116        5694     223.3444
 3:                   Rock       37.57563        2413     233.6300
 4:               R&B/Soul       34.93709         302     227.6889
 5:           Folk/Country       34.65934          91     205.5994
 6:       Dance/Electronic       32.76378         254     246.2988
 7:                Hip Hop       32.50935         214     232.4100
 8:                  Other       26.09831         946     208.5390
 9:             Jazz/Blues       22.24074          54     168.0612
10: Classical/Instrumental       14.75000           4     159.5265

# Get top 5 most popular genres
genre_stats[order(-avg_popularity)][1:5]

         main_genre avg_popularity track_count avg_duration
             <char>          <num>       <int>        <num>
1: Reggae/Caribbean       48.22222          27     210.8963
2:              Pop       40.27116        5694     223.3444
3:             Rock       37.57563        2413     233.6300
4:         R&B/Soul       34.93709         302     227.6889
5:     Folk/Country       34.65934          91     205.5994

year_genre_stats <- spotify_dt[, .(
    avg_popularity = mean(popularity),
    track_count = .N,
    avg_energy = mean(energy)
), by = .(release_year)]

# Sort by multiple columns
year_genre_stats[order(release_year, -avg_popularity)]

    release_year avg_popularity track_count avg_energy
           <num>          <num>       <int>      <num>
 1:         1956       68.00000           1  0.6600000
 2:         1958       55.33333           6  0.5676667
 3:         1959       35.00000           2  0.2385000
 4:         1960       61.00000           2  0.8260000
 5:         1961       24.16667           6  0.6656667
 6:         1962       33.57895          19  0.5368947
 7:         1963       32.13333          45  0.5427844
 8:         1964       40.00000          46  0.6658696
 9:         1965       52.51613          31  0.5777097
10:         1966       33.30233          43  0.6424419
11:         1967       44.56250          48  0.5366667
12:         1968       43.87805          41  0.5942927
13:         1969       48.48889          45  0.5497111
14:         1970       50.62500          56  0.5608393
15:         1971       42.90909          44  0.6505000
16:         1972       42.32500          40  0.5486250
17:         1973       49.07273          55  0.5865455
18:         1974       38.13333          30  0.6115667
19:         1975       43.42500          40  0.6191250
20:         1976       50.14634          41  0.6497317
21:         1977       50.55172          58  0.6022759
22:         1978       48.56604          53  0.6504528
23:         1979       44.45283          53  0.6116604
24:         1980       51.33333          45  0.6395556
25:         1981       51.20000          45  0.6677111
26:         1982       47.80851          47  0.6705745
27:         1983       42.55357          56  0.7012679
28:         1984       48.64815          54  0.6755185
29:         1985       40.46154          65  0.7196308
30:         1986       50.05556          72  0.6955417
31:         1987       36.12844         109  0.6350550
32:         1988       40.35294          85  0.6454235
33:         1989       44.08974          78  0.6550231
34:         1990       42.80247          81  0.6516914
35:         1991       38.70787          89  0.6743933
36:         1992       37.28409          88  0.6474659
37:         1993       38.81609          87  0.6679770
38:         1994       39.17073          82  0.6669024
39:         1995       37.00781         128  0.6582813
40:         1996       37.84211         114  0.6303070
41:         1997       39.68217         129  0.7040884
42:         1998       36.08088         136  0.7423088
43:         1999       43.38400         125  0.7390288
44:         2000       36.28859         149  0.7102148
45:         2001       37.68280         186  0.7078441
46:         2002       37.61765         170  0.6864525
47:         2003       36.37500         176  0.7010170
48:         2004       38.66857         175  0.7082629
49:         2005       30.91250         240  0.7182292
50:         2006       33.20321         187  0.6895882
51:         2007       32.03167         221  0.7046335
52:         2008       32.25574         305  0.7465705
53:         2009       31.95455         374  0.7561578
54:         2010       32.20896         335  0.7757433
55:         2011       33.42559         383  0.7296240
56:         2012       35.45299         351  0.7361425
57:         2013       31.26780         295  0.7019861
58:         2014       32.59850         401  0.6773738
59:         2015       32.12250         400  0.6740375
60:         2016       33.14550         378  0.6490534
61:         2017       36.43391         348  0.6564569
62:         2018       43.81609         261  0.6452682
63:         2019       48.44550         211  0.6600806
64:         2020       49.54688         256  0.6631406
65:         2021       41.00966         207  0.6741739
66:         2022       68.91667          60  0.7245000
67:         2023       76.78788          33  0.6893333
68:           NA       36.23457        1377         NA
    release_year avg_popularity track_count avg_energy

🔍 Practice Exercise:

Using the Spotify dataset, create an analysis that:

Filters for songs after 2010
Calculates average audio features by genre
Identifies the top 3 genres by popularity

# solution here
spotify_dt[year(as.Date(album_release_date)) >= 2010,
          .(avg_popularity = mean(popularity),
            avg_danceability = mean(danceability),
            avg_energy = mean(energy),
            track_count = .N),
          by = main_genre
          ][order(-avg_popularity)
          ][1:3]

         main_genre avg_popularity avg_danceability avg_energy track_count
             <char>          <num>            <num>      <num>       <int>
1: Reggae/Caribbean       43.57143        0.7291429  0.7142857           7
2:              Pop       42.13373        0.6329859  0.6930078        2834
3:          Hip Hop       37.21154        0.7300096  0.7036635         104

Ordering Results

Data.table provides efficient ways to sort:

# Sort genres by popularity (descending)
genre_stats[order(-avg_popularity)]

                main_genre avg_popularity track_count avg_duration
                    <char>          <num>       <int>        <num>
 1:       Reggae/Caribbean       48.22222          27     210.8963
 2:                    Pop       40.27116        5694     223.3444
 3:                   Rock       37.57563        2413     233.6300
 4:               R&B/Soul       34.93709         302     227.6889
 5:           Folk/Country       34.65934          91     205.5994
 6:       Dance/Electronic       32.76378         254     246.2988
 7:                Hip Hop       32.50935         214     232.4100
 8:                  Other       26.09831         946     208.5390
 9:             Jazz/Blues       22.24074          54     168.0612
10: Classical/Instrumental       14.75000           4     159.5265

# Sort by multiple columns
year_genre_stats[order(release_year, -avg_popularity)]

    release_year avg_popularity track_count avg_energy
           <num>          <num>       <int>      <num>
 1:         1956       68.00000           1  0.6600000
 2:         1958       55.33333           6  0.5676667
 3:         1959       35.00000           2  0.2385000
 4:         1960       61.00000           2  0.8260000
 5:         1961       24.16667           6  0.6656667
 6:         1962       33.57895          19  0.5368947
 7:         1963       32.13333          45  0.5427844
 8:         1964       40.00000          46  0.6658696
 9:         1965       52.51613          31  0.5777097
10:         1966       33.30233          43  0.6424419
11:         1967       44.56250          48  0.5366667
12:         1968       43.87805          41  0.5942927
13:         1969       48.48889          45  0.5497111
14:         1970       50.62500          56  0.5608393
15:         1971       42.90909          44  0.6505000
16:         1972       42.32500          40  0.5486250
17:         1973       49.07273          55  0.5865455
18:         1974       38.13333          30  0.6115667
19:         1975       43.42500          40  0.6191250
20:         1976       50.14634          41  0.6497317
21:         1977       50.55172          58  0.6022759
22:         1978       48.56604          53  0.6504528
23:         1979       44.45283          53  0.6116604
24:         1980       51.33333          45  0.6395556
25:         1981       51.20000          45  0.6677111
26:         1982       47.80851          47  0.6705745
27:         1983       42.55357          56  0.7012679
28:         1984       48.64815          54  0.6755185
29:         1985       40.46154          65  0.7196308
30:         1986       50.05556          72  0.6955417
31:         1987       36.12844         109  0.6350550
32:         1988       40.35294          85  0.6454235
33:         1989       44.08974          78  0.6550231
34:         1990       42.80247          81  0.6516914
35:         1991       38.70787          89  0.6743933
36:         1992       37.28409          88  0.6474659
37:         1993       38.81609          87  0.6679770
38:         1994       39.17073          82  0.6669024
39:         1995       37.00781         128  0.6582813
40:         1996       37.84211         114  0.6303070
41:         1997       39.68217         129  0.7040884
42:         1998       36.08088         136  0.7423088
43:         1999       43.38400         125  0.7390288
44:         2000       36.28859         149  0.7102148
45:         2001       37.68280         186  0.7078441
46:         2002       37.61765         170  0.6864525
47:         2003       36.37500         176  0.7010170
48:         2004       38.66857         175  0.7082629
49:         2005       30.91250         240  0.7182292
50:         2006       33.20321         187  0.6895882
51:         2007       32.03167         221  0.7046335
52:         2008       32.25574         305  0.7465705
53:         2009       31.95455         374  0.7561578
54:         2010       32.20896         335  0.7757433
55:         2011       33.42559         383  0.7296240
56:         2012       35.45299         351  0.7361425
57:         2013       31.26780         295  0.7019861
58:         2014       32.59850         401  0.6773738
59:         2015       32.12250         400  0.6740375
60:         2016       33.14550         378  0.6490534
61:         2017       36.43391         348  0.6564569
62:         2018       43.81609         261  0.6452682
63:         2019       48.44550         211  0.6600806
64:         2020       49.54688         256  0.6631406
65:         2021       41.00966         207  0.6741739
66:         2022       68.91667          60  0.7245000
67:         2023       76.78788          33  0.6893333
68:           NA       36.23457        1377         NA
    release_year avg_popularity track_count avg_energy

# Get top 5 most popular genres
genre_stats[order(-avg_popularity)][1:5]

         main_genre avg_popularity track_count avg_duration
             <char>          <num>       <int>        <num>
1: Reggae/Caribbean       48.22222          27     210.8963
2:              Pop       40.27116        5694     223.3444
3:             Rock       37.57563        2413     233.6300
4:         R&B/Soul       34.93709         302     227.6889
5:     Folk/Country       34.65934          91     205.5994

Chaining Operations

Data.table allows you to chain operations efficiently:

# Complex analysis chain
spotify_dt[popularity > 70
          ][, .(avg_energy = mean(energy),
                track_count = .N),
            by = main_genre
          ][order(-avg_energy)]

         main_genre avg_energy track_count
             <char>      <num>       <int>
1: Dance/Electronic  0.7640625          16
2:              Pop  0.6937229        1065
3:             Rock  0.6842514         395
4: Reggae/Caribbean  0.6568333           6
5:          Hip Hop  0.6486471          34
6:         R&B/Soul  0.6468684          38
7:            Other  0.6176429          28
8:     Folk/Country  0.5845833          12

Special Symbols

.N: Counts rows (like n() in tidyverse)
.SD: Subset of data (useful for applying functions to multiple columns)
.(): Create a list (like select() or for multiple aggregations)

# Use .SD for multiple column operations
spotify_dt[, lapply(.SD, mean), 
          .SDcols = c("danceability", "energy", "valence"),
          by = main_genre]

                main_genre danceability    energy   valence
                    <char>        <num>     <num>     <num>
 1:       Dance/Electronic    0.6779331 0.8231969 0.5845252
 2:                    Pop    0.6243319 0.6954419 0.5665183
 3:                   Rock    0.5480435 0.6759753 0.5957726
 4:                Hip Hop    0.7607477 0.7146449 0.6088201
 5:                  Other           NA        NA        NA
 6:               R&B/Soul    0.6557980 0.6104570 0.6947848
 7:             Jazz/Blues    0.5241852 0.5895185 0.7212222
 8:           Folk/Country    0.5683407 0.5402066 0.5439121
 9:       Reggae/Caribbean    0.7564815 0.6411481 0.7690000
10: Classical/Instrumental    0.5580000 0.5570000 0.6500000

Performance Tips (Extra)

set key columns for faster subsetting:

# Set key for faster lookups setkey(spotify_dt, main_genre) # Fast subset using key spotify_dt["Rock"]

# Set key for faster lookups 

setkey(spotify_dt, main_genre)  

# Fast subset using key 

spotify_dt["Rock"]

Key: <main_genre>
                                                   track_name
                                                       <char>
   1:                        Apeman - 2014 Remastered Version
   2:                      You Can't Always Get What You Want
   3:                              Don't Stop - 2004 Remaster
   4: Something About The Way You Look Tonight - Edit Version
   5:                                           Juke Box Hero
  ---                                                        
2409:                                     Blowin' in the Wind
2410:                                           She's My Baby
2411:                      Yellow Submarine - Remastered 2009
2412:                                     When You Were Young
2413:                                     Prisoner of Society
           artist_name_s
                  <char>
   1:          The Kinks
   2: The Rolling Stones
   3:      Fleetwood Mac
   4:         Elton John
   5:          Foreigner
  ---                   
2409:          Bob Dylan
2410:     Johnny O'Keefe
2411:        The Beatles
2412:        The Killers
2413:     The Living End
                                                                  album_name
                                                                      <char>
   1: Lola vs. Powerman and the Moneygoround, Pt. One + Percy (Super Deluxe)
   2:                                                           Let It Bleed
   3:                                                                Rumours
   4:                          Candle In The Wind 1997 / Something About ...
   5:                                                           4 (Expanded)
  ---                                                                       
2409:                                             The Freewheelin' Bob Dylan
2410:                                            The Very Best Of (Standard)
2411:                                                  Revolver (Remastered)
2412:                                                             Sam's Town
2413:                                                         The Living End
      album_artist_name_s album_release_date disc_number track_number
                   <char>             <char>       <int>        <int>
   1:           The Kinks         2014-10-20           1           11
   2:  The Rolling Stones         1969-12-05           1            9
   3:       Fleetwood Mac         1977-02-04           1            4
   4:          Elton John         1997-01-01           1            1
   5:           Foreigner               1981           1            2
  ---                                                                
2409:           Bob Dylan         1963-05-27           1            1
2410:      Johnny O'Keefe         2008-08-23           1            4
2411:         The Beatles         1966-08-05           1            6
2412:         The Killers         2006-10-03           1            3
2413:      The Living End         1999-01-15           1            1
      track_duration_ms explicit popularity         isrc
                  <int>   <lgcl>      <int>       <char>
   1:            233400    FALSE         42 GB5KW1499822
   2:            448720    FALSE          0 USA176910100
   3:            193346    FALSE         79 USWB10400049
   4:            240546    FALSE         61 GBAMS9700013
   5:            259800    FALSE         74 USAT20803006
  ---                                                   
2409:            165426    FALSE         71 USSM19900534
2410:            125226    FALSE         16 AUWA00800460
2411:            158880    FALSE         61 GBAYE0601498
2412:            220426    FALSE          1 USUM70605164
2413:            229066    FALSE         47 AUMPO9800004
                      added_by            added_at
                        <char>              <POSc>
   1: spotify:user:bradnumber1 2021-08-08 09:26:31
   2: spotify:user:bradnumber1 2021-08-08 09:26:31
   3: spotify:user:bradnumber1 2022-08-31 00:08:18
   4: spotify:user:bradnumber1 2021-08-08 09:26:31
   5: spotify:user:bradnumber1 2021-08-08 09:26:31
  ---                                             
2409: spotify:user:bradnumber1 2021-08-08 09:26:31
2410: spotify:user:bradnumber1 2021-08-08 09:26:31
2411: spotify:user:bradnumber1 2021-08-08 09:26:31
2412: spotify:user:bradnumber1 2021-08-08 09:26:31
2413: spotify:user:bradnumber1 2021-08-08 09:26:31
                                                                                                                artist_genres
                                                                                                                       <char>
   1: album rock,art rock,british invasion,classic rock,folk rock,glam rock,protopunk,psychedelic rock,rock,singer-songwriter
   2:                                                                           album rock,british invasion,classic rock,rock
   3:                                                                       album rock,classic rock,rock,soft rock,yacht rock
   4:                                                                                   glam rock,mellow gold,piano rock,rock
   5:                                  album rock,classic rock,glam metal,hard rock,heartland rock,mellow gold,rock,soft rock
  ---                                                                                                                        
2409:                                              classic rock,country rock,folk,folk rock,rock,roots rock,singer-songwriter
2410:                                                                                                         australian rock
2411:                                               beatlesque,british invasion,classic rock,merseybeat,psychedelic rock,rock
2412:                                                             alternative rock,dance rock,modern rock,permanent wave,rock
2413:                                                              australian alternative rock,australian rock,australian ska
      danceability energy   key loudness  mode speechiness acousticness
             <num>  <num> <int>    <num> <int>       <num>        <num>
   1:        0.683 0.7280     9   -8.920     1      0.2590     0.568000
   2:        0.319 0.6270     0   -9.611     1      0.0687     0.675000
   3:        0.671 0.7100     9   -7.724     1      0.0356     0.039300
   4:        0.480 0.6280     6   -7.643     1      0.0262     0.174000
   5:        0.357 0.6530     9   -5.554     1      0.0654     0.082800
  ---                                                                  
2409:        0.380 0.0993     2  -20.567     1      0.0509     0.914000
2410:        0.582 0.7720     2   -5.028     1      0.0332     0.691000
2411:        0.604 0.5490     1   -9.873     1      0.0389     0.531000
2412:        0.467 0.9880    11   -3.313     1      0.1120     0.000152
2413:        0.275 0.9930     9   -5.474     0      0.1380     0.000186
      instrumentalness liveness valence   tempo time_signature album_genres
                 <num>    <num>   <num>   <num>          <int>       <lgcl>
   1:         5.08e-05   0.0384   0.833  75.311              4           NA
   2:         7.29e-05   0.2890   0.497  85.818              4           NA
   3:         1.12e-05   0.0387   0.834 118.745              4           NA
   4:         3.28e-05   0.0753   0.541 143.412              4           NA
   5:         0.00e+00   0.0844   0.522 176.647              4           NA
  ---                                                                      
2409:         2.32e-05   0.0605   0.440 174.874              4           NA
2410:         1.75e-04   0.1240   0.731 131.613              4           NA
2411:         0.00e+00   0.4380   0.696 111.398              4           NA
2412:         4.84e-02   0.2800   0.321 130.435              4           NA
2413:         0.00e+00   0.3020   0.190 148.725              4           NA
                      label
                     <char>
   1:     Sanctuary Records
   2: Universal Music Group
   3:  Rhino/Warner Records
   4:                   EMI
   5:        Rhino Atlantic
  ---                      
2409:              Columbia
2410:          WM Australia
2411:         EMI Catalogue
2412: Universal Music Group
2413:               Reprise
                                                                                                                                       copyrights
                                                                                                                                           <char>
   1:                                  C © 2014 Sanctuary Records Group Ltd., a BMG Company, P ℗ 2014 Sanctuary Records Group Ltd., a BMG Company
   2:                                                                    C © 2002 ABKCO Music & Records Inc., P ℗ 2002 ABKCO Music & Records Inc.
   3:                                                                                  C © 2004 Warner Records Inc., P ℗ 2004 Warner Records Inc.
   4:                                                         C © 1997 Mercury Records Limited, P This Compilation ℗ 1997 Mercury Records Limited
   5: C © 2002 Atlantic Recording Corp., marketed by Rhino Entertainment Company, a Warner Music Group company, P ℗ 2002 Atlantic Recording Corp.
  ---                                                                                                                                            
2409:                                                                                    P Originally Released 1963 Sony Music Entertainment Inc.
2410:                                                    C © 2008 Warner Music Australia Pty Limited, P ℗ 2008 Warner Music Australia Pty Limited
2411:                                   C © 2015 Apple Corps Ltd., P ℗ 2015 Calderstone Productions Limited (a division of Universal Music Group)
2412:                                                            C © 2006 The Island Def Jam Music Group, P ℗ 2006 The Island Def Jam Music Group
2413:                                                                                    C © 1999 Reprise Records, P ℗ 1998, 1999 Reprise Records
      main_genre duration_sec popularity_scaled is_popular
          <char>        <num>             <num>     <lgcl>
   1:       Rock      233.400              0.42      FALSE
   2:       Rock      448.720              0.00      FALSE
   3:       Rock      193.346              0.79       TRUE
   4:       Rock      240.546              0.61      FALSE
   5:       Rock      259.800              0.74      FALSE
  ---                                                     
2409:       Rock      165.426              0.71      FALSE
2410:       Rock      125.226              0.16      FALSE
2411:       Rock      158.880              0.61      FALSE
2412:       Rock      220.426              0.01      FALSE
2413:       Rock      229.066              0.47      FALSE
      loudness_energy_ratio release_date release_year release_month release_day
                      <num>       <Date>        <num>         <num>       <int>
   1:            -12.252747   2014-10-20         2014            10          20
   2:            -15.328549   1969-12-05         1969            12           5
   3:            -10.878873   1977-02-04         1977             2           4
   4:            -12.170382   1997-01-01         1997             1           1
   5:             -8.505360         <NA>           NA            NA          NA
  ---                                                                          
2409:           -207.119839   1963-05-27         1963             5          27
2410:             -6.512953   2008-08-23         2008             8          23
2411:            -17.983607   1966-08-05         1966             8           5
2412:             -3.353239   2006-10-03         2006            10           3
2413:             -5.512588   1999-01-15         1999             1          15

Avoid copies with :=:

# More efficient than creating new objects 
spotify_dt[, efficiency_ratio := energy/danceability]

Use .SDcols for column operations:

# Efficient multiple column operations 
audio_features <- c("danceability", "energy", "valence") 

spotify_dt[, lapply(.SD, mean), .SDcols = audio_features, by = main_genre]

Key: <main_genre>
                main_genre danceability    energy   valence
                    <char>        <num>     <num>     <num>
 1: Classical/Instrumental    0.5580000 0.5570000 0.6500000
 2:       Dance/Electronic    0.6779331 0.8231969 0.5845252
 3:           Folk/Country    0.5683407 0.5402066 0.5439121
 4:                Hip Hop    0.7607477 0.7146449 0.6088201
 5:             Jazz/Blues    0.5241852 0.5895185 0.7212222
 6:                  Other           NA        NA        NA
 7:                    Pop    0.6243319 0.6954419 0.5665183
 8:               R&B/Soul    0.6557980 0.6104570 0.6947848
 9:       Reggae/Caribbean    0.7564815 0.6411481 0.7690000
10:                   Rock    0.5480435 0.6759753 0.5957726

Watch out for these common mistakes:

Forgetting the .() when selecting multiple columns
Using $ instead of := for assignments
Forgetting to add [] at the end when you want to see results

Remember:

tidyverse is great for readability and smaller datasets
data.table shines with big data and performance needs
Both have their place in your R toolkit

🎊🎉 Congratulations you finished week 4 and now on to a mini project analysis using data.table functions.

💡 Mini-Project Guidelines:

Create a comprehensive analysis using data.table that:

Demonstrates at least three different data.table operations Includes performance comparisons with equivalent dplyr operations Creates insightful visualizations from the results

Remember to:

Document your code
Explain your analysis choices
Consider performance implications
Think about scalability

Want more….

Explore more advanced data.table features

Prompt for Chat GPT/Claude/Gemini

You are a data scientist using exploring a large data set using exploratory data analysis principles. Your task is to create code in R using data.table functions to …. {Add what you want it to do}

# Load necessary libraries
library(data.table)  # Efficient data handling
library(ggplot2)     # Visualization

# Read the dataset using fread for efficiency
spotify_dt <- fread("spotify_clean.csv")

# ---- 1. Filtering & Aggregation ----
# Find the top 5 genres by average popularity
top_genres <- spotify_dt[, .(
  avg_popularity = mean(popularity, na.rm = TRUE),  # Calculate average popularity
  track_count = .N  # Count number of songs
), by = main_genre][order(-avg_popularity)][1:5]  # Sort in descending order

print("Top 5 Genres by Popularity:")

[1] "Top 5 Genres by Popularity:"

print(top_genres)

         main_genre avg_popularity track_count
             <char>          <num>       <int>
1: Reggae/Caribbean       48.22222          27
2:              Pop       40.27116        5694
3:             Rock       37.57563        2413
4:         R&B/Soul       34.93709         302
5:     Folk/Country       34.65934          91

# ---- 2. Creating a New Column with := ----
# Add a column scaling popularity from 0 to 1
spotify_dt[, popularity_scaled := popularity / 100]

# View the first 5 rows to confirm the new column
print("First 5 rows with new popularity_scaled column:")

[1] "First 5 rows with new popularity_scaled column:"

print(spotify_dt[1:5, .(track_name, artist_name_s, popularity, popularity_scaled)])

                                track_name      artist_name_s popularity
                                    <char>             <char>      <int>
1: Justified & Ancient - Stand by the Jams            The KLF          0
2:         I Know You Want Me (Calle Ocho)            Pitbull         64
3:      From the Bottom of My Broken Heart     Britney Spears         56
4:        Apeman - 2014 Remastered Version          The Kinks         42
5:      You Can't Always Get What You Want The Rolling Stones          0
   popularity_scaled
               <num>
1:              0.00
2:              0.64
3:              0.56
4:              0.42
5:              0.00

# ---- 3. Grouping & Summarization ----
# Group by genre and calculate mean energy and danceability
genre_audio_stats <- spotify_dt[, .(
  avg_energy = mean(energy, na.rm = TRUE),
  avg_danceability = mean(danceability, na.rm = TRUE),
  track_count = .N
), by = main_genre][order(-avg_energy)]  # Sort by energy level

print("Genre-wise average energy and danceability:")

[1] "Genre-wise average energy and danceability:"

print(genre_audio_stats)

                main_genre avg_energy avg_danceability track_count
                    <char>      <num>            <num>       <int>
 1:       Dance/Electronic  0.8231969        0.6779331         254
 2:                Hip Hop  0.7146449        0.7607477         214
 3:                    Pop  0.6954419        0.6243319        5694
 4:                   Rock  0.6759753        0.5480435        2413
 5:       Reggae/Caribbean  0.6411481        0.7564815          27
 6:                  Other  0.6280373        0.5978034         946
 7:               R&B/Soul  0.6104570        0.6557980         302
 8:             Jazz/Blues  0.5895185        0.5241852          54
 9: Classical/Instrumental  0.5570000        0.5580000           4
10:           Folk/Country  0.5402066        0.5683407          91

# ---- Visualization ----
# Plot average energy vs. danceability per genre
ggplot(genre_audio_stats, aes(x = avg_danceability, y = avg_energy, size = track_count, label = main_genre)) +
  geom_point(alpha = 0.7, color = "blue") +
  geom_text(aes(label = ifelse(track_count > 100, as.character(main_genre), "")), hjust = 1.2, vjust = 1) +
  labs(title = "Energy vs. Danceability Across Genres",
       x = "Average Danceability",
       y = "Average Energy") +
  theme_minimal()