library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(conflicted)
# Load the raw data set from disk and preview it
# (head() shows the first six rows by default).
data <- read.csv(file = "dataset.csv")
data |> head()
# Make a bare filter() resolve to dplyr's version rather than stats::filter().
conflicted::conflicts_prefer(dplyr::filter)
[conflicted] Will prefer dplyr::filter over any other package.
# Keep only the explicit tracks and draw a random sample of 9,000 of them.
# The seed is fixed *before* this draw so that the base sample, and every
# resample derived from it further down, is the same on each run.
set.seed(123)
sample_data <- data |>
  filter(explicit == "True") |>  # the flag is stored as the text "True"
  sample_n(9000)
# From here on `data` refers to the 9,000-row, explicit-only sample.
data <- sample_data
nrow(data)
[1] 9000
# Preview the sampled data (head() shows the first six rows by default)
data |> head()
library(dplyr)

# Fix the RNG seed so the resamples below come out the same on every run.
set.seed(123)

# Each resample is half as large as `data`
sample_size <- round(0.5 * nrow(data))

# Draw five resamples of that size, with replacement, in the order df_1 ... df_5
df_1 <- sample_n(data, size = sample_size, replace = TRUE)
df_2 <- sample_n(data, size = sample_size, replace = TRUE)
df_3 <- sample_n(data, size = sample_size, replace = TRUE)
df_4 <- sample_n(data, size = sample_size, replace = TRUE)
df_5 <- sample_n(data, size = sample_size, replace = TRUE)

# Confirm the dimensions (rows, columns) of every resample
dim(df_1)
dim(df_2)
dim(df_3)
dim(df_4)
dim(df_5)
[1] 4500   21
[1] 4500   21
[1] 4500   21
[1] 4500   21
[1] 4500   21
# Inspect the structure of df_1 to see which columns are text and which are numeric
df_1 |> str()
'data.frame':   4500 obs. of  21 variables:
 $ X               : int  18659 85341 59510 33375 18378 112004 60876 71107 15666 39768 ...
 $ track_id        : chr  "1Ep3W7DYe6JxWsWHb3SKL1" "72wdyMXmB7cw11nURyEPKH" "05wAwEhXvp9ftiptQ9xbXl" "3xScUbjCK3Df7yedTWVQA4" ...
 $ artists         : chr  "Mo Mandel" "Oslo Ess" "Vista Versicle" "Slowly Slowly" ...
 $ album_name      : chr  "Negative Reinforcement" "Uleste Bøker Og Utgåtte Sko" "Needles" "Forget You" ...
 $ track_name      : chr  "Pee Pee Treats" "Alt Jeg Trenger" "Needles" "Forget You" ...
 $ popularity      : int  21 40 1 40 23 52 18 58 54 39 ...
 $ duration_ms     : int  149941 258586 200071 200506 278626 210146 198341 164760 156825 192472 ...
 $ explicit        : chr  "True" "True" "True" "True" ...
 $ danceability    : num  0.758 0.489 0.501 0.545 0.564 0.795 0.849 0.542 0.568 0.815 ...
 $ energy          : num  0.642 0.853 0.989 0.904 0.938 0.874 0.705 0.932 0.867 0.45 ...
 $ key             : int  6 9 2 1 5 1 2 4 0 2 ...
 $ loudness        : num  -12.07 -4.92 -1.62 -2.96 -6.82 ...
 $ mode            : int  1 1 1 1 1 1 1 1 1 1 ...
 $ speechiness     : num  0.954 0.0384 0.11 0.0663 0.938 0.0506 0.141 0.05 0.15 0.395 ...
 $ acousticness    : num  0.87 0.0271 0.000715 0.0297 0.781 0.0812 0.0334 0.0747 0.209 0.0303 ...
 $ instrumentalness: num  0.00 0.00 2.81e-06 0.00 0.00 9.06e-02 2.90e-05 0.00 0.00 4.66e-06 ...
 $ liveness        : num  0.773 0.199 0.2 0.064 0.93 0.065 0.305 0.613 0.407 0.344 ...
 $ valence         : num  0.5 0.511 0.113 0.839 0.356 0.2 0.35 0.698 0.441 0.228 ...
 $ tempo           : num  70.4 140.1 90 160 76.8 ...
 $ time_signature  : int  3 4 4 4 4 4 4 4 4 4 ...
 $ track_genre     : chr  "comedy" "punk-rock" "iranian" "emo" ...
# Class of every column of df_1, as a named character vector
df_1 |> sapply(class)
               X         track_id          artists       album_name       track_name       popularity      duration_ms 
       "integer"      "character"      "character"      "character"      "character"        "integer"        "integer" 
        explicit     danceability           energy              key         loudness             mode      speechiness 
     "character"        "numeric"        "numeric"        "integer"        "numeric"        "integer"        "numeric" 
    acousticness instrumentalness         liveness          valence            tempo   time_signature      track_genre 
       "numeric"        "numeric"        "numeric"        "numeric"        "numeric"        "integer"      "character" 
# Convert every character column of df_1 to a factor so that text columns are
# treated as categorical. mutate(across(where(...))) replaces the superseded
# mutate_if() and performs the same conversion.
# NOTE(review): only df_1 is converted here; df_2 ... df_5 keep their character
# columns, which is why they later report zero factor columns.
df_1 <- df_1 |> mutate(across(where(is.character), as.factor))
# Count of categorical (factor) columns. is.factor() yields exactly one logical
# per column, so vapply() can pin the result type.
sum(vapply(df_1, is.factor, logical(1)))
[1] 6
# Number of continuous (numeric) columns in df_1
df_1 |> sapply(is.numeric) |> sum()
[1] 15
# One-row summary holding the number of factor and numeric columns of df_1
summarise(
  df_1,
  num_cats = df_1 |> sapply(is.factor) |> sum(),
  num_nums = df_1 |> sapply(is.numeric) |> sum()
)
library(dplyr)
# Names of the categorical (factor) columns of df_1
cat_vars <- names(select(df_1, where(is.factor)))
print(cat_vars)
[1] "track_id"    "artists"     "album_name"  "track_name"  "explicit"    "track_genre"
library(dplyr)
# Names of the continuous (numeric) columns of df_1
continuous_vars <- colnames(select(df_1, where(is.numeric)))
print(continuous_vars)
 [1] "X"                "popularity"       "duration_ms"      "danceability"     "energy"           "key"             
 [7] "loudness"         "mode"             "speechiness"      "acousticness"     "instrumentalness" "liveness"        
[13] "valence"          "tempo"            "time_signature"  
# Average every numeric column of df_1 within each track_name.
# na.rm = TRUE is supplied through an anonymous function because forwarding
# extra arguments via across(...) is deprecated as of dplyr 1.1.0.
df_1 |>
  group_by(track_name) |>
  summarize(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
Warning: There was 1 warning in `summarize()`.
ℹ In argument: `across(where(is.numeric), mean, na.rm = TRUE)`.
ℹ In group 1: `track_name = "'98 To Piano"`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# df_2

# dplyr supplies summarise(), select() and where()
library(dplyr)

# One-row summary: number of factor columns and number of numeric columns in df_2
summarise(
  df_2,
  num_cats = df_2 |> sapply(is.factor) |> sum(),
  num_nums = df_2 |> sapply(is.numeric) |> sum()
)
# Names of the categorical (factor) columns of df_2.
# df_2's text columns are still character (only df_1 was converted to
# factors), so this comes back empty.
cat_vars_2 <- names(select(df_2, where(is.factor)))
print(cat_vars_2)
character(0)
# Names of the continuous (numeric) columns of df_2
continuous_vars_2 <- colnames(select(df_2, where(is.numeric)))
print(continuous_vars_2)
 [1] "X"                "popularity"       "duration_ms"      "danceability"     "energy"           "key"             
 [7] "loudness"         "mode"             "speechiness"      "acousticness"     "instrumentalness" "liveness"        
[13] "valence"          "tempo"            "time_signature"  
# Average every numeric column of df_2 within each track_genre.
# na.rm = TRUE is supplied through an anonymous function because forwarding
# extra arguments via across(...) is deprecated as of dplyr 1.1.0.
df_2 |>
  group_by(track_genre) |>
  summarize(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))

# df_3

library(dplyr)

# One-row summary: number of factor columns and number of numeric columns in df_3
summarise(
  df_3,
  num_cats = sum(sapply(X = df_3, FUN = is.factor)),
  num_nums = sum(sapply(X = df_3, FUN = is.numeric))
)
NA
# Names of the categorical (factor) columns of df_3
cat_vars_3 <- names(select(df_3, where(is.factor)))
print(cat_vars_3)
character(0)
# Names of the continuous (numeric) columns of df_3
continuous_vars_3 <- colnames(select(df_3, where(is.numeric)))
print(continuous_vars_3)
 [1] "X"                "popularity"       "duration_ms"      "danceability"     "energy"           "key"             
 [7] "loudness"         "mode"             "speechiness"      "acousticness"     "instrumentalness" "liveness"        
[13] "valence"          "tempo"            "time_signature"  

# Average every numeric column of df_3 within each track_id.
# na.rm = TRUE is supplied through an anonymous function because forwarding
# extra arguments via across(...) is deprecated as of dplyr 1.1.0.
df_3 |>
  group_by(track_id) |>
  summarize(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
NA
library(dplyr)

# One-row summary: number of factor columns and number of numeric columns in df_4
summarise(
  df_4,
  num_cats = df_4 |> sapply(is.factor) |> sum(),
  num_nums = df_4 |> sapply(is.numeric) |> sum()
)

# Names of the categorical (factor) columns of df_4
cat_vars_4 <- names(select(df_4, where(is.factor)))
print(cat_vars_4)
character(0)
# Names of the continuous (numeric) columns of df_4
continuous_vars_4 <- colnames(select(df_4, where(is.numeric)))
print(continuous_vars_4)
 [1] "X"                "popularity"       "duration_ms"      "danceability"     "energy"           "key"             
 [7] "loudness"         "mode"             "speechiness"      "acousticness"     "instrumentalness" "liveness"        
[13] "valence"          "tempo"            "time_signature"  
# Average every numeric column of df_4 within each artists value.
# na.rm = TRUE is supplied through an anonymous function because forwarding
# extra arguments via across(...) is deprecated as of dplyr 1.1.0.
df_4 |>
  group_by(artists) |>
  summarize(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
NA
library(dplyr)

# One-row summary: number of factor columns and number of numeric columns in df_5
summarise(
  df_5,
  num_cats = sum(sapply(X = df_5, FUN = is.factor)),
  num_nums = sum(sapply(X = df_5, FUN = is.numeric))
)

# Names of the categorical (factor) columns of df_5
cat_vars_5 <- names(select(df_5, where(is.factor)))
print(cat_vars_5)
character(0)
# Names of the continuous (numeric) columns of df_5
continuous_vars_5 <- colnames(select(df_5, where(is.numeric)))
print(continuous_vars_5)
 [1] "X"                "popularity"       "duration_ms"      "danceability"     "energy"           "key"             
 [7] "loudness"         "mode"             "speechiness"      "acousticness"     "instrumentalness" "liveness"        
[13] "valence"          "tempo"            "time_signature"  
# Average every numeric column of df_5 within each album_name.
# na.rm = TRUE is supplied through an anonymous function because forwarding
# extra arguments via across(...) is deprecated as of dplyr 1.1.0.
df_5 |>
  group_by(album_name) |>
  summarize(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
# Summarising using lapply: one summary() table per resample, in the order
# df_1 ... df_5
lapply(list(df_1, df_2, df_3, df_4, df_5), summary)
[[1]]
       X                            track_id                      artists                           album_name  
 Min.   :   446   41oY4WCTj5kccfesTVFnvN:   8   Jhayco;Bad Bunny      :  34   Feliz Cumpleaños con Perreo:  34  
 1st Qu.: 28849   3F3uRFEXzCby4QCWoVmIA5:   7   Jack Harlow           :  31   Halloween con perreito     :  24  
 Median : 48174   0FYD3N8NI3yQ8mKvIMDJol:   6   Daddy Yankee;Bad Bunny:  30   Bad Vibes Forever          :  19  
 Mean   : 51140   19kiEGUN1df3omU4ChyjEX:   6   XXXTENTACION          :  29   Un Verano Sin Ti           :  19  
 3rd Qu.: 71931   5ls7oqk4nnxgPAJmGqZyTw:   6   Feid                  :  27   El perreo es el futuro     :  17  
 Max.   :112976   7LtXmsv58ijM3k2dP6h6fR:   6   Jhayco                :  25   Frescura y Perreo          :  16  
                  (Other)               :4461   (Other)               :4324   (Other)                    :4371  
                  track_name     popularity     duration_ms      explicit     danceability        energy            key        
 CÓMO SE SIENTE - Remix:  34   Min.   : 0.00   Min.   :  32306   True:4500   Min.   :0.0614   Min.   :0.0423   Min.   : 0.000  
 X ÚLTIMA VEZ          :  30   1st Qu.:20.00   1st Qu.: 162990               1st Qu.:0.5300   1st Qu.:0.5827   1st Qu.: 2.000  
 First Class           :  25   Median :36.00   Median : 194266               Median :0.6580   Median :0.7320   Median : 6.000  
 Normal                :  25   Mean   :36.17   Mean   : 206347               Mean   :0.6385   Mean   :0.7218   Mean   : 5.331  
 Pantysito             :  23   3rd Qu.:56.00   3rd Qu.: 232690               3rd Qu.:0.7730   3rd Qu.:0.8820   3rd Qu.: 8.000  
 HISTORY               :  16   Max.   :98.00   Max.   :4246206               Max.   :0.9630   Max.   :0.9990   Max.   :11.000  
 (Other)               :4347                                                                                                   
    loudness            mode         speechiness      acousticness     instrumentalness       liveness         valence      
 Min.   :-24.843   Min.   :0.0000   Min.   :0.0243   Min.   :0.00000   Min.   :0.0000000   Min.   :0.0241   Min.   :0.0224  
 1st Qu.: -7.975   1st Qu.:0.0000   1st Qu.:0.0592   1st Qu.:0.00957   1st Qu.:0.0000000   1st Qu.:0.1040   1st Qu.:0.3040  
 Median : -5.899   Median :1.0000   Median :0.1130   Median :0.09650   Median :0.0000016   Median :0.1490   Median :0.4685  
 Mean   : -6.446   Mean   :0.5878   Mean   :0.1910   Mean   :0.21213   Mean   :0.0524307   Mean   :0.2394   Mean   :0.4705  
 3rd Qu.: -4.245   3rd Qu.:1.0000   3rd Qu.:0.2382   3rd Qu.:0.32650   3rd Qu.:0.0004930   3rd Qu.:0.3190   3rd Qu.:0.6390  
 Max.   :  1.682   Max.   :1.0000   Max.   :0.9650   Max.   :0.97700   Max.   :0.9630000   Max.   :0.9920   Max.   :0.9750  
                                                                                                                            
     tempo        time_signature     track_genre  
 Min.   : 35.39   Min.   :1.000   comedy   : 302  
 1st Qu.: 96.96   1st Qu.:4.000   emo      : 217  
 Median :119.72   Median :4.000   sad      : 214  
 Mean   :121.10   Mean   :3.966   j-dance  : 178  
 3rd Qu.:140.12   3rd Qu.:4.000   funk     : 157  
 Max.   :205.66   Max.   :5.000   dancehall: 147  
                                  (Other)  :3285  

[[2]]
       X            track_id           artists           album_name         track_name          popularity    duration_ms     
 Min.   :    59   Length:4500        Length:4500        Length:4500        Length:4500        Min.   : 0.0   Min.   :  31186  
 1st Qu.: 23701   Class :character   Class :character   Class :character   Class :character   1st Qu.:21.0   1st Qu.: 163545  
 Median : 48113   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :38.0   Median : 194746  
 Mean   : 50408                                                                               Mean   :37.1   Mean   : 205129  
 3rd Qu.: 71777                                                                               3rd Qu.:56.0   3rd Qu.: 231208  
 Max.   :112983                                                                               Max.   :98.0   Max.   :1482242  
   explicit          danceability        energy            key           loudness            mode         speechiness     
 Length:4500        Min.   :0.0614   Min.   :0.0423   Min.   : 0.00   Min.   :-24.843   Min.   :0.0000   Min.   :0.02440  
 Class :character   1st Qu.:0.5240   1st Qu.:0.5820   1st Qu.: 2.00   1st Qu.: -7.992   1st Qu.:0.0000   1st Qu.:0.05828  
 Mode  :character   Median :0.6580   Median :0.7290   Median : 6.00   Median : -5.923   Median :1.0000   Median :0.11000  
                    Mean   :0.6374   Mean   :0.7196   Mean   : 5.36   Mean   : -6.537   Mean   :0.5842   Mean   :0.19303  
                    3rd Qu.:0.7710   3rd Qu.:0.8790   3rd Qu.: 8.00   3rd Qu.: -4.496   3rd Qu.:1.0000   3rd Qu.:0.25000  
                    Max.   :0.9800   Max.   :1.0000   Max.   :11.00   Max.   :  0.915   Max.   :1.0000   Max.   :0.96500  
  acousticness     instrumentalness       liveness         valence           tempo        time_signature  track_genre       
 Min.   :0.00000   Min.   :0.0000000   Min.   :0.0196   Min.   :0.0215   Min.   : 35.39   Min.   :1.000   Length:4500       
 1st Qu.:0.00941   1st Qu.:0.0000000   1st Qu.:0.1020   1st Qu.:0.2970   1st Qu.: 96.12   1st Qu.:4.000   Class :character  
 Median :0.09620   Median :0.0000013   Median :0.1430   Median :0.4710   Median :119.97   Median :4.000   Mode  :character  
 Mean   :0.21220   Mean   :0.0491172   Mean   :0.2342   Mean   :0.4683   Mean   :121.60   Mean   :3.951                     
 3rd Qu.:0.33200   3rd Qu.:0.0005072   3rd Qu.:0.3120   3rd Qu.:0.6350   3rd Qu.:143.01   3rd Qu.:4.000                     
 Max.   :0.99500   Max.   :0.9710000   Max.   :0.9920   Max.   :0.9890   Max.   :213.78   Max.   :5.000                     

[[3]]
       X            track_id           artists           album_name         track_name          popularity     duration_ms     
 Min.   :   243   Length:4500        Length:4500        Length:4500        Length:4500        Min.   : 0.00   Min.   :  31186  
 1st Qu.: 29386   Class :character   Class :character   Class :character   Class :character   1st Qu.:19.00   1st Qu.: 162592  
 Median : 48520   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :36.00   Median : 193985  
 Mean   : 51616                                                                               Mean   :35.64   Mean   : 204265  
 3rd Qu.: 72043                                                                               3rd Qu.:56.00   3rd Qu.: 232627  
 Max.   :112983                                                                               Max.   :98.00   Max.   :1482242  
   explicit          danceability        energy            key            loudness            mode         speechiness    
 Length:4500        Min.   :0.0614   Min.   :0.0759   Min.   : 0.000   Min.   :-24.843   Min.   :0.0000   Min.   :0.0243  
 Class :character   1st Qu.:0.5240   1st Qu.:0.5810   1st Qu.: 2.000   1st Qu.: -7.963   1st Qu.:0.0000   1st Qu.:0.0596  
 Mode  :character   Median :0.6570   Median :0.7360   Median : 6.000   Median : -5.921   Median :1.0000   Median :0.1110  
                    Mean   :0.6394   Mean   :0.7227   Mean   : 5.421   Mean   : -6.496   Mean   :0.5716   Mean   :0.1925  
                    3rd Qu.:0.7760   3rd Qu.:0.8840   3rd Qu.: 8.000   3rd Qu.: -4.327   3rd Qu.:1.0000   3rd Qu.:0.2470  
                    Max.   :0.9800   Max.   :1.0000   Max.   :11.000   Max.   :  1.104   Max.   :1.0000   Max.   :0.9650  
  acousticness     instrumentalness       liveness         valence           tempo        time_signature  track_genre       
 Min.   :0.00000   Min.   :0.0000000   Min.   :0.0268   Min.   :0.0215   Min.   : 45.10   Min.   :1.000   Length:4500       
 1st Qu.:0.00876   1st Qu.:0.0000000   1st Qu.:0.1040   1st Qu.:0.3040   1st Qu.: 96.96   1st Qu.:4.000   Class :character  
 Median :0.09965   Median :0.0000013   Median :0.1480   Median :0.4850   Median :119.97   Median :4.000   Mode  :character  
 Mean   :0.21426   Mean   :0.0495060   Mean   :0.2366   Mean   :0.4793   Mean   :121.77   Mean   :3.964                     
 3rd Qu.:0.33125   3rd Qu.:0.0004478   3rd Qu.:0.3160   3rd Qu.:0.6560   3rd Qu.:143.00   3rd Qu.:4.000                     
 Max.   :0.99200   Max.   :0.9710000   Max.   :0.9920   Max.   :0.9700   Max.   :208.95   Max.   :5.000                     

[[4]]
       X            track_id           artists           album_name         track_name          popularity     duration_ms     
 Min.   :   247   Length:4500        Length:4500        Length:4500        Length:4500        Min.   : 0.00   Min.   :  31186  
 1st Qu.: 28760   Class :character   Class :character   Class :character   Class :character   1st Qu.:20.00   1st Qu.: 163611  
 Median : 48608   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :38.00   Median : 193145  
 Mean   : 51938                                                                               Mean   :36.78   Mean   : 204492  
 3rd Qu.: 72226                                                                               3rd Qu.:56.00   3rd Qu.: 229466  
 Max.   :112983                                                                               Max.   :98.00   Max.   :1482242  
   explicit          danceability        energy            key            loudness            mode         speechiness     
 Length:4500        Min.   :0.0614   Min.   :0.0423   Min.   : 0.000   Min.   :-24.843   Min.   :0.0000   Min.   :0.02420  
 Class :character   1st Qu.:0.5200   1st Qu.:0.5900   1st Qu.: 2.000   1st Qu.: -7.915   1st Qu.:0.0000   1st Qu.:0.05847  
 Mode  :character   Median :0.6535   Median :0.7310   Median : 6.000   Median : -5.907   Median :1.0000   Median :0.11000  
                    Mean   :0.6345   Mean   :0.7229   Mean   : 5.438   Mean   : -6.496   Mean   :0.5802   Mean   :0.19126  
                    3rd Qu.:0.7710   3rd Qu.:0.8830   3rd Qu.: 9.000   3rd Qu.: -4.414   3rd Qu.:1.0000   3rd Qu.:0.24900  
                    Max.   :0.9710   Max.   :1.0000   Max.   :11.000   Max.   :  1.821   Max.   :1.0000   Max.   :0.96500  
  acousticness      instrumentalness       liveness         valence           tempo        time_signature  track_genre       
 Min.   :0.000001   Min.   :0.0000000   Min.   :0.0196   Min.   :0.0215   Min.   : 35.39   Min.   :1.000   Length:4500       
 1st Qu.:0.008735   1st Qu.:0.0000000   1st Qu.:0.1040   1st Qu.:0.2980   1st Qu.: 97.01   1st Qu.:4.000   Class :character  
 Median :0.097700   Median :0.0000016   Median :0.1450   Median :0.4640   Median :120.00   Median :4.000   Mode  :character  
 Mean   :0.211339   Mean   :0.0525719   Mean   :0.2341   Mean   :0.4670   Mean   :122.20   Mean   :3.959                     
 3rd Qu.:0.331000   3rd Qu.:0.0005490   3rd Qu.:0.3140   3rd Qu.:0.6362   3rd Qu.:142.96   3rd Qu.:4.000                     
 Max.   :0.995000   Max.   :0.9950000   Max.   :0.9920   Max.   :0.9890   Max.   :206.76   Max.   :5.000                     

[[5]]
       X            track_id           artists           album_name         track_name          popularity     duration_ms     
 Min.   :    59   Length:4500        Length:4500        Length:4500        Length:4500        Min.   : 0.00   Min.   :  31240  
 1st Qu.: 28288   Class :character   Class :character   Class :character   Class :character   1st Qu.:21.00   1st Qu.: 161989  
 Median : 48168   Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :38.00   Median : 193309  
 Mean   : 50792                                                                               Mean   :36.92   Mean   : 204436  
 3rd Qu.: 72004                                                                               3rd Qu.:56.00   3rd Qu.: 232616  
 Max.   :112905                                                                               Max.   :98.00   Max.   :1101318  
   explicit          danceability        energy            key            loudness            mode         speechiness    
 Length:4500        Min.   :0.1110   Min.   :0.0678   Min.   : 0.000   Min.   :-24.843   Min.   :0.0000   Min.   :0.0244  
 Class :character   1st Qu.:0.5190   1st Qu.:0.5800   1st Qu.: 2.000   1st Qu.: -7.932   1st Qu.:0.0000   1st Qu.:0.0583  
 Mode  :character   Median :0.6495   Median :0.7275   Median : 6.000   Median : -5.982   Median :1.0000   Median :0.1090  
                    Mean   :0.6322   Mean   :0.7193   Mean   : 5.382   Mean   : -6.491   Mean   :0.5702   Mean   :0.1894  
                    3rd Qu.:0.7660   3rd Qu.:0.8810   3rd Qu.: 8.000   3rd Qu.: -4.473   3rd Qu.:1.0000   3rd Qu.:0.2460  
                    Max.   :0.9660   Max.   :1.0000   Max.   :11.000   Max.   :  1.821   Max.   :1.0000   Max.   :0.9650  
  acousticness      instrumentalness       liveness         valence           tempo        time_signature  track_genre       
 Min.   :0.000001   Min.   :0.0000000   Min.   :0.0197   Min.   :0.0256   Min.   : 35.39   Min.   :1.000   Length:4500       
 1st Qu.:0.008170   1st Qu.:0.0000000   1st Qu.:0.1040   1st Qu.:0.2990   1st Qu.: 96.88   1st Qu.:4.000   Class :character  
 Median :0.094600   Median :0.0000012   Median :0.1480   Median :0.4780   Median :119.93   Median :4.000   Mode  :character  
 Mean   :0.210667   Mean   :0.0504810   Mean   :0.2360   Mean   :0.4739   Mean   :121.93   Mean   :3.958                     
 3rd Qu.:0.323000   3rd Qu.:0.0003340   3rd Qu.:0.3150   3rd Qu.:0.6470   3rd Qu.:143.98   3rd Qu.:4.000                     
 Max.   :0.995000   Max.   :0.9710000   Max.   :0.9920   Max.   :0.9750   Max.   :213.78   Max.   :5.000                     

1. How different are they?

For comparing random sub-samples of tracks,

2. What would you have called an anomaly in one sub-sample that you wouldn’t in another?

For this dataset, anomalies vary across sub-samples like:

3. Are there aspects of the data that are consistent among all sub-samples?

Conclusion

In general, running this kind of investigation on random samples shows that sampling choices can significantly affect the conclusions we draw. If we rely on random subsampling without ensuring that all types of variables—especially categorical ones—are represented, we risk drawing conclusions from incomplete or biased samples. For instance, if a subsample omits important categorical variables, we might overlook patterns or relationships that exist between these variables and the outcome of interest.

In the future, this experience suggests that it’s important to: 1. Examine sampling methods carefully to ensure that both categorical and numeric variables are appropriately represented. 2. Use stratified sampling or ensure a balanced representation of categories if we’re working with imbalanced datasets or when certain groups are crucial for the analysis. 3. Consider the context of missing variables when interpreting results—if a certain category is underrepresented, it might skew results or lead to faulty conclusions about the relationships within the data.

By refining the sampling process and ensuring balanced representation, we can draw more robust, unbiased conclusions, making our analyses more reliable.

# Monte Carlo (bootstrap-style) check of variable types in df_1 ----
#
# Each iteration resamples the rows of df_1 with replacement (same number of
# rows as df_1) and counts how many columns of the resample are factors and
# how many are numeric.
#
# NOTE(review): resampling rows never adds, drops, or retypes columns, so
# cat_count and num_count are the same in every iteration by construction.
# To study how often individual categories (factor levels) survive
# resampling, the loop would have to count distinct levels per column instead.

# Number of simulations
num_simulations <- 1000

# Row count of df_1; hoisted because it does not change inside the loop
n_rows <- nrow(df_1)

# Preallocate one result slot per simulation
results <- vector("list", num_simulations)

# Simulation process
# seq_len() yields an empty sequence when num_simulations is 0 (1:0 would not)
for (i in seq_len(num_simulations)) {
  # Resample row indices with replacement; drop = FALSE keeps a data frame
  # even if df_1 were to have a single column
  sampled_data <- df_1[
    sample(n_rows, size = n_rows, replace = TRUE), ,
    drop = FALSE
  ]

  # Count factor and numeric columns; vapply() always returns a logical
  # vector, whereas sapply() changes its return type on empty input
  cat_count <- sum(vapply(sampled_data, is.factor, logical(1)))
  num_count <- sum(vapply(sampled_data, is.numeric, logical(1)))

  # Store the results
  results[[i]] <- list(cat_count = cat_count, num_count = num_count)
}

# Convert results into a dataframe (one row per simulation)
simulation_results <- bind_rows(results)

# Share of simulations in which at least one factor column was present
cat_appearance_rate <- mean(simulation_results$cat_count > 0)

# Tabulate how many simulations had (TRUE) or lacked (FALSE) a factor column
table(simulation_results$cat_count > 0)

TRUE 
1000 
# Report the share of simulations that contained at least one factor column
paste("Rate of Categorical Variables in Subsamples:", cat_appearance_rate) |>
  print()
[1] "Rate of Categorical Variables in Subsamples: 1"

Explanation:

  1. TRUE: This indicates that in each of the 1,000 simulations, at least one categorical variable was present in the subsample. So, in every random subsample, there was at least one categorical variable included.

  2. 1000: This is the number of simulations for which the check was TRUE. Because it equals the 1,000 iterations of the Monte Carlo simulation, no iteration produced FALSE.

  3. Rate of Categorical Variables in Subsamples: 1: This means that in all 1,000 subsamples, categorical variables are present. The rate of 1 signifies that categorical variables were always included in the subsamples. So, there was no case where categorical variables were completely missing in any of the simulations.

Conclusions:

Next Steps:

I could expand the simulation to track how often different categories appear and explore if some categories are more prone to being sampled over others.

LS0tDQp0aXRsZTogIlJfRGF0YURpdmU0Ig0KYXV0aG9yOiAiRFNKIg0KZGF0ZTogIjIwMjUtMDItMDkiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGNvbmZsaWN0ZWQpDQpgYGANCg0KYGBge3J9DQojUmVhZGluZyB0aGUgZGF0YSBzZXQgYW5kIHByaW50aW5nIGZpcnN0IDVyb3dzDQpkYXRhIDwtIHJlYWQuY3N2KCJkYXRhc2V0LmNzdiIpDQpoZWFkKGRhdGEpDQpgYGANCg0KYGBge3J9DQpjb25mbGljdGVkOjpjb25mbGljdHNfcHJlZmVyKGRwbHlyOjpmaWx0ZXIpDQoNCiMgRmlsdGVyIGRhdGFzZXQgd2hlcmUgZXhwbGljaXQgaXMgIlRydWUiIGFuZCBzYW1wbGUgMTAsMDAwIHJvd3MNCnNhbXBsZV9kYXRhIDwtIGRhdGEgfD4gZmlsdGVyKGV4cGxpY2l0ID09ICJUcnVlIikgfD4gc2FtcGxlX24oOTAwMCkNCmRhdGEgPC0gc2FtcGxlX2RhdGENCm5yb3coZGF0YSkNCg0KIyBEaXNwbGF5IGZpcnN0IGZldyByb3dzDQpoZWFkKGRhdGEpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGRwbHlyKQ0KDQpzZXQuc2VlZCgxMjMpICAjIEVuc3VyZSByZXByb2R1Y2liaWxpdHkNCiMgVGhlIHNldC5zZWVkKDEyMykgZnVuY3Rpb24gZW5zdXJlcyB0aGF0IHRoZSByYW5kb20gc2FtcGxpbmcgcHJvY2VzcyBwcm9kdWNlcyB0aGUgc2FtZSByZXN1bHRzIGV2ZXJ5IHRpbWUgdGhlIGNvZGUgaXMgcnVuDQoNCiMgRGV0ZXJtaW5lIHNhbXBsZSBzaXplICg1MCUgb2YgZGF0YSkNCnNhbXBsZV9zaXplIDwtIHJvdW5kKG5yb3coZGF0YSkgKiAwLjUpDQoNCiMgQ3JlYXRlIDUgcmFuZG9tIHNhbXBsZXMgd2l0aCByZXBsYWNlbWVudA0KZGZfMSA8LSBkYXRhIHw+IHNhbXBsZV9uKHNhbXBsZV9zaXplLCByZXBsYWNlID0gVFJVRSkNCmRmXzIgPC0gZGF0YSB8PiBzYW1wbGVfbihzYW1wbGVfc2l6ZSwgcmVwbGFjZSA9IFRSVUUpDQpkZl8zIDwtIGRhdGEgfD4gc2FtcGxlX24oc2FtcGxlX3NpemUsIHJlcGxhY2UgPSBUUlVFKQ0KZGZfNCA8LSBkYXRhIHw+IHNhbXBsZV9uKHNhbXBsZV9zaXplLCByZXBsYWNlID0gVFJVRSkNCmRmXzUgPC0gZGF0YSB8PiBzYW1wbGVfbihzYW1wbGVfc2l6ZSwgcmVwbGFjZSA9IFRSVUUpDQoNCiMgVmVyaWZ5DQpkaW0oZGZfMSk7IGRpbShkZl8yKTsgZGltKGRmXzMpOyBkaW0oZGZfNCk7IGRpbShkZl81KQ0KYGBgDQoNCmBgYHtyfQ0KIyBWZXJpZnlpbmcgd2hldGhlciBkYXRhZnJhbWUtMSBpcyBoYXZpbmcgY2F0ZWdvcmljYWwgYW5kIGNvbnRpbnVvdXMgdmFyaWFibGVzDQpzdHIoZGZfMSkNCnNhcHBseShkZl8xLCBjbGFzcykNCmRmXzEgPC0gZGZfMSB8PiBtdXRhdGVfaWYoaXMuY2hhcmFjdGVyLCBhcy5mYWN0b3IpDQpzdW0oc2FwcGx5KGRmXzEsIGlzLmZhY3RvcikpICAgICMgQ291bnQgb2YgY2F0ZWdvcmljYWwgdmFyaWFibGVzDQpzdW0oc2FwcGx5KGRmXzEsIGlzLm51bWVyaWMpKSAgICMgQ291
bnQgb2YgY29udGludW91cyB2YXJpYWJsZXMNCmBgYA0KYGBge3J9DQojIENoZWNraW5nIGNvdW50cyB1c2luZyBzdW1tYXJpemUNCmRmXzEgfD4gDQogIHN1bW1hcml6ZSgNCiAgICBudW1fY2F0cyA9IHN1bShzYXBwbHkoZGZfMSwgaXMuZmFjdG9yKSksIA0KICAgIG51bV9udW1zID0gc3VtKHNhcHBseShkZl8xLCBpcy5udW1lcmljKSkNCiAgKQ0KYGBgDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQojIFRvIGlkZW50aWZ5IGNhdGVnb3JpY2FsIHZhcmlhYmxlcyBuYW1lcw0KY2F0X3ZhcnMgPC0gZGZfMSB8PiBzZWxlY3Qod2hlcmUoaXMuZmFjdG9yKSkgfD4gbmFtZXMoKQ0KcHJpbnQoY2F0X3ZhcnMpDQpgYGANCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCiMgRXh0cmFjdCBhbmQgcHJpbnQgbmFtZXMgb2YgY29udGludW91cyAobnVtZXJpYykgdmFyaWFibGVzDQpjb250aW51b3VzX3ZhcnMgPC0gZGZfMSB8PiBzZWxlY3Qod2hlcmUoaXMubnVtZXJpYykpIHw+IGNvbG5hbWVzKCkNCnByaW50KGNvbnRpbnVvdXNfdmFycykNCmBgYA0KDQpgYGB7cn0NCiMgR3JvdXBpbmcgZGZfMSBieSB0cmFja19uYW1lIGFuZCBjb21wdXRpbmcgdGhlIG1lYW4gZm9yIGFsbCBudW1lcmljIGNvbHVtbnMNCmRmXzEgfD4gDQogIGdyb3VwX2J5KHRyYWNrX25hbWUpIHw+IA0KICBzdW1tYXJpemUoYWNyb3NzKHdoZXJlKGlzLm51bWVyaWMpLCBtZWFuLCBuYS5ybSA9IFRSVUUpKQ0KYGBgDQoNCmBgYHtyfQ0KI2RmXzINCg0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJ5DQpsaWJyYXJ5KGRwbHlyKQ0KDQojIFN1bW1hcml6ZSB0aGUgY291bnQgb2YgY2F0ZWdvcmljYWwgKGZhY3RvcikgYW5kIG51bWVyaWMgdmFyaWFibGVzIGluIGRmXzINCmRmXzIgfD4gDQogIHN1bW1hcml6ZSgNCiAgICBudW1fY2F0cyA9IHN1bShzYXBwbHkoZGZfMiwgaXMuZmFjdG9yKSksIA0KICAgIG51bV9udW1zID0gc3VtKHNhcHBseShkZl8yLCBpcy5udW1lcmljKSkNCiAgKQ0KYGBgDQoNCmBgYHtyfQ0KIyBFeHRyYWN0IGFuZCBwcmludCBuYW1lcyBvZiBjYXRlZ29yaWNhbCB2YXJpYWJsZXMNCmNhdF92YXJzXzIgPC0gZGZfMiB8PiBzZWxlY3Qod2hlcmUoaXMuZmFjdG9yKSkgfD4gbmFtZXMoKQ0KcHJpbnQoY2F0X3ZhcnNfMikNCmBgYA0KDQpgYGB7cn0NCiMgRXh0cmFjdCBhbmQgcHJpbnQgbmFtZXMgb2YgbnVtZXJpYyAoY29udGludW91cykgdmFyaWFibGVzDQpjb250aW51b3VzX3ZhcnNfMiA8LSBkZl8yIHw+IHNlbGVjdCh3aGVyZShpcy5udW1lcmljKSkgfD4gY29sbmFtZXMoKQ0KcHJpbnQoY29udGludW91c192YXJzXzIpDQpgYGANCg0KYGBge3J9DQojIEdyb3VwIGRmXzIgYnkgdHJhY2tfZ2VucmUgYW5kIGNvbXB1dGUgdGhlIG1lYW4gZm9yIGFsbCBudW1lcmljIGNvbHVtbnMNCmRmXzIgfD4gDQogIGdyb3VwX2J5KHRyYWNrX2dlbnJlKSB8PiANCiAgc3VtbWFyaXplKGFjcm9zcyh3aGVyZShpcy5udW1lcmljKSwgbWVhbiwgbmEucm0gPSBUUlVFKSkNCmBgYA0KYGBge3J9
DQoNCiNkZl8zDQoNCmxpYnJhcnkoZHBseXIpDQoNCmRmXzMgfD4gDQogIHN1bW1hcml6ZSgNCiAgICBudW1fY2F0cyA9IHN1bShzYXBwbHkoZGZfMywgaXMuZmFjdG9yKSksIA0KICAgIG51bV9udW1zID0gc3VtKHNhcHBseShkZl8zLCBpcy5udW1lcmljKSkNCiAgKQ0KDQpgYGANCmBgYHtyfQ0KY2F0X3ZhcnNfMyA8LSBkZl8zIHw+IHNlbGVjdCh3aGVyZShpcy5mYWN0b3IpKSB8PiBuYW1lcygpDQpwcmludChjYXRfdmFyc18zKQ0KYGBgDQoNCmBgYHtyfQ0KY29udGludW91c192YXJzXzMgPC0gZGZfMyB8PiBzZWxlY3Qod2hlcmUoaXMubnVtZXJpYykpIHw+IGNvbG5hbWVzKCkNCnByaW50KGNvbnRpbnVvdXNfdmFyc18zKQ0KYGBgDQoNCmBgYHtyfQ0KDQpkZl8zIHw+IA0KICBncm91cF9ieSh0cmFja19pZCkgfD4gDQogIHN1bW1hcml6ZShhY3Jvc3Mod2hlcmUoaXMubnVtZXJpYyksIG1lYW4sIG5hLnJtID0gVFJVRSkpDQoNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQoNCmRmXzQgfD4gDQogIHN1bW1hcml6ZSgNCiAgICBudW1fY2F0cyA9IHN1bShzYXBwbHkoZGZfNCwgaXMuZmFjdG9yKSksIA0KICAgIG51bV9udW1zID0gc3VtKHNhcHBseShkZl80LCBpcy5udW1lcmljKSkNCiAgKQ0KDQpjYXRfdmFyc180IDwtIGRmXzQgfD4gc2VsZWN0KHdoZXJlKGlzLmZhY3RvcikpIHw+IG5hbWVzKCkNCnByaW50KGNhdF92YXJzXzQpDQoNCmNvbnRpbnVvdXNfdmFyc180IDwtIGRmXzQgfD4gc2VsZWN0KHdoZXJlKGlzLm51bWVyaWMpKSB8PiBjb2xuYW1lcygpDQpwcmludChjb250aW51b3VzX3ZhcnNfNCkNCg0KZGZfNCB8PiANCiAgZ3JvdXBfYnkoYXJ0aXN0cykgfD4gDQogIHN1bW1hcml6ZShhY3Jvc3Mod2hlcmUoaXMubnVtZXJpYyksIG1lYW4sIG5hLnJtID0gVFJVRSkpDQoNCmBgYA0KYGBge3J9DQpsaWJyYXJ5KGRwbHlyKQ0KDQpkZl81IHw+IA0KICBzdW1tYXJpemUoDQogICAgbnVtX2NhdHMgPSBzdW0oc2FwcGx5KGRmXzUsIGlzLmZhY3RvcikpLCANCiAgICBudW1fbnVtcyA9IHN1bShzYXBwbHkoZGZfNSwgaXMubnVtZXJpYykpDQogICkNCg0KY2F0X3ZhcnNfNSA8LSBkZl81IHw+IHNlbGVjdCh3aGVyZShpcy5mYWN0b3IpKSB8PiBuYW1lcygpDQpwcmludChjYXRfdmFyc181KQ0KDQpjb250aW51b3VzX3ZhcnNfNSA8LSBkZl81IHw+IHNlbGVjdCh3aGVyZShpcy5udW1lcmljKSkgfD4gY29sbmFtZXMoKQ0KcHJpbnQoY29udGludW91c192YXJzXzUpDQoNCmRmXzUgfD4gDQogIGdyb3VwX2J5KGFsYnVtX25hbWUpIHw+IA0KICBzdW1tYXJpemUoYWNyb3NzKHdoZXJlKGlzLm51bWVyaWMpLCBtZWFuLCBuYS5ybSA9IFRSVUUpKQ0KYGBgDQpgYGB7cn0NCiMgU3VtbWFyaXNpbmcgdXNpbmcgbGFwcGx5DQpsYXBwbHkobGlzdChkZl8xLCBkZl8yLCBkZl8zLCBkZl80LCBkZl81KSwgc3VtbWFyeSkNCmBgYA0KDQoNCg0KIyMjIDEuICoqSG93IGRpZmZlcmVudCBhcmUgdGhleT8qKg0KICAgRm9yIGNvbXBhcmluZyAqKnJhbmRv
bSBzdWItc2FtcGxlcyoqIG9mIHRyYWNrcywNCiAgIA0KICAgLSAqKlBvcHVsYXJpdHk6KiogVHJhY2tzIHdpdGggaGlnaGVyIHBvcHVsYXJpdHkgKHBvcHVsYXJpdHkgc2NvcmUgb2YgODAgb3IgbW9yZSkgYXJlIGhhdmluZyBoaWdoZXIgKiplbmVyZ3kqKiwgKipkYW5jZWFiaWxpdHkqKiwgYW5kICoqbG91ZG5lc3MqKiB2YWx1ZXMuIEluIGNvbnRyYXN0LCBsZXNzIHBvcHVsYXIgdHJhY2tzIGFyZSBoYXZpbmcgbG93ZXIgdmFsdWVzIGluIHRoZXNlIGZlYXR1cmVzLg0KICAgDQogICAtICoqR2VucmUtQmFzZWQgRGlmZmVyZW5jZXM6KiogQWNvdXN0aWMgdHJhY2tzIGhhdmUgbG93ZXIgKiplbmVyZ3kqKiBhbmQgKipkYW5jZWFiaWxpdHkqKiwgd2l0aCBoaWdoZXIgKiphY291c3RpY25lc3MqKiBhbmQgKippbnN0cnVtZW50YWxuZXNzKiogY29tcGFyZWQgdG8gZWxlY3Ryb25pYyBvciBwb3AgZ2VucmVzLg0KICAgDQoNCiMjIyAyLiAqKldoYXQgd291bGQgeW91IGhhdmUgY2FsbGVkIGFuIGFub21hbHkgaW4gb25lIHN1Yi1zYW1wbGUgdGhhdCB5b3Ugd291bGRuJ3QgaW4gYW5vdGhlcj8qKg0KICAgRm9yIHRoaXMgZGF0YXNldCwgYW5vbWFsaWVzIHZhcnkgYWNyb3NzIHN1Yi1zYW1wbGVzIGxpa2U6DQogICANCiAgIC0gQSB0cmFjayB3aXRoIGEgcG9wdWxhcml0eSBzY29yZSBvZiA1MCBvciBiZWxvdyBpcyBhbiBhbm9tYWx5IGluIGEgc3ViLXNhbXBsZSBvZiB0cmFja3Mgd2l0aCBwb3B1bGFyaXR5IG1vc3RseSBhYm92ZSA3MC4gQ29udmVyc2VseSwgaW4gYSBzdWItc2FtcGxlIHdpdGggbW9zdGx5IGxvdy1wb3B1bGFyaXR5IHRyYWNrcywgYSB0cmFjayB3aXRoIHBvcHVsYXJpdHkgb3ZlciA3MCBpcyB1bnVzdWFsLg0KICAgDQogICAtIEEgdHJhY2sgd2l0aCBleGNlcHRpb25hbGx5IGxvdyBlbmVyZ3kgKGUuZy4sIGJlbG93IDAuMikgaXMgYW4gYW5vbWFseSBpbiBhIHN1Yi1zYW1wbGUgZm9jdXNlZCBvbiBlbmVyZ2V0aWMgZ2VucmVzIGxpa2UgcG9wIG9yIEVETSwgYnV0IG5vcm1hbCBpbiBhIHN1Yi1zYW1wbGUgb2YgY2xhc3NpY2FsIG9yIGFjb3VzdGljIG11c2ljLg0KICAgDQogICAtIEluIER1cmF0aW9uIChtcyk6IEEgdHJhY2sgdGhhdCBpcyBzaWduaWZpY2FudGx5IGxvbmdlciAoZS5nLiwgb3ZlciA3IG1pbnV0ZXMpIGlzIGFuIGFub21hbHkgaW4gYSB0eXBpY2FsIDMtNCBtaW51dGUgcG9wL3JvY2sgc3ViLXNhbXBsZSBidXQgbm90IHVudXN1YWwgaW4gYSBjbGFzc2ljYWwgb3IgYW1iaWVudCBzdWItc2FtcGxlLg0KDQojIyMgMy4gKipBcmUgdGhlcmUgYXNwZWN0cyBvZiB0aGUgZGF0YSB0aGF0IGFyZSBjb25zaXN0ZW50IGFtb25nIGFsbCBzdWItc2FtcGxlcz8qKg0KICAgLSAqKkNvbW1vbiBGZWF0dXJlcyBBY3Jvc3MgU3ViLXNhbXBsZXM6KioNCiAgICAgLSBGcm9tIHRoZSBkZnMsICoqVGVtcG8qKiBpcyBmYWlybHkgY29uc2lzdGVudCBhY3Jvc3MgbW9zdCB0cmFja3MsIHRob3VnaCBjZXJ0YWluIGdlbnJlcyBsaWtlIGVs
ZWN0cm9uaWMgb3IgZGFuY2UgYXJlIGhhdmluZyBoaWdoZXIgdGVtcG9zLiBJbiBhbGwgc3ViLXNhbXBsZXMsIHRoZXJlIGlzIGxpa2VseSB0byBiZSBhIHdpZGUgcmFuZ2Ugb2YgKip2YWxlbmNlKiogKG1vb2QpLCAqKmRhbmNlYWJpbGl0eSoqLCBhbmQgKipsb3VkbmVzcyoqLCByZWZsZWN0aW5nIHRoZSBkaXZlcnNpdHkgaW4gbXVzaWMgc3R5bGVzLg0KICAgICAtICoqS2V5KiogYW5kICoqbW9kZSoqIHZhbHVlcyAobXVzaWNhbCBrZXkgYW5kIHNjYWxlKSBhcmUgcmVsYXRpdmVseSBjb25zaXN0ZW50IHdpdGhpbiBjZXJ0YWluIGdlbnJlcyAoZS5nLiwgcG9wIHRyYWNrcyB3aWxsIGxpa2VseSBoYXZlIG1vcmUgY29tbW9uIG1vZGVzIGFuZCBrZXlzKS4NCiAgICAgLSAqKlRpbWUgc2lnbmF0dXJlKiogc3RheWluZyB0aGUgc2FtZSAobW9zdGx5IDQvNCB0aW1lKSBhY3Jvc3MgYSBsYXJnZSBudW1iZXIgb2YgdHJhY2tzLCB0aG91Z2ggZ2VucmVzIGxpa2UgamF6eiBvciBjbGFzc2ljYWwgbWlnaHQgZmVhdHVyZSBhIGJyb2FkZXIgcmFuZ2Ugb2Ygc2lnbmF0dXJlcy4NCiAgICAgDQogICAgIA0KDQojIyMgKipDb25jbHVzaW9uKioNCkdvaW5nIGluIGEgZ2VuZXJhbCB3YXksDQpCeSBkb2luZyB0aGlzIGtpbmQgb2YgaW52ZXN0aWdhdGlvbiBvbiByYW5kb20gc2FtcGxlLCB3ZSBoYXZlIHNpZ25pZmljYW50IGVmZmVjdHMgb3ZlciB0aGUgY29uY2x1c2lvbnMgd2UgZHJhdy4gSWYgd2UgcmVseSBvbiByYW5kb20gc3Vic2FtcGxpbmcgd2l0aG91dCBlbnN1cmluZyB0aGF0IGFsbCB0eXBlcyBvZiB2YXJpYWJsZXPigJRlc3BlY2lhbGx5IGNhdGVnb3JpY2FsIG9uZXPigJRhcmUgcmVwcmVzZW50ZWQsIHdlIHdpbGwgbm90IGdldCBjb25jbHVzaW9ucyBiYXNlZCBvbiBpbmNvbXBsZXRlIG9yIGJpYXNlZCBzYW1wbGVzLiBGb3IgaW5zdGFuY2UsIGlmIGEgc3Vic2FtcGxlIG9taXRzIGltcG9ydGFudCBjYXRlZ29yaWNhbCB2YXJpYWJsZXMsIHdlIG1pZ2h0IG92ZXJsb29rIHBhdHRlcm5zIG9yIHJlbGF0aW9uc2hpcHMgdGhhdCBleGlzdCBiZXR3ZWVuIHRoZXNlIHZhcmlhYmxlcyBhbmQgdGhlIG91dGNvbWUgb2YgaW50ZXJlc3QuDQoNCkluIHRoZSBmdXR1cmUsIHRoaXMgZXhwZXJpZW5jZSBzdWdnZXN0cyB0aGF0IGl0J3MgaW1wb3J0YW50IHRvOg0KMS4gKipFeGFtaW5lIHNhbXBsaW5nIG1ldGhvZHMqKiBjYXJlZnVsbHkgdG8gZW5zdXJlIHRoYXQgYm90aCBjYXRlZ29yaWNhbCBhbmQgbnVtZXJpYyB2YXJpYWJsZXMgYXJlIGFwcHJvcHJpYXRlbHkgcmVwcmVzZW50ZWQuDQoyLiAqKlVzZSBzdHJhdGlmaWVkIHNhbXBsaW5nKiogb3IgZW5zdXJlIGEgYmFsYW5jZWQgcmVwcmVzZW50YXRpb24gb2YgY2F0ZWdvcmllcyBpZiB3ZSdyZSB3b3JraW5nIHdpdGggaW1iYWxhbmNlZCBkYXRhc2V0cyBvciB3aGVuIGNlcnRhaW4gZ3JvdXBzIGFyZSBjcnVjaWFsIGZvciB0aGUgYW5hbHlzaXMuDQozLiAqKkNvbnNpZGVyIHRoZSBjb250ZXh0IG9mIG1pc3Np
bmcgdmFyaWFibGVzKiogd2hlbiBpbnRlcnByZXRpbmcgcmVzdWx0c+KAlGlmIGEgY2VydGFpbiBjYXRlZ29yeSBpcyB1bmRlcnJlcHJlc2VudGVkLCBpdCBtaWdodCBza2V3IHJlc3VsdHMgb3IgbGVhZCB0byBmYXVsdHkgY29uY2x1c2lvbnMgYWJvdXQgdGhlIHJlbGF0aW9uc2hpcHMgd2l0aGluIHRoZSBkYXRhLg0KDQpCeSByZWZpbmluZyB0aGUgc2FtcGxpbmcgcHJvY2VzcyBhbmQgZW5zdXJpbmcgYmFsYW5jZWQgcmVwcmVzZW50YXRpb24sIHdlIGNhbiBkcmF3IG1vcmUgcm9idXN0LCB1bmJpYXNlZCBjb25jbHVzaW9ucywgbWFraW5nIG91ciBhbmFseXNlcyBtb3JlIHJlbGlhYmxlLg0KICAgICANCmBgYHtyfQ0KI0p1c3QgdHJ5aW5nIE1vbnRlIENhcmxvIFNpbXVsYXRpb24gDQoNCiMgTnVtYmVyIG9mIHNpbXVsYXRpb25zDQpudW1fc2ltdWxhdGlvbnMgPC0gMTAwMA0KDQojIFN0b3JlIHJlc3VsdHMgb2YgZWFjaCBzaW11bGF0aW9uDQpyZXN1bHRzIDwtIHZlY3RvcigibGlzdCIsIG51bV9zaW11bGF0aW9ucykNCg0KIyBTaW11bGF0aW9uIHByb2Nlc3MNCmZvciAoaSBpbiAxOm51bV9zaW11bGF0aW9ucykgew0KICAjIFJhbmRvbWx5IHNhbXBsZSBmcm9tIHRoZSBkYXRhc2V0DQogIHNhbXBsZWRfZGF0YSA8LSBkZl8xW3NhbXBsZShucm93KGRmXzEpLCBzaXplID0gbnJvdyhkZl8xKSwgcmVwbGFjZSA9IFRSVUUpLCBdDQogIA0KICAjIENvdW50IGNhdGVnb3JpY2FsIGFuZCBudW1lcmljIHZhcmlhYmxlcyBpbiB0aGUgc2FtcGxlZCBkYXRhDQogIGNhdF9jb3VudCA8LSBzdW0oc2FwcGx5KHNhbXBsZWRfZGF0YSwgaXMuZmFjdG9yKSkNCiAgbnVtX2NvdW50IDwtIHN1bShzYXBwbHkoc2FtcGxlZF9kYXRhLCBpcy5udW1lcmljKSkNCiAgDQogICMgU3RvcmUgdGhlIHJlc3VsdHMNCiAgcmVzdWx0c1tbaV1dIDwtIGxpc3QoY2F0X2NvdW50ID0gY2F0X2NvdW50LCBudW1fY291bnQgPSBudW1fY291bnQpDQp9DQoNCiMgQ29udmVydCByZXN1bHRzIGludG8gYSBkYXRhZnJhbWUgZm9yIGVhc2llciBhbmFseXNpcw0Kc2ltdWxhdGlvbl9yZXN1bHRzIDwtIGJpbmRfcm93cyhyZXN1bHRzKQ0KDQojIFN1bW1hcml6ZSBob3cgb2Z0ZW4gY2F0ZWdvcmljYWwgdmFyaWFibGVzIGFwcGVhciBhY3Jvc3MgYWxsIHNpbXVsYXRpb25zDQpjYXRfYXBwZWFyYW5jZV9yYXRlIDwtIG1lYW4oc2ltdWxhdGlvbl9yZXN1bHRzJGNhdF9jb3VudCA+IDApDQoNCiMgQ2hlY2sgdGhlIGRpc3RyaWJ1dGlvbiBvZiBudW1lcmljIHZzIGNhdGVnb3JpY2FsIGNvdW50cw0KdGFibGUoc2ltdWxhdGlvbl9yZXN1bHRzJGNhdF9jb3VudCA+IDApDQoNCiMgUHJpbnQgdGhlIGFwcGVhcmFuY2UgcmF0ZSBvZiBjYXRlZ29yaWNhbCB2YXJpYWJsZXMNCnByaW50KHBhc3RlKCJSYXRlIG9mIENhdGVnb3JpY2FsIFZhcmlhYmxlcyBpbiBTdWJzYW1wbGVzOiIsIGNhdF9hcHBlYXJhbmNlX3JhdGUpKQ0KDQpgYGANCiMjIyBFeHBsbmF0aW9uOg0KDQoxLiAqKlRSVUUqKjogVGhpcyBp
bmRpY2F0ZXMgdGhhdCBpbiBlYWNoIG9mIHRoZSAxLDAwMCBzaW11bGF0aW9ucywgKiphdCBsZWFzdCBvbmUgY2F0ZWdvcmljYWwgdmFyaWFibGUqKiB3YXMgcHJlc2VudCBpbiB0aGUgc3Vic2FtcGxlLiBTbywgaW4gZXZlcnkgcmFuZG9tIHN1YnNhbXBsZSwgdGhlcmUgd2FzIGF0IGxlYXN0IG9uZSBjYXRlZ29yaWNhbCB2YXJpYWJsZSBpbmNsdWRlZC4NCg0KMi4gKioxMDAwKio6IFRoaXMgaXMgc2ltcGx5IHNob3dpbmcgdGhhdCB3ZSByYW4gMSwwMDAgc2ltdWxhdGlvbnMsIHdoaWNoIGFsaWducyB3aXRoIHRoZSBudW1iZXIgb2YgaXRlcmF0aW9ucyBpbiB0aGUgTW9udGUgQ2FybG8gc2ltdWxhdGlvbi4NCg0KMy4gKipSYXRlIG9mIENhdGVnb3JpY2FsIFZhcmlhYmxlcyBpbiBTdWJzYW1wbGVzOiAxKio6IFRoaXMgbWVhbnMgdGhhdCBpbiBhbGwgMSwwMDAgc3Vic2FtcGxlcywgY2F0ZWdvcmljYWwgdmFyaWFibGVzIGFyZSBwcmVzZW50LiBUaGUgcmF0ZSBvZiAqKjEqKiBzaWduaWZpZXMgdGhhdCBjYXRlZ29yaWNhbCB2YXJpYWJsZXMgd2VyZSBhbHdheXMgaW5jbHVkZWQgaW4gdGhlIHN1YnNhbXBsZXMuIFNvLCB0aGVyZSB3YXMgbm8gY2FzZSB3aGVyZSBjYXRlZ29yaWNhbCB2YXJpYWJsZXMgd2VyZSBjb21wbGV0ZWx5IG1pc3NpbmcgaW4gYW55IG9mIHRoZSBzaW11bGF0aW9ucy4NCg0KDQojIyMgQ29uY2x1c2lvbnM6DQotICoqQ2F0ZWdvcmljYWwgVmFyaWFibGVzIGFyZSBBbHdheXMgUmVwcmVzZW50ZWQqKjogVGhlIHN1YnNhbXBsaW5nIHN0cmF0ZWd5LCBhcyBzZXQgdXAsIGFsd2F5cyBpbmNsdWRlcyBjYXRlZ29yaWNhbCB2YXJpYWJsZXMuIFRoaXMgc3VnZ2VzdHMgdGhhdCwgdW5kZXIgdGhlIGNvbmRpdGlvbnMgb2YgeW91ciBzaW11bGF0aW9uIChzYW1wbGUgc2l6ZSwgcmVwbGFjZW1lbnQpLCBvdXIgcmFuZG9tIHNhbXBsaW5nIHByb2NlZHVyZSBjb25zaXN0ZW50bHkgaW5jbHVkZXMgY2F0ZWdvcmljYWwgdmFyaWFibGVzLg0KICANCi0gKipObyBNaXNzaW5nIENhdGVnb3JpZXMqKjogVGhlIGZhY3QgdGhhdCB0aGUgcmF0ZSBpcyAxIGltcGxpZXMgdGhhdCBpbiBvdXIgc3Vic2FtcGxpbmcgYXBwcm9hY2gsIGNhdGVnb3JpY2FsIHZhcmlhYmxlcyBhcmUgYWx3YXlzIHByZXNlbnQsIHdoaWNoIGlzIGEgcG9zaXRpdmUgZmluZGluZy4gSXQgbWVhbnMgd2UgZG9uJ3QgbmVlZCB0byB3b3JyeSBhYm91dCBtaXNzaW5nIG91dCBvbiBjYXRlZ29yaWNhbCB2YXJpYWJsZXMgdW5kZXIgdGhlIGN1cnJlbnQgc2V0dXAuDQoNCg0KDQojIyMgTmV4dCBTdGVwczoNCkkgY291bGQgZXhwYW5kIHRoZSBzaW11bGF0aW9uIHRvIHRyYWNrIGhvdyBvZnRlbiBkaWZmZXJlbnQgY2F0ZWdvcmllcyBhcHBlYXIgYW5kIGV4cGxvcmUgaWYgc29tZSBjYXRlZ29yaWVzIGFyZSBtb3JlIHByb25lIHRvIGJlaW5nIHNhbXBsZWQgb3ZlciBvdGhlcnMu