PE-LA1-FINALE

Author

ANKITHA-SANJANA

EXPLORATORY DATA ANALYSIS : LA1

TEAM NAME: DEADLINE SURVIVORS

TEAM NUMBER: 32

TEAM MEMBERS:

ANKTHA KUMARI 1NT23IS027 SECTION A

SANJANA S SONDUR 1NT23IS193 SECTION D

library(tidyverse)   # Load core data manipulation and visualization packages
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)       # Read CSV and other data files
library(dplyr)       # Data manipulation (filter, select, mutate, etc.)
library(ggplot2)     # Data visualization
library(skimr)       # Summary statistics of dataset
library(janitor)     # Data cleaning and formatting tools

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
data <- read_csv("UCI_Credit_Card.csv")# Import dataset from CSV file
Rows: 30000 Columns: 25
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (25): ID, LIMIT_BAL, SEX, EDUCATION, MARRIAGE, AGE, PAY_0, PAY_2, PAY_3,...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(data)  # Open dataset in spreadsheet viewer
str(data) # Display structure (data types and columns)
spc_tbl_ [30,000 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ID                        : num [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
 $ LIMIT_BAL                 : num [1:30000] 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ SEX                       : num [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION                 : num [1:30000] 2 2 2 2 2 1 1 2 3 3 ...
 $ MARRIAGE                  : num [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
 $ AGE                       : num [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
 $ PAY_0                     : num [1:30000] 2 -1 0 0 -1 0 0 0 0 -2 ...
 $ PAY_2                     : num [1:30000] 2 2 0 0 0 0 0 -1 0 -2 ...
 $ PAY_3                     : num [1:30000] -1 0 0 0 -1 0 0 -1 2 -2 ...
 $ PAY_4                     : num [1:30000] -1 0 0 0 0 0 0 0 0 -2 ...
 $ PAY_5                     : num [1:30000] -2 0 0 0 0 0 0 0 0 -1 ...
 $ PAY_6                     : num [1:30000] -2 2 0 0 0 0 0 -1 0 -1 ...
 $ BILL_AMT1                 : num [1:30000] 3913 2682 29239 46990 8617 ...
 $ BILL_AMT2                 : num [1:30000] 3102 1725 14027 48233 5670 ...
 $ BILL_AMT3                 : num [1:30000] 689 2682 13559 49291 35835 ...
 $ BILL_AMT4                 : num [1:30000] 0 3272 14331 28314 20940 ...
 $ BILL_AMT5                 : num [1:30000] 0 3455 14948 28959 19146 ...
 $ BILL_AMT6                 : num [1:30000] 0 3261 15549 29547 19131 ...
 $ PAY_AMT1                  : num [1:30000] 0 0 1518 2000 2000 ...
 $ PAY_AMT2                  : num [1:30000] 689 1000 1500 2019 36681 ...
 $ PAY_AMT3                  : num [1:30000] 0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4                  : num [1:30000] 0 1000 1000 1100 9000 ...
 $ PAY_AMT5                  : num [1:30000] 0 0 1000 1069 689 ...
 $ PAY_AMT6                  : num [1:30000] 0 2000 5000 1000 679 ...
 $ default.payment.next.month: num [1:30000] 1 1 0 0 0 0 0 0 0 0 ...
 - attr(*, "spec")=
  .. cols(
  ..   ID = col_double(),
  ..   LIMIT_BAL = col_double(),
  ..   SEX = col_double(),
  ..   EDUCATION = col_double(),
  ..   MARRIAGE = col_double(),
  ..   AGE = col_double(),
  ..   PAY_0 = col_double(),
  ..   PAY_2 = col_double(),
  ..   PAY_3 = col_double(),
  ..   PAY_4 = col_double(),
  ..   PAY_5 = col_double(),
  ..   PAY_6 = col_double(),
  ..   BILL_AMT1 = col_double(),
  ..   BILL_AMT2 = col_double(),
  ..   BILL_AMT3 = col_double(),
  ..   BILL_AMT4 = col_double(),
  ..   BILL_AMT5 = col_double(),
  ..   BILL_AMT6 = col_double(),
  ..   PAY_AMT1 = col_double(),
  ..   PAY_AMT2 = col_double(),
  ..   PAY_AMT3 = col_double(),
  ..   PAY_AMT4 = col_double(),
  ..   PAY_AMT5 = col_double(),
  ..   PAY_AMT6 = col_double(),
  ..   default.payment.next.month = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
summary(data)  # Statistical summary (min, max, mean, quartiles)
       ID          LIMIT_BAL            SEX          EDUCATION    
 Min.   :    1   Min.   :  10000   Min.   :1.000   Min.   :0.000  
 1st Qu.: 7501   1st Qu.:  50000   1st Qu.:1.000   1st Qu.:1.000  
 Median :15000   Median : 140000   Median :2.000   Median :2.000  
 Mean   :15000   Mean   : 167484   Mean   :1.604   Mean   :1.853  
 3rd Qu.:22500   3rd Qu.: 240000   3rd Qu.:2.000   3rd Qu.:2.000  
 Max.   :30000   Max.   :1000000   Max.   :2.000   Max.   :6.000  
    MARRIAGE          AGE            PAY_0             PAY_2        
 Min.   :0.000   Min.   :21.00   Min.   :-2.0000   Min.   :-2.0000  
 1st Qu.:1.000   1st Qu.:28.00   1st Qu.:-1.0000   1st Qu.:-1.0000  
 Median :2.000   Median :34.00   Median : 0.0000   Median : 0.0000  
 Mean   :1.552   Mean   :35.49   Mean   :-0.0167   Mean   :-0.1338  
 3rd Qu.:2.000   3rd Qu.:41.00   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
 Max.   :3.000   Max.   :79.00   Max.   : 8.0000   Max.   : 8.0000  
     PAY_3             PAY_4             PAY_5             PAY_6        
 Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000  
 1st Qu.:-1.0000   1st Qu.:-1.0000   1st Qu.:-1.0000   1st Qu.:-1.0000  
 Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 0.0000  
 Mean   :-0.1662   Mean   :-0.2207   Mean   :-0.2662   Mean   :-0.2911  
 3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.: 0.0000  
 Max.   : 8.0000   Max.   : 8.0000   Max.   : 8.0000   Max.   : 8.0000  
   BILL_AMT1         BILL_AMT2        BILL_AMT3         BILL_AMT4      
 Min.   :-165580   Min.   :-69777   Min.   :-157264   Min.   :-170000  
 1st Qu.:   3559   1st Qu.:  2985   1st Qu.:   2666   1st Qu.:   2327  
 Median :  22382   Median : 21200   Median :  20089   Median :  19052  
 Mean   :  51223   Mean   : 49179   Mean   :  47013   Mean   :  43263  
 3rd Qu.:  67091   3rd Qu.: 64006   3rd Qu.:  60165   3rd Qu.:  54506  
 Max.   : 964511   Max.   :983931   Max.   :1664089   Max.   : 891586  
   BILL_AMT5        BILL_AMT6          PAY_AMT1         PAY_AMT2      
 Min.   :-81334   Min.   :-339603   Min.   :     0   Min.   :      0  
 1st Qu.:  1763   1st Qu.:   1256   1st Qu.:  1000   1st Qu.:    833  
 Median : 18105   Median :  17071   Median :  2100   Median :   2009  
 Mean   : 40311   Mean   :  38872   Mean   :  5664   Mean   :   5921  
 3rd Qu.: 50191   3rd Qu.:  49198   3rd Qu.:  5006   3rd Qu.:   5000  
 Max.   :927171   Max.   : 961664   Max.   :873552   Max.   :1684259  
    PAY_AMT3         PAY_AMT4         PAY_AMT5           PAY_AMT6       
 Min.   :     0   Min.   :     0   Min.   :     0.0   Min.   :     0.0  
 1st Qu.:   390   1st Qu.:   296   1st Qu.:   252.5   1st Qu.:   117.8  
 Median :  1800   Median :  1500   Median :  1500.0   Median :  1500.0  
 Mean   :  5226   Mean   :  4826   Mean   :  4799.4   Mean   :  5215.5  
 3rd Qu.:  4505   3rd Qu.:  4013   3rd Qu.:  4031.5   3rd Qu.:  4000.0  
 Max.   :896040   Max.   :621000   Max.   :426529.0   Max.   :528666.0  
 default.payment.next.month
 Min.   :0.0000            
 1st Qu.:0.0000            
 Median :0.0000            
 Mean   :0.2212            
 3rd Qu.:0.0000            
 Max.   :1.0000            
head(data) # Show first 6 rows
# A tibble: 6 × 25
     ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
  <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     20000     2         2        1    24     2     2    -1    -1    -2
2     2    120000     2         2        2    26    -1     2     0     0     0
3     3     90000     2         2        2    34     0     0     0     0     0
4     4     50000     2         2        1    37     0     0     0     0     0
5     5     50000     1         2        1    57    -1     0    -1     0     0
6     6     50000     1         1        2    37     0     0     0     0     0
# ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>
tail(data) # Show last 6 rows
# A tibble: 6 × 25
     ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
  <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 29995     80000     1         2        2    34     2     2     2     2     2
2 29996    220000     1         3        1    39     0     0     0     0     0
3 29997    150000     1         3        2    43    -1    -1    -1    -1     0
4 29998     30000     1         2        2    37     4     3     2    -1     0
5 29999     80000     1         3        1    41     1    -1     0     0     0
6 30000     50000     1         2        1    46     0     0     0     0     0
# ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>
dim(data)  # Number of rows and columns
[1] 30000    25
nrow(data)  # Number of rows
[1] 30000
ncol(data) #Number of columns
[1] 25
names(data)  # Column names
 [1] "ID"                         "LIMIT_BAL"                 
 [3] "SEX"                        "EDUCATION"                 
 [5] "MARRIAGE"                   "AGE"                       
 [7] "PAY_0"                      "PAY_2"                     
 [9] "PAY_3"                      "PAY_4"                     
[11] "PAY_5"                      "PAY_6"                     
[13] "BILL_AMT1"                  "BILL_AMT2"                 
[15] "BILL_AMT3"                  "BILL_AMT4"                 
[17] "BILL_AMT5"                  "BILL_AMT6"                 
[19] "PAY_AMT1"                   "PAY_AMT2"                  
[21] "PAY_AMT3"                   "PAY_AMT4"                  
[23] "PAY_AMT5"                   "PAY_AMT6"                  
[25] "default.payment.next.month"
class(data)  # Object type
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
typeof(data$LIMIT_BAL)  # Data type of specific column
[1] "double"
data <- mutate(data, newcol = NA); head(data) # Add new column
# A tibble: 6 × 26
     ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
  <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     20000     2         2        1    24     2     2    -1    -1    -2
2     2    120000     2         2        2    26    -1     2     0     0     0
3     3     90000     2         2        2    34     0     0     0     0     0
4     4     50000     2         2        1    37     0     0     0     0     0
5     5     50000     1         2        1    57    -1     0    -1     0     0
6     6     50000     1         1        2    37     0     0     0     0     0
# ℹ 15 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   newcol <lgl>
data$newcol <- NULL; head(data) # Delete column
# A tibble: 6 × 25
     ID LIMIT_BAL   SEX EDUCATION MARRIAGE   AGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
  <dbl>     <dbl> <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     20000     2         2        1    24     2     2    -1    -1    -2
2     2    120000     2         2        2    26    -1     2     0     0     0
3     3     90000     2         2        2    34     0     0     0     0     0
4     4     50000     2         2        1    37     0     0     0     0     0
5     5     50000     1         2        1    57    -1     0    -1     0     0
6     6     50000     1         1        2    37     0     0     0     0     0
# ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>
data <- rename(data, GENDER = SEX); names(data)  # Rename column
 [1] "ID"                         "LIMIT_BAL"                 
 [3] "GENDER"                     "EDUCATION"                 
 [5] "MARRIAGE"                   "AGE"                       
 [7] "PAY_0"                      "PAY_2"                     
 [9] "PAY_3"                      "PAY_4"                     
[11] "PAY_5"                      "PAY_6"                     
[13] "BILL_AMT1"                  "BILL_AMT2"                 
[15] "BILL_AMT3"                  "BILL_AMT4"                 
[17] "BILL_AMT5"                  "BILL_AMT6"                 
[19] "PAY_AMT1"                   "PAY_AMT2"                  
[21] "PAY_AMT3"                   "PAY_AMT4"                  
[23] "PAY_AMT5"                   "PAY_AMT6"                  
[25] "default.payment.next.month"
data <- select(data, LIMIT_BAL, AGE, everything()); head(data)  # Reorder columns
# A tibble: 6 × 25
  LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
      <dbl> <dbl> <dbl>  <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     20000    24     1      2         2        1     2     2    -1    -1    -2
2    120000    26     2      2         2        2    -1     2     0     0     0
3     90000    34     3      2         2        2     0     0     0     0     0
4     50000    37     4      2         2        1     0     0     0     0     0
5     50000    57     5      1         2        1    -1     0    -1     0     0
6     50000    37     6      1         1        2     0     0     0     0     0
# ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>
data %>% filter(LIMIT_BAL > 50000)  # Filter rows
# A tibble: 22,324 × 25
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl>  <dbl>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1    120000    26     2      2         2        2    -1     2     0     0     0
 2     90000    34     3      2         2        2     0     0     0     0     0
 3    500000    29     7      1         1        2     0     0     0     0     0
 4    100000    23     8      2         2        2     0    -1    -1     0     0
 5    140000    28     9      2         3        1     0     0     2     0     0
 6    200000    34    11      2         3        2     0     0     2     0     0
 7    260000    51    12      2         1        2    -1    -1    -1    -1    -1
 8    630000    41    13      2         2        2    -1     0    -1    -1    -1
 9     70000    30    14      1         2        2     1     2     2     0     0
10    250000    29    15      1         1        2     0     0     0     0     0
# ℹ 22,314 more rows
# ℹ 14 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>
data %>% select(LIMIT_BAL, AGE)  # Select columns
# A tibble: 30,000 × 2
   LIMIT_BAL   AGE
       <dbl> <dbl>
 1     20000    24
 2    120000    26
 3     90000    34
 4     50000    37
 5     50000    57
 6     50000    37
 7    500000    29
 8    100000    23
 9    140000    28
10     20000    35
# ℹ 29,990 more rows
data$EDUCATION <- factor(data$EDUCATION); str(data) # Convert numeric to categorical (factor)
tibble [30,000 × 25] (S3: tbl_df/tbl/data.frame)
 $ LIMIT_BAL                 : num [1:30000] 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ AGE                       : num [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
 $ ID                        : num [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
 $ GENDER                    : num [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION                 : Factor w/ 7 levels "0","1","2","3",..: 3 3 3 3 3 2 2 3 4 4 ...
 $ MARRIAGE                  : num [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
 $ PAY_0                     : num [1:30000] 2 -1 0 0 -1 0 0 0 0 -2 ...
 $ PAY_2                     : num [1:30000] 2 2 0 0 0 0 0 -1 0 -2 ...
 $ PAY_3                     : num [1:30000] -1 0 0 0 -1 0 0 -1 2 -2 ...
 $ PAY_4                     : num [1:30000] -1 0 0 0 0 0 0 0 0 -2 ...
 $ PAY_5                     : num [1:30000] -2 0 0 0 0 0 0 0 0 -1 ...
 $ PAY_6                     : num [1:30000] -2 2 0 0 0 0 0 -1 0 -1 ...
 $ BILL_AMT1                 : num [1:30000] 3913 2682 29239 46990 8617 ...
 $ BILL_AMT2                 : num [1:30000] 3102 1725 14027 48233 5670 ...
 $ BILL_AMT3                 : num [1:30000] 689 2682 13559 49291 35835 ...
 $ BILL_AMT4                 : num [1:30000] 0 3272 14331 28314 20940 ...
 $ BILL_AMT5                 : num [1:30000] 0 3455 14948 28959 19146 ...
 $ BILL_AMT6                 : num [1:30000] 0 3261 15549 29547 19131 ...
 $ PAY_AMT1                  : num [1:30000] 0 0 1518 2000 2000 ...
 $ PAY_AMT2                  : num [1:30000] 689 1000 1500 2019 36681 ...
 $ PAY_AMT3                  : num [1:30000] 0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4                  : num [1:30000] 0 1000 1000 1100 9000 ...
 $ PAY_AMT5                  : num [1:30000] 0 0 1000 1069 689 ...
 $ PAY_AMT6                  : num [1:30000] 0 2000 5000 1000 679 ...
 $ default.payment.next.month: num [1:30000] 1 1 0 0 0 0 0 0 0 0 ...
levels(data$EDUCATION) # Show factor levels
[1] "0" "1" "2" "3" "4" "5" "6"
data$EDUCATION <- reorder(data$EDUCATION, data$LIMIT_BAL); str(data)  # Reorder factor based on LIMIT_BAL
tibble [30,000 × 25] (S3: tbl_df/tbl/data.frame)
 $ LIMIT_BAL                 : num [1:30000] 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ AGE                       : num [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
 $ ID                        : num [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
 $ GENDER                    : num [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION                 : Factor w/ 7 levels "3","2","6","5",..: 2 2 2 2 2 5 5 2 1 1 ...
  ..- attr(*, "scores")= num [1:7(1d)] 217143 212956 147062 126550 220894 ...
  .. ..- attr(*, "dimnames")=List of 1
  .. .. ..$ : chr [1:7] "0" "1" "2" "3" ...
 $ MARRIAGE                  : num [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
 $ PAY_0                     : num [1:30000] 2 -1 0 0 -1 0 0 0 0 -2 ...
 $ PAY_2                     : num [1:30000] 2 2 0 0 0 0 0 -1 0 -2 ...
 $ PAY_3                     : num [1:30000] -1 0 0 0 -1 0 0 -1 2 -2 ...
 $ PAY_4                     : num [1:30000] -1 0 0 0 0 0 0 0 0 -2 ...
 $ PAY_5                     : num [1:30000] -2 0 0 0 0 0 0 0 0 -1 ...
 $ PAY_6                     : num [1:30000] -2 2 0 0 0 0 0 -1 0 -1 ...
 $ BILL_AMT1                 : num [1:30000] 3913 2682 29239 46990 8617 ...
 $ BILL_AMT2                 : num [1:30000] 3102 1725 14027 48233 5670 ...
 $ BILL_AMT3                 : num [1:30000] 689 2682 13559 49291 35835 ...
 $ BILL_AMT4                 : num [1:30000] 0 3272 14331 28314 20940 ...
 $ BILL_AMT5                 : num [1:30000] 0 3455 14948 28959 19146 ...
 $ BILL_AMT6                 : num [1:30000] 0 3261 15549 29547 19131 ...
 $ PAY_AMT1                  : num [1:30000] 0 0 1518 2000 2000 ...
 $ PAY_AMT2                  : num [1:30000] 689 1000 1500 2019 36681 ...
 $ PAY_AMT3                  : num [1:30000] 0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4                  : num [1:30000] 0 1000 1000 1100 9000 ...
 $ PAY_AMT5                  : num [1:30000] 0 0 1000 1069 689 ...
 $ PAY_AMT6                  : num [1:30000] 0 2000 5000 1000 679 ...
 $ default.payment.next.month: num [1:30000] 1 1 0 0 0 0 0 0 0 0 ...
levels(data$EDUCATION) <- c("Unknown","Grad","Univ","HighSchool","Others","Other1","Other2") # Assign meaningful labels to categories
table(data$EDUCATION)# Count frequency of each category

   Unknown       Grad       Univ HighSchool     Others     Other1     Other2 
      4917      14030         51        280      10585         14        123 
data$EDUCATION <- droplevels(data$EDUCATION);str(data)  # Remove unused levels
tibble [30,000 × 25] (S3: tbl_df/tbl/data.frame)
 $ LIMIT_BAL                 : num [1:30000] 20000 120000 90000 50000 50000 50000 500000 100000 140000 20000 ...
 $ AGE                       : num [1:30000] 24 26 34 37 57 37 29 23 28 35 ...
 $ ID                        : num [1:30000] 1 2 3 4 5 6 7 8 9 10 ...
 $ GENDER                    : num [1:30000] 2 2 2 2 1 1 1 2 2 1 ...
 $ EDUCATION                 : Factor w/ 7 levels "Unknown","Grad",..: 2 2 2 2 2 5 5 2 1 1 ...
 $ MARRIAGE                  : num [1:30000] 1 2 2 1 1 2 2 2 1 2 ...
 $ PAY_0                     : num [1:30000] 2 -1 0 0 -1 0 0 0 0 -2 ...
 $ PAY_2                     : num [1:30000] 2 2 0 0 0 0 0 -1 0 -2 ...
 $ PAY_3                     : num [1:30000] -1 0 0 0 -1 0 0 -1 2 -2 ...
 $ PAY_4                     : num [1:30000] -1 0 0 0 0 0 0 0 0 -2 ...
 $ PAY_5                     : num [1:30000] -2 0 0 0 0 0 0 0 0 -1 ...
 $ PAY_6                     : num [1:30000] -2 2 0 0 0 0 0 -1 0 -1 ...
 $ BILL_AMT1                 : num [1:30000] 3913 2682 29239 46990 8617 ...
 $ BILL_AMT2                 : num [1:30000] 3102 1725 14027 48233 5670 ...
 $ BILL_AMT3                 : num [1:30000] 689 2682 13559 49291 35835 ...
 $ BILL_AMT4                 : num [1:30000] 0 3272 14331 28314 20940 ...
 $ BILL_AMT5                 : num [1:30000] 0 3455 14948 28959 19146 ...
 $ BILL_AMT6                 : num [1:30000] 0 3261 15549 29547 19131 ...
 $ PAY_AMT1                  : num [1:30000] 0 0 1518 2000 2000 ...
 $ PAY_AMT2                  : num [1:30000] 689 1000 1500 2019 36681 ...
 $ PAY_AMT3                  : num [1:30000] 0 1000 1000 1200 10000 657 38000 0 432 0 ...
 $ PAY_AMT4                  : num [1:30000] 0 1000 1000 1100 9000 ...
 $ PAY_AMT5                  : num [1:30000] 0 0 1000 1069 689 ...
 $ PAY_AMT6                  : num [1:30000] 0 2000 5000 1000 679 ...
 $ default.payment.next.month: num [1:30000] 1 1 0 0 0 0 0 0 0 0 ...
unique(data$GENDER) # Check unique values in GENDER
[1] 2 1
data$GENDER <- recode(data$GENDER, "1"="Male", "2"="Female") # Convert numeric gender codes to labels
table(data$GENDER) # Frequency of gender

Female   Male 
 18112  11888 
prop.table(table(data$GENDER)) # Proportion of each gender

   Female      Male 
0.6037333 0.3962667 
data$LIMIT_CAT <- cut(data$LIMIT_BAL, breaks=3) # Convert numeric LIMIT_BAL into categories
table(data$LIMIT_CAT)  # Count observations in each category

(9.01e+03,3.4e+05]  (3.4e+05,6.7e+05]    (6.7e+05,1e+06] 
             26412               3552                 36 
levels(data$LIMIT_CAT) # View category ranges
[1] "(9.01e+03,3.4e+05]" "(3.4e+05,6.7e+05]"  "(6.7e+05,1e+06]"   
data <- mutate(data, TOTAL_BILL = BILL_AMT1 + BILL_AMT2) # Create new column as sum of two bill amounts
head(data) 
# A tibble: 6 × 27
  LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
      <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     20000    24     1 Female Grad             1     2     2    -1    -1    -2
2    120000    26     2 Female Grad             2    -1     2     0     0     0
3     90000    34     3 Female Grad             2     0     0     0     0     0
4     50000    37     4 Female Grad             1     0     0     0     0     0
5     50000    57     5 Male   Grad             1    -1     0    -1     0     0
6     50000    37     6 Male   Others           2     0     0     0     0     0
# ℹ 16 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>
data$TOTAL_BILL[1:10] # Display first 10 values of new column
 [1]   7015   4407  43266  95223  14287 121469 779988  12256  25381      0
data %>% group_by(GENDER) %>% summarise(avg = mean(LIMIT_BAL))
# A tibble: 2 × 2
  GENDER     avg
  <chr>    <dbl>
1 Female 170086.
2 Male   163520.
# Group data by gender and calculate the average credit limit for each group
data %>% group_by(GENDER) %>% summarise(count = n())
# A tibble: 2 × 2
  GENDER count
  <chr>  <int>
1 Female 18112
2 Male   11888
# Count the number of observations (rows) for each gender group
data %>% group_by(GENDER) %>% summarise(sd = sd(LIMIT_BAL))
# A tibble: 2 × 2
  GENDER      sd
  <chr>    <dbl>
1 Female 125232.
2 Male   136250.
# Compute the standard deviation of credit limit for each gender group
data %>% group_by(GENDER) %>% summarise(min_val = min(LIMIT_BAL))
# A tibble: 2 × 2
  GENDER min_val
  <chr>    <dbl>
1 Female   10000
2 Male     10000
# Find the minimum credit limit within each gender group
data %>% group_by(GENDER) %>% summarise(max_val = max(LIMIT_BAL))
# A tibble: 2 × 2
  GENDER max_val
  <chr>    <dbl>
1 Female 1000000
2 Male    800000
# Find the maximum credit limit within each gender group
data %>% group_by(GENDER) %>% mutate(avg_limit = mean(LIMIT_BAL))
# A tibble: 30,000 × 28
# Groups:   GENDER [2]
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     20000    24     1 Female Grad             1     2     2    -1    -1    -2
 2    120000    26     2 Female Grad             2    -1     2     0     0     0
 3     90000    34     3 Female Grad             2     0     0     0     0     0
 4     50000    37     4 Female Grad             1     0     0     0     0     0
 5     50000    57     5 Male   Grad             1    -1     0    -1     0     0
 6     50000    37     6 Male   Others           2     0     0     0     0     0
 7    500000    29     7 Male   Others           2     0     0     0     0     0
 8    100000    23     8 Female Grad             2     0    -1    -1     0     0
 9    140000    28     9 Female Unknown          1     0     0     2     0     0
10     20000    35    10 Male   Unknown          2    -2    -2    -2    -2    -1
# ℹ 29,990 more rows
# ℹ 17 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>, avg_limit <dbl>
# Add a new column showing the average credit limit for each gender (repeated per row)
data %>% group_by(GENDER) %>% mutate(rank = rank(LIMIT_BAL))
# A tibble: 30,000 × 28
# Groups:   GENDER [2]
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     20000    24     1 Female Grad             1     2     2    -1    -1    -2
 2    120000    26     2 Female Grad             2    -1     2     0     0     0
 3     90000    34     3 Female Grad             2     0     0     0     0     0
 4     50000    37     4 Female Grad             1     0     0     0     0     0
 5     50000    57     5 Male   Grad             1    -1     0    -1     0     0
 6     50000    37     6 Male   Others           2     0     0     0     0     0
 7    500000    29     7 Male   Others           2     0     0     0     0     0
 8    100000    23     8 Female Grad             2     0    -1    -1     0     0
 9    140000    28     9 Female Unknown          1     0     0     2     0     0
10     20000    35    10 Male   Unknown          2    -2    -2    -2    -2    -1
# ℹ 29,990 more rows
# ℹ 17 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>, rank <dbl>
# Rank credit limit values within each gender group
mean(data$LIMIT_BAL)
[1] 167484.3
# Calculate the average (mean) credit limit across all observations
median(data$LIMIT_BAL)
[1] 140000
# Find the median (middle value) of the credit limit
sd(data$LIMIT_BAL)
[1] 129747.7
# Compute the standard deviation of the credit limit (measure of spread)
min(data$LIMIT_BAL)
[1] 10000
# Get the minimum (smallest) credit limit value
max(data$LIMIT_BAL)
[1] 1e+06
# Get the maximum (largest) credit limit value
data[order(data$LIMIT_BAL), ][1:10, ]
# A tibble: 10 × 27
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     10000    22    43 Male   Grad             2     0     0     0     0     0
 2     10000    56    67 Male   Grad             1     2     2     2     0     0
 3     10000    23   267 Male   Unknown          2     0     0     0     0     0
 4     10000    27   294 Male   Grad             2     0     0     2     0     0
 5     10000    24   368 Male   Grad             2    -1     2     2     2     0
 6     10000    22   519 Female Grad             2     1     2     0     0     0
 7     10000    46   533 Male   Grad             2     0     0     2     2     2
 8     10000    33   655 Male   Grad             2     0     0     0     0     0
 9     10000    22   703 Female Unknown          2     0     0     0     0    -2
10     10000    22   722 Male   Grad             2     0     0     0     0     0
# ℹ 16 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>
# Sort data in ascending order of credit limit and display the first 10
data[order(-data$LIMIT_BAL), ][1:10, ]
# A tibble: 10 × 27
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1   1000000    47  2198 Female Others           1     0     0     0    -1     0
 2    800000    53 14920 Male   Grad             1    -1    -1    -1     0     0
 3    800000    46 18007 Male   Others           2    -2    -2    -2    -1    -1
 4    780000    48 24321 Male   Others           1     0     0     0     0     0
 5    780000    41 28716 Female Grad             2    -2    -2    -2    -2    -2
 6    760000    54 21376 Male   Unknown          1     0     0     0     0     0
 7    750000    43  3734 Female Grad             1    -1    -1    -1    -2    -1
 8    750000    40  4604 Female Unknown          1    -2    -2    -1    -1    -1
 9    750000    41 10090 Female Others           1     0     0     0     0    -1
10    750000    28 13187 Female Others           2     1    -1    -1     0     0
# ℹ 16 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>
# Sort data in descending order of credit limit and display the first 10
subset(data, LIMIT_BAL > 100000)
# A tibble: 17,502 × 27
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1    120000    26     2 Female Grad             2    -1     2     0     0     0
 2    500000    29     7 Male   Others           2     0     0     0     0     0
 3    140000    28     9 Female Unknown          1     0     0     2     0     0
 4    200000    34    11 Female Unknown          2     0     0     2     0     0
 5    260000    51    12 Female Others           2    -1    -1    -1    -1    -1
 6    630000    41    13 Female Grad             2    -1     0    -1    -1    -1
 7    250000    29    15 Male   Others           2     0     0     0     0     0
 8    320000    49    18 Male   Others           1     0     0     0    -1    -1
 9    360000    49    19 Female Others           1     1    -2    -2    -2    -2
10    180000    29    20 Female Others           2     1    -2    -2    -2    -2
# ℹ 17,492 more rows
# ℹ 16 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>
# Filter dataset to include only rows where credit limit is greater than 100,000
subset(data, AGE > 30)
# A tibble: 18,987 × 27
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     90000    34     3 Female Grad             2     0     0     0     0     0
 2     50000    37     4 Female Grad             1     0     0     0     0     0
 3     50000    57     5 Male   Grad             1    -1     0    -1     0     0
 4     50000    37     6 Male   Others           2     0     0     0     0     0
 5     20000    35    10 Male   Unknown          2    -2    -2    -2    -2    -1
 6    200000    34    11 Female Unknown          2     0     0     2     0     0
 7    260000    51    12 Female Others           2    -1    -1    -1    -1    -1
 8    630000    41    13 Female Grad             2    -1     0    -1    -1    -1
 9    320000    49    18 Male   Others           1     0     0     0    -1    -1
10    360000    49    19 Female Others           1     1    -2    -2    -2    -2
# ℹ 18,977 more rows
# ℹ 16 more variables: PAY_6 <dbl>, BILL_AMT1 <dbl>, BILL_AMT2 <dbl>,
#   BILL_AMT3 <dbl>, BILL_AMT4 <dbl>, BILL_AMT5 <dbl>, BILL_AMT6 <dbl>,
#   PAY_AMT1 <dbl>, PAY_AMT2 <dbl>, PAY_AMT3 <dbl>, PAY_AMT4 <dbl>,
#   PAY_AMT5 <dbl>, PAY_AMT6 <dbl>, default.payment.next.month <dbl>,
#   LIMIT_CAT <fct>, TOTAL_BILL <dbl>
# Filter dataset to include only individuals older than 30
sum(is.na(data))
[1] 0
# Count the total number of missing (NA) values in the dataset
colSums(is.na(data))
                 LIMIT_BAL                        AGE 
                         0                          0 
                        ID                     GENDER 
                         0                          0 
                 EDUCATION                   MARRIAGE 
                         0                          0 
                     PAY_0                      PAY_2 
                         0                          0 
                     PAY_3                      PAY_4 
                         0                          0 
                     PAY_5                      PAY_6 
                         0                          0 
                 BILL_AMT1                  BILL_AMT2 
                         0                          0 
                 BILL_AMT3                  BILL_AMT4 
                         0                          0 
                 BILL_AMT5                  BILL_AMT6 
                         0                          0 
                  PAY_AMT1                   PAY_AMT2 
                         0                          0 
                  PAY_AMT3                   PAY_AMT4 
                         0                          0 
                  PAY_AMT5                   PAY_AMT6 
                         0                          0 
default.payment.next.month                  LIMIT_CAT 
                         0                          0 
                TOTAL_BILL 
                         0 
# Count the number of missing values in each column
any(is.na(data))
[1] FALSE
# Check if there are any missing values in the dataset (TRUE/FALSE)
numeric_data <- data[sapply(data, is.numeric)]
# Extract only numeric columns from the dataset
cor(numeric_data)[1:5,1:5]
            LIMIT_BAL         AGE          ID    MARRIAGE       PAY_0
LIMIT_BAL  1.00000000  0.14471280  0.02617916 -0.10813941 -0.27121433
AGE        0.14471280  1.00000000  0.01867773 -0.41416992 -0.03944738
ID         0.02617916  0.01867773  1.00000000 -0.02907943 -0.03057493
MARRIAGE  -0.10813941 -0.41416992 -0.02907943  1.00000000  0.01991719
PAY_0     -0.27121433 -0.03944738 -0.03057493  0.01991719  1.00000000
# Compute correlation matrix for all numeric variables
data_long <- pivot_longer(data,
                         cols = starts_with("BILL_AMT"),
                         names_to="Month",
                         values_to="Amount")  
# Convert data from wide format to long format for BILL_AMT columns
pivot_wider(data_long,
            names_from=Month,
            values_from=Amount)
# A tibble: 30,000 × 27
   LIMIT_BAL   AGE    ID GENDER EDUCATION MARRIAGE PAY_0 PAY_2 PAY_3 PAY_4 PAY_5
       <dbl> <dbl> <dbl> <chr>  <fct>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     20000    24     1 Female Grad             1     2     2    -1    -1    -2
 2    120000    26     2 Female Grad             2    -1     2     0     0     0
 3     90000    34     3 Female Grad             2     0     0     0     0     0
 4     50000    37     4 Female Grad             1     0     0     0     0     0
 5     50000    57     5 Male   Grad             1    -1     0    -1     0     0
 6     50000    37     6 Male   Others           2     0     0     0     0     0
 7    500000    29     7 Male   Others           2     0     0     0     0     0
 8    100000    23     8 Female Grad             2     0    -1    -1     0     0
 9    140000    28     9 Female Unknown          1     0     0     2     0     0
10     20000    35    10 Male   Unknown          2    -2    -2    -2    -2    -1
# ℹ 29,990 more rows
# ℹ 16 more variables: PAY_6 <dbl>, PAY_AMT1 <dbl>, PAY_AMT2 <dbl>,
#   PAY_AMT3 <dbl>, PAY_AMT4 <dbl>, PAY_AMT5 <dbl>, PAY_AMT6 <dbl>,
#   default.payment.next.month <dbl>, LIMIT_CAT <fct>, TOTAL_BILL <dbl>,
#   BILL_AMT1 <dbl>, BILL_AMT2 <dbl>, BILL_AMT3 <dbl>, BILL_AMT4 <dbl>,
#   BILL_AMT5 <dbl>, BILL_AMT6 <dbl>
# Convert data back from long format to wide format
ts_data <- ts(data$LIMIT_BAL) 
# Create a time series object from the LIMIT_BAL variable
time(ts_data) [1:20]
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Extract the time index from the time series
as.numeric(time(ts_data))[1:20]
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Convert the time index to numeric values
as.numeric(ts_data)[1:20]
 [1]  20000 120000  90000  50000  50000  50000 500000 100000 140000  20000
[11] 200000 260000 630000  70000 250000  50000  20000 320000 360000 180000
# Convert time series values into a numeric vector