DescriptiveStatistic1

Tomasz Kruczalak, Krzysica Stanisław, Hatice Tatli

2024-04-23

Description

We compute our statistics separately by self-employment status: ‘yes’ denotes a self-employed person, and ‘no’ denotes a person who is not self-employed.

# Treat the 'card' column as a categorical (factor) variable
CreditCard$card <- factor(CreditCard$card)

# Ratio of monthly credit card expenditure to yearly income.
# The ratio is expenditure / income, which is the same quantity the group
# summaries report as mean_ratio, median_ratio, etc. Rows with zero income
# get 0 instead of Inf/NaN; rows with zero expenditure naturally get 0.
CreditCard <- CreditCard %>%
  mutate(ratio = ifelse(income != 0, expenditure / income, 0))

# Split the data by self-employment status, keeping only individuals with
# non-zero credit card expenditure.
self_employed <- CreditCard %>%
  filter(selfemp == "yes", expenditure != 0)

no_self_employed <- CreditCard %>%
  filter(selfemp == "no", expenditure != 0)

# Compute the descriptive statistics reported for one employment group.
#
# `group_data` is a data frame with numeric `income`, `expenditure` and `age`
# columns. Returns a one-row data frame with 18 columns: mean / median / sd /
# min / max of income and of expenditure, mean / median / sd of age, and
# mean / median / sd / min / max of the expenditure-to-income ratio.
# Missing values are ignored in every statistic (na.rm = TRUE).
summarise_employment_group <- function(group_data) {
  group_data %>%
    # Compute the ratio once instead of repeating the division per statistic.
    mutate(expenditure_to_income = expenditure / income) %>%
    summarise(
      mean_yearly_income = mean(income, na.rm = TRUE),
      median_yearly_income = median(income, na.rm = TRUE),
      sd_yearly_income = sd(income, na.rm = TRUE),
      min_yearly_income = min(income, na.rm = TRUE),
      max_yearly_income = max(income, na.rm = TRUE),

      mean_credit_expenditure = mean(expenditure, na.rm = TRUE),
      median_credit_expenditure = median(expenditure, na.rm = TRUE),
      sd_credit_expenditure = sd(expenditure, na.rm = TRUE),
      min_credit_expenditure = min(expenditure, na.rm = TRUE),
      max_credit_expenditure = max(expenditure, na.rm = TRUE),

      mean_age = mean(age, na.rm = TRUE),
      median_age = median(age, na.rm = TRUE),
      sd_age = sd(age, na.rm = TRUE),

      mean_ratio = mean(expenditure_to_income, na.rm = TRUE),
      median_ratio = median(expenditure_to_income, na.rm = TRUE),
      sd_ratio = sd(expenditure_to_income, na.rm = TRUE),
      min_ratio = min(expenditure_to_income, na.rm = TRUE),
      max_ratio = max(expenditure_to_income, na.rm = TRUE)
    )
}

# Convert a one-row summary table to a vertical format with one row per
# statistic: column `Statistic` holds the name, column `Value` the number.
pivot_summary_vertical <- function(summary_table) {
  summary_table %>%
    pivot_longer(
      cols = everything(),
      names_to = "Statistic",
      values_to = "Value"
    )
}

# Both groups go through the same helpers so their tables always contain the
# same statistics in the same order.
summary_self_employed <- summarise_employment_group(self_employed)
summary_self_employed_vertical <- pivot_summary_vertical(summary_self_employed)

summary_no_self_employed <- summarise_employment_group(no_self_employed)
summary_no_self_employed_vertical <-
  pivot_summary_vertical(summary_no_self_employed)

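Income: Self-employed individuals have a higher average income than non-self-employed individuals (mean 4.35 vs. 3.41). Their incomes are also more dispersed (standard deviation 2.04 vs. 1.68), although the overall minimum-to-maximum range is wider in the non-self-employed group (0.21–13.5 vs. 1.92–12.0).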

Credit Expenditure: Both groups have similar average credit expenditures, but there is wider variability in spending among non-self-employed individuals.

Summary for Self-Employed Individuals:

# Display the vertical (Statistic / Value) summary for the self-employed group
summary_self_employed_vertical
## # A tibble: 18 × 2
##    Statistic                   Value
##    <chr>                       <dbl>
##  1 mean_yearly_income           4.35
##  2 median_yearly_income         3.7 
##  3 sd_yearly_income             2.04
##  4 min_yearly_income            1.92
##  5 max_yearly_income           12.0 
##  6 mean_credit_expenditure    227.  
##  7 median_credit_expenditure  130.  
##  8 sd_credit_expenditure      265.  
##  9 min_credit_expenditure       4.58
## 10 max_credit_expenditure    1292.  
## 11 mean_age                    37.8 
## 12 median_age                  35.3 
## 13 sd_age                      11.5 
## 14 mean_ratio                  51.9 
## 15 median_ratio                33.7 
## 16 sd_ratio                    52.4 
## 17 min_ratio                    1.15
## 18 max_ratio                  215.

Summary for Non-Self-Employed Individuals:

# Display the vertical (Statistic / Value) summary for the non-self-employed group
summary_no_self_employed_vertical
## # A tibble: 18 × 2
##    Statistic                    Value
##    <chr>                        <dbl>
##  1 mean_yearly_income           3.41 
##  2 median_yearly_income         3    
##  3 sd_yearly_income             1.68 
##  4 min_yearly_income            0.21 
##  5 max_yearly_income           13.5  
##  6 mean_credit_expenditure    245.   
##  7 median_credit_expenditure  159.   
##  8 sd_credit_expenditure      290.   
##  9 min_credit_expenditure       0.312
## 10 max_credit_expenditure    3100.   
## 11 mean_age                    32.9  
## 12 median_age                  30.8  
## 13 sd_age                       9.94 
## 14 mean_ratio                  76.7  
## 15 median_ratio                52.5  
## 16 sd_ratio                    84.1  
## 17 min_ratio                    0.108
## 18 max_ratio                  755.

Plots

  # Turn off warning output before drawing the figures.
  # NOTE(review): this changes a global option and is never restored, so it
  # also hides warnings from any code that runs afterwards; confirm intended.
  options(warn = -1)

# Figures shown side by side, one pair per page, in display order
plot_pairs <- list(
  list(boxplot_income, scatterplot_income_age),
  list(boxplot_age, histogram_age),
  list(histogram_expenditure, boxplot_expenditure),
  list(bar_chart_card, scatterplot_income_expenditure)
)

# Draw each pair in a two-column layout
for (plot_pair in plot_pairs) {
  grid.arrange(grobs = plot_pair, ncol = 2)
}

# The ratio density plot is displayed on its own
density_plot_ratio