Hyejin Roh (h12428201)

1. Find data

2. Import data

# Read Household.csv (relative to the working directory) into a data frame.
# read.csv() is used rather than read.table(sep = ",") because its defaults
# are CSV-safe: only double quotes delimit strings and "#" is not treated as
# a comment character, so apostrophes or hashes in a field cannot corrupt rows.
mydata <- read.csv("./Household.csv", header = TRUE)

3. Display data

# Preview the first six rows of the imported data.
head(mydata, n = 6L)
##   CapitalRegion Assets Liabilities NetAssets
## 1            G1      1        5300     -5299
## 2            G1     10           0        10
## 3            G1     10           0        10
## 4            G1     10           0        10
## 5            G1     10           0        10
## 6            G1    100           0       100

4. Explain data

5. Data source

6. Data manipulation

library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fix the RNG seed so both random draws below are reproducible.
set.seed(2024)

# Recode the region codes "G1"/"G2" as a labelled factor.
mydata$CapitalRegion <- factor(
  mydata$CapitalRegion,
  levels = c("G1", "G2"),
  labels = c("Capital", "NonCapital")
)

# Number of rows drawn from each region.
n_per_group <- 30

# Capital region: keep rows with positive net assets (needed for the log
# below), then draw rows at random. This draw stays ahead of the NonCapital
# draw so the random number stream is consumed in the same order.
Capital <- filter(mydata, CapitalRegion == "Capital", NetAssets > 0)
Capital <- Capital[sample(nrow(Capital), size = n_per_group), ]

# NonCapital region: same filter and draw.
NonCapital <- filter(mydata, CapitalRegion == "NonCapital", NetAssets > 0)
NonCapital <- NonCapital[sample(nrow(NonCapital), size = n_per_group), ]

# Stack both samples, keep the two analysis columns, and add log net assets.
sample.mydata <- rbind(Capital, NonCapital)[, c("CapitalRegion", "NetAssets")]
sample.mydata[["LogNetAssets"]] <- log(sample.mydata[["NetAssets"]])

sample.mydata
##       CapitalRegion NetAssets LogNetAssets
## 5698        Capital     38300    10.553205
## 4645        Capital     54600    10.907789
## 3629        Capital     42552    10.658482
## 4796        Capital     66500    11.104957
## 5375        Capital     74396    11.217157
## 5009        Capital      7066     8.863050
## 2669        Capital     29400    10.288750
## 3488        Capital     40168    10.600826
## 105         Capital    102300    11.535665
## 1762        Capital     20220     9.914427
## 5119        Capital     55126    10.917377
## 5149        Capital     76740    11.248178
## 1035        Capital    150400    11.921054
## 4880        Capital    688300    13.441980
## 398         Capital    113768    11.641917
## 2978        Capital     18575     9.829572
## 3098        Capital      2420     7.791523
## 3634        Capital    426000    12.962195
## 3104        Capital     24940    10.124228
## 2773        Capital     30338    10.320156
## 4299        Capital     33000    10.404263
## 35          Capital     29740    10.300248
## 1385        Capital     16810     9.729729
## 4334        Capital      5500     8.612503
## 2641        Capital     29000    10.275051
## 3356        Capital     31266    10.350287
## 3288        Capital     36740    10.511621
## 3668        Capital      4313     8.369389
## 3548        Capital     25990    10.165467
## 1451        Capital      1800     7.495542
## 9658     NonCapital     60980    11.018301
## 8052     NonCapital      4565     8.426174
## 10516    NonCapital      7217     8.884195
## 8119     NonCapital     25400    10.142504
## 9303     NonCapital     57450    10.958670
## 6649     NonCapital     35650    10.481504
## 10858    NonCapital     64994    11.082050
## 1916     NonCapital     14913     9.609989
## 10116    NonCapital      6653     8.802823
## 9863     NonCapital      6350     8.756210
## 4020     NonCapital     22630    10.027032
## 212      NonCapital     42880    10.666161
## 1305     NonCapital    128358    11.762579
## 1849     NonCapital    138935    11.841761
## 4913     NonCapital      2656     7.884577
## 11574    NonCapital       900     6.802395
## 8026     NonCapital     32610    10.392374
## 9137     NonCapital       556     6.320768
## 3715     NonCapital     19519     9.879144
## 4249     NonCapital     23410    10.060919
## 8897     NonCapital      4560     8.425078
## 3860     NonCapital       145     4.976734
## 2441     NonCapital     12268     9.414750
## 9935     NonCapital     64336    11.071875
## 8656     NonCapital      5100     8.536996
## 4085     NonCapital     22910    10.039329
## 2234     NonCapital      5056     8.528331
## 2353     NonCapital     16200     9.692767
## 10867    NonCapital     59650    10.996249
## 1155     NonCapital    110864    11.616060

7. Descriptive statistics

library(psych)

# Descriptive statistics of log net assets, reported separately per region.
describeBy(
  sample.mydata[["LogNetAssets"]],
  group = sample.mydata[["CapitalRegion"]]
)
## 
##  Descriptive statistics by group 
## group: Capital
##    vars  n mean   sd median trimmed  mad min   max range  skew kurtosis   se
## X1    1 30 10.4 1.31  10.38   10.42 0.81 7.5 13.44  5.95 -0.11      0.3 0.24
## ------------------------------------------------------------ 
## group: NonCapital
##    vars  n mean   sd median trimmed  mad  min   max range  skew kurtosis  se
## X1    1 30 9.57 1.63   9.95    9.74 1.62 4.98 11.84  6.87 -0.88      0.4 0.3

8-1. Data visualization - Histogram

library(ggplot2)
## 
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Histogram of log net assets for the Capital rows (bin width 0.5); only rows
# with a finite log value are plotted.
Hist_Capital <- ggplot(
  data = filter(
    sample.mydata,
    CapitalRegion == "Capital",
    is.finite(LogNetAssets)
  ),
  mapping = aes(x = LogNetAssets)
) +
  geom_histogram(binwidth = 0.5, colour = "black", fill = "deepskyblue") +
  labs(title = "Capital", x = "log(NetAssets)", y = "Amount")

# Same histogram for the NonCapital rows, drawn in a different fill colour.
Hist_NonCapital <- ggplot(
  data = filter(
    sample.mydata,
    CapitalRegion == "NonCapital",
    is.finite(LogNetAssets)
  ),
  mapping = aes(x = LogNetAssets)
) +
  geom_histogram(binwidth = 0.5, colour = "black", fill = "tomato") +
  labs(title = "NonCapital", x = "log(NetAssets)", y = "Amount")

library(ggpubr)

# Place the two histograms side by side in a single row.
ggarrange(
  plotlist = list(Hist_Capital, Hist_NonCapital),
  ncol = 2,
  nrow = 1
)

8-2. Data visualization - Boxplot

# Boxplot of log net assets for the Capital rows; the empty x value yields a
# single box and the x-axis title is removed.
Box_Capital <- ggplot(
  data = filter(sample.mydata, CapitalRegion == "Capital"),
  mapping = aes(x = "", y = LogNetAssets)
) +
  geom_boxplot(fill = "deepskyblue") +
  ggtitle("Capital") +
  xlab(NULL) +
  ylab("log(NetAssets)")

# Same boxplot for the NonCapital rows.
Box_Noncapital <- ggplot(
  data = filter(sample.mydata, CapitalRegion == "NonCapital"),
  mapping = aes(x = "", y = LogNetAssets)
) +
  geom_boxplot(fill = "tomato") +
  ggtitle("NonCapital") +
  xlab(NULL) +
  ylab("log(NetAssets)")

# Show the two boxplots next to each other.
ggarrange(plotlist = list(Box_Capital, Box_Noncapital), ncol = 2)