# Load the household survey file; the first row holds the column names and
# fields are comma-separated.
mydata <- read.table(file = "./Household.csv", sep = ",", header = TRUE)

# Preview the first six rows to confirm the columns were parsed as expected.
head(mydata, n = 6L)
## CapitalRegion Assets Liabilities NetAssets
## 1 G1 1 5300 -5299
## 2 G1 10 0 10
## 3 G1 10 0 10
## 4 G1 10 0 10
## 5 G1 10 0 10
## 6 G1 100 0 100
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fix the RNG state so the two 30-row draws below are reproducible.
set.seed(2024)

# Recode the region codes G1/G2 into descriptive factor labels.
mydata$CapitalRegion <- factor(
  mydata$CapitalRegion,
  levels = c("G1", "G2"),
  labels = c("Capital", "NonCapital")
)

# Only rows with strictly positive net assets are eligible, because the log
# transform applied below is not finite for zero or negative values.
# The Capital draw must happen before the NonCapital draw to keep the same
# random-number sequence.
Capital <- filter(mydata, CapitalRegion == "Capital", NetAssets > 0)
Capital <- Capital[sample.int(nrow(Capital), size = 30), ]

NonCapital <- filter(mydata, CapitalRegion == "NonCapital", NetAssets > 0)
NonCapital <- NonCapital[sample.int(nrow(NonCapital), size = 30), ]

# Stack the two samples, keep only the columns used in the analysis, and add
# the natural log of net assets.
sample.mydata <- rbind(Capital, NonCapital)[, c("CapitalRegion", "NetAssets")]
sample.mydata$LogNetAssets <- log(sample.mydata$NetAssets)

# Print the combined 60-row sample.
sample.mydata
## CapitalRegion NetAssets LogNetAssets
## 5698 Capital 38300 10.553205
## 4645 Capital 54600 10.907789
## 3629 Capital 42552 10.658482
## 4796 Capital 66500 11.104957
## 5375 Capital 74396 11.217157
## 5009 Capital 7066 8.863050
## 2669 Capital 29400 10.288750
## 3488 Capital 40168 10.600826
## 105 Capital 102300 11.535665
## 1762 Capital 20220 9.914427
## 5119 Capital 55126 10.917377
## 5149 Capital 76740 11.248178
## 1035 Capital 150400 11.921054
## 4880 Capital 688300 13.441980
## 398 Capital 113768 11.641917
## 2978 Capital 18575 9.829572
## 3098 Capital 2420 7.791523
## 3634 Capital 426000 12.962195
## 3104 Capital 24940 10.124228
## 2773 Capital 30338 10.320156
## 4299 Capital 33000 10.404263
## 35 Capital 29740 10.300248
## 1385 Capital 16810 9.729729
## 4334 Capital 5500 8.612503
## 2641 Capital 29000 10.275051
## 3356 Capital 31266 10.350287
## 3288 Capital 36740 10.511621
## 3668 Capital 4313 8.369389
## 3548 Capital 25990 10.165467
## 1451 Capital 1800 7.495542
## 9658 NonCapital 60980 11.018301
## 8052 NonCapital 4565 8.426174
## 10516 NonCapital 7217 8.884195
## 8119 NonCapital 25400 10.142504
## 9303 NonCapital 57450 10.958670
## 6649 NonCapital 35650 10.481504
## 10858 NonCapital 64994 11.082050
## 1916 NonCapital 14913 9.609989
## 10116 NonCapital 6653 8.802823
## 9863 NonCapital 6350 8.756210
## 4020 NonCapital 22630 10.027032
## 212 NonCapital 42880 10.666161
## 1305 NonCapital 128358 11.762579
## 1849 NonCapital 138935 11.841761
## 4913 NonCapital 2656 7.884577
## 11574 NonCapital 900 6.802395
## 8026 NonCapital 32610 10.392374
## 9137 NonCapital 556 6.320768
## 3715 NonCapital 19519 9.879144
## 4249 NonCapital 23410 10.060919
## 8897 NonCapital 4560 8.425078
## 3860 NonCapital 145 4.976734
## 2441 NonCapital 12268 9.414750
## 9935 NonCapital 64336 11.071875
## 8656 NonCapital 5100 8.536996
## 4085 NonCapital 22910 10.039329
## 2234 NonCapital 5056 8.528331
## 2353 NonCapital 16200 9.692767
## 10867 NonCapital 59650 10.996249
## 1155 NonCapital 110864 11.616060
library(psych)
# Descriptive statistics of log net assets, reported separately per region.
with(sample.mydata, describeBy(LogNetAssets, group = CapitalRegion))
##
## Descriptive statistics by group
## group: Capital
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 10.4 1.31 10.38 10.42 0.81 7.5 13.44 5.95 -0.11 0.3 0.24
## ------------------------------------------------------------
## group: NonCapital
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 9.57 1.63 9.95 9.74 1.62 4.98 11.84 6.87 -0.88 0.4 0.3
Capital: mean = 10.4, sd = 1.31, median = 10.38
NonCapital: mean = 9.57, sd = 1.63, median = 9.95
mean: Capital > NonCapital
sd: Capital < NonCapital, so the NonCapital group's log net assets are more dispersed
median: Capital > NonCapital, so the typical Capital household has higher net assets
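NonCapital households might have a more unequal net asset distribution: on the log scale their sd is larger (1.63 vs 1.31) and their skew is more strongly negative (-0.88 vs -0.11)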
library(ggplot2)
##
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Histogram of log net assets for the Capital sample (bins of width 0.5).
# Non-finite log values are dropped before plotting.
Hist_Capital <- sample.mydata %>%
  filter(CapitalRegion == "Capital", is.finite(LogNetAssets)) %>%
  ggplot(aes(x = LogNetAssets)) +
  geom_histogram(binwidth = 0.5, colour = "black", fill = "deepskyblue") +
  labs(title = "Capital", x = "log(NetAssets)", y = "Amount")
# Histogram of log net assets for the NonCapital sample (bins of width 0.5).
# Non-finite log values are dropped before plotting.
Hist_NonCapital <- sample.mydata %>%
  filter(CapitalRegion == "NonCapital", is.finite(LogNetAssets)) %>%
  ggplot(aes(x = LogNetAssets)) +
  geom_histogram(binwidth = 0.5, colour = "black", fill = "tomato") +
  labs(title = "NonCapital", x = "log(NetAssets)", y = "Amount")
library(ggpubr)
# Show the two histograms side by side in a single row.
ggarrange(
  plotlist = list(Hist_Capital, Hist_NonCapital),
  nrow = 1,
  ncol = 2
)
# Boxplot of log net assets for the Capital sample; the empty string mapped
# to x yields a single unlabelled box.
Box_Capital <- sample.mydata %>%
  filter(CapitalRegion == "Capital") %>%
  ggplot(aes(x = "", y = LogNetAssets)) +
  geom_boxplot(fill = "deepskyblue") +
  labs(x = NULL, y = "log(NetAssets)", title = "Capital")
# Boxplot of log net assets for the NonCapital sample; the empty string
# mapped to x yields a single unlabelled box.
Box_Noncapital <- sample.mydata %>%
  filter(CapitalRegion == "NonCapital") %>%
  ggplot(aes(x = "", y = LogNetAssets)) +
  geom_boxplot(fill = "tomato") +
  labs(x = NULL, y = "log(NetAssets)", title = "NonCapital")
# Show the two boxplots side by side.
ggarrange(plotlist = list(Box_Capital, Box_Noncapital), ncol = 2)