# 221105

library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Load the `midwest` data set into the workspace and preview its columns.
data(midwest)
glimpse(x = midwest)

## Rows: 437
## Columns: 28
## $ PID                  <int> 561, 562, 563, 564, 565, 566, 567, 568, 569, 570,…
## $ county               <chr> "ADAMS", "ALEXANDER", "BOND", "BOONE", "BROWN", "…
## $ state                <chr> "IL", "IL", "IL", "IL", "IL", "IL", "IL", "IL", "…
## $ area                 <dbl> 0.052, 0.014, 0.022, 0.017, 0.018, 0.050, 0.017, …
## $ poptotal             <int> 66090, 10626, 14991, 30806, 5836, 35688, 5322, 16…
## $ popdensity           <dbl> 1270.9615, 759.0000, 681.4091, 1812.1176, 324.222…
## $ popwhite             <int> 63917, 7054, 14477, 29344, 5264, 35157, 5298, 165…
## $ popblack             <int> 1702, 3496, 429, 127, 547, 50, 1, 111, 16, 16559,…
## $ popamerindian        <int> 98, 19, 35, 46, 14, 65, 8, 30, 8, 331, 51, 26, 17…
## $ popasian             <int> 249, 48, 16, 150, 5, 195, 15, 61, 23, 8033, 89, 3…
## $ popother             <int> 124, 9, 34, 1139, 6, 221, 0, 84, 6, 1596, 20, 7, …
## $ percwhite            <dbl> 96.71206, 66.38434, 96.57128, 95.25417, 90.19877,…
## $ percblack            <dbl> 2.57527614, 32.90043290, 2.86171703, 0.41225735, …
## $ percamerindan        <dbl> 0.14828264, 0.17880670, 0.23347342, 0.14932156, 0…
## $ percasian            <dbl> 0.37675897, 0.45172219, 0.10673071, 0.48691813, 0…
## $ percother            <dbl> 0.18762294, 0.08469791, 0.22680275, 3.69733169, 0…
## $ popadults            <int> 43298, 6724, 9669, 19272, 3979, 23444, 3583, 1132…
## $ perchsd              <dbl> 75.10740, 59.72635, 69.33499, 75.47219, 68.86152,…
## $ percollege           <dbl> 19.63139, 11.24331, 17.03382, 17.27895, 14.47600,…
## $ percprof             <dbl> 4.355859, 2.870315, 4.488572, 4.197800, 3.367680,…
## $ poppovertyknown      <int> 63628, 10529, 14235, 30337, 4815, 35107, 5241, 16…
## $ percpovertyknown     <dbl> 96.27478, 99.08714, 94.95697, 98.47757, 82.50514,…
## $ percbelowpoverty     <dbl> 13.151443, 32.244278, 12.068844, 7.209019, 13.520…
## $ percchildbelowpovert <dbl> 18.011717, 45.826514, 14.036061, 11.179536, 13.02…
## $ percadultpoverty     <dbl> 11.009776, 27.385647, 10.852090, 5.536013, 11.143…
## $ percelderlypoverty   <dbl> 12.443812, 25.228976, 12.697410, 6.217047, 19.200…
## $ inmetro              <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0…
## $ category             <chr> "AAR", "LHR", "AAR", "ALU", "AAR", "AAR", "LAR", …

# Unweighted: `percbelowpoverty` against `percwhite`, every point drawn alike.
ggplot(data = midwest, mapping = aes(x = percwhite, y = percbelowpoverty)) +
  geom_point()

# Weighted by population: point size is `poptotal` expressed in millions.
ggplot(data = midwest, mapping = aes(x = percwhite, y = percbelowpoverty)) +
  geom_point(mapping = aes(size = poptotal / 1e6))

# 1e6 = 1 * 10^6, so the size aesthetic is the population in millions.
# The legend title needs "\n" (an escaped newline) to place "(millions)" on a
# second line; "|n" would be printed literally in the legend.
ggplot(midwest, aes(percwhite, percbelowpoverty)) +
  geom_point(aes(size = poptotal / 1e6)) +
  scale_size_area("Population\n(millions)", breaks = c(0.5, 1, 2, 4))

# Unweighted: scatterplot with a linear fit in which every row counts equally.
ggplot(data = midwest, mapping = aes(x = percwhite, y = percbelowpoverty)) +
  geom_point() +
  geom_smooth(method = "lm", size = 1)

## `geom_smooth()` using formula 'y ~ x'

# Weighted by population: points are sized by `poptotal` (in millions) and the
# linear fit uses `poptotal` as the weight of each row.
ggplot(data = midwest, mapping = aes(x = percwhite, y = percbelowpoverty)) +
  geom_point(mapping = aes(size = poptotal / 1e6)) +
  geom_smooth(mapping = aes(weight = poptotal), method = "lm", size = 1)

## `geom_smooth()` using formula 'y ~ x'

# 3.10 Diamonds data

# Load the `diamonds` data set into the workspace and preview its columns.
data(diamonds)
glimpse(x = diamonds)

## Rows: 53,940
## Columns: 10
## $ carat   <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.…
## $ cut     <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver…
## $ color   <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,…
## $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, …
## $ depth   <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64…
## $ table   <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58…
## $ price   <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34…
## $ x       <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.…
## $ y       <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.…
## $ z       <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.…

# Histogram of `depth` with the default binning (no `bins`/`binwidth` given).
ggplot(data = diamonds, mapping = aes(x = depth)) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of `depth` with 0.1-wide bins, x axis limited to [55, 70].
# Values outside the limits are dropped before binning, hence the warnings.
ggplot(data = diamonds, mapping = aes(x = depth)) +
  geom_histogram(binwidth = 0.1) +
  scale_x_continuous(limits = c(55, 70))

## Warning: Removed 45 rows containing non-finite values (stat_bin).

## Warning: Removed 2 rows containing missing values (geom_bar).

# `depth` is numeric and `cut` is categorical: one density curve per cut,
# with out-of-range values removed silently (`na.rm = TRUE`).
ggplot(data = diamonds, mapping = aes(x = depth, fill = cut, colour = cut)) +
  geom_density(alpha = 0.2, na.rm = TRUE) +
  scale_x_continuous(limits = c(58, 68))

# Distribution of `depth` within each `cut`, as box plots.
ggplot(data = diamonds, mapping = aes(x = cut, y = depth)) +
  geom_boxplot()

# 3.13 Statistical summaries

# Bar chart of row counts for each level of `color`.
ggplot(data = diamonds, mapping = aes(x = color)) +
  geom_bar()

# Mean `price` for each level of `color`.
# The summary function must be passed as `fun`; `fun.y` is ignored as an
# unknown parameter, which makes the stat fall back to `mean_se()`.
ggplot(diamonds, aes(color, price)) +
  geom_bar(stat = "summary_bin", fun = mean)

## Warning: Ignoring unknown parameters: fun.y

## No summary function supplied, defaulting to `mean_se()`

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# 4.2 (p. 78) Building a scatterplot

# `hwy` against `displ`, one coloured line per level of `factor(cyl)`.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_line()

# Same lines with a linear fit added for each colour group.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  geom_line() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula 'y ~ x'

# Shared base plot: `color` on the x axis, bars filled by `cut`, with the axis
# titles and the legend removed.
dplot <- ggplot(data = diamonds, mapping = aes(x = color, fill = cut)) +
  xlab(NULL) +
  ylab(NULL) +
  theme(legend.position = "none")

# Default bar position.
dplot + geom_bar()

# position = "fill"
dplot + geom_bar(position = "fill")

# position = "dodge"
dplot + geom_bar(position = "dodge")

# 221105

# 이동건

# 2022-11-05