빅데이터 분석 시각화 포플 #1

1) mpg 데이터에서 연도별 배기량에 따른 도시/고속도로 연비를 꺾은선으로 그리시오.

(단, cty는 회색, hwy는 파란색, 2008년은 굵은 선으로 표현하시오)

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Average city (m1) and highway (m2) mileage for every year/displacement pair.
mpg2 <- summarise(
  group_by(mpg, year, displ),
  m1 = mean(cty),
  m2 = mean(hwy)
)
mpg2

## # A tibble: 50 x 4
## # Groups:   year [2]
##     year displ    m1    m2
##    <int> <dbl> <dbl> <dbl>
##  1  1999   1.6  24.8  31.6
##  2  1999   1.8  20.7  29.4
##  3  1999   1.9  32.3  43  
##  4  1999   2    19.8  27.5
##  5  1999   2.2  20.7  27.3
##  6  1999   2.4  18.8  26.7
##  7  1999   2.5  18.3  25.5
##  8  1999   2.7  15.5  20  
##  9  1999   2.8  16.5  24.9
## 10  1999   3    18    25.6
## # ... with 40 more rows

# Line chart of mean city/highway mileage against displacement, one line per
# model year. Grouping by year keeps 1999 and 2008 as separate lines instead
# of one zig-zag path; colour distinguishes cty (gray) from hwy (blue) and
# line width marks the year (2008 drawn thicker, as the task requires).
# `size` is used for line width so the code also runs on older ggplot2
# releases that predate the `linewidth` aesthetic.
ggplot(mpg2, aes(x = displ, group = factor(year), size = factor(year))) +
  geom_line(aes(y = m1, colour = "cty")) +
  geom_line(aes(y = m2, colour = "hwy")) +
  scale_colour_manual(
    "",
    breaks = c("cty", "hwy"),
    values = c("cty" = "gray", "hwy" = "blue")
  ) +
  scale_size_manual("", values = c("1999" = 0.5, "2008" = 1.5)) +
  # displ is recorded in litres and cty/hwy in miles per gallon.
  xlab("배기량(L)") +
  xlim(1, 7) +
  scale_y_continuous("연비(mpg)", limits = c(5, 50)) +
  labs(title = "연도별 통합 연비", subtitle = "(굵은선은 2008년)")

2) data(성적.csv) 데이터에서 국어 성적이 80점 이상인

학생들의 수를 성비가 보이도록 학급별로 막대그래프를 그리시오.

# Set the default connection encoding, then load the score sheet.
# The two statements must be on separate lines and use plain ASCII quotes;
# typographic quotes are not valid R string delimiters.
options(encoding = "utf-8")
data <- read.csv("data/성적.csv")

# Number of students scoring 80 or more in 국어, per class (반) and gender (성별).
d2 <- data %>%
  filter(국어 >= 80) %>%
  group_by(반, 성별) %>%
  summarise(cnt = n())

# Stacked bars per class, filled by gender. d2 is already aggregated (one row
# per class/gender with the head-count in `cnt`), so the bar height must come
# from `cnt` via geom_col(); geom_bar() would count rows and draw every
# segment with height 1.
ggplot(data = d2, aes(x = 반, y = cnt, fill = 성별)) +
  geom_col(width = 0.7) +
  scale_fill_discrete(name = "성별") +
  labs(title = "국어 우수 학생", subtitle = "(80점 이상)")

3) 국어 성적이 95점 이상인 학생들의 점수별 밀도그래프를 그리시오.

# Keep only students whose 국어 score is 95 or higher, then print the result.
# The assignment and the print must be separate statements.
d3 <- data %>%
  filter(국어 >= 95)
d3

# Density of 국어 scores among the >= 95 group, one semi-transparent curve per
# class. String literals use plain ASCII quotes so the code parses.
ggplot(d3, aes(국어)) +
  geom_density(aes(fill = factor(반)), alpha = 0.5) +
  labs(
    title = "반별 국어 우수 학생",
    subtitle = "(국어 성적 A+)",
    x = "성적",
    y = "밀도",
    fill = "학급"
  )

4) midwest데이터에서 전체인구와 아시아계 인구의 관계를 알아보기 위한 그래프를 그리시오.

(단, 전체인구는 50만명 이하, 아시아계인구는 1만명 이하만 표시되게)

# Take a plain data.frame copy of the ggplot2 midwest data and preview it.
midwest <- ggplot2::midwest %>%
  as.data.frame()
head(midwest, n = 6)

##   PID    county state  area poptotal popdensity popwhite popblack
## 1 561     ADAMS    IL 0.052    66090  1270.9615    63917     1702
## 2 562 ALEXANDER    IL 0.014    10626   759.0000     7054     3496
## 3 563      BOND    IL 0.022    14991   681.4091    14477      429
## 4 564     BOONE    IL 0.017    30806  1812.1176    29344      127
## 5 565     BROWN    IL 0.018     5836   324.2222     5264      547
## 6 566    BUREAU    IL 0.050    35688   713.7600    35157       50
##   popamerindian popasian popother percwhite  percblack percamerindan
## 1            98      249      124  96.71206  2.5752761     0.1482826
## 2            19       48        9  66.38434 32.9004329     0.1788067
## 3            35       16       34  96.57128  2.8617170     0.2334734
## 4            46      150     1139  95.25417  0.4122574     0.1493216
## 5            14        5        6  90.19877  9.3728581     0.2398903
## 6            65      195      221  98.51210  0.1401031     0.1821340
##    percasian  percother popadults  perchsd percollege percprof
## 1 0.37675897 0.18762294     43298 75.10740   19.63139 4.355859
## 2 0.45172219 0.08469791      6724 59.72635   11.24331 2.870315
## 3 0.10673071 0.22680275      9669 69.33499   17.03382 4.488572
## 4 0.48691813 3.69733169     19272 75.47219   17.27895 4.197800
## 5 0.08567512 0.10281014      3979 68.86152   14.47600 3.367680
## 6 0.54640215 0.61925577     23444 76.62941   18.90462 3.275891
##   poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert
## 1           63628         96.27478        13.151443             18.01172
## 2           10529         99.08714        32.244278             45.82651
## 3           14235         94.95697        12.068844             14.03606
## 4           30337         98.47757         7.209019             11.17954
## 5            4815         82.50514        13.520249             13.02289
## 6           35107         98.37200        10.399635             14.15882
##   percadultpoverty percelderlypoverty inmetro category
## 1        11.009776          12.443812       0      AAR
## 2        27.385647          25.228976       0      LHR
## 3        10.852090          12.697410       0      AAR
## 4         5.536013           6.217047       1      ALU
## 5        11.143211          19.200000       0      AAR
## 6         8.179287          11.008586       0      AAR

# Keep counties with at most 500,000 residents and at most 10,000 Asian residents.
m2 <- filter(midwest, poptotal <= 500000, popasian <= 10000)

# Scatter plot of Asian population against total population per county, with
# a smoothed trend line. The task asks for the relationship between total and
# Asian population, so x is poptotal (not area) and the axis limits mirror the
# 500,000 / 10,000 caps already applied when m2 was built.
ggplot(m2, aes(x = poptotal, y = popasian)) +
  geom_point(aes(colour = state, size = popasian)) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "auto", se = FALSE) +
  xlim(c(0, 500000)) +
  ylim(c(0, 10000)) +
  labs(
    title = "전체 인구와 아시아계 인구의 관계",
    subtitle = "poptotal Vs popasian",
    x = "poptotal",
    y = "popasian",
    caption = "Source : ggplot2::midwest"
  )

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).