#p196 part2 예제 4
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the Seoul general-restaurant licence data (EUC-KR encoded CSV).
# Empty fields are read as NA; the argument is spelled out as `na.strings`
# instead of relying on partial matching of `na`.
food <- read.csv(
  "6110000_서울특별시_07_24_04_P_일반음식점.csv",
  fileEncoding = "euc-kr",
  na.strings = ""
)
# Keep only the six analysis columns, renaming them to English in the same
# step (select() renames while selecting, so no separate rename() is needed).
food1 <- food %>%
  select(
    open_date = 인허가일자,
    status = 상세영업상태명,
    name = 사업장명,
    close_date = 폐업일자,
    type = 업태구분명,
    address = 소재지전체주소
  )
# ----------------------- Missing-value exploration -----------------------
# Column types and a preview of the first values.
dplyr::glimpse(food1)
## Rows: 474,140
## Columns: 6
## $ open_date  <int> 20200803, 20200803, 20200803, 20200803, 20200803, 20200803,…
## $ status     <chr> "영업", "영업", "영업", "영업", "영업", "영업", "영업", "영…
## $ name       <chr> "혼밥대왕 마곡점", "꾸어가게생선구이화곡점", "인생갈비탕", …
## $ close_date <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ type       <chr> "한식", "한식", "한식", "한식", "일식", "한식", "분식", "기…
## $ address    <chr> "서울특별시 강서구 마곡동 757 두산더랜드파크 B동 207호", "…
# Per-column count of missing (TRUE) vs. present (FALSE) values.
food1 %>%
  is.na() %>%
  summary()
##  open_date         status           name         close_date     
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:474140    FALSE:474140    FALSE:474140    FALSE:349716   
##                                                  TRUE :124424   
##     type          address       
##  Mode :logical   Mode :logical  
##  FALSE:474119    FALSE:473903   
##  TRUE :21        TRUE :237
# ------------------ Inspect and tidy the analysis variables ------------------
# Frequency of each raw business-type label.
table(food1[["type"]])
## 
##                 193959.1505                    간이주점 
##                           1                           1 
##                    감성주점                      경양식 
##                         349                       47987 
##                        기타             기타 휴게음식점 
##                       33313                           3 
##                김밥(도시락)                        까페 
##                        1537                        7904 
##                      냉면집                        다방 
##                         126                           1 
##                  라이브카페                      룸살롱 
##                         276                           1 
##                    복어취급                        분식 
##                         113                       78766 
##                      뷔페식              식육(숯불구이) 
##                        2791                        1445 
##           식품등 수입판매업                  식품소분업 
##                           1                           1 
## 외국음식전문점(인도,태국등)                    이동조리 
##                        1993                         101 
##                일반조리판매                        일식 
##                           2                       17804 
##                    전통찻집          정종/대포집/소주방 
##                         823                       12757 
##                  제과점영업                      중국식 
##                           1                       14858 
##                    출장조리                      커피숍 
##                         370                           5 
##                    키즈카페                탕류(보신용) 
##                         101                         413 
##                  통닭(치킨)              패밀리레스트랑 
##                        9550                         238 
##                  패스트푸드                        한식 
##                        3911                      200172 
##                   호프/통닭                        횟집 
##                       34974                        1430
# Collapse near-duplicate business-type labels into broader categories.
# `%in%` never yields NA, so rows whose type is already NA stay NA.
food1$type[food1$type %in% c("까페", "다방", "라이브카페", "커피숍", "카페")] <- "카페"
food1$type[food1$type %in% c("통닭(치킨)", "호프/통닭")] <- "치킨"
food1$type[food1$type %in% c("일식", "회집", "횟집")] <- "회집"
# The raw data spells the family-restaurant label "패밀리레스트랑" (see the
# frequency table above), so the correctly spelled "패밀리레스토랑" alone never
# matched. Both spellings are accepted here.
# NOTE: outputs recorded further down in this script predate this fix; the
# "레스토랑" count is expected to grow by the "패밀리레스트랑" rows on re-run.
food1$type[food1$type %in% c("경양식", "패밀리레스토랑", "패밀리레스트랑")] <- "레스토랑"
food1$type[food1$type %in% c("정종/대포집/소주방")] <- "소주방"
food1$type[food1$type %in% c("외국음식전문점(인도,태국등)")] <- "외국음식전문점"
# "기타" (other) and the stray numeric label carry no category information.
food1$type[food1$type %in% c("기타", "193959.1505")] <- NA
# Derived variables: opening and closing year taken from the yyyymmdd dates.
range(food1$open_date)
## [1] 19000531 20220401
# Opening dates before 1970-03-01 are blanked out as missing.
food1$open_date <- replace(
  food1$open_date,
  which(food1$open_date < 19700301),
  NA
)
table(is.na(food1$open_date))
## 
##  FALSE   TRUE 
## 473735    405
# The year is the first four characters of the date.
food1$open_year <- substr(as.character(food1$open_date), 1, 4)
range(food1$close_date, na.rm = TRUE)
## [1]  2000126 20220401
food1$close_year <- substr(as.character(food1$close_date), 1, 4)
# (1) Print open_year, type and name of the oldest restaurant still operating.
food1 %>%
  filter(status == "영업", !is.na(open_date)) %>%
  filter(open_date == min(open_date)) %>%
  select(type, open_year, name)
##   type open_year   name
## 1 한식      1970 경원집
# (2) Print the share of the top three business types among opened restaurants.
# Rows with a missing opening date or type are excluded from the total.
food1 %>%
  filter(!is.na(open_date), !is.na(type)) %>%
  group_by(type) %>%
  tally() %>%
  mutate(total = sum(n), pct = n / total * 100) %>%
  arrange(desc(n)) %>%
  slice_head(n = 3)
## # A tibble: 3 × 4
##   type          n  total   pct
##   <chr>     <int>  <int> <dbl>
## 1 한식     199971 440404  45.4
## 2 분식      78725 440404  17.9
## 3 레스토랑  47934 440404  10.9
# (3) Print the years with the most openings and the most closures, together
# with the respective counts (top three shown for each).
food1 %>%
  filter(!is.na(open_date)) %>%
  group_by(open_year) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3)
## # A tibble: 3 × 2
##   open_year     n
##   <chr>     <int>
## 1 2001      18818
## 2 1994      17978
## 3 1999      17882
food1 %>%
  filter(!is.na(close_date)) %>%
  group_by(close_year) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3)
## # A tibble: 3 × 2
##   close_year     n
##   <chr>      <int>
## 1 1999       15848
## 2 2000       15768
## 3 2005       14943
# Part 3, past exam question 1
house <- read.csv("housing.csv", fileEncoding = "euc-kr")
nrow(house) # check the number of rows
## [1] 20640
# First 80% of the rows. The cut-off is computed with integer arithmetic so it
# is not affected by floating-point error in `n * 0.8`, and seq_len() yields an
# empty index (instead of c(1, 0)) if the data has no rows.
rownum <- (nrow(house) * 8L) %/% 10L
house1 <- house[seq_len(rownum), ]
glimpse(house1)
## Rows: 16,512
## Columns: 10
## $ longitude          <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude           <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms        <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms     <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population         <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households         <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income      <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity    <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
# Number of missing values per column (colSums adds up each column).
house1 %>%
  is.na() %>%
  colSums()
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                159                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# df2: median of total_bedrooms, used as the fill value.
# df1: standard deviation before imputation (NAs ignored).
df2 <- median(house1$total_bedrooms, na.rm = TRUE)
df1 <- sd(house1$total_bedrooms, na.rm = TRUE)
# Replace every missing total_bedrooms value with the median.
house1$total_bedrooms <- replace(
  house1$total_bedrooms,
  is.na(house1$total_bedrooms),
  df2
)
# Confirm that no missing values remain.
house1 %>%
  is.na() %>%
  colSums()
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                  0                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# df3: standard deviation after imputation; df4: change caused by imputation
# (before minus after).
df3 <- sd(x = house1$total_bedrooms)
df4 <- df1 - df3
print(x = df4)
## [1] 1.975147
# Part 3, past exam question 2
house <- read.csv(file = "housing.csv")
nrow(house)
## [1] 20640
# Missing values per column.
house %>%
  is.na() %>%
  colSums()
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                207                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# Drop rows with a missing total_bedrooms. The column is referenced directly
# (data masking) rather than as `house$total_bedrooms`, which bypasses the
# data mask and reads the column from the global object instead.
house <- house %>%
  filter(!is.na(total_bedrooms))
# Confirm that no missing values remain.
colSums(is.na(house))
##          longitude           latitude housing_median_age        total_rooms 
##                  0                  0                  0                  0 
##     total_bedrooms         population         households      median_income 
##                  0                  0                  0                  0 
## median_house_value    ocean_proximity 
##                  0                  0
# First 70% of the rows. Integer arithmetic gives the floor of 0.7 * n without
# indexing by a fractional number, and seq_len() is safe for zero rows.
rownum <- (nrow(house) * 7L) %/% 10L
house1 <- house[seq_len(rownum), ]
quantile(house1$housing_median_age)
##   0%  25%  50%  75% 100% 
##    1   19   30   38   52
# First quartile, requested explicitly rather than by its position in the
# default quantile output.
df <- quantile(house1$housing_median_age, probs = 0.25)[[1]]
df
## [1] 19
# Part 3, past exam question 3

titanic <- read.csv(file = "train100.csv")
dplyr::glimpse(titanic)
## Rows: 891
## Columns: 11
## $ PassengerId <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ Survived    <int> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1…
## $ Pclass      <int> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3…
## $ Name        <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Fl…
## $ Sex         <chr> "male", "female", "female", "female", "male", "male", "mal…
## $ Age         <dbl> 22, 38, 26, 35, 35, NA, 54, 2, 27, 14, 4, 58, 20, 39, 14, …
## $ SibSp       <int> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0…
## $ Parch       <int> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0…
## $ Ticket      <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "37…
## $ Fare        <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625,…
## $ Embarked    <chr> "S", "C", "S", "S", "S", "Q", "S", "S", "S", "C", "S", "S"…
# Missing values per column.
titanic %>%
  is.na() %>%
  colSums()
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0         177 
##       SibSp       Parch      Ticket        Fare    Embarked 
##           0           0           0           0           0
# Treat the categorical columns as factors so summary() shows level counts.
titanic <- titanic %>%
  mutate(across(c(Embarked, Sex, Pclass), as.factor))
summary(titanic)
##   PassengerId       Survived      Pclass      Name               Sex     
##  Min.   :  1.0   Min.   :0.0000   1:216   Length:891         female:314  
##  1st Qu.:223.5   1st Qu.:0.0000   2:184   Class :character   male  :577  
##  Median :446.0   Median :0.0000   3:491   Mode  :character               
##  Mean   :446.0   Mean   :0.3838                                          
##  3rd Qu.:668.5   3rd Qu.:1.0000                                          
##  Max.   :891.0   Max.   :1.0000                                          
##                                                                          
##       Age            SibSp           Parch           Ticket         
##  Min.   : 0.42   Min.   :0.000   Min.   :0.0000   Length:891        
##  1st Qu.:20.12   1st Qu.:0.000   1st Qu.:0.0000   Class :character  
##  Median :28.00   Median :0.000   Median :0.0000   Mode  :character  
##  Mean   :29.70   Mean   :0.523   Mean   :0.3816                     
##  3rd Qu.:38.00   3rd Qu.:1.000   3rd Qu.:0.0000                     
##  Max.   :80.00   Max.   :8.000   Max.   :6.0000                     
##  NA's   :177                                                        
##       Fare        Embarked
##  Min.   :  0.00    :  2   
##  1st Qu.:  7.91   C:168   
##  Median : 14.45   Q: 77   
##  Mean   : 32.20   S:644   
##  3rd Qu.: 31.00           
##  Max.   :512.33           
## 
# df: total number of rows, the denominator for both percentages.
df <- nrow(titanic)
# df1 / df2: count and percentage of missing (NA or empty-string) values in
# Age and Embarked respectively.
df1 <- titanic %>%
  summarize(n = sum(is.na(Age) | Age == "")) %>%
  mutate(pct = n / df * 100)
df2 <- titanic %>%
  summarize(n = sum(is.na(Embarked) | Embarked == "")) %>%
  mutate(pct = n / df * 100)
df1
df2
##     n      pct
## 1 177 19.86532
##   n       pct
## 1 2 0.2244669
# Name of the sixth column of titanic.
df3 <- colnames(titanic)[6]
df3
## [1] "Age"
# Part 3, past exam question 4
# Bind `select` to dplyr's version in the global environment before attaching
# MASS, whose own select() would otherwise mask dplyr::select.
select <- dplyr::select
library(MASS)
## 
## 다음의 패키지를 부착합니다: 'MASS'
## The following object is masked _by_ '.GlobalEnv':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the Boston data set and preview its columns.
data(Boston)
dplyr::glimpse(Boston)
## Rows: 506
## Columns: 14
## $ crim    <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn      <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus   <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nox     <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm      <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age     <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis     <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad     <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax     <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ black   <dbl> 396.90, 396.90, 392.83, 394.63, 396.90, 394.12, 395.60, 396.90…
## $ lstat   <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv    <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
# Sort by crim, highest first, and look at the 10th largest value.
boston1 <- arrange(Boston, desc(crim))
boston1$crim[10]
## [1] 25.9406
# Overwrite the ten largest crim values with the 10th largest one.
boston1$crim[seq_len(10)] <- boston1$crim[10]
# Mean crim among rows with age >= 80.
df <- boston1 %>%
  filter(age >= 80) %>%
  summarize(m = mean(crim))
df
##          m
## 1 5.759387
# Part 3, past exam question 5
insurance <- read.csv(file = "insurance.csv")
dplyr::glimpse(insurance)
## Rows: 1,338
## Columns: 7
## $ age      <int> 19, 18, 28, 33, 32, 31, 46, 37, 37, 60, 25, 62, 23, 56, 27, 1…
## $ sex      <chr> "female", "male", "male", "male", "male", "female", "female",…
## $ bmi      <dbl> 27.900, 33.770, 33.000, 22.705, 28.880, 25.740, 33.440, 27.74…
## $ children <int> 0, 1, 3, 0, 0, 0, 1, 3, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ smoker   <chr> "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", …
## $ region   <chr> "southwest", "southeast", "southeast", "northwest", "northwes…
## $ charges  <dbl> 16884.924, 1725.552, 4449.462, 21984.471, 3866.855, 3756.622,…
# Missing values per column.
insurance %>%
  is.na() %>%
  colSums()
##      age      sex      bmi children   smoker   region  charges 
##        0        0        0        0        0        0        0
# Mean and standard deviation of charges.
avg <- mean(insurance$charges)
std <- sd(insurance$charges)
# Outliers: charges at least 1.5 standard deviations away from the mean.
insurance1 <- insurance %>%
  filter(charges <= avg - 1.5 * std | charges >= avg + 1.5 * std)
# Total charges over the outlier rows.
result <- sum(insurance1[["charges"]])
print(result)
## [1] 6421430
# Part 3, past exam question 6

data("mtcars")
dplyr::glimpse(mtcars)
## Rows: 32
## Columns: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
# Min-max scale wt to [0, 1] and count the cars whose scaled weight exceeds 0.5.
result <- with(
  mtcars,
  sum((wt - min(wt)) / (max(wt) - min(wt)) > 0.5)
)
print(result)
## [1] 11
# Part 3, past exam question 7

library(mlbench)
data(PimaIndiansDiabetes)
# Work under a shorter name.
pima <- PimaIndiansDiabetes
dplyr::glimpse(pima)
## Rows: 768
## Columns: 9
## $ pregnant <dbl> 6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10, 10, 1, 5, 7, 0, 7, 1, 1…
## $ glucose  <dbl> 148, 85, 183, 89, 137, 116, 78, 115, 197, 125, 110, 168, 139,…
## $ pressure <dbl> 72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0,…
## $ triceps  <dbl> 35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0…
## $ insulin  <dbl> 0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230…
## $ mass     <dbl> 33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0.0, 37…
## $ pedigree <dbl> 0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158…
## $ age      <dbl> 50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 3…
## $ diabetes <fct> pos, neg, pos, neg, pos, neg, pos, neg, pos, pos, neg, pos, n…
# Descriptive statistics for every column.
pima %>%
  summary()
##     pregnant         glucose         pressure         triceps     
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     insulin           mass          pedigree           age        diabetes 
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780   Min.   :21.00   neg:500  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437   1st Qu.:24.00   pos:268  
##  Median : 30.5   Median :32.00   Median :0.3725   Median :29.00            
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719   Mean   :33.24            
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262   3rd Qu.:41.00            
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200   Max.   :81.00
# Missing values per column.
pima %>%
  is.na() %>%
  colSums()
## pregnant  glucose pressure  triceps  insulin     mass pedigree      age 
##        0        0        0        0        0        0        0        0 
## diabetes 
##        0
# Convert the continuous age into a categorical class:
# "1" = under 41, "2" = 41 to under 60, "3" = 60 and over.
# findInterval() returns 0/1/2 for the breaks 41 and 60, hence the + 1L.
pima1 <- pima %>%
  mutate(age_class = as.character(findInterval(age, c(41, 60)) + 1L))
table(pima1$age_class)
## 
##   1   2   3 
## 574 162  32
# Per age class: row count (n), number of diabetes-positive rows (fre) and the
# positive rate in percent (ill_rate).
df <- pima1 %>%
  group_by(age_class) %>%
  summarize(
    n = n(),
    fre = sum(diabetes == "pos")
  ) %>%
  mutate(ill_rate = fre / n * 100)
df
## # A tibble: 3 × 4
##   age_class     n   fre ill_rate
##   <chr>     <int> <int>    <dbl>
## 1 1           574   166     28.9
## 2 2           162    93     57.4
## 3 3            32     9     28.1
# Positive rate of the second row (age class "2"), rounded to one decimal.
result <- round(df[["ill_rate"]][2], digits = 1)
print(result)
## [1] 57.4
# Part 3, past exam question 8

library(gapminder)
dplyr::glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Descriptive statistics for every column.
gapminder %>%
  summary()
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Mean life expectancy over all rows for the year 2002.
gapminder %>%
  filter(year == 2002) %>%
  summarise(m = mean(lifeExp))
## # A tibble: 1 × 1
##       m
##   <dbl>
## 1  65.7
# Number of countries whose 2002 life expectancy is at least the 2002 mean.
# The threshold is computed from the data rather than hard-coded as the rounded
# printed value 65.7, so a country lying between the exact mean and 65.7 is not
# misclassified.
result <- gapminder %>%
  filter(year == 2002) %>%
  mutate(overall_mean = mean(lifeExp)) %>% # mean over all 2002 rows
  group_by(country) %>%
  summarize(m = mean(lifeExp), overall_mean = first(overall_mean)) %>%
  filter(m >= overall_mean) %>%
  nrow()
cat(result)
## 85
#part3 기출문제 9
library(reshape)
## 
## 다음의 패키지를 부착합니다: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
# Wide table: one row per year, one column per country.
df <- read.csv(file = "disease.csv")
dplyr::glimpse(df)
## Rows: 4
## Columns: 194
## $ year                         <int> 1999, 2000, 2001, 2002
## $ Afghanistan                  <int> 0, 0, 0, 0
## $ Albania                      <dbl> 89.0, 132.0, 54.0, 4.9
## $ Algeria                      <dbl> 25.0, 0.0, 14.0, 0.7
## $ Andorra                      <dbl> 245.0, 138.0, 312.0, 12.4
## $ Angola                       <dbl> 217.0, 57.0, 45.0, 5.9
## $ Antigua...Barbuda            <dbl> 102.0, 128.0, 45.0, 4.9
## $ Argentina                    <dbl> 193.0, 25.0, 221.0, 8.3
## $ Armenia                      <dbl> 21.0, 179.0, 11.0, 3.8
## $ Australia                    <dbl> 261.0, 72.0, 212.0, 10.4
## $ Austria                      <dbl> 279.0, 75.0, 191.0, 9.7
## $ Azerbaijan                   <dbl> 21.0, 46.0, 5.0, 1.3
## $ Bahamas                      <dbl> 122.0, 176.0, 51.0, 6.3
## $ Bahrain                      <int> 42, 63, 7, 2
## $ Bangladesh                   <int> 0, 0, 0, 0
## $ Barbados                     <dbl> 143.0, 173.0, 36.0, 6.3
## $ Belarus                      <dbl> 142.0, 373.0, 42.0, 14.4
## $ Belgium                      <dbl> 295.0, 84.0, 212.0, 10.5
## $ Belize                       <dbl> 263.0, 114.0, 8.0, 6.8
## $ Benin                        <dbl> 34.0, 4.0, 13.0, 1.1
## $ Bhutan                       <dbl> 23.0, 0.0, 0.0, 0.4
## $ Bolivia                      <dbl> 167.0, 41.0, 8.0, 3.8
## $ Bosnia.Herzegovina           <dbl> 76.0, 173.0, 8.0, 4.6
## $ Botswana                     <dbl> 173.0, 35.0, 35.0, 5.4
## $ Brazil                       <dbl> 245.0, 145.0, 16.0, 7.2
## $ Brunei                       <dbl> 31.0, 2.0, 1.0, 0.6
## $ Bulgaria                     <dbl> 231.0, 252.0, 94.0, 10.3
## $ Burkina.Faso                 <dbl> 25.0, 7.0, 7.0, 4.3
## $ Burundi                      <dbl> 88.0, 0.0, 0.0, 6.3
## $ Cote.d.Ivoire                <int> 37, 1, 7, 4
## $ Cabo.Verde                   <int> 144, 56, 16, 4
## $ Cambodia                     <dbl> 57.0, 65.0, 1.0, 2.2
## $ Cameroon                     <dbl> 147.0, 1.0, 4.0, 5.8
## $ Canada                       <dbl> 240.0, 122.0, 100.0, 8.2
## $ Central.African.Republic     <dbl> 17.0, 2.0, 1.0, 1.8
## $ Chad                         <dbl> 15.0, 1.0, 1.0, 0.4
## $ Chile                        <dbl> 130.0, 124.0, 172.0, 7.6
## $ China                        <int> 79, 192, 8, 5
## $ Colombia                     <dbl> 159.0, 76.0, 3.0, 4.2
## $ Comoros                      <dbl> 1.0, 3.0, 1.0, 0.1
## $ Congo                        <dbl> 76.0, 1.0, 9.0, 1.7
## $ Cook.Islands                 <dbl> 0.0, 254.0, 74.0, 5.9
## $ Costa.Rica                   <dbl> 149.0, 87.0, 11.0, 4.4
## $ Croatia                      <dbl> 230.0, 87.0, 254.0, 10.2
## $ Cuba                         <dbl> 93.0, 137.0, 5.0, 4.2
## $ Cyprus                       <dbl> 192.0, 154.0, 113.0, 8.2
## $ Czech.Republic               <dbl> 361.0, 170.0, 134.0, 11.8
## $ North.Korea                  <int> 0, 0, 0, 0
## $ DR.Congo                     <dbl> 32.0, 3.0, 1.0, 2.3
## $ Denmark                      <dbl> 224.0, 81.0, 278.0, 10.4
## $ Djibouti                     <dbl> 15.0, 44.0, 3.0, 1.1
## $ Dominica                     <dbl> 52.0, 286.0, 26.0, 6.6
## $ Dominican.Republic           <dbl> 193.0, 147.0, 9.0, 6.2
## $ Ecuador                      <dbl> 162.0, 74.0, 3.0, 4.2
## $ Egypt                        <dbl> 6.0, 4.0, 1.0, 0.2
## $ El.Salvador                  <dbl> 52.0, 69.0, 2.0, 2.2
## $ Equatorial.Guinea            <dbl> 92.0, 0.0, 233.0, 5.8
## $ Eritrea                      <dbl> 18.0, 0.0, 0.0, 0.5
## $ Estonia                      <dbl> 224.0, 194.0, 59.0, 9.5
## $ Ethiopia                     <dbl> 20.0, 3.0, 0.0, 0.7
## $ Fiji                         <int> 77, 35, 1, 2
## $ Finland                      <int> 263, 133, 97, 10
## $ France                       <dbl> 127.0, 151.0, 370.0, 11.8
## $ Gabon                        <dbl> 347.0, 98.0, 59.0, 8.9
## $ Gambia                       <dbl> 8.0, 0.0, 1.0, 2.4
## $ Georgia                      <dbl> 52.0, 100.0, 149.0, 5.4
## $ Germany                      <dbl> 346.0, 117.0, 175.0, 11.3
## $ Ghana                        <dbl> 31.0, 3.0, 10.0, 1.8
## $ Greece                       <dbl> 133.0, 112.0, 218.0, 8.3
## $ Grenada                      <dbl> 199.0, 438.0, 28.0, 11.9
## $ Guatemala                    <dbl> 53.0, 69.0, 2.0, 2.2
## $ Guinea                       <dbl> 9.0, 0.0, 2.0, 0.2
## $ Guinea.Bissau                <dbl> 28.0, 31.0, 21.0, 2.5
## $ Guyana                       <dbl> 93.0, 302.0, 1.0, 7.1
## $ Haiti                        <dbl> 1.0, 326.0, 1.0, 5.9
## $ Honduras                     <int> 69, 98, 2, 3
## $ Hungary                      <dbl> 234.0, 215.0, 185.0, 11.3
## $ Iceland                      <dbl> 233.0, 61.0, 78.0, 6.6
## $ India                        <dbl> 9.0, 114.0, 0.0, 2.2
## $ Indonesia                    <dbl> 5.0, 1.0, 0.0, 0.1
## $ Iran                         <int> 0, 0, 0, 0
## $ Iraq                         <dbl> 9.0, 3.0, 0.0, 0.2
## $ Ireland                      <dbl> 313.0, 118.0, 165.0, 11.4
## $ Israel                       <dbl> 63.0, 69.0, 9.0, 2.5
## $ Italy                        <dbl> 85.0, 42.0, 237.0, 6.5
## $ Jamaica                      <dbl> 82.0, 88.0, 9.0, 3.4
## $ Japan                        <int> 77, 202, 16, 7
## $ Jordan                       <dbl> 6.0, 21.0, 1.0, 0.5
## $ Kazakhstan                   <dbl> 124.0, 246.0, 12.0, 6.8
## $ Kenya                        <dbl> 58.0, 22.0, 2.0, 1.8
## $ Kiribati                     <int> 21, 34, 1, 1
## $ Kuwait                       <int> 0, 0, 0, 0
## $ Kyrgyzstan                   <dbl> 31.0, 88.0, 6.0, 2.4
## $ Laos                         <dbl> 62.0, 0.0, 123.0, 6.2
## $ Latvia                       <dbl> 281.0, 216.0, 62.0, 10.5
## $ Lebanon                      <dbl> 20.0, 55.0, 31.0, 1.9
## $ Lesotho                      <dbl> 82.0, 50.0, 0.0, 2.8
## $ Liberia                      <dbl> 19.0, 152.0, 2.0, 3.1
## $ Libya                        <int> 0, 0, 0, 0
## $ Lithuania                    <dbl> 343.0, 244.0, 56.0, 12.9
## $ Luxembourg                   <dbl> 236.0, 133.0, 271.0, 11.4
## $ Madagascar                   <dbl> 26.0, 15.0, 4.0, 0.8
## $ Malawi                       <dbl> 8.0, 11.0, 1.0, 1.5
## $ Malaysia                     <dbl> 13.0, 4.0, 0.0, 0.3
## $ Maldives                     <int> 0, 0, 0, 0
## $ Mali                         <dbl> 5.0, 1.0, 1.0, 0.6
## $ Malta                        <dbl> 149.0, 100.0, 120.0, 6.6
## $ Marshall.Islands             <int> 0, 0, 0, 0
## $ Mauritania                   <int> 0, 0, 0, 0
## $ Mauritius                    <dbl> 98.0, 31.0, 18.0, 2.6
## $ Mexico                       <dbl> 238.0, 68.0, 5.0, 5.5
## $ Micronesia                   <dbl> 62.0, 50.0, 18.0, 2.3
## $ Monaco                       <int> 0, 0, 0, 0
## $ Mongolia                     <dbl> 77.0, 189.0, 8.0, 4.9
## $ Montenegro                   <dbl> 31.0, 114.0, 128.0, 4.9
## $ Morocco                      <dbl> 12.0, 6.0, 10.0, 0.5
## $ Mozambique                   <dbl> 47.0, 18.0, 5.0, 1.3
## $ Myanmar                      <dbl> 5.0, 1.0, 0.0, 0.1
## $ Namibia                      <dbl> 376.0, 3.0, 1.0, 6.8
## $ Nauru                        <int> 49, 0, 8, 1
## $ Nepal                        <dbl> 5.0, 6.0, 0.0, 0.2
## $ Netherlands                  <dbl> 251.0, 88.0, 190.0, 9.4
## $ New.Zealand                  <dbl> 203.0, 79.0, 175.0, 9.3
## $ Nicaragua                    <dbl> 78.0, 118.0, 1.0, 3.5
## $ Niger                        <dbl> 3.0, 2.0, 1.0, 0.1
## $ Nigeria                      <dbl> 42.0, 5.0, 2.0, 9.1
## $ Niue                         <int> 188, 200, 7, 7
## $ Norway                       <dbl> 169.0, 71.0, 129.0, 6.7
## $ Oman                         <dbl> 22.0, 16.0, 1.0, 0.7
## $ Pakistan                     <int> 0, 0, 0, 0
## $ Palau                        <dbl> 306.0, 63.0, 23.0, 6.9
## $ Panama                       <dbl> 285.0, 104.0, 18.0, 7.2
## $ Papua.New.Guinea             <dbl> 44.0, 39.0, 1.0, 1.5
## $ Paraguay                     <dbl> 213.0, 117.0, 74.0, 7.3
## $ Peru                         <dbl> 163.0, 160.0, 21.0, 6.1
## $ Philippines                  <dbl> 71.0, 186.0, 1.0, 4.6
## $ Poland                       <dbl> 343.0, 215.0, 56.0, 10.9
## $ Portugal                     <int> 194, 67, 339, 11
## $ Qatar                        <dbl> 1.0, 42.0, 7.0, 0.9
## $ South.Korea                  <dbl> 140.0, 16.0, 9.0, 9.8
## $ Moldova                      <dbl> 109.0, 226.0, 18.0, 6.3
## $ Romania                      <dbl> 297.0, 122.0, 167.0, 10.4
## $ Russian.Federation           <dbl> 247.0, 326.0, 73.0, 11.5
## $ Rwanda                       <dbl> 43.0, 2.0, 0.0, 6.8
## $ St..Kitts...Nevis            <dbl> 194.0, 205.0, 32.0, 7.7
## $ St..Lucia                    <dbl> 171.0, 315.0, 71.0, 10.1
## $ St..Vincent...the.Grenadines <dbl> 120.0, 221.0, 11.0, 6.3
## $ Samoa                        <dbl> 105.0, 18.0, 24.0, 2.6
## $ San.Marino                   <int> 0, 0, 0, 0
## $ Sao.Tome...Principe          <dbl> 56.0, 38.0, 140.0, 4.2
## $ Saudi.Arabia                 <dbl> 0.0, 5.0, 0.0, 0.1
## $ Senegal                      <dbl> 9.0, 1.0, 7.0, 0.3
## $ Serbia                       <dbl> 283.0, 131.0, 127.0, 9.6
## $ Seychelles                   <dbl> 157.0, 25.0, 51.0, 4.1
## $ Sierra.Leone                 <dbl> 25.0, 3.0, 2.0, 6.7
## $ Singapore                    <dbl> 60.0, 12.0, 11.0, 1.5
## $ Slovakia                     <dbl> 196.0, 293.0, 116.0, 11.4
## $ Slovenia                     <dbl> 270.0, 51.0, 276.0, 10.6
## $ Solomon.Islands              <dbl> 56.0, 11.0, 1.0, 1.2
## $ Somalia                      <int> 0, 0, 0, 0
## $ South.Africa                 <dbl> 225.0, 76.0, 81.0, 8.2
## $ Spain                        <int> 284, 157, 112, 10
## $ Sri.Lanka                    <dbl> 16.0, 104.0, 0.0, 2.2
## $ Sudan                        <dbl> 8.0, 13.0, 0.0, 1.7
## $ Suriname                     <dbl> 128.0, 178.0, 7.0, 5.6
## $ Swaziland                    <dbl> 90.0, 2.0, 2.0, 4.7
## $ Sweden                       <dbl> 152.0, 60.0, 186.0, 7.2
## $ Switzerland                  <dbl> 185.0, 100.0, 280.0, 10.2
## $ Syria                        <int> 5, 35, 16, 1
## $ Tajikistan                   <dbl> 2.0, 15.0, 0.0, 0.3
## $ Thailand                     <dbl> 99.0, 258.0, 1.0, 6.4
## $ Macedonia                    <dbl> 106.0, 27.0, 86.0, 3.9
## $ Timor.Leste                  <dbl> 1.0, 1.0, 4.0, 0.1
## $ Togo                         <dbl> 36.0, 2.0, 19.0, 1.3
## $ Tonga                        <dbl> 36.0, 21.0, 5.0, 1.1
## $ Trinidad...Tobago            <dbl> 197.0, 156.0, 7.0, 6.4
## $ Tunisia                      <dbl> 51.0, 3.0, 20.0, 1.3
## $ Turkey                       <dbl> 51.0, 22.0, 7.0, 1.4
## $ Turkmenistan                 <dbl> 19.0, 71.0, 32.0, 2.2
## $ Tuvalu                       <int> 6, 41, 9, 1
## $ Uganda                       <dbl> 45.0, 9.0, 0.0, 8.3
## $ Ukraine                      <dbl> 206.0, 237.0, 45.0, 8.9
## $ United.Arab.Emirates         <dbl> 16.0, 135.0, 5.0, 2.8
## $ United.Kingdom               <dbl> 219.0, 126.0, 195.0, 10.4
## $ Tanzania                     <dbl> 36.0, 6.0, 1.0, 5.7
## $ USA                          <dbl> 249.0, 158.0, 84.0, 8.7
## $ Uruguay                      <dbl> 115.0, 35.0, 220.0, 6.6
## $ Uzbekistan                   <dbl> 25.0, 101.0, 8.0, 2.4
## $ Vanuatu                      <dbl> 21.0, 18.0, 11.0, 0.9
## $ Venezuela                    <dbl> 333.0, 100.0, 3.0, 7.7
## $ Vietnam                      <int> 111, 2, 1, 2
## $ Yemen                        <dbl> 6.0, 0.0, 0.0, 0.1
## $ Zambia                       <dbl> 32.0, 19.0, 4.0, 2.5
## $ Zimbabwe                     <dbl> 64.0, 18.0, 4.0, 4.7
# Reshape `df` from wide to long form, keeping `year` as the identifier
# column. The argument is spelled out as `id.vars` rather than relying on
# partial matching of `id`, which is fragile and flagged by linters.
df1 <- melt(df, id.vars = "year")
# Inspect the structure of the reshaped data.
glimpse(df1)
## Rows: 772
## Columns: 3
## $ year     <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value    <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
# Keep only the rows for the year 2000.
df1 <- filter(df1, year == 2000)
# Give the second and third columns the names "country" and "disease".
colnames(df1)[c(2, 3)] <- c("country", "disease")
# Show the resulting table.
print(df1)
##     year                      country disease
## 1   2000                  Afghanistan       0
## 2   2000                      Albania     132
## 3   2000                      Algeria       0
## 4   2000                      Andorra     138
## 5   2000                       Angola      57
## 6   2000            Antigua...Barbuda     128
## 7   2000                    Argentina      25
## 8   2000                      Armenia     179
## 9   2000                    Australia      72
## 10  2000                      Austria      75
## 11  2000                   Azerbaijan      46
## 12  2000                      Bahamas     176
## 13  2000                      Bahrain      63
## 14  2000                   Bangladesh       0
## 15  2000                     Barbados     173
## 16  2000                      Belarus     373
## 17  2000                      Belgium      84
## 18  2000                       Belize     114
## 19  2000                        Benin       4
## 20  2000                       Bhutan       0
## 21  2000                      Bolivia      41
## 22  2000           Bosnia.Herzegovina     173
## 23  2000                     Botswana      35
## 24  2000                       Brazil     145
## 25  2000                       Brunei       2
## 26  2000                     Bulgaria     252
## 27  2000                 Burkina.Faso       7
## 28  2000                      Burundi       0
## 29  2000                Cote.d.Ivoire       1
## 30  2000                   Cabo.Verde      56
## 31  2000                     Cambodia      65
## 32  2000                     Cameroon       1
## 33  2000                       Canada     122
## 34  2000     Central.African.Republic       2
## 35  2000                         Chad       1
## 36  2000                        Chile     124
## 37  2000                        China     192
## 38  2000                     Colombia      76
## 39  2000                      Comoros       3
## 40  2000                        Congo       1
## 41  2000                 Cook.Islands     254
## 42  2000                   Costa.Rica      87
## 43  2000                      Croatia      87
## 44  2000                         Cuba     137
## 45  2000                       Cyprus     154
## 46  2000               Czech.Republic     170
## 47  2000                  North.Korea       0
## 48  2000                     DR.Congo       3
## 49  2000                      Denmark      81
## 50  2000                     Djibouti      44
## 51  2000                     Dominica     286
## 52  2000           Dominican.Republic     147
## 53  2000                      Ecuador      74
## 54  2000                        Egypt       4
## 55  2000                  El.Salvador      69
## 56  2000            Equatorial.Guinea       0
## 57  2000                      Eritrea       0
## 58  2000                      Estonia     194
## 59  2000                     Ethiopia       3
## 60  2000                         Fiji      35
## 61  2000                      Finland     133
## 62  2000                       France     151
## 63  2000                        Gabon      98
## 64  2000                       Gambia       0
## 65  2000                      Georgia     100
## 66  2000                      Germany     117
## 67  2000                        Ghana       3
## 68  2000                       Greece     112
## 69  2000                      Grenada     438
## 70  2000                    Guatemala      69
## 71  2000                       Guinea       0
## 72  2000                Guinea.Bissau      31
## 73  2000                       Guyana     302
## 74  2000                        Haiti     326
## 75  2000                     Honduras      98
## 76  2000                      Hungary     215
## 77  2000                      Iceland      61
## 78  2000                        India     114
## 79  2000                    Indonesia       1
## 80  2000                         Iran       0
## 81  2000                         Iraq       3
## 82  2000                      Ireland     118
## 83  2000                       Israel      69
## 84  2000                        Italy      42
## 85  2000                      Jamaica      88
## 86  2000                        Japan     202
## 87  2000                       Jordan      21
## 88  2000                   Kazakhstan     246
## 89  2000                        Kenya      22
## 90  2000                     Kiribati      34
## 91  2000                       Kuwait       0
## 92  2000                   Kyrgyzstan      88
## 93  2000                         Laos       0
## 94  2000                       Latvia     216
## 95  2000                      Lebanon      55
## 96  2000                      Lesotho      50
## 97  2000                      Liberia     152
## 98  2000                        Libya       0
## 99  2000                    Lithuania     244
## 100 2000                   Luxembourg     133
## 101 2000                   Madagascar      15
## 102 2000                       Malawi      11
## 103 2000                     Malaysia       4
## 104 2000                     Maldives       0
## 105 2000                         Mali       1
## 106 2000                        Malta     100
## 107 2000             Marshall.Islands       0
## 108 2000                   Mauritania       0
## 109 2000                    Mauritius      31
## 110 2000                       Mexico      68
## 111 2000                   Micronesia      50
## 112 2000                       Monaco       0
## 113 2000                     Mongolia     189
## 114 2000                   Montenegro     114
## 115 2000                      Morocco       6
## 116 2000                   Mozambique      18
## 117 2000                      Myanmar       1
## 118 2000                      Namibia       3
## 119 2000                        Nauru       0
## 120 2000                        Nepal       6
## 121 2000                  Netherlands      88
## 122 2000                  New.Zealand      79
## 123 2000                    Nicaragua     118
## 124 2000                        Niger       2
## 125 2000                      Nigeria       5
## 126 2000                         Niue     200
## 127 2000                       Norway      71
## 128 2000                         Oman      16
## 129 2000                     Pakistan       0
## 130 2000                        Palau      63
## 131 2000                       Panama     104
## 132 2000             Papua.New.Guinea      39
## 133 2000                     Paraguay     117
## 134 2000                         Peru     160
## 135 2000                  Philippines     186
## 136 2000                       Poland     215
## 137 2000                     Portugal      67
## 138 2000                        Qatar      42
## 139 2000                  South.Korea      16
## 140 2000                      Moldova     226
## 141 2000                      Romania     122
## 142 2000           Russian.Federation     326
## 143 2000                       Rwanda       2
## 144 2000            St..Kitts...Nevis     205
## 145 2000                    St..Lucia     315
## 146 2000 St..Vincent...the.Grenadines     221
## 147 2000                        Samoa      18
## 148 2000                   San.Marino       0
## 149 2000          Sao.Tome...Principe      38
## 150 2000                 Saudi.Arabia       5
## 151 2000                      Senegal       1
## 152 2000                       Serbia     131
## 153 2000                   Seychelles      25
## 154 2000                 Sierra.Leone       3
## 155 2000                    Singapore      12
## 156 2000                     Slovakia     293
## 157 2000                     Slovenia      51
## 158 2000              Solomon.Islands      11
## 159 2000                      Somalia       0
## 160 2000                 South.Africa      76
## 161 2000                        Spain     157
## 162 2000                    Sri.Lanka     104
## 163 2000                        Sudan      13
## 164 2000                     Suriname     178
## 165 2000                    Swaziland       2
## 166 2000                       Sweden      60
## 167 2000                  Switzerland     100
## 168 2000                        Syria      35
## 169 2000                   Tajikistan      15
## 170 2000                     Thailand     258
## 171 2000                    Macedonia      27
## 172 2000                  Timor.Leste       1
## 173 2000                         Togo       2
## 174 2000                        Tonga      21
## 175 2000            Trinidad...Tobago     156
## 176 2000                      Tunisia       3
## 177 2000                       Turkey      22
## 178 2000                 Turkmenistan      71
## 179 2000                       Tuvalu      41
## 180 2000                       Uganda       9
## 181 2000                      Ukraine     237
## 182 2000         United.Arab.Emirates     135
## 183 2000               United.Kingdom     126
## 184 2000                     Tanzania       6
## 185 2000                          USA     158
## 186 2000                      Uruguay      35
## 187 2000                   Uzbekistan     101
## 188 2000                      Vanuatu      18
## 189 2000                    Venezuela     100
## 190 2000                      Vietnam       2
## 191 2000                        Yemen       0
## 192 2000                       Zambia      19
## 193 2000                     Zimbabwe      18
# Mean of `disease` over the year-2000 rows, shown as a one-row data frame
# with a single column `m`.
df1 %>%
  filter(year == 2000) %>%
  summarise(m = mean(disease))
##          m
## 1 81.01036
# Count the year-2000 rows whose `disease` value is above the year-2000 mean.
# The mean is computed from the data inside `filter()` instead of pasting in
# the rounded figure from the printed summary (81.01036), so the threshold is
# exact and stays correct if the underlying data change.
result <- df1 %>%
  filter(year == 2000) %>%
  filter(disease > mean(disease)) %>%
  nrow()
print(result)
## [1] 76