# Set the working directory so the relative CSV file names passed to
# read.csv() later in this script resolve.
# NOTE(review): this is a hard-coded, machine-specific path; confirm whether
# the script is meant to be portable before relying on it.
setwd("c:/data")
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the source CSV (EUC-KR encoded); empty fields become NA.
# `na.strings` is written out in full instead of relying on partial matching
# of the abbreviated `na` argument.
food <- read.csv(
  "6110000_서울특별시_07_24_04_P_일반음식점.csv",
  na.strings = "",
  fileEncoding = "euc-kr"
)

# Give the six columns used below English names and drop everything else.
# rename()/select() are namespaced because this script later attaches MASS and
# reshape, which mask dplyr::select and dplyr::rename; with the bare names the
# block would pick up the wrong function when re-run in the same session.
food1 <- food %>%
  dplyr::rename(
    open_date = 인허가일자,
    status = 상세영업상태명,
    close_date = 폐업일자,
    name = 사업장명,
    type = 업태구분명,
    address = 소재지전체주소
  ) %>%
  dplyr::select(open_date, status, name, close_date, type, address)
# Show the type and first values of each column of food1.
str(food1)
## 'data.frame': 474140 obs. of 6 variables:
## $ open_date : int 20200803 20200803 20200803 20200803 20200803 20200803 20200803 20200803 20200803 20200803 ...
## $ status : chr "영업" "영업" "영업" "영업" ...
## $ name : chr "혼밥대왕 마곡점" "꾸어가게생선구이화곡점" "인생갈비탕" "놀부 공유주방 선릉역" ...
## $ close_date: int NA NA NA NA NA NA NA NA NA NA ...
## $ type : chr "한식" "한식" "한식" "한식" ...
## $ address : chr "서울특별시 강서구 마곡동 757 두산더랜드파크 B동 207호" "서울특별시 강서구 화곡동 827-2 1층" "서울특별시 강남구 역삼동 707-34 한신인터밸리24빌딩" "서울특별시 강남구 대치동 896-28" ...
# Compact one-line-per-column preview of food1.
glimpse(food1)
## Rows: 474,140
## Columns: 6
## $ open_date <int> 20200803, 20200803, 20200803, 20200803, 20200803, 20200803,…
## $ status <chr> "영업", "영업", "영업", "영업", "영업", "영업", "영업", "영…
## $ name <chr> "혼밥대왕 마곡점", "꾸어가게생선구이화곡점", "인생갈비탕", …
## $ close_date <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ type <chr> "한식", "한식", "한식", "한식", "일식", "한식", "분식", "기…
## $ address <chr> "서울특별시 강서구 마곡동 757 두산더랜드파크 B동 207호", "…
# Per column, count missing (TRUE) and present (FALSE) values.
summary(is.na(food1))
## open_date status name close_date
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:474140 FALSE:474140 FALSE:474140 FALSE:349716
## TRUE :124424
## type address
## Mode :logical Mode :logical
## FALSE:474119 FALSE:473903
## TRUE :21 TRUE :237
# Frequency of each raw business category (NA values are not counted).
table(food1$type)
##
## 193959.1505 간이주점
## 1 1
## 감성주점 경양식
## 349 47987
## 기타 기타 휴게음식점
## 33313 3
## 김밥(도시락) 까페
## 1537 7904
## 냉면집 다방
## 126 1
## 라이브카페 룸살롱
## 276 1
## 복어취급 분식
## 113 78766
## 뷔페식 식육(숯불구이)
## 2791 1445
## 식품등 수입판매업 식품소분업
## 1 1
## 외국음식전문점(인도,태국등) 이동조리
## 1993 101
## 일반조리판매 일식
## 2 17804
## 전통찻집 정종/대포집/소주방
## 823 12757
## 제과점영업 중국식
## 1 14858
## 출장조리 커피숍
## 370 5
## 키즈카페 탕류(보신용)
## 101 413
## 통닭(치킨) 패밀리레스트랑
## 9550 238
## 패스트푸드 한식
## 3911 200172
## 호프/통닭 횟집
## 34974 1430
# Merge similar business categories into one unified category.
# The rules are checked top to bottom and the first match wins; no rule's
# output is an input of a later rule, so this is equivalent to applying the
# replacements one after another. Rows with a missing type stay NA.
# The restaurant rule lists both "패밀리레스토랑" and "패밀리레스트랑": the
# raw data uses the second spelling, so matching only the first one left that
# category unmerged.
food1 <- food1 %>%
  mutate(type = case_when(
    type %in% c("까페", "다방", "라이브카페", "커피숍", "카페") ~ "까페",
    type %in% c("통닭(치킨)", "호프/통닭") ~ "치킨",
    type %in% c("일식", "회집", "횟집") ~ "회집",
    type %in% c("경양식", "패밀리레스토랑", "패밀리레스트랑") ~ "레스토랑",
    type %in% c("정종/대포집/소주방") ~ "소주방",
    type == "외국음식전문점(인도,태국등)" ~ "외국음식전문점",
    # Uninformative or garbage category labels are treated as missing.
    type %in% c("기타", "193959.1505") ~ NA_character_,
    TRUE ~ type
  ))
# Frequency of each category after the recoding step above.
table(food1$type)
##
## 간이주점 감성주점 기타 휴게음식점 김밥(도시락)
## 1 349 3 1537
## 까페 냉면집 레스토랑 룸살롱
## 8186 126 47987 1
## 복어취급 분식 뷔페식 소주방
## 113 78766 2791 12757
## 식육(숯불구이) 식품등 수입판매업 식품소분업 외국음식전문점
## 1445 1 1 1993
## 이동조리 일반조리판매 전통찻집 제과점영업
## 101 2 823 1
## 중국식 출장조리 치킨 키즈카페
## 14858 370 44524 101
## 탕류(보신용) 패밀리레스트랑 패스트푸드 한식
## 413 238 3911 200172
## 회집
## 19234
# Create derived variables.
range(food1$open_date)
## [1] 19000531 20220401
# Licence dates earlier than 19700301 are treated as missing.
food1$open_date[food1$open_date < 19700301] <- NA
table(is.na(food1$open_date))
##
## FALSE TRUE
## 473735 405
# The year is taken as the first four characters of the date value.
food1$open_year <- substr(food1$open_date, 1, 4)
range(food1$close_date, na.rm = TRUE)
## [1] 2000126 20220401
# NOTE(review): the minimum close_date printed above has only 7 digits, so not
# every value is a well-formed 8-digit date; confirm that the first four
# characters are still the intended year for such rows.
food1$close_year <- substr(food1$close_date, 1, 4)
# Find the opening year, category and name of the oldest restaurant that is
# still operating. The minimum is taken after restricting to open businesses
# with a valid opening date, so the two filter steps must stay separate.
food1 %>%
  filter(status == "영업", !is.na(open_date)) %>%
  filter(open_date == min(open_date)) %>%
  select(type, open_year, name)
## type open_year name
## 1 한식 1970 경원집
# Find the share (%) of the three most common categories among all restaurants
# that have a valid opening date and a known category.
food1 %>%
  filter(!is.na(open_date), !is.na(type)) %>%
  group_by(type) %>%
  summarise(n = n()) %>%
  mutate(total = sum(n)) %>%
  mutate(pct = n / total * 100) %>%
  arrange(desc(n)) %>%
  head(3)
## # A tibble: 3 × 4
## type n total pct
## <chr> <int> <int> <dbl>
## 1 한식 199971 440404 45.4
## 2 분식 78725 440404 17.9
## 3 레스토랑 47934 440404 10.9
# Find the years with the most openings and the most closures, together with
# the number of openings and closures in those years.
# Openings: the three opening years with the highest counts.
food1 %>%
  filter(!is.na(open_date)) %>%
  group_by(open_year) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(3)
## # A tibble: 3 × 2
## open_year n
## <chr> <int>
## 1 2001 18818
## 2 1994 17978
## 3 1999 17882
# Closures: the three closing years with the highest counts, among rows that
# have a closing date.
food1 %>%
  filter(!is.na(close_date)) %>%
  group_by(close_year) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  head(3)
## # A tibble: 3 × 2
## close_year n
## <chr> <int>
## 1 1999 15848
## 2 2000 15768
## 3 2005 14943
# Load the housing data and keep the first 80% of its rows as house1.
house <- read.csv("housing.csv")
nrow(house)
## [1] 20640
rownum <- nrow(house) * 0.8
house1 <- house[1:rownum, ]
glimpse(house1)
## Rows: 16,512
## Columns: 10
## $ longitude <dbl> -122.23, -122.22, -122.24, -122.25, -122.25, -122.2…
## $ latitude <dbl> 37.88, 37.86, 37.85, 37.85, 37.85, 37.85, 37.84, 37…
## $ housing_median_age <int> 41, 21, 52, 52, 52, 52, 52, 52, 42, 52, 52, 52, 52,…
## $ total_rooms <int> 880, 7099, 1467, 1274, 1627, 919, 2535, 3104, 2555,…
## $ total_bedrooms <int> 129, 1106, 190, 235, 280, 213, 489, 687, 665, 707, …
## $ population <int> 322, 2401, 496, 558, 565, 413, 1094, 1157, 1206, 15…
## $ households <int> 126, 1138, 177, 219, 259, 193, 514, 647, 595, 714, …
## $ median_income <dbl> 8.3252, 8.3014, 7.2574, 5.6431, 3.8462, 4.0368, 3.6…
## $ median_house_value <int> 452600, 358500, 352100, 341300, 342200, 269700, 299…
## $ ocean_proximity <chr> "NEAR BAY", "NEAR BAY", "NEAR BAY", "NEAR BAY", "NE…
# Missing-value count per column before imputation.
colSums(is.na(house1))
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 159 0 0 0
## median_house_value ocean_proximity
## 0 0
# Standard deviation of total_bedrooms before replacing missing values.
df1 <- sd(house1$total_bedrooms, na.rm = TRUE)
df1
## [1] 435.9006
#435.9006
# Compute the median and store it in df2.
df2 <- median(house1$total_bedrooms, na.rm = TRUE)
df2
## [1] 436
#436
# Replace the missing values with the median, then check that none remain.
house1$total_bedrooms[is.na(house1$total_bedrooms)] <- df2
colSums(is.na(house1))
## longitude latitude housing_median_age total_rooms
## 0 0 0 0
## total_bedrooms population households median_income
## 0 0 0 0
## median_house_value ocean_proximity
## 0 0
# Store the standard deviation after imputation in df3 and show it.
df3 <- sd(house1$total_bedrooms)
df3
## [1] 433.9254
#433.9254
# Submit the answer using print and cat.
# df4 holds the difference between the two standard deviations.
df4 <- df1 - df3
print(df4)
## [1] 1.975147
cat(df4)
## 1.975147
# Keep the first 70% of the housing rows as house2.
rownum1 <- nrow(house) * 0.7
rownum1
## [1] 14448
house2 <- house[1:rownum1, ]
quantile(house2$housing_median_age)
## 0% 25% 50% 75% 100%
## 1 19 30 38 52
# First quartile (25%) of housing_median_age as a plain, unnamed number.
df5 <- quantile(house2$housing_median_age, probs = 0.25)[[1]]
print(df5)
## [1] 19
cat(df5)
## 19
# Load the passenger data and preview its columns.
titanic <- read.csv("train100.csv")
glimpse(titanic)
## Rows: 891
## Columns: 11
## $ PassengerId <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ Survived <int> 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1…
## $ Pclass <int> 3, 1, 3, 1, 3, 3, 1, 3, 3, 2, 3, 1, 3, 3, 3, 2, 3, 2, 3, 3…
## $ Name <chr> "Braund, Mr. Owen Harris", "Cumings, Mrs. John Bradley (Fl…
## $ Sex <chr> "male", "female", "female", "female", "male", "male", "mal…
## $ Age <dbl> 22, 38, 26, 35, 35, NA, 54, 2, 27, 14, 4, 58, 20, 39, 14, …
## $ SibSp <int> 1, 1, 0, 1, 0, 0, 0, 3, 0, 1, 1, 0, 0, 1, 0, 0, 4, 0, 1, 0…
## $ Parch <int> 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 5, 0, 0, 1, 0, 0, 0…
## $ Ticket <chr> "A/5 21171", "PC 17599", "STON/O2. 3101282", "113803", "37…
## $ Fare <dbl> 7.2500, 71.2833, 7.9250, 53.1000, 8.0500, 8.4583, 51.8625,…
## $ Embarked <chr> "S", "C", "S", "S", "S", "Q", "S", "S", "S", "C", "S", "S"…
# Missing-value count per column.
colSums(is.na(titanic))
## PassengerId Survived Pclass Name Sex Age
## 0 0 0 0 0 177
## SibSp Parch Ticket Fare Embarked
## 0 0 0 0 0
# Convert the three categorical columns to factors in one step.
titanic[c("Embarked", "Sex", "Pclass")] <-
  lapply(titanic[c("Embarked", "Sex", "Pclass")], as.factor)
summary(titanic)
## PassengerId Survived Pclass Name Sex
## Min. : 1.0 Min. :0.0000 1:216 Length:891 female:314
## 1st Qu.:223.5 1st Qu.:0.0000 2:184 Class :character male :577
## Median :446.0 Median :0.0000 3:491 Mode :character
## Mean :446.0 Mean :0.3838
## 3rd Qu.:668.5 3rd Qu.:1.0000
## Max. :891.0 Max. :1.0000
##
## Age SibSp Parch Ticket
## Min. : 0.42 Min. :0.000 Min. :0.0000 Length:891
## 1st Qu.:20.12 1st Qu.:0.000 1st Qu.:0.0000 Class :character
## Median :28.00 Median :0.000 Median :0.0000 Mode :character
## Mean :29.70 Mean :0.523 Mean :0.3816
## 3rd Qu.:38.00 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :80.00 Max. :8.000 Max. :6.0000
## NA's :177
## Fare Embarked
## Min. : 0.00 : 2
## 1st Qu.: 7.91 C:168
## Median : 14.45 Q: 77
## Mean : 32.20 S:644
## 3rd Qu.: 31.00
## Max. :512.33
##
# Embarked has an empty-string level in addition to C, Q and S.
table(titanic$Embarked)
##
## C Q S
## 2 168 77 644
# Total number of passengers, used as the denominator below.
df <- nrow(titanic)
# Count and percentage of rows whose Age is NA or an empty string.
df1 <- titanic %>%
  filter(is.na(Age) | Age == "") %>%
  summarise(n = n()) %>%
  mutate(pct = n / df * 100)
# Same count and percentage for Embarked.
df2 <- titanic %>%
  filter(is.na(Embarked) | Embarked == "") %>%
  summarise(n = n()) %>%
  mutate(pct = n / df * 100)
df1
df2
## n pct
## 1 177 19.86532
## n pct
## 1 2 0.2244669
# Name of the sixth column of titanic.
df3 <- names(titanic)[6]
print(df3)
## [1] "Age"
library(MASS)
##
## 다음의 패키지를 부착합니다: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Load the Boston data set and preview its columns.
data("Boston")
glimpse(Boston)
## Rows: 506
## Columns: 14
## $ crim <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nox <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ black <dbl> 396.90, 396.90, 392.83, 394.63, 396.90, 394.12, 395.60, 396.90…
## $ lstat <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
# Sort by crime rate, highest first, and show the top rows.
boston1 <- arrange(Boston, desc(crim))
head(boston1)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 88.9762 0 18.1 0 0.671 6.968 91.9 1.4165 24 666 20.2 396.90 17.21
## 2 73.5341 0 18.1 0 0.679 5.957 100.0 1.8026 24 666 20.2 16.45 20.62
## 3 67.9208 0 18.1 0 0.693 5.683 100.0 1.4254 24 666 20.2 384.97 22.98
## 4 51.1358 0 18.1 0 0.597 5.757 100.0 1.4130 24 666 20.2 2.60 10.11
## 5 45.7461 0 18.1 0 0.693 4.519 100.0 1.6582 24 666 20.2 88.27 36.98
## 6 41.5292 0 18.1 0 0.693 5.531 85.4 1.6074 24 666 20.2 329.46 27.38
## medv
## 1 10.4
## 2 8.8
## 3 5.0
## 4 15.0
## 5 7.0
## 6 8.5
# Replace the ten largest crime rates with the 10th-largest value. boston1 is
# sorted by crim in descending order, so rows 1:10 hold the ten largest values.
boston1$crim[10]
## [1] 25.9406
# Assign from the element itself rather than from a literal copied out of the
# printed output: printing shows a limited number of digits, and a literal
# would silently go stale if the data changed.
boston1$crim[1:10] <- boston1$crim[10]
boston1 %>% head(10)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 25.9406 0 18.1 0 0.671 6.968 91.9 1.4165 24 666 20.2 396.90 17.21
## 2 25.9406 0 18.1 0 0.679 5.957 100.0 1.8026 24 666 20.2 16.45 20.62
## 3 25.9406 0 18.1 0 0.693 5.683 100.0 1.4254 24 666 20.2 384.97 22.98
## 4 25.9406 0 18.1 0 0.597 5.757 100.0 1.4130 24 666 20.2 2.60 10.11
## 5 25.9406 0 18.1 0 0.693 4.519 100.0 1.6582 24 666 20.2 88.27 36.98
## 6 25.9406 0 18.1 0 0.693 5.531 85.4 1.6074 24 666 20.2 329.46 27.38
## 7 25.9406 0 18.1 0 0.693 5.453 100.0 1.4896 24 666 20.2 396.90 30.59
## 8 25.9406 0 18.1 0 0.679 6.202 78.7 1.8629 24 666 20.2 18.82 14.52
## 9 25.9406 0 18.1 0 0.597 5.155 100.0 1.5894 24 666 20.2 210.97 20.08
## 10 25.9406 0 18.1 0 0.679 5.304 89.1 1.6475 24 666 20.2 127.36 26.64
## medv
## 1 10.4
## 2 8.8
## 3 5.0
## 4 15.0
## 5 7.0
## 6 8.5
## 7 5.0
## 8 10.9
## 9 16.3
## 10 10.4
# MASS masks dplyr::select once it is attached, so point the bare name back at
# the dplyr version for the rest of the script.
select <- dplyr::select
# Mean crime rate over the rows whose age value is at least 80.
df <- boston1 %>%
  filter(age >= 80) %>%
  summarise(m = mean(crim))
df
## m
## 1 5.759387
print(df[[1]])
## [1] 5.759387
# Load the insurance data and confirm there are no missing values.
insurance <- read.csv("insurance.csv")
colSums(is.na(insurance))
## age sex bmi children smoker region charges
## 0 0 0 0 0 0 0
# Mean of the charges.
avg <- mean(insurance$charges)
avg
## [1] 13270.42
# Standard deviation of the charges.
# NOTE(review): the variable `sd` shares its name with the sd() function; calls
# to sd() still find the function, but a distinct name would be clearer.
sd <- sd(insurance$charges)
sd
## [1] 12110.01
# Keep the rows whose charges lie at least 1.5 standard deviations away from
# the mean, on either side.
insurance1 <- insurance %>%
  filter(charges <= avg - 1.5 * sd | charges >= avg + 1.5 * sd)
# Total charges of those rows.
sum(insurance1$charges)
## [1] 6421430
print(sum(insurance1$charges))
## [1] 6421430
# Load mtcars and preview its columns.
data(mtcars)
glimpse(mtcars)
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
# Min-max scale wt to the 0-1 range and count the rows whose scaled value is
# above 0.5.
result <- mtcars %>%
  mutate(min_max = (wt - min(wt)) / (max(wt) - min(wt))) %>%
  filter(min_max > 0.5) %>%
  nrow()
print(result)
## [1] 11
library(mlbench)
# Load the diabetes data set under a shorter name and preview its columns.
data("PimaIndiansDiabetes")
pima <- PimaIndiansDiabetes
glimpse(pima)
## Rows: 768
## Columns: 9
## $ pregnant <dbl> 6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10, 10, 1, 5, 7, 0, 7, 1, 1…
## $ glucose <dbl> 148, 85, 183, 89, 137, 116, 78, 115, 197, 125, 110, 168, 139,…
## $ pressure <dbl> 72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0,…
## $ triceps <dbl> 35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0…
## $ insulin <dbl> 0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230…
## $ mass <dbl> 33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0.0, 37…
## $ pedigree <dbl> 0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158…
## $ age <dbl> 50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 3…
## $ diabetes <fct> pos, neg, pos, neg, pos, neg, pos, neg, pos, pos, neg, pos, n…
# Missing-value count per column.
colSums(is.na(pima))
## pregnant glucose pressure triceps insulin mass pedigree age
## 0 0 0 0 0 0 0 0
## diabetes
## 0
# Age classes as character labels: "3" for age >= 60, "2" for 41-59, "1"
# otherwise.
pima1 <- pima %>%
  mutate(age_class = if_else(age >= 60, "3", if_else(age >= 41, "2", "1")))
table(pima1$age_class)
##
## 1 2 3
## 574 162 32
# Per age class: number of rows, number of positive diabetes cases, and the
# positive rate in percent.
df <- pima1 %>%
  group_by(age_class) %>%
  summarise(n = n(), fre = sum(diabetes == "pos")) %>%
  mutate(ill_rate = fre / n * 100)
df
## # A tibble: 3 × 4
## age_class n fre ill_rate
## <chr> <int> <int> <dbl>
## 1 1 574 166 28.9
## 2 2 162 93 57.4
## 3 3 32 9 28.1
# Print the positive rate in the second row of df, rounded to one decimal.
# round() is applied before print() so that only the rounded value is emitted;
# with print() innermost the unrounded value was printed as well.
print(round(df$ill_rate[2], 1))
## [1] 57.4
library(gapminder)
# Preview the columns of gapminder.
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Mean life expectancy over all 2002 rows. It is kept in a variable so the
# comparison below uses the computed value instead of a rounded figure copied
# from the printed tibble.
mean_life_2002 <- gapminder %>%
  filter(year == 2002) %>%
  summarise(m = mean(lifeExp))
mean_life_2002
## # A tibble: 1 × 1
## m
## <dbl>
## 1 65.7
# Count the countries whose 2002 life expectancy is at or above that mean.
result <- gapminder %>%
  filter(year == 2002) %>%
  group_by(country) %>%
  summarise(m = mean(lifeExp)) %>%
  filter(m >= mean_life_2002$m) %>%
  NROW()
print(result)
## [1] 85
# Load the disease table (one row per year, one column per country) and
# preview its columns.
df <- read.csv("disease.csv")
glimpse(df)
## Rows: 4
## Columns: 194
## $ year <int> 1999, 2000, 2001, 2002
## $ Afghanistan <int> 0, 0, 0, 0
## $ Albania <dbl> 89.0, 132.0, 54.0, 4.9
## $ Algeria <dbl> 25.0, 0.0, 14.0, 0.7
## $ Andorra <dbl> 245.0, 138.0, 312.0, 12.4
## $ Angola <dbl> 217.0, 57.0, 45.0, 5.9
## $ Antigua...Barbuda <dbl> 102.0, 128.0, 45.0, 4.9
## $ Argentina <dbl> 193.0, 25.0, 221.0, 8.3
## $ Armenia <dbl> 21.0, 179.0, 11.0, 3.8
## $ Australia <dbl> 261.0, 72.0, 212.0, 10.4
## $ Austria <dbl> 279.0, 75.0, 191.0, 9.7
## $ Azerbaijan <dbl> 21.0, 46.0, 5.0, 1.3
## $ Bahamas <dbl> 122.0, 176.0, 51.0, 6.3
## $ Bahrain <int> 42, 63, 7, 2
## $ Bangladesh <int> 0, 0, 0, 0
## $ Barbados <dbl> 143.0, 173.0, 36.0, 6.3
## $ Belarus <dbl> 142.0, 373.0, 42.0, 14.4
## $ Belgium <dbl> 295.0, 84.0, 212.0, 10.5
## $ Belize <dbl> 263.0, 114.0, 8.0, 6.8
## $ Benin <dbl> 34.0, 4.0, 13.0, 1.1
## $ Bhutan <dbl> 23.0, 0.0, 0.0, 0.4
## $ Bolivia <dbl> 167.0, 41.0, 8.0, 3.8
## $ Bosnia.Herzegovina <dbl> 76.0, 173.0, 8.0, 4.6
## $ Botswana <dbl> 173.0, 35.0, 35.0, 5.4
## $ Brazil <dbl> 245.0, 145.0, 16.0, 7.2
## $ Brunei <dbl> 31.0, 2.0, 1.0, 0.6
## $ Bulgaria <dbl> 231.0, 252.0, 94.0, 10.3
## $ Burkina.Faso <dbl> 25.0, 7.0, 7.0, 4.3
## $ Burundi <dbl> 88.0, 0.0, 0.0, 6.3
## $ Cote.d.Ivoire <int> 37, 1, 7, 4
## $ Cabo.Verde <int> 144, 56, 16, 4
## $ Cambodia <dbl> 57.0, 65.0, 1.0, 2.2
## $ Cameroon <dbl> 147.0, 1.0, 4.0, 5.8
## $ Canada <dbl> 240.0, 122.0, 100.0, 8.2
## $ Central.African.Republic <dbl> 17.0, 2.0, 1.0, 1.8
## $ Chad <dbl> 15.0, 1.0, 1.0, 0.4
## $ Chile <dbl> 130.0, 124.0, 172.0, 7.6
## $ China <int> 79, 192, 8, 5
## $ Colombia <dbl> 159.0, 76.0, 3.0, 4.2
## $ Comoros <dbl> 1.0, 3.0, 1.0, 0.1
## $ Congo <dbl> 76.0, 1.0, 9.0, 1.7
## $ Cook.Islands <dbl> 0.0, 254.0, 74.0, 5.9
## $ Costa.Rica <dbl> 149.0, 87.0, 11.0, 4.4
## $ Croatia <dbl> 230.0, 87.0, 254.0, 10.2
## $ Cuba <dbl> 93.0, 137.0, 5.0, 4.2
## $ Cyprus <dbl> 192.0, 154.0, 113.0, 8.2
## $ Czech.Republic <dbl> 361.0, 170.0, 134.0, 11.8
## $ North.Korea <int> 0, 0, 0, 0
## $ DR.Congo <dbl> 32.0, 3.0, 1.0, 2.3
## $ Denmark <dbl> 224.0, 81.0, 278.0, 10.4
## $ Djibouti <dbl> 15.0, 44.0, 3.0, 1.1
## $ Dominica <dbl> 52.0, 286.0, 26.0, 6.6
## $ Dominican.Republic <dbl> 193.0, 147.0, 9.0, 6.2
## $ Ecuador <dbl> 162.0, 74.0, 3.0, 4.2
## $ Egypt <dbl> 6.0, 4.0, 1.0, 0.2
## $ El.Salvador <dbl> 52.0, 69.0, 2.0, 2.2
## $ Equatorial.Guinea <dbl> 92.0, 0.0, 233.0, 5.8
## $ Eritrea <dbl> 18.0, 0.0, 0.0, 0.5
## $ Estonia <dbl> 224.0, 194.0, 59.0, 9.5
## $ Ethiopia <dbl> 20.0, 3.0, 0.0, 0.7
## $ Fiji <int> 77, 35, 1, 2
## $ Finland <int> 263, 133, 97, 10
## $ France <dbl> 127.0, 151.0, 370.0, 11.8
## $ Gabon <dbl> 347.0, 98.0, 59.0, 8.9
## $ Gambia <dbl> 8.0, 0.0, 1.0, 2.4
## $ Georgia <dbl> 52.0, 100.0, 149.0, 5.4
## $ Germany <dbl> 346.0, 117.0, 175.0, 11.3
## $ Ghana <dbl> 31.0, 3.0, 10.0, 1.8
## $ Greece <dbl> 133.0, 112.0, 218.0, 8.3
## $ Grenada <dbl> 199.0, 438.0, 28.0, 11.9
## $ Guatemala <dbl> 53.0, 69.0, 2.0, 2.2
## $ Guinea <dbl> 9.0, 0.0, 2.0, 0.2
## $ Guinea.Bissau <dbl> 28.0, 31.0, 21.0, 2.5
## $ Guyana <dbl> 93.0, 302.0, 1.0, 7.1
## $ Haiti <dbl> 1.0, 326.0, 1.0, 5.9
## $ Honduras <int> 69, 98, 2, 3
## $ Hungary <dbl> 234.0, 215.0, 185.0, 11.3
## $ Iceland <dbl> 233.0, 61.0, 78.0, 6.6
## $ India <dbl> 9.0, 114.0, 0.0, 2.2
## $ Indonesia <dbl> 5.0, 1.0, 0.0, 0.1
## $ Iran <int> 0, 0, 0, 0
## $ Iraq <dbl> 9.0, 3.0, 0.0, 0.2
## $ Ireland <dbl> 313.0, 118.0, 165.0, 11.4
## $ Israel <dbl> 63.0, 69.0, 9.0, 2.5
## $ Italy <dbl> 85.0, 42.0, 237.0, 6.5
## $ Jamaica <dbl> 82.0, 88.0, 9.0, 3.4
## $ Japan <int> 77, 202, 16, 7
## $ Jordan <dbl> 6.0, 21.0, 1.0, 0.5
## $ Kazakhstan <dbl> 124.0, 246.0, 12.0, 6.8
## $ Kenya <dbl> 58.0, 22.0, 2.0, 1.8
## $ Kiribati <int> 21, 34, 1, 1
## $ Kuwait <int> 0, 0, 0, 0
## $ Kyrgyzstan <dbl> 31.0, 88.0, 6.0, 2.4
## $ Laos <dbl> 62.0, 0.0, 123.0, 6.2
## $ Latvia <dbl> 281.0, 216.0, 62.0, 10.5
## $ Lebanon <dbl> 20.0, 55.0, 31.0, 1.9
## $ Lesotho <dbl> 82.0, 50.0, 0.0, 2.8
## $ Liberia <dbl> 19.0, 152.0, 2.0, 3.1
## $ Libya <int> 0, 0, 0, 0
## $ Lithuania <dbl> 343.0, 244.0, 56.0, 12.9
## $ Luxembourg <dbl> 236.0, 133.0, 271.0, 11.4
## $ Madagascar <dbl> 26.0, 15.0, 4.0, 0.8
## $ Malawi <dbl> 8.0, 11.0, 1.0, 1.5
## $ Malaysia <dbl> 13.0, 4.0, 0.0, 0.3
## $ Maldives <int> 0, 0, 0, 0
## $ Mali <dbl> 5.0, 1.0, 1.0, 0.6
## $ Malta <dbl> 149.0, 100.0, 120.0, 6.6
## $ Marshall.Islands <int> 0, 0, 0, 0
## $ Mauritania <int> 0, 0, 0, 0
## $ Mauritius <dbl> 98.0, 31.0, 18.0, 2.6
## $ Mexico <dbl> 238.0, 68.0, 5.0, 5.5
## $ Micronesia <dbl> 62.0, 50.0, 18.0, 2.3
## $ Monaco <int> 0, 0, 0, 0
## $ Mongolia <dbl> 77.0, 189.0, 8.0, 4.9
## $ Montenegro <dbl> 31.0, 114.0, 128.0, 4.9
## $ Morocco <dbl> 12.0, 6.0, 10.0, 0.5
## $ Mozambique <dbl> 47.0, 18.0, 5.0, 1.3
## $ Myanmar <dbl> 5.0, 1.0, 0.0, 0.1
## $ Namibia <dbl> 376.0, 3.0, 1.0, 6.8
## $ Nauru <int> 49, 0, 8, 1
## $ Nepal <dbl> 5.0, 6.0, 0.0, 0.2
## $ Netherlands <dbl> 251.0, 88.0, 190.0, 9.4
## $ New.Zealand <dbl> 203.0, 79.0, 175.0, 9.3
## $ Nicaragua <dbl> 78.0, 118.0, 1.0, 3.5
## $ Niger <dbl> 3.0, 2.0, 1.0, 0.1
## $ Nigeria <dbl> 42.0, 5.0, 2.0, 9.1
## $ Niue <int> 188, 200, 7, 7
## $ Norway <dbl> 169.0, 71.0, 129.0, 6.7
## $ Oman <dbl> 22.0, 16.0, 1.0, 0.7
## $ Pakistan <int> 0, 0, 0, 0
## $ Palau <dbl> 306.0, 63.0, 23.0, 6.9
## $ Panama <dbl> 285.0, 104.0, 18.0, 7.2
## $ Papua.New.Guinea <dbl> 44.0, 39.0, 1.0, 1.5
## $ Paraguay <dbl> 213.0, 117.0, 74.0, 7.3
## $ Peru <dbl> 163.0, 160.0, 21.0, 6.1
## $ Philippines <dbl> 71.0, 186.0, 1.0, 4.6
## $ Poland <dbl> 343.0, 215.0, 56.0, 10.9
## $ Portugal <int> 194, 67, 339, 11
## $ Qatar <dbl> 1.0, 42.0, 7.0, 0.9
## $ South.Korea <dbl> 140.0, 16.0, 9.0, 9.8
## $ Moldova <dbl> 109.0, 226.0, 18.0, 6.3
## $ Romania <dbl> 297.0, 122.0, 167.0, 10.4
## $ Russian.Federation <dbl> 247.0, 326.0, 73.0, 11.5
## $ Rwanda <dbl> 43.0, 2.0, 0.0, 6.8
## $ St..Kitts...Nevis <dbl> 194.0, 205.0, 32.0, 7.7
## $ St..Lucia <dbl> 171.0, 315.0, 71.0, 10.1
## $ St..Vincent...the.Grenadines <dbl> 120.0, 221.0, 11.0, 6.3
## $ Samoa <dbl> 105.0, 18.0, 24.0, 2.6
## $ San.Marino <int> 0, 0, 0, 0
## $ Sao.Tome...Principe <dbl> 56.0, 38.0, 140.0, 4.2
## $ Saudi.Arabia <dbl> 0.0, 5.0, 0.0, 0.1
## $ Senegal <dbl> 9.0, 1.0, 7.0, 0.3
## $ Serbia <dbl> 283.0, 131.0, 127.0, 9.6
## $ Seychelles <dbl> 157.0, 25.0, 51.0, 4.1
## $ Sierra.Leone <dbl> 25.0, 3.0, 2.0, 6.7
## $ Singapore <dbl> 60.0, 12.0, 11.0, 1.5
## $ Slovakia <dbl> 196.0, 293.0, 116.0, 11.4
## $ Slovenia <dbl> 270.0, 51.0, 276.0, 10.6
## $ Solomon.Islands <dbl> 56.0, 11.0, 1.0, 1.2
## $ Somalia <int> 0, 0, 0, 0
## $ South.Africa <dbl> 225.0, 76.0, 81.0, 8.2
## $ Spain <int> 284, 157, 112, 10
## $ Sri.Lanka <dbl> 16.0, 104.0, 0.0, 2.2
## $ Sudan <dbl> 8.0, 13.0, 0.0, 1.7
## $ Suriname <dbl> 128.0, 178.0, 7.0, 5.6
## $ Swaziland <dbl> 90.0, 2.0, 2.0, 4.7
## $ Sweden <dbl> 152.0, 60.0, 186.0, 7.2
## $ Switzerland <dbl> 185.0, 100.0, 280.0, 10.2
## $ Syria <int> 5, 35, 16, 1
## $ Tajikistan <dbl> 2.0, 15.0, 0.0, 0.3
## $ Thailand <dbl> 99.0, 258.0, 1.0, 6.4
## $ Macedonia <dbl> 106.0, 27.0, 86.0, 3.9
## $ Timor.Leste <dbl> 1.0, 1.0, 4.0, 0.1
## $ Togo <dbl> 36.0, 2.0, 19.0, 1.3
## $ Tonga <dbl> 36.0, 21.0, 5.0, 1.1
## $ Trinidad...Tobago <dbl> 197.0, 156.0, 7.0, 6.4
## $ Tunisia <dbl> 51.0, 3.0, 20.0, 1.3
## $ Turkey <dbl> 51.0, 22.0, 7.0, 1.4
## $ Turkmenistan <dbl> 19.0, 71.0, 32.0, 2.2
## $ Tuvalu <int> 6, 41, 9, 1
## $ Uganda <dbl> 45.0, 9.0, 0.0, 8.3
## $ Ukraine <dbl> 206.0, 237.0, 45.0, 8.9
## $ United.Arab.Emirates <dbl> 16.0, 135.0, 5.0, 2.8
## $ United.Kingdom <dbl> 219.0, 126.0, 195.0, 10.4
## $ Tanzania <dbl> 36.0, 6.0, 1.0, 5.7
## $ USA <dbl> 249.0, 158.0, 84.0, 8.7
## $ Uruguay <dbl> 115.0, 35.0, 220.0, 6.6
## $ Uzbekistan <dbl> 25.0, 101.0, 8.0, 2.4
## $ Vanuatu <dbl> 21.0, 18.0, 11.0, 0.9
## $ Venezuela <dbl> 333.0, 100.0, 3.0, 7.7
## $ Vietnam <int> 111, 2, 1, 2
## $ Yemen <dbl> 6.0, 0.0, 0.0, 0.1
## $ Zambia <dbl> 32.0, 19.0, 4.0, 2.5
## $ Zimbabwe <dbl> 64.0, 18.0, 4.0, 4.7
# Open the spreadsheet-style viewer only in interactive sessions; View() is an
# interactive tool and has no use when the script is run as a batch job.
if (interactive()) {
  View(df)
}
library(reshape)
##
## 다음의 패키지를 부착합니다: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
# Reshape to long form: one row per (year, country column) pair, with the
# former column name in `variable` and the cell in `value`.
# `id.vars` is written out in full instead of relying on partial matching of
# the abbreviated `id` argument.
df1 <- melt(df, id.vars = "year")
glimpse(df1)
## Rows: 772
## Columns: 3
## $ year <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
# Missing-value count per column of the long table.
colSums(is.na(df1))
## year variable value
## 0 0 0
# Open the viewer only in interactive sessions; View() is an interactive tool.
if (interactive()) {
  View(df1)
}
# Give the melted columns meaningful names.
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year" "country" "disease"
# Mean disease value over all countries in 2000.
df1 %>% filter(year == 2000) %>% summarise(m = mean(disease))
## m
## 1 81.01036
# Count the countries above that mean. The mean is recomputed inside the
# pipeline instead of pasting the printed value, which shows only a limited
# number of digits and would go stale if the data changed.
result <- df1 %>%
  filter(year == 2000) %>%
  filter(disease > mean(disease)) %>%
  NROW()
print(result)
## [1] 76