# Empty the global environment and list it to confirm nothing is left.
rm(list = ls())
ls()
## character(0)
# Switch to the data directory and echo the active working directory.
setwd(dir = "c:/data")
getwd()
## [1] "c:/data"
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(psych)
##
## 다음의 패키지를 부착합니다: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(hflights)
library(lubridate)
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## 다음의 패키지를 부착합니다: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(reshape2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ purrr 1.0.1 ✔ tibble 3.2.1
## ✔ readr 2.1.4 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ plyr::arrange() masks dplyr::arrange()
## ✖ purrr::compact() masks plyr::compact()
## ✖ plyr::count() masks dplyr::count()
## ✖ plyr::desc() masks dplyr::desc()
## ✖ plyr::failwith() masks dplyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ plyr::id() masks dplyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ plyr::mutate() masks dplyr::mutate()
## ✖ plyr::rename() masks dplyr::rename()
## ✖ plyr::summarise() masks dplyr::summarise()
## ✖ plyr::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Handy summary helpers: sd() standard deviation, var() variance,
# length() number of elements, range() smallest and largest value,
# cor() correlation coefficient, sample() random draw.
# sample() draws at random; calling set.seed(1) right before sample()
# pins the draw, so the same numbers come back on every run.
sample(x = 1:45, size = 6)
## [1] 29 27 4 23 5 40
sample(x = 1:45, size = 6)
## [1] 19 45 8 9 43 12
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1] 4 39 1 34 23 14
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1] 4 39 1 34 23 14
# Material added up to p.138
# Matrices
m <- matrix(1:6, nrow = 3)
# Keep the rows whose first column is > 1 and whose second column is > 5.
# The trailing comma makes this a row filter; without it the 3-element
# logical mask is silently recycled over all 6 cells of the matrix.
m[m[, 1] > 1 & m[, 2] > 5, ]
## [1] 3 6
# rep(value to repeat, number of repetitions)
rep(1, times = 3)
## [1] 1 1 1
rep(2:5, times = 3)
## [1] 2 3 4 5 2 3 4 5 2 3 4 5
# seq(from, to): counts from the first argument to the second in steps of 1
seq(1, 3)
## [1] 1 2 3
# length.out = n spaces the values evenly so the sequence has exactly n items
# (spelled out in full instead of relying on partial matching of `length`)
seq(1, 11, length.out = 7)
## [1] 1.000000 2.666667 4.333333 6.000000 7.666667 9.333333 11.000000
# Basic measures of location and spread: mean(), var(), sd()
# The vector is called `values` rather than `c` so that it does not shadow
# the base c() function used throughout the rest of the script.
values <- 1:10
print(round(sd(values), digits = 2))
## [1] 3.03
# One-line table of descriptive statistics (describe() from psych)
a <- 1:10
describe(a)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 5.5 3.03 5.5 5.5 3.71 1 10 9 0 -1.56 0.96
# Data handling in R: subsetting a character vector
b <- c("a", "b", "c")
print(b)
## [1] "a" "b" "c"
# A negative index drops that position (here the last element) ...
b[-length(b)]
## [1] "a" "b"
# ... and leaves the original vector untouched.
print(b)
## [1] "a" "b" "c"
# Pick the first two elements by position.
b[1:2]
## [1] "a" "b"
# Loops and conditionals
# for loop: the braced body runs once for every value of i
a <- integer(9) # preallocate the result instead of growing an empty vector
for (i in seq_len(9)) {
  a[i] <- i * i
}
a
## [1] 1 4 9 16 25 36 49 64 81
# while loop (the original note said "error occurs"; the recorded run below
# finishes normally)
# Increment x and print it until x is no longer below 5.
x <- 1
while (x < 5) {
  x <- x + 1
  print(x)
}
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# ifelse(): vectorised if/else -- recode "f" as 0 and everything else as 1
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(test = gender == "f", yes = 0, no = 1)
gender
## [1] 1 0 1 0 1
# paste(): glues its arguments together element by element; the shorter
# vector (alphabet) is recycled to the length of the longer one (number)
number <- seq_len(5)
alphabet <- letters[1:3]
paste(number, alphabet, sep = " ")
## [1] "1 a" "2 b" "3 c" "4 a" "5 b"
# Converting between data types / data structures
# x is the scalar left by the while loop above (5); the column name of the
# data frame is taken from the argument expression, hence "x".
as.data.frame(x)
## x
## 1 5
# A one-element factor whose only level is the value itself.
as.factor(x)
## [1] 5
## Levels: 5
# Empty the global environment and list it to confirm nothing is left.
rm(list = ls())
ls()
## character(0)
# Switch to the data directory and echo the active working directory.
setwd(dir = "c:/data")
getwd()
## [1] "c:/data"
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Handy summary helpers: sd() standard deviation, var() variance,
# length() number of elements, range() smallest and largest value,
# cor() correlation coefficient, sample() random draw.
# sample() draws at random; calling set.seed(1) right before sample()
# pins the draw, so the same numbers come back on every run.
sample(x = 1:45, size = 6)
## [1] 18 33 21 43 10 7
sample(x = 1:45, size = 6)
## [1] 9 15 21 37 41 25
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1] 4 39 1 34 23 14
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1] 4 39 1 34 23 14
# Material added up to p.138
# Matrices
m <- matrix(1:6, nrow = 3)
# Keep the rows whose first column is > 1 and whose second column is > 5.
# The trailing comma makes this a row filter; without it the 3-element
# logical mask is silently recycled over all 6 cells of the matrix.
m[m[, 1] > 1 & m[, 2] > 5, ]
## [1] 3 6
# rep(value to repeat, number of repetitions)
rep(1, times = 3)
## [1] 1 1 1
rep(2:5, times = 3)
## [1] 2 3 4 5 2 3 4 5 2 3 4 5
# seq(from, to): counts from the first argument to the second in steps of 1
seq(1, 3)
## [1] 1 2 3
# length.out = n spaces the values evenly so the sequence has exactly n items
# (spelled out in full instead of relying on partial matching of `length`)
seq(1, 11, length.out = 7)
## [1] 1.000000 2.666667 4.333333 6.000000 7.666667 9.333333 11.000000
# Basic measures of location and spread: mean(), var(), sd()
# The vector is called `values` rather than `c` so that it does not shadow
# the base c() function used throughout the rest of the script.
values <- 1:10
print(round(sd(values), digits = 2))
## [1] 3.03
# One-line table of descriptive statistics (describe() from psych)
a <- 1:10
describe(a)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10 5.5 3.03 5.5 5.5 3.71 1 10 9 0 -1.56 0.96
# Data handling in R: subsetting a character vector
b <- c("a", "b", "c")
print(b)
## [1] "a" "b" "c"
# A negative index drops that position (here the last element) ...
b[-length(b)]
## [1] "a" "b"
# ... and leaves the original vector untouched.
print(b)
## [1] "a" "b" "c"
# Pick the first two elements by position.
b[1:2]
## [1] "a" "b"
# Loops and conditionals
# for loop: the braced body runs once for every value of i
a <- integer(9) # preallocate the result instead of growing an empty vector
for (i in seq_len(9)) {
  a[i] <- i * i
}
a
## [1] 1 4 9 16 25 36 49 64 81
# while loop (the original note said "error occurs"; the recorded run below
# finishes normally)
# Increment x and print it until x is no longer below 5.
x <- 1
while (x < 5) {
  x <- x + 1
  print(x)
}
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# ifelse(): vectorised if/else -- recode "f" as 0 and everything else as 1
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(test = gender == "f", yes = 0, no = 1)
gender
## [1] 1 0 1 0 1
# paste(): glues its arguments together element by element; the shorter
# vector (alphabet) is recycled to the length of the longer one (number)
number <- seq_len(5)
alphabet <- letters[1:3]
paste(number, alphabet, sep = " ")
## [1] "1 a" "2 b" "3 c" "4 a" "5 b"
# Converting between data types / data structures
# x is the scalar left by the while loop above (5); the column name of the
# data frame is taken from the argument expression, hence "x".
as.data.frame(x)
## x
## 1 5
# A one-element factor whose only level is the value itself.
as.factor(x)
## [1] 5
## Levels: 5
# Logical to numeric: FALSE becomes 0
as.numeric(FALSE)
## [1] 0
# Numeric to logical: any non-zero number becomes TRUE
as.logical(0.45)
## [1] TRUE
# Parsing strings as dates
as.Date("2018-01-13")
## [1] "2018-01-13"
# The format has to spell out the "/" separators present in the input;
# "%m%d%Y" does not match "01/13/2018" and returned NA.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"
# Graphics in R
# Scatter plot
# Load the built-in airquality data set and print a compact column overview.
data(list = "airquality")
glimpse(x = airquality)
## Rows: 153
## Columns: 6
## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Column names are case-sensitive: the freshly loaded data set has `Ozone`,
# so `airquality$ozone` is NULL and summary() reported "Length 0 / NULL".
summary(airquality$Ozone)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 18.00 31.50 42.13 63.25 168.00 37
# Scatter-plot matrix
data(iris)
pairs(iris[1:4],
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  # One fill colour per observation, chosen by species. The column is
  # `Species` (capital S); `iris$species` is NULL and produced an empty
  # colour vector.
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)

# Histogram
# Skewness numerator = mean - median = positive
library(dplyr)
library(psych)
height <- c(182, 160, 165, 170, 163, 160, 181, 166, 159, 145, 175)
hist(x = height)

# Five-number summary plus the mean of the heights.
summary(object = height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 145.0 160.0 165.0 166.0 172.5 182.0
# Descriptive statistics for ozone with the missing readings left out.
describe(airquality$Ozone, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 116 42.13 32.99 31.5 37.8 25.95 1 168 167 1.21 1.11 3.06
#상자그림
library(caret)
## 필요한 패키지를 로딩중입니다: lattice
##
## 다음의 패키지를 부착합니다: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Density curves of the four iris measurements, one panel per measurement
# and one curve per species.
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density",
  # let every panel pick its own x and y axis range
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  # plotting symbol for the individual observations; the digit "1" was a
  # typo for the bar "|" (the later copy of this call in the script uses "|")
  pch = "|",
  layout = c(4, 1),
  auto.key = list(columns = 3)
)

#Petal이 정확성 높아 가려낼 수 있음
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Box plots of every measurement by species on a 2 x 2 grid.
# par() returns the previous settings, which are restored at the end so that
# later plots are not squeezed into the grid.
old_par <- par(mfrow = c(2, 2))
# Bare column names in the formula are looked up in `data`; this also gives
# clean axis labels ("Species", "Petal.Length") instead of "iris$Species".
boxplot(Petal.Length ~ Species, data = iris)
boxplot(Sepal.Length ~ Species, data = iris)
boxplot(Sepal.Width ~ Species, data = iris)
boxplot(Petal.Width ~ Species, data = iris)
par(old_par)

data("Titanic")
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Titanic is a 4-way table (Class x Sex x Age x Survived), as the structure
# printed below shows.
glimpse(x = Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Mosaic plot of the whole table.
mosaicplot(
  Titanic, # data to plot
  main = "Survival on the Titanic", # plot title
  color = c("blue", "green"), # fill colours
  off = 1 # spacing between the blocks
)
#NA, 오류
#describe(airquality$ozone,na.rm=TRUE)
#hist(airquality$ozone,na.rm=TRUE)
#데이터마트
library(reshape2)
# Reload airquality and convert every column name to lower case.
data(airquality)
names(airquality) <- tolower(names(airquality))
# First six rows of airquality
head(airquality, n = 6)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# The second argument sets how many rows are shown.
head(airquality, n = 3)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
library(dplyr)
data(airquality)
colnames(airquality) <- tolower(colnames(airquality)) # lower-case the names
# Long format: one row per (month, day, variable), missing values dropped.
# NOTE(review): `T` is also R's shorthand for TRUE; the name is kept here only
# because later statements in this script read `T`.
T <- melt(airquality, id = c("month", "day"), na.rm = TRUE)
head(T)
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 6 5 6 ozone 28
## 7 5 7 ozone 23
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Mean ozone per month. plyr is attached after dplyr in this script, so a bare
# summarize() resolves to plyr::summarize(), which ignores the grouping and
# returned a single overall mean (42.12931). Calling dplyr::summarise()
# explicitly yields one row per month.
T %>%
  group_by(month) %>%
  filter(variable == "ozone") %>%
  dplyr::summarise(m = mean(value))
## # A tibble: 5 × 2
## month m
## <int> <dbl>
## 1 5 23.6
## 2 6 29.4
## 3 7 59.1
## 4 8 60.0
## 5 9 31.4
library(lubridate)
library(plyr)
# Show the current directory, switch to the data directory, then load the
# disease table and print it in full.
getwd()
## [1] "c:/data"
setwd(dir = "c:/data")
df <- read.csv(file = "disease.csv")
print(df)
## year Afghanistan Albania Algeria Andorra Angola Antigua...Barbuda Argentina
## 1 1999 0 89.0 25.0 245.0 217.0 102.0 193.0
## 2 2000 0 132.0 0.0 138.0 57.0 128.0 25.0
## 3 2001 0 54.0 14.0 312.0 45.0 45.0 221.0
## 4 2002 0 4.9 0.7 12.4 5.9 4.9 8.3
## Armenia Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados
## 1 21.0 261.0 279.0 21.0 122.0 42 0 143.0
## 2 179.0 72.0 75.0 46.0 176.0 63 0 173.0
## 3 11.0 212.0 191.0 5.0 51.0 7 0 36.0
## 4 3.8 10.4 9.7 1.3 6.3 2 0 6.3
## Belarus Belgium Belize Benin Bhutan Bolivia Bosnia.Herzegovina Botswana
## 1 142.0 295.0 263.0 34.0 23.0 167.0 76.0 173.0
## 2 373.0 84.0 114.0 4.0 0.0 41.0 173.0 35.0
## 3 42.0 212.0 8.0 13.0 0.0 8.0 8.0 35.0
## 4 14.4 10.5 6.8 1.1 0.4 3.8 4.6 5.4
## Brazil Brunei Bulgaria Burkina.Faso Burundi Cote.d.Ivoire Cabo.Verde Cambodia
## 1 245.0 31.0 231.0 25.0 88.0 37 144 57.0
## 2 145.0 2.0 252.0 7.0 0.0 1 56 65.0
## 3 16.0 1.0 94.0 7.0 0.0 7 16 1.0
## 4 7.2 0.6 10.3 4.3 6.3 4 4 2.2
## Cameroon Canada Central.African.Republic Chad Chile China Colombia Comoros
## 1 147.0 240.0 17.0 15.0 130.0 79 159.0 1.0
## 2 1.0 122.0 2.0 1.0 124.0 192 76.0 3.0
## 3 4.0 100.0 1.0 1.0 172.0 8 3.0 1.0
## 4 5.8 8.2 1.8 0.4 7.6 5 4.2 0.1
## Congo Cook.Islands Costa.Rica Croatia Cuba Cyprus Czech.Republic North.Korea
## 1 76.0 0.0 149.0 230.0 93.0 192.0 361.0 0
## 2 1.0 254.0 87.0 87.0 137.0 154.0 170.0 0
## 3 9.0 74.0 11.0 254.0 5.0 113.0 134.0 0
## 4 1.7 5.9 4.4 10.2 4.2 8.2 11.8 0
## DR.Congo Denmark Djibouti Dominica Dominican.Republic Ecuador Egypt
## 1 32.0 224.0 15.0 52.0 193.0 162.0 6.0
## 2 3.0 81.0 44.0 286.0 147.0 74.0 4.0
## 3 1.0 278.0 3.0 26.0 9.0 3.0 1.0
## 4 2.3 10.4 1.1 6.6 6.2 4.2 0.2
## El.Salvador Equatorial.Guinea Eritrea Estonia Ethiopia Fiji Finland France
## 1 52.0 92.0 18.0 224.0 20.0 77 263 127.0
## 2 69.0 0.0 0.0 194.0 3.0 35 133 151.0
## 3 2.0 233.0 0.0 59.0 0.0 1 97 370.0
## 4 2.2 5.8 0.5 9.5 0.7 2 10 11.8
## Gabon Gambia Georgia Germany Ghana Greece Grenada Guatemala Guinea
## 1 347.0 8.0 52.0 346.0 31.0 133.0 199.0 53.0 9.0
## 2 98.0 0.0 100.0 117.0 3.0 112.0 438.0 69.0 0.0
## 3 59.0 1.0 149.0 175.0 10.0 218.0 28.0 2.0 2.0
## 4 8.9 2.4 5.4 11.3 1.8 8.3 11.9 2.2 0.2
## Guinea.Bissau Guyana Haiti Honduras Hungary Iceland India Indonesia Iran Iraq
## 1 28.0 93.0 1.0 69 234.0 233.0 9.0 5.0 0 9.0
## 2 31.0 302.0 326.0 98 215.0 61.0 114.0 1.0 0 3.0
## 3 21.0 1.0 1.0 2 185.0 78.0 0.0 0.0 0 0.0
## 4 2.5 7.1 5.9 3 11.3 6.6 2.2 0.1 0 0.2
## Ireland Israel Italy Jamaica Japan Jordan Kazakhstan Kenya Kiribati Kuwait
## 1 313.0 63.0 85.0 82.0 77 6.0 124.0 58.0 21 0
## 2 118.0 69.0 42.0 88.0 202 21.0 246.0 22.0 34 0
## 3 165.0 9.0 237.0 9.0 16 1.0 12.0 2.0 1 0
## 4 11.4 2.5 6.5 3.4 7 0.5 6.8 1.8 1 0
## Kyrgyzstan Laos Latvia Lebanon Lesotho Liberia Libya Lithuania Luxembourg
## 1 31.0 62.0 281.0 20.0 82.0 19.0 0 343.0 236.0
## 2 88.0 0.0 216.0 55.0 50.0 152.0 0 244.0 133.0
## 3 6.0 123.0 62.0 31.0 0.0 2.0 0 56.0 271.0
## 4 2.4 6.2 10.5 1.9 2.8 3.1 0 12.9 11.4
## Madagascar Malawi Malaysia Maldives Mali Malta Marshall.Islands Mauritania
## 1 26.0 8.0 13.0 0 5.0 149.0 0 0
## 2 15.0 11.0 4.0 0 1.0 100.0 0 0
## 3 4.0 1.0 0.0 0 1.0 120.0 0 0
## 4 0.8 1.5 0.3 0 0.6 6.6 0 0
## Mauritius Mexico Micronesia Monaco Mongolia Montenegro Morocco Mozambique
## 1 98.0 238.0 62.0 0 77.0 31.0 12.0 47.0
## 2 31.0 68.0 50.0 0 189.0 114.0 6.0 18.0
## 3 18.0 5.0 18.0 0 8.0 128.0 10.0 5.0
## 4 2.6 5.5 2.3 0 4.9 4.9 0.5 1.3
## Myanmar Namibia Nauru Nepal Netherlands New.Zealand Nicaragua Niger Nigeria
## 1 5.0 376.0 49 5.0 251.0 203.0 78.0 3.0 42.0
## 2 1.0 3.0 0 6.0 88.0 79.0 118.0 2.0 5.0
## 3 0.0 1.0 8 0.0 190.0 175.0 1.0 1.0 2.0
## 4 0.1 6.8 1 0.2 9.4 9.3 3.5 0.1 9.1
## Niue Norway Oman Pakistan Palau Panama Papua.New.Guinea Paraguay Peru
## 1 188 169.0 22.0 0 306.0 285.0 44.0 213.0 163.0
## 2 200 71.0 16.0 0 63.0 104.0 39.0 117.0 160.0
## 3 7 129.0 1.0 0 23.0 18.0 1.0 74.0 21.0
## 4 7 6.7 0.7 0 6.9 7.2 1.5 7.3 6.1
## Philippines Poland Portugal Qatar South.Korea Moldova Romania
## 1 71.0 343.0 194 1.0 140.0 109.0 297.0
## 2 186.0 215.0 67 42.0 16.0 226.0 122.0
## 3 1.0 56.0 339 7.0 9.0 18.0 167.0
## 4 4.6 10.9 11 0.9 9.8 6.3 10.4
## Russian.Federation Rwanda St..Kitts...Nevis St..Lucia
## 1 247.0 43.0 194.0 171.0
## 2 326.0 2.0 205.0 315.0
## 3 73.0 0.0 32.0 71.0
## 4 11.5 6.8 7.7 10.1
## St..Vincent...the.Grenadines Samoa San.Marino Sao.Tome...Principe
## 1 120.0 105.0 0 56.0
## 2 221.0 18.0 0 38.0
## 3 11.0 24.0 0 140.0
## 4 6.3 2.6 0 4.2
## Saudi.Arabia Senegal Serbia Seychelles Sierra.Leone Singapore Slovakia
## 1 0.0 9.0 283.0 157.0 25.0 60.0 196.0
## 2 5.0 1.0 131.0 25.0 3.0 12.0 293.0
## 3 0.0 7.0 127.0 51.0 2.0 11.0 116.0
## 4 0.1 0.3 9.6 4.1 6.7 1.5 11.4
## Slovenia Solomon.Islands Somalia South.Africa Spain Sri.Lanka Sudan Suriname
## 1 270.0 56.0 0 225.0 284 16.0 8.0 128.0
## 2 51.0 11.0 0 76.0 157 104.0 13.0 178.0
## 3 276.0 1.0 0 81.0 112 0.0 0.0 7.0
## 4 10.6 1.2 0 8.2 10 2.2 1.7 5.6
## Swaziland Sweden Switzerland Syria Tajikistan Thailand Macedonia Timor.Leste
## 1 90.0 152.0 185.0 5 2.0 99.0 106.0 1.0
## 2 2.0 60.0 100.0 35 15.0 258.0 27.0 1.0
## 3 2.0 186.0 280.0 16 0.0 1.0 86.0 4.0
## 4 4.7 7.2 10.2 1 0.3 6.4 3.9 0.1
## Togo Tonga Trinidad...Tobago Tunisia Turkey Turkmenistan Tuvalu Uganda
## 1 36.0 36.0 197.0 51.0 51.0 19.0 6 45.0
## 2 2.0 21.0 156.0 3.0 22.0 71.0 41 9.0
## 3 19.0 5.0 7.0 20.0 7.0 32.0 9 0.0
## 4 1.3 1.1 6.4 1.3 1.4 2.2 1 8.3
## Ukraine United.Arab.Emirates United.Kingdom Tanzania USA Uruguay Uzbekistan
## 1 206.0 16.0 219.0 36.0 249.0 115.0 25.0
## 2 237.0 135.0 126.0 6.0 158.0 35.0 101.0
## 3 45.0 5.0 195.0 1.0 84.0 220.0 8.0
## 4 8.9 2.8 10.4 5.7 8.7 6.6 2.4
## Vanuatu Venezuela Vietnam Yemen Zambia Zimbabwe
## 1 21.0 333.0 111 6.0 32.0 64.0
## 2 18.0 100.0 2 0.0 19.0 18.0
## 3 11.0 3.0 1 0.0 4.0 4.0
## 4 0.9 7.7 2 0.1 2.5 4.7
library(dplyr)
library(reshape2)
# Reshape to long format: one row per (year, country column) pair.
df1 <- melt(df, id = "year")
df1 %>% glimpse()
## Rows: 772
## Columns: 3
## $ year <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year" "country" "disease"
# Mean disease value over all countries in 2000.
df_2000 <- df1 %>% filter(year == 2000)
df_2000 %>% summarize(m = mean(disease))
## m
## 1 81.01036
# Count the countries above that mean. The threshold is computed from the
# data instead of pasting in the rounded, printed value 81.01036.
mean_2000 <- mean(df_2000$disease)
result <- df_2000 %>%
  filter(disease > mean_2000) %>%
  NROW()
print(result)
## [1] 76
# Cast the long table into a 3-D array: days as rows, months as columns,
# and one slice per variable.
acast(data = T, formula = day ~ month ~ variable)
## , , ozone
##
## 5 6 7 8 9
## 1 41 NA 135 39 96
## 2 36 NA 49 9 78
## 3 12 NA 32 16 73
## 4 18 NA NA 78 91
## 5 NA NA 64 35 47
## 6 28 NA 40 66 32
## 7 23 29 77 122 20
## 8 19 NA 97 89 23
## 9 8 71 97 110 21
## 10 NA 39 85 NA 24
## 11 7 NA NA NA 44
## 12 16 NA 10 44 21
## 13 11 23 27 28 28
## 14 14 NA NA 65 9
## 15 18 NA 7 NA 13
## 16 14 21 48 22 46
## 17 34 37 35 59 18
## 18 6 20 61 23 13
## 19 30 12 79 31 24
## 20 11 13 63 44 16
## 21 1 NA 16 21 13
## 22 11 NA NA 9 23
## 23 4 NA NA NA 36
## 24 32 NA 80 45 7
## 25 NA NA 108 168 14
## 26 NA NA 20 73 30
## 27 NA NA 52 NA NA
## 28 23 NA 82 76 14
## 29 45 NA 50 118 18
## 30 115 NA 64 84 20
## 31 37 NA 59 85 NA
##
## , , solar.r
##
## 5 6 7 8 9
## 1 190 286 269 83 167
## 2 118 287 248 24 197
## 3 149 242 236 77 183
## 4 313 186 101 NA 189
## 5 NA 220 175 NA 95
## 6 NA 264 314 NA 92
## 7 299 127 276 255 252
## 8 99 273 267 229 220
## 9 19 291 272 207 230
## 10 194 323 175 222 259
## 11 NA 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157 24
## 15 65 322 48 64 112
## 16 334 191 260 71 237
## 17 307 284 274 51 224
## 18 78 37 285 115 27
## 19 322 120 187 244 238
## 20 44 137 220 190 201
## 21 8 150 7 259 238
## 22 320 59 258 36 14
## 23 25 91 295 255 139
## 24 92 250 294 212 49
## 25 66 135 223 238 20
## 26 266 127 81 215 193
## 27 NA 47 82 153 145
## 28 13 98 213 203 191
## 29 252 31 275 225 131
## 30 223 138 253 237 223
## 31 279 NA 254 188 NA
##
## , , wind
##
## 5 6 7 8 9
## 1 7.4 8.6 4.1 6.9 6.9
## 2 8.0 9.7 9.2 13.8 5.1
## 3 12.6 16.1 9.2 7.4 2.8
## 4 11.5 9.2 10.9 6.9 4.6
## 5 14.3 8.6 4.6 7.4 7.4
## 6 14.9 14.3 10.9 4.6 15.5
## 7 8.6 9.7 5.1 4.0 10.9
## 8 13.8 6.9 6.3 10.3 10.3
## 9 20.1 13.8 5.7 8.0 10.9
## 10 8.6 11.5 7.4 8.6 9.7
## 11 6.9 10.9 8.6 11.5 14.9
## 12 9.7 9.2 14.3 11.5 15.5
## 13 9.2 8.0 14.9 11.5 6.3
## 14 10.9 13.8 14.9 9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9 6.9 10.3 6.9
## 17 12.0 20.7 10.3 6.3 13.8
## 18 18.4 9.2 6.3 7.4 10.3
## 19 11.5 11.5 5.1 10.9 10.3
## 20 9.7 10.3 11.5 10.3 8.0
## 21 9.7 6.3 6.9 15.5 12.6
## 22 16.6 1.7 9.7 14.3 9.2
## 23 9.7 4.6 11.5 12.6 10.3
## 24 12.0 6.3 8.6 9.7 10.3
## 25 16.6 8.0 8.0 3.4 16.6
## 26 14.9 8.0 8.6 8.0 6.9
## 27 8.0 10.3 12.0 5.7 13.2
## 28 12.0 11.5 7.4 9.7 14.3
## 29 14.9 14.9 7.4 2.3 8.0
## 30 5.7 8.0 7.4 6.3 11.5
## 31 7.4 NA 9.2 6.3 NA
##
## , , temp
##
## 5 6 7 8 9
## 1 67 78 84 81 91
## 2 72 74 85 81 92
## 3 74 67 81 82 93
## 4 62 84 84 86 93
## 5 56 85 83 85 87
## 6 66 79 83 87 84
## 7 65 82 88 89 80
## 8 59 87 92 90 78
## 9 61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NA 81 94 NA
# Same day x month x variable array, with mean() applied inside every cell;
# cells without any observation come out as NaN (see the output below).
b <- acast(data = T, formula = day ~ month ~ variable, fun.aggregate = mean)
print(b)
## , , ozone
##
## 5 6 7 8 9
## 1 41 NaN 135 39 96
## 2 36 NaN 49 9 78
## 3 12 NaN 32 16 73
## 4 18 NaN NaN 78 91
## 5 NaN NaN 64 35 47
## 6 28 NaN 40 66 32
## 7 23 29 77 122 20
## 8 19 NaN 97 89 23
## 9 8 71 97 110 21
## 10 NaN 39 85 NaN 24
## 11 7 NaN NaN NaN 44
## 12 16 NaN 10 44 21
## 13 11 23 27 28 28
## 14 14 NaN NaN 65 9
## 15 18 NaN 7 NaN 13
## 16 14 21 48 22 46
## 17 34 37 35 59 18
## 18 6 20 61 23 13
## 19 30 12 79 31 24
## 20 11 13 63 44 16
## 21 1 NaN 16 21 13
## 22 11 NaN NaN 9 23
## 23 4 NaN NaN NaN 36
## 24 32 NaN 80 45 7
## 25 NaN NaN 108 168 14
## 26 NaN NaN 20 73 30
## 27 NaN NaN 52 NaN NaN
## 28 23 NaN 82 76 14
## 29 45 NaN 50 118 18
## 30 115 NaN 64 84 20
## 31 37 NaN 59 85 NaN
##
## , , solar.r
##
## 5 6 7 8 9
## 1 190 286 269 83 167
## 2 118 287 248 24 197
## 3 149 242 236 77 183
## 4 313 186 101 NaN 189
## 5 NaN 220 175 NaN 95
## 6 NaN 264 314 NaN 92
## 7 299 127 276 255 252
## 8 99 273 267 229 220
## 9 19 291 272 207 230
## 10 194 323 175 222 259
## 11 NaN 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157 24
## 15 65 322 48 64 112
## 16 334 191 260 71 237
## 17 307 284 274 51 224
## 18 78 37 285 115 27
## 19 322 120 187 244 238
## 20 44 137 220 190 201
## 21 8 150 7 259 238
## 22 320 59 258 36 14
## 23 25 91 295 255 139
## 24 92 250 294 212 49
## 25 66 135 223 238 20
## 26 266 127 81 215 193
## 27 NaN 47 82 153 145
## 28 13 98 213 203 191
## 29 252 31 275 225 131
## 30 223 138 253 237 223
## 31 279 NaN 254 188 NaN
##
## , , wind
##
## 5 6 7 8 9
## 1 7.4 8.6 4.1 6.9 6.9
## 2 8.0 9.7 9.2 13.8 5.1
## 3 12.6 16.1 9.2 7.4 2.8
## 4 11.5 9.2 10.9 6.9 4.6
## 5 14.3 8.6 4.6 7.4 7.4
## 6 14.9 14.3 10.9 4.6 15.5
## 7 8.6 9.7 5.1 4.0 10.9
## 8 13.8 6.9 6.3 10.3 10.3
## 9 20.1 13.8 5.7 8.0 10.9
## 10 8.6 11.5 7.4 8.6 9.7
## 11 6.9 10.9 8.6 11.5 14.9
## 12 9.7 9.2 14.3 11.5 15.5
## 13 9.2 8.0 14.9 11.5 6.3
## 14 10.9 13.8 14.9 9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9 6.9 10.3 6.9
## 17 12.0 20.7 10.3 6.3 13.8
## 18 18.4 9.2 6.3 7.4 10.3
## 19 11.5 11.5 5.1 10.9 10.3
## 20 9.7 10.3 11.5 10.3 8.0
## 21 9.7 6.3 6.9 15.5 12.6
## 22 16.6 1.7 9.7 14.3 9.2
## 23 9.7 4.6 11.5 12.6 10.3
## 24 12.0 6.3 8.6 9.7 10.3
## 25 16.6 8.0 8.0 3.4 16.6
## 26 14.9 8.0 8.6 8.0 6.9
## 27 8.0 10.3 12.0 5.7 13.2
## 28 12.0 11.5 7.4 9.7 14.3
## 29 14.9 14.9 7.4 2.3 8.0
## 30 5.7 8.0 7.4 6.3 11.5
## 31 7.4 NaN 9.2 6.3 NaN
##
## , , temp
##
## 5 6 7 8 9
## 1 67 78 84 81 91
## 2 72 74 85 81 92
## 3 74 67 81 82 93
## 4 62 84 84 86 93
## 5 56 85 83 85 87
## 6 66 79 83 87 84
## 7 65 82 88 89 80
## 8 59 87 92 90 78
## 9 61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NaN 81 94 NaN
#plyr: 데이터 분리, 특정함수 적용, 결과 재결합
#1.apply
library(plyr)
# A 3 x 2 matrix filled column by column.
a <- matrix(1:6, ncol = 2)
print(a)
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
# MARGIN = 1 applies the function to every row.
apply(X = a, MARGIN = 1, FUN = sum)
## [1] 5 7 9
# MARGIN = 2 applies it to every column (Species, column 5, is left out).
apply(X = iris[, -5], MARGIN = 2, FUN = sum)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
# Dedicated helpers for column/row sums and means.
colSums(x = iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
colMeans(x = iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
rowSums(x = iris[, -5])
## [1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
## [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
## [31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
## [46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
## [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
## [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
## [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
# Mean of the four measurements for every flower (row).
rowMeans(x = iris[, -5])
## [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
## [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
## [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
## [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
## [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
## [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
## [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
## [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
## [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950
#데이터테이블
#데이터 기초통계
library(dplyr)
library(plyr)
# Reload iris and look at it.
data(iris)
# First six rows of the data
head(iris, n = 6)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Structure of the data: column types and the first few values
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Summary statistics for every column: quartiles and mean for the numeric
# columns, counts per level for Species. The identical call that used to be
# repeated directly below was redundant and has been dropped.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Covariance matrix of the four numeric columns
cov(x = iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.6856935 -0.0424340 1.2743154 0.5162707
## Sepal.Width -0.0424340 0.1899794 -0.3296564 -0.1216394
## Petal.Length 1.2743154 -0.3296564 3.1162779 1.2956094
## Petal.Width 0.5162707 -0.1216394 1.2956094 0.5810063
# Correlation matrix of the same columns
cor(x = iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
#이론 166p 기출유형 개념잡기 13번
library(ISLR)
# Load the Wage data set and print a compact overview of its columns.
data(list = "Wage")
glimpse(x = Wage)
## Rows: 3,000
## Columns: 11
## $ year <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…
# Per-column summary of Wage (quartiles for numbers, counts for factors).
summary(object = Wage)
## year age maritl race
## Min. :2003 Min. :18.00 1. Never Married: 648 1. White:2480
## 1st Qu.:2004 1st Qu.:33.75 2. Married :2074 2. Black: 293
## Median :2006 Median :42.00 3. Widowed : 19 3. Asian: 190
## Mean :2006 Mean :42.41 4. Divorced : 204 4. Other: 37
## 3rd Qu.:2008 3rd Qu.:51.00 5. Separated : 55
## Max. :2009 Max. :80.00
##
## education region jobclass
## 1. < HS Grad :268 2. Middle Atlantic :3000 1. Industrial :1544
## 2. HS Grad :971 1. New England : 0 2. Information:1456
## 3. Some College :650 3. East North Central: 0
## 4. College Grad :685 4. West North Central: 0
## 5. Advanced Degree:426 5. South Atlantic : 0
## 6. East South Central: 0
## (Other) : 0
## health health_ins logwage wage
## 1. <=Good : 858 1. Yes:2083 Min. :3.000 Min. : 20.09
## 2. >=Very Good:2142 2. No : 917 1st Qu.:4.447 1st Qu.: 85.38
## Median :4.653 Median :104.92
## Mean :4.654 Mean :111.70
## 3rd Qu.:4.857 3rd Qu.:128.68
## Max. :5.763 Max. :318.34
##
#결측값처리
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Load french_fries and list only the rows that have at least one NA.
data(list = "french_fries")
french_fries[!complete.cases(french_fries), , drop = FALSE]
## time treatment subject rep potato buttery grassy rancid painty
## 315 5 3 15 1 NA NA NA NA NA
## 455 7 2 79 1 7.3 NA 0.0 0.7 0
## 515 8 1 79 1 10.5 NA 0.0 0.5 0
## 520 8 2 16 1 4.5 NA 1.4 6.7 0
## 563 8 2 79 2 5.7 0 1.4 2.3 NA
# !complete.cases() is TRUE exactly for the rows that contain a missing value,
# so indexing with it returns only those rows.
# complete.cases() itself returns FALSE for rows with a missing value.
# Logical to numeric: FALSE becomes 0
as.numeric(FALSE)
## [1] 0
# Numeric to logical: any non-zero number becomes TRUE
as.logical(0.45)
## [1] TRUE
# Parsing strings as dates
as.Date("2018-01-13")
## [1] "2018-01-13"
# The format has to spell out the "/" separators present in the input;
# "%m%d%Y" does not match "01/13/2018" and returned NA.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"

# Graphics in R
# Scatter plot
# Load the built-in airquality data set and print a compact column overview.
data(list = "airquality")
glimpse(x = airquality)
## Rows: 153
## Columns: 6
## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Scatter plot of solar radiation (y) against ozone (x).
plot(x = airquality$Ozone, y = airquality$Solar.R)

# Quartiles, mean and NA count of the ozone readings.
summary(object = airquality$Ozone)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 18.00 31.50 42.13 63.25 168.00 37
# Scatter-plot matrix
data(iris)
pairs(iris[1:4],
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  # One fill colour per observation, chosen by species. The column is
  # `Species` (capital S); `iris$species` is NULL and produced an empty
  # colour vector.
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)

# Histogram
# Skewness numerator = mean - median = positive
library(dplyr)
height <- c(182, 160, 165, 170, 163, 160, 181, 166, 159, 145, 175)
hist(x = height)

# Five-number summary plus the mean of the heights.
summary(object = height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 145.0 160.0 165.0 166.0 172.5 182.0
# Descriptive statistics for ozone with the missing readings left out.
describe(airquality$Ozone, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 116 42.13 32.99 31.5 37.8 25.95 1 168 167 1.21 1.11 3.06
# Draw the petal-length histogram in a 1 x 2 layout, then go back to a
# single-plot layout.
par(mfrow = c(1, 2))
hist(x = iris$Petal.Length)
par(mfrow = c(1, 1))

#상자그림
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
library(tidyverse)
library(caret)
# Density curves of the four iris measurements, one panel per measurement
# and one curve per species.
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density",
  # let every panel pick its own x and y axis range
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  pch = "|",
  layout = c(4, 1),
  auto.key = list(columns = 3)
)

# The petal measurements separate the species most clearly.
par(mfrow = c(1, 1))
# Bare column names in the formula are looked up in `data`; this also gives
# clean axis labels ("Species", "Petal.Length") instead of "iris$Species".
boxplot(Petal.Length ~ Species, data = iris)

boxplot(Sepal.Length ~ Species, data = iris)

boxplot(Sepal.Width ~ Species, data = iris)

boxplot(Petal.Width ~ Species, data = iris)

data("Titanic")
library(dplyr)
# Titanic is a 4-way table (Class x Sex x Age x Survived), as the structure
# printed below shows.
glimpse(Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Mosaic plot of the whole table. The argument is `color`; the misspelt
# `calor` was discarded with a warning, so the colours were never applied.
mosaicplot(
  Titanic, # data to plot
  main = "Survival on the Titanic", # plot title
  color = c("blue", "green"), # fill colours
  off = 1 # spacing between the blocks
)

#NA, 오류
#데이터마트
library(reshape2)
# Start from a fresh copy of the built-in data set.
data(airquality)
# Convert every column name to lower case.
names(airquality) <- tolower(names(airquality))
# First six rows (the default for head()).
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Explicit row count: only the first three rows.
head(airquality, n = 3)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
library(dplyr)
data(airquality)
colnames(airquality) <- tolower(colnames(airquality)) # lower-case the column names
# Long format: one row per (month, day, variable) with its value; NA values
# are dropped by na.rm = TRUE.
# NOTE(review): `T` shadows the built-in shorthand for TRUE. The name is kept
# only because later statements in this file refer to it.
T <- melt(airquality, id = c("month", "day"), na.rm = TRUE)
head(T)
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 6 5 6 ozone 28
## 7 5 7 ozone 23
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Mean ozone per month. plyr is attached after dplyr in this script, so a bare
# summarize() resolves to plyr::summarize(), which ignores group_by() and
# returned one overall mean (42.12931). The dplyr verb is called explicitly so
# the grouping is honoured.
T %>%
  filter(variable == "ozone") %>%
  group_by(month) %>%
  dplyr::summarise(m = mean(value))
# Result: one row per month (5 to 9); m is roughly 23.6, 29.4, 59.1, 60.0, 31.4.
library(lubridate)
library(plyr)
# Show the current working directory, then move to the data folder.
getwd()
## [1] "C:/Users/김지수/Desktop/2023/여름방학/AI빅데이터인력양성/adsp"
setwd("c:/data")
# Read the disease table (one row per year, one column per country) and print it.
df <- read.csv(file = "disease.csv")
print(df)
## year Afghanistan Albania Algeria Andorra Angola Antigua...Barbuda Argentina
## 1 1999 0 89.0 25.0 245.0 217.0 102.0 193.0
## 2 2000 0 132.0 0.0 138.0 57.0 128.0 25.0
## 3 2001 0 54.0 14.0 312.0 45.0 45.0 221.0
## 4 2002 0 4.9 0.7 12.4 5.9 4.9 8.3
## Armenia Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados
## 1 21.0 261.0 279.0 21.0 122.0 42 0 143.0
## 2 179.0 72.0 75.0 46.0 176.0 63 0 173.0
## 3 11.0 212.0 191.0 5.0 51.0 7 0 36.0
## 4 3.8 10.4 9.7 1.3 6.3 2 0 6.3
## Belarus Belgium Belize Benin Bhutan Bolivia Bosnia.Herzegovina Botswana
## 1 142.0 295.0 263.0 34.0 23.0 167.0 76.0 173.0
## 2 373.0 84.0 114.0 4.0 0.0 41.0 173.0 35.0
## 3 42.0 212.0 8.0 13.0 0.0 8.0 8.0 35.0
## 4 14.4 10.5 6.8 1.1 0.4 3.8 4.6 5.4
## Brazil Brunei Bulgaria Burkina.Faso Burundi Cote.d.Ivoire Cabo.Verde Cambodia
## 1 245.0 31.0 231.0 25.0 88.0 37 144 57.0
## 2 145.0 2.0 252.0 7.0 0.0 1 56 65.0
## 3 16.0 1.0 94.0 7.0 0.0 7 16 1.0
## 4 7.2 0.6 10.3 4.3 6.3 4 4 2.2
## Cameroon Canada Central.African.Republic Chad Chile China Colombia Comoros
## 1 147.0 240.0 17.0 15.0 130.0 79 159.0 1.0
## 2 1.0 122.0 2.0 1.0 124.0 192 76.0 3.0
## 3 4.0 100.0 1.0 1.0 172.0 8 3.0 1.0
## 4 5.8 8.2 1.8 0.4 7.6 5 4.2 0.1
## Congo Cook.Islands Costa.Rica Croatia Cuba Cyprus Czech.Republic North.Korea
## 1 76.0 0.0 149.0 230.0 93.0 192.0 361.0 0
## 2 1.0 254.0 87.0 87.0 137.0 154.0 170.0 0
## 3 9.0 74.0 11.0 254.0 5.0 113.0 134.0 0
## 4 1.7 5.9 4.4 10.2 4.2 8.2 11.8 0
## DR.Congo Denmark Djibouti Dominica Dominican.Republic Ecuador Egypt
## 1 32.0 224.0 15.0 52.0 193.0 162.0 6.0
## 2 3.0 81.0 44.0 286.0 147.0 74.0 4.0
## 3 1.0 278.0 3.0 26.0 9.0 3.0 1.0
## 4 2.3 10.4 1.1 6.6 6.2 4.2 0.2
## El.Salvador Equatorial.Guinea Eritrea Estonia Ethiopia Fiji Finland France
## 1 52.0 92.0 18.0 224.0 20.0 77 263 127.0
## 2 69.0 0.0 0.0 194.0 3.0 35 133 151.0
## 3 2.0 233.0 0.0 59.0 0.0 1 97 370.0
## 4 2.2 5.8 0.5 9.5 0.7 2 10 11.8
## Gabon Gambia Georgia Germany Ghana Greece Grenada Guatemala Guinea
## 1 347.0 8.0 52.0 346.0 31.0 133.0 199.0 53.0 9.0
## 2 98.0 0.0 100.0 117.0 3.0 112.0 438.0 69.0 0.0
## 3 59.0 1.0 149.0 175.0 10.0 218.0 28.0 2.0 2.0
## 4 8.9 2.4 5.4 11.3 1.8 8.3 11.9 2.2 0.2
## Guinea.Bissau Guyana Haiti Honduras Hungary Iceland India Indonesia Iran Iraq
## 1 28.0 93.0 1.0 69 234.0 233.0 9.0 5.0 0 9.0
## 2 31.0 302.0 326.0 98 215.0 61.0 114.0 1.0 0 3.0
## 3 21.0 1.0 1.0 2 185.0 78.0 0.0 0.0 0 0.0
## 4 2.5 7.1 5.9 3 11.3 6.6 2.2 0.1 0 0.2
## Ireland Israel Italy Jamaica Japan Jordan Kazakhstan Kenya Kiribati Kuwait
## 1 313.0 63.0 85.0 82.0 77 6.0 124.0 58.0 21 0
## 2 118.0 69.0 42.0 88.0 202 21.0 246.0 22.0 34 0
## 3 165.0 9.0 237.0 9.0 16 1.0 12.0 2.0 1 0
## 4 11.4 2.5 6.5 3.4 7 0.5 6.8 1.8 1 0
## Kyrgyzstan Laos Latvia Lebanon Lesotho Liberia Libya Lithuania Luxembourg
## 1 31.0 62.0 281.0 20.0 82.0 19.0 0 343.0 236.0
## 2 88.0 0.0 216.0 55.0 50.0 152.0 0 244.0 133.0
## 3 6.0 123.0 62.0 31.0 0.0 2.0 0 56.0 271.0
## 4 2.4 6.2 10.5 1.9 2.8 3.1 0 12.9 11.4
## Madagascar Malawi Malaysia Maldives Mali Malta Marshall.Islands Mauritania
## 1 26.0 8.0 13.0 0 5.0 149.0 0 0
## 2 15.0 11.0 4.0 0 1.0 100.0 0 0
## 3 4.0 1.0 0.0 0 1.0 120.0 0 0
## 4 0.8 1.5 0.3 0 0.6 6.6 0 0
## Mauritius Mexico Micronesia Monaco Mongolia Montenegro Morocco Mozambique
## 1 98.0 238.0 62.0 0 77.0 31.0 12.0 47.0
## 2 31.0 68.0 50.0 0 189.0 114.0 6.0 18.0
## 3 18.0 5.0 18.0 0 8.0 128.0 10.0 5.0
## 4 2.6 5.5 2.3 0 4.9 4.9 0.5 1.3
## Myanmar Namibia Nauru Nepal Netherlands New.Zealand Nicaragua Niger Nigeria
## 1 5.0 376.0 49 5.0 251.0 203.0 78.0 3.0 42.0
## 2 1.0 3.0 0 6.0 88.0 79.0 118.0 2.0 5.0
## 3 0.0 1.0 8 0.0 190.0 175.0 1.0 1.0 2.0
## 4 0.1 6.8 1 0.2 9.4 9.3 3.5 0.1 9.1
## Niue Norway Oman Pakistan Palau Panama Papua.New.Guinea Paraguay Peru
## 1 188 169.0 22.0 0 306.0 285.0 44.0 213.0 163.0
## 2 200 71.0 16.0 0 63.0 104.0 39.0 117.0 160.0
## 3 7 129.0 1.0 0 23.0 18.0 1.0 74.0 21.0
## 4 7 6.7 0.7 0 6.9 7.2 1.5 7.3 6.1
## Philippines Poland Portugal Qatar South.Korea Moldova Romania
## 1 71.0 343.0 194 1.0 140.0 109.0 297.0
## 2 186.0 215.0 67 42.0 16.0 226.0 122.0
## 3 1.0 56.0 339 7.0 9.0 18.0 167.0
## 4 4.6 10.9 11 0.9 9.8 6.3 10.4
## Russian.Federation Rwanda St..Kitts...Nevis St..Lucia
## 1 247.0 43.0 194.0 171.0
## 2 326.0 2.0 205.0 315.0
## 3 73.0 0.0 32.0 71.0
## 4 11.5 6.8 7.7 10.1
## St..Vincent...the.Grenadines Samoa San.Marino Sao.Tome...Principe
## 1 120.0 105.0 0 56.0
## 2 221.0 18.0 0 38.0
## 3 11.0 24.0 0 140.0
## 4 6.3 2.6 0 4.2
## Saudi.Arabia Senegal Serbia Seychelles Sierra.Leone Singapore Slovakia
## 1 0.0 9.0 283.0 157.0 25.0 60.0 196.0
## 2 5.0 1.0 131.0 25.0 3.0 12.0 293.0
## 3 0.0 7.0 127.0 51.0 2.0 11.0 116.0
## 4 0.1 0.3 9.6 4.1 6.7 1.5 11.4
## Slovenia Solomon.Islands Somalia South.Africa Spain Sri.Lanka Sudan Suriname
## 1 270.0 56.0 0 225.0 284 16.0 8.0 128.0
## 2 51.0 11.0 0 76.0 157 104.0 13.0 178.0
## 3 276.0 1.0 0 81.0 112 0.0 0.0 7.0
## 4 10.6 1.2 0 8.2 10 2.2 1.7 5.6
## Swaziland Sweden Switzerland Syria Tajikistan Thailand Macedonia Timor.Leste
## 1 90.0 152.0 185.0 5 2.0 99.0 106.0 1.0
## 2 2.0 60.0 100.0 35 15.0 258.0 27.0 1.0
## 3 2.0 186.0 280.0 16 0.0 1.0 86.0 4.0
## 4 4.7 7.2 10.2 1 0.3 6.4 3.9 0.1
## Togo Tonga Trinidad...Tobago Tunisia Turkey Turkmenistan Tuvalu Uganda
## 1 36.0 36.0 197.0 51.0 51.0 19.0 6 45.0
## 2 2.0 21.0 156.0 3.0 22.0 71.0 41 9.0
## 3 19.0 5.0 7.0 20.0 7.0 32.0 9 0.0
## 4 1.3 1.1 6.4 1.3 1.4 2.2 1 8.3
## Ukraine United.Arab.Emirates United.Kingdom Tanzania USA Uruguay Uzbekistan
## 1 206.0 16.0 219.0 36.0 249.0 115.0 25.0
## 2 237.0 135.0 126.0 6.0 158.0 35.0 101.0
## 3 45.0 5.0 195.0 1.0 84.0 220.0 8.0
## 4 8.9 2.8 10.4 5.7 8.7 6.6 2.4
## Vanuatu Venezuela Vietnam Yemen Zambia Zimbabwe
## 1 21.0 333.0 111 6.0 32.0 64.0
## 2 18.0 100.0 2 0.0 19.0 18.0
## 3 11.0 3.0 1 0.0 4.0 4.0
## 4 0.9 7.7 2 0.1 2.5 4.7
library(dplyr)
library(reshape2)
# Reshape the wide table (one column per country) into long format.
df1 <- melt(df, id = "year")
df1 %>% glimpse
## Rows: 772
## Columns: 3
## $ year <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year" "country" "disease"
# Keep the year-2000 rows and compute their mean once, so the threshold below
# comes from the data instead of a hand-copied, rounded literal.
df_2000 <- df1 %>% filter(year == 2000)
mean_2000 <- mean(df_2000$disease)
mean_2000
## [1] 81.01036
# Number of countries whose year-2000 value is above that mean.
result <- df_2000 %>% filter(disease > mean_2000) %>% NROW()
print(result)
## [1] 76
# ~ submission
# Cast the long data into a 3-d array: rows are days, columns are months, and
# there is one slice per variable.
acast(data = T, formula = day ~ month ~ variable)
## , , ozone
##
## 5 6 7 8 9
## 1 41 NA 135 39 96
## 2 36 NA 49 9 78
## 3 12 NA 32 16 73
## 4 18 NA NA 78 91
## 5 NA NA 64 35 47
## 6 28 NA 40 66 32
## 7 23 29 77 122 20
## 8 19 NA 97 89 23
## 9 8 71 97 110 21
## 10 NA 39 85 NA 24
## 11 7 NA NA NA 44
## 12 16 NA 10 44 21
## 13 11 23 27 28 28
## 14 14 NA NA 65 9
## 15 18 NA 7 NA 13
## 16 14 21 48 22 46
## 17 34 37 35 59 18
## 18 6 20 61 23 13
## 19 30 12 79 31 24
## 20 11 13 63 44 16
## 21 1 NA 16 21 13
## 22 11 NA NA 9 23
## 23 4 NA NA NA 36
## 24 32 NA 80 45 7
## 25 NA NA 108 168 14
## 26 NA NA 20 73 30
## 27 NA NA 52 NA NA
## 28 23 NA 82 76 14
## 29 45 NA 50 118 18
## 30 115 NA 64 84 20
## 31 37 NA 59 85 NA
##
## , , solar.r
##
## 5 6 7 8 9
## 1 190 286 269 83 167
## 2 118 287 248 24 197
## 3 149 242 236 77 183
## 4 313 186 101 NA 189
## 5 NA 220 175 NA 95
## 6 NA 264 314 NA 92
## 7 299 127 276 255 252
## 8 99 273 267 229 220
## 9 19 291 272 207 230
## 10 194 323 175 222 259
## 11 NA 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157 24
## 15 65 322 48 64 112
## 16 334 191 260 71 237
## 17 307 284 274 51 224
## 18 78 37 285 115 27
## 19 322 120 187 244 238
## 20 44 137 220 190 201
## 21 8 150 7 259 238
## 22 320 59 258 36 14
## 23 25 91 295 255 139
## 24 92 250 294 212 49
## 25 66 135 223 238 20
## 26 266 127 81 215 193
## 27 NA 47 82 153 145
## 28 13 98 213 203 191
## 29 252 31 275 225 131
## 30 223 138 253 237 223
## 31 279 NA 254 188 NA
##
## , , wind
##
## 5 6 7 8 9
## 1 7.4 8.6 4.1 6.9 6.9
## 2 8.0 9.7 9.2 13.8 5.1
## 3 12.6 16.1 9.2 7.4 2.8
## 4 11.5 9.2 10.9 6.9 4.6
## 5 14.3 8.6 4.6 7.4 7.4
## 6 14.9 14.3 10.9 4.6 15.5
## 7 8.6 9.7 5.1 4.0 10.9
## 8 13.8 6.9 6.3 10.3 10.3
## 9 20.1 13.8 5.7 8.0 10.9
## 10 8.6 11.5 7.4 8.6 9.7
## 11 6.9 10.9 8.6 11.5 14.9
## 12 9.7 9.2 14.3 11.5 15.5
## 13 9.2 8.0 14.9 11.5 6.3
## 14 10.9 13.8 14.9 9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9 6.9 10.3 6.9
## 17 12.0 20.7 10.3 6.3 13.8
## 18 18.4 9.2 6.3 7.4 10.3
## 19 11.5 11.5 5.1 10.9 10.3
## 20 9.7 10.3 11.5 10.3 8.0
## 21 9.7 6.3 6.9 15.5 12.6
## 22 16.6 1.7 9.7 14.3 9.2
## 23 9.7 4.6 11.5 12.6 10.3
## 24 12.0 6.3 8.6 9.7 10.3
## 25 16.6 8.0 8.0 3.4 16.6
## 26 14.9 8.0 8.6 8.0 6.9
## 27 8.0 10.3 12.0 5.7 13.2
## 28 12.0 11.5 7.4 9.7 14.3
## 29 14.9 14.9 7.4 2.3 8.0
## 30 5.7 8.0 7.4 6.3 11.5
## 31 7.4 NA 9.2 6.3 NA
##
## , , temp
##
## 5 6 7 8 9
## 1 67 78 84 81 91
## 2 72 74 85 81 92
## 3 74 67 81 82 93
## 4 62 84 84 86 93
## 5 56 85 83 85 87
## 6 66 79 83 87 84
## 7 65 82 88 89 80
## 8 59 87 92 90 78
## 9 61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NA 81 94 NA
# Monthly mean of every variable. With `day` kept in the formula each cell held
# at most one observation, so mean() only echoed the raw values (empty cells
# became NaN). Dropping `day` lets all values of a month be averaged together.
b <- acast(T, month ~ variable, mean)
b
## ozone solar.r wind temp
## 5 23.61538 181.2963 11.622581 65.54839
## 6 29.44444 190.1667 10.266667 79.10000
## 7 59.11538 216.4839 8.941935 83.90323
## 8 59.96154 171.8571 8.793548 83.96774
## 9 31.44828 167.4333 10.180000 76.90000
#plyr: 데이터 분리, 특정함수 적용, 결과 재결합
#1.apply
library(plyr)
# 3 x 2 integer matrix, filled column by column.
a <- matrix(1:6, nrow = 3)
a
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
# MARGIN = 1 applies the function to each row.
apply(X = a, MARGIN = 1, FUN = sum)
## [1] 5 7 9
# MARGIN = 2 applies it to each column; the Species column is left out.
apply(X = iris[, 1:4], MARGIN = 2, FUN = sum)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
# Dedicated helpers for column/row sums and means.
colSums(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
colMeans(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
rowSums(iris[, 1:4])
## [1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
## [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
## [31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
## [46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
## [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
## [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
## [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
rowMeans(iris[, 1:4])
## [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
## [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
## [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
## [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
## [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
## [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
## [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
## [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
## [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950
#데이터테이블
#데이터 기초통계
library(dplyr)
library(plyr)
data(iris)
head(iris) # first 6 rows
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(iris) # structure: column types and a preview of the values
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Basic descriptive statistics per column. This call used to appear twice in a
# row with identical output; it is now run once.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
cov(iris[, 1:4]) # covariance matrix of the numeric columns
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.6856935 -0.0424340 1.2743154 0.5162707
## Sepal.Width -0.0424340 0.1899794 -0.3296564 -0.1216394
## Petal.Length 1.2743154 -0.3296564 3.1162779 1.2956094
## Petal.Width 0.5162707 -0.1216394 1.2956094 0.5810063
cor(iris[, 1:4]) # correlation matrix of the numeric columns
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
# Theory p.166, past-exam question type, concept check no. 13
library(ISLR)
data(Wage)
# Column types and a preview of the values.
Wage %>% glimpse()
## Rows: 3,000
## Columns: 11
## $ year <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…
# Per-column summary: quartiles for numeric columns, level counts for factors.
summary(object = Wage)
## year age maritl race
## Min. :2003 Min. :18.00 1. Never Married: 648 1. White:2480
## 1st Qu.:2004 1st Qu.:33.75 2. Married :2074 2. Black: 293
## Median :2006 Median :42.00 3. Widowed : 19 3. Asian: 190
## Mean :2006 Mean :42.41 4. Divorced : 204 4. Other: 37
## 3rd Qu.:2008 3rd Qu.:51.00 5. Separated : 55
## Max. :2009 Max. :80.00
##
## education region jobclass
## 1. < HS Grad :268 2. Middle Atlantic :3000 1. Industrial :1544
## 2. HS Grad :971 1. New England : 0 2. Information:1456
## 3. Some College :650 3. East North Central: 0
## 4. College Grad :685 4. West North Central: 0
## 5. Advanced Degree:426 5. South Atlantic : 0
## 6. East South Central: 0
## (Other) : 0
## health health_ins logwage wage
## 1. <=Good : 858 1. Yes:2083 Min. :3.000 Min. : 20.09
## 2. >=Very Good:2142 2. No : 917 1st Qu.:4.447 1st Qu.: 85.38
## Median :4.653 Median :104.92
## Mean :4.654 Mean :111.70
## 3rd Qu.:4.857 3rd Qu.:128.68
## Max. :5.763 Max. :318.34
##
#결측값처리
library(reshape)
##
## 다음의 패키지를 부착합니다: 'reshape'
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following objects are masked from 'package:plyr':
##
## rename, round_any
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:dplyr':
##
## rename
data(french_fries)
# Keep only the rows that contain at least one missing value.
french_fries[rowSums(is.na(french_fries)) > 0, ]
## time treatment subject rep potato buttery grassy rancid painty
## 315 5 3 15 1 NA NA NA NA NA
## 455 7 2 79 1 7.3 NA 0.0 0.7 0
## 515 8 1 79 1 10.5 NA 0.0 0.5 0
## 520 8 2 16 1 4.5 NA 1.4 6.7 0
## 563 8 2 79 2 5.7 0 1.4 2.3 NA
# Indexing with !complete.cases(x) returns only the rows that contain an NA.
# complete.cases(x) itself returns FALSE for rows that contain a missing value.