# Start from an empty workspace and confirm that nothing is left behind.
rm(list = ls())
ls()
## character(0)
# Switch the working directory to c:/data and echo it back as a check.
setwd(dir = "c:/data")
getwd()
## [1] "c:/data"
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(psych)
## 
## 다음의 패키지를 부착합니다: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(hflights)
library(lubridate)
## 
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## 다음의 패키지를 부착합니다: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(reshape2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ purrr   1.0.1     ✔ tibble  3.2.1
## ✔ readr   2.1.4     ✔ tidyr   1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()      masks ggplot2::%+%()
## ✖ psych::alpha()    masks ggplot2::alpha()
## ✖ plyr::arrange()   masks dplyr::arrange()
## ✖ purrr::compact()  masks plyr::compact()
## ✖ plyr::count()     masks dplyr::count()
## ✖ plyr::desc()      masks dplyr::desc()
## ✖ plyr::failwith()  masks dplyr::failwith()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ plyr::id()        masks dplyr::id()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ plyr::mutate()    masks dplyr::mutate()
## ✖ plyr::rename()    masks dplyr::rename()
## ✖ plyr::summarise() masks dplyr::summarise()
## ✖ plyr::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Handy summary helpers: sd() standard deviation, var() variance,
# length() number of elements, range() minimum and maximum,
# cor() correlation coefficient, sample() random draw.

# sample() draws at random, so two unseeded calls differ; calling
# set.seed(1) immediately before sample() makes the draw reproducible.
sample(x = 1:45, size = 6)
## [1] 29 27  4 23  5 40
sample(x = 1:45, size = 6)
## [1] 19 45  8  9 43 12
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1]  4 39  1 34 23 14
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1]  4 39  1 34 23 14
# (Note from the original: material up to p. 138 added.)

# Matrices
m <- matrix(1:6, nrow = 3)
# Keep the rows whose first column exceeds 1 and whose second column exceeds 5.
# The trailing comma matters: without it the 3-element row mask is recycled
# over all 6 cells and selects individual cells, which matches the intended
# row only by coincidence for this particular matrix.
m[m[, 1] > 1 & m[, 2] > 5, ]
## [1] 3 6
# rep(value, times): repeat a value (or a whole vector) `times` times
rep(1, 3)
## [1] 1 1 1
rep(2:5, 3)
##  [1] 2 3 4 5 2 3 4 5 2 3 4 5
# seq(from, to): counts up by 1 from the first argument to the second
seq(1, 3)
## [1] 1 2 3
# length.out spaces the values evenly so that exactly that many are produced
# (spelled out in full instead of relying on partial matching of `length`)
seq(1, 11, length.out = 7)
## [1]  1.000000  2.666667  4.333333  6.000000  7.666667  9.333333 11.000000
# Basic location and spread measures: mean(), var(), sd()
# (`c` is used as a plain variable name here; calls such as c("a", "b") still
# reach the base function because R skips non-functions when looking up a call.)
c <- 1:10
print(round(sd(c), digits = 2))
## [1] 3.03
# One-row table of descriptive statistics
a <- 1:10
describe(a)
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 10  5.5 3.03    5.5     5.5 3.71   1  10     9    0    -1.56 0.96
# R data handling: indexing a character vector
b <- c("a", "b", "c")
b
## [1] "a" "b" "c"
# A negative index drops that position; the vector itself is untouched
b[-3L]
## [1] "a" "b"
b
## [1] "a" "b" "c"
# A vector of positions selects several elements at once
b[1:2]
## [1] "a" "b"
# Loops and conditionals
# for loop: run the statement as one complete block
a <- NULL  # empty vector, filled one slot per iteration
for (i in seq_len(9)) {
  a[i] <- i * i
}
a
## [1]  1  4  9 16 25 36 49 64 81
# while-style loop (the original note marks this example as having raised an error)
x <- 1
repeat {
  if (x >= 5) break
  x <- x + 1
  print(x)
}
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# ifelse(): vectorised if/else -- recode gender as 0 for "f" and 1 otherwise
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(gender != "f", 1, 0)
gender
## [1] 1 0 1 0 1
# paste(): glue values together element by element; the shorter vector is recycled
number <- 1:5
alphabet <- c("a", "b", "c")
paste(number, alphabet, sep = " ")
## [1] "1 a" "2 b" "3 c" "4 a" "5 b"
# Converting between data types / data structures
# (x still holds the value left behind by the loop example above)
as.data.frame(x)
##   x
## 1 5
factor(x)
## [1] 5
## Levels: 5
# Clear the workspace again and confirm that it is empty.
rm(list = ls())
ls()
## character(0)
# Switch the working directory to c:/data and echo it back as a check.
setwd(dir = "c:/data")
getwd()
## [1] "c:/data"
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)

# Handy summary helpers: sd() standard deviation, var() variance,
# length() number of elements, range() minimum and maximum,
# cor() correlation coefficient, sample() random draw.

# sample() draws at random, so two unseeded calls differ; calling
# set.seed(1) immediately before sample() makes the draw reproducible.
sample(x = 1:45, size = 6)
## [1] 18 33 21 43 10  7
sample(x = 1:45, size = 6)
## [1]  9 15 21 37 41 25
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1]  4 39  1 34 23 14
set.seed(seed = 1)
sample(x = 1:45, size = 6)
## [1]  4 39  1 34 23 14
# (Note from the original: material up to p. 138 added.)

# Matrices
m <- matrix(1:6, nrow = 3)
# Keep the rows whose first column exceeds 1 and whose second column exceeds 5.
# The trailing comma matters: without it the 3-element row mask is recycled
# over all 6 cells and selects individual cells, which matches the intended
# row only by coincidence for this particular matrix.
m[m[, 1] > 1 & m[, 2] > 5, ]
## [1] 3 6
# rep(value, times): repeat a value (or a whole vector) `times` times
rep(1, 3)
## [1] 1 1 1
rep(2:5, 3)
##  [1] 2 3 4 5 2 3 4 5 2 3 4 5
# seq(from, to): counts up by 1 from the first argument to the second
seq(1, 3)
## [1] 1 2 3
# length.out spaces the values evenly so that exactly that many are produced
# (spelled out in full instead of relying on partial matching of `length`)
seq(1, 11, length.out = 7)
## [1]  1.000000  2.666667  4.333333  6.000000  7.666667  9.333333 11.000000
# Basic location and spread measures: mean(), var(), sd()
# (`c` is used as a plain variable name here; calls such as c("a", "b") still
# reach the base function because R skips non-functions when looking up a call.)
c <- 1:10
print(round(sd(c), digits = 2))
## [1] 3.03
# One-row table of descriptive statistics
a <- 1:10
describe(a)
##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 10  5.5 3.03    5.5     5.5 3.71   1  10     9    0    -1.56 0.96
# R data handling: indexing a character vector
b <- c("a", "b", "c")
b
## [1] "a" "b" "c"
# A negative index drops that position; the vector itself is untouched
b[-3L]
## [1] "a" "b"
b
## [1] "a" "b" "c"
# A vector of positions selects several elements at once
b[1:2]
## [1] "a" "b"
# Loops and conditionals
# for loop: run the statement as one complete block
a <- NULL  # empty vector, filled one slot per iteration
for (i in seq_len(9)) {
  a[i] <- i * i
}
a
## [1]  1  4  9 16 25 36 49 64 81
# while-style loop (the original note marks this example as having raised an error)
x <- 1
repeat {
  if (x >= 5) break
  x <- x + 1
  print(x)
}
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# ifelse(): vectorised if/else -- recode gender as 0 for "f" and 1 otherwise
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(gender != "f", 1, 0)
gender
## [1] 1 0 1 0 1
# paste(): glue values together element by element; the shorter vector is recycled
number <- 1:5
alphabet <- c("a", "b", "c")
paste(number, alphabet, sep = " ")
## [1] "1 a" "2 b" "3 c" "4 a" "5 b"
# Converting between data types / data structures
# (x still holds the value left behind by the loop example above)
as.data.frame(x)
##   x
## 1 5
factor(x)
## [1] 5
## Levels: 5
# Logical to number: FALSE becomes 0
as.numeric(FALSE)
## [1] 0
# Number to logical: a non-zero value becomes TRUE
as.logical(0.45)
## [1] TRUE
# Converting strings to dates
# ISO "yyyy-mm-dd" text is parsed without a format argument
as.Date("2018-01-13")
## [1] "2018-01-13"
# Any other layout needs a format that mirrors the text exactly, separators
# included; "%m%d%Y" (no slashes) could not parse "01/13/2018" and gave NA.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"
# R graphics
# Scatter plots: load the built-in airquality data and look at its columns
data(airquality)
airquality %>% glimpse()
## Rows: 153
## Columns: 6
## $ Ozone   <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind    <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp    <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month   <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Column names are case-sensitive: the built-in data set spells the column
# `Ozone`, so `airquality$ozone` is NULL here and summarised to nothing.
summary(airquality$Ozone)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37
# Scatterplot matrix of the four iris measurements, points filled by species
data(iris)
pairs(iris[1:4],
      main = "Anderson's Iris Data -- 3 species",
      pch = 21,
      # The factor column is `Species` (capital S); `iris$species` is NULL, so
      # the colour lookup produced an empty vector instead of one fill per row.
      bg = c("red", "green3", "blue")[unclass(iris$Species)])

# Histogram
# Note on skewness: its sign follows mean - median; for `height` the mean (166)
# exceeds the median (165), i.e. the difference is positive.
library(dplyr)
library(psych)
height <- c(182, 160, 165, 170, 163, 160, 181, 166, 159, 145, 175)
hist(x = height)

summary(height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   145.0   160.0   165.0   166.0   172.5   182.0
# Descriptive statistics for Ozone with the missing values dropped
describe(airquality$Ozone, na.rm = TRUE)
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 116 42.13 32.99   31.5    37.8 25.95   1 168   167 1.21     1.11 3.06
#상자그림
library(caret)
## 필요한 패키지를 로딩중입니다: lattice
## 
## 다음의 패키지를 부착합니다: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)

# Per-species density curves for each of the four iris measurements (caret).
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density",
  # every panel gets its own x and y axis range
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  # "|" marks each observation with a tick, as in the caret documentation
  # example; the digit "1" used before looks like a typo for it.
  pch = "|",
  layout = c(4, 1),
  auto.key = list(columns = 3)
)

# Reading the plot: the petal measurements separate the species most clearly.
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
# Box plots of each iris measurement by species, arranged on a 2 x 2 grid.
# par() returns the settings it replaces; they are restored at the end so that
# later plots are not squeezed into one cell of this grid.
old_par <- par(mfrow = c(2, 2))
boxplot(iris$Petal.Length ~ iris$Species, data = iris)
boxplot(iris$Sepal.Length ~ iris$Species, data = iris)
boxplot(iris$Sepal.Width ~ iris$Species, data = iris)
boxplot(iris$Petal.Width ~ iris$Species, data = iris)
par(old_par)

# Titanic: a 4-way contingency table (Class x Sex x Age x Survived)
data(Titanic)
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)

Titanic %>% glimpse()
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
# Mosaic plot of the table
mosaicplot(
  Titanic,                           # data
  main = "Survival on the Titanic",  # plot title
  color = c("blue", "green"),        # tile colours
  off = 1                            # spacing between tiles
)

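# NA / error notes: the two calls below are kept commented out because they
# raised errors -- at this point the column is still spelled `Ozone` (the names
# are only lower-cased further down), so `airquality$ozone` is NULL.
#describe(airquality$ozone,na.rm=TRUE)
#hist(airquality$ozone,na.rm=TRUE)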



#데이터마트

library(reshape2)
# Reload airquality and lower-case its variable names
data("airquality")
names(airquality) <- tolower(names(airquality))
# First six rows (the default for head())
head(airquality)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Ask for a specific number of rows instead
head(airquality, n = 3)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
library(dplyr)
data(airquality)
names(airquality) <- tolower(names(airquality))  # lower-case the variable names

# Long format: one row per (month, day, variable), with missing values dropped.
# NOTE(review): `T` shadows the built-in shorthand for TRUE; the name is kept
# because later statements in this script refer to it.
T <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
head(T)
##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36
## 3     5   3    ozone    12
## 4     5   4    ozone    18
## 6     5   6    ozone    28
## 7     5   7    ozone    23
head(airquality)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Mean ozone per month.  plyr is attached after dplyr in this script, so a bare
# summarize() resolves to plyr::summarize(), which ignores group_by() and
# collapsed everything into one overall mean (42.12931).  Naming the dplyr
# verbs explicitly gives the per-month result regardless of attach order.
T %>%
  dplyr::group_by(month) %>%
  dplyr::filter(variable == "ozone") %>%
  dplyr::summarise(m = mean(value))
## # A tibble: 5 × 2
##   month     m
##   <int> <dbl>
## 1     5  23.6
## 2     6  29.4
## 3     7  59.1
## 4     8  60.0
## 5     9  31.4
library(lubridate)
library(plyr)

# Check the working directory, point it at c:/data, then load the disease table
getwd()
## [1] "c:/data"
setwd(dir = "c:/data")
df <- read.csv(file = "disease.csv")
# Print the whole data frame
print(df)
##   year Afghanistan Albania Algeria Andorra Angola Antigua...Barbuda Argentina
## 1 1999           0    89.0    25.0   245.0  217.0             102.0     193.0
## 2 2000           0   132.0     0.0   138.0   57.0             128.0      25.0
## 3 2001           0    54.0    14.0   312.0   45.0              45.0     221.0
## 4 2002           0     4.9     0.7    12.4    5.9               4.9       8.3
##   Armenia Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados
## 1    21.0     261.0   279.0       21.0   122.0      42          0    143.0
## 2   179.0      72.0    75.0       46.0   176.0      63          0    173.0
## 3    11.0     212.0   191.0        5.0    51.0       7          0     36.0
## 4     3.8      10.4     9.7        1.3     6.3       2          0      6.3
##   Belarus Belgium Belize Benin Bhutan Bolivia Bosnia.Herzegovina Botswana
## 1   142.0   295.0  263.0  34.0   23.0   167.0               76.0    173.0
## 2   373.0    84.0  114.0   4.0    0.0    41.0              173.0     35.0
## 3    42.0   212.0    8.0  13.0    0.0     8.0                8.0     35.0
## 4    14.4    10.5    6.8   1.1    0.4     3.8                4.6      5.4
##   Brazil Brunei Bulgaria Burkina.Faso Burundi Cote.d.Ivoire Cabo.Verde Cambodia
## 1  245.0   31.0    231.0         25.0    88.0            37        144     57.0
## 2  145.0    2.0    252.0          7.0     0.0             1         56     65.0
## 3   16.0    1.0     94.0          7.0     0.0             7         16      1.0
## 4    7.2    0.6     10.3          4.3     6.3             4          4      2.2
##   Cameroon Canada Central.African.Republic Chad Chile China Colombia Comoros
## 1    147.0  240.0                     17.0 15.0 130.0    79    159.0     1.0
## 2      1.0  122.0                      2.0  1.0 124.0   192     76.0     3.0
## 3      4.0  100.0                      1.0  1.0 172.0     8      3.0     1.0
## 4      5.8    8.2                      1.8  0.4   7.6     5      4.2     0.1
##   Congo Cook.Islands Costa.Rica Croatia  Cuba Cyprus Czech.Republic North.Korea
## 1  76.0          0.0      149.0   230.0  93.0  192.0          361.0           0
## 2   1.0        254.0       87.0    87.0 137.0  154.0          170.0           0
## 3   9.0         74.0       11.0   254.0   5.0  113.0          134.0           0
## 4   1.7          5.9        4.4    10.2   4.2    8.2           11.8           0
##   DR.Congo Denmark Djibouti Dominica Dominican.Republic Ecuador Egypt
## 1     32.0   224.0     15.0     52.0              193.0   162.0   6.0
## 2      3.0    81.0     44.0    286.0              147.0    74.0   4.0
## 3      1.0   278.0      3.0     26.0                9.0     3.0   1.0
## 4      2.3    10.4      1.1      6.6                6.2     4.2   0.2
##   El.Salvador Equatorial.Guinea Eritrea Estonia Ethiopia Fiji Finland France
## 1        52.0              92.0    18.0   224.0     20.0   77     263  127.0
## 2        69.0               0.0     0.0   194.0      3.0   35     133  151.0
## 3         2.0             233.0     0.0    59.0      0.0    1      97  370.0
## 4         2.2               5.8     0.5     9.5      0.7    2      10   11.8
##   Gabon Gambia Georgia Germany Ghana Greece Grenada Guatemala Guinea
## 1 347.0    8.0    52.0   346.0  31.0  133.0   199.0      53.0    9.0
## 2  98.0    0.0   100.0   117.0   3.0  112.0   438.0      69.0    0.0
## 3  59.0    1.0   149.0   175.0  10.0  218.0    28.0       2.0    2.0
## 4   8.9    2.4     5.4    11.3   1.8    8.3    11.9       2.2    0.2
##   Guinea.Bissau Guyana Haiti Honduras Hungary Iceland India Indonesia Iran Iraq
## 1          28.0   93.0   1.0       69   234.0   233.0   9.0       5.0    0  9.0
## 2          31.0  302.0 326.0       98   215.0    61.0 114.0       1.0    0  3.0
## 3          21.0    1.0   1.0        2   185.0    78.0   0.0       0.0    0  0.0
## 4           2.5    7.1   5.9        3    11.3     6.6   2.2       0.1    0  0.2
##   Ireland Israel Italy Jamaica Japan Jordan Kazakhstan Kenya Kiribati Kuwait
## 1   313.0   63.0  85.0    82.0    77    6.0      124.0  58.0       21      0
## 2   118.0   69.0  42.0    88.0   202   21.0      246.0  22.0       34      0
## 3   165.0    9.0 237.0     9.0    16    1.0       12.0   2.0        1      0
## 4    11.4    2.5   6.5     3.4     7    0.5        6.8   1.8        1      0
##   Kyrgyzstan  Laos Latvia Lebanon Lesotho Liberia Libya Lithuania Luxembourg
## 1       31.0  62.0  281.0    20.0    82.0    19.0     0     343.0      236.0
## 2       88.0   0.0  216.0    55.0    50.0   152.0     0     244.0      133.0
## 3        6.0 123.0   62.0    31.0     0.0     2.0     0      56.0      271.0
## 4        2.4   6.2   10.5     1.9     2.8     3.1     0      12.9       11.4
##   Madagascar Malawi Malaysia Maldives Mali Malta Marshall.Islands Mauritania
## 1       26.0    8.0     13.0        0  5.0 149.0                0          0
## 2       15.0   11.0      4.0        0  1.0 100.0                0          0
## 3        4.0    1.0      0.0        0  1.0 120.0                0          0
## 4        0.8    1.5      0.3        0  0.6   6.6                0          0
##   Mauritius Mexico Micronesia Monaco Mongolia Montenegro Morocco Mozambique
## 1      98.0  238.0       62.0      0     77.0       31.0    12.0       47.0
## 2      31.0   68.0       50.0      0    189.0      114.0     6.0       18.0
## 3      18.0    5.0       18.0      0      8.0      128.0    10.0        5.0
## 4       2.6    5.5        2.3      0      4.9        4.9     0.5        1.3
##   Myanmar Namibia Nauru Nepal Netherlands New.Zealand Nicaragua Niger Nigeria
## 1     5.0   376.0    49   5.0       251.0       203.0      78.0   3.0    42.0
## 2     1.0     3.0     0   6.0        88.0        79.0     118.0   2.0     5.0
## 3     0.0     1.0     8   0.0       190.0       175.0       1.0   1.0     2.0
## 4     0.1     6.8     1   0.2         9.4         9.3       3.5   0.1     9.1
##   Niue Norway Oman Pakistan Palau Panama Papua.New.Guinea Paraguay  Peru
## 1  188  169.0 22.0        0 306.0  285.0             44.0    213.0 163.0
## 2  200   71.0 16.0        0  63.0  104.0             39.0    117.0 160.0
## 3    7  129.0  1.0        0  23.0   18.0              1.0     74.0  21.0
## 4    7    6.7  0.7        0   6.9    7.2              1.5      7.3   6.1
##   Philippines Poland Portugal Qatar South.Korea Moldova Romania
## 1        71.0  343.0      194   1.0       140.0   109.0   297.0
## 2       186.0  215.0       67  42.0        16.0   226.0   122.0
## 3         1.0   56.0      339   7.0         9.0    18.0   167.0
## 4         4.6   10.9       11   0.9         9.8     6.3    10.4
##   Russian.Federation Rwanda St..Kitts...Nevis St..Lucia
## 1              247.0   43.0             194.0     171.0
## 2              326.0    2.0             205.0     315.0
## 3               73.0    0.0              32.0      71.0
## 4               11.5    6.8               7.7      10.1
##   St..Vincent...the.Grenadines Samoa San.Marino Sao.Tome...Principe
## 1                        120.0 105.0          0                56.0
## 2                        221.0  18.0          0                38.0
## 3                         11.0  24.0          0               140.0
## 4                          6.3   2.6          0                 4.2
##   Saudi.Arabia Senegal Serbia Seychelles Sierra.Leone Singapore Slovakia
## 1          0.0     9.0  283.0      157.0         25.0      60.0    196.0
## 2          5.0     1.0  131.0       25.0          3.0      12.0    293.0
## 3          0.0     7.0  127.0       51.0          2.0      11.0    116.0
## 4          0.1     0.3    9.6        4.1          6.7       1.5     11.4
##   Slovenia Solomon.Islands Somalia South.Africa Spain Sri.Lanka Sudan Suriname
## 1    270.0            56.0       0        225.0   284      16.0   8.0    128.0
## 2     51.0            11.0       0         76.0   157     104.0  13.0    178.0
## 3    276.0             1.0       0         81.0   112       0.0   0.0      7.0
## 4     10.6             1.2       0          8.2    10       2.2   1.7      5.6
##   Swaziland Sweden Switzerland Syria Tajikistan Thailand Macedonia Timor.Leste
## 1      90.0  152.0       185.0     5        2.0     99.0     106.0         1.0
## 2       2.0   60.0       100.0    35       15.0    258.0      27.0         1.0
## 3       2.0  186.0       280.0    16        0.0      1.0      86.0         4.0
## 4       4.7    7.2        10.2     1        0.3      6.4       3.9         0.1
##   Togo Tonga Trinidad...Tobago Tunisia Turkey Turkmenistan Tuvalu Uganda
## 1 36.0  36.0             197.0    51.0   51.0         19.0      6   45.0
## 2  2.0  21.0             156.0     3.0   22.0         71.0     41    9.0
## 3 19.0   5.0               7.0    20.0    7.0         32.0      9    0.0
## 4  1.3   1.1               6.4     1.3    1.4          2.2      1    8.3
##   Ukraine United.Arab.Emirates United.Kingdom Tanzania   USA Uruguay Uzbekistan
## 1   206.0                 16.0          219.0     36.0 249.0   115.0       25.0
## 2   237.0                135.0          126.0      6.0 158.0    35.0      101.0
## 3    45.0                  5.0          195.0      1.0  84.0   220.0        8.0
## 4     8.9                  2.8           10.4      5.7   8.7     6.6        2.4
##   Vanuatu Venezuela Vietnam Yemen Zambia Zimbabwe
## 1    21.0     333.0     111   6.0   32.0     64.0
## 2    18.0     100.0       2   0.0   19.0     18.0
## 3    11.0       3.0       1   0.0    4.0      4.0
## 4     0.9       7.7       2   0.1    2.5      4.7
library(dplyr)
library(reshape2)
# Reshape to long format: one row per (year, country column).
df1 <- melt(df, id.vars = "year")
df1 %>% glimpse()
## Rows: 772
## Columns: 3
## $ year     <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value    <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year"    "country" "disease"
# Mean value for the year 2000 (dplyr verbs named explicitly because plyr,
# attached later in this script, masks summarise/summarize).
df1 %>%
  dplyr::filter(year == 2000) %>%
  dplyr::summarise(m = mean(disease))
##          m
## 1 81.01036
# Number of year-2000 rows above that mean.  The threshold is computed from the
# data instead of being pasted in as the rounded literal 81.01036.
result <- df1 %>%
  dplyr::filter(year == 2000) %>%
  dplyr::filter(disease > mean(disease)) %>%
  NROW()
print(result)
## [1] 76
# Cast back to a 3-d array: rows = day, columns = month, one slice per variable
acast(T, formula = day ~ month ~ variable)
## , , ozone
## 
##      5  6   7   8  9
## 1   41 NA 135  39 96
## 2   36 NA  49   9 78
## 3   12 NA  32  16 73
## 4   18 NA  NA  78 91
## 5   NA NA  64  35 47
## 6   28 NA  40  66 32
## 7   23 29  77 122 20
## 8   19 NA  97  89 23
## 9    8 71  97 110 21
## 10  NA 39  85  NA 24
## 11   7 NA  NA  NA 44
## 12  16 NA  10  44 21
## 13  11 23  27  28 28
## 14  14 NA  NA  65  9
## 15  18 NA   7  NA 13
## 16  14 21  48  22 46
## 17  34 37  35  59 18
## 18   6 20  61  23 13
## 19  30 12  79  31 24
## 20  11 13  63  44 16
## 21   1 NA  16  21 13
## 22  11 NA  NA   9 23
## 23   4 NA  NA  NA 36
## 24  32 NA  80  45  7
## 25  NA NA 108 168 14
## 26  NA NA  20  73 30
## 27  NA NA  52  NA NA
## 28  23 NA  82  76 14
## 29  45 NA  50 118 18
## 30 115 NA  64  84 20
## 31  37 NA  59  85 NA
## 
## , , solar.r
## 
##      5   6   7   8   9
## 1  190 286 269  83 167
## 2  118 287 248  24 197
## 3  149 242 236  77 183
## 4  313 186 101  NA 189
## 5   NA 220 175  NA  95
## 6   NA 264 314  NA  92
## 7  299 127 276 255 252
## 8   99 273 267 229 220
## 9   19 291 272 207 230
## 10 194 323 175 222 259
## 11  NA 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157  24
## 15  65 322  48  64 112
## 16 334 191 260  71 237
## 17 307 284 274  51 224
## 18  78  37 285 115  27
## 19 322 120 187 244 238
## 20  44 137 220 190 201
## 21   8 150   7 259 238
## 22 320  59 258  36  14
## 23  25  91 295 255 139
## 24  92 250 294 212  49
## 25  66 135 223 238  20
## 26 266 127  81 215 193
## 27  NA  47  82 153 145
## 28  13  98 213 203 191
## 29 252  31 275 225 131
## 30 223 138 253 237 223
## 31 279  NA 254 188  NA
## 
## , , wind
## 
##       5    6    7    8    9
## 1   7.4  8.6  4.1  6.9  6.9
## 2   8.0  9.7  9.2 13.8  5.1
## 3  12.6 16.1  9.2  7.4  2.8
## 4  11.5  9.2 10.9  6.9  4.6
## 5  14.3  8.6  4.6  7.4  7.4
## 6  14.9 14.3 10.9  4.6 15.5
## 7   8.6  9.7  5.1  4.0 10.9
## 8  13.8  6.9  6.3 10.3 10.3
## 9  20.1 13.8  5.7  8.0 10.9
## 10  8.6 11.5  7.4  8.6  9.7
## 11  6.9 10.9  8.6 11.5 14.9
## 12  9.7  9.2 14.3 11.5 15.5
## 13  9.2  8.0 14.9 11.5  6.3
## 14 10.9 13.8 14.9  9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9  6.9 10.3  6.9
## 17 12.0 20.7 10.3  6.3 13.8
## 18 18.4  9.2  6.3  7.4 10.3
## 19 11.5 11.5  5.1 10.9 10.3
## 20  9.7 10.3 11.5 10.3  8.0
## 21  9.7  6.3  6.9 15.5 12.6
## 22 16.6  1.7  9.7 14.3  9.2
## 23  9.7  4.6 11.5 12.6 10.3
## 24 12.0  6.3  8.6  9.7 10.3
## 25 16.6  8.0  8.0  3.4 16.6
## 26 14.9  8.0  8.6  8.0  6.9
## 27  8.0 10.3 12.0  5.7 13.2
## 28 12.0 11.5  7.4  9.7 14.3
## 29 14.9 14.9  7.4  2.3  8.0
## 30  5.7  8.0  7.4  6.3 11.5
## 31  7.4   NA  9.2  6.3   NA
## 
## , , temp
## 
##     5  6  7  8  9
## 1  67 78 84 81 91
## 2  72 74 85 81 92
## 3  74 67 81 82 93
## 4  62 84 84 86 93
## 5  56 85 83 85 87
## 6  66 79 83 87 84
## 7  65 82 88 89 80
## 8  59 87 92 90 78
## 9  61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NA 81 94 NA
# Monthly mean of every variable.  Casting on day ~ month ~ variable leaves a
# single observation per cell, so mean() averaged nothing and only turned the
# empty cells into NaN; collapsing over day gives the per-month averages the
# original note asks for.
b <- acast(T, month ~ variable, mean)
b
##      ozone  solar.r      wind     temp
## 5 23.61538 181.2963 11.622581 65.54839
## 6 29.44444 190.1667 10.266667 79.10000
## 7 59.11538 216.4839  8.941935 83.90323
## 8 59.96154 171.8571  8.793548 83.96774
## 9 31.44828 167.4333 10.180000 76.90000
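# plyr: split the data, apply a function to each piece, recombine the results
# 1. apply -- note that apply(), colSums(), rowMeans() etc. used below are base R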
library(plyr)
# A 3 x 2 matrix filled column by column
a <- matrix(1:6, ncol = 2)
a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# MARGIN = 1 applies the function to every row
apply(X = a, MARGIN = 1, FUN = sum)
## [1] 5 7 9
# MARGIN = 2 applies it to every column (Species, column 5, is left out)
apply(X = iris[, -5], MARGIN = 2, FUN = sum)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
# Dedicated helpers for column totals and column means
colSums(iris[, -5])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
colMeans(iris[, -5])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333
# Row totals across the four numeric columns
rowSums(iris[, 1:4])
##   [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
##  [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
##  [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
##  [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
##  [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
##  [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
##  [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
# Row means across the four numeric columns
rowMeans(iris[, 1:4])
##   [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
##  [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
##  [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
##  [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
##  [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
##  [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
##  [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
##  [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
##  [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950
#데이터테이블

#데이터 기초통계
library(dplyr)
library(plyr)
data("iris")
# First six rows of the data
head(iris, n = 6)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Structure: column types and a preview of the values
str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary statistics (quartiles and mean, or level counts for factors)
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Same per-column summary, printed a second time
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Covariance matrix of the four numeric measurements
cov(x = iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    0.6856935  -0.0424340    1.2743154   0.5162707
## Sepal.Width    -0.0424340   0.1899794   -0.3296564  -0.1216394
## Petal.Length    1.2743154  -0.3296564    3.1162779   1.2956094
## Petal.Width     0.5162707  -0.1216394    1.2956094   0.5810063
# Correlation matrix of the same columns
cor(x = iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000
# Theory textbook p. 166, past-exam-style concept check, question 13.
library(ISLR)
# Load the Wage data set into the workspace.
data(Wage)
# Column-by-column preview of the data (3,000 rows, 11 columns).
dplyr::glimpse(Wage)
## Rows: 3,000
## Columns: 11
## $ year       <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age        <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl     <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race       <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education  <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region     <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass   <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health     <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage    <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage       <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…
# Per-column summary: numeric summaries and factor level counts.
summary(object = Wage)
##       year           age                     maritl           race     
##  Min.   :2003   Min.   :18.00   1. Never Married: 648   1. White:2480  
##  1st Qu.:2004   1st Qu.:33.75   2. Married      :2074   2. Black: 293  
##  Median :2006   Median :42.00   3. Widowed      :  19   3. Asian: 190  
##  Mean   :2006   Mean   :42.41   4. Divorced     : 204   4. Other:  37  
##  3rd Qu.:2008   3rd Qu.:51.00   5. Separated    :  55                  
##  Max.   :2009   Max.   :80.00                                          
##                                                                        
##               education                     region               jobclass   
##  1. < HS Grad      :268   2. Middle Atlantic   :3000   1. Industrial :1544  
##  2. HS Grad        :971   1. New England       :   0   2. Information:1456  
##  3. Some College   :650   3. East North Central:   0                        
##  4. College Grad   :685   4. West North Central:   0                        
##  5. Advanced Degree:426   5. South Atlantic    :   0                        
##                           6. East South Central:   0                        
##                           (Other)              :   0                        
##             health      health_ins      logwage           wage       
##  1. <=Good     : 858   1. Yes:2083   Min.   :3.000   Min.   : 20.09  
##  2. >=Very Good:2142   2. No : 917   1st Qu.:4.447   1st Qu.: 85.38  
##                                      Median :4.653   Median :104.92  
##                                      Mean   :4.654   Mean   :111.70  
##                                      3rd Qu.:4.857   3rd Qu.:128.68  
##                                      Max.   :5.763   Max.   :318.34  
## 
#결측값처리
library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)


# Load the french_fries data set into the workspace.
data("french_fries")

# Keep only the rows that contain at least one missing value.
subset(french_fries, subset = !complete.cases(french_fries))
##     time treatment subject rep potato buttery grassy rancid painty
## 315    5         3      15   1     NA      NA     NA     NA     NA
## 455    7         2      79   1    7.3      NA    0.0    0.7      0
## 515    8         1      79   1   10.5      NA    0.0    0.5      0
## 520    8         2      16   1    4.5      NA    1.4    6.7      0
## 563    8         2      79   2    5.7       0    1.4    2.3     NA
# !complete.cases() is TRUE for rows holding an NA, so selecting with it returns
# only the rows with missing values.
# complete.cases() itself returns FALSE for a row that has any missing value.

# Type coercion: logical FALSE becomes the number 0.
as.numeric(FALSE)
## [1] 0
# A non-zero number coerces to TRUE.
as.logical(0.45)
## [1] TRUE
# Convert character strings to dates.
# An ISO "YYYY-MM-DD" string parses without an explicit format.
as.Date("2018-01-13")
## [1] "2018-01-13"
# For any other layout the format must mirror the string exactly, including
# the "/" separators; a format without them does not match and yields NA.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"

# R graphics features
# Scatter plot
data(airquality)
# Column-by-column preview of the data (153 rows, 6 columns).
dplyr::glimpse(airquality)
## Rows: 153
## Columns: 6
## $ Ozone   <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind    <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp    <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month   <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Ozone on the x axis against solar radiation on the y axis.
plot(x = airquality$Ozone, y = airquality$Solar.R)

# Summary of Ozone, including its count of missing values.
summary(object = airquality$Ozone)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37
# Scatter-plot matrix
data(iris)
# Fill each point with a colour chosen by species. Column names are
# case-sensitive: the factor is `Species`; `iris$species` is NULL, which leaves
# the fill colour vector empty.
pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species",
      pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])

# Histogram
# Skewness: the numerator (mean - median) is positive.
library(dplyr)
height <- c(
  182, 160, 165, 170, 163, 160,
  181, 166, 159, 145, 175
)
hist(x = height)

summary(object = height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   145.0   160.0   165.0   166.0   172.5   182.0
# Descriptive statistics for Ozone with missing values dropped.
describe(airquality$Ozone, na.rm = TRUE)
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 116 42.13 32.99   31.5    37.8 25.95   1 168   167 1.21     1.11 3.06
# Switch the graphics device to a 1 x 2 panel layout.
par(mfrow = c(1, 2))

hist(x = iris$Petal.Length)

# Back to a single-panel layout.
par(mfrow = c(1, 1))

#상자그림

library(dplyr)
library(ggplot2)
library(psych)
library(hflights)
library(lubridate)
library(plyr)
library(reshape2)
library(tidyverse)
library(caret)
# Density curves of the four iris measurements, split by species, drawn with
# featurePlot() in a 4-column, 1-row panel layout.
featurePlot(
  x = iris[1:4],
  y = iris$Species,
  plot = "density",
  # Let every panel use its own x and y axis range.
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  pch = "|",
  layout = c(4, 1),
  auto.key = list(columns = 3)
)

# The petal measurements separate the species most clearly.
par(mfrow = c(1, 1))
# One box plot per measurement, grouped by species. Because `data = iris` is
# supplied, the formula names the columns directly; this also labels the axes
# with the column names instead of "iris$...".
boxplot(Petal.Length ~ Species, data = iris)

boxplot(Sepal.Length ~ Species, data = iris)

boxplot(Sepal.Width ~ Species, data = iris)

boxplot(Petal.Width ~ Species, data = iris)

data("Titanic")
library(dplyr)
# Titanic is a 4-way table: Class x Sex x Age x Survived.
glimpse(Titanic)
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
# The colour argument is `color`; the earlier spelling `calor` was not a known
# argument, so it was discarded with a warning and the plot was left unshaded.
mosaicplot(Titanic,                          # input data
           main = "Survival on the Titanic", # plot title
           color = c("blue", "green"),       # fill colours
           off = 1)                          # spacing between blocks

#NA, 오류




# Data mart
library(reshape2)
data("airquality")
# Convert every column name to lower case.
names(airquality) <- tolower(names(airquality))
# Show the first six rows of airquality.
head(airquality, n = 6L)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Choose how many rows to show: here the first three.
head(airquality, n = 3)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
library(dplyr)
data(airquality)
# Convert every column name to lower case.
colnames(airquality) <- tolower(colnames(airquality))
# Reshape to long format: month and day stay as identifier columns, every other
# column becomes a (variable, value) pair, and missing values are dropped.
# `id.vars` is written out in full instead of relying on partial matching of
# `id`, and the call is namespace-qualified so another package's melt() cannot
# shadow it.
# NOTE(review): `T` shadows the built-in shorthand for TRUE; the name is kept
# because later statements in this script refer to it.
T <- reshape2::melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
head(T)
##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36
## 3     5   3    ozone    12
## 4     5   4    ozone    18
## 6     5   6    ozone    28
## 7     5   7    ozone    23
head(airquality)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Mean ozone per month. plyr is attached after dplyr in this session, so a bare
# summarize() resolves to plyr::summarize(), which ignores the dplyr grouping
# and collapses everything into a single overall mean. Calling the dplyr verbs
# explicitly gives one row per month.
T %>%
  dplyr::group_by(month) %>%
  dplyr::filter(variable == "ozone") %>%
  dplyr::summarise(m = mean(value))
## # A tibble: 5 × 2
##   month     m
##   <int> <dbl>
## 1     5  23.6
## 2     6  29.4
## 3     7  59.1
## 4     8  60.0
## 5     9  31.4
library(lubridate)
library(plyr)

# Show the current working directory before switching.
getwd()
## [1] "C:/Users/김지수/Desktop/2023/여름방학/AI빅데이터인력양성/adsp"
# Relative file paths below are resolved against c:/data.
setwd(dir = "c:/data")
# Read the wide table: a `year` column followed by one column per country.
df <- read.csv(file = "disease.csv")
# Print the full data frame.
print(df)
##   year Afghanistan Albania Algeria Andorra Angola Antigua...Barbuda Argentina
## 1 1999           0    89.0    25.0   245.0  217.0             102.0     193.0
## 2 2000           0   132.0     0.0   138.0   57.0             128.0      25.0
## 3 2001           0    54.0    14.0   312.0   45.0              45.0     221.0
## 4 2002           0     4.9     0.7    12.4    5.9               4.9       8.3
##   Armenia Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados
## 1    21.0     261.0   279.0       21.0   122.0      42          0    143.0
## 2   179.0      72.0    75.0       46.0   176.0      63          0    173.0
## 3    11.0     212.0   191.0        5.0    51.0       7          0     36.0
## 4     3.8      10.4     9.7        1.3     6.3       2          0      6.3
##   Belarus Belgium Belize Benin Bhutan Bolivia Bosnia.Herzegovina Botswana
## 1   142.0   295.0  263.0  34.0   23.0   167.0               76.0    173.0
## 2   373.0    84.0  114.0   4.0    0.0    41.0              173.0     35.0
## 3    42.0   212.0    8.0  13.0    0.0     8.0                8.0     35.0
## 4    14.4    10.5    6.8   1.1    0.4     3.8                4.6      5.4
##   Brazil Brunei Bulgaria Burkina.Faso Burundi Cote.d.Ivoire Cabo.Verde Cambodia
## 1  245.0   31.0    231.0         25.0    88.0            37        144     57.0
## 2  145.0    2.0    252.0          7.0     0.0             1         56     65.0
## 3   16.0    1.0     94.0          7.0     0.0             7         16      1.0
## 4    7.2    0.6     10.3          4.3     6.3             4          4      2.2
##   Cameroon Canada Central.African.Republic Chad Chile China Colombia Comoros
## 1    147.0  240.0                     17.0 15.0 130.0    79    159.0     1.0
## 2      1.0  122.0                      2.0  1.0 124.0   192     76.0     3.0
## 3      4.0  100.0                      1.0  1.0 172.0     8      3.0     1.0
## 4      5.8    8.2                      1.8  0.4   7.6     5      4.2     0.1
##   Congo Cook.Islands Costa.Rica Croatia  Cuba Cyprus Czech.Republic North.Korea
## 1  76.0          0.0      149.0   230.0  93.0  192.0          361.0           0
## 2   1.0        254.0       87.0    87.0 137.0  154.0          170.0           0
## 3   9.0         74.0       11.0   254.0   5.0  113.0          134.0           0
## 4   1.7          5.9        4.4    10.2   4.2    8.2           11.8           0
##   DR.Congo Denmark Djibouti Dominica Dominican.Republic Ecuador Egypt
## 1     32.0   224.0     15.0     52.0              193.0   162.0   6.0
## 2      3.0    81.0     44.0    286.0              147.0    74.0   4.0
## 3      1.0   278.0      3.0     26.0                9.0     3.0   1.0
## 4      2.3    10.4      1.1      6.6                6.2     4.2   0.2
##   El.Salvador Equatorial.Guinea Eritrea Estonia Ethiopia Fiji Finland France
## 1        52.0              92.0    18.0   224.0     20.0   77     263  127.0
## 2        69.0               0.0     0.0   194.0      3.0   35     133  151.0
## 3         2.0             233.0     0.0    59.0      0.0    1      97  370.0
## 4         2.2               5.8     0.5     9.5      0.7    2      10   11.8
##   Gabon Gambia Georgia Germany Ghana Greece Grenada Guatemala Guinea
## 1 347.0    8.0    52.0   346.0  31.0  133.0   199.0      53.0    9.0
## 2  98.0    0.0   100.0   117.0   3.0  112.0   438.0      69.0    0.0
## 3  59.0    1.0   149.0   175.0  10.0  218.0    28.0       2.0    2.0
## 4   8.9    2.4     5.4    11.3   1.8    8.3    11.9       2.2    0.2
##   Guinea.Bissau Guyana Haiti Honduras Hungary Iceland India Indonesia Iran Iraq
## 1          28.0   93.0   1.0       69   234.0   233.0   9.0       5.0    0  9.0
## 2          31.0  302.0 326.0       98   215.0    61.0 114.0       1.0    0  3.0
## 3          21.0    1.0   1.0        2   185.0    78.0   0.0       0.0    0  0.0
## 4           2.5    7.1   5.9        3    11.3     6.6   2.2       0.1    0  0.2
##   Ireland Israel Italy Jamaica Japan Jordan Kazakhstan Kenya Kiribati Kuwait
## 1   313.0   63.0  85.0    82.0    77    6.0      124.0  58.0       21      0
## 2   118.0   69.0  42.0    88.0   202   21.0      246.0  22.0       34      0
## 3   165.0    9.0 237.0     9.0    16    1.0       12.0   2.0        1      0
## 4    11.4    2.5   6.5     3.4     7    0.5        6.8   1.8        1      0
##   Kyrgyzstan  Laos Latvia Lebanon Lesotho Liberia Libya Lithuania Luxembourg
## 1       31.0  62.0  281.0    20.0    82.0    19.0     0     343.0      236.0
## 2       88.0   0.0  216.0    55.0    50.0   152.0     0     244.0      133.0
## 3        6.0 123.0   62.0    31.0     0.0     2.0     0      56.0      271.0
## 4        2.4   6.2   10.5     1.9     2.8     3.1     0      12.9       11.4
##   Madagascar Malawi Malaysia Maldives Mali Malta Marshall.Islands Mauritania
## 1       26.0    8.0     13.0        0  5.0 149.0                0          0
## 2       15.0   11.0      4.0        0  1.0 100.0                0          0
## 3        4.0    1.0      0.0        0  1.0 120.0                0          0
## 4        0.8    1.5      0.3        0  0.6   6.6                0          0
##   Mauritius Mexico Micronesia Monaco Mongolia Montenegro Morocco Mozambique
## 1      98.0  238.0       62.0      0     77.0       31.0    12.0       47.0
## 2      31.0   68.0       50.0      0    189.0      114.0     6.0       18.0
## 3      18.0    5.0       18.0      0      8.0      128.0    10.0        5.0
## 4       2.6    5.5        2.3      0      4.9        4.9     0.5        1.3
##   Myanmar Namibia Nauru Nepal Netherlands New.Zealand Nicaragua Niger Nigeria
## 1     5.0   376.0    49   5.0       251.0       203.0      78.0   3.0    42.0
## 2     1.0     3.0     0   6.0        88.0        79.0     118.0   2.0     5.0
## 3     0.0     1.0     8   0.0       190.0       175.0       1.0   1.0     2.0
## 4     0.1     6.8     1   0.2         9.4         9.3       3.5   0.1     9.1
##   Niue Norway Oman Pakistan Palau Panama Papua.New.Guinea Paraguay  Peru
## 1  188  169.0 22.0        0 306.0  285.0             44.0    213.0 163.0
## 2  200   71.0 16.0        0  63.0  104.0             39.0    117.0 160.0
## 3    7  129.0  1.0        0  23.0   18.0              1.0     74.0  21.0
## 4    7    6.7  0.7        0   6.9    7.2              1.5      7.3   6.1
##   Philippines Poland Portugal Qatar South.Korea Moldova Romania
## 1        71.0  343.0      194   1.0       140.0   109.0   297.0
## 2       186.0  215.0       67  42.0        16.0   226.0   122.0
## 3         1.0   56.0      339   7.0         9.0    18.0   167.0
## 4         4.6   10.9       11   0.9         9.8     6.3    10.4
##   Russian.Federation Rwanda St..Kitts...Nevis St..Lucia
## 1              247.0   43.0             194.0     171.0
## 2              326.0    2.0             205.0     315.0
## 3               73.0    0.0              32.0      71.0
## 4               11.5    6.8               7.7      10.1
##   St..Vincent...the.Grenadines Samoa San.Marino Sao.Tome...Principe
## 1                        120.0 105.0          0                56.0
## 2                        221.0  18.0          0                38.0
## 3                         11.0  24.0          0               140.0
## 4                          6.3   2.6          0                 4.2
##   Saudi.Arabia Senegal Serbia Seychelles Sierra.Leone Singapore Slovakia
## 1          0.0     9.0  283.0      157.0         25.0      60.0    196.0
## 2          5.0     1.0  131.0       25.0          3.0      12.0    293.0
## 3          0.0     7.0  127.0       51.0          2.0      11.0    116.0
## 4          0.1     0.3    9.6        4.1          6.7       1.5     11.4
##   Slovenia Solomon.Islands Somalia South.Africa Spain Sri.Lanka Sudan Suriname
## 1    270.0            56.0       0        225.0   284      16.0   8.0    128.0
## 2     51.0            11.0       0         76.0   157     104.0  13.0    178.0
## 3    276.0             1.0       0         81.0   112       0.0   0.0      7.0
## 4     10.6             1.2       0          8.2    10       2.2   1.7      5.6
##   Swaziland Sweden Switzerland Syria Tajikistan Thailand Macedonia Timor.Leste
## 1      90.0  152.0       185.0     5        2.0     99.0     106.0         1.0
## 2       2.0   60.0       100.0    35       15.0    258.0      27.0         1.0
## 3       2.0  186.0       280.0    16        0.0      1.0      86.0         4.0
## 4       4.7    7.2        10.2     1        0.3      6.4       3.9         0.1
##   Togo Tonga Trinidad...Tobago Tunisia Turkey Turkmenistan Tuvalu Uganda
## 1 36.0  36.0             197.0    51.0   51.0         19.0      6   45.0
## 2  2.0  21.0             156.0     3.0   22.0         71.0     41    9.0
## 3 19.0   5.0               7.0    20.0    7.0         32.0      9    0.0
## 4  1.3   1.1               6.4     1.3    1.4          2.2      1    8.3
##   Ukraine United.Arab.Emirates United.Kingdom Tanzania   USA Uruguay Uzbekistan
## 1   206.0                 16.0          219.0     36.0 249.0   115.0       25.0
## 2   237.0                135.0          126.0      6.0 158.0    35.0      101.0
## 3    45.0                  5.0          195.0      1.0  84.0   220.0        8.0
## 4     8.9                  2.8           10.4      5.7   8.7     6.6        2.4
##   Vanuatu Venezuela Vietnam Yemen Zambia Zimbabwe
## 1    21.0     333.0     111   6.0   32.0     64.0
## 2    18.0     100.0       2   0.0   19.0     18.0
## 3    11.0       3.0       1   0.0    4.0      4.0
## 4     0.9       7.7       2   0.1    2.5      4.7
library(dplyr)
library(reshape2)
# Wide to long: one row per (year, country column) pair. `id.vars` is written
# out in full instead of relying on partial matching of `id`.
df1 <- reshape2::melt(df, id.vars = "year")
df1 %>% glimpse
## Rows: 772
## Columns: 3
## $ year     <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value    <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
# Give the melted columns meaningful names.
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year"    "country" "disease"
# Mean disease value over all countries in the year 2000.
df1 %>% filter(year == 2000) %>% summarize(m = mean(disease))
##          m
## 1 81.01036
# Number of countries whose 2000 value is above that mean. The mean is computed
# from the filtered rows instead of being pasted in as a rounded constant, so
# the count stays correct if the data changes.
result <- df1 %>%
  filter(year == 2000) %>%
  filter(disease > mean(disease)) %>%
  nrow()
print(result)
## [1] 76
# ~ submission

# Rearrange the long data into an array with day as rows, month as columns and
# one slice per variable.
reshape2::acast(T, formula = day ~ month ~ variable)
## , , ozone
## 
##      5  6   7   8  9
## 1   41 NA 135  39 96
## 2   36 NA  49   9 78
## 3   12 NA  32  16 73
## 4   18 NA  NA  78 91
## 5   NA NA  64  35 47
## 6   28 NA  40  66 32
## 7   23 29  77 122 20
## 8   19 NA  97  89 23
## 9    8 71  97 110 21
## 10  NA 39  85  NA 24
## 11   7 NA  NA  NA 44
## 12  16 NA  10  44 21
## 13  11 23  27  28 28
## 14  14 NA  NA  65  9
## 15  18 NA   7  NA 13
## 16  14 21  48  22 46
## 17  34 37  35  59 18
## 18   6 20  61  23 13
## 19  30 12  79  31 24
## 20  11 13  63  44 16
## 21   1 NA  16  21 13
## 22  11 NA  NA   9 23
## 23   4 NA  NA  NA 36
## 24  32 NA  80  45  7
## 25  NA NA 108 168 14
## 26  NA NA  20  73 30
## 27  NA NA  52  NA NA
## 28  23 NA  82  76 14
## 29  45 NA  50 118 18
## 30 115 NA  64  84 20
## 31  37 NA  59  85 NA
## 
## , , solar.r
## 
##      5   6   7   8   9
## 1  190 286 269  83 167
## 2  118 287 248  24 197
## 3  149 242 236  77 183
## 4  313 186 101  NA 189
## 5   NA 220 175  NA  95
## 6   NA 264 314  NA  92
## 7  299 127 276 255 252
## 8   99 273 267 229 220
## 9   19 291 272 207 230
## 10 194 323 175 222 259
## 11  NA 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157  24
## 15  65 322  48  64 112
## 16 334 191 260  71 237
## 17 307 284 274  51 224
## 18  78  37 285 115  27
## 19 322 120 187 244 238
## 20  44 137 220 190 201
## 21   8 150   7 259 238
## 22 320  59 258  36  14
## 23  25  91 295 255 139
## 24  92 250 294 212  49
## 25  66 135 223 238  20
## 26 266 127  81 215 193
## 27  NA  47  82 153 145
## 28  13  98 213 203 191
## 29 252  31 275 225 131
## 30 223 138 253 237 223
## 31 279  NA 254 188  NA
## 
## , , wind
## 
##       5    6    7    8    9
## 1   7.4  8.6  4.1  6.9  6.9
## 2   8.0  9.7  9.2 13.8  5.1
## 3  12.6 16.1  9.2  7.4  2.8
## 4  11.5  9.2 10.9  6.9  4.6
## 5  14.3  8.6  4.6  7.4  7.4
## 6  14.9 14.3 10.9  4.6 15.5
## 7   8.6  9.7  5.1  4.0 10.9
## 8  13.8  6.9  6.3 10.3 10.3
## 9  20.1 13.8  5.7  8.0 10.9
## 10  8.6 11.5  7.4  8.6  9.7
## 11  6.9 10.9  8.6 11.5 14.9
## 12  9.7  9.2 14.3 11.5 15.5
## 13  9.2  8.0 14.9 11.5  6.3
## 14 10.9 13.8 14.9  9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9  6.9 10.3  6.9
## 17 12.0 20.7 10.3  6.3 13.8
## 18 18.4  9.2  6.3  7.4 10.3
## 19 11.5 11.5  5.1 10.9 10.3
## 20  9.7 10.3 11.5 10.3  8.0
## 21  9.7  6.3  6.9 15.5 12.6
## 22 16.6  1.7  9.7 14.3  9.2
## 23  9.7  4.6 11.5 12.6 10.3
## 24 12.0  6.3  8.6  9.7 10.3
## 25 16.6  8.0  8.0  3.4 16.6
## 26 14.9  8.0  8.6  8.0  6.9
## 27  8.0 10.3 12.0  5.7 13.2
## 28 12.0 11.5  7.4  9.7 14.3
## 29 14.9 14.9  7.4  2.3  8.0
## 30  5.7  8.0  7.4  6.3 11.5
## 31  7.4   NA  9.2  6.3   NA
## 
## , , temp
## 
##     5  6  7  8  9
## 1  67 78 84 81 91
## 2  72 74 85 81 92
## 3  74 67 81 82 93
## 4  62 84 84 86 93
## 5  56 85 83 85 87
## 6  66 79 83 87 84
## 7  65 82 88 89 80
## 8  59 87 92 90 78
## 9  61 90 92 90 75
## 10 69 87 89 92 73
## 11 74 93 82 86 81
## 12 69 92 73 86 76
## 13 66 82 81 82 77
## 14 68 80 91 80 71
## 15 58 79 80 79 71
## 16 64 77 81 77 78
## 17 66 72 82 79 67
## 18 57 65 84 76 76
## 19 68 73 87 78 68
## 20 62 76 85 78 82
## 21 59 77 74 77 64
## 22 73 76 81 72 71
## 23 61 76 82 75 81
## 24 61 76 86 79 69
## 25 57 75 85 81 63
## 26 58 78 82 86 70
## 27 57 73 86 88 77
## 28 67 80 88 97 75
## 29 81 77 86 94 76
## 30 79 83 83 96 68
## 31 76 NA 81 94 NA
# Same day x month x variable layout, aggregating each cell with mean().
# NOTE(review): the original note calls this the per-month mean of each
# variable, but the printed values match the un-aggregated cast, with NaN where
# a cell is empty. A per-month mean would need a formula such as
# `month ~ variable` -- confirm the intent.
b <- reshape2::acast(T, formula = day ~ month ~ variable, fun.aggregate = mean)
print(b)
## , , ozone
## 
##      5   6   7   8   9
## 1   41 NaN 135  39  96
## 2   36 NaN  49   9  78
## 3   12 NaN  32  16  73
## 4   18 NaN NaN  78  91
## 5  NaN NaN  64  35  47
## 6   28 NaN  40  66  32
## 7   23  29  77 122  20
## 8   19 NaN  97  89  23
## 9    8  71  97 110  21
## 10 NaN  39  85 NaN  24
## 11   7 NaN NaN NaN  44
## 12  16 NaN  10  44  21
## 13  11  23  27  28  28
## 14  14 NaN NaN  65   9
## 15  18 NaN   7 NaN  13
## 16  14  21  48  22  46
## 17  34  37  35  59  18
## 18   6  20  61  23  13
## 19  30  12  79  31  24
## 20  11  13  63  44  16
## 21   1 NaN  16  21  13
## 22  11 NaN NaN   9  23
## 23   4 NaN NaN NaN  36
## 24  32 NaN  80  45   7
## 25 NaN NaN 108 168  14
## 26 NaN NaN  20  73  30
## 27 NaN NaN  52 NaN NaN
## 28  23 NaN  82  76  14
## 29  45 NaN  50 118  18
## 30 115 NaN  64  84  20
## 31  37 NaN  59  85 NaN
## 
## , , solar.r
## 
##      5   6   7   8   9
## 1  190 286 269  83 167
## 2  118 287 248  24 197
## 3  149 242 236  77 183
## 4  313 186 101 NaN 189
## 5  NaN 220 175 NaN  95
## 6  NaN 264 314 NaN  92
## 7  299 127 276 255 252
## 8   99 273 267 229 220
## 9   19 291 272 207 230
## 10 194 323 175 222 259
## 11 NaN 259 139 137 236
## 12 256 250 264 192 259
## 13 290 148 175 273 238
## 14 274 332 291 157  24
## 15  65 322  48  64 112
## 16 334 191 260  71 237
## 17 307 284 274  51 224
## 18  78  37 285 115  27
## 19 322 120 187 244 238
## 20  44 137 220 190 201
## 21   8 150   7 259 238
## 22 320  59 258  36  14
## 23  25  91 295 255 139
## 24  92 250 294 212  49
## 25  66 135 223 238  20
## 26 266 127  81 215 193
## 27 NaN  47  82 153 145
## 28  13  98 213 203 191
## 29 252  31 275 225 131
## 30 223 138 253 237 223
## 31 279 NaN 254 188 NaN
## 
## , , wind
## 
##       5    6    7    8    9
## 1   7.4  8.6  4.1  6.9  6.9
## 2   8.0  9.7  9.2 13.8  5.1
## 3  12.6 16.1  9.2  7.4  2.8
## 4  11.5  9.2 10.9  6.9  4.6
## 5  14.3  8.6  4.6  7.4  7.4
## 6  14.9 14.3 10.9  4.6 15.5
## 7   8.6  9.7  5.1  4.0 10.9
## 8  13.8  6.9  6.3 10.3 10.3
## 9  20.1 13.8  5.7  8.0 10.9
## 10  8.6 11.5  7.4  8.6  9.7
## 11  6.9 10.9  8.6 11.5 14.9
## 12  9.7  9.2 14.3 11.5 15.5
## 13  9.2  8.0 14.9 11.5  6.3
## 14 10.9 13.8 14.9  9.7 10.9
## 15 13.2 11.5 14.3 11.5 11.5
## 16 11.5 14.9  6.9 10.3  6.9
## 17 12.0 20.7 10.3  6.3 13.8
## 18 18.4  9.2  6.3  7.4 10.3
## 19 11.5 11.5  5.1 10.9 10.3
## 20  9.7 10.3 11.5 10.3  8.0
## 21  9.7  6.3  6.9 15.5 12.6
## 22 16.6  1.7  9.7 14.3  9.2
## 23  9.7  4.6 11.5 12.6 10.3
## 24 12.0  6.3  8.6  9.7 10.3
## 25 16.6  8.0  8.0  3.4 16.6
## 26 14.9  8.0  8.6  8.0  6.9
## 27  8.0 10.3 12.0  5.7 13.2
## 28 12.0 11.5  7.4  9.7 14.3
## 29 14.9 14.9  7.4  2.3  8.0
## 30  5.7  8.0  7.4  6.3 11.5
## 31  7.4  NaN  9.2  6.3  NaN
## 
## , , temp
## 
##     5   6  7  8   9
## 1  67  78 84 81  91
## 2  72  74 85 81  92
## 3  74  67 81 82  93
## 4  62  84 84 86  93
## 5  56  85 83 85  87
## 6  66  79 83 87  84
## 7  65  82 88 89  80
## 8  59  87 92 90  78
## 9  61  90 92 90  75
## 10 69  87 89 92  73
## 11 74  93 82 86  81
## 12 69  92 73 86  76
## 13 66  82 81 82  77
## 14 68  80 91 80  71
## 15 58  79 80 79  71
## 16 64  77 81 77  78
## 17 66  72 82 79  67
## 18 57  65 84 76  76
## 19 68  73 87 78  68
## 20 62  76 85 78  82
## 21 59  77 74 77  64
## 22 73  76 81 72  71
## 23 61  76 82 75  81
## 24 61  76 86 79  69
## 25 57  75 85 81  63
## 26 58  78 82 86  70
## 27 57  73 86 88  77
## 28 67  80 88 97  75
## 29 81  77 86 94  76
## 30 79  83 83 96  68
## 31 76 NaN 81 94 NaN
# plyr: split the data, apply a function to each piece, recombine the results.
# 1. apply
library(plyr)
# 3 x 2 integer matrix filled column by column.
a <- matrix(data = 1:6, ncol = 2)
print(a)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# MARGIN = 1 applies the function to each row, giving the row sums.
apply(X = a, MARGIN = 1, FUN = sum)
## [1] 5 7 9
# Column totals of the four numeric columns (column 5, Species, is dropped).
apply(X = iris[-5], MARGIN = 2, FUN = sum)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
# The dedicated helper returns the same column totals.
colSums(x = iris[-5])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
# Column means.
colMeans(x = iris[-5])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333
# Per-row total across the four measurements.
rowSums(x = iris[-5])
##   [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
##  [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
##  [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
##  [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
##  [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
##  [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
##  [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
# Per-row mean across the four measurements.
rowMeans(x = iris[-5])
##   [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
##  [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
##  [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
##  [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
##  [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
##  [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
##  [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
##  [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
##  [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950
#데이터테이블

#데이터 기초통계
library(dplyr)
library(plyr)
data("iris")
# First six rows of the data.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Inspect the structure: column types and a preview of their values.
str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Basic summary statistics for every iris column: min/quartiles/mean/max for
# the four numeric measurements and per-level counts for the Species factor.
# The call is made once; repeating it only reprints the same table.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Covariance matrix of the four numeric iris measurements (columns 1 to 4).
cov(iris[1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    0.6856935  -0.0424340    1.2743154   0.5162707
## Sepal.Width    -0.0424340   0.1899794   -0.3296564  -0.1216394
## Petal.Length    1.2743154  -0.3296564    3.1162779   1.2956094
## Petal.Width     0.5162707  -0.1216394    1.2956094   0.5810063
# Correlation matrix of the same four columns.
cor(iris[1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000
# Theory textbook p. 166, past-exam-style concept check, question 13.
library(ISLR)
# Load the Wage data set into the workspace.
data(Wage)
# Column-by-column preview of the data (3,000 rows, 11 columns).
dplyr::glimpse(Wage)
## Rows: 3,000
## Columns: 11
## $ year       <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age        <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl     <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race       <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education  <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region     <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass   <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health     <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage    <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage       <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…
# Per-column summary: numeric summaries and factor level counts.
summary(object = Wage)
##       year           age                     maritl           race     
##  Min.   :2003   Min.   :18.00   1. Never Married: 648   1. White:2480  
##  1st Qu.:2004   1st Qu.:33.75   2. Married      :2074   2. Black: 293  
##  Median :2006   Median :42.00   3. Widowed      :  19   3. Asian: 190  
##  Mean   :2006   Mean   :42.41   4. Divorced     : 204   4. Other:  37  
##  3rd Qu.:2008   3rd Qu.:51.00   5. Separated    :  55                  
##  Max.   :2009   Max.   :80.00                                          
##                                                                        
##               education                     region               jobclass   
##  1. < HS Grad      :268   2. Middle Atlantic   :3000   1. Industrial :1544  
##  2. HS Grad        :971   1. New England       :   0   2. Information:1456  
##  3. Some College   :650   3. East North Central:   0                        
##  4. College Grad   :685   4. West North Central:   0                        
##  5. Advanced Degree:426   5. South Atlantic    :   0                        
##                           6. East South Central:   0                        
##                           (Other)              :   0                        
##             health      health_ins      logwage           wage       
##  1. <=Good     : 858   1. Yes:2083   Min.   :3.000   Min.   : 20.09  
##  2. >=Very Good:2142   2. No : 917   1st Qu.:4.447   1st Qu.: 85.38  
##                                      Median :4.653   Median :104.92  
##                                      Mean   :4.654   Mean   :111.70  
##                                      3rd Qu.:4.857   3rd Qu.:128.68  
##                                      Max.   :5.763   Max.   :318.34  
## 
#결측값처리
library(reshape)
## 
## 다음의 패키지를 부착합니다: 'reshape'
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:dplyr':
## 
##     rename
# Load the french_fries data set into the workspace.
data("french_fries")

# Keep only the rows that contain at least one missing value.
subset(french_fries, subset = !complete.cases(french_fries))
##     time treatment subject rep potato buttery grassy rancid painty
## 315    5         3      15   1     NA      NA     NA     NA     NA
## 455    7         2      79   1    7.3      NA    0.0    0.7      0
## 515    8         1      79   1   10.5      NA    0.0    0.5      0
## 520    8         2      16   1    4.5      NA    1.4    6.7      0
## 563    8         2      79   2    5.7       0    1.4    2.3     NA
# !complete.cases() is TRUE for rows holding an NA, so selecting with it returns
# only the rows with missing values.
# complete.cases() itself returns FALSE for a row that has any missing value.