# 2023.07.18
# Quiz answer: NULL (the empty c() below prints NULL; the first two give NaN)
sqrt(-3)
## Warning in sqrt(-3): NaNs produced
## [1] NaN
0 / 0
## [1] NaN
x <- c()
x
## NULL
# Fixing the seed makes the random draw below reproducible
# (this occasionally comes up on the exam).
set.seed(1)
sample(seq_len(45), 6)
## [1] 4 39 1 34 23 14
# Build a 3 x 2 matrix and keep the rows whose first column exceeds 1 and
# whose second column exceeds 5. Only row 3 qualifies, so the result drops
# to a plain vector.
m <- matrix(seq_len(6), nrow = 3)
m[m[, 1] > 1 & m[, 2] > 5, ]
## [1] 3 6
m
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
# The two logical masks that were combined with `&` above.
m[, 1] > 1
## [1] FALSE TRUE TRUE
m[, 2] > 5
## [1] FALSE FALSE TRUE
# Standard deviation of 1..10, rounded to two decimal places.
# The vector is called `nums` rather than `c` so it does not shadow base::c().
nums <- 1:10
print(round(sd(nums), 2))
## [1] 3.03
round(sd(nums), 2) # same value as above; auto-printing makes print() optional
## [1] 3.03
# Count the missing values in each airquality column, then print the
# per-column summary statistics.
data(airquality)
colSums(is.na(airquality))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Type-coercion examples.
as.numeric(FALSE)
## [1] 0
as.numeric("foo") # not a number, so the result is NA (with a warning)
## Warning: NAs introduced by coercion
## [1] NA
as.logical(0.45)
## [1] TRUE
as.Date("2018-01-13")
## [1] "2018-01-13"
# The format string has to spell out the "/" separators present in the
# input; with "%m%d%Y" parsing fails and as.Date() returns NA.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reload the built-in airquality data and list its columns with their
# types and first few values.
data(airquality)
airquality %>% glimpse()
## Rows: 153
## Columns: 6
## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Scatter plot of ozone against solar radiation.
plot(airquality$Ozone, airquality$Solar.R)

# Histogram
# hist() discards missing values on its own. It has no `na.rm` argument, so
# passing one only leaks into the underlying graphics calls and triggers
# '"na.rm" is not a graphical parameter' warnings.
hist(airquality$Ozone)
summary(airquality$Ozone)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 18.00 31.50 42.13 63.25 168.00 37
# Skewness: for a right-tailed distribution the numerator, mean - median,
# is always greater than 0.
library(psych)

describe(airquality$Ozone, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 116 42.13 32.99 31.5 37.8 25.95 1 168 167 1.21 1.11 3.06
# Load the built-in iris data and list its columns with their types.
data("iris")
iris %>% glimpse()
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
# Scatter-plot matrix of the four measurements, with the point fill colour
# chosen by species. The column is `Species` (capital S): `iris$species` is
# NULL, which would leave `bg` empty and the points uncoloured.
pairs(
  iris[1:4],
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)

hist(iris$Petal.Length)

par(mfrow = c(1, 2)) # two plots side by side on one device
# hist() ignores NA values by itself; `na.rm` is not one of its arguments
# and only produces '"na.rm" is not a graphical parameter' warnings.
hist(airquality$Ozone)
hist(iris$Petal.Length)

# Attach the hflights package, load its data set and list the columns with
# their types.
library(hflights)
data(hflights)
hflights %>% glimpse()
## Rows: 227,496
## Columns: 21
## $ Year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Use a single plot per device for the box plots below.
par(mfrow = c(1, 1))
# Box plots of each measurement by species. Bare column names are used so
# that `data = iris` actually supplies the variables, and any labels derived
# from the formula read "Species" / "Petal.Length" instead of "iris$...".
boxplot(Petal.Length ~ Species, data = iris)

boxplot(Sepal.Length ~ Species, data = iris)

boxplot(Sepal.Width ~ Species, data = iris)

# Titanic is a 4-way contingency table (Class x Sex x Age x Survived).
data(Titanic)
glimpse(Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
mosaicplot(
  Titanic,                          # data to plot
  main = "Survival on the Titanic", # plot title
  color = c("red", "green"),        # fill colours
  off = 1                           # spacing between the blocks
)

library(reshape2)
# Reshape airquality to long format: one row per (month, day, variable).
data(airquality)
colnames(airquality) <- tolower(colnames(airquality))
# The result is named `aq_long` rather than `T`, which would mask the
# built-in shorthand for TRUE. `id.vars` is spelled out instead of relying
# on partial matching of `id`. na.rm = TRUE drops rows with a missing value.
aq_long <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
head(aq_long)
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 6 5 6 ozone 28
## 7 5 7 ozone 23
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Mean ozone per month.
aq_long %>%
  group_by(month) %>%
  filter(variable == "ozone") %>%
  summarize(m = mean(value))
## # A tibble: 5 × 2
## month m
## <int> <dbl>
## 1 5 23.6
## 2 6 29.4
## 3 7 59.1
## 4 8 60.0
## 5 9 31.4
names(airquality)
## [1] "ozone" "solar.r" "wind" "temp" "month" "day"
# Mean temperature per month.
aq_long %>%
  group_by(month) %>%
  filter(variable == "temp") %>%
  summarize(m = mean(value))
## # A tibble: 5 × 2
## month m
## <int> <dbl>
## 1 5 65.5
## 2 6 79.1
## 3 7 83.9
## 4 8 84.0
## 5 9 76.9
library(reshape)
##
## 다음의 패키지를 부착합니다: 'reshape'
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following object is masked from 'package:dplyr':
##
## rename
# After obtaining the data file *****************************************
# disease.csv is read straight from its folder, so the working directory is
# left untouched. It is a file on disk, not a packaged data set, so
# data("disease") cannot load it.
df <- read.csv(file.path("c:/data", "disease.csv"))
# `%>%` raised an error at this point until dplyr was reinstalled.
library(dplyr)
df1 <- melt(df, id.vars = "year")
df1 %>% glimpse()
## Rows: 772
## Columns: 3
## $ year <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
colSums(is.na(df1))
## year variable value
## 0 0 0
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year" "country" "disease"
# Mean disease value over all countries for the year 2000.
mean_2000 <- df1 %>%
  filter(year == 2000) %>%
  summarize(m = mean(disease))
mean_2000
## m
## 1 81.01036
# Count the year-2000 rows above that mean, comparing against the computed
# value rather than a rounded constant typed in by hand.
result <- df1 %>%
  filter(year == 2000, disease > mean_2000$m) %>%
  nrow()
print(result)
## [1] 76
# Column totals of the four numeric iris columns, computed two ways:
# apply() over columns (MARGIN = 2) and the dedicated colSums().
apply(iris[, -5], MARGIN = 2, FUN = sum)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
colSums(iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
# p.165
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary statistics (counts per level for the Species factor).
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Covariance matrix of the four numeric columns.
cov(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.6856935 -0.0424340 1.2743154 0.5162707
## Sepal.Width -0.0424340 0.1899794 -0.3296564 -0.1216394
## Petal.Length 1.2743154 -0.3296564 3.1162779 1.2956094
## Petal.Width 0.5162707 -0.1216394 1.2956094 0.5810063
# Correlation matrix of the same columns.
cor(iris[, 1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000