#2023.07.18
# Quiz note: the answer is NULL.
# Invalid arithmetic produces NaN, whereas an empty vector is NULL.
sqrt(-3)
## Warning in sqrt(-3): NaNs produced
## [1] NaN
0 / 0
## [1] NaN
x <- NULL
x
## NULL
# Fixing the seed makes the random draw reproducible: the same seed always
# yields the same values (this occasionally appears on exams).
set.seed(1)
sample(seq_len(45), 6)
## [1]  4 39  1 34 23 14
# Build a 3 x 2 matrix filled column-wise with 1..6.
m <- matrix(seq_len(6), nrow = 3)
# Keep the rows where column 1 exceeds 1 AND column 2 exceeds 5; the single
# matching row is returned as a plain vector.
m[(m[, 1] > 1) & (m[, 2] > 5), ]
## [1] 3 6
m
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# The two element-wise conditions combined above, shown separately.
m[, 1] > 1
## [1] FALSE  TRUE  TRUE
m[, 2] > 5
## [1] FALSE FALSE  TRUE
# Standard deviation of 1..10, rounded to two decimal places.
c <- seq_len(10)
print(round(sd(c), digits = 2))
## [1] 3.03
# Auto-printing the expression gives the same output as the print() call above.
round(sd(c), digits = 2)
## [1] 3.03
# Load the built-in airquality data set and count missing values per column.
data(airquality)
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0
# Per-column summary statistics; NA counts are reported where present.
summary(airquality)
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Type-coercion examples.
as.numeric(FALSE) # logical FALSE becomes 0
## [1] 0
as.numeric("foo") # text that is not a number becomes NA, with a warning
## Warning: NAs introduced by coercion
## [1] NA
as.logical(0.45) # a non-zero number becomes TRUE
## [1] TRUE
as.Date("2018-01-13") # yyyy-mm-dd parses without an explicit format
## [1] "2018-01-13"
# The format must contain the "/" separators that appear in the input string;
# with "%m%d%Y" the parse fails and NA is returned.
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reload airquality and inspect its columns with glimpse().
data(airquality)
glimpse(airquality)
## Rows: 153
## Columns: 6
## $ Ozone   <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind    <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp    <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month   <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Scatter plot with Ozone on the x-axis and Solar.R on the y-axis.
plot(x = airquality$Ozone, y = airquality$Solar.R)

# Histogram of Ozone.
# hist() has no na.rm argument: it drops missing values on its own. Passing
# na.rm = TRUE is forwarded to the low-level graphics calls, which warn that
# "na.rm" is not a graphical parameter, so it is omitted here.
hist(airquality$Ozone)
# Quartiles, mean and NA count for Ozone.
summary(airquality$Ozone)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37
# Skewness note: for a right-tailed distribution the numerator of the skewness
# (mean - median) is always greater than 0.
library(psych)

# Descriptive statistics for Ozone with missing values removed.
describe(airquality$Ozone, na.rm = TRUE)
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 116 42.13 32.99   31.5    37.8 25.95   1 168   167 1.21     1.11 3.06
# Load the built-in iris data set and inspect its columns.
data("iris")
glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
# Scatter-plot matrix of the four numeric iris measurements, with the point
# fill chosen per species. The column is `Species` (capital S); `iris$species`
# evaluates to NULL, which leaves the fill-colour vector empty.
pairs(
  iris[1:4],
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)

hist(iris$Petal.Length)

par(mfrow = c(1, 2)) # two plots side by side on one page
# hist() drops missing values itself; na.rm is not one of its arguments and
# only triggers '"na.rm" is not a graphical parameter' warnings.
hist(airquality$Ozone)
hist(iris$Petal.Length)

# Attach the hflights package, load its data set and inspect the columns.
library(hflights)
data(hflights)
glimpse(hflights)
## Rows: 227,496
## Columns: 21
## $ Year              <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek         <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime           <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime           <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier     <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum         <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum           <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime           <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay          <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay          <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin            <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest              <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance          <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn            <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut           <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode  <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
par(mfrow = c(2, 2))

# NOTE(review): this call immediately replaces the 2 x 2 layout set above, so
# each box plot below is drawn one per page.
par(mfrow = c(1, 1))
# Use bare column names in the formula so that `data = iris` is actually used
# and the default axis labels read "Species" / "Petal.Length" instead of
# "iris$Species" / "iris$Petal.Length".
boxplot(Petal.Length ~ Species, data = iris)

boxplot(Sepal.Length ~ Species, data = iris)

boxplot(Sepal.Width ~ Species, data = iris)

# Titanic is a 4-way contingency table (Class x Sex x Age x Survived).
data(Titanic)
glimpse(Titanic)
##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"
mosaicplot(
  Titanic, # data to plot
  off = 1, # spacing between the blocks
  color = c("red", "green"), # fill colours
  main = "Survival on the Titanic" # plot title
)

library(reshape2)
data(airquality)
# Lower-case the column names so they can be referenced as month, day, ozone...
colnames(airquality) <- tolower(colnames(airquality))
# Reshape to long format: one row per (month, day, variable), dropping NAs.
# The result is stored as `aq_long` rather than `T`, because assigning to `T`
# masks the built-in shorthand for TRUE for everything that runs afterwards.
aq_long <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
head(aq_long)
##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36
## 3     5   3    ozone    12
## 4     5   4    ozone    18
## 6     5   6    ozone    28
## 7     5   7    ozone    23
head(airquality)
##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# Mean ozone per month.
aq_long %>%
  group_by(month) %>%
  filter(variable == "ozone") %>%
  summarize(m = mean(value))
## # A tibble: 5 × 2
##   month     m
##   <int> <dbl>
## 1     5  23.6
## 2     6  29.4
## 3     7  59.1
## 4     8  60.0
## 5     9  31.4
names(airquality)
## [1] "ozone"   "solar.r" "wind"    "temp"    "month"   "day"
# Mean temperature per month.
aq_long %>%
  group_by(month) %>%
  filter(variable == "temp") %>%
  summarize(m = mean(value))
## # A tibble: 5 × 2
##   month     m
##   <int> <dbl>
## 1     5  65.5
## 2     6  79.1
## 3     7  83.9
## 4     8  84.0
## 5     9  76.9
library(reshape)
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
## The following object is masked from 'package:dplyr':
## 
##     rename
# `disease` is not a data set that data() can find (data("disease") only warns
# that the data set was not found), so it is read from a CSV file once the
# file has been obtained.
getwd()
## [1] "C:/data"
setwd("c:/data")
df <- read.csv("disease.csv")

# `%>%` raised an error at this point; it ran after reinstalling dplyr.
library(dplyr)

# Long format: `year` is kept as the id column and every other column becomes
# a (variable, value) pair.
df1 <- melt(df, id.vars = "year")
glimpse(df1)
## Rows: 772
## Columns: 3
## $ year     <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value    <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
colSums(is.na(df1))
##     year variable    value 
##        0        0        0
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year"    "country" "disease"
# Mean of the `disease` values for the year 2000.
df1 %>%
  filter(year == 2000) %>%
  summarize(m = mean(disease))
##          m
## 1 81.01036
# Count the year-2000 rows above that mean. The threshold is computed from the
# data instead of pasting in the rounded printed value 81.01036.
mean_2000 <- mean(df1$disease[df1$year == 2000])
result <- df1 %>%
  filter(year == 2000, disease > mean_2000) %>%
  NROW()
print(result)
## [1] 76
# Column totals of the four numeric iris columns, computed two equivalent ways.
apply(iris[, -5], MARGIN = 2, FUN = sum)
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
colSums(iris[, -5])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9
# p.165
# Inspect iris: first rows, structure, and per-column summary.
data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# summary() is called once; the original repeated the identical call.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Covariance matrix of the four numeric columns.
cov(iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    0.6856935  -0.0424340    1.2743154   0.5162707
## Sepal.Width    -0.0424340   0.1899794   -0.3296564  -0.1216394
## Petal.Length    1.2743154  -0.3296564    3.1162779   1.2956094
## Petal.Width     0.5162707  -0.1216394    1.2956094   0.5810063
# Correlation matrix of the same columns.
cor(iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000