한정민-예비군.knit

library(psych)

## Warning: 패키지 'psych'는 R 버전 4.2.3에서 작성되었습니다

library(dplyr)

## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Character vector used to demonstrate positive and negative indexing.
b <- c("a", "b", "c")
b[1]   # positive index: keep only the first element

## [1] "a"

b[-3]  # negative index: drop the third element

## [1] "a" "b"

data("airquality")

# Squares of 1..9 in a single vectorized step, rather than growing an empty
# vector one element at a time inside a for loop. Integer * integer keeps the
# result an integer vector.
a <- (1:9) * (1:9)
a

## [1]  1  4  9 16 25 36 49 64 81

# Remove every object from the current workspace, then list what is left;
# ls() prints character(0) once nothing remains.
# NOTE(review): this discards all objects created by code that ran earlier in
# the session, so nothing defined above this point can be used below it.
rm(list=ls())
ls()

## character(0)

# Recode gender as a numeric indicator: "f" becomes 0, every other value 1.
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(gender != "f", 1, 0)
gender

## [1] 1 0 1 0 1

# paste() recycles the shorter vector, so the three letters are reused across
# the five numbers.
number <- 1:5
alphabet <- letters[1:3]
paste(number, alphabet, sep = " ")

## [1] "1 a" "2 b" "3 c" "4 a" "5 b"

# A custom separator is inserted verbatim, with no surrounding spaces.
paste(number, alphabet, sep = "to the")

## [1] "1to thea" "2to theb" "3to thec" "4to thea" "5to theb"

# Logical -> numeric: FALSE maps to 0.
as.numeric(x = FALSE)

## [1] 0

# Numeric -> logical: any non-zero number is TRUE.
as.logical(x = 0.45)

## [1] TRUE

# Character -> Date, using an explicit month/day/year format string.
as.Date(
  "01/13/2018",
  format = "%m/%d/%Y"
)

## [1] "2018-01-13"

# Only the zero element converts to FALSE.
a <- 0:4
as.logical(x = a)

## [1] FALSE  TRUE  TRUE  TRUE  TRUE

# Load the built-in airquality data set and inspect its columns.
data("airquality")
glimpse(airquality)

## Rows: 153
## Columns: 6
## $ Ozone   <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind    <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp    <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month   <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…

# Scatter plot of ozone against solar radiation. Column names are
# case-sensitive: `airquality$solar.R` is NULL, which makes plot() draw Ozone
# against its index instead of against Solar.R.
plot(airquality$Ozone, airquality$Solar.R)

# Scatter-plot matrix of the four iris measurements, points filled by species.
# The factor column is `Species`; `iris$species` is NULL and would leave `bg`
# empty, so the species colours would not be applied.
pairs(
  iris[1:4],
  main = "anderson's Iris Data --3 species",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)

# hist() drops missing values on its own and has no `na.rm` argument; passing
# one only triggers '"na.rm" is not a graphical parameter' warnings.
hist(airquality$Ozone)

# Quartiles, mean and NA count of the ozone readings.
summary(airquality$Ozone)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   31.50   42.13   63.25  168.00      37

# Descriptive statistics from psych::describe() with missing values removed.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
describe(airquality$Ozone, na.rm = TRUE)

##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis   se
## X1    1 116 42.13 32.99   31.5    37.8 25.95   1 168   167 1.21     1.11 3.06

# Single-panel layout for the petal-length histogram.
par(mfrow = c(1, 1))
data(iris)
hist(iris$Petal.Length)

# Four side-by-side box plots, one per measurement, split by species.
# par() returns the previous settings; they are restored afterwards so the
# 1 x 4 layout does not leak into later base-graphics plots.
old_par <- par(mfcol = c(1, 4))
# Columns are resolved through `data = iris`, so the formulas name them
# directly instead of repeating `iris$` (this also gives cleaner axis labels).
boxplot(Petal.Length ~ Species, data = iris)
boxplot(Sepal.Length ~ Species, data = iris)
boxplot(Sepal.Width ~ Species, data = iris)
boxplot(Petal.Width ~ Species, data = iris)
par(old_par)

library(caret)

## Warning: 패키지 'caret'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: ggplot2

## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## 필요한 패키지를 로딩중입니다: lattice

# Per-species density curves for each of the four iris measurements, drawn
# with caret's featurePlot(). Both axes use free scales in every panel.
free_axes <- list(relation = "free")
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density",
  scales = list(x = free_axes, y = free_axes),
  adjust = 1.5,
  pch = "|",
  layout = c(4, 1),
  auto.key = list(columns = 3)
)

library(dplyr)
# Titanic is a four-way contingency table: Class x Sex x Age x Survived.
data(Titanic)
glimpse(Titanic)

##  'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
##  - attr(*, "dimnames")=List of 4
##   ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
##   ..$ Sex     : chr [1:2] "Male" "Female"
##   ..$ Age     : chr [1:2] "Child" "Adult"
##   ..$ Survived: chr [1:2] "No" "Yes"

# Mosaic plot of the whole table.
mosaicplot(
  Titanic,                           # data to plot
  main = "Survival on the Titanic",  # plot title
  color = c("black", "green"),       # fill colours
  off = 1                            # spacing between the blocks
)

library(reshape2)

## Warning: 패키지 'reshape2'는 R 버전 4.2.3에서 작성되었습니다

# Reload airquality and lower-case its column names.
data(airquality)
names(airquality) <- tolower(names(airquality))
head(airquality, n = 6)

##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

# Only the first three rows.
head(airquality, n = 3)

##   ozone solar.r wind temp month day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3

# Long format: one row per (month, day, variable) with missing values dropped;
# only the first two rows are kept so the example stays small. The result is
# named `aq_long` rather than `T`, because assigning to `T` masks the built-in
# shorthand for TRUE for everything that runs afterwards.
aq_long <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE) %>%
  head(2)
aq_long

##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36

# Disabled example: monthly mean of the ozone values.
# aq_long %>%
#   filter(variable == "ozone") %>%
#   group_by(month) %>%
#   summarize(m = mean(value))

getwd()

## [1] "C:/Users/cic/Desktop"

# The CSV is read through its absolute path, so no setwd() call is needed.
df <- read.csv("C:/Users/cic/Desktop/disease.csv")
# Melt to long format, keeping `year` as the identifier column.
df1 <- melt(df, id.vars = "year")
df1 %>% head(2)

##   year    variable value
## 1 1999 Afghanistan     0
## 2 2000 Afghanistan     0

glimpse(df1)

## Rows: 772
## Columns: 3
## $ year     <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value    <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …

# Give the melted columns descriptive names.
names(df1)[2:3] <- c("country", "disease")
names(df1)

## [1] "year"    "country" "disease"

# Cast the long airquality rows back into a day x month x variable array.
acast(aq_long, day ~ month ~ variable)

## , , ozone
## 
##    5
## 1 41
## 2 36

# Aggregate while casting: mean value per month and variable.
b <- acast(aq_long, month ~ variable, fun.aggregate = mean)
b

##   ozone
## 5  38.5

library("sqldf")

## Warning: 패키지 'sqldf'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: gsubfn

## Warning: 패키지 'gsubfn'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: proto

## Warning: 패키지 'proto'는 R 버전 4.2.3에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: RSQLite

## Warning: 패키지 'RSQLite'는 R 버전 4.2.3에서 작성되었습니다

# Query the iris data frame with SQL through sqldf and show the first two rows.
data(iris)
head(sqldf("select*from iris"), 2)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa

# 3 x 2 integer matrix, filled column by column.
a <- matrix(1:6, nrow = 3, ncol = 2)
a

##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

# Row totals: MARGIN = 1 applies the function to each row.
apply(a, MARGIN = 1, FUN = sum)

## [1] 5 7 9

# Column totals of the four numeric iris columns (column 5, Species, is
# dropped); MARGIN = 2 applies the function to each column.
apply(iris[-5], MARGIN = 2, FUN = sum)

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9

# The dedicated helper gives the same column totals.
colSums(iris[, -5])

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##        876.5        458.6        563.7        179.9

# Column means.
colMeans(iris[, -5])

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333

# Per-flower total of the four measurements.
rowSums(iris[, -5])

##   [1] 10.2  9.5  9.4  9.4 10.2 11.4  9.7 10.1  8.9  9.6 10.8 10.0  9.3  8.5 11.2
##  [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7  9.4 10.6 10.3  9.8 10.4 10.4 10.2  9.7
##  [31]  9.7 10.7 10.9 11.3  9.7  9.6 10.5 10.0  8.9 10.2 10.1  8.4  9.1 10.7 11.2
##  [46]  9.5 10.7  9.4 10.7  9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
##  [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
##  [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
##  [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8

# Per-flower mean of the four measurements.
rowMeans(iris[, -5])

##   [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
##  [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
##  [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
##  [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
##  [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
##  [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
##  [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
##  [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
##  [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950

# Reload iris and look at it three ways: first rows, structure, summary.
data(iris)
head(iris, n = 6)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Column types and a preview of the values.
str(object = iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Per-column summary statistics (counts per level for the Species factor).
summary(object = iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# Covariance matrix of the four numeric iris columns.
cov(iris[1:4])

##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    0.6856935  -0.0424340    1.2743154   0.5162707
## Sepal.Width    -0.0424340   0.1899794   -0.3296564  -0.1216394
## Petal.Length    1.2743154  -0.3296564    3.1162779   1.2956094
## Petal.Width     0.5162707  -0.1216394    1.2956094   0.5810063

# Pearson correlation matrix of the same columns.
cor(iris[1:4])

##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length    1.0000000  -0.1175698    0.8717538   0.8179411
## Sepal.Width    -0.1175698   1.0000000   -0.4284401  -0.3661259
## Petal.Length    0.8717538  -0.4284401    1.0000000   0.9628654
## Petal.Width     0.8179411  -0.3661259    0.9628654   1.0000000

library(dplyr)
library(ISLR)

## Warning: 패키지 'ISLR'는 R 버전 4.2.3에서 작성되었습니다

# Load the Wage data set (from ISLR) and inspect its columns.
data(Wage)
glimpse(Wage)

## Rows: 3,000
## Columns: 11
## $ year       <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age        <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl     <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race       <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education  <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region     <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass   <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health     <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage    <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage       <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…

# First three log-wage values.
head(Wage$logwage, 3)

## [1] 4.318063 4.255273 4.875061

# First three rows of the per-column summary table.
head(summary(Wage), 3)

##       year           age                     maritl           race     
##  Min.   :2003   Min.   :18.00   1. Never Married: 648   1. White:2480  
##  1st Qu.:2004   1st Qu.:33.75   2. Married      :2074   2. Black: 293  
##  Median :2006   Median :42.00   3. Widowed      :  19   3. Asian: 190  
##               education                     region               jobclass   
##  1. < HS Grad      :268   2. Middle Atlantic   :3000   1. Industrial :1544  
##  2. HS Grad        :971   1. New England       :   0   2. Information:1456  
##  3. Some College   :650   3. East North Central:   0                        
##             health      health_ins      logwage           wage       
##  1. <=Good     : 858   1. Yes:2083   Min.   :3.000   Min.   : 20.09  
##  2. >=Very Good:2142   2. No : 917   1st Qu.:4.447   1st Qu.: 85.38  
##                                      Median :4.653   Median :104.92