library(psych)
## Warning: 패키지 'psych'는 R 버전 4.2.3에서 작성되었습니다
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Three-element character vector used to demonstrate positional indexing.
b <- c("a", "b", "c")
# A positive index keeps the element at that position.
b[1L]
## [1] "a"
# A negative index drops the element at that position.
b[-3L]
## [1] "a" "b"
# Load the built-in airquality data set into the workspace.
data("airquality")
# Squares of 1..9, computed in a single vectorised step instead of growing an
# empty vector one element at a time inside a for loop. as.integer() keeps the
# integer storage type that the element-wise i * i arithmetic produced.
a <- as.integer(seq_len(9)^2)
a
## [1] 1 4 9 16 25 36 49 64 81
# Remove every visible object from the current workspace.
# NOTE(review): this also deletes anything the user had defined before running
# the script; keep it out of code that is sourced into a shared session.
rm(list = ls(all.names = FALSE))
# Listing the workspace afterwards shows that nothing is left.
ls()
## character(0)
# Recode a character gender vector as numeric: "f" becomes 0, all else 1.
gender <- c("m", "f", "m", "f", "m")
gender <- ifelse(gender == "f", yes = 0, no = 1)
gender
## [1] 1 0 1 0 1

# paste() recycles the shorter vector to the length of the longer one.
number <- seq_len(5)
alphabet <- c("a", "b", "c")
paste(number, alphabet)
## [1] "1 a" "2 b" "3 c" "4 a" "5 b"
# `sep` is inserted verbatim between the pieces, with no surrounding spaces.
paste(number, alphabet, sep = "to the")
## [1] "1to thea" "2to theb" "3to thec" "4to thea" "5to theb"

# Type coercion: logical to numeric, numeric to logical, string to Date.
as.numeric(FALSE)
## [1] 0
as.logical(0.45)
## [1] TRUE
as.Date("01/13/2018", format = "%m/%d/%Y")
## [1] "2018-01-13"
# Only zero maps to FALSE; every other number maps to TRUE.
a <- seq(from = 0L, to = 4L)
as.logical(a)
## [1] FALSE TRUE TRUE TRUE TRUE
# Reload airquality (the workspace was cleared above) and print a compact,
# one-line-per-column overview of it.
data(airquality)
glimpse(airquality)
## Rows: 153
## Columns: 6
## $ Ozone <int> 41, 36, 12, 18, NA, 28, 23, 19, 8, NA, 7, 16, 11, 14, 18, 14, …
## $ Solar.R <int> 190, 118, 149, 313, NA, NA, 299, 99, 19, 194, NA, 256, 290, 27…
## $ Wind <dbl> 7.4, 8.0, 12.6, 11.5, 14.3, 14.9, 8.6, 13.8, 20.1, 8.6, 6.9, 9…
## $ Temp <int> 67, 72, 74, 62, 56, 66, 65, 59, 61, 69, 74, 69, 66, 68, 58, 64…
## $ Month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
## $ Day <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
# Scatter plot of ozone against solar radiation. The column is spelled
# `Solar.R`; the lower-case `solar.R` used before matches no column, so `$`
# returned NULL and plot() silently drew Ozone against its row index.
plot(airquality$Ozone, airquality$Solar.R)

# Scatter-plot matrix of the four iris measurements with the point fill keyed
# to species. The factor column is `Species` (capital S); with the lower-case
# name the colour vector was indexed by NULL and came back empty.
species_fill <- c("red", "green3", "blue")[unclass(iris$Species)]
pairs(
  iris[1:4],
  main = "anderson's Iris Data --3 species",
  pch = 21,
  bg = species_fill
)

# Histogram of ozone. hist() discards non-finite values on its own and has no
# `na.rm` argument; passing one only triggered "not a graphical parameter"
# warnings, so it is omitted here.
hist(airquality$Ozone)

# Quartiles, mean and NA count for the ozone readings.
summary(airquality$Ozone)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 18.00 31.50 42.13 63.25 168.00 37
# Descriptive statistics from psych::describe() with missing values removed.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned (this script later does exactly that), which would break the call.
describe(airquality$Ozone, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 116 42.13 32.99 31.5 37.8 25.95 1 168 167 1.21 1.11 3.06
# par(mfrow = c(1, 2))  # alternative two-panel layout, left disabled
# Use a single-panel layout for the histogram.
par(mfrow = c(1, 1))
data(iris)
hist(iris$Petal.Length)

# Draw one boxplot per measurement, side by side in a 1 x 4 grid. par()
# returns the settings it replaces, so they are saved and restored afterwards;
# otherwise every later base-graphics plot would be squeezed into one of the
# four narrow panels.
old_par <- par(mfcol = c(1, 4))
# Columns are looked up in `data`, so the formula names them directly.
boxplot(Petal.Length ~ Species, data = iris)
boxplot(Sepal.Length ~ Species, data = iris)
boxplot(Sepal.Width ~ Species, data = iris)
boxplot(Petal.Width ~ Species, data = iris)
par(old_par)

library(caret)
## Warning: 패키지 'caret'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: ggplot2
## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## 필요한 패키지를 로딩중입니다: lattice
# Density plot of each of the four iris measurements, split by species.
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density",
  # Each panel gets its own x and y axis range.
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  pch = "|",
  layout = c(4, 1),
  # Legend entries laid out in three columns.
  auto.key = list(columns = 3)
)

library(dplyr)
# Load the Titanic contingency table and inspect its structure.
data("Titanic")
glimpse(Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Mosaic plot of the four-way table.
mosaicplot(
  Titanic,                           # input data
  main = "Survival on the Titanic",  # plot title
  color = c("black", "green"),       # fill colours
  off = 1                            # spacing between the blocks
)
library(reshape2)
## Warning: 패키지 'reshape2'는 R 버전 4.2.3에서 작성되었습니다
# Reload airquality and switch its column names to lower case so they are
# easier to type in the reshaping steps that follow.
data("airquality")
names(airquality) <- tolower(names(airquality))
# First six rows (the default for head()).
head(airquality)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# First three rows only.
head(airquality, n = 3)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
# Melt airquality to long format (one row per month/day/variable), dropping
# missing values, and keep just the first two rows for the demonstration.
# The result is stored as `aq_long` rather than `T`: assigning to `T` shadows
# the built-in shorthand for TRUE, so any later `na.rm = T` style call would
# be handed a data frame instead of a logical.
aq_long <- melt(airquality, id = c("month", "day"), na.rm = TRUE) %>% head(2)
aq_long
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
# Disabled example (closing parenthesis of filter() restored):
# aq_long %>% group_by(month) %>% filter(variable == "ozone") %>% summarize(m = mean(value))
getwd()
## [1] "C:/Users/cic/Desktop"
# NOTE(review): setwd() with a machine-specific path makes the script
# non-portable; the read below already uses an absolute path.
setwd("C:/Users/cic/Desktop")
df <- read.csv("C:/Users/cic/Desktop/disease.csv")
# Long format keyed by year: every other column becomes a (variable, value) pair.
df1 <- melt(df, id = "year")
df1 %>% head(2)
## year variable value
## 1 1999 Afghanistan 0
## 2 2000 Afghanistan 0
glimpse(df1)
## Rows: 772
## Columns: 3
## $ year <int> 1999, 2000, 2001, 2002, 1999, 2000, 2001, 2002, 1999, 2000, 2…
## $ variable <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Albania, …
## $ value <dbl> 0.0, 0.0, 0.0, 0.0, 89.0, 132.0, 54.0, 4.9, 25.0, 0.0, 14.0, …
# Give the melted columns descriptive names.
names(df1)[2:3] <- c("country", "disease")
names(df1)
## [1] "year" "country" "disease"
# Cast the long airquality sample back into a day x month x variable array.
acast(aq_long, day ~ month ~ variable)
## , , ozone
##
## 5
## 1 41
## 2 36
# Aggregate while casting: mean value per month and variable.
b <- acast(aq_long, month ~ variable, mean)
b
## ozone
## 5 38.5
library("sqldf")
## Warning: 패키지 'sqldf'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: gsubfn
## Warning: 패키지 'gsubfn'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: proto
## Warning: 패키지 'proto'는 R 버전 4.2.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: RSQLite
## Warning: 패키지 'RSQLite'는 R 버전 4.2.3에서 작성되었습니다
# Query the iris data frame with SQL and show the first two result rows.
data("iris")
head(sqldf("select*from iris"), n = 2)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
# 3 x 2 integer matrix, filled column by column.
a <- matrix(seq_len(6), ncol = 2)
a
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
# MARGIN = 1 applies the function across each row.
apply(a, MARGIN = 1, FUN = sum)
## [1] 5 7 9
# MARGIN = 2 applies it down each column; column 5 (Species) is excluded
# because it is not numeric.
apply(iris[, -5], MARGIN = 2, FUN = sum)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
# Dedicated helpers give the same column totals, and the column means.
colSums(iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 876.5 458.6 563.7 179.9
colMeans(iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
# Per-flower total of the four numeric measurements (Species column dropped).
rowSums(iris[, -5])
## [1] 10.2 9.5 9.4 9.4 10.2 11.4 9.7 10.1 8.9 9.6 10.8 10.0 9.3 8.5 11.2
## [16] 12.0 11.0 10.3 11.5 10.7 10.7 10.7 9.4 10.6 10.3 9.8 10.4 10.4 10.2 9.7
## [31] 9.7 10.7 10.9 11.3 9.7 9.6 10.5 10.0 8.9 10.2 10.1 8.4 9.1 10.7 11.2
## [46] 9.5 10.7 9.4 10.7 9.9 16.3 15.6 16.4 13.1 15.4 14.3 15.9 11.6 15.4 13.2
## [61] 11.5 14.6 13.2 15.1 13.4 15.6 14.6 13.6 14.4 13.1 15.7 14.2 15.2 14.8 14.9
## [76] 15.4 15.8 16.4 14.9 12.8 12.8 12.6 13.6 15.4 14.4 15.5 16.0 14.3 14.0 13.3
## [91] 13.7 15.1 13.6 11.6 13.8 14.1 14.1 14.7 11.7 13.9 18.1 15.5 18.1 16.6 17.5
## [106] 19.3 13.6 18.3 16.8 19.4 16.8 16.3 17.4 15.2 16.1 17.2 16.8 20.4 19.5 14.7
## [121] 18.1 15.3 19.2 15.7 17.8 18.2 15.6 15.8 16.9 17.6 18.2 20.1 17.0 15.7 15.7
## [136] 19.1 17.7 16.8 15.6 17.5 17.8 17.4 15.5 18.2 18.2 17.2 15.7 16.7 17.3 15.8
# Per-flower mean of the four numeric measurements (Species column dropped).
rowMeans(iris[, -5])
## [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500
## [13] 2.325 2.125 2.800 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650
## [25] 2.575 2.450 2.600 2.600 2.550 2.425 2.425 2.675 2.725 2.825 2.425 2.400
## [37] 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800 2.375 2.675 2.350
## [49] 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
## [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550
## [73] 3.800 3.700 3.725 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850
## [85] 3.600 3.875 4.000 3.575 3.500 3.325 3.425 3.775 3.400 2.900 3.450 3.525
## [97] 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375 4.825 3.400 4.575
## [109] 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025
## [133] 4.250 3.925 3.925 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550
## [145] 4.550 4.300 3.925 4.175 4.325 3.950
# Reload iris and look at it three ways: first rows, structure, summary.
data("iris")
head(iris, n = 6)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Covariance matrix of the four numeric columns (Species, column 5, excluded).
cov(iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 0.6856935 -0.0424340 1.2743154 0.5162707
## Sepal.Width -0.0424340 0.1899794 -0.3296564 -0.1216394
## Petal.Length 1.2743154 -0.3296564 3.1162779 1.2956094
## Petal.Width 0.5162707 -0.1216394 1.2956094 0.5810063
# Correlation matrix of the same columns.
cor(iris[, -5])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
library(dplyr)
library(ISLR)
## Warning: 패키지 'ISLR'는 R 버전 4.2.3에서 작성되었습니다
# Load the Wage data set from ISLR and print a compact column overview.
data(Wage)
glimpse(Wage)
## Rows: 3,000
## Columns: 11
## $ year <int> 2006, 2004, 2003, 2003, 2005, 2008, 2009, 2008, 2006, 2004,…
## $ age <int> 18, 24, 45, 43, 50, 54, 44, 30, 41, 52, 45, 34, 35, 39, 54,…
## $ maritl <fct> 1. Never Married, 1. Never Married, 2. Married, 2. Married,…
## $ race <fct> 1. White, 1. White, 1. White, 3. Asian, 1. White, 1. White,…
## $ education <fct> 1. < HS Grad, 4. College Grad, 3. Some College, 4. College …
## $ region <fct> 2. Middle Atlantic, 2. Middle Atlantic, 2. Middle Atlantic,…
## $ jobclass <fct> 1. Industrial, 2. Information, 1. Industrial, 2. Informatio…
## $ health <fct> 1. <=Good, 2. >=Very Good, 1. <=Good, 2. >=Very Good, 1. <=…
## $ health_ins <fct> 2. No, 2. No, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Yes, 1. Ye…
## $ logwage <dbl> 4.318063, 4.255273, 4.875061, 5.041393, 4.318063, 4.845098,…
## $ wage <dbl> 75.04315, 70.47602, 130.98218, 154.68529, 75.04315, 127.115…
# First three log-wage values.
head(Wage$logwage, n = 3)
## [1] 4.318063 4.255273 4.875061
# First three rows of the per-column summary table.
head(summary(Wage), n = 3)
## year age maritl race
## Min. :2003 Min. :18.00 1. Never Married: 648 1. White:2480
## 1st Qu.:2004 1st Qu.:33.75 2. Married :2074 2. Black: 293
## Median :2006 Median :42.00 3. Widowed : 19 3. Asian: 190
## education region jobclass
## 1. < HS Grad :268 2. Middle Atlantic :3000 1. Industrial :1544
## 2. HS Grad :971 1. New England : 0 2. Information:1456
## 3. Some College :650 3. East North Central: 0
## health health_ins logwage wage
## 1. <=Good : 858 1. Yes:2083 Min. :3.000 Min. : 20.09
## 2. >=Very Good:2142 2. No : 917 1st Qu.:4.447 1st Qu.: 85.38
## Median :4.653 Median :104.92
