library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Matrix basics: build two small numeric matrices and read a single cell.

# 3 x 2 matrix whose values 1..6 are laid out row by row.
mx <- matrix(as.numeric(1:6), nrow = 3, ncol = 2, byrow = TRUE)
mx
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
## [3,] 5 6

# 3 x 3 matrix filled column by column (the default fill order), so the
# cell in row 1, column 3 holds the 7th value.
x <- matrix(as.numeric(1:9), ncol = 3)
x[1, 3]
## [1] 7
# Build a small example data frame from three parallel vectors.
id <- c(1, 2, 3, 4)
age <- c(29, 30, 31, 32)
gender <- c("f", "m", "f", "m")
df1 <- data.frame(id, age, gender)

# View() opens a data viewer window, so only call it when a person is at
# the console; in a batch run (Rscript, knitting) it is skipped and the
# structure is inspected with str() below instead.
if (interactive()) {
  View(df1)
}
str(df1)
## 'data.frame': 4 obs. of 3 variables:
## $ id : num 1 2 3 4
## $ age : num 29 30 31 32
## $ gender: chr "f" "m" "f" "m"
# Column-wise overview of df1 using dplyr's glimpse().
dplyr::glimpse(df1)
## Rows: 4
## Columns: 3
## $ id <dbl> 1, 2, 3, 4
## $ age <dbl> 29, 30, 31, 32
## $ gender <chr> "f", "m", "f", "m"

# Load the iris data set and inspect it with both str() and glimpse().
data("iris")
utils::str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
dplyr::glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
# List everything currently defined in the workspace, then clear it before
# the file-reading part of the walkthrough.
ls()
## [1] "age" "df1" "gender" "id" "iris" "mx" "x"
rm(list = ls())
library(dplyr)

# Locate the Desktop folder from the logged-in user's profile instead of
# hard-coding the account name (it is "user" at home and "cic" at school),
# so the same script runs unchanged on both machines.
user_home <- Sys.getenv("USERPROFILE")
if (!nzchar(user_home)) {
  # USERPROFILE is a Windows variable; elsewhere fall back to the home dir.
  user_home <- path.expand("~")
}
desktop_dir <- normalizePath(
  file.path(user_home, "Desktop"),
  winslash = "/",
  mustWork = FALSE
)

# Recorded output below is from the school machine.
getwd()
## [1] "C:/Users/cic/Desktop"
setwd(desktop_dir)

# Preview the first rows of boston.csv through an absolute path ...
read.csv(file.path(desktop_dir, "boston.csv")) %>% head()
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7

# ... and through a path relative to the working directory set above.
read.csv("boston.csv") %>% head()
## CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
# Keep the complete data set in df. Piping the read through head() before
# assigning would store only the first six rows, so glimpse() and summary()
# below would describe a 6-row preview instead of the data.
df <- read.csv("boston.csv")

# Column overview of the whole table. Column types are fixed by read.csv()
# when the file is read; the types listed here are the ones it reported.
glimpse(df)
## Columns: 14
## $ CRIM <dbl>, $ ZN <dbl>, $ INDUS <dbl>, $ CHAS <int>, $ NOX <dbl>,
## $ RM <dbl>, $ AGE <dbl>, $ DIS <dbl>, $ RAD <int>, $ TAX <int>,
## $ PTRATIO <dbl>, $ B <dbl>, $ LSTAT <dbl>, $ MEDV <dbl>
## (row count and values depend on the file; re-run to see them)

# Three ways to pull out the CRIM column; head() keeps the printout to the
# first six entries.
head(df$CRIM)       # `$`  returns a plain vector
## [1] 0.00632 0.02731 0.02729 0.03237 0.06905 0.02985
head(df[["CRIM"]])  # `[[` returns a plain vector
## [1] 0.00632 0.02731 0.02729 0.03237 0.06905 0.02985
head(df["CRIM"])    # `[`  returns a one-column data frame
## CRIM
## 1 0.00632
## 2 0.02731
## 3 0.02729
## 4 0.03237
## 5 0.06905
## 6 0.02985

# Per-column summary statistics computed over every row of the file.
summary(df)
## (output not recorded for the full data set; re-run to see it)
# Column-wise overview of iris, followed by per-column summary statistics
# (counts per level for the factor column Species).
dplyr::glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Descriptive statistics for the integers 1..10 (a) and their natural
# logarithms (b).
a <- seq_len(10)
b <- log(a)

# Location and spread of a.
mean(x = a)
## [1] 5.5
var(x = a)
## [1] 9.166667
sd(x = a)
## [1] 3.02765
sum(a)
## [1] 55
median(x = a)
## [1] 5.5

# The log-transformed values (the same numbers that are stored in b).
log(x = a)
## [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 1.7917595 1.9459101
## [8] 2.0794415 2.1972246 2.3025851

# Joint behaviour of a and b: covariance, then Pearson correlation.
cov(x = a, y = b)
## [1] 2.112062
cor(x = a, y = b)
## [1] 0.9516624

# Five-number summary plus the mean.
summary(object = a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.25 5.50 5.50 7.75 10.00
# Empty the workspace and confirm that nothing is left in it.
rm(list = ls())
ls()
## character(0)

# Reload iris and look at its first rows (six by default).
data("iris")
head(x = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

# tail() is called twice, so the last six rows are printed twice.
tail(x = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
tail(x = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica

# An explicit row count overrides the default of six.
head(x = iris, n = 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Build a small data frame of personal measurements.
# NOTE(review): gender mixes upper- and lower-case codes ("F" and "f"); the
# values are kept as entered -- confirm whether they are the same category.
gender <- c("F", "M", "f")
bloodtype <- c("AB", "O", "B")
height <- c(170, 175, 165)  # presumably centimetres -- TODO confirm
weight <- c(70, 65, 55)     # presumably kilograms -- TODO confirm
df <- data.frame(gender, bloodtype, height, weight)
df
## gender bloodtype height weight
## 1 F AB 170 70
## 2 M O 175 65
## 3 f B 165 55

# A column can be addressed by position or by name; both give the same
# vector, so no assignment between the two forms is needed.
df[, 2]
## [1] "AB" "O" "B"
df[, "bloodtype"]
## [1] "AB" "O" "B"

# Several columns at once: an explicit index vector or a range.
df[, c(2, 3)]
## bloodtype height
## 1 AB 170
## 2 O 175
## 3 B 165
df[, 2:3]
## bloodtype height
## 1 AB 170
## 2 O 175
## 3 B 165

# Average of the two measurements. Two terms are summed, so the divisor is
# 2; the columns are read from df itself rather than from the loose vectors
# so the result always matches the rows of the data frame.
df$avg <- (df$height + df$weight) / 2
df
## gender bloodtype height weight avg
## 1 F AB 170 70 120
## 2 M O 175 65 120
## 3 f B 165 55 110

# Body-mass index = weight / height^2 with height in metres (height is
# divided by 100 on the assumption that it is recorded in centimetres).
df$bmi <- df$weight / (df$height / 100)^2
df
## gender bloodtype height weight avg bmi
## 1 F AB 170 70 120 24.22145
## 2 M O 175 65 120 21.22449
## 3 f B 165 55 110 20.20202

# Rename the second column in place.
colnames(df)[2] <- "blood"
df
## gender blood height weight avg bmi
## 1 F AB 170 70 120 24.22145
## 2 M O 175 65 120 21.22449
## 3 f B 165 55 110 20.20202
# install.packages("ggplot2")  # one-time setup if ggplot2 is not installed
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
data("diamonds")

# The pipe hands the value on its left to the function on its right, so
# this is dim(head(diamonds)).
diamonds %>% head() %>% dim()
## [1] 6 10
head(x = diamonds)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48

# Size and labels of the full table.
dim(x = diamonds)
## [1] 53940 10
names(x = diamonds)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
colnames(x = diamonds)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
head(row.names(diamonds))
## [1] "1" "2" "3" "4" "5" "6"

# Copy of the table with two columns renamed (new_name = old_name).
daimonds1 <- rename(diamonds, c = clarity, p = price)
head(x = daimonds1, n = 3)
## # A tibble: 3 × 10
## carat cut color c depth table p x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31

# Frequency of each cut: base table() first, then dplyr's count().
table(diamonds[["cut"]])
##
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
count(diamonds, cut)
## # A tibble: 5 × 2
## cut n
## <ord> <int>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
library("dplyr")
# View(diamonds)  # if view() does not work, use View() (capital V)

# Pick columns by position: 1, 3 and 8 through 10.
select(diamonds, 1, 3, 8:10)
## # A tibble: 53,940 × 5
## carat color x y z
## <dbl> <ord> <dbl> <dbl> <dbl>
## 1 0.23 E 3.95 3.98 2.43
## 2 0.21 E 3.89 3.84 2.31
## 3 0.23 E 4.05 4.07 2.31
## 4 0.29 I 4.2 4.23 2.63
## 5 0.31 J 4.34 4.35 2.75
## 6 0.24 J 3.94 3.96 2.48
## 7 0.24 I 3.95 3.98 2.47
## 8 0.26 H 4.07 4.11 2.53
## 9 0.22 E 3.87 3.78 2.49
## 10 0.23 H 4 4.05 2.39
## # ℹ 53,930 more rows

# A leading minus drops a column; everything else is kept.
df2 <- select(diamonds, -carat, -price)
head(x = df2, n = 3)
## # A tibble: 3 × 8
## cut color clarity depth table x y z
## <ord> <ord> <ord> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ideal E SI2 61.5 55 3.95 3.98 2.43
## 2 Premium E SI1 59.8 61 3.89 3.84 2.31
## 3 Good E VS1 56.9 65 4.05 4.07 2.31

# Pick rows by position: the first five.
slice(diamonds, 1:5)
## # A tibble: 5 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
# Row filtering with dplyr::filter(): keep the rows where the condition
# holds. head(3) limits each printout to the first three matches.

# Equality against a level of cut.
diamonds %>%
  filter(cut == "Good") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 2 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 3 0.3 Good J SI1 64 55 339 4.25 4.28 2.73

# The most expensive diamond: look the maximum up inside filter(), or
# pass the value found by max() as a literal.
max(diamonds$price)
## [1] 18823
diamonds %>%
  filter(price == max(price))
## # A tibble: 1 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
diamonds %>%
  filter(price == 18823)
## # A tibble: 1 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16

# Everything except Premium. Level names are case-sensitive: a lower-case
# "premium" matches no level, so the comparison would exclude nothing.
diamonds %>%
  filter(cut != "Premium") %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75

# Numeric comparisons on price: at least, not equal to, exactly 1000.
diamonds %>%
  filter(price >= 1000) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.7 Ideal E SI1 62.5 57 2757 5.7 5.72 3.57
## 2 0.86 Fair E SI2 55.1 69 2757 6.45 6.33 3.52
## 3 0.7 Ideal G VS2 61.6 56 2757 5.7 5.67 3.5
diamonds %>%
  filter(price != 1000) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
diamonds %>%
  filter(price == 1000) %>%
  head(3)
## # A tibble: 3 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.38 Very Good E VVS2 61.8 56 1000 4.66 4.68 2.88
## 2 0.39 Very Good F VS1 57.1 61 1000 4.86 4.91 2.79
## 3 0.38 Very Good E VS1 61.5 58 1000 4.64 4.69 2.87