한정민 바보

library(dplyr)

## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# 3 x 2 numeric matrix filled row by row, then printed.
mx <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 3, byrow = TRUE)
mx

##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
## [3,]    5    6

# 3 x 3 matrix filled column by column (the default), so the element in
# row 1, column 3 is the 7th value.
x <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3)
x[1, 3]

## [1] 7

# Build a small example data frame from three equal-length vectors.
id <- c(1, 2, 3, 4)
age <- c(29, 30, 31, 32)
gender <- c("f", "m", "f", "m")
df1 <- data.frame(id, age, gender)
# View() opens a spreadsheet-style viewer and needs an interactive session,
# so skip it when the code runs non-interactively (e.g. via Rscript), where
# a viewer may not be available.
if (interactive()) {
  View(df1)
}
# Print the structure: dimensions plus each column's type and first values.
str(df1)

## 'data.frame':    4 obs. of  3 variables:
##  $ id    : num  1 2 3 4
##  $ age   : num  29 30 31 32
##  $ gender: chr  "f" "m" "f" "m"

# Compact column-wise overview from dplyr: each column's type and first values.
glimpse(df1)

## Rows: 4
## Columns: 3
## $ id     <dbl> 1, 2, 3, 4
## $ age    <dbl> 29, 30, 31, 32
## $ gender <chr> "f", "m", "f", "m"

# Load the built-in iris data set into the workspace and show its structure.
data("iris")
str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Same overview for iris: 150 rows, four numeric columns and one factor.
glimpse(iris)

## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…

# List the names of every object currently defined in the workspace.
ls()

## [1] "age"    "df1"    "gender" "id"     "iris"   "mx"     "x"

# Empty the workspace (attached packages stay attached), attach dplyr and
# report the current working directory.
rm(list = ls())
library("dplyr")
getwd()

## [1] "C:/Users/cic/Desktop"

# The Windows account name differs between machines ("user" at home, "cic" at
# school), so derive the Desktop folder from the USERPROFILE environment
# variable instead of hard-coding "C:/Users/<name>/Desktop".
desktop_dir <- file.path(Sys.getenv("USERPROFILE"), "Desktop")
# Fail early with a clear error if the folder is missing on this machine.
stopifnot(dir.exists(desktop_dir))
# The reads further down use the relative path "boston.csv", so the working
# directory still has to point at the Desktop.
setwd(desktop_dir)
# Read the file through its full path and print only the first six rows.
read.csv(file.path(desktop_dir, "boston.csv")) %>% head()

##      CRIM ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##   MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7

# Same file, this time located relative to the current working directory.
read.csv("boston.csv") %>%
  head()

##      CRIM ZN INDUS CHAS   NOX    RM  AGE    DIS RAD TAX PTRATIO      B LSTAT
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##   MEDV
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7

# NOTE: because the pipe ends in `head`, `df` keeps only the first six rows of
# boston.csv, not the whole file; every later result based on `df` (glimpse,
# summary) therefore describes just those six rows.
df=read.csv("boston.csv") %>% head
glimpse(df)

## Rows: 6
## Columns: 14
## $ CRIM    <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985
## $ ZN      <dbl> 18, 0, 0, 0, 0, 0
## $ INDUS   <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18
## $ CHAS    <int> 0, 0, 0, 0, 0, 0
## $ NOX     <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458
## $ RM      <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430
## $ AGE     <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7
## $ DIS     <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622
## $ RAD     <int> 1, 2, 2, 3, 3, 3
## $ TAX     <int> 296, 242, 242, 222, 222, 222
## $ PTRATIO <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7
## $ B       <dbl> 396.90, 396.90, 392.83, 394.63, 396.90, 394.12
## $ LSTAT   <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21
## $ MEDV    <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7

# `$` extracts the column as a plain vector.
df$CRIM

## [1] 0.00632 0.02731 0.02729 0.03237 0.06905 0.02985

# `[[` with the column name also returns a plain vector.
df[["CRIM"]]

## [1] 0.00632 0.02731 0.02729 0.03237 0.06905 0.02985

# Single `[` keeps the data-frame structure: the result is a one-column
# data frame, not a vector.
df["CRIM"]

##      CRIM
## 1 0.00632
## 2 0.02731
## 3 0.02729
## 4 0.03237
## 5 0.06905
## 6 0.02985

# Per-column minimum, quartiles, median, mean and maximum.
summary(df)

##       CRIM               ZN         INDUS            CHAS        NOX        
##  Min.   :0.00632   Min.   : 0   Min.   :2.180   Min.   :0   Min.   :0.4580  
##  1st Qu.:0.02729   1st Qu.: 0   1st Qu.:2.180   1st Qu.:0   1st Qu.:0.4580  
##  Median :0.02858   Median : 0   Median :2.245   Median :0   Median :0.4635  
##  Mean   :0.03203   Mean   : 3   Mean   :3.832   Mean   :0   Mean   :0.4750  
##  3rd Qu.:0.03174   3rd Qu.: 0   3rd Qu.:5.880   3rd Qu.:0   3rd Qu.:0.4690  
##  Max.   :0.06905   Max.   :18   Max.   :7.070   Max.   :0   Max.   :0.5380  
##        RM             AGE             DIS             RAD             TAX     
##  Min.   :6.421   Min.   :45.80   Min.   :4.090   Min.   :1.000   Min.   :222  
##  1st Qu.:6.466   1st Qu.:55.33   1st Qu.:4.967   1st Qu.:2.000   1st Qu.:222  
##  Median :6.787   Median :59.90   Median :5.515   Median :2.500   Median :232  
##  Mean   :6.793   Mean   :60.65   Mean   :5.368   Mean   :2.333   Mean   :241  
##  3rd Qu.:7.110   3rd Qu.:64.17   3rd Qu.:6.062   3rd Qu.:3.000   3rd Qu.:242  
##  Max.   :7.185   Max.   :78.90   Max.   :6.062   Max.   :3.000   Max.   :296  
##     PTRATIO            B             LSTAT            MEDV      
##  Min.   :15.30   Min.   :392.8   Min.   :2.940   Min.   :21.60  
##  1st Qu.:17.80   1st Qu.:394.2   1st Qu.:4.268   1st Qu.:25.18  
##  Median :18.25   Median :395.8   Median :5.095   Median :31.05  
##  Mean   :17.83   Mean   :395.4   Mean   :5.272   Mean   :29.77  
##  3rd Qu.:18.70   3rd Qu.:396.9   3rd Qu.:5.300   3rd Qu.:34.38  
##  Max.   :18.70   Max.   :396.9   Max.   :9.140   Max.   :36.20

# The workspace copy of iris was removed by rm(list=ls()) earlier, but the
# name still resolves to the built-in data set from the datasets package.
glimpse(iris)

## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…

# Numeric columns get quantile summaries; the factor Species gets level counts.
summary(iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# a holds the integers 1 to 10; b holds their natural logarithms.
a <- 1:10
b <- log(a)
# Arithmetic mean of a.
mean(a)

## [1] 5.5

# Sample variance (divides by n - 1).
var(a)

## [1] 9.166667

# Sample standard deviation, i.e. the square root of var(a).
sd(a)

## [1] 3.02765

# Total of all elements.
sum(a)

## [1] 55

# Middle value; with ten elements it is the mean of the 5th and 6th.
median(a)

## [1] 5.5

# Natural logarithm of each element (the same values that are stored in b).
log(a)

##  [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 1.7917595 1.9459101
##  [8] 2.0794415 2.1972246 2.3025851

# Sample covariance between a and b.
cov(a,b)

## [1] 2.112062

# Correlation between a and b (Pearson is cor()'s default method).
cor(a,b)

## [1] 0.9516624

# Minimum, quartiles, median, mean and maximum in one call.
summary(a)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.25    5.50    5.50    7.75   10.00

# Remove every object from the workspace, then confirm that none is left.
rm(list = ls())
ls()

## character(0)

# Reload iris into the workspace and show its first six rows
# (six is head()'s default).
data("iris")
head(iris, n = 6L)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# `;` separates two statements on one line, so the last six rows print twice.
tail(iris);tail(iris)

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

# An explicit row count overrides head()'s default of six rows.
head(iris, n = 10)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1           5.1         3.5          1.4         0.2  setosa
## 2           4.9         3.0          1.4         0.2  setosa
## 3           4.7         3.2          1.3         0.2  setosa
## 4           4.6         3.1          1.5         0.2  setosa
## 5           5.0         3.6          1.4         0.2  setosa
## 6           5.4         3.9          1.7         0.4  setosa
## 7           4.6         3.4          1.4         0.3  setosa
## 8           5.0         3.4          1.5         0.2  setosa
## 9           4.4         2.9          1.4         0.2  setosa
## 10          4.9         3.1          1.5         0.1  setosa

# Build a small example data frame from four equal-length vectors.
gender <- c("F", "M", "f")
bloodtype <- c("AB", "O", "B")
height <- c(170, 175, 165)
weight <- c(70, 65, 55)
df <- data.frame(gender, bloodtype, height, weight)
df

##   gender bloodtype height weight
## 1      F        AB    170     70
## 2      M         O    175     65
## 3      f         B    165     55

# Column 2 and the column named "bloodtype" are the same column, so this
# assignment copies the column onto itself and leaves `df` unchanged.
df[, 2] <- df[, "bloodtype"]
df[, 2]

## [1] "AB" "O"  "B"

df[, "bloodtype"]

## [1] "AB" "O"  "B"

# Several columns can be selected with a vector of positions ...
df[, c(2, 3)]

##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165

# ... or with a range of positions.
df[, 2:3]

##   bloodtype height
## 1        AB    170
## 2         O    175
## 3         B    165

# Row-wise mean of height and weight: two values are averaged, so the divisor
# is 2. The columns are read from `df` itself rather than from the
# free-standing vectors, so the result always matches the data frame.
df$avg <- (df$height + df$weight) / 2
df

##   gender bloodtype height weight avg
## 1      F        AB    170     70 120
## 2      M         O    175     65 120
## 3      f         B    165     55 110

# Body-mass index = weight (kg) / height (m)^2. `height` is presumably in
# centimetres (values 165-175) and `weight` in kilograms, so height is
# converted to metres first.
df$bmi <- df$weight / (df$height / 100)^2
df

##   gender bloodtype height weight avg      bmi
## 1      F        AB    170     70 120 24.22145
## 2      M         O    175     65 120 21.22449
## 3      f         B    165     55 110 20.20202

# Rename the second column from "bloodtype" to "blood".
colnames(df)[2] <- "blood"
df

##   gender blood height weight avg      bmi
## 1      F    AB    170     70 120 24.22145
## 2      M     O    175     65 120 21.22449
## 3      f     B    165     55 110 20.20202

#install.packages("ggplot2")
library(ggplot2)

## Warning: 패키지 'ggplot2'는 R 버전 4.2.3에서 작성되었습니다

# Load the diamonds data set that ships with ggplot2.
data("diamonds")
# The pipe hands ("tosses") each result on to the next function:
# diamonds -> head() -> dim().
diamonds %>%
  head() %>%
  dim()

## [1]  6 10

# First six rows; diamonds is a tibble, so the column types are printed too.
head(diamonds)

## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48

# Number of rows and number of columns.
dim(diamonds)

## [1] 53940    10

# Column names.
names(diamonds)

##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"

# For a data frame, colnames() returns the same names as names().
colnames(diamonds)

##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"

# Row names are the row numbers stored as character strings.
row.names(diamonds) %>%
  head()

## [1] "1" "2" "3" "4" "5" "6"

# Copy of diamonds with two columns given short names:
# clarity -> c and price -> p. All other columns are unchanged.
diamonds1 <- diamonds %>% rename(c = clarity, p = price)
head(diamonds1, 3)

## # A tibble: 3 × 10
##   carat cut     color c     depth table     p     x     y     z
##   <dbl> <ord>   <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2    61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1    59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1    56.9    65   327  4.05  4.07  2.31

# Base-R frequency table of the cut levels.
table(diamonds$cut)

## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551

# dplyr equivalent: one row per cut level with its count in column n.
diamonds %>% count(cut)

## # A tibble: 5 × 2
##   cut           n
##   <ord>     <int>
## 1 Fair       1610
## 2 Good       4906
## 3 Very Good 12082
## 4 Premium   13791
## 5 Ideal     21551

library(dplyr)
# View(diamonds)  # if `view` does not work, use `View` (capital V)
# Pick columns by position: 1 (carat), 3 (color) and 8 to 10 (x, y, z).
diamonds %>%
  select(1, 3, 8:10)

## # A tibble: 53,940 × 5
##    carat color     x     y     z
##    <dbl> <ord> <dbl> <dbl> <dbl>
##  1  0.23 E      3.95  3.98  2.43
##  2  0.21 E      3.89  3.84  2.31
##  3  0.23 E      4.05  4.07  2.31
##  4  0.29 I      4.2   4.23  2.63
##  5  0.31 J      4.34  4.35  2.75
##  6  0.24 J      3.94  3.96  2.48
##  7  0.24 I      3.95  3.98  2.47
##  8  0.26 H      4.07  4.11  2.53
##  9  0.22 E      3.87  3.78  2.49
## 10  0.23 H      4     4.05  2.39
## # ℹ 53,930 more rows

# Drop carat and price, keeping the remaining eight columns.
df2 <- diamonds %>%
  select(-c(carat, price))
head(df2, n = 3)

## # A tibble: 3 × 8
##   cut     color clarity depth table     x     y     z
##   <ord>   <ord> <ord>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ideal   E     SI2      61.5    55  3.95  3.98  2.43
## 2 Premium E     SI1      59.8    61  3.89  3.84  2.31
## 3 Good    E     VS1      56.9    65  4.05  4.07  2.31

# Rows 1 to 5 selected by position.
diamonds %>% slice(1:5)

## # A tibble: 5 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good    J     SI2      63.3    58   335  4.34  4.35  2.75

# Keep only rows whose cut is "Good" and show the first three.
diamonds %>% filter(cut=="Good") %>% head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 2  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75
## 3  0.3  Good  J     SI1      64      55   339  4.25  4.28  2.73

# Highest price in the data set.
max(diamonds$price)

## [1] 18823

# Row(s) whose price equals the maximum price.
diamonds %>% filter(price==max(price))

## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16

# Same row, selected with the maximum written out as a literal value.
diamonds %>% filter(price==18823)

## # A tibble: 1 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  2.29 Premium I     VS2      60.8    60 18823   8.5  8.47  5.16

# Exclude Premium diamonds and show the first three remaining rows.
# Level names are case-sensitive: the level is "Premium", so comparing with
# lower-case "premium" would exclude nothing.
diamonds %>% filter(cut != "Premium") %>% head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.23 Good  E     VS1      56.9    65   327  4.05  4.07  2.31
## 3  0.31 Good  J     SI2      63.3    58   335  4.34  4.35  2.75

# Rows priced at 1000 or more; first three shown.
diamonds %>% filter(price>=1000) %>% head(3)

## # A tibble: 3 × 10
##   carat cut   color clarity depth table price     x     y     z
##   <dbl> <ord> <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.7  Ideal E     SI1      62.5    57  2757  5.7   5.72  3.57
## 2  0.86 Fair  E     SI2      55.1    69  2757  6.45  6.33  3.52
## 3  0.7  Ideal G     VS2      61.6    56  2757  5.7   5.67  3.5

# Rows with any price other than exactly 1000; first three shown.
diamonds %>% filter(price!=1000) %>% head(3)

## # A tibble: 3 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal   E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good    E     VS1      56.9    65   327  4.05  4.07  2.31

# Rows priced at exactly 1000; first three shown.
diamonds %>% filter(price==1000) %>% head(3)

## # A tibble: 3 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.38 Very Good E     VVS2     61.8    56  1000  4.66  4.68  2.88
## 2  0.39 Very Good F     VS1      57.1    61  1000  4.86  4.91  2.79
## 3  0.38 Very Good E     VS1      61.5    58  1000  4.64  4.69  2.87

한정민 바보

김태혁

2023-07-11