# Setup ----
# Packages used by this script are attached together here. Lines starting
# with `##` throughout the file are console output captured from the
# original run.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(caret)
## Warning: package 'caret' was built under R version 4.2.3
## Loading required package: lattice
# The original script also ran `rm(list=ls())` and
# `setwd("C:/Users/cic/Desktop")` here. Both were dropped: clearing the
# workspace does not detach packages or reset options, and a hard-coded
# user directory makes the script fail on any other machine. The recorded
# output below shows the working directory was already that folder, so the
# setwd() call had no effect in the original run either.
getwd()
## [1] "C:/Users/cic/Desktop"
# Mutating joins on a shared key ----
# Two small tables keyed by `id`; only id 1 occurs in both of them.
ott1 <- data.frame(
  id = c(1, 2, 3),
  car = rep("bmw", 3),
  fe = c(20, 22, 24)
)
ott2 <- data.frame(
  id = c(1, 4, 5),
  fe1 = c(30, 34, 35)
)
ott1
## id car fe
## 1 1 bmw 20
## 2 2 bmw 22
## 3 3 bmw 24
ott2
## id fe1
## 1 1 30
## 2 4 34
## 3 5 35
# Left join: all rows of ott1 are kept; ids missing from ott2 get NA in fe1.
ott1 %>% left_join(ott2, by = "id")
## id car fe fe1
## 1 1 bmw 20 30
## 2 2 bmw 22 NA
## 3 3 bmw 24 NA
# Inner join: only ids present in both tables survive.
ott1 %>% inner_join(ott2, by = "id")
## id car fe fe1
## 1 1 bmw 20 30
# Full join: every id from either table, padded with NA where unmatched.
ott1 %>% full_join(ott2, by = "id")
## id car fe fe1
## 1 1 bmw 20 30
## 2 2 bmw 22 NA
## 3 3 bmw 24 NA
## 4 4 <NA> NA 34
## 5 5 <NA> NA 35
# Lookup-table joins ----
# ott3 maps a numeric code to a nation name; ott4 lists cars with that code.
ott3 <- data.frame(
  nation_code = c(1, 2, 3, 4),
  nation = c("한국", "일본", "중국", "독일")
)
ott4 <- data.frame(
  car = c("bmw", "toyota", "kia"),
  nation_code = c(4, 2, 1)
)
# Attach the nation name to each car through the shared `nation_code` key.
ott4 %>% left_join(ott3, by = "nation_code")
## car nation_code nation
## 1 bmw 4 독일
## 2 toyota 2 일본
## 3 kia 1 한국
# Same pattern with a second pair of tables.
# NOTE(review): the column names `nation` / `car` are reused from the example
# above; judging by the values they hold author names and book titles here.
ott7 <- data.frame(
  nation_code = c(1, 2, 3),
  nation = c("톨스토이", "도스토예프스키", "이반 투르게네프")
)
ott8 <- data.frame(
  car = c("죄와 벌", "전쟁과 평화", "첫 사랑"),
  nation_code = c(2, 1, 3)
)
ott8 %>% left_join(ott7, by = "nation_code")
## car nation_code nation
## 1 죄와 벌 2 도스토예프스키
## 2 전쟁과 평화 1 톨스토이
## 3 첫 사랑 3 이반 투르게네프
# Stacking rows with bind_rows() ----
# The two tables share `car` but name their numeric column differently
# (`fe` vs `fe1`), so the stacked result keeps both columns and fills the
# side that is absent in each input with NA.
ott5 <- data.frame(
  car = rep("bmw", 3),
  fe = c(20, 22, 24)
)
ott6 <- data.frame(
  car = rep("audi", 3),
  fe1 = c(20, 22, 24)
)
ott5 %>% bind_rows(ott6)
## car fe fe1
## 1 bmw 20 NA
## 2 bmw 22 NA
## 3 bmw 24 NA
## 4 audi NA 20
## 5 audi NA 22
## 6 audi NA 24
# Yearly personal savings rate (economics data set) ----
library(ggplot2)
data("economics")
glimpse(economics)
## Rows: 574
## Columns: 6
## $ date <date> 1967-07-01, 1967-08-01, 1967-09-01, 1967-10-01, 1967-11-01, …
## $ pce <dbl> 506.7, 509.8, 515.6, 512.2, 517.4, 525.1, 530.9, 533.6, 544.3…
## $ pop <dbl> 198712, 198911, 199113, 199311, 199498, 199657, 199808, 19992…
## $ psavert <dbl> 12.6, 12.6, 11.9, 12.9, 12.8, 11.8, 11.7, 12.3, 11.7, 12.3, 1…
## $ uempmed <dbl> 4.5, 4.7, 4.6, 4.9, 4.7, 4.8, 5.1, 4.5, 4.1, 4.6, 4.4, 4.4, 4…
## $ unemploy <dbl> 2944, 2945, 2958, 3143, 3066, 3018, 2878, 3001, 2877, 2709, 2…
# Derive the year as the first four characters of `date` (a character column,
# as in the recorded output). The column is referenced by its bare name so
# that mutate() reads it from the piped data; the original wrote
# `economics$date`, which reaches around the pipe to the global object and
# would silently use the wrong rows if the input were filtered or grouped.
economics <- economics %>%
  mutate(year = substr(date, 1, 4))
# The five years with the highest mean `psavert`.
economics %>%
  group_by(year) %>%
  summarize(m = mean(psavert)) %>%
  arrange(desc(m)) %>%
  head(5)
## # A tibble: 5 × 2
## year m
## <chr> <dbl>
## 1 1971 13.5
## 2 1973 13.4
## 3 1975 13.4
## 4 1974 13.3
## 5 1970 12.8
# Date-times with lubridate ----
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.2.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
as.Date("2021-05-01")
## [1] "2021-05-01"
# Left disabled in the original script.
# NOTE(review): presumably because as.Date() cannot parse the compact
# "YYYYMMDD" form without an explicit `format =` - confirm.
#as.Date("20210501")
data(lakers)
glimpse(lakers)
## Rows: 34,624
## Columns: 13
## $ date <int> 20081028, 20081028, 20081028, 20081028, 20081028, 20081028, …
## $ opponent <chr> "POR", "POR", "POR", "POR", "POR", "POR", "POR", "POR", "POR…
## $ game_type <chr> "home", "home", "home", "home", "home", "home", "home", "hom…
## $ time <chr> "12:00", "11:39", "11:37", "11:25", "11:23", "11:22", "11:22…
## $ period <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ etype <chr> "jump ball", "shot", "rebound", "shot", "rebound", "shot", "…
## $ team <chr> "OFF", "LAL", "LAL", "LAL", "LAL", "LAL", "POR", "LAL", "LAL…
## $ player <chr> "", "Pau Gasol", "Vladimir Radmanovic", "Derek Fisher", "Pau…
## $ result <chr> "", "missed", "", "missed", "", "made", "", "made", "", "mad…
## $ points <int> 0, 0, 0, 0, 0, 2, 0, 1, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, …
## $ type <chr> "", "hook", "off", "layup", "off", "hook", "shooting", "", "…
## $ x <int> NA, 23, NA, 25, NA, 25, NA, NA, NA, 36, 30, 34, NA, 15, 46, …
## $ y <int> NA, 13, NA, 6, NA, 10, NA, NA, NA, 21, 21, 10, NA, 17, 9, 10…
# Peek at the two raw columns that are about to be combined.
head(select(lakers, date, time), 3)
## date time
## 1 20081028 12:00
## 2 20081028 11:39
## 3 20081028 11:37
# Paste the integer date and the "HH:MM"-shaped string together, parse the
# result as a date-time, store it under the name `time_index`, and drop the
# now-redundant `time` column.
# NOTE(review): the `time` values above count down from 12:00 within period 1,
# so they look like a game clock (mm:ss) rather than a wall-clock hh:mm -
# confirm that parsing them with ymd_hm() is what is intended.
lakers <- lakers %>%
  mutate(date = ymd_hm(paste(date, time))) %>%
  rename(time_index = date) %>%
  select(-time)
head(lakers)
## time_index opponent game_type period etype team
## 1 2008-10-28 12:00:00 POR home 1 jump ball OFF
## 2 2008-10-28 11:39:00 POR home 1 shot LAL
## 3 2008-10-28 11:37:00 POR home 1 rebound LAL
## 4 2008-10-28 11:25:00 POR home 1 shot LAL
## 5 2008-10-28 11:23:00 POR home 1 rebound LAL
## 6 2008-10-28 11:22:00 POR home 1 shot LAL
## player result points type x y
## 1 0 NA NA
## 2 Pau Gasol missed 0 hook 23 13
## 3 Vladimir Radmanovic 0 off NA NA
## 4 Derek Fisher missed 0 layup 25 6
## 5 Pau Gasol 0 off NA NA
## 6 Pau Gasol made 2 hook 25 10
# Column summaries and per-year means ----
summary(lakers)
## time_index opponent game_type
## Min. :2008-10-28 00:00:00.0 Length:34624 Length:34624
## 1st Qu.:2008-12-10 00:19:30.0 Class :character Class :character
## Median :2009-01-21 10:52:00.0 Mode :character Mode :character
## Mean :2009-01-22 20:08:18.4
## 3rd Qu.:2009-03-09 00:33:00.0
## Max. :2009-04-14 12:00:00.0
##
## period etype team player
## Min. :1.000 Length:34624 Length:34624 Length:34624
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :3.000 Mode :character Mode :character Mode :character
## Mean :2.536
## 3rd Qu.:4.000
## Max. :5.000
##
## result points type x
## Length:34624 Min. :0.0000 Length:34624 Min. : 0.00
## Class :character 1st Qu.:0.0000 Class :character 1st Qu.:20.00
## Mode :character Median :0.0000 Mode :character Median :25.00
## Mean :0.4627 Mean :25.32
## 3rd Qu.:1.0000 3rd Qu.:31.00
## Max. :3.0000 Max. :51.00
## NA's :21557
## y
## Min. : 3.00
## 1st Qu.: 6.00
## Median :10.00
## Mean :13.43
## 3rd Qu.:20.00
## Max. :90.00
## NA's :21557
# Mean of `x` and `y` per calendar year of `time_index`.
# - `x` and `y` contain NAs (see the summary above), so they are removed
#   explicitly; `TRUE` is spelled out because `T` is an ordinary variable
#   that can be reassigned.
# - The grouping expression is given the name `year`, so the result column
#   is no longer labelled with the back-quoted expression `year(time_index)`.
lakers %>%
  group_by(year = year(time_index)) %>%
  summarize(
    mean_x = mean(x, na.rm = TRUE),
    mean_y = mean(y, na.rm = TRUE)
  )
## # A tibble: 2 × 3
## year mean_x mean_y
## <dbl> <dbl> <dbl>
## 1 2008 25.2 13.4
## 2 2009 25.4 13.4
# Filtering on the parsed time index ----
# Rows whose `time_index` is at or before 2008-10-28 12:00:00.
lakers %>%
  filter(time_index <= ymd_hms("2008-10-28 12:00:00")) %>%
  head(3)
## time_index opponent game_type period etype team
## 1 2008-10-28 12:00:00 POR home 1 jump ball OFF
## 2 2008-10-28 11:39:00 POR home 1 shot LAL
## 3 2008-10-28 11:37:00 POR home 1 rebound LAL
## player result points type x y
## 1 0 NA NA
## 2 Pau Gasol missed 0 hook 23 13
## 3 Vladimir Radmanovic 0 off NA NA
# Rows inside the closed interval [2008-10-28 12:00:00, 2009-03-09 00:33:00].
# Conditions separated by commas in filter() are combined with AND.
# The original script contained this statement twice, verbatim; the
# redundant second copy (and its identical output) has been removed.
lakers %>%
  filter(
    time_index >= ymd_hms("2008-10-28 12:00:00"),
    time_index <= ymd_hms("2009-03-09 00:33:00")
  ) %>%
  head(3)
## time_index opponent game_type period etype team player result
## 1 2008-10-28 12:00:00 POR home 1 jump ball OFF
## 2 2008-10-29 12:00:00 LAC away 1 jump ball OFF
## 3 2008-10-29 11:36:00 LAC away 1 shot LAL Pau Gasol made
## points type x y
## 1 0 NA NA
## 2 0 NA NA
## 3 2 running jump 20 8
# Locating missing values in airquality ----
data("airquality")
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# is.na() on a data frame returns a logical matrix of the same shape.
head(is.na(airquality), 1)
## Ozone Solar.R Wind Temp Month Day
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
# Lower-case the column names so the columns can be typed without shifting.
names(airquality) <- tolower(names(airquality))
is.na(airquality$ozone)
## [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [73] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
# NA count over every cell of the data frame.
table(is.na(airquality))
##
## FALSE TRUE
## 874 44
# NA count for the ozone column only. The original read `airquality$zone`
# (typo): that column does not exist, `$` returned NULL, and the result was
# an empty table ("< table of extent 0 >") instead of the counts below.
table(is.na(airquality$ozone))
##
## FALSE TRUE
## 116 37
# Per-column breakdown, first as a summary and then as plain NA counts.
summary(is.na(airquality))
## ozone solar.r wind temp
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:116 FALSE:146 FALSE:153 FALSE:153
## TRUE :37 TRUE :7
## month day
## Mode :logical Mode :logical
## FALSE:153 FALSE:153
##
colSums(is.na(airquality))
## ozone solar.r wind temp month day
## 37 7 0 0 0 0
# Current date and time (lubridate) ----
# The two values shown were captured when the script was originally run.
today()
## [1] "2023-07-17"
now()
## [1] "2023-07-17 15:26:03 KST"

# Aggregating and filtering around missing values ----
# NA propagates through sum() / mean() unless it is removed explicitly.
sum(airquality$ozone)
## [1] NA
mean(airquality$ozone)
## [1] NA
# `na.rm = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
sum(airquality$ozone, na.rm = TRUE)
## [1] 4887
mean(airquality$ozone, na.rm = TRUE)
## [1] 42.12931
# Idempotent: lower-casing names that are already lower-case changes nothing.
names(airquality) <- tolower(names(airquality))
# Keep only the rows where ozone was observed.
airquality %>%
  filter(!is.na(ozone)) %>%
  head(3)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
# Keep only the rows where both ozone and solar.r were observed
# (comma-separated conditions in filter() are combined with AND).
airquality %>%
  filter(!is.na(ozone), !is.na(solar.r)) %>%
  head(3)
## ozone solar.r wind temp month day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
# Mean imputation of missing ozone readings ----
# Remember which rows are missing *before* they are overwritten, and compute
# the replacement value from the data. The original pasted the printed mean
# (42.12931, rounded for display) in as a literal and later counted imputed
# rows with `ozone == 42.12931`; that only works while the two literals stay
# in sync and relies on exact floating-point equality.
# NOTE(review): imputed cells now hold the full-precision mean rather than
# the rounded literal - check any later code that compares against 42.12931.
ozone_missing <- is.na(airquality$ozone)
ozone_mean <- mean(airquality$ozone, na.rm = TRUE)
ozone_mean
## [1] 42.12931
airquality$ozone[ozone_missing] <- ozone_mean
# No missing ozone values remain.
table(is.na(airquality$ozone))
##
## FALSE
## 153
# Number of rows that were imputed.
sum(ozone_missing)
## [1] 37