# Session setup: report the current working directory, point it at the data
# folder, list the workspace contents, and attach dplyr.
getwd()
## [1] "C:/data"
# NOTE(review): hard-coded, machine-specific path. It is kept because the CSV
# below is read through a relative file name that resolves against it.
setwd("c:/data")
# The workspace is only inspected, never wiped: `rm(list = ls())` would
# silently delete whatever objects the person running the script already had.
ls()
## character(0)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the EUC-KR encoded CSV from the working directory.
df <- read.csv("ta1.csv", fileEncoding = "euc-kr")
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable through Rscript / knitr.
if (interactive()) {
  View(df)
}
names(df)
## [1] "가해자연령층별.1." "월별.1." "X2022"
## [4] "X2022.1" "X2022.2"
# Replace the auto-generated column names with readable ones (new = old).
# rename() keeps the column positions, so the argument order is free.
df1 <- df %>%
  rename(
    연령층 = 가해자연령층별.1.,
    month = 월별.1.,
    사고건수 = X2022,
    사망자수 = X2022.1,
    부상자수 = X2022.2
  )
if (interactive()) {
  View(df1)
}
# Drop the first row.
# NOTE(review): presumably a second header row in the export -- confirm.
df2 <- df1 %>% slice(-1)
# Preview only the rows whose month is not "전체" (Korean for "total");
# the result is printed, not stored, so df2 still contains those rows.
df2 %>%
  filter(month != "전체") %>%
  glimpse()
## Rows: 108
## Columns: 5
## $ 연령층 <chr> "20세이하", "20세이하", "20세이하", "20세이하", "20세이하", "…
## $ month <chr> "1월", "2월", "3월", "4월", "5월", "6월", "7월", "8월", "9월"…
## $ 사고건수 <chr> "435", "357", "473", "580", "713", "668", "639", "557", "609"…
## $ 사망자수 <chr> "6", "2", "3", "7", "6", "5", "6", "7", "8", "8", "8", "6", "…
## $ 부상자수 <chr> "633", "505", "678", "810", "955", "877", "847", "735", "808"…
# The values were read in as character; convert the accident-count column to
# numeric in place (the other two count columns stay character).
df2 <- df2 %>% mutate(사고건수 = as.numeric(사고건수))
df2 %>% glimpse()
## Rows: 117
## Columns: 5
## $ 연령층 <chr> "20세이하", "20세이하", "20세이하", "20세이하", "20세이하", "…
## $ month <chr> "전체", "1월", "2월", "3월", "4월", "5월", "6월", "7월", "8월…
## $ 사고건수 <dbl> 6508, 435, 357, 473, 580, 713, 668, 639, 557, 609, 618, 532, …
## $ 사망자수 <chr> "72", "6", "2", "3", "7", "6", "5", "6", "7", "8", "8", "8", …
## $ 부상자수 <chr> "8863", "633", "505", "678", "810", "955", "877", "847", "735…
# install.packages("gapminder")
library(gapminder)
# Sum the population within every (year, continent) pair. The result stays
# grouped by year, which is what the message below reports.
y <- gapminder %>%
  group_by(year, continent) %>%
  summarise(c_pop = sum(pop))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
head(y, n = 20)
## # A tibble: 20 × 3
## # Groups: year [4]
## year continent c_pop
## <int> <fct> <dbl>
## 1 1952 Africa 237640501
## 2 1952 Americas 345152446
## 3 1952 Asia 1395357351
## 4 1952 Europe 418120846
## 5 1952 Oceania 10686006
## 6 1957 Africa 264837738
## 7 1957 Americas 386953916
## 8 1957 Asia 1562780599
## 9 1957 Europe 437890351
## 10 1957 Oceania 11941976
## 11 1962 Africa 296516865
## 12 1962 Americas 433270254
## 13 1962 Asia 1696357182
## 14 1962 Europe 460355155
## 15 1962 Oceania 13283518
## 16 1967 Africa 335289489
## 17 1967 Americas 480746623
## 18 1967 Asia 1905662900
## 19 1967 Europe 481178958
## 20 1967 Oceania 14600414
# View() needs a GUI viewer; skip it outside interactive sessions.
if (interactive()) {
  View(gapminder)
}
# Summed population (from y) against year.
plot(y$year, y$c_pop)

# Life expectancy against log10 GDP per capita, coloured by continent.
# The factor passed to `col` is used through its integer codes 1..k.
continents <- levels(gapminder$continent)
plot(log10(gapminder$gdpPercap), gapminder$lifeExp, col = gapminder$continent)
# The scatter plot draws every point with the default symbol (pch = 1), so
# the legend must use that symbol for every entry; only the colour varies.
# The colours are the same integer codes the plot used.
legend(
  "bottomright",
  legend = continents,
  pch = 1,
  col = seq_along(continents)
)
# install.packages("ggplot2")
library(ggplot2)

# Bubble chart: GDP per capita (log10 x axis) against life expectancy,
# coloured by continent and sized by population.
gapminder %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)) +
  geom_point() +
  scale_x_log10()

# Calling the scale constructor on its own just prints the scale object.
scale_x_log10()
## <ScaleContinuousPosition>
## Range:
## Limits: 0 -- 1
# Same bubble chart with semi-transparent points.
gapminder %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()

# ... and split into one panel per year.
gapminder %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  facet_wrap(~year)

# Horizontal bar chart of 1952 population for Asian countries, with the
# countries ordered by population.
gapminder %>%
  filter(year == 1952, continent == "Asia") %>%
  ggplot(aes(x = reorder(country, pop), y = pop)) +
  geom_col() +
  coord_flip()

# Number of rows per continent.
count(gapminder, continent)
## # A tibble: 5 × 2
## continent n
## <fct> <int>
## 1 Africa 624
## 2 Americas 300
## 3 Asia 396
## 4 Europe 360
## 5 Oceania 24
# All rows for the country labelled "Korea, Rep.".
filter(gapminder, country == "Korea, Rep.")
## # A tibble: 12 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Korea, Rep. Asia 1952 47.5 20947571 1031.
## 2 Korea, Rep. Asia 1957 52.7 22611552 1488.
## 3 Korea, Rep. Asia 1962 55.3 26420307 1536.
## 4 Korea, Rep. Asia 1967 57.7 30131000 2029.
## 5 Korea, Rep. Asia 1972 62.6 33505000 3031.
## 6 Korea, Rep. Asia 1977 64.8 36436000 4657.
## 7 Korea, Rep. Asia 1982 67.1 39326000 5623.
## 8 Korea, Rep. Asia 1987 69.8 41622000 8533.
## 9 Korea, Rep. Asia 1992 72.2 43805450 12104.
## 10 Korea, Rep. Asia 1997 74.6 46173816 15994.
## 11 Korea, Rep. Asia 2002 77.0 47969150 19234.
## 12 Korea, Rep. Asia 2007 78.6 49044790 23348.
#gapminder %>% filter(country=='Korea, Rep.') %>% ggplot(aes(year,lifeExp,
# ))
# Load the built-in cars data set and inspect its structure and summary.
data("cars")
cars %>% glimpse()
## Rows: 50
## Columns: 2
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13…
## $ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 34…
cars %>% summary()
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Label each row "fast" when its speed is above the mean speed, else "slow".
# The cutoff is computed from the data (the summary above reports a mean of
# 15.4) rather than typed in by hand, and the column is referenced through
# mutate()'s data mask instead of `cars$speed`, so the expression always
# works on the rows actually flowing through the pipe.
cars1 <- cars %>%
  mutate(violation = ifelse(speed > mean(speed), "fast", "slow"))
cars1
## speed dist violation
## 1 4 2 slow
## 2 4 10 slow
## 3 7 4 slow
## 4 7 22 slow
## 5 8 16 slow
## 6 9 10 slow
## 7 10 18 slow
## 8 10 26 slow
## 9 10 34 slow
## 10 11 17 slow
## 11 11 28 slow
## 12 12 14 slow
## 13 12 20 slow
## 14 12 24 slow
## 15 12 28 slow
## 16 13 26 slow
## 17 13 34 slow
## 18 13 34 slow
## 19 13 46 slow
## 20 14 26 slow
## 21 14 36 slow
## 22 14 60 slow
## 23 14 80 slow
## 24 15 20 slow
## 25 15 26 slow
## 26 15 54 slow
## 27 16 32 fast
## 28 16 40 fast
## 29 17 32 fast
## 30 17 40 fast
## 31 17 50 fast
## 32 18 42 fast
## 33 18 56 fast
## 34 18 76 fast
## 35 18 84 fast
## 36 19 36 fast
## 37 19 46 fast
## 38 19 68 fast
## 39 20 32 fast
## 40 20 48 fast
## 41 20 52 fast
## 42 20 56 fast
## 43 20 64 fast
## 44 22 66 fast
## 45 23 54 fast
## 46 24 70 fast
## 47 24 92 fast
## 48 24 93 fast
## 49 24 120 fast
## 50 25 85 fast
# violation starts out as a character column ...
cars1 %>% glimpse()
## Rows: 50
## Columns: 3
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13…
## $ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26…
## $ violation <chr> "slow", "slow", "slow", "slow", "slow", "slow", "slow", "slo…
# ... and is converted to a factor here.
cars1 <- cars1 %>% mutate(violation = factor(violation))
cars1 %>% glimpse()
## Rows: 50
## Columns: 3
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13…
## $ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26…
## $ violation <fct> slow, slow, slow, slow, slow, slow, slow, slow, slow, slow, …
# Mean speed within each violation level.
cars1 %>%
  group_by(violation) %>%
  summarise(p = mean(speed))
## # A tibble: 2 × 2
## violation p
## <fct> <dbl>
## 1 fast 19.9
## 2 slow 11.2
# install.packages("hflights")
library(hflights)
# Per-column summary statistics of the hflights data.
hflights %>% summary()
## Year Month DayofMonth DayOfWeek DepTime
## Min. :2011 Min. : 1.000 Min. : 1.00 Min. :1.000 Min. : 1
## 1st Qu.:2011 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:2.000 1st Qu.:1021
## Median :2011 Median : 7.000 Median :16.00 Median :4.000 Median :1416
## Mean :2011 Mean : 6.514 Mean :15.74 Mean :3.948 Mean :1396
## 3rd Qu.:2011 3rd Qu.: 9.000 3rd Qu.:23.00 3rd Qu.:6.000 3rd Qu.:1801
## Max. :2011 Max. :12.000 Max. :31.00 Max. :7.000 Max. :2400
## NA's :2905
## ArrTime UniqueCarrier FlightNum TailNum
## Min. : 1 Length:227496 Min. : 1 Length:227496
## 1st Qu.:1215 Class :character 1st Qu.: 855 Class :character
## Median :1617 Mode :character Median :1696 Mode :character
## Mean :1578 Mean :1962
## 3rd Qu.:1953 3rd Qu.:2755
## Max. :2400 Max. :7290
## NA's :3066
## ActualElapsedTime AirTime ArrDelay DepDelay
## Min. : 34.0 Min. : 11.0 Min. :-70.000 Min. :-33.000
## 1st Qu.: 77.0 1st Qu.: 58.0 1st Qu.: -8.000 1st Qu.: -3.000
## Median :128.0 Median :107.0 Median : 0.000 Median : 0.000
## Mean :129.3 Mean :108.1 Mean : 7.094 Mean : 9.445
## 3rd Qu.:165.0 3rd Qu.:141.0 3rd Qu.: 11.000 3rd Qu.: 9.000
## Max. :575.0 Max. :549.0 Max. :978.000 Max. :981.000
## NA's :3622 NA's :3622 NA's :3622 NA's :2905
## Origin Dest Distance TaxiIn
## Length:227496 Length:227496 Min. : 79.0 Min. : 1.000
## Class :character Class :character 1st Qu.: 376.0 1st Qu.: 4.000
## Mode :character Mode :character Median : 809.0 Median : 5.000
## Mean : 787.8 Mean : 6.099
## 3rd Qu.:1042.0 3rd Qu.: 7.000
## Max. :3904.0 Max. :165.000
## NA's :3066
## TaxiOut Cancelled CancellationCode Diverted
## Min. : 1.00 Min. :0.00000 Length:227496 Min. :0.000000
## 1st Qu.: 10.00 1st Qu.:0.00000 Class :character 1st Qu.:0.000000
## Median : 14.00 Median :0.00000 Mode :character Median :0.000000
## Mean : 15.09 Mean :0.01307 Mean :0.002853
## 3rd Qu.: 18.00 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :163.00 Max. :1.00000 Max. :1.000000
## NA's :2947
# Column names, types and leading values of hflights.
hflights %>% glimpse()
## Rows: 227,496
## Columns: 21
## $ Year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Dimensions of the subset whose Month is 2 or 8.
hflights %>%
  filter(Month %in% c(2, 8)) %>%
  dim()
## [1] 37304 21
hflights %>% glimpse()
## Rows: 227,496
## Columns: 21
## $ Year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Number of rows for each Dest value.
count(hflights, Dest)
## Dest n
## 1 ABQ 2812
## 2 AEX 724
## 3 AGS 1
## 4 AMA 1297
## 5 ANC 125
## 6 ASE 125
## 7 ATL 7886
## 8 AUS 5022
## 9 AVL 350
## 10 BFL 504
## 11 BHM 2736
## 12 BKG 110
## 13 BNA 3481
## 14 BOS 1752
## 15 BPT 3
## 16 BRO 1692
## 17 BTR 1762
## 18 BWI 2551
## 19 CAE 561
## 20 CHS 1200
## 21 CID 410
## 22 CLE 2140
## 23 CLT 4735
## 24 CMH 1348
## 25 COS 1657
## 26 CRP 4813
## 27 CRW 357
## 28 CVG 1535
## 29 DAL 9820
## 30 DAY 451
## 31 DCA 2699
## 32 DEN 5920
## 33 DFW 6653
## 34 DSM 647
## 35 DTW 2601
## 36 ECP 729
## 37 EGE 110
## 38 ELP 3036
## 39 EWR 4314
## 40 FLL 2462
## 41 GJT 403
## 42 GPT 1618
## 43 GRK 42
## 44 GRR 677
## 45 GSO 630
## 46 GSP 1123
## 47 GUC 86
## 48 HDN 110
## 49 HNL 402
## 50 HOB 309
## 51 HRL 3983
## 52 HSV 923
## 53 IAD 1980
## 54 ICT 1517
## 55 IND 1750
## 56 JAN 2011
## 57 JAX 2135
## 58 JFK 695
## 59 LAS 4082
## 60 LAX 6064
## 61 LBB 1333
## 62 LCH 364
## 63 LEX 584
## 64 LFT 2313
## 65 LGA 2730
## 66 LIT 1579
## 67 LRD 1188
## 68 MAF 2306
## 69 MCI 3174
## 70 MCO 3687
## 71 MDW 2094
## 72 MEM 2399
## 73 MFE 1128
## 74 MIA 2463
## 75 MKE 1588
## 76 MLU 292
## 77 MOB 1674
## 78 MSP 2010
## 79 MSY 6823
## 80 MTJ 164
## 81 OAK 690
## 82 OKC 3170
## 83 OMA 2044
## 84 ONT 952
## 85 ORD 5748
## 86 ORF 717
## 87 PBI 1253
## 88 PDX 1235
## 89 PHL 2367
## 90 PHX 5096
## 91 PIT 1664
## 92 PNS 1539
## 93 PSP 106
## 94 RDU 1740
## 95 RIC 900
## 96 RNO 243
## 97 RSW 948
## 98 SAN 2936
## 99 SAT 4893
## 100 SAV 863
## 101 SDF 1279
## 102 SEA 2615
## 103 SFO 2818
## 104 SHV 787
## 105 SJC 885
## 106 SJU 391
## 107 SLC 2033
## 108 SMF 1014
## 109 SNA 1661
## 110 STL 2509
## 111 TPA 3085
## 112 TUL 2924
## 113 TUS 1565
## 114 TYS 1210
## 115 VPS 880
## 116 XNA 1172
# Dest counts, smallest first.
dest_counts <- count(hflights, Dest)
arrange(dest_counts, n)
## Dest n
## 1 AGS 1
## 2 BPT 3
## 3 GRK 42
## 4 GUC 86
## 5 PSP 106
## 6 BKG 110
## 7 EGE 110
## 8 HDN 110
## 9 ANC 125
## 10 ASE 125
## 11 MTJ 164
## 12 RNO 243
## 13 MLU 292
## 14 HOB 309
## 15 AVL 350
## 16 CRW 357
## 17 LCH 364
## 18 SJU 391
## 19 HNL 402
## 20 GJT 403
## 21 CID 410
## 22 DAY 451
## 23 BFL 504
## 24 CAE 561
## 25 LEX 584
## 26 GSO 630
## 27 DSM 647
## 28 GRR 677
## 29 OAK 690
## 30 JFK 695
## 31 ORF 717
## 32 AEX 724
## 33 ECP 729
## 34 SHV 787
## 35 SAV 863
## 36 VPS 880
## 37 SJC 885
## 38 RIC 900
## 39 HSV 923
## 40 RSW 948
## 41 ONT 952
## 42 SMF 1014
## 43 GSP 1123
## 44 MFE 1128
## 45 XNA 1172
## 46 LRD 1188
## 47 CHS 1200
## 48 TYS 1210
## 49 PDX 1235
## 50 PBI 1253
## 51 SDF 1279
## 52 AMA 1297
## 53 LBB 1333
## 54 CMH 1348
## 55 ICT 1517
## 56 CVG 1535
## 57 PNS 1539
## 58 TUS 1565
## 59 LIT 1579
## 60 MKE 1588
## 61 GPT 1618
## 62 COS 1657
## 63 SNA 1661
## 64 PIT 1664
## 65 MOB 1674
## 66 BRO 1692
## 67 RDU 1740
## 68 IND 1750
## 69 BOS 1752
## 70 BTR 1762
## 71 IAD 1980
## 72 MSP 2010
## 73 JAN 2011
## 74 SLC 2033
## 75 OMA 2044
## 76 MDW 2094
## 77 JAX 2135
## 78 CLE 2140
## 79 MAF 2306
## 80 LFT 2313
## 81 PHL 2367
## 82 MEM 2399
## 83 FLL 2462
## 84 MIA 2463
## 85 STL 2509
## 86 BWI 2551
## 87 DTW 2601
## 88 SEA 2615
## 89 DCA 2699
## 90 LGA 2730
## 91 BHM 2736
## 92 ABQ 2812
## 93 SFO 2818
## 94 TUL 2924
## 95 SAN 2936
## 96 ELP 3036
## 97 TPA 3085
## 98 OKC 3170
## 99 MCI 3174
## 100 BNA 3481
## 101 MCO 3687
## 102 HRL 3983
## 103 LAS 4082
## 104 EWR 4314
## 105 CLT 4735
## 106 CRP 4813
## 107 SAT 4893
## 108 AUS 5022
## 109 PHX 5096
## 110 ORD 5748
## 111 DEN 5920
## 112 LAX 6064
## 113 DFW 6653
## 114 MSY 6823
## 115 ATL 7886
## 116 DAL 9820
# Dest counts, largest first.
arrange(count(hflights, Dest), desc(n))
## Dest n
## 1 DAL 9820
## 2 ATL 7886
## 3 MSY 6823
## 4 DFW 6653
## 5 LAX 6064
## 6 DEN 5920
## 7 ORD 5748
## 8 PHX 5096
## 9 AUS 5022
## 10 SAT 4893
## 11 CRP 4813
## 12 CLT 4735
## 13 EWR 4314
## 14 LAS 4082
## 15 HRL 3983
## 16 MCO 3687
## 17 BNA 3481
## 18 MCI 3174
## 19 OKC 3170
## 20 TPA 3085
## 21 ELP 3036
## 22 SAN 2936
## 23 TUL 2924
## 24 SFO 2818
## 25 ABQ 2812
## 26 BHM 2736
## 27 LGA 2730
## 28 DCA 2699
## 29 SEA 2615
## 30 DTW 2601
## 31 BWI 2551
## 32 STL 2509
## 33 MIA 2463
## 34 FLL 2462
## 35 MEM 2399
## 36 PHL 2367
## 37 LFT 2313
## 38 MAF 2306
## 39 CLE 2140
## 40 JAX 2135
## 41 MDW 2094
## 42 OMA 2044
## 43 SLC 2033
## 44 JAN 2011
## 45 MSP 2010
## 46 IAD 1980
## 47 BTR 1762
## 48 BOS 1752
## 49 IND 1750
## 50 RDU 1740
## 51 BRO 1692
## 52 MOB 1674
## 53 PIT 1664
## 54 SNA 1661
## 55 COS 1657
## 56 GPT 1618
## 57 MKE 1588
## 58 LIT 1579
## 59 TUS 1565
## 60 PNS 1539
## 61 CVG 1535
## 62 ICT 1517
## 63 CMH 1348
## 64 LBB 1333
## 65 AMA 1297
## 66 SDF 1279
## 67 PBI 1253
## 68 PDX 1235
## 69 TYS 1210
## 70 CHS 1200
## 71 LRD 1188
## 72 XNA 1172
## 73 MFE 1128
## 74 GSP 1123
## 75 SMF 1014
## 76 ONT 952
## 77 RSW 948
## 78 HSV 923
## 79 RIC 900
## 80 SJC 885
## 81 VPS 880
## 82 SAV 863
## 83 SHV 787
## 84 ECP 729
## 85 AEX 724
## 86 ORF 717
## 87 JFK 695
## 88 OAK 690
## 89 GRR 677
## 90 DSM 647
## 91 GSO 630
## 92 LEX 584
## 93 CAE 561
## 94 BFL 504
## 95 DAY 451
## 96 CID 410
## 97 GJT 403
## 98 HNL 402
## 99 SJU 391
## 100 LCH 364
## 101 CRW 357
## 102 AVL 350
## 103 HOB 309
## 104 MLU 292
## 105 RNO 243
## 106 MTJ 164
## 107 ANC 125
## 108 ASE 125
## 109 BKG 110
## 110 EGE 110
## 111 HDN 110
## 112 PSP 106
## 113 GUC 86
## 114 GRK 42
## 115 BPT 3
## 116 AGS 1
# gapminder data: attach the package and inspect the column layout.
library(gapminder)
gapminder %>% glimpse()
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Number of rows per country.
count(gapminder, country)
## # A tibble: 142 × 2
## country n
## <fct> <int>
## 1 Afghanistan 12
## 2 Albania 12
## 3 Algeria 12
## 4 Angola 12
## 5 Argentina 12
## 6 Australia 12
## 7 Austria 12
## 8 Bahrain 12
## 9 Bangladesh 12
## 10 Belgium 12
## # ℹ 132 more rows
# Number of rows per continent.
count(gapminder, continent)
## # A tibble: 5 × 2
## continent n
## <fct> <int>
## 1 Africa 624
## 2 Americas 300
## 3 Asia 396
## 4 Europe 360
## 5 Oceania 24
# Mean life expectancy by continent
gapminder %>%
  group_by(continent) %>%
  summarise(m = mean(lifeExp))
## # A tibble: 5 × 2
## continent m
## <fct> <dbl>
## 1 Africa 48.9
## 2 Americas 64.7
## 3 Asia 60.1
## 4 Europe 71.9
## 5 Oceania 74.3
# Download the tips CSV from the seaborn-data repository and inspect it.
tips <- read.csv(
  "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
)
tips %>% glimpse()
## Rows: 244
## Columns: 7
## $ total_bill <dbl> 16.99, 10.34, 21.01, 23.68, 24.59, 25.29, 8.77, 26.88, 15.0…
## $ tip <dbl> 1.01, 1.66, 3.50, 3.31, 3.61, 4.71, 2.00, 3.12, 1.96, 3.23,…
## $ sex <chr> "Female", "Male", "Male", "Male", "Female", "Male", "Male",…
## $ smoker <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",…
## $ day <chr> "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Su…
## $ time <chr> "Dinner", "Dinner", "Dinner", "Dinner", "Dinner", "Dinner",…
## $ size <int> 2, 3, 3, 2, 4, 4, 2, 4, 2, 2, 2, 4, 2, 4, 2, 2, 3, 3, 3, 3,…
# First rows of tips.
tips %>% head()
## total_bill tip sex smoker day time size
## 1 16.99 1.01 Female No Sun Dinner 2
## 2 10.34 1.66 Male No Sun Dinner 3
## 3 21.01 3.50 Male No Sun Dinner 3
## 4 23.68 3.31 Male No Sun Dinner 2
## 5 24.59 3.61 Female No Sun Dinner 4
## 6 25.29 4.71 Male No Sun Dinner 4
# Distribution of the integer `size` column. One bin per integer value
# (binwidth = 1) replaces the default of 30 bins, which ggplot2 itself warns
# about and which leaves mostly empty bins for a small integer range.
tips %>%
  ggplot(aes(size)) +
  geom_histogram(binwidth = 1)

# Tip against total bill.
tips %>%
  ggplot(aes(total_bill, tip)) +
  geom_point()

# Same scatter plot, coloured by day.
tips %>%
  ggplot(aes(total_bill, tip)) +
  geom_point(aes(colour = day))

# Coloured by day, point shape by sex, with larger points.
tips %>%
  ggplot(aes(total_bill, tip)) +
  geom_point(aes(colour = day, shape = sex), size = 3)

# Life expectancy over time by continent: faint points plus a smoothed curve
# per continent (the smoothing method is left at the ggplot2 default).
gapminder %>%
  ggplot(aes(x = year, y = lifeExp, colour = continent)) +
  geom_point(alpha = 0.2) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Keep only the 1952 rows.
x <- filter(gapminder, year == 1952)
# Base-graphics histogram of 1952 life expectancy.
hist(x$lifeExp, main = "Histogram of lifeExp in 1952")

# Box plot of 1952 life expectancy per continent.
ggplot(x, aes(x = continent, y = lifeExp)) +
  geom_boxplot()

# Load the built-in ChickWeight data set and inspect its columns.
data(ChickWeight)
ChickWeight %>% glimpse()
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5…
## $ Time <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1…
## $ Chick <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ Diet <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
# Box plot of weight for each Diet level.
boxplot(
  weight ~ Diet,
  data = ChickWeight,
  main = "Chick Weight by Diet",
  xlab = "Diet",
  ylab = "Weight"
)

# Reload cars and dplyr for the regression section, then inspect the data.
data("cars")
library(dplyr)
cars %>% glimpse()
## Rows: 50
## Columns: 2
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13…
## $ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 34…
# Scatter plot of the two cars columns.
plot(cars)
# Simple linear regression of dist on speed.
car_model <- lm(formula = dist ~ speed, data = cars)
coef(car_model)
## (Intercept) speed
## -17.579095 3.932409
# Overlay the fitted line on the scatter plot.
abline(car_model, col = "red")

# Fitted dist value for every observation in the training data.
fitted.values(car_model)
## 1 2 3 4 5 6 7 8
## -1.849460 -1.849460 9.947766 9.947766 13.880175 17.812584 21.744993 21.744993
## 9 10 11 12 13 14 15 16
## 21.744993 25.677401 25.677401 29.609810 29.609810 29.609810 29.609810 33.542219
## 17 18 19 20 21 22 23 24
## 33.542219 33.542219 33.542219 37.474628 37.474628 37.474628 37.474628 41.407036
## 25 26 27 28 29 30 31 32
## 41.407036 41.407036 45.339445 45.339445 49.271854 49.271854 49.271854 53.204263
## 33 34 35 36 37 38 39 40
## 53.204263 53.204263 53.204263 57.136672 57.136672 57.136672 61.069080 61.069080
## 41 42 43 44 45 46 47 48
## 61.069080 61.069080 61.069080 68.933898 72.866307 76.798715 76.798715 76.798715
## 49 50
## 76.798715 80.731124
# Residual (observed minus fitted dist) for every observation.
resid(car_model)
## 1 2 3 4 5 6 7
## 3.849460 11.849460 -5.947766 12.052234 2.119825 -7.812584 -3.744993
## 8 9 10 11 12 13 14
## 4.255007 12.255007 -8.677401 2.322599 -15.609810 -9.609810 -5.609810
## 15 16 17 18 19 20 21
## -1.609810 -7.542219 0.457781 0.457781 12.457781 -11.474628 -1.474628
## 22 23 24 25 26 27 28
## 22.525372 42.525372 -21.407036 -15.407036 12.592964 -13.339445 -5.339445
## 29 30 31 32 33 34 35
## -17.271854 -9.271854 0.728146 -11.204263 2.795737 22.795737 30.795737
## 36 37 38 39 40 41 42
## -21.136672 -11.136672 10.863328 -29.069080 -13.069080 -9.069080 -5.069080
## 43 44 45 46 47 48 49
## 2.930920 -2.933898 -18.866307 -6.798715 15.201285 16.201285 43.201285
## 50
## 4.268876
# Predicted dist for a single new speed value.
nx1 <- data.frame(speed = 21.5)
predict(car_model, newdata = nx1)
## 1
## 66.96769
# Predictions for several new speeds, drawn as large red dots with the
# regression line on top.
nx <- data.frame(speed = c(21.5, 25.0, 25.5, 26.0, 26.5, 27.0, 28.0))
nx_pred <- predict(car_model, newdata = nx)
plot(nx$speed, nx_pred, col = "red", cex = 2, pch = 20)
abline(car_model)

# install.packages("caret")
# install.packages("mlbench")
library(caret)
## Loading required package: lattice
library(mlbench)
# Load the Sonar data set from mlbench and inspect its columns.
data("Sonar")
Sonar %>% glimpse()
## Rows: 208
## Columns: 61
## $ V1 <dbl> 0.0200, 0.0453, 0.0262, 0.0100, 0.0762, 0.0286, 0.0317, 0.0519, …
## $ V2 <dbl> 0.0371, 0.0523, 0.0582, 0.0171, 0.0666, 0.0453, 0.0956, 0.0548, …
## $ V3 <dbl> 0.0428, 0.0843, 0.1099, 0.0623, 0.0481, 0.0277, 0.1321, 0.0842, …
## $ V4 <dbl> 0.0207, 0.0689, 0.1083, 0.0205, 0.0394, 0.0174, 0.1408, 0.0319, …
## $ V5 <dbl> 0.0954, 0.1183, 0.0974, 0.0205, 0.0590, 0.0384, 0.1674, 0.1158, …
## $ V6 <dbl> 0.0986, 0.2583, 0.2280, 0.0368, 0.0649, 0.0990, 0.1710, 0.0922, …
## $ V7 <dbl> 0.1539, 0.2156, 0.2431, 0.1098, 0.1209, 0.1201, 0.0731, 0.1027, …
## $ V8 <dbl> 0.1601, 0.3481, 0.3771, 0.1276, 0.2467, 0.1833, 0.1401, 0.0613, …
## $ V9 <dbl> 0.3109, 0.3337, 0.5598, 0.0598, 0.3564, 0.2105, 0.2083, 0.1465, …
## $ V10 <dbl> 0.2111, 0.2872, 0.6194, 0.1264, 0.4459, 0.3039, 0.3513, 0.2838, …
## $ V11 <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2802, …
## $ V12 <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3086, …
## $ V13 <dbl> 0.2238, 0.6919, 0.5544, 0.0184, 0.4256, 0.6343, 0.0513, 0.2657, …
## $ V14 <dbl> 0.0645, 0.7797, 0.5320, 0.2261, 0.4135, 0.8198, 0.3752, 0.3801, …
## $ V15 <dbl> 0.0660, 0.7464, 0.6479, 0.1729, 0.4528, 1.0000, 0.5419, 0.5626, …
## $ V16 <dbl> 0.2273, 0.9444, 0.6931, 0.2131, 0.5326, 0.9988, 0.5440, 0.4376, …
## $ V17 <dbl> 0.3100, 1.0000, 0.6759, 0.0693, 0.7306, 0.9508, 0.5150, 0.2617, …
## $ V18 <dbl> 0.2999, 0.8874, 0.7551, 0.2281, 0.6193, 0.9025, 0.4262, 0.1199, …
## $ V19 <dbl> 0.5078, 0.8024, 0.8929, 0.4060, 0.2032, 0.7234, 0.2024, 0.6676, …
## $ V20 <dbl> 0.4797, 0.7818, 0.8619, 0.3973, 0.4636, 0.5122, 0.4233, 0.9402, …
## $ V21 <dbl> 0.5783, 0.5212, 0.7974, 0.2741, 0.4148, 0.2074, 0.7723, 0.7832, …
## $ V22 <dbl> 0.5071, 0.4052, 0.6737, 0.3690, 0.4292, 0.3985, 0.9735, 0.5352, …
## $ V23 <dbl> 0.4328, 0.3957, 0.4293, 0.5556, 0.5730, 0.5890, 0.9390, 0.6809, …
## $ V24 <dbl> 0.5550, 0.3914, 0.3648, 0.4846, 0.5399, 0.2872, 0.5559, 0.9174, …
## $ V25 <dbl> 0.6711, 0.3250, 0.5331, 0.3140, 0.3161, 0.2043, 0.5268, 0.7613, …
## $ V26 <dbl> 0.6415, 0.3200, 0.2413, 0.5334, 0.2285, 0.5782, 0.6826, 0.8220, …
## $ V27 <dbl> 0.7104, 0.3271, 0.5070, 0.5256, 0.6995, 0.5389, 0.5713, 0.8872, …
## $ V28 <dbl> 0.8080, 0.2767, 0.8533, 0.2520, 1.0000, 0.3750, 0.5429, 0.6091, …
## $ V29 <dbl> 0.6791, 0.4423, 0.6036, 0.2090, 0.7262, 0.3411, 0.2177, 0.2967, …
## $ V30 <dbl> 0.3857, 0.2028, 0.8514, 0.3559, 0.4724, 0.5067, 0.2149, 0.1103, …
## $ V31 <dbl> 0.1307, 0.3788, 0.8512, 0.6260, 0.5103, 0.5580, 0.5811, 0.1318, …
## $ V32 <dbl> 0.2604, 0.2947, 0.5045, 0.7340, 0.5459, 0.4778, 0.6323, 0.0624, …
## $ V33 <dbl> 0.5121, 0.1984, 0.1862, 0.6120, 0.2881, 0.3299, 0.2965, 0.0990, …
## $ V34 <dbl> 0.7547, 0.2341, 0.2709, 0.3497, 0.0981, 0.2198, 0.1873, 0.4006, …
## $ V35 <dbl> 0.8537, 0.1306, 0.4232, 0.3953, 0.1951, 0.1407, 0.2969, 0.3666, …
## $ V36 <dbl> 0.8507, 0.4182, 0.3043, 0.3012, 0.4181, 0.2856, 0.5163, 0.1050, …
## $ V37 <dbl> 0.6692, 0.3835, 0.6116, 0.5408, 0.4604, 0.3807, 0.6153, 0.1915, …
## $ V38 <dbl> 0.6097, 0.1057, 0.6756, 0.8814, 0.3217, 0.4158, 0.4283, 0.3930, …
## $ V39 <dbl> 0.4943, 0.1840, 0.5375, 0.9857, 0.2828, 0.4054, 0.5479, 0.4288, …
## $ V40 <dbl> 0.2744, 0.1970, 0.4719, 0.9167, 0.2430, 0.3296, 0.6133, 0.2546, …
## $ V41 <dbl> 0.0510, 0.1674, 0.4647, 0.6121, 0.1979, 0.2707, 0.5017, 0.1151, …
## $ V42 <dbl> 0.2834, 0.0583, 0.2587, 0.5006, 0.2444, 0.2650, 0.2377, 0.2196, …
## $ V43 <dbl> 0.2825, 0.1401, 0.2129, 0.3210, 0.1847, 0.0723, 0.1957, 0.1879, …
## $ V44 <dbl> 0.4256, 0.1628, 0.2222, 0.3202, 0.0841, 0.1238, 0.1749, 0.1437, …
## $ V45 <dbl> 0.2641, 0.0621, 0.2111, 0.4295, 0.0692, 0.1192, 0.1304, 0.2146, …
## $ V46 <dbl> 0.1386, 0.0203, 0.0176, 0.3654, 0.0528, 0.1089, 0.0597, 0.2360, …
## $ V47 <dbl> 0.1051, 0.0530, 0.1348, 0.2655, 0.0357, 0.0623, 0.1124, 0.1125, …
## $ V48 <dbl> 0.1343, 0.0742, 0.0744, 0.1576, 0.0085, 0.0494, 0.1047, 0.0254, …
## $ V49 <dbl> 0.0383, 0.0409, 0.0130, 0.0681, 0.0230, 0.0264, 0.0507, 0.0285, …
## $ V50 <dbl> 0.0324, 0.0061, 0.0106, 0.0294, 0.0046, 0.0081, 0.0159, 0.0178, …
## $ V51 <dbl> 0.0232, 0.0125, 0.0033, 0.0241, 0.0156, 0.0104, 0.0195, 0.0052, …
## $ V52 <dbl> 0.0027, 0.0084, 0.0232, 0.0121, 0.0031, 0.0045, 0.0201, 0.0081, …
## $ V53 <dbl> 0.0065, 0.0089, 0.0166, 0.0036, 0.0054, 0.0014, 0.0248, 0.0120, …
## $ V54 <dbl> 0.0159, 0.0048, 0.0095, 0.0150, 0.0105, 0.0038, 0.0131, 0.0045, …
## $ V55 <dbl> 0.0072, 0.0094, 0.0180, 0.0085, 0.0110, 0.0013, 0.0070, 0.0121, …
## $ V56 <dbl> 0.0167, 0.0191, 0.0244, 0.0073, 0.0015, 0.0089, 0.0138, 0.0097, …
## $ V57 <dbl> 0.0180, 0.0140, 0.0316, 0.0050, 0.0072, 0.0057, 0.0092, 0.0085, …
## $ V58 <dbl> 0.0084, 0.0049, 0.0164, 0.0044, 0.0048, 0.0027, 0.0143, 0.0047, …
## $ V59 <dbl> 0.0090, 0.0052, 0.0095, 0.0040, 0.0107, 0.0051, 0.0036, 0.0048, …
## $ V60 <dbl> 0.0032, 0.0044, 0.0078, 0.0117, 0.0094, 0.0062, 0.0103, 0.0053, …
## $ Class <fct> R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R…
# Per-column summary statistics of Sonar.
Sonar %>% summary()
## V1 V2 V3 V4
## Min. :0.00150 Min. :0.00060 Min. :0.00150 Min. :0.00580
## 1st Qu.:0.01335 1st Qu.:0.01645 1st Qu.:0.01895 1st Qu.:0.02438
## Median :0.02280 Median :0.03080 Median :0.03430 Median :0.04405
## Mean :0.02916 Mean :0.03844 Mean :0.04383 Mean :0.05389
## 3rd Qu.:0.03555 3rd Qu.:0.04795 3rd Qu.:0.05795 3rd Qu.:0.06450
## Max. :0.13710 Max. :0.23390 Max. :0.30590 Max. :0.42640
## V5 V6 V7 V8
## Min. :0.00670 Min. :0.01020 Min. :0.0033 Min. :0.00550
## 1st Qu.:0.03805 1st Qu.:0.06703 1st Qu.:0.0809 1st Qu.:0.08042
## Median :0.06250 Median :0.09215 Median :0.1070 Median :0.11210
## Mean :0.07520 Mean :0.10457 Mean :0.1217 Mean :0.13480
## 3rd Qu.:0.10028 3rd Qu.:0.13412 3rd Qu.:0.1540 3rd Qu.:0.16960
## Max. :0.40100 Max. :0.38230 Max. :0.3729 Max. :0.45900
## V9 V10 V11 V12
## Min. :0.00750 Min. :0.0113 Min. :0.0289 Min. :0.0236
## 1st Qu.:0.09703 1st Qu.:0.1113 1st Qu.:0.1293 1st Qu.:0.1335
## Median :0.15225 Median :0.1824 Median :0.2248 Median :0.2490
## Mean :0.17800 Mean :0.2083 Mean :0.2360 Mean :0.2502
## 3rd Qu.:0.23342 3rd Qu.:0.2687 3rd Qu.:0.3016 3rd Qu.:0.3312
## Max. :0.68280 Max. :0.7106 Max. :0.7342 Max. :0.7060
## V13 V14 V15 V16
## Min. :0.0184 Min. :0.0273 Min. :0.0031 Min. :0.0162
## 1st Qu.:0.1661 1st Qu.:0.1752 1st Qu.:0.1646 1st Qu.:0.1963
## Median :0.2640 Median :0.2811 Median :0.2817 Median :0.3047
## Mean :0.2733 Mean :0.2966 Mean :0.3202 Mean :0.3785
## 3rd Qu.:0.3513 3rd Qu.:0.3862 3rd Qu.:0.4529 3rd Qu.:0.5357
## Max. :0.7131 Max. :0.9970 Max. :1.0000 Max. :0.9988
## V17 V18 V19 V20
## Min. :0.0349 Min. :0.0375 Min. :0.0494 Min. :0.0656
## 1st Qu.:0.2059 1st Qu.:0.2421 1st Qu.:0.2991 1st Qu.:0.3506
## Median :0.3084 Median :0.3683 Median :0.4350 Median :0.5425
## Mean :0.4160 Mean :0.4523 Mean :0.5048 Mean :0.5630
## 3rd Qu.:0.6594 3rd Qu.:0.6791 3rd Qu.:0.7314 3rd Qu.:0.8093
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V21 V22 V23 V24
## Min. :0.0512 Min. :0.0219 Min. :0.0563 Min. :0.0239
## 1st Qu.:0.3997 1st Qu.:0.4069 1st Qu.:0.4502 1st Qu.:0.5407
## Median :0.6177 Median :0.6649 Median :0.6997 Median :0.6985
## Mean :0.6091 Mean :0.6243 Mean :0.6470 Mean :0.6727
## 3rd Qu.:0.8170 3rd Qu.:0.8320 3rd Qu.:0.8486 3rd Qu.:0.8722
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.0240 Min. :0.0921 Min. :0.0481 Min. :0.0284
## 1st Qu.:0.5258 1st Qu.:0.5442 1st Qu.:0.5319 1st Qu.:0.5348
## Median :0.7211 Median :0.7545 Median :0.7456 Median :0.7319
## Mean :0.6754 Mean :0.6999 Mean :0.7022 Mean :0.6940
## 3rd Qu.:0.8737 3rd Qu.:0.8938 3rd Qu.:0.9171 3rd Qu.:0.9003
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.0144 Min. :0.0613 Min. :0.0482 Min. :0.0404
## 1st Qu.:0.4637 1st Qu.:0.4114 1st Qu.:0.3456 1st Qu.:0.2814
## Median :0.6808 Median :0.6071 Median :0.4904 Median :0.4296
## Mean :0.6421 Mean :0.5809 Mean :0.5045 Mean :0.4390
## 3rd Qu.:0.8521 3rd Qu.:0.7352 3rd Qu.:0.6420 3rd Qu.:0.5803
## Max. :1.0000 Max. :1.0000 Max. :0.9657 Max. :0.9306
## V33 V34 V35 V36
## Min. :0.0477 Min. :0.0212 Min. :0.0223 Min. :0.0080
## 1st Qu.:0.2579 1st Qu.:0.2176 1st Qu.:0.1794 1st Qu.:0.1543
## Median :0.3912 Median :0.3510 Median :0.3127 Median :0.3211
## Mean :0.4172 Mean :0.4032 Mean :0.3926 Mean :0.3848
## 3rd Qu.:0.5561 3rd Qu.:0.5961 3rd Qu.:0.5934 3rd Qu.:0.5565
## Max. :1.0000 Max. :0.9647 Max. :1.0000 Max. :1.0000
## V37 V38 V39 V40
## Min. :0.0351 Min. :0.0383 Min. :0.0371 Min. :0.0117
## 1st Qu.:0.1601 1st Qu.:0.1743 1st Qu.:0.1740 1st Qu.:0.1865
## Median :0.3063 Median :0.3127 Median :0.2835 Median :0.2781
## Mean :0.3638 Mean :0.3397 Mean :0.3258 Mean :0.3112
## 3rd Qu.:0.5189 3rd Qu.:0.4405 3rd Qu.:0.4349 3rd Qu.:0.4244
## Max. :0.9497 Max. :1.0000 Max. :0.9857 Max. :0.9297
## V41 V42 V43 V44
## Min. :0.0360 Min. :0.0056 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1631 1st Qu.:0.1589 1st Qu.:0.1552 1st Qu.:0.1269
## Median :0.2595 Median :0.2451 Median :0.2225 Median :0.1777
## Mean :0.2893 Mean :0.2783 Mean :0.2465 Mean :0.2141
## 3rd Qu.:0.3875 3rd Qu.:0.3842 3rd Qu.:0.3245 3rd Qu.:0.2717
## Max. :0.8995 Max. :0.8246 Max. :0.7733 Max. :0.7762
## V45 V46 V47 V48
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.09448 1st Qu.:0.06855 1st Qu.:0.06425 1st Qu.:0.04512
## Median :0.14800 Median :0.12135 Median :0.10165 Median :0.07810
## Mean :0.19723 Mean :0.16063 Mean :0.12245 Mean :0.09142
## 3rd Qu.:0.23155 3rd Qu.:0.20037 3rd Qu.:0.15443 3rd Qu.:0.12010
## Max. :0.70340 Max. :0.72920 Max. :0.55220 Max. :0.33390
## V49 V50 V51 V52
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.000800
## 1st Qu.:0.02635 1st Qu.:0.01155 1st Qu.:0.008425 1st Qu.:0.007275
## Median :0.04470 Median :0.01790 Median :0.013900 Median :0.011400
## Mean :0.05193 Mean :0.02042 Mean :0.016069 Mean :0.013420
## 3rd Qu.:0.06853 3rd Qu.:0.02527 3rd Qu.:0.020825 3rd Qu.:0.016725
## Max. :0.19810 Max. :0.08250 Max. :0.100400 Max. :0.070900
## V53 V54 V55 V56
## Min. :0.000500 Min. :0.001000 Min. :0.00060 Min. :0.000400
## 1st Qu.:0.005075 1st Qu.:0.005375 1st Qu.:0.00415 1st Qu.:0.004400
## Median :0.009550 Median :0.009300 Median :0.00750 Median :0.006850
## Mean :0.010709 Mean :0.010941 Mean :0.00929 Mean :0.008222
## 3rd Qu.:0.014900 3rd Qu.:0.014500 3rd Qu.:0.01210 3rd Qu.:0.010575
## Max. :0.039000 Max. :0.035200 Max. :0.04470 Max. :0.039400
## V57 V58 V59 V60
## Min. :0.00030 Min. :0.000300 Min. :0.000100 Min. :0.000600
## 1st Qu.:0.00370 1st Qu.:0.003600 1st Qu.:0.003675 1st Qu.:0.003100
## Median :0.00595 Median :0.005800 Median :0.006400 Median :0.005300
## Mean :0.00782 Mean :0.007949 Mean :0.007941 Mean :0.006507
## 3rd Qu.:0.01043 3rd Qu.:0.010350 3rd Qu.:0.010325 3rd Qu.:0.008525
## Max. :0.03550 Max. :0.044000 Max. :0.036400 Max. :0.043900
## Class
## M:111
## R: 97
##
##
##
##
# Class balance of the outcome column.
count(Sonar, Class)
## Class n
## 1 M 111
## 2 R 97
# Seeded 75/25 split on Class; createDataPartition() returns the row indices
# of the training part (list = FALSE gives a matrix rather than a list).
set.seed(998)
inTraining <- createDataPartition(y = Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]
# NOTE(review): this seed is not consumed by anything visible below --
# presumably intended for a later model-fitting step; confirm.
set.seed(825)
nrow(training)
## [1] 157
nrow(testing)
## [1] 51
# Download the UCLA admissions CSV and inspect its columns.
ucla <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
library(dplyr)
ucla %>% glimpse()
## Rows: 400
## Columns: 4
## $ admit <int> 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1…
## $ gre <int> 380, 660, 800, 640, 520, 760, 560, 400, 540, 700, 800, 440, 760,…
## $ gpa <dbl> 3.61, 3.67, 4.00, 3.19, 2.93, 3.00, 2.98, 3.08, 3.39, 3.92, 4.00…
## $ rank <int> 3, 3, 1, 4, 4, 2, 1, 2, 3, 2, 4, 1, 1, 2, 1, 3, 4, 3, 2, 1, 3, 2…
# Treat the 0/1 admit column as a factor outcome.
ucla <- ucla %>% mutate(admit = as.factor(admit))
# Logistic regression of admit on every other column (gre, gpa, rank).
m <- glm(formula = admit ~ ., family = "binomial", data = ucla)
summary(m)
##
## Call:
## glm(formula = admit ~ ., family = "binomial", data = ucla)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.449548 1.132846 -3.045 0.00233 **
## gre 0.002294 0.001092 2.101 0.03564 *
## gpa 0.777014 0.327484 2.373 0.01766 *
## rank -0.560031 0.127137 -4.405 1.06e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.98 on 399 degrees of freedom
## Residual deviance: 459.44 on 396 degrees of freedom
## AIC: 467.44
##
## Number of Fisher Scoring iterations: 4
# Odds ratio for gpa: exponentiate the coefficient taken from the fitted
# model itself, so the value cannot go stale if the data or formula change
# (it was previously a number hand-copied from the printed summary).
exp(coef(m)[["gpa"]])
## [1] 2.174968
# Predicted admission probability (type = "response") for one applicant
# with gre = 400, gpa = 3.6, rank = 3.
s <- data.frame(gre = 400, gpa = 3.6, rank = 3)
predict(m, newdata = s, type = "response")
## 1
## 0.1954765