# Show the working directory the session starts in.
getwd()
## [1] "C:/data"
# Point the session at the data folder; the relative path "ta1.csv" read
# later in this script resolves against it.
# NOTE(review): a hard-coded setwd() ties the script to one machine's layout.
setwd("c:/data")

# List the workspace, clear it, and confirm that it is empty.
# NOTE(review): rm(list = ls()) wipes whatever was in the global environment
# but leaves attached packages and options untouched.
ls()
## character(0)
rm(list=ls())
ls()
## character(0)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the EUC-KR encoded CSV from the working directory and inspect it.
df <- read.csv(file = "ta1.csv", fileEncoding = "euc-kr")
View(df)

# Column names exactly as read.csv() produced them.
names(df)
## [1] "가해자연령층별.1." "월별.1."           "X2022"            
## [4] "X2022.1"           "X2022.2"
# Replace the auto-generated column names with readable ones
# (rename() keeps the columns in their original positions).
df1 <- rename(
  df,
  연령층 = 가해자연령층별.1.,
  month = 월별.1.,
  사고건수 = X2022,
  사망자수 = X2022.1,
  부상자수 = X2022.2
)

View(df1)

# Drop the first data row.
# NOTE(review): presumably a secondary header row -- confirm against ta1.csv.
df2 <- slice(df1, -1)

# Preview the monthly rows only, leaving out the "전체" total rows.
# The filtered result is not stored.
df2 %>%
  filter(month != "전체") %>%
  glimpse()
## Rows: 108
## Columns: 5
## $ 연령층   <chr> "20세이하", "20세이하", "20세이하", "20세이하", "20세이하", "…
## $ month    <chr> "1월", "2월", "3월", "4월", "5월", "6월", "7월", "8월", "9월"…
## $ 사고건수 <chr> "435", "357", "473", "580", "713", "668", "639", "557", "609"…
## $ 사망자수 <chr> "6", "2", "3", "7", "6", "5", "6", "7", "8", "8", "8", "6", "…
## $ 부상자수 <chr> "633", "505", "678", "810", "955", "877", "847", "735", "808"…
# Parse the accident counts into numbers; the other count columns are left
# as character.
df2[["사고건수"]] <- as.numeric(df2[["사고건수"]])
glimpse(df2)
## Rows: 117
## Columns: 5
## $ 연령층   <chr> "20세이하", "20세이하", "20세이하", "20세이하", "20세이하", "…
## $ month    <chr> "전체", "1월", "2월", "3월", "4월", "5월", "6월", "7월", "8월…
## $ 사고건수 <dbl> 6508, 435, 357, 473, 580, 713, 668, 639, 557, 609, 618, 532, …
## $ 사망자수 <chr> "72", "6", "2", "3", "7", "6", "5", "6", "7", "8", "8", "8", …
## $ 부상자수 <chr> "8863", "633", "505", "678", "810", "955", "877", "847", "735…
#install.packages("gapminder")
library(gapminder)
# Total population for every (year, continent) pair; the result stays
# grouped by year.
y <- gapminder %>%
  group_by(year, continent) %>%
  summarize(c_pop = sum(pop))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# First 20 rows of the summary.
head(y, n = 20)
## # A tibble: 20 × 3
## # Groups:   year [4]
##     year continent      c_pop
##    <int> <fct>          <dbl>
##  1  1952 Africa     237640501
##  2  1952 Americas   345152446
##  3  1952 Asia      1395357351
##  4  1952 Europe     418120846
##  5  1952 Oceania     10686006
##  6  1957 Africa     264837738
##  7  1957 Americas   386953916
##  8  1957 Asia      1562780599
##  9  1957 Europe     437890351
## 10  1957 Oceania     11941976
## 11  1962 Africa     296516865
## 12  1962 Americas   433270254
## 13  1962 Asia      1696357182
## 14  1962 Europe     460355155
## 15  1962 Oceania     13283518
## 16  1967 Africa     335289489
## 17  1967 Americas   480746623
## 18  1967 Asia      1905662900
## 19  1967 Europe     481178958
## 20  1967 Oceania     14600414
# Open the raw data in the viewer.
View(gapminder)

# Base scatter plot of the continent totals against year.
plot(x = y$year, y = y$c_pop)

# Life expectancy against log10 GDP per capita. Passing the continent factor
# as `col` colours each point by the factor's integer code.
plot(log10(gapminder$gdpPercap), gapminder$lifeExp, col = gapminder$continent)
# The legend has to mirror how the points are actually drawn: every point
# uses the default symbol (pch = 1) and only the colour varies, so the key
# uses one symbol with one colour code per continent level. The colour codes
# are derived from the plotted data itself rather than from another object.
legend("bottomright",
       legend = levels(gapminder$continent),
       pch = 1,
       col = seq_along(levels(gapminder$continent)))

#install.packages("ggplot2")
library(ggplot2)

# Bubble chart: GDP per capita (log10 axis) against life expectancy,
# coloured by continent and sized by population.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

# Evaluating the scale on its own just prints the scale object.
scale_x_log10()
## <ScaleContinuousPosition>
##  Range:  
##  Limits:    0 --    1
# Same bubble chart with semi-transparent points to reduce overplotting.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10()

# One panel per survey year.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  facet_wrap(~year)

# Horizontal bar chart of 1952 population for Asian countries, ordered by
# population.
gapminder %>%
  filter(year == 1952, continent == "Asia") %>%
  ggplot(aes(x = reorder(country, pop), y = pop)) +
  geom_bar(stat = "identity") +
  coord_flip()

# Number of rows recorded for each continent.
count(gapminder, continent)
## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Africa      624
## 2 Americas    300
## 3 Asia        396
## 4 Europe      360
## 5 Oceania      24
# All rows for the Republic of Korea.
filter(gapminder, country == "Korea, Rep.")
## # A tibble: 12 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Korea, Rep. Asia       1952    47.5 20947571     1031.
##  2 Korea, Rep. Asia       1957    52.7 22611552     1488.
##  3 Korea, Rep. Asia       1962    55.3 26420307     1536.
##  4 Korea, Rep. Asia       1967    57.7 30131000     2029.
##  5 Korea, Rep. Asia       1972    62.6 33505000     3031.
##  6 Korea, Rep. Asia       1977    64.8 36436000     4657.
##  7 Korea, Rep. Asia       1982    67.1 39326000     5623.
##  8 Korea, Rep. Asia       1987    69.8 41622000     8533.
##  9 Korea, Rep. Asia       1992    72.2 43805450    12104.
## 10 Korea, Rep. Asia       1997    74.6 46173816    15994.
## 11 Korea, Rep. Asia       2002    77.0 47969150    19234.
## 12 Korea, Rep. Asia       2007    78.6 49044790    23348.
#gapminder %>% filter(country=='Korea, Rep.') %>% ggplot(aes(year,lifeExp,
#                                                            ))

# Load the built-in cars data set and look at its structure and summary.
data("cars")
glimpse(cars)
## Rows: 50
## Columns: 2
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13…
## $ dist  <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 34…
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
# Label each car "fast" when its speed is above the mean speed, otherwise
# "slow". The column is referenced through mutate()'s data mask (`speed`),
# not through the global `cars$speed`, so the step keeps working if rows are
# filtered or grouped upstream. The threshold is computed from the data
# instead of hard-coding the mean (15.4) read off summary(cars).
cars1 <- cars %>%
  mutate(violation = ifelse(speed > mean(speed), "fast", "slow"))
cars1
##    speed dist violation
## 1      4    2      slow
## 2      4   10      slow
## 3      7    4      slow
## 4      7   22      slow
## 5      8   16      slow
## 6      9   10      slow
## 7     10   18      slow
## 8     10   26      slow
## 9     10   34      slow
## 10    11   17      slow
## 11    11   28      slow
## 12    12   14      slow
## 13    12   20      slow
## 14    12   24      slow
## 15    12   28      slow
## 16    13   26      slow
## 17    13   34      slow
## 18    13   34      slow
## 19    13   46      slow
## 20    14   26      slow
## 21    14   36      slow
## 22    14   60      slow
## 23    14   80      slow
## 24    15   20      slow
## 25    15   26      slow
## 26    15   54      slow
## 27    16   32      fast
## 28    16   40      fast
## 29    17   32      fast
## 30    17   40      fast
## 31    17   50      fast
## 32    18   42      fast
## 33    18   56      fast
## 34    18   76      fast
## 35    18   84      fast
## 36    19   36      fast
## 37    19   46      fast
## 38    19   68      fast
## 39    20   32      fast
## 40    20   48      fast
## 41    20   52      fast
## 42    20   56      fast
## 43    20   64      fast
## 44    22   66      fast
## 45    23   54      fast
## 46    24   70      fast
## 47    24   92      fast
## 48    24   93      fast
## 49    24  120      fast
## 50    25   85      fast
# Structure before the conversion: violation is character.
glimpse(cars1)
## Rows: 50
## Columns: 3
## $ speed     <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13…
## $ dist      <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26…
## $ violation <chr> "slow", "slow", "slow", "slow", "slow", "slow", "slow", "slo…
# Store the labels as a factor.
cars1[["violation"]] <- factor(cars1[["violation"]])
glimpse(cars1)
## Rows: 50
## Columns: 3
## $ speed     <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13…
## $ dist      <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26…
## $ violation <fct> slow, slow, slow, slow, slow, slow, slow, slow, slow, slow, …
# Mean speed within each violation group.
cars1 %>%
  group_by(violation) %>%
  summarize(p = mean(speed))
## # A tibble: 2 × 2
##   violation     p
##   <fct>     <dbl>
## 1 fast       19.9
## 2 slow       11.2
#install.packages("hflights")
library(hflights)
# Per-column summary of the flights data (quantiles for numeric columns,
# length/class for character columns, NA counts where present).
summary(hflights)
##       Year          Month          DayofMonth      DayOfWeek        DepTime    
##  Min.   :2011   Min.   : 1.000   Min.   : 1.00   Min.   :1.000   Min.   :   1  
##  1st Qu.:2011   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:2.000   1st Qu.:1021  
##  Median :2011   Median : 7.000   Median :16.00   Median :4.000   Median :1416  
##  Mean   :2011   Mean   : 6.514   Mean   :15.74   Mean   :3.948   Mean   :1396  
##  3rd Qu.:2011   3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:6.000   3rd Qu.:1801  
##  Max.   :2011   Max.   :12.000   Max.   :31.00   Max.   :7.000   Max.   :2400  
##                                                                  NA's   :2905  
##     ArrTime     UniqueCarrier        FlightNum      TailNum         
##  Min.   :   1   Length:227496      Min.   :   1   Length:227496     
##  1st Qu.:1215   Class :character   1st Qu.: 855   Class :character  
##  Median :1617   Mode  :character   Median :1696   Mode  :character  
##  Mean   :1578                      Mean   :1962                     
##  3rd Qu.:1953                      3rd Qu.:2755                     
##  Max.   :2400                      Max.   :7290                     
##  NA's   :3066                                                       
##  ActualElapsedTime    AirTime         ArrDelay          DepDelay      
##  Min.   : 34.0     Min.   : 11.0   Min.   :-70.000   Min.   :-33.000  
##  1st Qu.: 77.0     1st Qu.: 58.0   1st Qu.: -8.000   1st Qu.: -3.000  
##  Median :128.0     Median :107.0   Median :  0.000   Median :  0.000  
##  Mean   :129.3     Mean   :108.1   Mean   :  7.094   Mean   :  9.445  
##  3rd Qu.:165.0     3rd Qu.:141.0   3rd Qu.: 11.000   3rd Qu.:  9.000  
##  Max.   :575.0     Max.   :549.0   Max.   :978.000   Max.   :981.000  
##  NA's   :3622      NA's   :3622    NA's   :3622      NA's   :2905     
##     Origin              Dest              Distance          TaxiIn       
##  Length:227496      Length:227496      Min.   :  79.0   Min.   :  1.000  
##  Class :character   Class :character   1st Qu.: 376.0   1st Qu.:  4.000  
##  Mode  :character   Mode  :character   Median : 809.0   Median :  5.000  
##                                        Mean   : 787.8   Mean   :  6.099  
##                                        3rd Qu.:1042.0   3rd Qu.:  7.000  
##                                        Max.   :3904.0   Max.   :165.000  
##                                                         NA's   :3066     
##     TaxiOut         Cancelled       CancellationCode      Diverted       
##  Min.   :  1.00   Min.   :0.00000   Length:227496      Min.   :0.000000  
##  1st Qu.: 10.00   1st Qu.:0.00000   Class :character   1st Qu.:0.000000  
##  Median : 14.00   Median :0.00000   Mode  :character   Median :0.000000  
##  Mean   : 15.09   Mean   :0.01307                      Mean   :0.002853  
##  3rd Qu.: 18.00   3rd Qu.:0.00000                      3rd Qu.:0.000000  
##  Max.   :163.00   Max.   :1.00000                      Max.   :1.000000  
##  NA's   :2947
# Compact overview: row/column counts plus each column's type and first values.
glimpse(hflights)
## Rows: 227,496
## Columns: 21
## $ Year              <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek         <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime           <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime           <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier     <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum         <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum           <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime           <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay          <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay          <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin            <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest              <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance          <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn            <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut           <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode  <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Dimensions (rows, columns) of the flights flown in February or August.
hflights %>%
  filter(Month %in% c(2, 8)) %>%
  dim()
## [1] 37304    21
# Structure of the full, unfiltered data set.
glimpse(hflights)
## Rows: 227,496
## Columns: 21
## $ Year              <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Month             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ DayofMonth        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ DayOfWeek         <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2…
## $ DepTime           <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 1355, 1443…
## $ ArrTime           <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 1454, 1554…
## $ UniqueCarrier     <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA"…
## $ FlightNum         <int> 428, 428, 428, 428, 428, 428, 428, 428, 428, 428, 42…
## $ TailNum           <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N492AA", "N…
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, 56, 63, …
## $ AirTime           <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, 41, 44, …
## $ ArrDelay          <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29, 5, -9, …
## $ DepDelay          <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, -2, -3, …
## $ Origin            <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "IA…
## $ Dest              <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "DF…
## $ Distance          <int> 224, 224, 224, 224, 224, 224, 224, 224, 224, 224, 22…
## $ TaxiIn            <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6, 12, 8,…
## $ TaxiOut           <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11, 13, 15…
## $ Cancelled         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ CancellationCode  <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", …
## $ Diverted          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Number of flights per destination airport.
count(hflights, Dest)
##     Dest    n
## 1    ABQ 2812
## 2    AEX  724
## 3    AGS    1
## 4    AMA 1297
## 5    ANC  125
## 6    ASE  125
## 7    ATL 7886
## 8    AUS 5022
## 9    AVL  350
## 10   BFL  504
## 11   BHM 2736
## 12   BKG  110
## 13   BNA 3481
## 14   BOS 1752
## 15   BPT    3
## 16   BRO 1692
## 17   BTR 1762
## 18   BWI 2551
## 19   CAE  561
## 20   CHS 1200
## 21   CID  410
## 22   CLE 2140
## 23   CLT 4735
## 24   CMH 1348
## 25   COS 1657
## 26   CRP 4813
## 27   CRW  357
## 28   CVG 1535
## 29   DAL 9820
## 30   DAY  451
## 31   DCA 2699
## 32   DEN 5920
## 33   DFW 6653
## 34   DSM  647
## 35   DTW 2601
## 36   ECP  729
## 37   EGE  110
## 38   ELP 3036
## 39   EWR 4314
## 40   FLL 2462
## 41   GJT  403
## 42   GPT 1618
## 43   GRK   42
## 44   GRR  677
## 45   GSO  630
## 46   GSP 1123
## 47   GUC   86
## 48   HDN  110
## 49   HNL  402
## 50   HOB  309
## 51   HRL 3983
## 52   HSV  923
## 53   IAD 1980
## 54   ICT 1517
## 55   IND 1750
## 56   JAN 2011
## 57   JAX 2135
## 58   JFK  695
## 59   LAS 4082
## 60   LAX 6064
## 61   LBB 1333
## 62   LCH  364
## 63   LEX  584
## 64   LFT 2313
## 65   LGA 2730
## 66   LIT 1579
## 67   LRD 1188
## 68   MAF 2306
## 69   MCI 3174
## 70   MCO 3687
## 71   MDW 2094
## 72   MEM 2399
## 73   MFE 1128
## 74   MIA 2463
## 75   MKE 1588
## 76   MLU  292
## 77   MOB 1674
## 78   MSP 2010
## 79   MSY 6823
## 80   MTJ  164
## 81   OAK  690
## 82   OKC 3170
## 83   OMA 2044
## 84   ONT  952
## 85   ORD 5748
## 86   ORF  717
## 87   PBI 1253
## 88   PDX 1235
## 89   PHL 2367
## 90   PHX 5096
## 91   PIT 1664
## 92   PNS 1539
## 93   PSP  106
## 94   RDU 1740
## 95   RIC  900
## 96   RNO  243
## 97   RSW  948
## 98   SAN 2936
## 99   SAT 4893
## 100  SAV  863
## 101  SDF 1279
## 102  SEA 2615
## 103  SFO 2818
## 104  SHV  787
## 105  SJC  885
## 106  SJU  391
## 107  SLC 2033
## 108  SMF 1014
## 109  SNA 1661
## 110  STL 2509
## 111  TPA 3085
## 112  TUL 2924
## 113  TUS 1565
## 114  TYS 1210
## 115  VPS  880
## 116  XNA 1172
# Flights per destination, least frequent first.
hflights %>%
  count(Dest) %>%
  arrange(n)
##     Dest    n
## 1    AGS    1
## 2    BPT    3
## 3    GRK   42
## 4    GUC   86
## 5    PSP  106
## 6    BKG  110
## 7    EGE  110
## 8    HDN  110
## 9    ANC  125
## 10   ASE  125
## 11   MTJ  164
## 12   RNO  243
## 13   MLU  292
## 14   HOB  309
## 15   AVL  350
## 16   CRW  357
## 17   LCH  364
## 18   SJU  391
## 19   HNL  402
## 20   GJT  403
## 21   CID  410
## 22   DAY  451
## 23   BFL  504
## 24   CAE  561
## 25   LEX  584
## 26   GSO  630
## 27   DSM  647
## 28   GRR  677
## 29   OAK  690
## 30   JFK  695
## 31   ORF  717
## 32   AEX  724
## 33   ECP  729
## 34   SHV  787
## 35   SAV  863
## 36   VPS  880
## 37   SJC  885
## 38   RIC  900
## 39   HSV  923
## 40   RSW  948
## 41   ONT  952
## 42   SMF 1014
## 43   GSP 1123
## 44   MFE 1128
## 45   XNA 1172
## 46   LRD 1188
## 47   CHS 1200
## 48   TYS 1210
## 49   PDX 1235
## 50   PBI 1253
## 51   SDF 1279
## 52   AMA 1297
## 53   LBB 1333
## 54   CMH 1348
## 55   ICT 1517
## 56   CVG 1535
## 57   PNS 1539
## 58   TUS 1565
## 59   LIT 1579
## 60   MKE 1588
## 61   GPT 1618
## 62   COS 1657
## 63   SNA 1661
## 64   PIT 1664
## 65   MOB 1674
## 66   BRO 1692
## 67   RDU 1740
## 68   IND 1750
## 69   BOS 1752
## 70   BTR 1762
## 71   IAD 1980
## 72   MSP 2010
## 73   JAN 2011
## 74   SLC 2033
## 75   OMA 2044
## 76   MDW 2094
## 77   JAX 2135
## 78   CLE 2140
## 79   MAF 2306
## 80   LFT 2313
## 81   PHL 2367
## 82   MEM 2399
## 83   FLL 2462
## 84   MIA 2463
## 85   STL 2509
## 86   BWI 2551
## 87   DTW 2601
## 88   SEA 2615
## 89   DCA 2699
## 90   LGA 2730
## 91   BHM 2736
## 92   ABQ 2812
## 93   SFO 2818
## 94   TUL 2924
## 95   SAN 2936
## 96   ELP 3036
## 97   TPA 3085
## 98   OKC 3170
## 99   MCI 3174
## 100  BNA 3481
## 101  MCO 3687
## 102  HRL 3983
## 103  LAS 4082
## 104  EWR 4314
## 105  CLT 4735
## 106  CRP 4813
## 107  SAT 4893
## 108  AUS 5022
## 109  PHX 5096
## 110  ORD 5748
## 111  DEN 5920
## 112  LAX 6064
## 113  DFW 6653
## 114  MSY 6823
## 115  ATL 7886
## 116  DAL 9820
# Flights per destination, most frequent first.
hflights %>%
  count(Dest) %>%
  arrange(desc(n))
##     Dest    n
## 1    DAL 9820
## 2    ATL 7886
## 3    MSY 6823
## 4    DFW 6653
## 5    LAX 6064
## 6    DEN 5920
## 7    ORD 5748
## 8    PHX 5096
## 9    AUS 5022
## 10   SAT 4893
## 11   CRP 4813
## 12   CLT 4735
## 13   EWR 4314
## 14   LAS 4082
## 15   HRL 3983
## 16   MCO 3687
## 17   BNA 3481
## 18   MCI 3174
## 19   OKC 3170
## 20   TPA 3085
## 21   ELP 3036
## 22   SAN 2936
## 23   TUL 2924
## 24   SFO 2818
## 25   ABQ 2812
## 26   BHM 2736
## 27   LGA 2730
## 28   DCA 2699
## 29   SEA 2615
## 30   DTW 2601
## 31   BWI 2551
## 32   STL 2509
## 33   MIA 2463
## 34   FLL 2462
## 35   MEM 2399
## 36   PHL 2367
## 37   LFT 2313
## 38   MAF 2306
## 39   CLE 2140
## 40   JAX 2135
## 41   MDW 2094
## 42   OMA 2044
## 43   SLC 2033
## 44   JAN 2011
## 45   MSP 2010
## 46   IAD 1980
## 47   BTR 1762
## 48   BOS 1752
## 49   IND 1750
## 50   RDU 1740
## 51   BRO 1692
## 52   MOB 1674
## 53   PIT 1664
## 54   SNA 1661
## 55   COS 1657
## 56   GPT 1618
## 57   MKE 1588
## 58   LIT 1579
## 59   TUS 1565
## 60   PNS 1539
## 61   CVG 1535
## 62   ICT 1517
## 63   CMH 1348
## 64   LBB 1333
## 65   AMA 1297
## 66   SDF 1279
## 67   PBI 1253
## 68   PDX 1235
## 69   TYS 1210
## 70   CHS 1200
## 71   LRD 1188
## 72   XNA 1172
## 73   MFE 1128
## 74   GSP 1123
## 75   SMF 1014
## 76   ONT  952
## 77   RSW  948
## 78   HSV  923
## 79   RIC  900
## 80   SJC  885
## 81   VPS  880
## 82   SAV  863
## 83   SHV  787
## 84   ECP  729
## 85   AEX  724
## 86   ORF  717
## 87   JFK  695
## 88   OAK  690
## 89   GRR  677
## 90   DSM  647
## 91   GSO  630
## 92   LEX  584
## 93   CAE  561
## 94   BFL  504
## 95   DAY  451
## 96   CID  410
## 97   GJT  403
## 98   HNL  402
## 99   SJU  391
## 100  LCH  364
## 101  CRW  357
## 102  AVL  350
## 103  HOB  309
## 104  MLU  292
## 105  RNO  243
## 106  MTJ  164
## 107  ANC  125
## 108  ASE  125
## 109  BKG  110
## 110  EGE  110
## 111  HDN  110
## 112  PSP  106
## 113  GUC   86
## 114  GRK   42
## 115  BPT    3
## 116  AGS    1
# gapminder data
library(gapminder)
# Structure of the gapminder data.
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Number of rows per country.
count(gapminder, country)
## # A tibble: 142 × 2
##    country         n
##    <fct>       <int>
##  1 Afghanistan    12
##  2 Albania        12
##  3 Algeria        12
##  4 Angola         12
##  5 Argentina      12
##  6 Australia      12
##  7 Austria        12
##  8 Bahrain        12
##  9 Bangladesh     12
## 10 Belgium        12
## # ℹ 132 more rows
# Number of rows per continent.
count(gapminder, continent)
## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Africa      624
## 2 Americas    300
## 3 Asia        396
## 4 Europe      360
## 5 Oceania      24
# Mean life expectancy by continent
gapminder %>%
  group_by(continent) %>%
  summarize(m = mean(lifeExp))
## # A tibble: 5 × 2
##   continent     m
##   <fct>     <dbl>
## 1 Africa     48.9
## 2 Americas   64.7
## 3 Asia       60.1
## 4 Europe     71.9
## 5 Oceania    74.3
# Download the seaborn "tips" example data and inspect its structure.
tips <- read.csv(
  file = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
)
glimpse(tips)
## Rows: 244
## Columns: 7
## $ total_bill <dbl> 16.99, 10.34, 21.01, 23.68, 24.59, 25.29, 8.77, 26.88, 15.0…
## $ tip        <dbl> 1.01, 1.66, 3.50, 3.31, 3.61, 4.71, 2.00, 3.12, 1.96, 3.23,…
## $ sex        <chr> "Female", "Male", "Male", "Male", "Female", "Male", "Male",…
## $ smoker     <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No",…
## $ day        <chr> "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Sun", "Su…
## $ time       <chr> "Dinner", "Dinner", "Dinner", "Dinner", "Dinner", "Dinner",…
## $ size       <int> 2, 3, 3, 2, 4, 4, 2, 4, 2, 2, 2, 4, 2, 4, 2, 2, 3, 3, 3, 3,…
# First six rows of the tips data.
head(tips, n = 6)
##   total_bill  tip    sex smoker day   time size
## 1      16.99 1.01 Female     No Sun Dinner    2
## 2      10.34 1.66   Male     No Sun Dinner    3
## 3      21.01 3.50   Male     No Sun Dinner    3
## 4      23.68 3.31   Male     No Sun Dinner    2
## 5      24.59 3.61 Female     No Sun Dinner    4
## 6      25.29 4.71   Male     No Sun Dinner    4
# Distribution of party size. `size` is a small integer count, so one-unit
# bins give one bar per party size; the 30-bin default leaves empty gaps
# between the values and makes stat_bin() ask for a better bin width.
tips %>%
  ggplot(aes(size)) +
  geom_histogram(binwidth = 1)

# Tip against total bill.
ggplot(tips, aes(x = total_bill, y = tip)) +
  geom_point()

# Same scatter, coloured by day of the week.
ggplot(tips, aes(x = total_bill, y = tip)) +
  geom_point(aes(colour = day))

# Colour by day, symbol by sex, with larger points.
ggplot(tips, aes(x = total_bill, y = tip)) +
  geom_point(aes(colour = day, shape = sex), size = 3)

# Life expectancy over time with one smoothed trend per continent.
ggplot(gapminder, aes(x = year, y = lifeExp, colour = continent)) +
  geom_point(alpha = 0.2) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Keep only the 1952 observations.
x <- filter(gapminder, year == 1952)
# Base-graphics histogram of life expectancy in that year.
hist(x$lifeExp, main = "Histogram of lifeExp in 1952")

# Life expectancy in 1952, one box per continent.
ggplot(x, aes(x = continent, y = lifeExp)) +
  geom_boxplot()

# Load the built-in ChickWeight data and show its structure.
data(ChickWeight)
glimpse(ChickWeight)
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5…
## $ Time   <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1…
## $ Chick  <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ Diet   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
# One box of chick weights per diet.
boxplot(
  weight ~ Diet,
  data = ChickWeight,
  main = "Chick Weight by Diet",
  xlab = "Diet",
  ylab = "Weight"
)

# Reload the built-in cars data and show its structure before modelling.
data(cars)
# dplyr is already attached near the top of this script, so this call does
# nothing new; it only makes the section runnable on its own.
library(dplyr)
glimpse(cars)
## Rows: 50
## Columns: 2
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13…
## $ dist  <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 34…
# Scatter plot of the two cars columns (speed on x, dist on y).
plot(cars)

# Simple linear regression of dist on speed; print intercept and slope.
car_model <- lm(formula = dist ~ speed, data = cars)
coef(car_model)
## (Intercept)       speed 
##  -17.579095    3.932409
# Overlay the fitted regression line in red on the current plot.
abline(car_model, col = "red")

# Fitted `dist` value for each of the 50 rows used to fit the model.
fitted(car_model)
##         1         2         3         4         5         6         7         8 
## -1.849460 -1.849460  9.947766  9.947766 13.880175 17.812584 21.744993 21.744993 
##         9        10        11        12        13        14        15        16 
## 21.744993 25.677401 25.677401 29.609810 29.609810 29.609810 29.609810 33.542219 
##        17        18        19        20        21        22        23        24 
## 33.542219 33.542219 33.542219 37.474628 37.474628 37.474628 37.474628 41.407036 
##        25        26        27        28        29        30        31        32 
## 41.407036 41.407036 45.339445 45.339445 49.271854 49.271854 49.271854 53.204263 
##        33        34        35        36        37        38        39        40 
## 53.204263 53.204263 53.204263 57.136672 57.136672 57.136672 61.069080 61.069080 
##        41        42        43        44        45        46        47        48 
## 61.069080 61.069080 61.069080 68.933898 72.866307 76.798715 76.798715 76.798715 
##        49        50 
## 76.798715 80.731124
# Residual (observed dist minus fitted dist) for each of the 50 rows.
residuals(car_model)
##          1          2          3          4          5          6          7 
##   3.849460  11.849460  -5.947766  12.052234   2.119825  -7.812584  -3.744993 
##          8          9         10         11         12         13         14 
##   4.255007  12.255007  -8.677401   2.322599 -15.609810  -9.609810  -5.609810 
##         15         16         17         18         19         20         21 
##  -1.609810  -7.542219   0.457781   0.457781  12.457781 -11.474628  -1.474628 
##         22         23         24         25         26         27         28 
##  22.525372  42.525372 -21.407036 -15.407036  12.592964 -13.339445  -5.339445 
##         29         30         31         32         33         34         35 
## -17.271854  -9.271854   0.728146 -11.204263   2.795737  22.795737  30.795737 
##         36         37         38         39         40         41         42 
## -21.136672 -11.136672  10.863328 -29.069080 -13.069080  -9.069080  -5.069080 
##         43         44         45         46         47         48         49 
##   2.930920  -2.933898 -18.866307  -6.798715  15.201285  16.201285  43.201285 
##         50 
##   4.268876
# Predict dist for a single new speed value.
nx1 <- data.frame(speed = 21.5)
predict(car_model, newdata = nx1)
##        1 
## 66.96769
# Predict dist for several new speeds, plot the predictions as large red
# dots and draw the regression line through them.
nx <- data.frame(speed = c(21.5, 25, 25.5, 26, 26.5, 27, 28))
plot(nx$speed, predict(car_model, nx), col = "red", cex = 2, pch = 20)
abline(car_model)

#install.packages("caret")
#install.packages("mlbench")
library(caret)
## 필요한 패키지를 로딩중입니다: lattice
library(mlbench)
# Load the Sonar dataset into the workspace and inspect its columns.
data("Sonar")
Sonar %>% glimpse()
## Rows: 208
## Columns: 61
## $ V1    <dbl> 0.0200, 0.0453, 0.0262, 0.0100, 0.0762, 0.0286, 0.0317, 0.0519, …
## $ V2    <dbl> 0.0371, 0.0523, 0.0582, 0.0171, 0.0666, 0.0453, 0.0956, 0.0548, …
## $ V3    <dbl> 0.0428, 0.0843, 0.1099, 0.0623, 0.0481, 0.0277, 0.1321, 0.0842, …
## $ V4    <dbl> 0.0207, 0.0689, 0.1083, 0.0205, 0.0394, 0.0174, 0.1408, 0.0319, …
## $ V5    <dbl> 0.0954, 0.1183, 0.0974, 0.0205, 0.0590, 0.0384, 0.1674, 0.1158, …
## $ V6    <dbl> 0.0986, 0.2583, 0.2280, 0.0368, 0.0649, 0.0990, 0.1710, 0.0922, …
## $ V7    <dbl> 0.1539, 0.2156, 0.2431, 0.1098, 0.1209, 0.1201, 0.0731, 0.1027, …
## $ V8    <dbl> 0.1601, 0.3481, 0.3771, 0.1276, 0.2467, 0.1833, 0.1401, 0.0613, …
## $ V9    <dbl> 0.3109, 0.3337, 0.5598, 0.0598, 0.3564, 0.2105, 0.2083, 0.1465, …
## $ V10   <dbl> 0.2111, 0.2872, 0.6194, 0.1264, 0.4459, 0.3039, 0.3513, 0.2838, …
## $ V11   <dbl> 0.1609, 0.4918, 0.6333, 0.0881, 0.4152, 0.2988, 0.1786, 0.2802, …
## $ V12   <dbl> 0.1582, 0.6552, 0.7060, 0.1992, 0.3952, 0.4250, 0.0658, 0.3086, …
## $ V13   <dbl> 0.2238, 0.6919, 0.5544, 0.0184, 0.4256, 0.6343, 0.0513, 0.2657, …
## $ V14   <dbl> 0.0645, 0.7797, 0.5320, 0.2261, 0.4135, 0.8198, 0.3752, 0.3801, …
## $ V15   <dbl> 0.0660, 0.7464, 0.6479, 0.1729, 0.4528, 1.0000, 0.5419, 0.5626, …
## $ V16   <dbl> 0.2273, 0.9444, 0.6931, 0.2131, 0.5326, 0.9988, 0.5440, 0.4376, …
## $ V17   <dbl> 0.3100, 1.0000, 0.6759, 0.0693, 0.7306, 0.9508, 0.5150, 0.2617, …
## $ V18   <dbl> 0.2999, 0.8874, 0.7551, 0.2281, 0.6193, 0.9025, 0.4262, 0.1199, …
## $ V19   <dbl> 0.5078, 0.8024, 0.8929, 0.4060, 0.2032, 0.7234, 0.2024, 0.6676, …
## $ V20   <dbl> 0.4797, 0.7818, 0.8619, 0.3973, 0.4636, 0.5122, 0.4233, 0.9402, …
## $ V21   <dbl> 0.5783, 0.5212, 0.7974, 0.2741, 0.4148, 0.2074, 0.7723, 0.7832, …
## $ V22   <dbl> 0.5071, 0.4052, 0.6737, 0.3690, 0.4292, 0.3985, 0.9735, 0.5352, …
## $ V23   <dbl> 0.4328, 0.3957, 0.4293, 0.5556, 0.5730, 0.5890, 0.9390, 0.6809, …
## $ V24   <dbl> 0.5550, 0.3914, 0.3648, 0.4846, 0.5399, 0.2872, 0.5559, 0.9174, …
## $ V25   <dbl> 0.6711, 0.3250, 0.5331, 0.3140, 0.3161, 0.2043, 0.5268, 0.7613, …
## $ V26   <dbl> 0.6415, 0.3200, 0.2413, 0.5334, 0.2285, 0.5782, 0.6826, 0.8220, …
## $ V27   <dbl> 0.7104, 0.3271, 0.5070, 0.5256, 0.6995, 0.5389, 0.5713, 0.8872, …
## $ V28   <dbl> 0.8080, 0.2767, 0.8533, 0.2520, 1.0000, 0.3750, 0.5429, 0.6091, …
## $ V29   <dbl> 0.6791, 0.4423, 0.6036, 0.2090, 0.7262, 0.3411, 0.2177, 0.2967, …
## $ V30   <dbl> 0.3857, 0.2028, 0.8514, 0.3559, 0.4724, 0.5067, 0.2149, 0.1103, …
## $ V31   <dbl> 0.1307, 0.3788, 0.8512, 0.6260, 0.5103, 0.5580, 0.5811, 0.1318, …
## $ V32   <dbl> 0.2604, 0.2947, 0.5045, 0.7340, 0.5459, 0.4778, 0.6323, 0.0624, …
## $ V33   <dbl> 0.5121, 0.1984, 0.1862, 0.6120, 0.2881, 0.3299, 0.2965, 0.0990, …
## $ V34   <dbl> 0.7547, 0.2341, 0.2709, 0.3497, 0.0981, 0.2198, 0.1873, 0.4006, …
## $ V35   <dbl> 0.8537, 0.1306, 0.4232, 0.3953, 0.1951, 0.1407, 0.2969, 0.3666, …
## $ V36   <dbl> 0.8507, 0.4182, 0.3043, 0.3012, 0.4181, 0.2856, 0.5163, 0.1050, …
## $ V37   <dbl> 0.6692, 0.3835, 0.6116, 0.5408, 0.4604, 0.3807, 0.6153, 0.1915, …
## $ V38   <dbl> 0.6097, 0.1057, 0.6756, 0.8814, 0.3217, 0.4158, 0.4283, 0.3930, …
## $ V39   <dbl> 0.4943, 0.1840, 0.5375, 0.9857, 0.2828, 0.4054, 0.5479, 0.4288, …
## $ V40   <dbl> 0.2744, 0.1970, 0.4719, 0.9167, 0.2430, 0.3296, 0.6133, 0.2546, …
## $ V41   <dbl> 0.0510, 0.1674, 0.4647, 0.6121, 0.1979, 0.2707, 0.5017, 0.1151, …
## $ V42   <dbl> 0.2834, 0.0583, 0.2587, 0.5006, 0.2444, 0.2650, 0.2377, 0.2196, …
## $ V43   <dbl> 0.2825, 0.1401, 0.2129, 0.3210, 0.1847, 0.0723, 0.1957, 0.1879, …
## $ V44   <dbl> 0.4256, 0.1628, 0.2222, 0.3202, 0.0841, 0.1238, 0.1749, 0.1437, …
## $ V45   <dbl> 0.2641, 0.0621, 0.2111, 0.4295, 0.0692, 0.1192, 0.1304, 0.2146, …
## $ V46   <dbl> 0.1386, 0.0203, 0.0176, 0.3654, 0.0528, 0.1089, 0.0597, 0.2360, …
## $ V47   <dbl> 0.1051, 0.0530, 0.1348, 0.2655, 0.0357, 0.0623, 0.1124, 0.1125, …
## $ V48   <dbl> 0.1343, 0.0742, 0.0744, 0.1576, 0.0085, 0.0494, 0.1047, 0.0254, …
## $ V49   <dbl> 0.0383, 0.0409, 0.0130, 0.0681, 0.0230, 0.0264, 0.0507, 0.0285, …
## $ V50   <dbl> 0.0324, 0.0061, 0.0106, 0.0294, 0.0046, 0.0081, 0.0159, 0.0178, …
## $ V51   <dbl> 0.0232, 0.0125, 0.0033, 0.0241, 0.0156, 0.0104, 0.0195, 0.0052, …
## $ V52   <dbl> 0.0027, 0.0084, 0.0232, 0.0121, 0.0031, 0.0045, 0.0201, 0.0081, …
## $ V53   <dbl> 0.0065, 0.0089, 0.0166, 0.0036, 0.0054, 0.0014, 0.0248, 0.0120, …
## $ V54   <dbl> 0.0159, 0.0048, 0.0095, 0.0150, 0.0105, 0.0038, 0.0131, 0.0045, …
## $ V55   <dbl> 0.0072, 0.0094, 0.0180, 0.0085, 0.0110, 0.0013, 0.0070, 0.0121, …
## $ V56   <dbl> 0.0167, 0.0191, 0.0244, 0.0073, 0.0015, 0.0089, 0.0138, 0.0097, …
## $ V57   <dbl> 0.0180, 0.0140, 0.0316, 0.0050, 0.0072, 0.0057, 0.0092, 0.0085, …
## $ V58   <dbl> 0.0084, 0.0049, 0.0164, 0.0044, 0.0048, 0.0027, 0.0143, 0.0047, …
## $ V59   <dbl> 0.0090, 0.0052, 0.0095, 0.0040, 0.0107, 0.0051, 0.0036, 0.0048, …
## $ V60   <dbl> 0.0032, 0.0044, 0.0078, 0.0117, 0.0094, 0.0062, 0.0103, 0.0053, …
## $ Class <fct> R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R…
# Column-wise summary statistics for every Sonar variable.
Sonar %>% summary()
##        V1                V2                V3                V4         
##  Min.   :0.00150   Min.   :0.00060   Min.   :0.00150   Min.   :0.00580  
##  1st Qu.:0.01335   1st Qu.:0.01645   1st Qu.:0.01895   1st Qu.:0.02438  
##  Median :0.02280   Median :0.03080   Median :0.03430   Median :0.04405  
##  Mean   :0.02916   Mean   :0.03844   Mean   :0.04383   Mean   :0.05389  
##  3rd Qu.:0.03555   3rd Qu.:0.04795   3rd Qu.:0.05795   3rd Qu.:0.06450  
##  Max.   :0.13710   Max.   :0.23390   Max.   :0.30590   Max.   :0.42640  
##        V5                V6                V7               V8         
##  Min.   :0.00670   Min.   :0.01020   Min.   :0.0033   Min.   :0.00550  
##  1st Qu.:0.03805   1st Qu.:0.06703   1st Qu.:0.0809   1st Qu.:0.08042  
##  Median :0.06250   Median :0.09215   Median :0.1070   Median :0.11210  
##  Mean   :0.07520   Mean   :0.10457   Mean   :0.1217   Mean   :0.13480  
##  3rd Qu.:0.10028   3rd Qu.:0.13412   3rd Qu.:0.1540   3rd Qu.:0.16960  
##  Max.   :0.40100   Max.   :0.38230   Max.   :0.3729   Max.   :0.45900  
##        V9               V10              V11              V12        
##  Min.   :0.00750   Min.   :0.0113   Min.   :0.0289   Min.   :0.0236  
##  1st Qu.:0.09703   1st Qu.:0.1113   1st Qu.:0.1293   1st Qu.:0.1335  
##  Median :0.15225   Median :0.1824   Median :0.2248   Median :0.2490  
##  Mean   :0.17800   Mean   :0.2083   Mean   :0.2360   Mean   :0.2502  
##  3rd Qu.:0.23342   3rd Qu.:0.2687   3rd Qu.:0.3016   3rd Qu.:0.3312  
##  Max.   :0.68280   Max.   :0.7106   Max.   :0.7342   Max.   :0.7060  
##       V13              V14              V15              V16        
##  Min.   :0.0184   Min.   :0.0273   Min.   :0.0031   Min.   :0.0162  
##  1st Qu.:0.1661   1st Qu.:0.1752   1st Qu.:0.1646   1st Qu.:0.1963  
##  Median :0.2640   Median :0.2811   Median :0.2817   Median :0.3047  
##  Mean   :0.2733   Mean   :0.2966   Mean   :0.3202   Mean   :0.3785  
##  3rd Qu.:0.3513   3rd Qu.:0.3862   3rd Qu.:0.4529   3rd Qu.:0.5357  
##  Max.   :0.7131   Max.   :0.9970   Max.   :1.0000   Max.   :0.9988  
##       V17              V18              V19              V20        
##  Min.   :0.0349   Min.   :0.0375   Min.   :0.0494   Min.   :0.0656  
##  1st Qu.:0.2059   1st Qu.:0.2421   1st Qu.:0.2991   1st Qu.:0.3506  
##  Median :0.3084   Median :0.3683   Median :0.4350   Median :0.5425  
##  Mean   :0.4160   Mean   :0.4523   Mean   :0.5048   Mean   :0.5630  
##  3rd Qu.:0.6594   3rd Qu.:0.6791   3rd Qu.:0.7314   3rd Qu.:0.8093  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##       V21              V22              V23              V24        
##  Min.   :0.0512   Min.   :0.0219   Min.   :0.0563   Min.   :0.0239  
##  1st Qu.:0.3997   1st Qu.:0.4069   1st Qu.:0.4502   1st Qu.:0.5407  
##  Median :0.6177   Median :0.6649   Median :0.6997   Median :0.6985  
##  Mean   :0.6091   Mean   :0.6243   Mean   :0.6470   Mean   :0.6727  
##  3rd Qu.:0.8170   3rd Qu.:0.8320   3rd Qu.:0.8486   3rd Qu.:0.8722  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##       V25              V26              V27              V28        
##  Min.   :0.0240   Min.   :0.0921   Min.   :0.0481   Min.   :0.0284  
##  1st Qu.:0.5258   1st Qu.:0.5442   1st Qu.:0.5319   1st Qu.:0.5348  
##  Median :0.7211   Median :0.7545   Median :0.7456   Median :0.7319  
##  Mean   :0.6754   Mean   :0.6999   Mean   :0.7022   Mean   :0.6940  
##  3rd Qu.:0.8737   3rd Qu.:0.8938   3rd Qu.:0.9171   3rd Qu.:0.9003  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##       V29              V30              V31              V32        
##  Min.   :0.0144   Min.   :0.0613   Min.   :0.0482   Min.   :0.0404  
##  1st Qu.:0.4637   1st Qu.:0.4114   1st Qu.:0.3456   1st Qu.:0.2814  
##  Median :0.6808   Median :0.6071   Median :0.4904   Median :0.4296  
##  Mean   :0.6421   Mean   :0.5809   Mean   :0.5045   Mean   :0.4390  
##  3rd Qu.:0.8521   3rd Qu.:0.7352   3rd Qu.:0.6420   3rd Qu.:0.5803  
##  Max.   :1.0000   Max.   :1.0000   Max.   :0.9657   Max.   :0.9306  
##       V33              V34              V35              V36        
##  Min.   :0.0477   Min.   :0.0212   Min.   :0.0223   Min.   :0.0080  
##  1st Qu.:0.2579   1st Qu.:0.2176   1st Qu.:0.1794   1st Qu.:0.1543  
##  Median :0.3912   Median :0.3510   Median :0.3127   Median :0.3211  
##  Mean   :0.4172   Mean   :0.4032   Mean   :0.3926   Mean   :0.3848  
##  3rd Qu.:0.5561   3rd Qu.:0.5961   3rd Qu.:0.5934   3rd Qu.:0.5565  
##  Max.   :1.0000   Max.   :0.9647   Max.   :1.0000   Max.   :1.0000  
##       V37              V38              V39              V40        
##  Min.   :0.0351   Min.   :0.0383   Min.   :0.0371   Min.   :0.0117  
##  1st Qu.:0.1601   1st Qu.:0.1743   1st Qu.:0.1740   1st Qu.:0.1865  
##  Median :0.3063   Median :0.3127   Median :0.2835   Median :0.2781  
##  Mean   :0.3638   Mean   :0.3397   Mean   :0.3258   Mean   :0.3112  
##  3rd Qu.:0.5189   3rd Qu.:0.4405   3rd Qu.:0.4349   3rd Qu.:0.4244  
##  Max.   :0.9497   Max.   :1.0000   Max.   :0.9857   Max.   :0.9297  
##       V41              V42              V43              V44        
##  Min.   :0.0360   Min.   :0.0056   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.1631   1st Qu.:0.1589   1st Qu.:0.1552   1st Qu.:0.1269  
##  Median :0.2595   Median :0.2451   Median :0.2225   Median :0.1777  
##  Mean   :0.2893   Mean   :0.2783   Mean   :0.2465   Mean   :0.2141  
##  3rd Qu.:0.3875   3rd Qu.:0.3842   3rd Qu.:0.3245   3rd Qu.:0.2717  
##  Max.   :0.8995   Max.   :0.8246   Max.   :0.7733   Max.   :0.7762  
##       V45               V46               V47               V48         
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.09448   1st Qu.:0.06855   1st Qu.:0.06425   1st Qu.:0.04512  
##  Median :0.14800   Median :0.12135   Median :0.10165   Median :0.07810  
##  Mean   :0.19723   Mean   :0.16063   Mean   :0.12245   Mean   :0.09142  
##  3rd Qu.:0.23155   3rd Qu.:0.20037   3rd Qu.:0.15443   3rd Qu.:0.12010  
##  Max.   :0.70340   Max.   :0.72920   Max.   :0.55220   Max.   :0.33390  
##       V49               V50               V51                V52          
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000000   Min.   :0.000800  
##  1st Qu.:0.02635   1st Qu.:0.01155   1st Qu.:0.008425   1st Qu.:0.007275  
##  Median :0.04470   Median :0.01790   Median :0.013900   Median :0.011400  
##  Mean   :0.05193   Mean   :0.02042   Mean   :0.016069   Mean   :0.013420  
##  3rd Qu.:0.06853   3rd Qu.:0.02527   3rd Qu.:0.020825   3rd Qu.:0.016725  
##  Max.   :0.19810   Max.   :0.08250   Max.   :0.100400   Max.   :0.070900  
##       V53                V54                V55               V56          
##  Min.   :0.000500   Min.   :0.001000   Min.   :0.00060   Min.   :0.000400  
##  1st Qu.:0.005075   1st Qu.:0.005375   1st Qu.:0.00415   1st Qu.:0.004400  
##  Median :0.009550   Median :0.009300   Median :0.00750   Median :0.006850  
##  Mean   :0.010709   Mean   :0.010941   Mean   :0.00929   Mean   :0.008222  
##  3rd Qu.:0.014900   3rd Qu.:0.014500   3rd Qu.:0.01210   3rd Qu.:0.010575  
##  Max.   :0.039000   Max.   :0.035200   Max.   :0.04470   Max.   :0.039400  
##       V57               V58                V59                V60          
##  Min.   :0.00030   Min.   :0.000300   Min.   :0.000100   Min.   :0.000600  
##  1st Qu.:0.00370   1st Qu.:0.003600   1st Qu.:0.003675   1st Qu.:0.003100  
##  Median :0.00595   Median :0.005800   Median :0.006400   Median :0.005300  
##  Mean   :0.00782   Mean   :0.007949   Mean   :0.007941   Mean   :0.006507  
##  3rd Qu.:0.01043   3rd Qu.:0.010350   3rd Qu.:0.010325   3rd Qu.:0.008525  
##  Max.   :0.03550   Max.   :0.044000   Max.   :0.036400   Max.   :0.043900  
##  Class  
##  M:111  
##  R: 97  
##         
##         
##         
## 
# Number of rows per Class level.
count(Sonar, Class)
##   Class   n
## 1     M 111
## 2     R  97
# Reproducible 75% / 25% train/test split of Sonar, partitioned on Class.
set.seed(998)
inTraining <- createDataPartition(
  y = Sonar$Class,
  p = 0.75,
  list = FALSE
)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]
# NOTE(review): the seed is reset here, but no random call follows in the
# visible code; presumably intended for a later model-training step.
set.seed(825)

# Sanity check: row counts of the two partitions.
nrow(training)
## [1] 157
nrow(testing)
## [1] 51
# Download the admissions dataset (columns: admit, gre, gpa, rank) and
# inspect its structure.
ucla <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
library(dplyr)
ucla %>% glimpse()
## Rows: 400
## Columns: 4
## $ admit <int> 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1…
## $ gre   <int> 380, 660, 800, 640, 520, 760, 560, 400, 540, 700, 800, 440, 760,…
## $ gpa   <dbl> 3.61, 3.67, 4.00, 3.19, 2.93, 3.00, 2.98, 3.08, 3.39, 3.92, 4.00…
## $ rank  <int> 3, 3, 1, 4, 4, 2, 1, 2, 3, 2, 4, 1, 1, 2, 1, 3, 4, 3, 2, 1, 3, 2…
# Logistic regression: model admit (as a two-level factor) on every other
# column of ucla, then print the coefficient table.
ucla[["admit"]] <- factor(ucla[["admit"]])
m <- glm(
  formula = admit ~ .,
  data = ucla,
  family = "binomial"
)
summary(m)
## 
## Call:
## glm(formula = admit ~ ., family = "binomial", data = ucla)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.449548   1.132846  -3.045  0.00233 ** 
## gre          0.002294   0.001092   2.101  0.03564 *  
## gpa          0.777014   0.327484   2.373  0.01766 *  
## rank        -0.560031   0.127137  -4.405 1.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 459.44  on 396  degrees of freedom
## AIC: 467.44
## 
## Number of Fisher Scoring iterations: 4
# Odds ratio for gpa: exponentiate the fitted log-odds coefficient.
# Taken from the model object rather than re-typing the rounded value from
# the printed summary, so it stays correct if the data or model change.
# `[[` drops the name so the result prints as a plain number.
exp(coef(m)[["gpa"]])
## [1] 2.174968
# Predicted admission probability for one hypothetical applicant.
s <- data.frame(gre = 400, gpa = 3.6, rank = 3)
predict(m, newdata = s, type = "response")
##         1 
## 0.1954765