library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
# Show the column types and leading values of the flights table.
flights %>% str()
## tibble [336,776 × 19] (S3: tbl_df/tbl/data.frame)
## $ year : int [1:336776] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ month : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int [1:336776] 1 1 1 1 1 1 1 1 1 1 ...
## $ dep_time : int [1:336776] 517 533 542 544 554 554 555 557 557 558 ...
## $ sched_dep_time: int [1:336776] 515 529 540 545 600 558 600 600 600 600 ...
## $ dep_delay : num [1:336776] 2 4 2 -1 -6 -4 -5 -3 -3 -2 ...
## $ arr_time : int [1:336776] 830 850 923 1004 812 740 913 709 838 753 ...
## $ sched_arr_time: int [1:336776] 819 830 850 1022 837 728 854 723 846 745 ...
## $ arr_delay : num [1:336776] 11 20 33 -18 -25 12 19 -14 -8 8 ...
## $ carrier : chr [1:336776] "UA" "UA" "AA" "B6" ...
## $ flight : int [1:336776] 1545 1714 1141 725 461 1696 507 5708 79 301 ...
## $ tailnum : chr [1:336776] "N14228" "N24211" "N619AA" "N804JB" ...
## $ origin : chr [1:336776] "EWR" "LGA" "JFK" "JFK" ...
## $ dest : chr [1:336776] "IAH" "IAH" "MIA" "BQN" ...
## $ air_time : num [1:336776] 227 227 160 183 116 150 158 53 140 138 ...
## $ distance : num [1:336776] 1400 1416 1089 1576 762 ...
## $ hour : num [1:336776] 5 5 5 5 6 5 6 6 6 6 ...
## $ minute : num [1:336776] 15 29 40 45 0 58 0 0 0 0 ...
## $ time_hour : POSIXct[1:336776], format: "2013-01-01 05:00:00" "2013-01-01 05:00:00" ...
# Preview the first six rows of flights.
head(flights, n = 6)
## # A tibble: 6 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# CREATE A NEW COLUMN WITH MUTATE AND COUNT ITS VALUES
# Add a logical long_flight column: TRUE when air_time is at least 6 * 60.
# Rows whose air_time is NA get NA in long_flight.
df <- flights %>%
  mutate(long_flight = air_time >= 6 * 60)
# Dimensions of the source table (df carries one extra column).
dim(flights)
## [1] 336776 19
# str(df)
# Tally the rows for each value of long_flight, including NA.
count(df, long_flight)
## # A tibble: 3 × 2
## long_flight n
## <lgl> <int>
## 1 FALSE 322630
## 2 TRUE 4716
## 3 NA 9430
# GROUP BY
# One row per calendar date: number of flights and the median air_time
# (missing air_time values are ignored). The grouping is dropped at the end.
df2 <- flights %>%
  group_by(date = make_date(year, month, day)) %>%
  summarise(
    flights_n = n(),
    air_time_median = median(air_time, na.rm = TRUE)
  ) %>%
  ungroup()
df2
## # A tibble: 365 × 3
## date flights_n air_time_median
## <date> <int> <dbl>
## 1 2013-01-01 842 149
## 2 2013-01-02 943 148
## 3 2013-01-03 914 148
## 4 2013-01-04 915 140
## 5 2013-01-05 720 147
## 6 2013-01-06 832 147
## 7 2013-01-07 933 126.
## 8 2013-01-08 899 126.
## 9 2013-01-09 902 135
## 10 2013-01-10 932 126
## # ℹ 355 more rows
# SAMPLE
# Random sample of 1% of the rows of flights.
v <- slice_sample(flights, prop = 0.01)
v
## # A tibble: 3,367 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 5 8 616 629 -13 748 825
## 2 2013 5 31 1842 1835 7 2022 2049
## 3 2013 1 17 954 845 69 1120 1006
## 4 2013 10 18 1559 1550 9 1838 1816
## 5 2013 6 24 1704 1553 71 1846 1709
## 6 2013 9 26 1256 1259 -3 1523 1501
## 7 2013 2 17 2204 2112 52 2318 2224
## 8 2013 12 27 1412 1410 2 1628 1701
## 9 2013 1 8 1645 1645 0 1854 1900
## 10 2013 11 15 1245 1247 -2 1356 1415
## # ℹ 3,357 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# prop = 1 keeps every row, so this returns flights in a random row order.
f <- slice_sample(flights, prop = 1)
f
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 4 9 1450 1350 60 1736 1638
## 2 2013 10 11 1433 1429 4 1724 1741
## 3 2013 7 15 1503 1455 8 1656 1645
## 4 2013 10 6 1439 1445 -6 1601 1629
## 5 2013 2 8 NA 1635 NA NA 1856
## 6 2013 7 17 1505 1503 2 1719 1659
## 7 2013 8 13 1315 1245 30 1415 1404
## 8 2013 4 16 1552 1600 -8 1855 1901
## 9 2013 8 18 1549 1530 19 1735 1715
## 10 2013 1 3 901 900 1 1031 1048
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# Three rows drawn at random from the whole table.
h <- slice_sample(flights, n = 3)
h
## # A tibble: 3 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 8 5 601 601 0 920 915
## 2 2013 10 27 645 650 -5 811 819
## 3 2013 9 7 720 730 -10 804 827
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# SAMPLE 3 RANDOM FLIGHTS PER ORIGIN
# Draw three random flights from each origin, then drop the grouping so that
# later verbs applied to nn work on the whole table rather than per origin.
nn <- flights %>%
  group_by(origin) %>%
  slice_sample(n = 3) %>%
  ungroup()
nn
## # A tibble: 9 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 11 1543 1555 -12 1754 1810
## 2 2013 12 21 1826 1825 1 2132 2152
## 3 2013 6 2 1724 1450 154 1859 1642
## 4 2013 6 18 553 600 -7 713 712
## 5 2013 10 5 1820 1829 -9 1912 1949
## 6 2013 1 14 1725 1725 0 2031 2040
## 7 2013 4 28 2139 2130 9 2309 2300
## 8 2013 6 1 824 830 -6 948 1015
## 9 2013 8 15 756 802 -6 908 930
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# CREATE DATE
# Keep the three date parts and combine them into a single Date column.
bbb <- flights %>%
  select(year, month, day) %>%
  mutate(date = make_date(year, month, day))
bbb
## # A tibble: 336,776 × 4
## year month day date
## <int> <int> <int> <date>
## 1 2013 1 1 2013-01-01
## 2 2013 1 1 2013-01-01
## 3 2013 1 1 2013-01-01
## 4 2013 1 1 2013-01-01
## 5 2013 1 1 2013-01-01
## 6 2013 1 1 2013-01-01
## 7 2013 1 1 2013-01-01
## 8 2013 1 1 2013-01-01
## 9 2013 1 1 2013-01-01
## 10 2013 1 1 2013-01-01
## # ℹ 336,766 more rows
# SELECT WITH STARTS_WITH, ENDS_WITH AND CONTAINS
# Columns whose names begin with "dep_".
select(flights, starts_with("dep_"))
## # A tibble: 336,776 × 2
## dep_time dep_delay
## <int> <dbl>
## 1 517 2
## 2 533 4
## 3 542 2
## 4 544 -1
## 5 554 -6
## 6 554 -4
## 7 555 -5
## 8 557 -3
## 9 557 -3
## 10 558 -2
## # ℹ 336,766 more rows
# Move the "dep_" columns to the front and keep every other column after them.
select(flights, starts_with("dep_"), everything())
## # A tibble: 336,776 × 19
## dep_time dep_delay year month day sched_dep_time arr_time sched_arr_time
## <int> <dbl> <int> <int> <int> <int> <int> <int>
## 1 517 2 2013 1 1 515 830 819
## 2 533 4 2013 1 1 529 850 830
## 3 542 2 2013 1 1 540 923 850
## 4 544 -1 2013 1 1 545 1004 1022
## 5 554 -6 2013 1 1 600 812 837
## 6 554 -4 2013 1 1 558 740 728
## 7 555 -5 2013 1 1 600 913 854
## 8 557 -3 2013 1 1 600 709 723
## 9 557 -3 2013 1 1 600 838 846
## 10 558 -2 2013 1 1 600 753 745
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# Columns whose names end in "hour".
select(flights, ends_with("hour"))
## # A tibble: 336,776 × 2
## hour time_hour
## <dbl> <dttm>
## 1 5 2013-01-01 05:00:00
## 2 5 2013-01-01 05:00:00
## 3 5 2013-01-01 05:00:00
## 4 5 2013-01-01 05:00:00
## 5 6 2013-01-01 06:00:00
## 6 5 2013-01-01 05:00:00
## 7 6 2013-01-01 06:00:00
## 8 6 2013-01-01 06:00:00
## 9 6 2013-01-01 06:00:00
## 10 6 2013-01-01 06:00:00
## # ℹ 336,766 more rows
# Columns whose names contain "dep" anywhere.
select(flights, contains("dep"))
## # A tibble: 336,776 × 3
## dep_time sched_dep_time dep_delay
## <int> <int> <dbl>
## 1 517 515 2
## 2 533 529 4
## 3 542 540 2
## 4 544 545 -1
## 5 554 600 -6
## 6 554 558 -4
## 7 555 600 -5
## 8 557 600 -3
## 9 557 600 -3
## 10 558 600 -2
## # ℹ 336,766 more rows
# CASE WHEN
# Replace each origin code with a longer label, then count rows per label.
flights %>%
  mutate(
    origin = case_when(
      origin == "EWR" ~ "NEWYORK INTERNATIONAL AIRPORT",
      origin == "JFK" ~ "JOHN KENNEDY",
      origin == "LGA" ~ "LAIRPORT"
    )
  ) %>%
  count(origin)
## # A tibble: 3 × 2
## origin n
## <chr> <int>
## 1 JOHN KENNEDY 111279
## 2 LAIRPORT 104662
## 3 NEWYORK INTERNATIONAL AIRPORT 120835
# STR_REPLACE_ALL
#flights%>%mutate(origin=str_replace_all(origin,
#c("^EWR$"="NEW YORK","^JFK$"="KENNEDY","^LGA$"="LAGORF")))%>%count(origin)
# Build the date column and drop every other column.
transmute(flights, date = make_date(year, month, day))
## # A tibble: 336,776 × 1
## date
## <date>
## 1 2013-01-01
## 2 2013-01-01
## 3 2013-01-01
## 4 2013-01-01
## 5 2013-01-01
## 6 2013-01-01
## 7 2013-01-01
## 8 2013-01-01
## 9 2013-01-01
## 10 2013-01-01
## # ℹ 336,766 more rows
# Display the carrier-code lookup table.
print(airlines)
## # A tibble: 16 × 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## 7 F9 Frontier Airlines Inc.
## 8 FL AirTran Airways Corporation
## 9 HA Hawaiian Airlines Inc.
## 10 MQ Envoy Air
## 11 OO SkyWest Airlines Inc.
## 12 UA United Air Lines Inc.
## 13 US US Airways Inc.
## 14 VX Virgin America
## 15 WN Southwest Airlines Co.
## 16 YV Mesa Airlines Inc.
# Show the structure of the airlines lookup table.
airlines %>% str()
## tibble [16 × 2] (S3: tbl_df/tbl/data.frame)
## $ carrier: chr [1:16] "9E" "AA" "AS" "B6" ...
## $ name : chr [1:16] "Endeavor Air Inc." "American Airlines Inc." "Alaska Airlines Inc." "JetBlue Airways" ...
# Add an upper-cased copy of name with a trailing "INC"/"CO" (optionally
# followed by ".") removed. The replacement has to run on the column inside
# mutate(): piping the whole tibble into gsub() passes it as the pattern.
airlines %>%
  mutate(
    names = name %>%
      str_to_upper() %>%
      str_replace("(INC|CO)\\.?$", "") %>%
      str_trim() # drop the space left in front of the removed suffix
  )
# Add an upper-cased copy of name with a trailing AIR/AIRLINES/AIRWAYS
# (optionally followed by CORPORATION) removed. The replacement runs on the
# column inside mutate(); applied to the whole tibble, str_replace_all()
# coerces each column to a single deparsed string.
airlines %>%
  mutate(
    names = name %>%
      str_to_upper() %>%
      str_replace_all("AIR?(LINES|WAYS)?(CORPORATION)?$", "") %>%
      str_trim() # drop the space left in front of the removed suffix
  )
# Mean mpg, disp and qsec for each cyl value.
# across() replaces the superseded summarise_at()/vars() form.
mtcars %>%
  group_by(cyl) %>%
  summarise(across(c(mpg, disp, qsec), mean))
## # A tibble: 3 × 4
## cyl mpg disp qsec
## <dbl> <dbl> <dbl> <dbl>
## 1 4 26.7 105. 19.1
## 2 6 19.7 183. 18.0
## 3 8 15.1 353. 16.8
# Recode cyl as a factor with custom labels for levels 4, 6 and 8, then
# glimpse the result. mtcars itself is not modified.
mtcars %>%
  mutate(
    cyl = factor(cyl, levels = c(4, 6, 8), labels = c("4c", "3d", "6r"))
  ) %>%
  glimpse()
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <fct> 3d, 3d, 4c, 3d, 6r, 3d, 6r, 4c, 4c, 3d, 3d, 6r, 6r, 6r, 6r, 6r, 6…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
# VISUALIZATION
# Show the structure of the mpg dataset.
mpg %>% str()
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
# Scatter plot of cyl against displ; colour encodes drv and size encodes trans.
ggplot(mpg, aes(x = displ, y = cyl)) +
  geom_point(aes(colour = drv, size = trans))
## Warning: Using size for a discrete variable is not advised.
# Column chart with cyl on x and displ on y, minimal theme.
ggplot(mpg, aes(x = cyl, y = displ)) +
  geom_col() +
  theme_minimal()
# City mileage against engine displacement, one panel per model year, with a
# linear fit and flipped axes. y is mapped to cty so that the "mpg in the
# city" label and the "fuel efficiency" title describe the plotted variable.
mpg %>%
  ggplot(aes(displ, cty)) +
  geom_point() +
  geom_point(aes(colour = drv, size = trans)) +
  geom_smooth(method = "lm", se = FALSE) + # se takes a logical, not 0
  facet_wrap(~year, nrow = 1) +
  labs(x = "engine size", y = "mpg in the city", title = "fuel efficiency") +
  coord_flip() +
  theme_minimal()
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using formula = 'y ~ x'
# Bar chart of cars per class; coord_cartesian() zooms the y axis to 5-60
# without dropping any data.
ggplot(mpg) +
  geom_bar(aes(x = class)) +
  coord_cartesian(ylim = c(5, 60))
# Same chart on the default axis range.
ggplot(mpg) +
  geom_bar(aes(x = class)) #+ facet_wrap(~year)
# Cars per class as a table. count(class) returns an ungrouped tibble,
# whereas group_by() followed by count() would leave f grouped by class.
f <- mpg %>%
  count(class) #%>%arrange(desc(n))
# Bar chart of cars per cyl value.
ggplot(mpg) +
  geom_bar(aes(x = cyl))
# Histogram of displ using the default binning.
ggplot(mpg) +
  geom_histogram(aes(displ))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bar chart of the number of cars per class, bars ordered by count.
# The plot gets its own name: assigning it to `mpg` would mask the mpg
# dataset for every later statement. geom_col() already plots the values
# as given, so no stat argument is needed.
class_count_plot <- mpg %>%
  count(class) %>%
  mutate(class = fct_reorder(class, n)) %>%
  ggplot(aes(class, n)) +
  geom_col(fill = "blue")
class_count_plot
#n<-mpg%>%ggplot(aes(x=class))+geom_bar()
#n
#c<-ggplot(mpg)+geom_bar(aes(x=class,y=after_stat(100*count/sum(count))))
#c
# Show the structure of the CO2 dataset.
CO2 %>% str()
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 84 obs. of 5 variables:
## $ Plant : Ord.factor w/ 12 levels "Qn1"<"Qn2"<"Qn3"<..: 1 1 1 1 1 1 1 2 2 2 ...
## $ Type : Factor w/ 2 levels "Quebec","Mississippi": 1 1 1 1 1 1 1 1 1 1 ...
## $ Treatment: Factor w/ 2 levels "nonchilled","chilled": 1 1 1 1 1 1 1 1 1 1 ...
## $ conc : num 95 175 250 350 500 675 1000 95 175 250 ...
## $ uptake : num 16 30.4 34.8 37.2 35.3 39.2 39.7 13.6 27.3 37.1 ...
## - attr(*, "formula")=Class 'formula' language uptake ~ conc | Plant
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Treatment * Type
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Ambient carbon dioxide concentration"
## ..$ y: chr "CO2 uptake rate"
## - attr(*, "units")=List of 2
## ..$ x: chr "(uL/L)"
## ..$ y: chr "(umol/m^2 s)"
# Number of observations per Type as a bar chart.
CO2 %>%
  group_by(Type) %>%
  count() %>%
  ggplot(aes(Type, n)) +
  geom_col()
# Mean conc and uptake per Treatment.
# across() replaces the superseded summarise_at()/vars() form.
CO2 %>%
  group_by(Treatment) %>%
  summarise(across(c(conc, uptake), mean))
## # A tibble: 2 × 3
## Treatment conc uptake
## <fct> <dbl> <dbl>
## 1 nonchilled 435 30.6
## 2 chilled 435 23.8
# Mean conc and uptake per Treatment, computed once and reused by the three
# plots below instead of repeating the same summary in every pipeline.
treatment_means <- CO2 %>%
  group_by(Treatment) %>%
  summarise(across(c(conc, uptake), mean))
# Mean uptake per Treatment.
treatment_means %>%
  ggplot(aes(Treatment, uptake)) +
  geom_col(fill = "brown")
# NOTE(review): this histograms the two per-Treatment means of conc (both
# 435), not the raw conc values - confirm that this is what is intended.
treatment_means %>%
  ggplot(aes(conc)) +
  geom_histogram(fill = "brown")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Mean conc per Treatment.
treatment_means %>%
  ggplot(aes(Treatment, conc)) +
  geom_col(fill = "green")
# Show the structure of mtcars before gear is converted.
mtcars %>% str()
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Convert gear to a factor in the working copy of mtcars, then re-inspect
# the structure to confirm the new column type.
mtcars[["gear"]] <- as.factor(mtcars[["gear"]])
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Mean mpg, qsec, wt and hp for each gear value. Each column is listed once
# (mpg used to appear twice), and across() replaces the superseded
# summarise_at()/vars() form.
mtcars %>%
  group_by(gear) %>%
  summarise(across(c(mpg, qsec, wt, hp), mean))
## # A tibble: 3 × 5
## gear mpg qsec wt hp
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 3 16.1 17.7 3.89 176.
## 2 4 24.5 19.0 2.62 89.5
## 3 5 21.4 15.6 2.63 196.
# Mean mpg and qsec for each cyl value.
# across() replaces the superseded summarise_at()/vars() form.
mtcars %>%
  group_by(cyl) %>%
  summarise(across(c(mpg, qsec), mean))
## # A tibble: 3 × 3
## cyl mpg qsec
## <dbl> <dbl> <dbl>
## 1 4 26.7 19.1
## 2 6 19.7 18.0
## 3 8 15.1 16.8
# Per-gear means of mpg, qsec and hp, shared by the two bar charts below.
gear_means <- mtcars %>%
  group_by(gear) %>%
  summarise(across(c(mpg, qsec, hp), mean))
# Mean hp per gear. It has its own name and is displayed here; assigning it
# to p2 would let the next assignment overwrite it before it is ever shown.
p_hp <- gear_means %>%
  ggplot(aes(gear, hp)) +
  geom_col(fill = "grey")
p_hp
# Mean mpg per gear, with the gear levels ordered by mean mpg.
p2 <- gear_means %>%
  mutate(gear = fct_reorder(gear, mpg)) %>%
  ggplot(aes(gear, mpg)) +
  geom_col(fill = "purple")
p2
# Mean mpg per cyl value as a bar chart.
# across() replaces the superseded summarise_at()/vars() form.
p <- mtcars %>%
  group_by(cyl) %>%
  summarise(across(c(mpg, qsec), mean)) %>%
  ggplot(aes(cyl, mpg)) +
  geom_col(fill = "orange")
p
# Mean qsec per cyl value as a bar chart with a blue outline and yellow fill.
# The stray code fence is removed, the curly quotes are replaced by straight
# quotes, and the assignment and the print are separate statements.
pe <- mtcars %>%
  group_by(cyl) %>%
  summarise(across(c(mpg, qsec), mean)) %>%
  ggplot(aes(cyl, qsec)) +
  geom_col(colour = "blue", fill = "yellow")
pe