── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Scatter plot of `hwy` against `displ` from the `mpg` data.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.65044
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
data values than degrees of freedom.
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
5.6935
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.5065
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.65044
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 4.008
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.25
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
4.008
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.708
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.25
geom_boxplot() produces a box-and-whisker plot to summarise the distribution of a set of points.
# This example uses the ChickWeight dataset, which ships with base R
# (package `datasets`).

# First plot: one line per chick (group = Chick), coloured by Diet.
p1 <- ggplot(
  ChickWeight,
  aes(x = Time, y = weight, colour = Diet, group = Chick)
) +
  geom_line() +
  ggtitle("Tiêu đề thôi, không gì cả")
# Second plot: semi-transparent points with a smoothed trend per Diet.
# `linewidth` replaces `size` for line geoms (deprecated since ggplot2 3.4.0).
p2 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet)) +
  geom_point(alpha = 0.3) +
  geom_smooth(alpha = 0.2, linewidth = 1) +
  ggtitle("Tiêu đề 2 - biểu đồ smooth ")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Third plot: density of weight for the rows where Time == 21, one curve
# per Diet.
p3 <- ggplot(
  subset(ChickWeight, Time == 21),
  aes(x = weight, colour = Diet)
) +
  geom_density() +
  ggtitle("Tiêu đề 3- Biểu đồ phân phối")
# Fourth plot: histogram of weight for the rows where Time == 21, one facet
# row per Diet.
p4 <- ggplot(
  subset(ChickWeight, Time == 21),
  aes(x = weight, fill = Diet)
) +
  geom_histogram(colour = "black", binwidth = 50) +
  facet_grid(Diet ~ .) +
  ggtitle("Tiêu đề 4 - biểu đồ phân phối histogram") +
  theme(legend.position = "none") # No legend (redundant in this graph)
# Draw the four plots p1..p4 together on a two-column grid.
gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Horizontal bars of `cyl` per manufacturer, filled by model.
# geom_col() is the stat = "identity" form of geom_bar().
ggplot(mpg, aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col() +
  coord_flip()
# Same chart as above, restricted to the toyota and audi rows.
mpg |>
  subset(manufacturer %in% c("toyota", "audi")) |>
  ggplot(aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col() +
  coord_flip()
# Every manufacturer except chevrolet: dodged bars, one panel per year.
mpg |>
  subset(manufacturer != "chevrolet") |>
  ggplot(aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~year)
# Read the "Data" sheet of the supermarket transactions workbook into X.
# NOTE(review): the path is absolute and machine-specific; it will only
# resolve where this exact folder layout exists.
# NOTE(review): Purchase.Date prints as plain numbers (e.g. 40895), which look
# like Excel serial dates -- confirm whether they should be converted.
X <- openxlsx::read.xlsx(
  xlsxFile = "C:\\Users\\Huynh Chuong\\Desktop\\University\\UEL\\Class_QuantMethods\\2024\\Lectures\\Supermarket Transactions.xlsx",
  sheet = "Data"
)

# Preview the first 20 rows.
head(X, n = 20)
Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
1 1 40895 7223 F M Y
2 2 40897 7841 M M Y
3 3 40898 8374 F M N
4 4 40898 9619 M M Y
5 5 40899 1900 F S Y
6 6 40899 6696 F M Y
7 7 40900 9673 M S Y
8 8 40902 354 F M Y
9 9 40902 1293 M M Y
10 10 40902 7938 M S N
11 11 40903 9357 F M N
12 12 40903 3097 M M Y
13 13 40903 2741 M S N
14 14 40903 2032 F M N
15 15 40904 6651 M S N
16 16 40904 5230 M M Y
17 17 40904 6161 F S N
18 18 40905 5717 F S N
19 19 40905 354 F M Y
20 20 40905 3458 F S N
Children Annual.Income City State.or.Province Country
1 2 $30K - $50K Los Angeles CA USA
2 5 $70K - $90K Los Angeles CA USA
3 2 $50K - $70K Bremerton WA USA
4 3 $30K - $50K Portland OR USA
5 3 $130K - $150K Beverly Hills CA USA
6 3 $10K - $30K Beverly Hills CA USA
7 2 $30K - $50K Salem OR USA
8 2 $150K + Yakima WA USA
9 3 $10K - $30K Bellingham WA USA
10 1 $50K - $70K San Diego CA USA
11 0 $30K - $50K Beverly Hills CA USA
12 1 $30K - $50K Beverly Hills CA USA
13 3 $70K - $90K Bellingham WA USA
14 3 $10K - $30K Yakima WA USA
15 0 $30K - $50K Portland OR USA
16 3 $30K - $50K Salem OR USA
17 1 $10K - $30K Tacoma WA USA
18 0 $30K - $50K Los Angeles CA USA
19 2 $150K + Yakima WA USA
20 5 $10K - $30K San Diego CA USA
Product.Family Product.Department Product.Category Units.Sold Revenue
1 Food Snack Foods Snack Foods 5 27.38
2 Food Produce Vegetables 5 14.90
3 Food Snack Foods Snack Foods 3 5.52
4 Food Snacks Candy 4 4.44
5 Drink Beverages Carbonated Beverages 4 14.00
6 Food Deli Side Dishes 3 4.37
7 Food Frozen Foods Breakfast Foods 4 13.78
8 Food Canned Foods Canned Soup 6 7.34
9 Non-Consumable Household Cleaning Supplies 1 2.41
10 Non-Consumable Health and Hygiene Pain Relievers 2 8.96
11 Food Snack Foods Snack Foods 3 11.82
12 Food Baking Goods Baking Goods 5 14.45
13 Food Canned Foods Canned Tuna 4 19.18
14 Non-Consumable Household Plastic Products 4 19.50
15 Food Produce Fruit 5 13.06
16 Non-Consumable Health and Hygiene Hygiene 5 12.42
17 Food Frozen Foods Meat 5 17.65
18 Food Snack Foods Snack Foods 3 8.74
19 Drink Dairy Dairy 5 7.75
20 Drink Beverages Drinks 5 6.66
# Keep the rows with Marital.Status "M", then draw GGally's row-bar chart of
# Gender against Product.Category.
X |>
  filter(Marital.Status == "M") |>
  ggally_rowbar(mapping = aes(x = Gender, y = Product.Category))
# Revenue by City, filled by Gender, drawn three ways from the raw rows of X.
# geom_col() is the stat = "identity" form of geom_bar().

# 1. Default (stacked) horizontal bars.
ggplot(X, aes(x = City, y = Revenue, fill = Gender)) +
  geom_col() +
  coord_flip()

# 2. Genders side by side.
# NOTE(review): X appears to hold one row per transaction, so dodged bars for
# the same City/Gender presumably overlap instead of adding up -- confirm
# whether a summarised table should be plotted here.
ggplot(X, aes(x = City, y = Revenue, fill = Gender)) +
  geom_col(position = "dodge") +
  coord_flip()

# 3. As (2), with one panel per Gender.
ggplot(X, aes(x = City, y = Revenue, fill = Gender)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(~Gender)
# Total Revenue for every City/Gender pair.
# `.groups = "drop_last"` spells out summarise()'s default (the result stays
# grouped by City) and silences the "has grouped output" message.
# `na.rm = TRUE` matches the summary that feeds the faceted plot later on.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop_last")
`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.
# A tibble: 46 × 3
# Groups: City [23]
City Gender Revenue
<chr> <chr> <dbl>
1 Acapulco F 2566.
2 Acapulco M 2596.
3 Bellingham F 453.
4 Bellingham M 539.
5 Beverly Hills F 5050.
6 Beverly Hills M 5270.
7 Bremerton F 5270.
8 Bremerton M 5705
9 Camacho F 3643.
10 Camacho M 2154.
# ℹ 36 more rows
`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.
# Sum Revenue per City/Gender, then plot the totals as horizontal dodged bars
# with one panel per Gender.
# One pipe operator is used throughout, and `.groups = "drop"` returns an
# ungrouped table directly (replacing the separate ungroup() step and the
# "has grouped output" message).
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(x = Revenue, y = City, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~Gender)
`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.
# A tibble: 23 × 3
# Groups: City [23]
City Gender Revenue
<chr> <chr> <dbl>
1 Acapulco M 2596.
2 Bellingham M 539.
3 Beverly Hills M 5270.
4 Bremerton M 5705
5 Camacho F 3643.
6 Guadalajara F 291.
7 Hidalgo F 7361.
8 Los Angeles M 6282.
9 Merida F 4770.
10 Mexico City F 1255.
# ℹ 13 more rows
# A tibble: 6 × 3
City Gender Revenue
<fct> <chr> <dbl>
1 Acapulco F 2566.
2 Acapulco M 2596.
3 Bellingham F 453.
4 Bellingham M 539.
5 Beverly Hills F 5050.
6 Beverly Hills M 5270.
# A tibble: 10 × 4
# Groups: City [5]
City Gender Revenue D
<fct> <chr> <dbl> <dbl>
1 Hidalgo F 7361. 1.86
2 Hidalgo M 3952. 1.86
3 Camacho F 3643. 1.69
4 Camacho M 2154. 1.69
5 Walla Walla F 488. 1.38
6 Walla Walla M 676. 1.38
7 San Andres F 3459. 1.37
8 San Andres M 4748. 1.37
9 Yakima F 2159. 1.35
10 Yakima M 2911. 1.35
# A tibble: 10 × 3
City Gender Revenue
<fct> <chr> <dbl>
1 Camacho F 3643.
2 Camacho M 2154.
3 Hidalgo F 7361.
4 Hidalgo M 3952.
5 San Andres F 3459.
6 San Andres M 4748.
7 Walla Walla F 488.
8 Walla Walla M 676.
9 Yakima F 2159.
10 Yakima M 2911.
Warning: package 'plotly' was built under R version 4.4.1
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Print the first rows of X (head() defaults to six).
X |> head()
Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
1 1 40895 7223 F M Y
2 2 40897 7841 M M Y
3 3 40898 8374 F M N
4 4 40898 9619 M M Y
5 5 40899 1900 F S Y
6 6 40899 6696 F M Y
Children Annual.Income City State.or.Province Country Product.Family
1 2 $30K - $50K Los Angeles CA USA Food
2 5 $70K - $90K Los Angeles CA USA Food
3 2 $50K - $70K Bremerton WA USA Food
4 3 $30K - $50K Portland OR USA Food
5 3 $130K - $150K Beverly Hills CA USA Drink
6 3 $10K - $30K Beverly Hills CA USA Food
Product.Department Product.Category Units.Sold Revenue
1 Snack Foods Snack Foods 5 27.38
2 Produce Vegetables 5 14.90
3 Snack Foods Snack Foods 3 5.52
4 Snacks Candy 4 4.44
5 Beverages Carbonated Beverages 4 14.00
6 Deli Side Dishes 3 4.37
# Add R: the Revenue total of each City/Gender/Product.Family group, repeated
# on every row of that group (mutate() keeps all rows).
# NOTE(review): X2 stays grouped by City, Gender and Product.Family, as
# before; add ungroup() if nothing later relies on the grouping.
X2 <- X %>%
  group_by(City, Gender, Product.Family) %>%
  mutate(R = sum(Revenue))

# Bubble chart of R per City: point size follows R, colour follows
# Product.Family. Built once, stored as p1, then printed.
p1 <- ggplot(X2, aes(x = R, y = City, size = R, color = Product.Family)) +
  geom_point()
p1