# Read the exported retail-transaction CSV into the working data frame.
tieuluan <- read.csv(
  file = "D:/TMT_R/Retail Transaction export 2025-10-04 16-35-57.csv"
)
# Second binding to the freshly loaded data.
A <- tieuluan
# Information: structure of the data (column names, types, first values)
str(tieuluan)
## 'data.frame':    100000 obs. of  10 variables:
##  $ CustomerID        : int  109318 993229 579675 799826 121413 463050 888163 843385 839609 184135 ...
##  $ ProductID         : chr  "C" "C" "A" "D" ...
##  $ Quantity          : int  7 4 8 5 7 3 7 8 5 4 ...
##  $ Price             : num  80.1 75.2 31.5 98.9 93.2 ...
##  $ TransactionDate   : chr  "2023-12-26 12:32:00.000" "2023-08-05 00:00:00.000" "2024-03-11 18:51:00.000" "2023-10-27 22:00:00.000" ...
##  $ PaymentMethod     : chr  "Cash" "Cash" "Cash" "PayPal" ...
##  $ StoreLocation     : chr  "176 Andrew Cliffs\nBaileyfort, HI 93354" "11635 William Well Suite 809\nEast Kara, MT 19483" "910 Mendez Ville Suite 909\nPort Lauraland, MO 99563" "87522 Sharon Corners Suite 500\nLake Tammy, MO 76234" ...
##  $ ProductCategory   : chr  "Books" "Home Decor" "Books" "Books" ...
##  $ DiscountApplied...: num  18.68 14.12 15.94 6.69 4.03 ...
##  $ TotalAmount       : num  456 258 212 461 626 ...
# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns.
summary(tieuluan)
##    CustomerID      ProductID            Quantity         Price       
##  Min.   :    14   Length:100000      Min.   :1.000   Min.   : 10.00  
##  1st Qu.:250694   Class :character   1st Qu.:3.000   1st Qu.: 32.55  
##  Median :499679   Mode  :character   Median :5.000   Median : 55.12  
##  Mean   :500464                      Mean   :5.009   Mean   : 55.07  
##  3rd Qu.:751105                      3rd Qu.:7.000   3rd Qu.: 77.46  
##  Max.   :999997                      Max.   :9.000   Max.   :100.00  
##  TransactionDate    PaymentMethod      StoreLocation      ProductCategory   
##  Length:100000      Length:100000      Length:100000      Length:100000     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  DiscountApplied...   TotalAmount     
##  Min.   : 0.000046   Min.   :  8.275  
##  1st Qu.: 5.001013   1st Qu.: 95.163  
##  Median :10.030353   Median :200.368  
##  Mean   :10.020155   Mean   :248.335  
##  3rd Qu.:15.018367   3rd Qu.:362.010  
##  Max.   :19.999585   Max.   :896.141
# Number of rows (observations)
nrow(tieuluan)
## [1] 100000
# Number of columns (variables)
ncol(tieuluan)
## [1] 10
# Rows and columns together
dim(tieuluan)
## [1] 100000     10
# Column names
names(tieuluan)
##  [1] "CustomerID"         "ProductID"          "Quantity"          
##  [4] "Price"              "TransactionDate"    "PaymentMethod"     
##  [7] "StoreLocation"      "ProductCategory"    "DiscountApplied..."
## [10] "TotalAmount"

2. Phân tổ các biến

2.1 Phân tổ biến Price theo mức giá

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Bin Price into five labelled price bands, stored in the new character
# column `mucgia`. case_when() evaluates its clauses in order, so each upper
# bound implicitly starts where the previous band ended:
#   < 20, [20, 40), [40, 60), [60, 80), >= 80.
# A missing Price matches no clause and therefore stays NA.
tieuluan <- mutate(
  tieuluan,
  mucgia = case_when(
    Price < 20 ~ "Rất rẻ",
    Price < 40 ~ "Rẻ",
    Price < 60 ~ "Trung bình",
    Price < 80 ~ "Mắc",
    Price >= 80 ~ "Rất mắc"
  )
)

2.2 Phân loại

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# R Markdown template example: summarise the `cars` data set
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.