# Load the movies CSV file into a data frame named movies_dataset.
movies_dataset <- utils::read.csv(file = "D:/movies_dataset.csv")

Thông tin cơ bản về dữ liệu

Kích thước bộ dữ liệu

# Number of rows (observations) followed by number of columns (variables).
c(nrow(movies_dataset), ncol(movies_dataset))
## [1] 999999     17

==> Bộ dữ liệu có 999999 quan sát và 17 biến

Tên các biến của bộ dữ liệu

# List the column (variable) names of the data frame.
colnames(movies_dataset)
##  [1] "MovieID"              "Title"                "Genre"               
##  [4] "ReleaseYear"          "ReleaseDate"          "Country"             
##  [7] "BudgetUSD"            "US_BoxOfficeUSD"      "Global_BoxOfficeUSD" 
## [10] "Opening_Day_SalesUSD" "One_Week_SalesUSD"    "IMDbRating"          
## [13] "RottenTomatoesScore"  "NumVotesIMDb"         "NumVotesRT"          
## [16] "Director"             "LeadActor"

Kiểu dữ liệu (kiểu lưu trữ nội bộ, theo typeof) của các biến trong bộ dữ liệu

# Report the internal storage type (as given by typeof) of every column.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# named character vector; sapply() would return an empty list for a data
# frame with no columns.
vapply(movies_dataset, typeof, character(1))
##              MovieID                Title                Genre 
##            "integer"          "character"          "character" 
##          ReleaseYear          ReleaseDate              Country 
##            "integer"          "character"          "character" 
##            BudgetUSD      US_BoxOfficeUSD  Global_BoxOfficeUSD 
##             "double"             "double"             "double" 
## Opening_Day_SalesUSD    One_Week_SalesUSD           IMDbRating 
##             "double"             "double"             "double" 
##  RottenTomatoesScore         NumVotesIMDb           NumVotesRT 
##            "integer"            "integer"            "integer" 
##             Director            LeadActor 
##          "character"          "character"

Thống kê tóm tắt

# Print per-column summary statistics for the whole data frame.
summary(object = movies_dataset)
##     MovieID          Title              Genre            ReleaseYear  
##  Min.   :     1   Length:999999      Length:999999      Min.   :1950  
##  1st Qu.:250001   Class :character   Class :character   1st Qu.:1984  
##  Median :500000   Mode  :character   Mode  :character   Median :2001  
##  Mean   :500000                                         Mean   :1998  
##  3rd Qu.:750000                                         3rd Qu.:2014  
##  Max.   :999999                                         Max.   :2025  
##  ReleaseDate          Country            BudgetUSD         US_BoxOfficeUSD    
##  Length:999999      Length:999999      Min.   :   100000   Min.   :4.002e+04  
##  Class :character   Class :character   1st Qu.:  1190511   1st Qu.:1.490e+06  
##  Mode  :character   Mode  :character   Median :  3265790   Median :4.389e+06  
##                                        Mean   :  9802824   Mean   :1.496e+07  
##                                        3rd Qu.:  9002791   3rd Qu.:1.288e+07  
##                                        Max.   :300000000   Max.   :1.018e+09  
##  Global_BoxOfficeUSD Opening_Day_SalesUSD One_Week_SalesUSD     IMDbRating    
##  Min.   :1.000e+05   Min.   :     4050    Min.   :    16507   Min.   : 1.000  
##  1st Qu.:2.762e+06   1st Qu.:   279026    1st Qu.:   738315   1st Qu.: 5.500  
##  Median :8.090e+06   Median :   838722    Median :  2179436   Median : 6.500  
##  Mean   :2.721e+07   Mean   :  2992745    Mean   :  7483442   Mean   : 6.495  
##  3rd Qu.:2.355e+07   3rd Qu.:  2510360    3rd Qu.:  6415143   3rd Qu.: 7.500  
##  Max.   :1.499e+09   Max.   :295751068    Max.   :579555113   Max.   :10.000  
##  RottenTomatoesScore  NumVotesIMDb       NumVotesRT       Director        
##  Min.   :  0.00      Min.   :    100   Min.   :    50   Length:999999     
##  1st Qu.: 53.00      1st Qu.:   1083   1st Qu.:   119   Class :character  
##  Median : 65.00      Median :   2983   Median :   405   Mode  :character  
##  Mean   : 64.78      Mean   :   9137   Mean   :  2032                     
##  3rd Qu.: 77.00      3rd Qu.:   8192   3rd Qu.:  1360                     
##  Max.   :100.00      Max.   :1000000   Max.   :500000                     
##   LeadActor        
##  Length:999999     
##  Class :character  
##  Mode  :character  
##                    
##                    
##