# ---- Session setup ----
# Remove every visible object from the workspace, then run the garbage
# collector; its memory report is reproduced in the `##` lines below.
rm(list = ls())
gc()
##           used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells  543004 29.0    1201317 64.2         NA   700245 37.4
## Vcells 1002482  7.7    8388608 64.0      16384  1963257 15.0
# Emit a form-feed character (typically used to clear the RStudio console).
cat("\f")

# ---- Data ----
# Read the passenger file and keep only the rows that have no missing value
# in any column. NOTE(review): this also drops rows whose only gap is in a
# column unrelated to the later Fare plots -- confirm that is intended.
test <- na.omit(read.csv("~/Desktop/Day 4/test.csv"))

# stargazer provides the summary-statistics table printed further down.
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Open the stargazer help page, then print a plain-text table of descriptive
# statistics for the columns of `test`.
help("stargazer")
stargazer(test, type = "text")
## 
## ================================================
## Statistic    N    Mean    St. Dev.  Min    Max  
## ------------------------------------------------
## PassengerId 331 1,100.233 122.910   892   1,307 
## Pclass      331   2.142    0.846     1      3   
## Age         331  30.181    14.105  0.170 76.000 
## SibSp       331   0.483    0.875     0      8   
## Parch       331   0.399    0.812     0      6   
## Fare        331  40.982    61.229  0.000 512.329
## ------------------------------------------------

We can see that the maximum fare is 512.329 dollars while the mean fare is only 40.982, so a few passengers bought very expensive tickets, which pulls the mean upward.

# ---- Fare distribution: boxplot stacked above a histogram ----
# Both panels share one x-range so they line up vertically. The upper limit
# is computed from the data rather than copied by hand from the rounded
# summary table.
fare_limits <- c(0, max(test$Fare, na.rm = TRUE))

# Open the device first: layout() and par() act on the *current* device, so
# calling layout() before dev.new() leaves the new window with a single panel
# and the histogram simply replaces the boxplot.
dev.new(width = 10, height = 5)
# `heights` is spelled out in full instead of relying on partial matching.
layout(mat     = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
       heights = c(1, 8))

# Top panel (1/9 of the device height). The default margins do not fit in
# such a narrow strip, so they are reduced. There is no bottom margin and no
# x axis here: the histogram's "Fare" axis underneath serves both panels.
?boxplot
old_par <- par(mar = c(0, 4.1, 1.6, 2.1))
boxplot(test$Fare,
        horizontal = TRUE,
        ylim       = fare_limits,
        xaxt       = "n",
        col        = rgb(0.8, 0.8, 0, 0.5),
        frame      = FALSE,
        main       = "Boxplot of Passenger Fare")

# Bottom panel: histogram on the same horizontal scale, with identical
# left/right margins so the two panels stay aligned.
?hist
par(mar = c(4.1, 4.1, 1.1, 2.1))
hist(test$Fare,
     breaks = 10,
     col    = rgb(0.2, 0.8, 0.5, 0.5),
     border = FALSE,
     main   = "",
     xlab   = "Fare",
     xlim   = fare_limits)

# Put the margins and the single-panel layout back for any later plots.
par(old_par)
layout(1)