Entregable 2

# Load the booking records. Cells equal to "NA" or "null" are read as
# missing values, and text columns are converted to factors.
ncr_ride_bookings <- read.csv(
  file = "ncr_ride_bookings.csv",
  na.strings = c("NA", "null"),
  stringsAsFactors = TRUE
)

# Per-column overview: level counts for factor columns, quantiles and NA
# counts for numeric columns.
summary(ncr_ride_bookings)
##          Date              Time               Booking.ID    
##  2024-11-16:   462   17:44:57:    16   "CNR2726142":     3  
##  2024-05-09:   456   19:17:33:    12   "CNR3648267":     3  
##  2024-09-18:   456   10:23:23:    11   "CNR5292943":     3  
##  2024-01-26:   452   11:29:50:    11   "CNR6337479":     3  
##  2024-02-06:   452   15:23:56:    11   "CNR7199036":     3  
##  2024-10-12:   452   17:54:33:    11   "CNR7585544":     3  
##  (Other)   :147270   (Other) :149928   (Other)     :149982  
##                Booking.Status        Customer.ID            Vehicle.Type  
##  Cancelled by Customer:10500   "CID4523979":     3   Auto         :37419  
##  Cancelled by Driver  :27000   "CID5481002":     3   Bike         :22517  
##  Completed            :93000   "CID6468528":     3   eBike        :10557  
##  Incomplete           : 9000   "CID6715450":     3   Go Mini      :29806  
##  No Driver Found      :10500   "CID7828101":     3   Go Sedan     :27141  
##                                "CID8727691":     3   Premier Sedan:18111  
##                                (Other)     :149982   Uber XL      : 4449  
##         Pickup.Location           Drop.Location       Avg.VTAT     
##  Khandsa        :   949   Ashram         :   936   Min.   : 2.000  
##  Barakhamba Road:   946   Basai Dhankot  :   917   1st Qu.: 5.300  
##  Saket          :   931   Lok Kalyan Marg:   916   Median : 8.300  
##  Badarpur       :   921   Narsinghpur    :   913   Mean   : 8.456  
##  Pragati Maidan :   920   Cyber Hub      :   912   3rd Qu.:11.300  
##  Madipur        :   919   Kalkaji        :   912   Max.   :20.000  
##  (Other)        :144414   (Other)        :144494   NA's   :10500   
##     Avg.CTAT     Cancelled.Rides.by.Customer
##  Min.   :10.00   Min.   :1                  
##  1st Qu.:21.60   1st Qu.:1                  
##  Median :28.80   Median :1                  
##  Mean   :29.15   Mean   :1                  
##  3rd Qu.:36.80   3rd Qu.:1                  
##  Max.   :45.00   Max.   :1                  
##  NA's   :48000   NA's   :139500             
##                             Reason.for.cancelling.by.Customer
##  AC is not working                           :  1155         
##  Change of plans                             :  2353         
##  Driver asked to cancel                      :  2295         
##  Driver is not moving towards pickup location:  2335         
##  Wrong Address                               :  2362         
##  NA's                                        :139500         
##                                                              
##  Cancelled.Rides.by.Driver                       Driver.Cancellation.Reason
##  Min.   :1                 Customer related issue             :  6837      
##  1st Qu.:1                 More than permitted people in there:  6686      
##  Median :1                 Personal & Car related issues      :  6726      
##  Mean   :1                 The customer was coughing/sick     :  6751      
##  3rd Qu.:1                 NA's                               :123000      
##  Max.   :1                                                                 
##  NA's   :123000                                                            
##  Incomplete.Rides      Incomplete.Rides.Reason Booking.Value    Ride.Distance  
##  Min.   :1        Customer Demand  :  3040     Min.   :  50.0   Min.   : 1.00  
##  1st Qu.:1        Other Issue      :  2948     1st Qu.: 234.0   1st Qu.:12.46  
##  Median :1        Vehicle Breakdown:  3012     Median : 414.0   Median :23.72  
##  Mean   :1        NA's             :141000     Mean   : 508.3   Mean   :24.64  
##  3rd Qu.:1                                     3rd Qu.: 689.0   3rd Qu.:36.82  
##  Max.   :1                                     Max.   :4277.0   Max.   :50.00  
##  NA's   :141000                                NA's   :48000    NA's   :48000  
##  Driver.Ratings  Customer.Rating     Payment.Method 
##  Min.   :3.000   Min.   :3.000   Cash       :25367  
##  1st Qu.:4.100   1st Qu.:4.200   Credit Card:10209  
##  Median :4.300   Median :4.500   Debit Card : 8239  
##  Mean   :4.231   Mean   :4.405   Uber Wallet:12276  
##  3rd Qu.:4.600   3rd Qu.:4.800   UPI        :45909  
##  Max.   :5.000   Max.   :5.000   NA's       :48000  
##  NA's   :57000   NA's   :57000
# Two-column data frame holding only the driver and customer ratings.
# The columns are passed as `$` expressions, so data.frame() derives the
# names "ncr_ride_bookings.Driver.Ratings" and
# "ncr_ride_bookings.Customer.Rating".
nuevosDatos <- data.frame(
  ncr_ride_bookings$Driver.Ratings,
  ncr_ride_bookings$Customer.Rating
)

library(corrplot)
## corrplot 0.95 loaded
# Simple linear regression of driver ratings (response, y) on customer
# ratings (predictor, x). The variables keep the names x and y so the
# fitted formula and the coefficient labels read `y ~ x`.
x <- ncr_ride_bookings[["Customer.Rating"]]
y <- ncr_ride_bookings[["Driver.Ratings"]]
modelo <- lm(y ~ x)

# Coefficients, residual spread and goodness of fit for the model.
summary(modelo)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2324 -0.1320  0.0687  0.3678  0.7696 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.235433   0.014483 292.439   <2e-16 ***
## x           -0.001008   0.003272  -0.308    0.758    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4369 on 92998 degrees of freedom
##   (57000 observations deleted due to missingness)
## Multiple R-squared:  1.021e-06,  Adjusted R-squared:  -9.732e-06 
## F-statistic: 0.09493 on 1 and 92998 DF,  p-value: 0.758
# Correlation matrix of the two rating columns, computed only on the rows
# where both ratings are present.
cor(nuevosDatos[complete.cases(nuevosDatos), ])
##                                   ncr_ride_bookings.Driver.Ratings
## ncr_ride_bookings.Driver.Ratings                       1.000000000
## ncr_ride_bookings.Customer.Rating                     -0.001010351
##                                   ncr_ride_bookings.Customer.Rating
## ncr_ride_bookings.Driver.Ratings                       -0.001010351
## ncr_ride_bookings.Customer.Rating                       1.000000000

R Markdown

Diagrama de cajas de Avg.CTAT según Payment.Method:

library(ggplot2)
# Box plot of Avg.CTAT for each payment method, filled by payment method.
ggplot(
  data = ncr_ride_bookings,
  mapping = aes(x = Payment.Method, y = Avg.CTAT, fill = Payment.Method)
) +
  geom_boxplot()
## Warning: Removed 48000 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

BARRAS APILADAS: 2 VARIABLES CUALITATIVAS

Tabla de contingencia y gráfico de barras apiladas de Booking.Status según Vehicle.Type:

# Contingency table: booking status (rows) by vehicle type (columns).
table(
  ncr_ride_bookings[["Booking.Status"]],
  ncr_ride_bookings[["Vehicle.Type"]]
)
##                        
##                          Auto  Bike eBike Go Mini Go Sedan Premier Sedan
##   Cancelled by Customer  2680  1575   723    2097     1832          1266
##   Cancelled by Driver    6643  4077  1907    5330     5031          3250
##   Completed             23155 14034  6551   18549    16676         11252
##   Incomplete             2260  1328   630    1815     1642          1063
##   No Driver Found        2681  1503   746    2015     1960          1280
##                        
##                         Uber XL
##   Cancelled by Customer     327
##   Cancelled by Driver       762
##   Completed                2783
##   Incomplete                262
##   No Driver Found           315
library(ggplot2)
# Bar chart of booking counts per vehicle type, with each bar split by
# booking status.
ggplot(
  data = ncr_ride_bookings,
  mapping = aes(x = Vehicle.Type, fill = Booking.Status)
) +
  geom_bar()

OTRO PUNTO