Entregable 2
# Load the ride-bookings CSV from the working directory. Both "NA" and the
# literal string "null" are read as missing values, and text columns are
# converted to factors so that summary() reports a count per level.
ncr_ride_bookings <- read.csv(
  file = "ncr_ride_bookings.csv",
  na.strings = c("NA", "null"),
  stringsAsFactors = TRUE
)

# Per-column overview: level counts for factor columns, quantiles and NA
# counts for numeric columns.
summary(ncr_ride_bookings)
## Date Time Booking.ID
## 2024-11-16: 462 17:44:57: 16 "CNR2726142": 3
## 2024-05-09: 456 19:17:33: 12 "CNR3648267": 3
## 2024-09-18: 456 10:23:23: 11 "CNR5292943": 3
## 2024-01-26: 452 11:29:50: 11 "CNR6337479": 3
## 2024-02-06: 452 15:23:56: 11 "CNR7199036": 3
## 2024-10-12: 452 17:54:33: 11 "CNR7585544": 3
## (Other) :147270 (Other) :149928 (Other) :149982
## Booking.Status Customer.ID Vehicle.Type
## Cancelled by Customer:10500 "CID4523979": 3 Auto :37419
## Cancelled by Driver :27000 "CID5481002": 3 Bike :22517
## Completed :93000 "CID6468528": 3 eBike :10557
## Incomplete : 9000 "CID6715450": 3 Go Mini :29806
## No Driver Found :10500 "CID7828101": 3 Go Sedan :27141
## "CID8727691": 3 Premier Sedan:18111
## (Other) :149982 Uber XL : 4449
## Pickup.Location Drop.Location Avg.VTAT
## Khandsa : 949 Ashram : 936 Min. : 2.000
## Barakhamba Road: 946 Basai Dhankot : 917 1st Qu.: 5.300
## Saket : 931 Lok Kalyan Marg: 916 Median : 8.300
## Badarpur : 921 Narsinghpur : 913 Mean : 8.456
## Pragati Maidan : 920 Cyber Hub : 912 3rd Qu.:11.300
## Madipur : 919 Kalkaji : 912 Max. :20.000
## (Other) :144414 (Other) :144494 NA's :10500
## Avg.CTAT Cancelled.Rides.by.Customer
## Min. :10.00 Min. :1
## 1st Qu.:21.60 1st Qu.:1
## Median :28.80 Median :1
## Mean :29.15 Mean :1
## 3rd Qu.:36.80 3rd Qu.:1
## Max. :45.00 Max. :1
## NA's :48000 NA's :139500
## Reason.for.cancelling.by.Customer
## AC is not working : 1155
## Change of plans : 2353
## Driver asked to cancel : 2295
## Driver is not moving towards pickup location: 2335
## Wrong Address : 2362
## NA's :139500
##
## Cancelled.Rides.by.Driver Driver.Cancellation.Reason
## Min. :1 Customer related issue : 6837
## 1st Qu.:1 More than permitted people in there: 6686
## Median :1 Personal & Car related issues : 6726
## Mean :1 The customer was coughing/sick : 6751
## 3rd Qu.:1 NA's :123000
## Max. :1
## NA's :123000
## Incomplete.Rides Incomplete.Rides.Reason Booking.Value Ride.Distance
## Min. :1 Customer Demand : 3040 Min. : 50.0 Min. : 1.00
## 1st Qu.:1 Other Issue : 2948 1st Qu.: 234.0 1st Qu.:12.46
## Median :1 Vehicle Breakdown: 3012 Median : 414.0 Median :23.72
## Mean :1 NA's :141000 Mean : 508.3 Mean :24.64
## 3rd Qu.:1 3rd Qu.: 689.0 3rd Qu.:36.82
## Max. :1 Max. :4277.0 Max. :50.00
## NA's :141000 NA's :48000 NA's :48000
## Driver.Ratings Customer.Rating Payment.Method
## Min. :3.000 Min. :3.000 Cash :25367
## 1st Qu.:4.100 1st Qu.:4.200 Credit Card:10209
## Median :4.300 Median :4.500 Debit Card : 8239
## Mean :4.231 Mean :4.405 Uber Wallet:12276
## 3rd Qu.:4.600 3rd Qu.:4.800 UPI :45909
## Max. :5.000 Max. :5.000 NA's :48000
## NA's :57000 NA's :57000
# Pair the two rating columns in their own data frame. No column names are
# supplied, so data.frame() derives them from the argument expressions.
nuevosDatos <- data.frame(
  ncr_ride_bookings$Driver.Ratings,
  ncr_ride_bookings$Customer.Rating
)
library(corrplot)
## corrplot 0.95 loaded
# Simple linear regression of the driver rating (response, y) on the customer
# rating (predictor, x). Rows where either rating is missing are dropped by
# lm() before fitting.
x <- ncr_ride_bookings$Customer.Rating
y <- ncr_ride_bookings$Driver.Ratings
modelo <- lm(formula = y ~ x)

# Coefficients, residual spread and goodness-of-fit of the fitted model.
summary(modelo)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2324 -0.1320 0.0687 0.3678 0.7696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.235433 0.014483 292.439 <2e-16 ***
## x -0.001008 0.003272 -0.308 0.758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4369 on 92998 degrees of freedom
## (57000 observations deleted due to missingness)
## Multiple R-squared: 1.021e-06, Adjusted R-squared: -9.732e-06
## F-statistic: 0.09493 on 1 and 92998 DF, p-value: 0.758
# Correlation matrix of the two ratings, computed only on the rows in which
# both values are present.
cor(nuevosDatos[complete.cases(nuevosDatos), ])
## ncr_ride_bookings.Driver.Ratings
## ncr_ride_bookings.Driver.Ratings 1.000000000
## ncr_ride_bookings.Customer.Rating -0.001010351
## ncr_ride_bookings.Customer.Rating
## ncr_ride_bookings.Driver.Ratings -0.001010351
## ncr_ride_bookings.Customer.Rating 1.000000000
Distribución de Avg.CTAT por método de pago
library(ggplot2)
# Box plot of Avg.CTAT for each payment method; boxes are filled by payment
# method as well.
ggplot(
  data = ncr_ride_bookings,
  mapping = aes(x = Payment.Method, y = Avg.CTAT, fill = Payment.Method)
) +
  geom_boxplot()
## Warning: Removed 48000 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Cross-tabulation of booking status by vehicle type:
# Contingency table: booking status (rows) against vehicle type (columns).
table(
  ncr_ride_bookings[["Booking.Status"]],
  ncr_ride_bookings[["Vehicle.Type"]]
)
##
## Auto Bike eBike Go Mini Go Sedan Premier Sedan
## Cancelled by Customer 2680 1575 723 2097 1832 1266
## Cancelled by Driver 6643 4077 1907 5330 5031 3250
## Completed 23155 14034 6551 18549 16676 11252
## Incomplete 2260 1328 630 1815 1642 1063
## No Driver Found 2681 1503 746 2015 1960 1280
##
## Uber XL
## Cancelled by Customer 327
## Cancelled by Driver 762
## Completed 2783
## Incomplete 262
## No Driver Found 315
library(ggplot2)
# Bar chart of the number of bookings per vehicle type, with each bar's fill
# split by booking status.
ggplot(
  data = ncr_ride_bookings,
  mapping = aes(x = Vehicle.Type, fill = Booking.Status)
) +
  geom_bar()
OTRO PUNTO