*.

dataset<- read.csv(file = "~/Desktop/berlin-airbnb-data/listings_summary.csv", head= TRUE)
print(head(dataset,1))
##     id                       listing_url    scrape_id last_scraped
## 1 2015 https://www.airbnb.com/rooms/2015 2.018111e+13   2018-11-07
##                                               name
## 1 Berlin-Mitte Value! Quiet courtyard/very central
##                                                                                                                                                                                                                                                                                                                                                                                                                          summary
## 1 Great location!  30 of 75 sq meters. This wood floored/high ceiling typical Berlin "Altbau" section of an apartment consists of 1 simple large room, a small kitchen and a bathroom + shower. The apartment is in Mitte, close to Prenzlauer Berg/Mauerpark. Perfect for short visits, singles or couples. Your section is closed from the rest of the bigger flat wich is not noticeable. You will not be sharing your space.
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      space
## 1 A+++ location! This „Einliegerwohnung“ is an extention of a larger apartment with a separate entrance, bathroom and kitchen. The door to the rest of the apartment is soundproof, hidden, locked and barely noticable (behind mirror in pictures). Your 30 sq meters are facing a quiet courtyard. This wood floored/high ceiling typical Berlin "Altbau" apartment consists of 1 large room with a large double bed, optionally with an extra matress for a 3rd guest), a small kitchen suitable for preparing simple meals, a bathroom + shower + bathtub and of course your own entrance (all linens and towels are provided.) it is located facing a quiet courtyard, it is clean cozy and an authentic "Berlin" place to live. The place is in Mitte, close to Prenzlauer Berg one block from Kastanienallee, Zionskirchplatz, Rosenthaler Platz and Mauerpark. Generally this area is lively with galleries, museums, resturants, cafes, flea markets, bars, clubs, shops, good take away food, a park, U-Bahn, trams, supermarket
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                description
## 1 Great location!  30 of 75 sq meters. This wood floored/high ceiling typical Berlin "Altbau" section of an apartment consists of 1 simple large room, a small kitchen and a bathroom + shower. The apartment is in Mitte, close to Prenzlauer Berg/Mauerpark. Perfect for short visits, singles or couples. Your section is closed from the rest of the bigger flat wich is not noticeable. You will not be sharing your space. A+++ location! This „Einliegerwohnung“ is an extention of a larger apartment with a separate entrance, bathroom and kitchen. The door to the rest of the apartment is soundproof, hidden, locked and barely noticable (behind mirror in pictures). Your 30 sq meters are facing a quiet courtyard. This wood floored/high ceiling typical Berlin "Altbau" apartment consists of 1 large room with a large double bed, optionally with an extra matress for a 3rd guest), a small kitchen suitable for preparing simple meals, a bathroom + shower + bathtub and of course your own entrance (all linens a
##   experiences_offered
## 1                none
##                                                                                                                                                                                                                                                                                                                                                                                                                     neighborhood_overview
## 1 It is located in the former East Berlin area of Kastanienallee and Zionskirchplatz (Berlins only natural elevation), very charming and original turn of the last century architecture; this area was barely been destroyed during the war. Close to galleries, resturants, museums, flea markets, cafes, bars, clubs, shops, good take away food, a park, great public transportation: U-Bahn, S-Bahn, Trams, Busses, supermarkets etc.
##                                                                               notes
## 1 This is my home, not a hotel. I rent out occasionally when I stay with my friend.
##                                                                                                                                                                            transit
## 1 Close to U-Bahn U8 and U2 (metro), Trams M12, M10 and M8, Busses. Rosenthaler Platz and Hackescher Markt are close by. S-Bahn. Alexander Platz is 6 minutes away on the U8 line.
##                                                                                                                                                                                                                                                                                                                                                                                               access
## 1 Simple kitchen/cooking, refrigerator, microwave oven, stove, dishwasher, espresso/ coffee mashine, cable TV deskspace, shower, bath tub, little courtyard garden, 1st floor. There is a large double bed and optional additional airbeds for a 3rd or 4th person/kids. (Please note that the apartment is 30 square meters, if you stay a few nights and are more than 2 adults, it will be tight)
##        interaction
## 1 Always available
##                                                                                                                                                                                       house_rules
## 1 No parties No events No pets No smoking, not even in the courtyard/garden Do not pull the door shut with the keys in the lock from the inside; should that happen, the guest pays for locksmith
##   thumbnail_url medium_url
## 1            NA         NA
##                                                                                     picture_url
## 1 https://a0.muscache.com/im/pictures/260fd609-710a-4304-82ab-abd7498efd62.jpg?aki_policy=large
##   xl_picture_url host_id                               host_url host_name
## 1             NA    2217 https://www.airbnb.com/users/show/2217       Ian
##   host_since                        host_location
## 1 2008-08-18 Key Biscayne, Florida, United States
##                    host_about host_response_time host_response_rate
## 1 Believe in sharing economy.     within an hour                96%
##   host_acceptance_rate host_is_superhost
## 1                  N/A                 t
##                                                                                      host_thumbnail_url
## 1 https://a0.muscache.com/im/pictures/21428a22-4f38-444d-8c72-5892be9da109.jpg?aki_policy=profile_small
##                                                                                           host_picture_url
## 1 https://a0.muscache.com/im/pictures/21428a22-4f38-444d-8c72-5892be9da109.jpg?aki_policy=profile_x_medium
##   host_neighbourhood host_listings_count host_total_listings_count
## 1              Mitte                   4                         4
##                                                                 host_verifications
## 1 ['email', 'phone', 'reviews', 'jumio', 'offline_government_id', 'government_id']
##   host_has_profile_pic host_identity_verified                  street
## 1                    t                      t Berlin, Berlin, Germany
##   neighbourhood neighbourhood_cleansed neighbourhood_group_cleansed   city
## 1         Mitte        Brunnenstr. Süd                        Mitte Berlin
##    state zipcode market  smart_location country_code country latitude
## 1 Berlin   10119 Berlin Berlin, Germany           DE Germany 52.53454
##   longitude is_location_exact property_type       room_type accommodates
## 1  13.40256                 f    Guesthouse Entire home/apt            3
##   bathrooms bedrooms beds bed_type
## 1         1        1    2 Real Bed
##                                                                                                                                                                                                                                                                                                                                                                                                                 amenities
## 1 {TV,"Cable TV",Wifi,Kitchen,Gym,Heating,"Family/kid friendly","Smoke detector",Essentials,Shampoo,"Lock on bedroom door",Hangers,"Hair dryer",Iron,"Laptop friendly workspace","Private living room",Bathtub,"Hot water","Bed linens","Extra pillows and blankets",Microwave,"Coffee maker",Refrigerator,Dishwasher,"Dishes and silverware","Cooking basics",Stove,"Luggage dropoff allowed","Long term stays allowed"}
##   square_feet  price weekly_price monthly_price security_deposit
## 1          NA $60.00                                     $200.00
##   cleaning_fee guests_included extra_people minimum_nights maximum_nights
## 1       $30.00               1       $28.00              4           1125
##   calendar_updated has_availability availability_30 availability_60
## 1     3 months ago                t               0              21
##   availability_90 availability_365 calendar_last_scraped number_of_reviews
## 1              51              141            2018-11-07               118
##   first_review last_review review_scores_rating review_scores_accuracy
## 1   2016-04-11  2018-10-28                   93                     10
##   review_scores_cleanliness review_scores_checkin
## 1                         9                    10
##   review_scores_communication review_scores_location review_scores_value
## 1                          10                     10                   9
##   requires_license license jurisdiction_names instant_bookable
## 1                t                         NA                f
##   is_business_travel_ready         cancellation_policy
## 1                        f strict_14_with_grace_period
##   require_guest_profile_picture require_guest_phone_verification
## 1                             f                                f
##   calculated_host_listings_count reviews_per_month
## 1                              4              3.76
#install.packages('geosphere')
#Find the distance using latitude and longitude.
library(geosphere)
dataset$distance<-distHaversine(dataset[,49:50], c(52.52437, 13.41053))


#Filling the missing values
dataset$bathrooms[is.na(dataset$bathrooms)] <-1
dataset$bedrooms[is.na(dataset$bedrooms)]<-1
dataset$beds[is.na(dataset$beds)]<-1
dataset$review_scores_rating[is.na(dataset$review_scores_rating)]<-0
dataset$review_scores_accuracy[is.na(dataset$review_scores_accuracy)]<-0
dataset$review_scores_checkin[is.na(dataset$review_scores_checkin)]<-0
dataset$review_scores_cleanliness[is.na(dataset$review_scores_cleanliness)]<-0
dataset$review_scores_location[is.na(dataset$review_scores_location)]<-0
dataset$review_scores_value[is.na(dataset$review_scores_value)]<-0
dataset$review_scores_communication[is.na(dataset$review_scores_communication)]<-0


#Categorical features.

dataset$room_type= factor(dataset$room_type,
                       levels = c('Entire home/apt', 'Private room', 'Shared room'),
                       labels = c(1, 2, 3))
dataset$bed_type= factor(dataset$bed_type, 
                         levels= c('Airbed',"Couch","Futon","Pull-out Sofa","Real Bed"), 
                         labels= c(1,2,3,4,5))


#amenities count
dataset$amenities<-sapply(strsplit(as.character(dataset$amenities),","),FUN=function(x){length(x[x!="Null"])})



#Removing $ sign from price.
dataset$price = as.numeric(gsub("\\$", "", dataset$price))
## Warning: NAs introduced by coercion
summary(is.na(dataset$price))
##    Mode   FALSE    TRUE 
## logical   22511      41
#Features that I will be considering 
X=dataset[,c('property_type', 'room_type','accommodates','bathrooms','bedrooms','beds','bed_type','amenities','guests_included','minimum_nights','availability_30','number_of_reviews','review_scores_rating','review_scores_accuracy','review_scores_cleanliness','review_scores_checkin','review_scores_communication','review_scores_location','review_scores_value','distance',"price")]

#Removing the rows having price= 0 or null.
X <- X[!(is.na(X$price)),]

summary(is.na(X))
##  property_type   room_type       accommodates    bathrooms      
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:22511     FALSE:22511     FALSE:22511     FALSE:22511    
##   bedrooms          beds          bed_type       amenities      
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:22511     FALSE:22511     FALSE:22511     FALSE:22511    
##  guests_included minimum_nights  availability_30 number_of_reviews
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical    
##  FALSE:22511     FALSE:22511     FALSE:22511     FALSE:22511      
##  review_scores_rating review_scores_accuracy review_scores_cleanliness
##  Mode :logical        Mode :logical          Mode :logical            
##  FALSE:22511          FALSE:22511            FALSE:22511              
##  review_scores_checkin review_scores_communication review_scores_location
##  Mode :logical         Mode :logical               Mode :logical         
##  FALSE:22511           FALSE:22511                 FALSE:22511           
##  review_scores_value  distance         price        
##  Mode :logical       Mode :logical   Mode :logical  
##  FALSE:22511         FALSE:22511     FALSE:22511
#Splitting the dataset into the Training set and Test set
library(caTools)

split = sample.split(X$price, SplitRatio = 2/3)
training_set = subset(X, split == TRUE)
test_set = subset(X, split == FALSE)

#summary(regressor)
regressor= lm(price~ ., data=training_set)
y_pred= predict(regressor, newdata = test_set)
## Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels): factor property_type has new levels Chalet, Island, Tipi
summary(regressor)
## 
## Call:
## lm(formula = price ~ ., data = training_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -252.25  -15.61   -3.09    9.85  830.27 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         -4.316e+00  2.481e+01  -0.174 0.861892
## property_typeApartment               1.496e+01  2.259e+01   0.662 0.507757
## property_typeBarn                    5.646e+01  4.515e+01   1.250 0.211185
## property_typeBed and breakfast       2.252e+01  2.337e+01   0.963 0.335352
## property_typeBoat                    1.275e+02  2.526e+01   5.047 4.55e-07
## property_typeBoutique hotel          1.250e+02  2.358e+01   5.301 1.17e-07
## property_typeBungalow                1.075e+02  2.577e+01   4.174 3.01e-05
## property_typeCabin                   3.209e+01  2.858e+01   1.123 0.261526
## property_typeCamper/RV               1.299e+01  2.768e+01   0.469 0.638890
## property_typeCasa particular (Cuba)  6.143e+00  3.569e+01   0.172 0.863337
## property_typeCastle                  1.500e+01  4.517e+01   0.332 0.739834
## property_typeCave                    5.494e+00  4.514e+01   0.122 0.903122
## property_typeCondominium             2.002e+01  2.267e+01   0.883 0.377102
## property_typeCottage                 8.737e+00  2.991e+01   0.292 0.770206
## property_typeGuest suite             1.076e+01  2.324e+01   0.463 0.643349
## property_typeGuesthouse              1.707e+01  2.330e+01   0.732 0.463997
## property_typeHostel                 -1.296e+00  2.305e+01  -0.056 0.955156
## property_typeHotel                   8.293e+01  2.414e+01   3.436 0.000593
## property_typeHouse                   2.095e+01  2.272e+01   0.922 0.356417
## property_typeHouseboat               1.421e+02  2.698e+01   5.265 1.42e-07
## property_typeIn-law                  3.721e+01  4.511e+01   0.825 0.409509
## property_typeLoft                    4.267e+01  2.269e+01   1.880 0.060077
## property_typeOther                   3.719e+01  2.352e+01   1.581 0.113891
## property_typePension (South Korea)   3.824e+01  2.765e+01   1.383 0.166617
## property_typeResort                  7.307e+00  3.193e+01   0.229 0.819002
## property_typeServiced apartment      6.388e+01  2.286e+01   2.794 0.005213
## property_typeTiny house              3.305e+01  2.650e+01   1.247 0.212312
## property_typeTownhouse               1.517e+01  2.305e+01   0.658 0.510490
## property_typeTrain                   3.215e+01  4.526e+01   0.711 0.477398
## property_typeVilla                   1.969e+01  2.702e+01   0.729 0.466141
## room_type2                          -2.229e+01  7.356e-01 -30.295  < 2e-16
## room_type3                          -3.276e+01  3.017e+00 -10.860  < 2e-16
## accommodates                         8.811e+00  3.986e-01  22.106  < 2e-16
## bathrooms                            1.767e+01  1.047e+00  16.868  < 2e-16
## bedrooms                             1.510e+01  6.762e-01  22.332  < 2e-16
## beds                                -2.411e+00  4.845e-01  -4.977 6.52e-07
## bed_type2                            1.010e+01  1.161e+01   0.870 0.384115
## bed_type3                           -3.306e+00  1.059e+01  -0.312 0.755037
## bed_type4                            2.054e+00  1.037e+01   0.198 0.842930
## bed_type5                            1.501e+00  1.012e+01   0.148 0.882167
## amenities                            3.390e-01  4.082e-02   8.304  < 2e-16
## guests_included                      4.569e+00  4.532e-01  10.082  < 2e-16
## minimum_nights                      -4.069e-03  6.659e-03  -0.611 0.541187
## availability_30                      9.668e-01  3.961e-02  24.406  < 2e-16
## number_of_reviews                   -6.267e-02  9.561e-03  -6.555 5.76e-11
## review_scores_rating                 1.112e-01  6.660e-02   1.669 0.095121
## review_scores_accuracy              -6.812e-01  6.169e-01  -1.104 0.269518
## review_scores_cleanliness            1.895e+00  4.603e-01   4.116 3.87e-05
## review_scores_checkin               -2.406e-01  6.068e-01  -0.396 0.691790
## review_scores_communication         -1.158e+00  6.287e-01  -1.842 0.065540
## review_scores_location               1.855e+00  4.959e-01   3.740 0.000185
## review_scores_value                 -3.519e+00  5.725e-01  -6.147 8.11e-10
## distance                            -1.335e-03  7.987e-05 -16.712  < 2e-16
##                                        
## (Intercept)                            
## property_typeApartment                 
## property_typeBarn                      
## property_typeBed and breakfast         
## property_typeBoat                   ***
## property_typeBoutique hotel         ***
## property_typeBungalow               ***
## property_typeCabin                     
## property_typeCamper/RV                 
## property_typeCasa particular (Cuba)    
## property_typeCastle                    
## property_typeCave                      
## property_typeCondominium               
## property_typeCottage                   
## property_typeGuest suite               
## property_typeGuesthouse                
## property_typeHostel                    
## property_typeHotel                  ***
## property_typeHouse                     
## property_typeHouseboat              ***
## property_typeIn-law                    
## property_typeLoft                   .  
## property_typeOther                     
## property_typePension (South Korea)     
## property_typeResort                    
## property_typeServiced apartment     ** 
## property_typeTiny house                
## property_typeTownhouse                 
## property_typeTrain                     
## property_typeVilla                     
## room_type2                          ***
## room_type3                          ***
## accommodates                        ***
## bathrooms                           ***
## bedrooms                            ***
## beds                                ***
## bed_type2                              
## bed_type3                              
## bed_type4                              
## bed_type5                              
## amenities                           ***
## guests_included                     ***
## minimum_nights                         
## availability_30                     ***
## number_of_reviews                   ***
## review_scores_rating                .  
## review_scores_accuracy                 
## review_scores_cleanliness           ***
## review_scores_checkin                  
## review_scores_communication         .  
## review_scores_location              ***
## review_scores_value                 ***
## distance                            ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.06 on 14962 degrees of freedom
## Multiple R-squared:  0.4433, Adjusted R-squared:  0.4414 
## F-statistic: 229.2 on 52 and 14962 DF,  p-value: < 2.2e-16
#Backward Elimination to eliminate the features having p-value >0.5 and making model more efficient.

backwardElimination <- function(x, sl) {
  numVars = length(x)
  for (i in c(1:numVars)){
    regressor = lm(formula = price ~ ., data = x)
    maxVar = max(coef(summary(regressor))[c(2:numVars), "Pr(>|t|)"])
    if (maxVar > sl){
      j = which(coef(summary(regressor))[c(2:numVars), "Pr(>|t|)"] == maxVar)
      x = x[, -j]
    }
    numVars = numVars - 1
  }
  return(summary(regressor))
}

SL = 0.05
dataset = X
print(backwardElimination(training_set, SL))
## 
## Call:
## lm(formula = price ~ ., data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -285.36  -16.76   -5.27    9.50  920.98 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.3683    10.9676   0.307 0.758759    
## room_type2   -23.3081     0.7671 -30.386  < 2e-16 ***
## room_type3   -33.7114     3.0549 -11.035  < 2e-16 ***
## accommodates  10.9977     0.4116  26.717  < 2e-16 ***
## bathrooms     20.3031     1.1107  18.280  < 2e-16 ***
## bedrooms      14.2200     0.7152  19.883  < 2e-16 ***
## beds          -1.8527     0.5096  -3.635 0.000279 ***
## bed_type2     10.4998    12.4973   0.840 0.400829    
## bed_type3     -1.3187    11.4014  -0.116 0.907924    
## bed_type4      2.7028    11.1584   0.242 0.808609    
## bed_type5      3.5445    10.8981   0.325 0.745003    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.08 on 15004 degrees of freedom
## Multiple R-squared:  0.3521, Adjusted R-squared:  0.3517 
## F-statistic: 815.4 on 10 and 15004 DF,  p-value: < 2.2e-16

}