Task Description

* Task 1: explore the data
* Task 2: source the data
* Task 3: print a sample of the raw data
* Task 4: print a plot of the sample data
* Task 5: clean the data
* Task 6: summarize the clean data
* Task 7: plot the clean data

## Exploratory Analysis

This section looks at the first 10 rows of the sample data. The data has not been cleaned at this point, so it may contain some invalid data points.

# Read the input CSV (its first row supplies the column names), then show
# the first ten rows of the raw, uncleaned data.
mydata <- read.csv(file = "~/CST-425/input.csv", header = TRUE)
print(head(mydata, n = 10))
##      id                                             name host_id   host_name
## 1  2539               Clean & quiet apt home by the park    2787        John
## 2  2595                            Skylit Midtown Castle    2845    Jennifer
## 3  3647              THE VILLAGE OF HARLEM....NEW YORK !    4632   Elisabeth
## 4  3831                  Cozy Entire Floor of Brownstone    4869 LisaRoxanne
## 5  5022 Entire Apt: Spacious Studio/Loft by central park    7192       Laura
## 6  5099        Large Cozy 1 BR Apartment In Midtown East    7322       Chris
## 7  5121                                  BlissArtsSpace!    7356       Garon
## 8  5178                 Large Furnished Room Near B'way     8967    Shunichi
## 9  5203               Cozy Clean Guest Room - Family Apt    7490   MaryEllen
## 10 5238               Cute & Cozy Lower East Side 1 bdrm    7549         Ben
##    neighbourhood_group      neighbourhood latitude longitude       room_type
## 1             Brooklyn         Kensington 40.64749 -73.97237    Private room
## 2            Manhattan            Midtown 40.75362 -73.98377 Entire home/apt
## 3            Manhattan             Harlem 40.80902 -73.94190    Private room
## 4             Brooklyn       Clinton Hill 40.68514 -73.95976 Entire home/apt
## 5            Manhattan        East Harlem 40.79851 -73.94399 Entire home/apt
## 6            Manhattan        Murray Hill 40.74767 -73.97500 Entire home/apt
## 7             Brooklyn Bedford-Stuyvesant 40.68688 -73.95596    Private room
## 8            Manhattan     Hell's Kitchen 40.76489 -73.98493    Private room
## 9            Manhattan    Upper West Side 40.80178 -73.96723    Private room
## 10           Manhattan          Chinatown 40.71344 -73.99037 Entire home/apt
##    price minimum_nights number_of_reviews last_review reviews_per_month
## 1    149              1                 9  10/19/2018              0.21
## 2    225              1                45   5/21/2019              0.38
## 3    150              3                 0                            NA
## 4     89              1               270    7/5/2019              4.64
## 5     80             10                 9  11/19/2018              0.10
## 6    200              3                74   6/22/2019              0.59
## 7     60             45                49   10/5/2017              0.40
## 8     79              2               430   6/24/2019              3.47
## 9     79              2               118   7/21/2017              0.99
## 10   150              1               160    6/9/2019              1.33
##    calculated_host_listings_count availability_365
## 1                               6              365
## 2                               2              355
## 3                               1              365
## 4                               1              194
## 5                               1                0
## 6                               1              129
## 7                               1                0
## 8                               1              220
## 9                               1                0
## 10                              4              188

## Summary of price

# Min, quartiles, median, mean and max of the price column.
summary(mydata[["price"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    69.0   106.0   152.7   175.0 10000.0

## Plotting Data

The plot below shows the distribution of prices in each neighbourhood group. Comparing the Bronx and Manhattan, Manhattan has higher prices than the Bronx, which could mean that there are nicer places in Manhattan that can fetch more money than those in the Bronx.

# Keep only the two columns needed for the plot, with explicit column names.
# Without names, data.frame() derives "mydata.price" and
# "mydata.neighbourhood_group", which the plotting formula could not refer to.
mydata_frame <- data.frame(
  price = mydata$price,
  neighbourhood_group = mydata$neighbourhood_group
)

# One box of prices per neighbourhood group. The formula uses bare column
# names so that they are looked up in `mydata_frame` via `data =`.
boxplot(price ~ neighbourhood_group, data = mydata_frame)

## Analyzing Clean Data

Looking over the data, there are significant price outliers in the Bronx and Manhattan neighborhoods.

# List the prices flagged as outliers. coef = 2 widens the whisker range to
# 2x the box length (the default is 1.5), so fewer points are flagged.
boxplot.stats(x = mydata[["price"]], coef = 2)[["out"]]
##    [1]   800   500   400   475   500   500   390   575   500   650   599   395
##   [13]   495   451   450   500   400  2000   500   429   399   438   700   400
##   [25]   850   599   495   760   485  1300   402   800   450   499   500  3000
##   [37]   549   500   395  1300   500   450   499   800   450   500   399   700
##   [49]   500   400  1000   700   433   595  2000   550   900   399   425   495
##   [61]   625   575   400   500   600   390   465  4000   500   400   400   495
##   [73]   999   800   500  2500   890   399   400   650   420   950   575   550
##   [85]   499  1000   499  1395   425   395   650   550  1000   750   499  1000
##   [97]   595   390   399  1500  1899   399   465   495   800   450  5000   390
##  [109]   600  1100   550   420   400   600  1200   440   439   555   400   450
##  [121]   390   700   799   475   455   420   400   550  1700   525   600   399
##  [133]   499   450   450   650   750   399   499  1999   480   450  1500  1000
##  [145]   850   500   500   400   500   395  1000   800   599   400   700   575
##  [157]   560   400  6000   900   900   850  2000   750   575   599  2000   850
##  [169]   950  1500   675   900   395  1500   749  1000  4000   650   750  1000
##  [181]  1200   420   395  5250  1500  1500  1000  1000  1500  1550   500   500
##  [193]   400   750   425  1000  1250  1485   600   400  6500   395  2750  1500
##  [205]  2500  3750   500   800  1600   750  1000   600   800  1000   900   425
##  [217]   975   650   400   500   500   400   400   500   450   650   400   600
##  [229]   395   399   550   595   450  2300   425   400   409   430   400   650
##  [241]   450  5000  4500  8000   600   795   414   499   950   750   500   500
##  [253]   425   429   450   400   425   500   785  1000   425   400   500   750
##  [265]   400   495   550   600   399   500   500   545   400   395   390   400
##  [277]   450   860  1000   450   540  1400   500   600   395   419   500   900
##  [289]   500  1050   395  2695  1000   450   650  2000   985   425   699   550
##  [301]   495   400   450  1500   400   450   450   399  1000   500   450   895
##  [313]   600   600   550   525   500  1000  1000  9999   400  2400   600  2000
##  [325]   600   500   450   549   760  1000   795   450   500   415   995  1763
##  [337]   510  1450   400   495   600   499   825   700   500   524   750   450
##  [349]  2000  2000   400  2000   475  2000   475   450   450   400   450  1200
##  [361]   499   499   480   950   750   750   445   399   495   400   449   395
##  [373]   400   499   395   680   900  1000   700   450   399   400   600   440
##  [385]  1495   399   450   499   888   389   460  2000   700   450   900   800
##  [397]   700   800   500   999 10000   600   499   850   500   400   500   398
##  [409]   450   599   700   450   500   450   450   500   600   450   480   399
##  [421]   650   650  1600   450   450   399   595   400   499   450   400   700
##  [433]   400   700  1500   600   400   450   500   420   500   425   490   800
##  [445]   500   400   400  1200  1100   500   400   400  1195   450   500   999
##  [457]   395   400   500   429   700   469   600   399   400   500   414   450
##  [469]   455   500   650  2000   425   800   800   500   495   400   450  1000
##  [481]   500  2000   650   745   399   395   410  1170   699  1150  1000   500
##  [493]   450   434   500   590   899   750   500   400   450   400   800   650
##  [505]   465   425   400   450   465   500   450   531   525   655   450   500
##  [517]   400   410  1200  9999   500   450   450   450   395   400   550   450
##  [529]   390   399   450   425   499  1000   500   750   477   699  1200   800
##  [541]   799  1100   995   500   600   850   425   400   500   400   500   485
##  [553]   700   425   800   587   399   550   500   449   500   499   500  1731
##  [565]   400   600  1002   500   400   450   450  1250   600   425   400   400
##  [577]  1100   399   395   450   814   450   550   500   550   550   390  1800
##  [589]  2000   700  1200   499   421   690   619   600   450  2100  1100   400
##  [601]   999   500   450   750   495   625   400   490   450   400   425   650
##  [613]   595   500   800   450   999  1000   500   999   400   500   500   409
##  [625]   750   449   450  2500   525   450   436   399   500   935   500   650
##  [637]  6000  1300   500   399   800   980  1000  1000  1000   400   900   399
##  [649]   480   390   475   500   450   700  2000   399   575   400  1000   850
##  [661]   400   400   700   559   499   600   509   400   650  2000   500   500
##  [673]   600   450   450  1100   450   400   525  1000   400   408   700   499
##  [685]   495   500   399   525   700   400   750   500  1000  3800   479   450
##  [697] 10000   650  2350   500   400   822  1100   450   400   499   560   407
##  [709]   700   650   425   489   425   450  2500   650   795   800   399   400
##  [721]   499   999   440   750   672  3000   399   400   800   575   800   500
##  [733]   415   400   500   584   750   399   475   894   643   699   462   749
##  [745]   799   599   699   499   799   499   649   799   399   499   534   748
##  [757]   748   534   748   748   850   400   450   550  1000   390   399   600
##  [769]   425  2000   408  1500   500   534   534   534   534   534   748   748
##  [781]   748   449  2000   487   450   499  1500   500   400  2000  1020   700
##  [793]   900   950   400   500   899   700   599   799  1599   649   699   799
##  [805]   500   500   699   400   799   800  1200   500   400   700   525   850
##  [817]   425   585   799   450   900  2590   400   400   499   399   599   400
##  [829]   450   599   999   550   895   899   550   500  1000   995   399   640
##  [841]   800   459   400   400   900   600   489   600   450   545   650   500
##  [853]   600   500   400  4000   600   500   450  2250   800   700   484   429
##  [865]   395  5000  2800   499   510   700  2250   550  1000   490   400   850
##  [877]  1475   450   400   615   419   545  2000  2900   450   400   475   430
##  [889]   825   500   399   390   800   515  1500  1067   465  1599   389   400
##  [901]   424  1000   578  1200   950   475   600   800  1000   399   450   650
##  [913]   400   425  1500   500   390   399   550   450   400   389   900   400
##  [925]   499   400  2500   398   550  2500   400   700   700  1000   550   400
##  [937]   789   600   500   500   599   585   795   449   500   399  1795   690
##  [949]   500  1095   500   389   450   899   399   800   600   800   400  1500
##  [961]   600   615   800   409   600  1000   672   530   400   400   495   449
##  [973]   799   389   699   389   400   499  5000   450  3900   496  1700   435
##  [985]   422   525   435   450   400   700   650   695   425  1177   790   400
##  [997]   400  1500   475   750  5000   525   573  1100   766   840   400   400
## [1009]   540   585   395   800   577   589   750   550   949   400   489   400
## [1021]   500  1000   485   500  2200   395   850   400   459   497   550   500
## [1033]   700   495   750  1000   499   500   600   400   400   400  1066   400
## [1045]   450   450   398   398   425   412   400   650   600   850   400   850
## [1057]   395   550   650   800   649   485   650   475   460   487   455   399
## [1069]   400   400   400   425   400   450   400   647   485   625   600   500
## [1081]   500   400  3000   400   720   400  3000   400   500  2010  3210  4160
## [1093]  1046   400   475  1000   400   500   450   450   499   499  1500 10000
## [1105]   399   400   550   800   450   490   416   545   395   800   450  1680
## [1117]  7703  3518  6419  2626  2103   980  1100  1500   600   480   450   400
## [1129]   399   495   525   850   495  1250   500   500   700   395  1000  2000
## [1141]  2500  8500   409   400   450   479   550   450   640   400   500   400
## [1153]   400   400   400   750   800   750   550   800   600   600   400   600
## [1165]  2990  1000  1000   600   620   399   649   412   412   412   650   500
## [1177]   750  1500   394   450   425   450   975   450   545   400  1000   974
## [1189]   400   400   415   700   400   400   650   599   400   450   550   450
## [1201]   600  1195   795  1200   400  1500   415   400  2500   399   398   450
## [1213]   500   650   699   475   450   450   400   390   600   799   399   410
## [1225]   450  1000   425   650  1000   500   460  1333   500  1100   499   500
## [1237]   398   400   525   649   499   425  1000   550   550   800   780   850
## [1249]   399   399   393   550   745  1200   750   425   595   400   499   650
## [1261]   449   400   450   599  1500   400   465   900   500   490   995   460
## [1273]   390   425   750   395   395  1250   999   750   450   700  1200   415
## [1285]   500   483   550   450  1700   699   699   699   699   699   699   699
## [1297]   699   699   737   737   737   737   737   737   737   737   699   699
## [1309]   699   737   737   550   395   600   650   495   470   435   525   550
## [1321]   400  1000   450   799   475   450   689   400   400  1100  3750   400
## [1333]   800   400   400   399   600   495   394   799   499   400   499   639
## [1345]   460   480   530   590   848   410   430   500   600   590   600  1500
## [1357]   449   800   750   389   785   500   575   425   400   403   471  1000
## [1369]   750   399   500   400   500   799   480   500   478   450   399   425
## [1381]   400   500   850   575   800   433   400   999   600   500   899   975
## [1393]   425   599   670   499   390   500   525  1500   625   499   700  1700
## [1405]  6500   600   399  1000   399   500   860   650   500  2545   399   500
## [1417]   485   500  1115   650   700   650   399   700   520   399   455   590
## [1429]   913   550   505   718   475   692   400  3600   750   400   400   450
## [1441]   600   600   450  5000   600   499   600  2000   800   500   400   460
## [1453]   550  1880  3000   495   450   500   400   714   675   675   675   675
## [1465]   675   956   675   675   675   675   675   714   714   714   956   714
## [1477]   714   714   675   675   714   714   714   714   714  3000   670  1500
## [1489]  2999   399   499   499   499   399   399   399   399   418  1600   399
## [1501]   452   397   392   396   420   459   416   453   481   466   388   423
## [1513]   430   750   805  3000   725   443   392   800   699   595   800   398
## [1525]   400   390   450   550  1350   550   392   950   520   500   400  3200
## [1537]   500   500   500   575   600   500   600   600   550   599   432   450
## [1549]   444   399   590   450   499   515   500   400   750   600  1000   425
## [1561]   420  1350   443  1000   499   450  1075   550  3000   956   410   450
## [1573]   820   520   399  1999   400   649   399   890   500  1750   450  1000
## [1585]   478   400   450   399   399   500  1080  1299  9999   495   525   395
## [1597]   400   599   400   400   400  2500  1150  1050   750   600  1295  1200
## [1609]   700   400   495   519   699   799   450   450   575   450  1500   450
## [1621]   400  1978  1494  3512   600   890   800   500   699   799  1000   500
## [1633]   400   418   450   404   425   503   392   472   999   454   472   503
## [1645]   404   431   412   403   451   419   505   468   390   396   498   612
## [1657]   650  1200   999   999   999   500   650   400   488   425  1497   750
## [1669]   515  1250   420   500   500  2850   400   500   500   500   540   700
## [1681]   990   395   484   388   407   425   432   396   416   506   650   499
## [1693]   840   450  1315   650   400   450   489   488   400   400   446  7500
## [1705]  3613   400   443   520   388   400  2600   399   600  4100   399   700
## [1717]   396   700   794   500   395  1145  1306   400   525   450   595   560
## [1729]  5100   499   800   898   419   450   950   750  1000   799   600   470
## [1741]   395   442   949   400   439   505   425   425   425   800   400   499
## [1753]   419   700   600   395   600   472   474   495   398  1799   395   699
## [1765]   700   549   550   800  4200   420   390   475   420   400   450   500
## [1777]   700   500   790   616   408  6800   396   500   500   590   550   500
## [1789]   945   450   690   409   430   390   410   468  1000   500   499   650
## [1801]   470   470   539   499   426   699   577   499   699   500   699   500
## [1813]   595  2000   450   750   875   389   499   395   499   589   449   699
## [1825]   699   699   699   438   650   463   399   700   389   688   449   399
## [1837]   595   399   399   485   479   499   399   600   500   750   549   599
## [1849]   400   475   475   425   450   450   500   425   450   450   425   400
## [1861]   425   945   500   620   500   560   500   400   400   950   580   583
## [1873]   650   390   999  2500   420   400   690   400   449   500   585   499
## [1885]   460  1900   500   479   500   490   499   499   650   450   500   395
## [1897]  1250   799   659   650   650   399   475   499  1000   400   830   440
## [1909]   489   650   399   590  7500   450  2500   550   459   650   850   650
## [1921]   699   600   499   399   850   399   975  4200   895   499   447   447
## [1933]   600   795   995   695  1800   595   900   600   450   399   389   495
## [1945]   400   400   550   495  1099   400   428   650   599   650   700   400
## [1957]   549   479   395   400   600   395   390   500   500   500   650   425
## [1969]   445  1750   390   450   400   550   590   585   399   425   399   399
## [1981]   399   500   420   499   500   600   450   500   500   995   945   395
## [1993]   500   500   444   550   900   395   490   900   400   920   500   400
## [2005]   900   800   390   750   450   930   599   459   700   950   416   500
## [2017]   475   750   399  2995   480   500   550   999   500  4500   450   700
## [2029]   450   500   400  1200   400   400   489   650   400   400   500   489
## [2041]   450   500   500   450   450   575   600   800   475   900   400   450
## [2053]   850  1750   395   475   650   500   950   750   499   465   800   600
## [2065]   900   600   843   395   400   500   450   800   429   425   600   400
## [2077]   425   800   400   400   400   475   417   450   450   395   450   500
## [2089]  2000   400   400  1400   750   400   399   449   400   400   399   399
## [2101]   649   450   399   500   650  1100   399   750   500   600  1500   550
## [2113]   600   400   495   595   500   750   600   450   500   399   499   450
## [2125]   699   699   550   400   425   425  1600   800   420   599   800   399
## [2137]   650  6500  1050  1308   390   560   583   550   595   500   561  1000
## [2149]   999  2999  1999   600   750   600   400   400   450   450   520   450
## [2161]  1369   450  1749   466   421   400   400   430   450   400   750   396
## [2173]   393   550   499   700   475   800
# Box plot of price per neighbourhood group. The formula uses bare column
# names resolved through `data = mydata`; the previous `mydata$...` terms
# bypassed the `data` argument entirely.
# NOTE(review): this plots the same unfiltered prices as the first plot; the
# outliers listed by boxplot.stats() are not removed here. Confirm whether
# a filtered data set was intended.
boxplot(price ~ neighbourhood_group, data = mydata)

## Conclusion

Looking at the data, there was not much, if anything, to clean. Comparing the two neighborhood groups in the graph above, Manhattan has a higher price range than the Bronx.