#code for revision of basics
#created on 14 May 2017
#Ajay Ohri v1.0


# Record the R version, platform and loaded packages used for this run.
sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7600)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] compiler_3.4.0  backports_1.0.5 magrittr_1.5    rprojroot_1.2  
##  [5] tools_3.4.0     htmltools_0.3.6 Rcpp_0.12.10    stringi_1.1.5  
##  [9] rmarkdown_1.5   knitr_1.15.1    stringr_1.2.0   digest_0.6.12  
## [13] evaluate_0.10

# Memory available to / currently used by R.
# NOTE(review): the recorded session is Windows; these two calls are
# presumably platform-specific -- confirm before running elsewhere.
memory.limit()
## [1] 8096
memory.size()
## [1] 48.98

# Where are we, and what is in the working directory?
getwd()
## [1] "C:/Users/Dell/Documents/R/revisionofbasics"
list.files()
## [1] "revisionofbasics.R"        "revisionofbasics.Rproj"   
## [3] "revisionofbasics.spin.R"   "revisionofbasics.spin.Rmd"

# Everything in the Downloads folder.
list.files("C:/Users/Dell/Downloads")
##   [1] "140749_2017.pdf"                                                        
##   [2] "2+Clustering+-K+Means.ipynb"                                            
##   [3] "2011-F01-0700-Rev4-MDDS.XLSX"                                           
##   [4] "20150817143155.pdf"                                                     
##   [5] "20160111060911.pdf"                                                     
##   [6] "20170214052225.pdf"                                                     
##   [7] "692480_2017.pdf"                                                        
##   [8] "7z1604-x64.exe"                                                         
##   [9] "7z1604.exe"                                                             
##  [10] "861415_10151432783238421_2124270505_o (1).jpg"                          
##  [11] "861415_10151432783238421_2124270505_o.jpg"                              
##  [12] "AirPassengers.csv"                                                      
##  [13] "ajay.csv"                                                               
##  [14] "ajayo.jpg"                                                              
##  [15] "Alison Python  Invoice   - Sheet1.pdf"                                  
##  [16] "Alison SAS  Invoice   - Sheet1.pdf"                                     
##  [17] "All+CSV+Files+in+a+Folder.ipynb"                                        
##  [18] "Allison Interview Jones Invoice   - Sheet1.pdf"                         
##  [19] "Anaconda3-4.2.0-Windows-x86_64.exe"                                     
##  [20] "anscombe+dataset.ipynb"                                                 
##  [21] "apachehttpd.exe"                                                        
##  [22] "April invoice adaptive analytics   - Sheet1.pdf"                        
##  [23] "Assignment14_BusinessAnalytics (1).docx"                                
##  [24] "Assignment14_BusinessAnalytics.docx"                                    
##  [25] "Assignment15_BusinessAnalytics.docx"                                    
##  [26] "Assignment16_BusinessAnalytics (1).docx"                                
##  [27] "Assignment16_BusinessAnalytics (2).docx"                                
##  [28] "Assignment16_BusinessAnalytics.docx"                                    
##  [29] "aug ust 2008.JPG"                                                       
##  [30] "avast_free_antivirus_setup_online.exe"                                  
##  [31] "avinash_ltv (1).zip"                                                    
##  [32] "avinash_ltv.zip"                                                        
##  [33] "BigDiamonds.csv"                                                        
##  [34] "BigDiamonds.csv (1).zip"                                                
##  [35] "BigDiamonds.csv (2)"                                                    
##  [36] "BigDiamonds.csv (2).zip"                                                
##  [37] "BigDiamonds.csv (3).zip"                                                
##  [38] "BigDiamonds.csv.zip"                                                    
##  [39] "Boston (1).csv"                                                         
##  [40] "Boston (2).csv"                                                         
##  [41] "Boston.csv"                                                             
##  [42] "BuildTools_Full.exe"                                                    
##  [43] "CAM- Ajay Ohri (1).pdf"                                                 
##  [44] "CAM- Ajay Ohri.pdf"                                                     
##  [45] "camtasia.exe"                                                           
##  [46] "ccFraud.csv"                                                            
##  [47] "ce_query_pla_cenvat_revenue.txt"                                        
##  [48] "Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf"
##  [49] "CHAP1-6PythonforRUsersAnapproachforDataScience.docx"                    
##  [50] "chapter+3+_+spark.html"                                                 
##  [51] "cheat+sheet+python (5).ipynb"                                           
##  [52] "chi+square+test.ipynb"                                                  
##  [53] "chromeinstall-8u111.exe"                                                
##  [54] "Cisco_WebEx_Add-On.exe"                                                 
##  [55] "class batch 2.R"                                                        
##  [56] "class_batch_2.html"                                                     
##  [57] "class+exercise+data+viz.ipynb"                                          
##  [58] "class2.csv"                                                             
##  [59] "Clustering+-K+Means.ipynb"                                              
##  [60] "Collabera Invoice (1).pdf"                                              
##  [61] "Collabera Invoice.pdf"                                                  
##  [62] "Collectcent Invoice.pdf"                                                
##  [63] "college degrees.pdf"                                                    
##  [64] "DAP 1.pdf"                                                              
##  [65] "DAP 1.pptx"                                                             
##  [66] "DAP 6 RDBMS and SQL.pdf"                                                
##  [67] "DAP 6 RDBMS and SQL.pptx"                                               
##  [68] "Data Analysis (1).7z"                                                   
##  [69] "Data Analysis (1).rar"                                                  
##  [70] "Data Analysis (2).rar"                                                  
##  [71] "Data Analysis (3).rar"                                                  
##  [72] "Data Analysis.rar"                                                      
##  [73] "Data Viz.pptx"                                                          
##  [74] "data+exploration.ipynb"                                                 
##  [75] "data+manipulation.ipynb"                                                
##  [76] "data+munging+again.ipynb"                                               
##  [77] "data+viz.ipynb"                                                         
##  [78] "data+wrangling+titanic+dataset.ipynb"                                   
##  [79] "data1.csv"                                                              
##  [80] "datasets (1).csv"                                                       
##  [81] "datasets.csv"                                                           
##  [82] "Decision Trees.pdf"                                                     
##  [83] "DecisionStatsOfferLetter.docx"                                          
##  [84] "DecisionStatsRelievingLetter.docx"                                      
##  [85] "descriptive+stats+in+Python.ipynb"                                      
##  [86] "desktop.ini"                                                            
##  [87] "Diamond (1).csv"                                                        
##  [88] "Diamond (2).csv"                                                        
##  [89] "Diamond (3).csv"                                                        
##  [90] "Diamond (4).csv"                                                        
##  [91] "Diamond (5).csv"                                                        
##  [92] "Diamond (6).csv"                                                        
##  [93] "Diamond (7).csv"                                                        
##  [94] "Diamond (8).csv"                                                        
##  [95] "Diamond.csv"                                                            
##  [96] "DolbyVoiceClient.msi"                                                   
##  [97] "DropboxInstaller.exe"                                                   
##  [98] "edb_npgsql.exe"                                                         
##  [99] "edb_npgsql.exe-20170506133801"                                          
## [100] "edb_pgjdbc.exe"                                                         
## [101] "edb_pgjdbc.exe-20170506133830"                                          
## [102] "edb_psqlodbc.exe"                                                       
## [103] "edb_psqlodbc.exe-20170203172812"                                        
## [104] "edb_psqlodbc.exe-20170307203617"                                        
## [105] "edb_psqlodbc.exe-20170506133907"                                        
## [106] "edb_psqlodbc.exe-20170506134004"                                        
## [107] "fim.pyd"                                                                
## [108] "final invoice edureka  - Sheet1.pdf"                                    
## [109] "final_webinar (1).pdf"                                                  
## [110] "final_webinar.pdf"                                                      
## [111] "FinalPythonforRUsersAnapproachforDataScience (1).docx"                  
## [112] "FinalPythonforRUsersAnapproachforDataScience (2).docx"                  
## [113] "FinalPythonforRUsersAnapproachforDataScience (3).docx"                  
## [114] "FinalPythonforRUsersAnapproachforDataScience (4).docx"                  
## [115] "FinalPythonforRUsersAnapproachforDataScience.docx"                      
## [116] "Git-2.11.0-64-bit.exe"                                                  
## [117] "Git-2.12.0-64-bit.exe"                                                  
## [118] "GitHubSetup (1).exe"                                                    
## [119] "GitHubSetup (2).exe"                                                    
## [120] "GitHubSetup.exe"                                                        
## [121] "GOMAUDIOGLOBALSETUP.EXE"                                                
## [122] "graphviz-2.38.msi"                                                      
## [123] "Hdma.csv"                                                               
## [124] "Hedonic (1).csv"                                                        
## [125] "Hedonic (2).csv"                                                        
## [126] "Hedonic.csv"                                                            
## [127] "Hierarchical+Clustering (1).ipynb"                                      
## [128] "Hierarchical+Clustering.ipynb"                                          
## [129] "HP Downloads"                                                           
## [130] "HPSupportSolutionsFramework-12.5.32.203.exe"                            
## [131] "image.png"                                                              
## [132] "IMS PROSCHOOL Workshop.pptx.pdf"                                        
## [133] "IMS PROSCHOOL Workshop.pptx.pptx"                                       
## [134] "internship.docx"                                                        
## [135] "Introduction to SAS (1).pdf"                                            
## [136] "Introduction to SAS Part 1 (1).pdf"                                     
## [137] "Introduction to SAS Part 1.pdf"                                         
## [138] "Introduction to SAS.pdf"                                                
## [139] "introductory+python.ipynb"                                              
## [140] "Invoice for Digital Vidya.pdf"                                          
## [141] "Invoice for Weekendr.pdf"                                               
## [142] "Invoice format - Ajay Ohri CONTATA (1).xls"                             
## [143] "Invoice format - Ajay Ohri CONTATA.xls"                                 
## [144] "invoice rapid miner.pdf"                                                
## [145] "Invoice trafla format.docx"                                             
## [146] "iris2 (1).ipynb"                                                        
## [147] "iris2 (2).ipynb"                                                        
## [148] "iris2.ipynb"                                                            
## [149] "January invoice Indicus  .pdf"                                          
## [150] "June AV   Invoice   - Sheet1.pdf"                                       
## [151] "Lecture 6 - KNN & Naive Bayes.ppt"                                      
## [152] "Local Disk (C) - Shortcut.lnk"                                          
## [153] "logistic regression - script for ppt.R"                                 
## [154] "Logistic Regression.ipynb"                                              
## [155] "logistic_regression_-_script_for_ppt.html"                              
## [156] "lyncentry.exe"                                                          
## [157] "Machine+Learning++Part+1 (1).ipynb"                                     
## [158] "Machine+Learning++Part+1.ipynb"                                         
## [159] "March invoice Indicus   - Sheet1.pdf"                                   
## [160] "matplotlib+cars.ipynb"                                                  
## [161] "matplotlib+line+graph.ipynb"                                            
## [162] "ML+part+2.ipynb"                                                        
## [163] "ML+part+3.ipynb"                                                        
## [164] "mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi"                     
## [165] "mongodb-win32-x86_64-3.4.2-signed.msi"                                  
## [166] "mortDefault"                                                            
## [167] "mortDefault.zip"                                                        
## [168] "mtcars.csv"                                                             
## [169] "mtcarslm.R"                                                             
## [170] "multiple+file+concat+in+pandas (1).ipynb"                               
## [171] "multiple+file+concat+in+pandas.ipynb"                                   
## [172] "my+first+class+in+python.ipynb"                                         
## [173] "NDP451-KB2858728-x86-x64-AllOS-ENU.exe"                                 
## [174] "nltk.ipynb"                                                             
## [175] "notebook-Copy1.html"                                                    
## [176] "Offer Letter - Ajay Ohri (1).pdf"                                       
## [177] "Offer Letter - Ajay Ohri.pdf"                                           
## [178] "Other Data Mining  Methods (1).pdf"                                     
## [179] "Other Data Mining  Methods.pdf"                                         
## [180] "output1 (1).xls"                                                        
## [181] "output1 (2).xls"                                                        
## [182] "output1.xls"                                                            
## [183] "pandas+11.ipynb"                                                        
## [184] "pandas+analysis+1.ipynb"                                                
## [185] "pandas+data+manipulation.ipynb"                                         
## [186] "passport image.pdf"                                                     
## [187] "Pawconinvoice2016.pdf"                                                  
## [188] "Pawconinvoice2017 (1).pdf"                                              
## [189] "Pawconinvoice2017 (2).pdf"                                              
## [190] "Pawconinvoice2017 (3).pdf"                                              
## [191] "Pawconinvoice2017.pdf"                                                  
## [192] "Payslip Feb 2016 - Sheet1.pdf"                                          
## [193] "Payslip Feb 2016.pdf"                                                   
## [194] "Payslip Format Decisionstats - Sheet1.pdf"                              
## [195] "Payslip Jan 2016 - Sheet1.pdf"                                          
## [196] "Payslip Jan 2016.pdf"                                                   
## [197] "Payslip March 2016 - Sheet1.pdf"                                        
## [198] "Payslip March 2016.pdf"                                                 
## [199] "pgd.csv"                                                                
## [200] "plot_roc.ipynb"                                                         
## [201] "postgres data - Sheet1.csv"                                             
## [202] "postgresql-9.6.1-1-windows-x64.exe"                                     
## [203] "postgresql-9.6.2-4-windows-x64.exe"                                     
## [204] "Program 1-results.rtf"                                                  
## [205] "protein (1).csv"                                                        
## [206] "protein (2).csv"                                                        
## [207] "protein.csv"                                                            
## [208] "pycharm-professional-2017.1.2.exe"                                      
## [209] "pyfim.zip"                                                              
## [210] "Python.docx"                                                            
## [211] "python+with+postgres (1).ipynb"                                         
## [212] "Python+with+Postgres (2).ipynb"                                         
## [213] "Python+with+Postgres (3).ipynb"                                         
## [214] "python+with+postgres.ipynb"                                             
## [215] "R-3.3.2-win.exe"                                                        
## [216] "R-3.3.3-win.exe"                                                        
## [217] "R-3.4.0-win.exe"                                                        
## [218] "RCertificationExam.pdf"                                                 
## [219] "reg+model.ipynb"                                                        
## [220] "Resume_DivyataJaiswal_IITG.pdf"                                         
## [221] "Revision -  Business Analytics (1).pdf"                                 
## [222] "Revision -  Business Analytics.pdf"                                     
## [223] "RFM Analysis Assignment - Data.csv"                                     
## [224] "RidingMowers.csv"                                                       
## [225] "rsconnect"                                                              
## [226] "RStudio-1.0.136.exe"                                                    
## [227] "RStudio-1.0.143.exe"                                                    
## [228] "Salary Slip, Feb 2016.pdf"                                              
## [229] "Salary Slip, Jan 2016.pdf"                                              
## [230] "Salary Slip, March 2016 (1).pdf"                                        
## [231] "Salary Slip, March 2016 (2).pdf"                                        
## [232] "Salary Slip, March 2016.pdf"                                            
## [233] "sales-of-shampoo-over-a-three-ye.csv"                                   
## [234] "sales.csv"                                                              
## [235] "sales22.csv"                                                            
## [236] "sas-university-edition-107140.pdf"                                      
## [237] "SAS part 2.pdf"                                                         
## [238] "SAS Part 3.pdf"                                                         
## [239] "Scan0095.pdf"                                                           
## [240] "Scanned Invoice for Collabera.pdf"                                      
## [241] "scrape+amazon.ipynb"                                                    
## [242] "Screenshot 2017-01-23 12.36.55.png"                                     
## [243] "September invoice adaptive analytics   - Sheet1.pdf"                    
## [244] "simple+matplot+graph.ipynb"                                             
## [245] "SkypeSetup.exe"                                                         
## [246] "SkypeSetupFull.exe"                                                     
## [247] "Sollers January.pdf"                                                    
## [248] "sqlalchemy.ipynb"                                                       
## [249] "stackoverflow-dump-analysis.html"                                       
## [250] "stat13-lecture18.ppt"                                                   
## [251] "sts_gold_v03"                                                           
## [252] "sts_gold_v03.zip"                                                       
## [253] "Sunstone.pdf"                                                           
## [254] "Tableau.pdf"                                                            
## [255] "TableauPublicDesktop-64bit-10-1-3.exe"                                  
## [256] "TableauPublicDesktop-64bit-10-1-4.exe"                                  
## [257] "telecom.csv"                                                            
## [258] "TelecomServiceProviderCaseStudy.pdf"                                    
## [259] "test+web+scraping.ipynb"                                                
## [260] "Text Mining (1).pdf"                                                    
## [261] "Text Mining.pdf"                                                        
## [262] "third.sas7bdat"                                                         
## [263] "Time Series  Forecasting (1).pdf"                                       
## [264] "Time Series  Forecasting.pdf"                                           
## [265] "time+series (1).ipynb"                                                  
## [266] "time+series.ipynb"                                                      
## [267] "ts.html"                                                                
## [268] "ts.R"                                                                   
## [269] "Unconfirmed 141422.crdownload"                                          
## [270] "Unconfirmed 373974.crdownload"                                          
## [271] "Unconfirmed 376991.crdownload"                                          
## [272] "Unconfirmed 950045.crdownload"                                          
## [273] "uTorrent.exe"                                                           
## [274] "VCForPython27.msi"                                                      
## [275] "vcsetup.exe"                                                            
## [276] "VirtualBox-5.1.8-111374-Win (1).exe"                                    
## [277] "VirtualBox-5.1.8-111374-Win.exe"                                        
## [278] "visualcppbuildtools_full (1).exe"                                       
## [279] "visualcppbuildtools_full.exe"                                           
## [280] "vs_community__196078652.1492249774.exe"                                 
## [281] "Web+Scraping+Yelp+with+Beautiful+Soup.ipynb"                            
## [282] "Webinar for Business Analytics.pdf"                                     
## [283] "wendyhe-tweets-on-womensmarch-and-maga.zip"                             
## [284] "WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg"                         
## [285] "WhatsApp Image 2017-02-18 at 08.42.55.jpeg"                             
## [286] "Wipro_Third_Party_DB_New_Format (1).xlsx"                               
## [287] "Wipro_Third_Party_DB_New_Format.xlsx"                                   
## [288] "YourPayslip_April2017.pdf"                                              
## [289] "YourPayslip_March2017.pdf"
# List only the CSV files in the Downloads folder.
# `pattern` is a regular expression, so a bare "csv" also matches names that
# merely contain those letters (e.g. "BigDiamonds.csv (1).zip"). Anchoring on
# a literal dot and the end of the name keeps just entries ending in ".csv".
# The listing below reflects the anchored pattern; re-knit to refresh it.
dir("C:/Users/Dell/Downloads", pattern = "\\.csv$")
##  [1] "AirPassengers.csv"                   
##  [2] "ajay.csv"                            
##  [3] "BigDiamonds.csv"                     
##  [4] "Boston (1).csv"                      
##  [5] "Boston (2).csv"                      
##  [6] "Boston.csv"                          
##  [7] "ccFraud.csv"                         
##  [8] "class2.csv"                          
##  [9] "data1.csv"                           
## [10] "datasets (1).csv"                    
## [11] "datasets.csv"                        
## [12] "Diamond (1).csv"                     
## [13] "Diamond (2).csv"                     
## [14] "Diamond (3).csv"                     
## [15] "Diamond (4).csv"                     
## [16] "Diamond (5).csv"                     
## [17] "Diamond (6).csv"                     
## [18] "Diamond (7).csv"                     
## [19] "Diamond (8).csv"                     
## [20] "Diamond.csv"                         
## [21] "Hdma.csv"                            
## [22] "Hedonic (1).csv"                     
## [23] "Hedonic (2).csv"                     
## [24] "Hedonic.csv"                         
## [25] "mtcars.csv"                          
## [26] "pgd.csv"                             
## [27] "postgres data - Sheet1.csv"          
## [28] "protein (1).csv"                     
## [29] "protein (2).csv"                     
## [30] "protein.csv"                         
## [31] "RFM Analysis Assignment - Data.csv"  
## [32] "RidingMowers.csv"                    
## [33] "sales-of-shampoo-over-a-three-ye.csv"
## [34] "sales.csv"                           
## [35] "sales22.csv"                         
## [36] "telecom.csv"
# Load the BigDiamonds data set.
# stringsAsFactors is set explicitly: the recorded output below (and the later
# class(diamonds2) check) shows text columns as factors, which was only the
# default behaviour of read.csv() before R 4.0. Passing TRUE keeps the script
# reproducing the same results on both old and current R versions.
diamonds <- read.csv(
  "C:/Users/Dell/Downloads/BigDiamonds.csv/BigDiamonds.csv",
  stringsAsFactors = TRUE
)

# Column types and a preview of the values.
str(diamonds)
## 'data.frame':    598024 obs. of  13 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
##  $ color       : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
##  $ clarity     : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
##  $ measurements: Factor w/ 241453 levels "","  3.99  x   3.95  x   2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
# First six rows.
head(diamonds)
##   X carat    cut color clarity table depth cert       measurements price
## 1 1  0.25 V.Good     K      I1    59  63.7  GIA 3.96 x 3.95 x 2.52    NA
## 2 2  0.23   Good     G      I1    61  58.1  GIA 4.00 x 4.05 x 2.30    NA
## 3 3  0.34   Good     J      I2    58  58.7  GIA 4.56 x 4.53 x 2.67    NA
## 4 4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31    NA
## 5 5  0.31 V.Good     K      I1    59  62.2  EGL 4.35 x 4.26 x 2.68    NA
## 6 6  0.20   Good     G     SI2    60  64.4  GIA 3.74 x 3.67 x 2.38    NA
##      x    y    z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38
# Per-column summary (quantiles for numbers, counts for factors).
summary(diamonds)
##        X              carat           cut             color      
##  Min.   :     1   Min.   :0.200   Good  : 59680   G      :96204  
##  1st Qu.:149507   1st Qu.:0.500   Ideal :369448   F      :93573  
##  Median :299013   Median :0.900   V.Good:168896   E      :93483  
##  Mean   :299013   Mean   :1.071                   H      :86619  
##  3rd Qu.:448518   3rd Qu.:1.500                   D      :73630  
##  Max.   :598024   Max.   :9.250                   I      :70282  
##                                                   (Other):84233  
##     clarity           table           depth               cert       
##  SI1    :116631   Min.   : 0.00   Min.   : 0.00   GIA       :463555  
##  VS2    :111082   1st Qu.:56.00   1st Qu.:61.00   IGI       : 43667  
##  SI2    :104300   Median :58.00   Median :62.10   EGL       : 33814  
##  VS1    : 97730   Mean   :57.63   Mean   :61.06   EGL USA   : 16079  
##  VVS2   : 65500   3rd Qu.:59.00   3rd Qu.:62.70   EGL Intl. : 11447  
##  VVS1   : 54798   Max.   :75.90   Max.   :81.30   EGL ISRAEL: 11301  
##  (Other): 47983                                   (Other)   : 18161  
##                  measurements        price             x         
##  0.00  x  0.00  x  0.00:   425   Min.   :  300   Min.   : 0.150  
##  0.00 x 0.00 x 0.00    :   222   1st Qu.: 1220   1st Qu.: 4.740  
##  4.3 x 4.27 x 2.67     :    97   Median : 3503   Median : 5.780  
##  4.31 x 4.29 x 2.68    :    87   Mean   : 8753   Mean   : 5.991  
##  4.29 x 4.26 x 2.67    :    86   3rd Qu.:11174   3rd Qu.: 6.970  
##  4.3 x 4.28 x 2.67     :    84   Max.   :99990   Max.   :13.890  
##  (Other)               :597023   NA's   :713     NA's   :1815    
##        y                z         
##  Min.   : 1.000   Min.   : 0.040  
##  1st Qu.: 4.970   1st Qu.: 3.120  
##  Median : 6.050   Median : 3.860  
##  Mean   : 6.199   Mean   : 4.033  
##  3rd Qu.: 7.230   3rd Qu.: 4.610  
##  Max.   :13.890   Max.   :13.180  
##  NA's   :1852     NA's   :2544
# How many diamonds fall into each cut grade?
table(diamonds$cut)
## 
##   Good  Ideal V.Good 
##  59680 369448 168896
# Distribution of carat weight.
summary(diamonds$carat)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.200   0.500   0.900   1.071   1.500   9.250
# Pull the third column out as a plain vector (not a one-column data frame).
diamonds2 <- diamonds[[3]]
head(diamonds2)
## [1] V.Good Good   Good   V.Good V.Good Good  
## Levels: Good Ideal V.Good
# The full table is a data.frame; the extracted column is a factor.
class(diamonds)
## [1] "data.frame"
class(diamonds2)
## [1] "factor"
# Character vector.
ajay <- c("Ajay", "Vijay", "Amitabh")
class(ajay)
## [1] "character"
# Logical vector.
ajay2 <- c(TRUE, TRUE, FALSE)
class(ajay2)
## [1] "logical"
# The colon operator yields integers ...
ajay3 <- 1:10
class(ajay3)
## [1] "integer"
# ... whereas typing the same numbers out yields doubles ("numeric").
ajay4 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
class(ajay4)
## [1] "numeric"
# Both print the same way,
ajay3
##  [1]  1  2  3  4  5  6  7  8  9 10
ajay4
##  [1]  1  2  3  4  5  6  7  8  9 10
ajay5 <- 1:10
# but identical() also compares storage type, so integer vs double differ.
identical(ajay3, ajay4)
## [1] FALSE
# Objects currently in the workspace.
ls()
## [1] "ajay"      "ajay2"     "ajay3"     "ajay4"     "ajay5"     "diamonds" 
## [7] "diamonds2"
# Two integer vectors with the same values are identical.
identical(ajay5, ajay3)
## [1] TRUE
# file.choose() opens a file picker if you prefer not to type the path.
#file.choose()
air <- read.csv(file = "C:\\Users\\Dell\\Downloads\\AirPassengers.csv")

#this is a comment

# Workspace before and after removing a single object.
ls()
## [1] "air"       "ajay"      "ajay2"     "ajay3"     "ajay4"     "ajay5"    
## [7] "diamonds"  "diamonds2"
rm(list = "ajay2")
ls()
## [1] "air"       "ajay"      "ajay3"     "ajay4"     "ajay5"     "diamonds" 
## [7] "diamonds2"
# Uncomment to wipe the whole workspace.
#rm(list=ls())
# Run garbage collection and report memory usage.
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells  743413 39.8    2164898 115.7  1321661  70.6
## Vcells 8260469 63.1   21535217 164.4 21469488 163.8
# ls() returns a character vector, so it can be indexed to pick object names
# (and those names could then be handed to rm(list = ...)).
ls()[3:5]
## [1] "ajay3" "ajay4" "ajay5"
#rm(list=ls()[3:5])

ls()[c(1, 4)]
## [1] "air"   "ajay4"
#rm(list=ls()[c(1,4)])

# Start from the built-in lowercase alphabet.
ajay6 <- letters
ajay6
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
ajay6[20]
## [1] "t"
# Overwrite the 20th element with a sentinel value.
ajay6[20] <- "Ajay"
ajay6
##  [1] "a"    "b"    "c"    "d"    "e"    "f"    "g"    "h"    "i"    "j"   
## [11] "k"    "l"    "m"    "n"    "o"    "p"    "q"    "r"    "s"    "Ajay"
## [21] "u"    "v"    "w"    "x"    "y"    "z"
# Turn the sentinel into a missing value by exact comparison. gsub() would
# treat "Ajay" as a regular expression and also blank any element that
# merely contains it.
ajay6[ajay6 == "Ajay"] <- NA
# na.omit() drops the missing entry and records its position as an attribute.
na.omit(ajay6)
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "u" "v" "w" "x" "y" "z"
## attr(,"na.action")
## [1] 20
## attr(,"class")
## [1] "omit"
mean(ajay5)
## [1] 5.5
# Set the value 2 to missing by exact comparison. gsub(2, NA, ...) would
# coerce the numbers to strings and match "2" as a substring, so values
# such as 12 or 20 would be blanked as well.
ajay5[ajay5 == 2] <- NA
# Store as double, matching the numeric output shown below.
ajay5 <- as.numeric(ajay5)
# A single NA makes the mean NA unless it is removed explicitly.
mean(ajay5)
## [1] NA
mean(ajay5, na.rm = TRUE)
## [1] 5.888889
ajay5
##  [1]  1 NA  3  4  5  6  7  8  9 10
is.na(ajay5)
##  [1] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Count missing versus non-missing entries.
table(is.na(ajay5))
## 
## FALSE  TRUE 
##     9     1
ajay5
##  [1]  1 NA  3  4  5  6  7  8  9 10
# Mean imputation: replace each NA with the mean of the observed values.
ajay5 <- ifelse(is.na(ajay5), mean(ajay5, na.rm = TRUE), ajay5)
ajay5
##  [1]  1.000000  5.888889  3.000000  4.000000  5.000000  6.000000  7.000000
##  [8]  8.000000  9.000000 10.000000
ajay3
##  [1]  1  2  3  4  5  6  7  8  9 10
# Blank out positions 3 to 5.
ajay3[3:5] <- NA
ajay3
##  [1]  1  2 NA NA NA  6  7  8  9 10
# Drop the missing entries; the removed positions are kept as attributes.
ajay3 <- na.omit(ajay3)
ajay3
## [1]  1  2  6  7  8  9 10
## attr(,"na.action")
## [1] 3 4 5
## attr(,"class")
## [1] "omit"
# Default plot of the values, then the same data drawn as a line.
plot(ajay3)

plot(ajay3, type = "l")

class(ajay3)
## [1] "integer"
# as.vector() strips the attributes that na.omit() attached.
ajay3 <- as.vector(ajay3)
barplot(ajay3)

# Load the built-in mtcars data set and show its structure.
data("mtcars")
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# First six rows.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Column names.
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# Rows and columns together, then each on its own.
dim(mtcars)
## [1] 32 11
ncol(mtcars)
## [1] 11
nrow(mtcars)
## [1] 32
class(mtcars)
## [1] "data.frame"
# Copy the row names (car models) into a regular column named `brand`.
mtcars[["brand"]] <- row.names(mtcars)
mtcars
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
##                                   brand
## Mazda RX4                     Mazda RX4
## Mazda RX4 Wag             Mazda RX4 Wag
## Datsun 710                   Datsun 710
## Hornet 4 Drive           Hornet 4 Drive
## Hornet Sportabout     Hornet Sportabout
## Valiant                         Valiant
## Duster 360                   Duster 360
## Merc 240D                     Merc 240D
## Merc 230                       Merc 230
## Merc 280                       Merc 280
## Merc 280C                     Merc 280C
## Merc 450SE                   Merc 450SE
## Merc 450SL                   Merc 450SL
## Merc 450SLC                 Merc 450SLC
## Cadillac Fleetwood   Cadillac Fleetwood
## Lincoln Continental Lincoln Continental
## Chrysler Imperial     Chrysler Imperial
## Fiat 128                       Fiat 128
## Honda Civic                 Honda Civic
## Toyota Corolla           Toyota Corolla
## Toyota Corona             Toyota Corona
## Dodge Challenger       Dodge Challenger
## AMC Javelin                 AMC Javelin
## Camaro Z28                   Camaro Z28
## Pontiac Firebird       Pontiac Firebird
## Fiat X1-9                     Fiat X1-9
## Porsche 914-2             Porsche 914-2
## Lotus Europa               Lotus Europa
## Ford Pantera L           Ford Pantera L
## Ferrari Dino               Ferrari Dino
## Maserati Bora             Maserati Bora
## Volvo 142E                   Volvo 142E
# Replace the row names with plain sequence numbers. The length is taken
# from the data frame itself rather than hard-coded as 32, so the line
# keeps working if rows are added or removed.
row.names(mtcars) <- seq_len(nrow(mtcars))
mtcars
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               brand
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4           Mazda RX4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4       Mazda RX4 Wag
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1          Datsun 710
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1      Hornet 4 Drive
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1             Valiant
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4          Duster 360
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2           Merc 240D
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2            Merc 230
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4            Merc 280
## 11 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4           Merc 280C
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3          Merc 450SE
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3          Merc 450SL
## 14 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3         Merc 450SLC
## 15 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4  Cadillac Fleetwood
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
## 17 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   Chrysler Imperial
## 18 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1            Fiat 128
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2         Honda Civic
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1      Toyota Corolla
## 21 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1       Toyota Corona
## 22 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    Dodge Challenger
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2         AMC Javelin
## 24 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4          Camaro Z28
## 25 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    Pontiac Firebird
## 26 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1           Fiat X1-9
## 27 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2       Porsche 914-2
## 28 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2        Lotus Europa
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6        Ferrari Dino
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
## 32 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2          Volvo 142E
# Draw 8 distinct row positions. The population size comes from the data
# frame instead of a hard-coded 32, and replace = FALSE is spelled out
# because the shorthand `F` is an ordinary variable that can be reassigned.
sample(nrow(mtcars), 8, replace = FALSE)
## [1]  9  8  3 19 11 31 20 14
# Use a fresh draw of 8 positions to pick random rows.
mtcars[sample(nrow(mtcars), 8, replace = FALSE), ]
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb               brand
## 29 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4      Ford Pantera L
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2   Hornet Sportabout
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4       Mazda RX4 Wag
## 31 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8       Maserati Bora
## 20 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1      Toyota Corolla
## 30 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6        Ferrari Dino
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2           Merc 240D
## 16 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4 Lincoln Continental
# Same idea with the sample size given as a fraction (12.5%) of the rows.
mtcars[sample(nrow(mtcars),
              0.125 * nrow(mtcars),
              replace = FALSE), ]
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb       brand
## 19 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2 Honda Civic
## 12 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3  Merc 450SE
## 13 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3  Merc 450SL
## 23 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2 AMC Javelin
# Number of cars per cylinder count.
table(mtcars[["cyl"]])
## 
##  4  6  8 
## 11  7 14
# Structure after adding the brand column: 12 variables, the last one character.
str(mtcars)
## 'data.frame':    32 obs. of  12 variables:
##  $ mpg  : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl  : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp : num  160 160 108 258 360 ...
##  $ hp   : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat : num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt   : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec : num  16.5 17 18.6 19.4 17 ...
##  $ vs   : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am   : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear : num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb : num  4 4 1 1 2 1 4 2 2 4 ...
##  $ brand: chr  "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
# Per-column summary statistics.
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb          brand          
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000   Length:32         
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000   Class :character  
##  Median :0.0000   Median :4.000   Median :2.000   Mode  :character  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812                     
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000                     
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Two-way contingency table: cylinder count by number of gears.
a <- table(mtcars$cyl, mtcars$gear)
class(a)
## [1] "table"
# Draw a random 25% of the columns. The sampled positions come from
# ncol(mtcars), so they belong in the column slot of `[`; used as row
# indices they could only ever select among the first ncol(mtcars) rows.
# floor() makes the whole-number sample size explicit.
mtcars[, sample(ncol(mtcars),
                floor(0.25 * ncol(mtcars)),
                replace = FALSE)]
## (output varies per run: every row, restricted to the sampled columns)
# Pie chart of the number of cars per cylinder count.
pie(table(mtcars$cyl))

# The same mpg histogram coloured with three built-in palettes, each
# giving 5 colours at 0.9 alpha.
hist(mtcars$mpg, col = topo.colors(5, alpha = 0.9))

hist(mtcars$mpg, col = cm.colors(5, alpha = 0.9))

hist(mtcars$mpg, col = heat.colors(5, alpha = 0.9))

# Names of all built-in colours.
colors()
##   [1] "white"                "aliceblue"            "antiquewhite"        
##   [4] "antiquewhite1"        "antiquewhite2"        "antiquewhite3"       
##   [7] "antiquewhite4"        "aquamarine"           "aquamarine1"         
##  [10] "aquamarine2"          "aquamarine3"          "aquamarine4"         
##  [13] "azure"                "azure1"               "azure2"              
##  [16] "azure3"               "azure4"               "beige"               
##  [19] "bisque"               "bisque1"              "bisque2"             
##  [22] "bisque3"              "bisque4"              "black"               
##  [25] "blanchedalmond"       "blue"                 "blue1"               
##  [28] "blue2"                "blue3"                "blue4"               
##  [31] "blueviolet"           "brown"                "brown1"              
##  [34] "brown2"               "brown3"               "brown4"              
##  [37] "burlywood"            "burlywood1"           "burlywood2"          
##  [40] "burlywood3"           "burlywood4"           "cadetblue"           
##  [43] "cadetblue1"           "cadetblue2"           "cadetblue3"          
##  [46] "cadetblue4"           "chartreuse"           "chartreuse1"         
##  [49] "chartreuse2"          "chartreuse3"          "chartreuse4"         
##  [52] "chocolate"            "chocolate1"           "chocolate2"          
##  [55] "chocolate3"           "chocolate4"           "coral"               
##  [58] "coral1"               "coral2"               "coral3"              
##  [61] "coral4"               "cornflowerblue"       "cornsilk"            
##  [64] "cornsilk1"            "cornsilk2"            "cornsilk3"           
##  [67] "cornsilk4"            "cyan"                 "cyan1"               
##  [70] "cyan2"                "cyan3"                "cyan4"               
##  [73] "darkblue"             "darkcyan"             "darkgoldenrod"       
##  [76] "darkgoldenrod1"       "darkgoldenrod2"       "darkgoldenrod3"      
##  [79] "darkgoldenrod4"       "darkgray"             "darkgreen"           
##  [82] "darkgrey"             "darkkhaki"            "darkmagenta"         
##  [85] "darkolivegreen"       "darkolivegreen1"      "darkolivegreen2"     
##  [88] "darkolivegreen3"      "darkolivegreen4"      "darkorange"          
##  [91] "darkorange1"          "darkorange2"          "darkorange3"         
##  [94] "darkorange4"          "darkorchid"           "darkorchid1"         
##  [97] "darkorchid2"          "darkorchid3"          "darkorchid4"         
## [100] "darkred"              "darksalmon"           "darkseagreen"        
## [103] "darkseagreen1"        "darkseagreen2"        "darkseagreen3"       
## [106] "darkseagreen4"        "darkslateblue"        "darkslategray"       
## [109] "darkslategray1"       "darkslategray2"       "darkslategray3"      
## [112] "darkslategray4"       "darkslategrey"        "darkturquoise"       
## [115] "darkviolet"           "deeppink"             "deeppink1"           
## [118] "deeppink2"            "deeppink3"            "deeppink4"           
## [121] "deepskyblue"          "deepskyblue1"         "deepskyblue2"        
## [124] "deepskyblue3"         "deepskyblue4"         "dimgray"             
## [127] "dimgrey"              "dodgerblue"           "dodgerblue1"         
## [130] "dodgerblue2"          "dodgerblue3"          "dodgerblue4"         
## [133] "firebrick"            "firebrick1"           "firebrick2"          
## [136] "firebrick3"           "firebrick4"           "floralwhite"         
## [139] "forestgreen"          "gainsboro"            "ghostwhite"          
## [142] "gold"                 "gold1"                "gold2"               
## [145] "gold3"                "gold4"                "goldenrod"           
## [148] "goldenrod1"           "goldenrod2"           "goldenrod3"          
## [151] "goldenrod4"           "gray"                 "gray0"               
## [154] "gray1"                "gray2"                "gray3"               
## [157] "gray4"                "gray5"                "gray6"               
## [160] "gray7"                "gray8"                "gray9"               
## [163] "gray10"               "gray11"               "gray12"              
## [166] "gray13"               "gray14"               "gray15"              
## [169] "gray16"               "gray17"               "gray18"              
## [172] "gray19"               "gray20"               "gray21"              
## [175] "gray22"               "gray23"               "gray24"              
## [178] "gray25"               "gray26"               "gray27"              
## [181] "gray28"               "gray29"               "gray30"              
## [184] "gray31"               "gray32"               "gray33"              
## [187] "gray34"               "gray35"               "gray36"              
## [190] "gray37"               "gray38"               "gray39"              
## [193] "gray40"               "gray41"               "gray42"              
## [196] "gray43"               "gray44"               "gray45"              
## [199] "gray46"               "gray47"               "gray48"              
## [202] "gray49"               "gray50"               "gray51"              
## [205] "gray52"               "gray53"               "gray54"              
## [208] "gray55"               "gray56"               "gray57"              
## [211] "gray58"               "gray59"               "gray60"              
## [214] "gray61"               "gray62"               "gray63"              
## [217] "gray64"               "gray65"               "gray66"              
## [220] "gray67"               "gray68"               "gray69"              
## [223] "gray70"               "gray71"               "gray72"              
## [226] "gray73"               "gray74"               "gray75"              
## [229] "gray76"               "gray77"               "gray78"              
## [232] "gray79"               "gray80"               "gray81"              
## [235] "gray82"               "gray83"               "gray84"              
## [238] "gray85"               "gray86"               "gray87"              
## [241] "gray88"               "gray89"               "gray90"              
## [244] "gray91"               "gray92"               "gray93"              
## [247] "gray94"               "gray95"               "gray96"              
## [250] "gray97"               "gray98"               "gray99"              
## [253] "gray100"              "green"                "green1"              
## [256] "green2"               "green3"               "green4"              
## [259] "greenyellow"          "grey"                 "grey0"               
## [262] "grey1"                "grey2"                "grey3"               
## [265] "grey4"                "grey5"                "grey6"               
## [268] "grey7"                "grey8"                "grey9"               
## [271] "grey10"               "grey11"               "grey12"              
## [274] "grey13"               "grey14"               "grey15"              
## [277] "grey16"               "grey17"               "grey18"              
## [280] "grey19"               "grey20"               "grey21"              
## [283] "grey22"               "grey23"               "grey24"              
## [286] "grey25"               "grey26"               "grey27"              
## [289] "grey28"               "grey29"               "grey30"              
## [292] "grey31"               "grey32"               "grey33"              
## [295] "grey34"               "grey35"               "grey36"              
## [298] "grey37"               "grey38"               "grey39"              
## [301] "grey40"               "grey41"               "grey42"              
## [304] "grey43"               "grey44"               "grey45"              
## [307] "grey46"               "grey47"               "grey48"              
## [310] "grey49"               "grey50"               "grey51"              
## [313] "grey52"               "grey53"               "grey54"              
## [316] "grey55"               "grey56"               "grey57"              
## [319] "grey58"               "grey59"               "grey60"              
## [322] "grey61"               "grey62"               "grey63"              
## [325] "grey64"               "grey65"               "grey66"              
## [328] "grey67"               "grey68"               "grey69"              
## [331] "grey70"               "grey71"               "grey72"              
## [334] "grey73"               "grey74"               "grey75"              
## [337] "grey76"               "grey77"               "grey78"              
## [340] "grey79"               "grey80"               "grey81"              
## [343] "grey82"               "grey83"               "grey84"              
## [346] "grey85"               "grey86"               "grey87"              
## [349] "grey88"               "grey89"               "grey90"              
## [352] "grey91"               "grey92"               "grey93"              
## [355] "grey94"               "grey95"               "grey96"              
## [358] "grey97"               "grey98"               "grey99"              
## [361] "grey100"              "honeydew"             "honeydew1"           
## [364] "honeydew2"            "honeydew3"            "honeydew4"           
## [367] "hotpink"              "hotpink1"             "hotpink2"            
## [370] "hotpink3"             "hotpink4"             "indianred"           
## [373] "indianred1"           "indianred2"           "indianred3"          
## [376] "indianred4"           "ivory"                "ivory1"              
## [379] "ivory2"               "ivory3"               "ivory4"              
## [382] "khaki"                "khaki1"               "khaki2"              
## [385] "khaki3"               "khaki4"               "lavender"            
## [388] "lavenderblush"        "lavenderblush1"       "lavenderblush2"      
## [391] "lavenderblush3"       "lavenderblush4"       "lawngreen"           
## [394] "lemonchiffon"         "lemonchiffon1"        "lemonchiffon2"       
## [397] "lemonchiffon3"        "lemonchiffon4"        "lightblue"           
## [400] "lightblue1"           "lightblue2"           "lightblue3"          
## [403] "lightblue4"           "lightcoral"           "lightcyan"           
## [406] "lightcyan1"           "lightcyan2"           "lightcyan3"          
## [409] "lightcyan4"           "lightgoldenrod"       "lightgoldenrod1"     
## [412] "lightgoldenrod2"      "lightgoldenrod3"      "lightgoldenrod4"     
## [415] "lightgoldenrodyellow" "lightgray"            "lightgreen"          
## [418] "lightgrey"            "lightpink"            "lightpink1"          
## [421] "lightpink2"           "lightpink3"           "lightpink4"          
## [424] "lightsalmon"          "lightsalmon1"         "lightsalmon2"        
## [427] "lightsalmon3"         "lightsalmon4"         "lightseagreen"       
## [430] "lightskyblue"         "lightskyblue1"        "lightskyblue2"       
## [433] "lightskyblue3"        "lightskyblue4"        "lightslateblue"      
## [436] "lightslategray"       "lightslategrey"       "lightsteelblue"      
## [439] "lightsteelblue1"      "lightsteelblue2"      "lightsteelblue3"     
## [442] "lightsteelblue4"      "lightyellow"          "lightyellow1"        
## [445] "lightyellow2"         "lightyellow3"         "lightyellow4"        
## [448] "limegreen"            "linen"                "magenta"             
## [451] "magenta1"             "magenta2"             "magenta3"            
## [454] "magenta4"             "maroon"               "maroon1"             
## [457] "maroon2"              "maroon3"              "maroon4"             
## [460] "mediumaquamarine"     "mediumblue"           "mediumorchid"        
## [463] "mediumorchid1"        "mediumorchid2"        "mediumorchid3"       
## [466] "mediumorchid4"        "mediumpurple"         "mediumpurple1"       
## [469] "mediumpurple2"        "mediumpurple3"        "mediumpurple4"       
## [472] "mediumseagreen"       "mediumslateblue"      "mediumspringgreen"   
## [475] "mediumturquoise"      "mediumvioletred"      "midnightblue"        
## [478] "mintcream"            "mistyrose"            "mistyrose1"          
## [481] "mistyrose2"           "mistyrose3"           "mistyrose4"          
## [484] "moccasin"             "navajowhite"          "navajowhite1"        
## [487] "navajowhite2"         "navajowhite3"         "navajowhite4"        
## [490] "navy"                 "navyblue"             "oldlace"             
## [493] "olivedrab"            "olivedrab1"           "olivedrab2"          
## [496] "olivedrab3"           "olivedrab4"           "orange"              
## [499] "orange1"              "orange2"              "orange3"             
## [502] "orange4"              "orangered"            "orangered1"          
## [505] "orangered2"           "orangered3"           "orangered4"          
## [508] "orchid"               "orchid1"              "orchid2"             
## [511] "orchid3"              "orchid4"              "palegoldenrod"       
## [514] "palegreen"            "palegreen1"           "palegreen2"          
## [517] "palegreen3"           "palegreen4"           "paleturquoise"       
## [520] "paleturquoise1"       "paleturquoise2"       "paleturquoise3"      
## [523] "paleturquoise4"       "palevioletred"        "palevioletred1"      
## [526] "palevioletred2"       "palevioletred3"       "palevioletred4"      
## [529] "papayawhip"           "peachpuff"            "peachpuff1"          
## [532] "peachpuff2"           "peachpuff3"           "peachpuff4"          
## [535] "peru"                 "pink"                 "pink1"               
## [538] "pink2"                "pink3"                "pink4"               
## [541] "plum"                 "plum1"                "plum2"               
## [544] "plum3"                "plum4"                "powderblue"          
## [547] "purple"               "purple1"              "purple2"             
## [550] "purple3"              "purple4"              "red"                 
## [553] "red1"                 "red2"                 "red3"                
## [556] "red4"                 "rosybrown"            "rosybrown1"          
## [559] "rosybrown2"           "rosybrown3"           "rosybrown4"          
## [562] "royalblue"            "royalblue1"           "royalblue2"          
## [565] "royalblue3"           "royalblue4"           "saddlebrown"         
## [568] "salmon"               "salmon1"              "salmon2"             
## [571] "salmon3"              "salmon4"              "sandybrown"          
## [574] "seagreen"             "seagreen1"            "seagreen2"           
## [577] "seagreen3"            "seagreen4"            "seashell"            
## [580] "seashell1"            "seashell2"            "seashell3"           
## [583] "seashell4"            "sienna"               "sienna1"             
## [586] "sienna2"              "sienna3"              "sienna4"             
## [589] "skyblue"              "skyblue1"             "skyblue2"            
## [592] "skyblue3"             "skyblue4"             "slateblue"           
## [595] "slateblue1"           "slateblue2"           "slateblue3"          
## [598] "slateblue4"           "slategray"            "slategray1"          
## [601] "slategray2"           "slategray3"           "slategray4"          
## [604] "slategrey"            "snow"                 "snow1"               
## [607] "snow2"                "snow3"                "snow4"               
## [610] "springgreen"          "springgreen1"         "springgreen2"        
## [613] "springgreen3"         "springgreen4"         "steelblue"           
## [616] "steelblue1"           "steelblue2"           "steelblue3"          
## [619] "steelblue4"           "tan"                  "tan1"                
## [622] "tan2"                 "tan3"                 "tan4"                
## [625] "thistle"              "thistle1"             "thistle2"            
## [628] "thistle3"             "thistle4"             "tomato"              
## [631] "tomato1"              "tomato2"              "tomato3"             
## [634] "tomato4"              "turquoise"            "turquoise1"          
## [637] "turquoise2"           "turquoise3"           "turquoise4"          
## [640] "violet"               "violetred"            "violetred1"          
## [643] "violetred2"           "violetred3"           "violetred4"          
## [646] "wheat"                "wheat1"               "wheat2"              
## [649] "wheat3"               "wheat4"               "whitesmoke"          
## [652] "yellow"               "yellow1"              "yellow2"             
## [655] "yellow3"              "yellow4"              "yellowgreen"
# Show what is in the workspace, then remove every object and collect garbage.
ls()
##  [1] "a"         "air"       "ajay"      "ajay3"     "ajay4"    
##  [6] "ajay5"     "ajay6"     "diamonds"  "diamonds2" "mtcars"
rm(list = ls())
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells  518944 27.8    1731918  92.5  1347784  72.0
## Vcells 1450453 11.1   17228173 131.5 21469488 163.8
# Current working directory.
getwd()
## [1] "C:/Users/Dell/Documents/R/revisionofbasics"
# Despite the .csv suffix this path is a folder: the dir() listing below
# shows the data file and an .Rproj file inside it.
setwd("C:/Users/Dell/Downloads/BigDiamonds.csv")
dir(pattern = "csv")
## [1] "BigDiamonds.csv"       "BigDiamonds.csv.Rproj"
#library(benchmark)
# Current date and timestamp.
Sys.Date()
## [1] "2017-05-15"
Sys.time()
## [1] "2017-05-15 16:11:28 IST"
# Time three ways of reading the same CSV file: base R read.csv(),
# data.table's fread() and readr's read_csv(). The recorded timings are kept
# below each call as "## " output comments.
library(data.table)
library(readr)
system.time(read.csv("BigDiamonds.csv"))
##    user  system elapsed 
##   26.02    0.34   26.39
# fread() prints a progress report while it reads; those lines are recorded
# output and are therefore commented like the rest, so the script still parses.
system.time(fread("BigDiamonds.csv"))
## 
## Read 23.4% of 598024 rows
## Read 36.8% of 598024 rows
## Read 48.5% of 598024 rows
## Read 61.9% of 598024 rows
## Read 73.6% of 598024 rows
## Read 85.3% of 598024 rows
## Read 95.3% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:09
##    user  system elapsed 
##    8.65    0.06    8.74
system.time(read_csv("BigDiamonds.csv"))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_integer(),
##   carat = col_double(),
##   cut = col_character(),
##   color = col_character(),
##   clarity = col_character(),
##   table = col_double(),
##   depth = col_double(),
##   cert = col_character(),
##   measurements = col_character(),
##   price = col_integer(),
##   x = col_double(),
##   y = col_double(),
##   z = col_double()
## )
##    user  system elapsed 
##    3.76    0.29    4.06
library(RColorBrewer)

# Five stand-alone histograms, each filled from a ColorBrewer palette.
hist(x = mtcars$mpg, col = brewer.pal(n = 4, name = "Set1"))

hist(x = mtcars$mpg, breaks = 8, col = brewer.pal(n = 3, name = "Set2"))

hist(x = iris$Sepal.Length, breaks = 10, col = brewer.pal(n = 8, name = "Blues"))

hist(x = iris$Sepal.Length, breaks = 5, col = brewer.pal(n = 8, name = "Blues"))

hist(
  x = iris$Sepal.Length,
  breaks = 15,
  col = brewer.pal(n = 8, name = "Greens"),
  main = "Title of my graph",
  xlab = "Sepal Length",
  ylab = "Frequency",
  ylim = c(0, 20)
)

# The same five plots again, now laid out on a 2 x 3 grid with a grey
# background.
par(mfrow = c(2, 3), bg = "grey")

hist(x = mtcars$mpg, col = brewer.pal(n = 4, name = "Set1"))
hist(x = mtcars$mpg, breaks = 8, col = brewer.pal(n = 3, name = "Set2"))
hist(x = iris$Sepal.Length, breaks = 10, col = brewer.pal(n = 8, name = "Blues"))
hist(x = iris$Sepal.Length, breaks = 5, col = brewer.pal(n = 8, name = "Blues"))

hist(
  x = iris$Sepal.Length,
  breaks = 15,
  col = brewer.pal(n = 8, name = "Greens"),
  main = "Title of my graph",
  xlab = "Sepal Length",
  ylab = "Frequency",
  ylim = c(0, 20)
)

brewer.pal.info
##          maxcolors category colorblind
## BrBG            11      div       TRUE
## PiYG            11      div       TRUE
## PRGn            11      div       TRUE
## PuOr            11      div       TRUE
## RdBu            11      div       TRUE
## RdGy            11      div      FALSE
## RdYlBu          11      div       TRUE
## RdYlGn          11      div      FALSE
## Spectral        11      div      FALSE
## Accent           8     qual      FALSE
## Dark2            8     qual       TRUE
## Paired          12     qual       TRUE
## Pastel1          9     qual      FALSE
## Pastel2          8     qual      FALSE
## Set1             9     qual      FALSE
## Set2             8     qual       TRUE
## Set3            12     qual      FALSE
## Blues            9      seq       TRUE
## BuGn             9      seq       TRUE
## BuPu             9      seq       TRUE
## GnBu             9      seq       TRUE
## Greens           9      seq       TRUE
## Greys            9      seq       TRUE
## Oranges          9      seq       TRUE
## OrRd             9      seq       TRUE
## PuBu             9      seq       TRUE
## PuBuGn           9      seq       TRUE
## PuRd             9      seq       TRUE
## Purples          9      seq       TRUE
## RdPu             9      seq       TRUE
## Reds             9      seq       TRUE
## YlGn             9      seq       TRUE
## YlGnBu           9      seq       TRUE
## YlOrBr           9      seq       TRUE
## YlOrRd           9      seq       TRUE
#brewer.show


# Today's date, and the number of days elapsed since 1 April 1997.
Sys.Date()
## [1] "2017-05-15"
dob <- as.Date("1997-04-01")
Sys.Date() - dob
## Time difference of 7349 days

# Dates typed in several free-form styles. The second vector swaps in a
# month-first string ("12-30-2017") that a day-first parser cannot read.
fundates <- c("1Apr2017", "01-06-2017", "1/6/17")
fundates2 <- c("1Apr2017", "12-30-2017", "1/6/17")

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday,
##     week, yday, year
## The following object is masked from 'package:base':
## 
##     date
# Parse the free-form strings with lubridate: dmy() reads day-month-year and
# mdy() reads month-day-year. A string that does not fit the requested order
# comes back as NA together with a "failed to parse" warning.
fun2 <- dmy(fundates)
fun2
## [1] "2017-04-01" "2017-06-01" "2017-06-01"
fun3 <- dmy(fundates2)
## Warning: 1 failed to parse.
fun3
## [1] "2017-04-01" NA           "2017-06-01"
fun4 <- mdy(fundates2)
## Warning: 1 failed to parse.
fun4
## [1] NA           "2017-12-30" "2017-01-06"

# Day-first parse with a month-first fallback for whatever dmy() rejected.
# ifelse() is avoided here: it drops the Date class and returns bare day
# counts (17257 17530 17318). Overwriting only the NA slots keeps the result
# a Date vector. An ambiguous string such as "1/6/17" is accepted by dmy()
# and therefore stays day-first (1 June, not 6 January).
fun4 <- dmy(fundates2)
## Warning: 1 failed to parse.
fun4[is.na(fun4)] <- mdy(fundates2[is.na(fun4)])
fun4
## [1] "2017-04-01" "2017-12-30" "2017-06-01"
# Inspect, then clear, the objects left over from the date examples.
ls()
## [1] "dob"       "fun2"      "fun3"      "fun4"      "fundates"  "fundates2"
rm(list = ls())

# Load the diamonds file as a data.table. fread() prints a progress report
# while reading; those lines are recorded output and are commented like the
# rest so the script still parses.
df <- fread("BigDiamonds.csv")
## 
## Read 20.1% of 598024 rows
## Read 33.4% of 598024 rows
## Read 45.1% of 598024 rows
## Read 56.9% of 598024 rows
## Read 68.6% of 598024 rows
## Read 80.3% of 598024 rows
## Read 92.0% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:09
# Five-number summary of price; the NA's entry counts the missing prices.
summary(df$price)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     300    1220    3503    8753   11174   99990     713
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# New column price2: a copy of price in which zoo's na.aggregate() has filled
# the missing entries using the median as the aggregation function.
df$price2 <- na.aggregate(object = df$price, FUN = median)

# Query a data frame with plain SQL through sqldf: select every mtcars row
# whose mpg is above the average mpg of the whole table (scalar subquery).
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
sqldf("select *
  from mtcars
where mpg > (
  select avg(mpg)
  from mtcars
);")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 6  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 7  32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## 8  30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## 9  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 10 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 11 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## 12 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## 13 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## 14 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# Copy the row names (the car names) into a regular character column.
mtcars$brand <- rownames(mtcars)
mtcars$brand
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"
# Number of characters in each name.
nchar(x = mtcars$brand)
##  [1]  9 13 10 14 17  7 10  9  8  8  9 10 10 11 18 19 17  8 11 14 13 16 11
## [24] 10 16  9 13 12 14 12 13 10
# Does the name contain a space? grepl() answers with one logical per name.
grepl(pattern = " ", x = mtcars$brand)
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [23]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# grep() answers with the indices of the matching names instead ...
grep(pattern = " ", x = mtcars$brand)
##  [1]  1  2  3  4  5  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [24] 25 26 27 28 29 30 31 32
# ... or with the matching names themselves when value = TRUE. The full
# constant is spelled out because the shorthand T is an ordinary variable
# that can be reassigned.
grep(pattern = " ", x = mtcars$brand, value = TRUE)
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Duster 360"         
##  [7] "Merc 240D"           "Merc 230"            "Merc 280"           
## [10] "Merc 280C"           "Merc 450SE"          "Merc 450SL"         
## [13] "Merc 450SLC"         "Cadillac Fleetwood"  "Lincoln Continental"
## [16] "Chrysler Imperial"   "Fiat 128"            "Honda Civic"        
## [19] "Toyota Corolla"      "Toyota Corona"       "Dodge Challenger"   
## [22] "AMC Javelin"         "Camaro Z28"          "Pontiac Firebird"   
## [25] "Fiat X1-9"           "Porsche 914-2"       "Lotus Europa"       
## [28] "Ford Pantera L"      "Ferrari Dino"        "Maserati Bora"      
## [31] "Volvo 142E"
# Every space position in each name, one list element per name
# (-1 marks a name that contains no space).
gregexpr(pattern = " ", text = mtcars$brand)
## [[1]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[2]]
## [1]  6 10
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[3]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[4]]
## [1] 7 9
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[5]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[6]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE
## 
## [[7]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[8]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[9]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[10]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[11]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[12]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[13]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[14]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[15]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[16]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[17]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[18]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[19]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[20]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[21]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[22]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[23]]
## [1] 4
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[24]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[25]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[26]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[27]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[28]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[29]]
## [1]  5 13
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[30]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[31]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
## 
## [[32]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
# Position of the first space only, returned as a single integer vector
# (-1 where the name has no space).
regexpr(pattern = " ", text = mtcars$brand)
##  [1]  6  6  7  7  7 -1  7  5  5  5  5  5  5  5  9  8  9  5  6  7  7  6  4
## [24]  7  8  5  8  6  5  8  9  6
## attr(,"match.length")
##  [1]  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [24]  1  1  1  1  1  1  1  1  1
## attr(,"useBytes")
## [1] TRUE
# First attempt at extracting the make: keep a fixed six characters of every
# name. The preview below shows why that is too crude ("Valian", "Merc 2").
mtcars$brand2 <- substr(x = mtcars$brand, start = 1, stop = 6)
dim(mtcars)
## [1] 32 13
mtcars[seq_len(10), c("brand", "brand2")]
##                               brand brand2
## Mazda RX4                 Mazda RX4 Mazda 
## Mazda RX4 Wag         Mazda RX4 Wag Mazda 
## Datsun 710               Datsun 710 Datsun
## Hornet 4 Drive       Hornet 4 Drive Hornet
## Hornet Sportabout Hornet Sportabout Hornet
## Valiant                     Valiant Valian
## Duster 360               Duster 360 Duster
## Merc 240D                 Merc 240D Merc 2
## Merc 230                   Merc 230 Merc 2
## Merc 280                   Merc 280 Merc 2
# Position of the first space in every name (-1 where there is none).
regexpr(" ", mtcars$brand)
##  [1]  6  6  7  7  7 -1  7  5  5  5  5  5  5  5  9  8  9  5  6  7  7  6  4
## [24]  7  8  5  8  6  5  8  9  6
## attr(,"match.length")
##  [1]  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [24]  1  1  1  1  1  1  1  1  1
## attr(,"useBytes")
## [1] TRUE
# Second attempt: the make is everything BEFORE the first space, so the cut
# stops one character short of the space position. Stopping on the position
# itself would keep the blank ("Mazda " instead of "Mazda"). A name without a
# space gives a negative stop and therefore an empty string here.
mtcars$brand3 <- substr(mtcars$brand, 1, regexpr(" ", mtcars$brand) - 1)

mtcars[1:10, 12:14]
##                               brand brand2 brand3
## Mazda RX4                 Mazda RX4 Mazda   Mazda
## Mazda RX4 Wag         Mazda RX4 Wag Mazda   Mazda
## Datsun 710               Datsun 710 Datsun Datsun
## Hornet 4 Drive       Hornet 4 Drive Hornet Hornet
## Hornet Sportabout Hornet Sportabout Hornet Hornet
## Valiant                     Valiant Valian       
## Duster 360               Duster 360 Duster Duster
## Merc 240D                 Merc 240D Merc 2   Merc
## Merc 230                   Merc 230 Merc 2   Merc
## Merc 280                   Merc 280 Merc 2   Merc
# Final version: single-word names (empty brand3) fall back to the full name.
mtcars$brand4 <- ifelse(mtcars$brand3 == "", mtcars$brand, mtcars$brand3)


mtcars[1:10, 12:15]
##                               brand brand2 brand3  brand4
## Mazda RX4                 Mazda RX4 Mazda   Mazda   Mazda
## Mazda RX4 Wag         Mazda RX4 Wag Mazda   Mazda   Mazda
## Datsun 710               Datsun 710 Datsun Datsun  Datsun
## Hornet 4 Drive       Hornet 4 Drive Hornet Hornet  Hornet
## Hornet Sportabout Hornet Sportabout Hornet Hornet  Hornet
## Valiant                     Valiant Valian        Valiant
## Duster 360               Duster 360 Duster Duster  Duster
## Merc 240D                 Merc 240D Merc 2   Merc    Merc
## Merc 230                   Merc 230 Merc 2   Merc    Merc
## Merc 280                   Merc 280 Merc 2   Merc    Merc
# One amount typed five different ways. Mixing a number into c() with
# strings turns the whole vector into character.
money <- c("50000", "50,000", "$50000", 50000, "$50,000")

# Step 1: remove the thousands separators.
money2 <- gsub(pattern = ",", replacement = "", x = money)
print(money2)
## [1] "50000"  "50000"  "$50000" "50000"  "$50000"
# Step 2: remove the currency sign. "$" is a regex anchor, hence the escape.
money3 <- gsub(pattern = "\\$", replacement = "", x = money2)
print(money3)
## [1] "50000" "50000" "50000" "50000" "50000"
# Step 3: the cleaned strings now convert to numbers and can be averaged.
money4 <- as.numeric(money3)
print(money4)
## [1] 50000 50000 50000 50000 50000
mean(x = money4)
## [1] 50000
ls()
## [1] "df"     "money"  "money2" "money3" "money4" "mtcars"
summary(df)
##       V1                carat           cut               color          
##  Length:598024      Min.   :0.200   Length:598024      Length:598024     
##  Class :character   1st Qu.:0.500   Class :character   Class :character  
##  Mode  :character   Median :0.900   Mode  :character   Mode  :character  
##                     Mean   :1.071                                        
##                     3rd Qu.:1.500                                        
##                     Max.   :9.250                                        
##                                                                          
##    clarity              table           depth           cert          
##  Length:598024      Min.   : 0.00   Min.   : 0.00   Length:598024     
##  Class :character   1st Qu.:56.00   1st Qu.:61.00   Class :character  
##  Mode  :character   Median :58.00   Median :62.10   Mode  :character  
##                     Mean   :57.63   Mean   :61.06                     
##                     3rd Qu.:59.00   3rd Qu.:62.70                     
##                     Max.   :75.90   Max.   :81.30                     
##                                                                       
##  measurements           price             x                y         
##  Length:598024      Min.   :  300   Min.   : 0.150   Min.   : 1.000  
##  Class :character   1st Qu.: 1220   1st Qu.: 4.740   1st Qu.: 4.970  
##  Mode  :character   Median : 3503   Median : 5.780   Median : 6.050  
##                     Mean   : 8753   Mean   : 5.991   Mean   : 6.199  
##                     3rd Qu.:11174   3rd Qu.: 6.970   3rd Qu.: 7.230  
##                     Max.   :99990   Max.   :13.890   Max.   :13.890  
##                     NA's   :713     NA's   :1815     NA's   :1852    
##        z              price2     
##  Min.   : 0.040   Min.   :  300  
##  1st Qu.: 3.120   1st Qu.: 1220  
##  Median : 3.860   Median : 3503  
##  Mean   : 4.033   Mean   : 8747  
##  3rd Qu.: 4.610   3rd Qu.:11151  
##  Max.   :13.180   Max.   :99990  
##  NA's   :2544
tables()
##      NAME    NROW NCOL  MB
## [1,] df   598,024   14 104
##      COLS                                                                       
## [1,] V1,carat,cut,color,clarity,table,depth,cert,measurements,price,x,y,z,price2
##      KEY
## [1,]    
## Total: 104MB
head(df)
##    V1 carat    cut color clarity table depth cert       measurements price
## 1:  1  0.25 V.Good     K      I1    59  63.7  GIA 3.96 x 3.95 x 2.52    NA
## 2:  2  0.23   Good     G      I1    61  58.1  GIA 4.00 x 4.05 x 2.30    NA
## 3:  3  0.34   Good     J      I2    58  58.7  GIA 4.56 x 4.53 x 2.67    NA
## 4:  4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31    NA
## 5:  5  0.31 V.Good     K      I1    59  62.2  EGL 4.35 x 4.26 x 2.68    NA
## 6:  6  0.20   Good     G     SI2    60  64.4  GIA 3.74 x 3.67 x 2.38    NA
##       x    y    z price2
## 1: 3.96 3.95 2.52   3503
## 2: 4.00 4.05 2.30   3503
## 3: 4.56 4.53 2.67   3503
## 4: 3.80 3.82 2.31   3503
## 5: 4.35 4.26 2.68   3503
## 6: 3.74 3.67 2.38   3503
# Rows that do have a price.
df22 <- df[!is.na(price)]

# Median price per colour grade, ignoring missing prices. The unnamed
# aggregate comes back as column V1. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
df3 <- df[, median(price, na.rm = TRUE), by = color]
df3
##    color   V1
## 1:     K 4418
## 2:     G 3720
## 3:     J 4697
## 4:     D 2690
## 5:     F 2966
## 6:     E 2342
## 7:     H 4535
## 8:     I 4717
## 9:     L 3017
# Rename V1 by reference with data.table's setnames() rather than assigning
# through names(), which is not the by-reference idiom for a data.table.
setnames(df3, old = "V1", new = "price")
df3[order(color)]
##    color price
## 1:     D  2690
## 2:     E  2342
## 3:     F  2966
## 4:     G  3720
## 5:     H  4535
## 6:     I  4717
## 7:     J  4697
## 8:     K  4418
## 9:     L  3017
# The complement of the priced rows: every row whose price is missing.
df2 <- df[is.na(price)]
print(df2)
##          V1 carat    cut color clarity table depth cert       measurements
##   1:      1  0.25 V.Good     K      I1    59  63.7  GIA 3.96 x 3.95 x 2.52
##   2:      2  0.23   Good     G      I1    61  58.1  GIA 4.00 x 4.05 x 2.30
##   3:      3  0.34   Good     J      I2    58  58.7  GIA 4.56 x 4.53 x 2.67
##   4:      4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31
##   5:      5  0.31 V.Good     K      I1    59  62.2  EGL 4.35 x 4.26 x 2.68
##  ---                                                                      
## 709: 525569  0.20   Good     E     VS2    62  60.4  GIA 3.81 x 3.74 x 2.28
## 710: 525570  0.29 V.Good     G      I1    58  64.0  GIA 4.17 x 4.19 x 2.67
## 711: 525571  0.31 V.Good     F      I1    59  62.8  EGL 4.33 x 4.27 x 2.67
## 712: 525572  0.21   Good     E     SI1    66  62.5  GIA 3.81 x 3.73 x 2.35
## 713: 525573  0.22   Good     D     VS2    61  63.7  GIA 3.77 x 3.73 x 2.39
##      price    x    y    z price2
##   1:    NA 3.96 3.95 2.52   3503
##   2:    NA 4.00 4.05 2.30   3503
##   3:    NA 4.56 4.53 2.67   3503
##   4:    NA 3.80 3.82 2.31   3503
##   5:    NA 4.35 4.26 2.68   3503
##  ---                            
## 709:    NA 3.81 3.74 2.28   3503
## 710:    NA 4.17 4.19 2.67   3503
## 711:    NA 4.33 4.27 2.67   3503
## 712:    NA 3.81 3.73 2.35   3503
## 713:    NA 3.77 3.73 2.39   3503
# Same rows, displayed in colour order (df2 itself is not modified).
df2[order(color)]
##       V1 carat    cut color clarity table depth cert       measurements
##   1:   4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31
##   2:   8  0.22 V.Good     D      I1    61  59.2  GIA 3.95 x 3.97 x 2.34
##   3:  13  0.22 V.Good     D     SI2    57  59.7  GIA 3.94 x 3.93 x 2.35
##   4:  22  0.22   Good     D     VS2    61  63.7  GIA 3.77 x 3.73 x 2.39
##   5:  24  0.21   Good     D     SI2    62  64.4  IGI 3.73 x 3.78 x 2.42
##  ---                                                                   
## 709: 178  0.32   Good     L    VVS1    64  58.9  GIA 4.52 x 4.45 x 2.64
## 710: 223  0.23 V.Good     L    VVS2    61  59.3  GIA 3.99 x 3.96 x 2.36
## 711: 227  0.30   Good     L     VS2    63  58.1  GIA 4.45 x 4.39 x 2.57
## 712: 337  0.41 V.Good     L     VS2    58  60.5  GIA 4.86 x 4.81 x 2.92
## 713: 435  0.30   Good     L     VS1    57  66.2  GIA 4.21 x 4.13 x 2.76
##      price    x    y    z price2
##   1:    NA 3.80 3.82 2.31   3503
##   2:    NA 3.95 3.97 2.34   3503
##   3:    NA 3.94 3.93 2.35   3503
##   4:    NA 3.77 3.73 2.39   3503
##   5:    NA 3.73 3.78 2.42   3503
##  ---                            
## 709:    NA 4.52 4.45 2.64   3503
## 710:    NA 3.99 3.96 2.36   3503
## 711:    NA 4.45 4.39 2.57   3503
## 712:    NA 4.86 4.81 2.92   3503
## 713:    NA 4.21 4.13 2.76   3503
# Key both tables on colour so they can be matched on it.
setkeyv(df3, "color")
setkeyv(df2, "color")

# Keyed-join form of the same lookup, left commented out:
#joined  <- df3[df2]
# head(joined)

# Attach each colour's median price to the rows whose own price is missing.
# Both inputs carry a "price" column, so merge() suffixes them: price.x is
# the median from df3 and price.y is the all-NA original from df2.
df4 <- merge(x = df3, y = df2)
df4[, price.y := NULL]

# price.x is the second column; give it the plain name again.
setnames(df4, old = "price.x", new = "price")
df4
##      color price  V1 carat    cut clarity table depth cert
##   1:     D  2690   4  0.21 V.Good      I1    60  60.6  GIA
##   2:     D  2690   8  0.22 V.Good      I1    61  59.2  GIA
##   3:     D  2690  13  0.22 V.Good     SI2    57  59.7  GIA
##   4:     D  2690  22  0.22   Good     VS2    61  63.7  GIA
##   5:     D  2690  24  0.21   Good     SI2    62  64.4  IGI
##  ---                                                      
## 709:     L  3017 178  0.32   Good    VVS1    64  58.9  GIA
## 710:     L  3017 223  0.23 V.Good    VVS2    61  59.3  GIA
## 711:     L  3017 227  0.30   Good     VS2    63  58.1  GIA
## 712:     L  3017 337  0.41 V.Good     VS2    58  60.5  GIA
## 713:     L  3017 435  0.30   Good     VS1    57  66.2  GIA
##            measurements    x    y    z price2
##   1: 3.80 x 3.82 x 2.31 3.80 3.82 2.31   3503
##   2: 3.95 x 3.97 x 2.34 3.95 3.97 2.34   3503
##   3: 3.94 x 3.93 x 2.35 3.94 3.93 2.35   3503
##   4: 3.77 x 3.73 x 2.39 3.77 3.73 2.39   3503
##   5: 3.73 x 3.78 x 2.42 3.73 3.78 2.42   3503
##  ---                                         
## 709: 4.52 x 4.45 x 2.64 4.52 4.45 2.64   3503
## 710: 3.99 x 3.96 x 2.36 3.99 3.96 2.36   3503
## 711: 4.45 x 4.39 x 2.57 4.45 4.39 2.57   3503
## 712: 4.86 x 4.81 x 2.92 4.86 4.81 2.92   3503
## 713: 4.21 x 4.13 x 2.76 4.21 4.13 2.76   3503