#code for revision of basics
#created on 14 May 2017
#Ajay Ohri v1.0
# Report the R version, platform, locale and attached/loaded packages.
utils::sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7600)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.4.0 backports_1.0.5 magrittr_1.5 rprojroot_1.2
## [5] tools_3.4.0 htmltools_0.3.6 Rcpp_0.12.10 stringi_1.1.5
## [9] rmarkdown_1.5 knitr_1.15.1 stringr_1.2.0 digest_0.6.12
## [13] evaluate_0.10
# Memory limit and current memory use as reported by R for this session.
# NOTE(review): both functions are Windows-specific and are documented as
# no longer supported from R 4.2.0 -- confirm before running on newer R.
memory.limit()
## [1] 8096
memory.size()
## [1] 48.98
# Current working directory, then the files it contains.
getwd()
## [1] "C:/Users/Dell/Documents/R/revisionofbasics"
list.files()
## [1] "revisionofbasics.R" "revisionofbasics.Rproj"
## [3] "revisionofbasics.spin.R" "revisionofbasics.spin.Rmd"
# Everything in the Downloads folder (the full listing follows).
list.files(path = "C:/Users/Dell/Downloads")
## [1] "140749_2017.pdf"
## [2] "2+Clustering+-K+Means.ipynb"
## [3] "2011-F01-0700-Rev4-MDDS.XLSX"
## [4] "20150817143155.pdf"
## [5] "20160111060911.pdf"
## [6] "20170214052225.pdf"
## [7] "692480_2017.pdf"
## [8] "7z1604-x64.exe"
## [9] "7z1604.exe"
## [10] "861415_10151432783238421_2124270505_o (1).jpg"
## [11] "861415_10151432783238421_2124270505_o.jpg"
## [12] "AirPassengers.csv"
## [13] "ajay.csv"
## [14] "ajayo.jpg"
## [15] "Alison Python Invoice - Sheet1.pdf"
## [16] "Alison SAS Invoice - Sheet1.pdf"
## [17] "All+CSV+Files+in+a+Folder.ipynb"
## [18] "Allison Interview Jones Invoice - Sheet1.pdf"
## [19] "Anaconda3-4.2.0-Windows-x86_64.exe"
## [20] "anscombe+dataset.ipynb"
## [21] "apachehttpd.exe"
## [22] "April invoice adaptive analytics - Sheet1.pdf"
## [23] "Assignment14_BusinessAnalytics (1).docx"
## [24] "Assignment14_BusinessAnalytics.docx"
## [25] "Assignment15_BusinessAnalytics.docx"
## [26] "Assignment16_BusinessAnalytics (1).docx"
## [27] "Assignment16_BusinessAnalytics (2).docx"
## [28] "Assignment16_BusinessAnalytics.docx"
## [29] "aug ust 2008.JPG"
## [30] "avast_free_antivirus_setup_online.exe"
## [31] "avinash_ltv (1).zip"
## [32] "avinash_ltv.zip"
## [33] "BigDiamonds.csv"
## [34] "BigDiamonds.csv (1).zip"
## [35] "BigDiamonds.csv (2)"
## [36] "BigDiamonds.csv (2).zip"
## [37] "BigDiamonds.csv (3).zip"
## [38] "BigDiamonds.csv.zip"
## [39] "Boston (1).csv"
## [40] "Boston (2).csv"
## [41] "Boston.csv"
## [42] "BuildTools_Full.exe"
## [43] "CAM- Ajay Ohri (1).pdf"
## [44] "CAM- Ajay Ohri.pdf"
## [45] "camtasia.exe"
## [46] "ccFraud.csv"
## [47] "ce_query_pla_cenvat_revenue.txt"
## [48] "Certificate of Incorporation - U74999DL2015PTC282030 (26 June 2015).pdf"
## [49] "CHAP1-6PythonforRUsersAnapproachforDataScience.docx"
## [50] "chapter+3+_+spark.html"
## [51] "cheat+sheet+python (5).ipynb"
## [52] "chi+square+test.ipynb"
## [53] "chromeinstall-8u111.exe"
## [54] "Cisco_WebEx_Add-On.exe"
## [55] "class batch 2.R"
## [56] "class_batch_2.html"
## [57] "class+exercise+data+viz.ipynb"
## [58] "class2.csv"
## [59] "Clustering+-K+Means.ipynb"
## [60] "Collabera Invoice (1).pdf"
## [61] "Collabera Invoice.pdf"
## [62] "Collectcent Invoice.pdf"
## [63] "college degrees.pdf"
## [64] "DAP 1.pdf"
## [65] "DAP 1.pptx"
## [66] "DAP 6 RDBMS and SQL.pdf"
## [67] "DAP 6 RDBMS and SQL.pptx"
## [68] "Data Analysis (1).7z"
## [69] "Data Analysis (1).rar"
## [70] "Data Analysis (2).rar"
## [71] "Data Analysis (3).rar"
## [72] "Data Analysis.rar"
## [73] "Data Viz.pptx"
## [74] "data+exploration.ipynb"
## [75] "data+manipulation.ipynb"
## [76] "data+munging+again.ipynb"
## [77] "data+viz.ipynb"
## [78] "data+wrangling+titanic+dataset.ipynb"
## [79] "data1.csv"
## [80] "datasets (1).csv"
## [81] "datasets.csv"
## [82] "Decision Trees.pdf"
## [83] "DecisionStatsOfferLetter.docx"
## [84] "DecisionStatsRelievingLetter.docx"
## [85] "descriptive+stats+in+Python.ipynb"
## [86] "desktop.ini"
## [87] "Diamond (1).csv"
## [88] "Diamond (2).csv"
## [89] "Diamond (3).csv"
## [90] "Diamond (4).csv"
## [91] "Diamond (5).csv"
## [92] "Diamond (6).csv"
## [93] "Diamond (7).csv"
## [94] "Diamond (8).csv"
## [95] "Diamond.csv"
## [96] "DolbyVoiceClient.msi"
## [97] "DropboxInstaller.exe"
## [98] "edb_npgsql.exe"
## [99] "edb_npgsql.exe-20170506133801"
## [100] "edb_pgjdbc.exe"
## [101] "edb_pgjdbc.exe-20170506133830"
## [102] "edb_psqlodbc.exe"
## [103] "edb_psqlodbc.exe-20170203172812"
## [104] "edb_psqlodbc.exe-20170307203617"
## [105] "edb_psqlodbc.exe-20170506133907"
## [106] "edb_psqlodbc.exe-20170506134004"
## [107] "fim.pyd"
## [108] "final invoice edureka - Sheet1.pdf"
## [109] "final_webinar (1).pdf"
## [110] "final_webinar.pdf"
## [111] "FinalPythonforRUsersAnapproachforDataScience (1).docx"
## [112] "FinalPythonforRUsersAnapproachforDataScience (2).docx"
## [113] "FinalPythonforRUsersAnapproachforDataScience (3).docx"
## [114] "FinalPythonforRUsersAnapproachforDataScience (4).docx"
## [115] "FinalPythonforRUsersAnapproachforDataScience.docx"
## [116] "Git-2.11.0-64-bit.exe"
## [117] "Git-2.12.0-64-bit.exe"
## [118] "GitHubSetup (1).exe"
## [119] "GitHubSetup (2).exe"
## [120] "GitHubSetup.exe"
## [121] "GOMAUDIOGLOBALSETUP.EXE"
## [122] "graphviz-2.38.msi"
## [123] "Hdma.csv"
## [124] "Hedonic (1).csv"
## [125] "Hedonic (2).csv"
## [126] "Hedonic.csv"
## [127] "Hierarchical+Clustering (1).ipynb"
## [128] "Hierarchical+Clustering.ipynb"
## [129] "HP Downloads"
## [130] "HPSupportSolutionsFramework-12.5.32.203.exe"
## [131] "image.png"
## [132] "IMS PROSCHOOL Workshop.pptx.pdf"
## [133] "IMS PROSCHOOL Workshop.pptx.pptx"
## [134] "internship.docx"
## [135] "Introduction to SAS (1).pdf"
## [136] "Introduction to SAS Part 1 (1).pdf"
## [137] "Introduction to SAS Part 1.pdf"
## [138] "Introduction to SAS.pdf"
## [139] "introductory+python.ipynb"
## [140] "Invoice for Digital Vidya.pdf"
## [141] "Invoice for Weekendr.pdf"
## [142] "Invoice format - Ajay Ohri CONTATA (1).xls"
## [143] "Invoice format - Ajay Ohri CONTATA.xls"
## [144] "invoice rapid miner.pdf"
## [145] "Invoice trafla format.docx"
## [146] "iris2 (1).ipynb"
## [147] "iris2 (2).ipynb"
## [148] "iris2.ipynb"
## [149] "January invoice Indicus .pdf"
## [150] "June AV Invoice - Sheet1.pdf"
## [151] "Lecture 6 - KNN & Naive Bayes.ppt"
## [152] "Local Disk (C) - Shortcut.lnk"
## [153] "logistic regression - script for ppt.R"
## [154] "Logistic Regression.ipynb"
## [155] "logistic_regression_-_script_for_ppt.html"
## [156] "lyncentry.exe"
## [157] "Machine+Learning++Part+1 (1).ipynb"
## [158] "Machine+Learning++Part+1.ipynb"
## [159] "March invoice Indicus - Sheet1.pdf"
## [160] "matplotlib+cars.ipynb"
## [161] "matplotlib+line+graph.ipynb"
## [162] "ML+part+2.ipynb"
## [163] "ML+part+3.ipynb"
## [164] "mongodb-win32-x86_64-2008plus-ssl-3.4.2-signed.msi"
## [165] "mongodb-win32-x86_64-3.4.2-signed.msi"
## [166] "mortDefault"
## [167] "mortDefault.zip"
## [168] "mtcars.csv"
## [169] "mtcarslm.R"
## [170] "multiple+file+concat+in+pandas (1).ipynb"
## [171] "multiple+file+concat+in+pandas.ipynb"
## [172] "my+first+class+in+python.ipynb"
## [173] "NDP451-KB2858728-x86-x64-AllOS-ENU.exe"
## [174] "nltk.ipynb"
## [175] "notebook-Copy1.html"
## [176] "Offer Letter - Ajay Ohri (1).pdf"
## [177] "Offer Letter - Ajay Ohri.pdf"
## [178] "Other Data Mining Methods (1).pdf"
## [179] "Other Data Mining Methods.pdf"
## [180] "output1 (1).xls"
## [181] "output1 (2).xls"
## [182] "output1.xls"
## [183] "pandas+11.ipynb"
## [184] "pandas+analysis+1.ipynb"
## [185] "pandas+data+manipulation.ipynb"
## [186] "passport image.pdf"
## [187] "Pawconinvoice2016.pdf"
## [188] "Pawconinvoice2017 (1).pdf"
## [189] "Pawconinvoice2017 (2).pdf"
## [190] "Pawconinvoice2017 (3).pdf"
## [191] "Pawconinvoice2017.pdf"
## [192] "Payslip Feb 2016 - Sheet1.pdf"
## [193] "Payslip Feb 2016.pdf"
## [194] "Payslip Format Decisionstats - Sheet1.pdf"
## [195] "Payslip Jan 2016 - Sheet1.pdf"
## [196] "Payslip Jan 2016.pdf"
## [197] "Payslip March 2016 - Sheet1.pdf"
## [198] "Payslip March 2016.pdf"
## [199] "pgd.csv"
## [200] "plot_roc.ipynb"
## [201] "postgres data - Sheet1.csv"
## [202] "postgresql-9.6.1-1-windows-x64.exe"
## [203] "postgresql-9.6.2-4-windows-x64.exe"
## [204] "Program 1-results.rtf"
## [205] "protein (1).csv"
## [206] "protein (2).csv"
## [207] "protein.csv"
## [208] "pycharm-professional-2017.1.2.exe"
## [209] "pyfim.zip"
## [210] "Python.docx"
## [211] "python+with+postgres (1).ipynb"
## [212] "Python+with+Postgres (2).ipynb"
## [213] "Python+with+Postgres (3).ipynb"
## [214] "python+with+postgres.ipynb"
## [215] "R-3.3.2-win.exe"
## [216] "R-3.3.3-win.exe"
## [217] "R-3.4.0-win.exe"
## [218] "RCertificationExam.pdf"
## [219] "reg+model.ipynb"
## [220] "Resume_DivyataJaiswal_IITG.pdf"
## [221] "Revision - Business Analytics (1).pdf"
## [222] "Revision - Business Analytics.pdf"
## [223] "RFM Analysis Assignment - Data.csv"
## [224] "RidingMowers.csv"
## [225] "rsconnect"
## [226] "RStudio-1.0.136.exe"
## [227] "RStudio-1.0.143.exe"
## [228] "Salary Slip, Feb 2016.pdf"
## [229] "Salary Slip, Jan 2016.pdf"
## [230] "Salary Slip, March 2016 (1).pdf"
## [231] "Salary Slip, March 2016 (2).pdf"
## [232] "Salary Slip, March 2016.pdf"
## [233] "sales-of-shampoo-over-a-three-ye.csv"
## [234] "sales.csv"
## [235] "sales22.csv"
## [236] "sas-university-edition-107140.pdf"
## [237] "SAS part 2.pdf"
## [238] "SAS Part 3.pdf"
## [239] "Scan0095.pdf"
## [240] "Scanned Invoice for Collabera.pdf"
## [241] "scrape+amazon.ipynb"
## [242] "Screenshot 2017-01-23 12.36.55.png"
## [243] "September invoice adaptive analytics - Sheet1.pdf"
## [244] "simple+matplot+graph.ipynb"
## [245] "SkypeSetup.exe"
## [246] "SkypeSetupFull.exe"
## [247] "Sollers January.pdf"
## [248] "sqlalchemy.ipynb"
## [249] "stackoverflow-dump-analysis.html"
## [250] "stat13-lecture18.ppt"
## [251] "sts_gold_v03"
## [252] "sts_gold_v03.zip"
## [253] "Sunstone.pdf"
## [254] "Tableau.pdf"
## [255] "TableauPublicDesktop-64bit-10-1-3.exe"
## [256] "TableauPublicDesktop-64bit-10-1-4.exe"
## [257] "telecom.csv"
## [258] "TelecomServiceProviderCaseStudy.pdf"
## [259] "test+web+scraping.ipynb"
## [260] "Text Mining (1).pdf"
## [261] "Text Mining.pdf"
## [262] "third.sas7bdat"
## [263] "Time Series Forecasting (1).pdf"
## [264] "Time Series Forecasting.pdf"
## [265] "time+series (1).ipynb"
## [266] "time+series.ipynb"
## [267] "ts.html"
## [268] "ts.R"
## [269] "Unconfirmed 141422.crdownload"
## [270] "Unconfirmed 373974.crdownload"
## [271] "Unconfirmed 376991.crdownload"
## [272] "Unconfirmed 950045.crdownload"
## [273] "uTorrent.exe"
## [274] "VCForPython27.msi"
## [275] "vcsetup.exe"
## [276] "VirtualBox-5.1.8-111374-Win (1).exe"
## [277] "VirtualBox-5.1.8-111374-Win.exe"
## [278] "visualcppbuildtools_full (1).exe"
## [279] "visualcppbuildtools_full.exe"
## [280] "vs_community__196078652.1492249774.exe"
## [281] "Web+Scraping+Yelp+with+Beautiful+Soup.ipynb"
## [282] "Webinar for Business Analytics.pdf"
## [283] "wendyhe-tweets-on-womensmarch-and-maga.zip"
## [284] "WhatsApp Image 2017-02-18 at 08.42.55 (1).jpeg"
## [285] "WhatsApp Image 2017-02-18 at 08.42.55.jpeg"
## [286] "Wipro_Third_Party_DB_New_Format (1).xlsx"
## [287] "Wipro_Third_Party_DB_New_Format.xlsx"
## [288] "YourPayslip_April2017.pdf"
## [289] "YourPayslip_March2017.pdf"
# List only the entries of the Downloads folder whose name ends in ".csv".
# `pattern` is a regular expression, so the bare string "csv" also matches
# names that merely contain it (for example "BigDiamonds.csv.zip").
# Escaping the dot and anchoring with `$` restricts the match to the
# extension itself.
# NOTE(review): the listing below was edited by hand from the earlier
# recorded output (the five non-".csv" entries removed), not re-run.
dir("C:/Users/Dell/Downloads", pattern = "\\.csv$")
## [1] "AirPassengers.csv"
## [2] "ajay.csv"
## [3] "BigDiamonds.csv"
## [4] "Boston (1).csv"
## [5] "Boston (2).csv"
## [6] "Boston.csv"
## [7] "ccFraud.csv"
## [8] "class2.csv"
## [9] "data1.csv"
## [10] "datasets (1).csv"
## [11] "datasets.csv"
## [12] "Diamond (1).csv"
## [13] "Diamond (2).csv"
## [14] "Diamond (3).csv"
## [15] "Diamond (4).csv"
## [16] "Diamond (5).csv"
## [17] "Diamond (6).csv"
## [18] "Diamond (7).csv"
## [19] "Diamond (8).csv"
## [20] "Diamond.csv"
## [21] "Hdma.csv"
## [22] "Hedonic (1).csv"
## [23] "Hedonic (2).csv"
## [24] "Hedonic.csv"
## [25] "mtcars.csv"
## [26] "pgd.csv"
## [27] "postgres data - Sheet1.csv"
## [28] "protein (1).csv"
## [29] "protein (2).csv"
## [30] "protein.csv"
## [31] "RFM Analysis Assignment - Data.csv"
## [32] "RidingMowers.csv"
## [33] "sales-of-shampoo-over-a-three-ye.csv"
## [34] "sales.csv"
## [35] "sales22.csv"
## [36] "telecom.csv"
# Load the BigDiamonds data set from the local Downloads folder.
# stringsAsFactors is spelled out because the recorded results in this
# script (factor columns with levels, class(diamonds2) == "factor") rely on
# text columns being converted to factors; the default for this argument
# changed to FALSE in R 4.0.0, so relying on it gives different output on
# newer R versions.
diamonds <- read.csv(
  "C:/Users/Dell/Downloads/BigDiamonds.csv/BigDiamonds.csv",
  stringsAsFactors = TRUE
)
# Compact structure overview: dimensions, column types and first values.
str(diamonds)
## 'data.frame': 598024 obs. of 13 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
## $ clarity : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
## $ measurements: Factor w/ 241453 levels ""," 3.99 x 3.95 x 2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
# First rows of the data set.
head(diamonds)
## X carat cut color clarity table depth cert measurements price
## 1 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NA
## 2 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NA
## 3 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NA
## 4 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NA
## 5 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NA
## 6 6 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NA
## x y z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38
# Per-column summary: quantiles for numeric columns, counts for factors.
summary(diamonds)
## X carat cut color
## Min. : 1 Min. :0.200 Good : 59680 G :96204
## 1st Qu.:149507 1st Qu.:0.500 Ideal :369448 F :93573
## Median :299013 Median :0.900 V.Good:168896 E :93483
## Mean :299013 Mean :1.071 H :86619
## 3rd Qu.:448518 3rd Qu.:1.500 D :73630
## Max. :598024 Max. :9.250 I :70282
## (Other):84233
## clarity table depth cert
## SI1 :116631 Min. : 0.00 Min. : 0.00 GIA :463555
## VS2 :111082 1st Qu.:56.00 1st Qu.:61.00 IGI : 43667
## SI2 :104300 Median :58.00 Median :62.10 EGL : 33814
## VS1 : 97730 Mean :57.63 Mean :61.06 EGL USA : 16079
## VVS2 : 65500 3rd Qu.:59.00 3rd Qu.:62.70 EGL Intl. : 11447
## VVS1 : 54798 Max. :75.90 Max. :81.30 EGL ISRAEL: 11301
## (Other): 47983 (Other) : 18161
## measurements price x
## 0.00 x 0.00 x 0.00: 425 Min. : 300 Min. : 0.150
## 0.00 x 0.00 x 0.00 : 222 1st Qu.: 1220 1st Qu.: 4.740
## 4.3 x 4.27 x 2.67 : 97 Median : 3503 Median : 5.780
## 4.31 x 4.29 x 2.68 : 87 Mean : 8753 Mean : 5.991
## 4.29 x 4.26 x 2.67 : 86 3rd Qu.:11174 3rd Qu.: 6.970
## 4.3 x 4.28 x 2.67 : 84 Max. :99990 Max. :13.890
## (Other) :597023 NA's :713 NA's :1815
## y z
## Min. : 1.000 Min. : 0.040
## 1st Qu.: 4.970 1st Qu.: 3.120
## Median : 6.050 Median : 3.860
## Mean : 6.199 Mean : 4.033
## 3rd Qu.: 7.230 3rd Qu.: 4.610
## Max. :13.890 Max. :13.180
## NA's :1852 NA's :2544
# Frequency of each cut grade.
table(diamonds[["cut"]])
##
## Good Ideal V.Good
## 59680 369448 168896
# Five-number summary plus mean for a single numeric column.
summary(diamonds[["carat"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 0.500 0.900 1.071 1.500 9.250
# Pull the third column (cut) out of the data frame as a plain vector.
diamonds2 <- diamonds[[3]]
head(diamonds2)
## [1] V.Good Good Good V.Good V.Good Good
## Levels: Good Ideal V.Good
# The container is a data frame; the extracted column is a factor.
class(diamonds)
## [1] "data.frame"
class(diamonds2)
## [1] "factor"
# One small vector per basic atomic type, with its class shown below it.
ajay <- c("Ajay", "Vijay", "Amitabh")
class(ajay)
## [1] "character"
ajay2 <- c(TRUE, TRUE, FALSE)
class(ajay2)
## [1] "logical"
# The colon operator yields integers ...
ajay3 <- 1:10
class(ajay3)
## [1] "integer"
# ... whereas the same values typed out with c() are doubles ("numeric").
ajay4 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
class(ajay4)
## [1] "numeric"
# Both print the same way even though their storage types differ.
ajay3
## [1] 1 2 3 4 5 6 7 8 9 10
ajay4
## [1] 1 2 3 4 5 6 7 8 9 10
ajay5 <- 1:10
# identical() also compares type, so integer vs double is FALSE.
identical(ajay3, ajay4)
## [1] FALSE
# Objects currently defined in the workspace.
ls()
## [1] "ajay" "ajay2" "ajay3" "ajay4" "ajay5" "diamonds"
## [7] "diamonds2"
# Two integer sequences built the same way are identical.
identical(ajay5, ajay3)
## [1] TRUE
#file.choose()
# Read a second CSV; the Windows path is written with escaped backslashes.
air <- read.csv("C:\\Users\\Dell\\Downloads\\AirPassengers.csv")
#this is a comment
# The new object shows up in the workspace listing.
ls()
## [1] "air" "ajay" "ajay2" "ajay3" "ajay4" "ajay5"
## [7] "diamonds" "diamonds2"
# Remove a single object by name and confirm that it is gone.
rm(list = "ajay2")
ls()
## [1] "air" "ajay" "ajay3" "ajay4" "ajay5" "diamonds"
## [7] "diamonds2"
# Left disabled on purpose: this would clear the whole workspace.
#rm(list=ls())
# Run the garbage collector and print its memory usage report.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 743413 39.8 2164898 115.7 1321661 70.6
## Vcells 8260469 63.1 21535217 164.4 21469488 163.8
# ls() returns a character vector, so it can be subset by position.
ls()[3:5]
## [1] "ajay3" "ajay4" "ajay5"
#rm(list=ls()[3:5])
ls()[c(1, 4)]
## [1] "air" "ajay4"
#rm(list=ls()[c(1,4)])
# Start from the built-in lower-case alphabet.
ajay6 <- letters
ajay6
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
# Read, then overwrite, a single element by position.
ajay6[20]
## [1] "t"
ajay6[20] <- "Ajay"
ajay6
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
## [11] "k" "l" "m" "n" "o" "p" "q" "r" "s" "Ajay"
## [21] "u" "v" "w" "x" "y" "z"
# Mark the "Ajay" entry as missing. An exact comparison is used rather than
# a regular-expression substitution, so only elements equal to "Ajay" are
# affected (a pattern match would also hit any element merely containing
# it). %in% never yields NA, so the index stays valid even if the vector
# already holds missing values.
ajay6[ajay6 %in% "Ajay"] <- NA
# Drop missing values; the result records which positions were removed.
na.omit(ajay6)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "u" "v" "w" "x" "y" "z"
## attr(,"na.action")
## [1] 20
## attr(,"class")
## [1] "omit"
# Mean of the complete vector.
mean(ajay5)
## [1] 5.5
# Blank out the value 2 by exact comparison. A text substitution on the
# numbers would convert them to strings and match the digit "2" anywhere,
# so values such as 12 or 20 would be blanked as well.
ajay5[ajay5 %in% 2] <- NA
# Store as double, matching the recorded results below.
ajay5 <- as.numeric(ajay5)
# A single NA makes the mean NA unless missing values are removed first.
mean(ajay5)
## [1] NA
mean(ajay5, na.rm = TRUE)
## [1] 5.888889
ajay5
## [1] 1 NA 3 4 5 6 7 8 9 10
# Locate and count the missing values.
is.na(ajay5)
## [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
table(is.na(ajay5))
##
## FALSE TRUE
## 9 1
ajay5
## [1] 1 NA 3 4 5 6 7 8 9 10
# Mean imputation: replace each NA with the mean of the observed values.
ajay5 <- ifelse(is.na(ajay5), mean(ajay5, na.rm = TRUE), ajay5)
ajay5
## [1] 1.000000 5.888889 3.000000 4.000000 5.000000 6.000000 7.000000
## [8] 8.000000 9.000000 10.000000
ajay3
## [1] 1 2 3 4 5 6 7 8 9 10
# Set a run of positions to missing in one assignment.
ajay3[3:5] <- NA
ajay3
## [1] 1 2 NA NA NA 6 7 8 9 10
# Drop the missing values; the removed positions are kept as an attribute.
ajay3 <- na.omit(ajay3)
ajay3
## [1] 1 2 6 7 8 9 10
## attr(,"na.action")
## [1] 3 4 5
## attr(,"class")
## [1] "omit"
# Default plot of the values against their index.
plot(ajay3)

# The same data drawn as a line.
plot(ajay3, type = "l")

class(ajay3)
## [1] "integer"
# Convert to a plain vector before drawing the bar plot.
ajay3 <- as.vector(ajay3)
barplot(ajay3)

# Load the built-in mtcars data set and look at its structure.
data("mtcars")
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Column names, dimensions and class of the data frame.
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
dim(mtcars)
## [1] 32 11
ncol(mtcars)
## [1] 11
nrow(mtcars)
## [1] 32
class(mtcars)
## [1] "data.frame"
# Copy the row names (the car names) into a regular column called brand.
mtcars[["brand"]] <- row.names(mtcars)
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## brand
## Mazda RX4 Mazda RX4
## Mazda RX4 Wag Mazda RX4 Wag
## Datsun 710 Datsun 710
## Hornet 4 Drive Hornet 4 Drive
## Hornet Sportabout Hornet Sportabout
## Valiant Valiant
## Duster 360 Duster 360
## Merc 240D Merc 240D
## Merc 230 Merc 230
## Merc 280 Merc 280
## Merc 280C Merc 280C
## Merc 450SE Merc 450SE
## Merc 450SL Merc 450SL
## Merc 450SLC Merc 450SLC
## Cadillac Fleetwood Cadillac Fleetwood
## Lincoln Continental Lincoln Continental
## Chrysler Imperial Chrysler Imperial
## Fiat 128 Fiat 128
## Honda Civic Honda Civic
## Toyota Corolla Toyota Corolla
## Toyota Corona Toyota Corona
## Dodge Challenger Dodge Challenger
## AMC Javelin AMC Javelin
## Camaro Z28 Camaro Z28
## Pontiac Firebird Pontiac Firebird
## Fiat X1-9 Fiat X1-9
## Porsche 914-2 Porsche 914-2
## Lotus Europa Lotus Europa
## Ford Pantera L Ford Pantera L
## Ferrari Dino Ferrari Dino
## Maserati Bora Maserati Bora
## Volvo 142E Volvo 142E
# Replace the car-name row names with plain row numbers. The numbers are
# derived from the data frame itself rather than hard-coding its row count,
# so the statement stays correct if the number of rows changes.
row.names(mtcars) <- seq_len(nrow(mtcars))
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb brand
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Mazda RX4 Wag
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Datsun 710
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 Hornet 4 Drive
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Hornet Sportabout
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 Valiant
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 Duster 360
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 Merc 240D
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 Merc 230
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 Merc 280
## 11 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 Merc 280C
## 12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
## 13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
## 14 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Merc 450SLC
## 15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
## 16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
## 17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
## 18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
## 19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
## 20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
## 21 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 Toyota Corona
## 22 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 Dodge Challenger
## 23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 AMC Javelin
## 24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 Camaro Z28
## 25 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 Pontiac Firebird
## 26 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 Fiat X1-9
## 27 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 Porsche 914-2
## 28 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 Lotus Europa
## 29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 Ford Pantera L
## 30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 Ferrari Dino
## 31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 Maserati Bora
## 32 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 Volvo 142E
# Draw 8 distinct row numbers (sampling without replacement). The upper
# bound comes from nrow(mtcars) instead of a hard-coded 32, and FALSE is
# spelled out because the shorthand F is an ordinary variable that can be
# reassigned. No seed is set, so each run gives a different draw; the
# recorded outputs below are examples.
sample(nrow(mtcars), 8, replace = FALSE)
## [1] 9 8 3 19 11 31 20 14
# Use such a draw as a row index to take a random subset of the rows.
mtcars[sample(nrow(mtcars), 8, replace = FALSE), ]
## mpg cyl disp hp drat wt qsec vs am gear carb brand
## 29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 Ford Pantera L
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Hornet Sportabout
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Mazda RX4 Wag
## 31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 Maserati Bora
## 20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
## 30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 Ferrari Dino
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 Merc 240D
## 16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
# Same idea with the sample size given as a fraction (12.5%) of the rows.
mtcars[sample(nrow(mtcars),
              0.125 * nrow(mtcars),
              replace = FALSE), ]
## mpg cyl disp hp drat wt qsec vs am gear carb brand
## 19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
## 12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
## 13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
## 23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 AMC Javelin
# Number of cars per cylinder count.
table(mtcars[["cyl"]])
##
## 4 6 8
## 11 7 14
# Structure after adding the brand column: 12 variables, one of them text.
str(mtcars)
## 'data.frame': 32 obs. of 12 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp : num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat : num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec : num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear : num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb : num 4 4 1 1 2 1 4 2 2 4 ...
## $ brand: chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
# Per-column summary; the character column only reports length/class/mode.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb brand
## Min. :0.0000 Min. :3.000 Min. :1.000 Length:32
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000 Class :character
## Median :0.0000 Median :4.000 Median :2.000 Mode :character
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Two-way frequency table of cylinders against gears, kept for later use.
a <- table(mtcars[["cyl"]], mtcars[["gear"]])
class(a)
## [1] "table"
# Randomly keep a quarter of the columns of mtcars. The index is drawn from
# ncol(mtcars), so it belongs in the column position: used as a row index it
# could only ever reach the first ncol(mtcars) rows. drop = FALSE keeps the
# result a data frame even if only one column is selected.
# NOTE(review): if a random sample of rows was intended instead, draw from
# nrow(mtcars) and place the index before the comma.
mtcars[, sample(ncol(mtcars),
                0.25 * ncol(mtcars),
                replace = FALSE), drop = FALSE]
## (output varies per run: every row of mtcars, restricted to the sampled columns)
# Pie chart of the number of cars per cylinder count.
pie(table(mtcars[["cyl"]]))

# The same histogram of mpg drawn with three built-in colour palettes,
# each asked for 5 colours at alpha 0.9.
hist(mtcars$mpg, col = topo.colors(n = 5, alpha = 0.9))

hist(mtcars$mpg, col = cm.colors(n = 5, alpha = 0.9))

hist(mtcars$mpg, col = heat.colors(n = 5, alpha = 0.9))

# Names of all built-in colours (the listing follows).
colors()
## [1] "white" "aliceblue" "antiquewhite"
## [4] "antiquewhite1" "antiquewhite2" "antiquewhite3"
## [7] "antiquewhite4" "aquamarine" "aquamarine1"
## [10] "aquamarine2" "aquamarine3" "aquamarine4"
## [13] "azure" "azure1" "azure2"
## [16] "azure3" "azure4" "beige"
## [19] "bisque" "bisque1" "bisque2"
## [22] "bisque3" "bisque4" "black"
## [25] "blanchedalmond" "blue" "blue1"
## [28] "blue2" "blue3" "blue4"
## [31] "blueviolet" "brown" "brown1"
## [34] "brown2" "brown3" "brown4"
## [37] "burlywood" "burlywood1" "burlywood2"
## [40] "burlywood3" "burlywood4" "cadetblue"
## [43] "cadetblue1" "cadetblue2" "cadetblue3"
## [46] "cadetblue4" "chartreuse" "chartreuse1"
## [49] "chartreuse2" "chartreuse3" "chartreuse4"
## [52] "chocolate" "chocolate1" "chocolate2"
## [55] "chocolate3" "chocolate4" "coral"
## [58] "coral1" "coral2" "coral3"
## [61] "coral4" "cornflowerblue" "cornsilk"
## [64] "cornsilk1" "cornsilk2" "cornsilk3"
## [67] "cornsilk4" "cyan" "cyan1"
## [70] "cyan2" "cyan3" "cyan4"
## [73] "darkblue" "darkcyan" "darkgoldenrod"
## [76] "darkgoldenrod1" "darkgoldenrod2" "darkgoldenrod3"
## [79] "darkgoldenrod4" "darkgray" "darkgreen"
## [82] "darkgrey" "darkkhaki" "darkmagenta"
## [85] "darkolivegreen" "darkolivegreen1" "darkolivegreen2"
## [88] "darkolivegreen3" "darkolivegreen4" "darkorange"
## [91] "darkorange1" "darkorange2" "darkorange3"
## [94] "darkorange4" "darkorchid" "darkorchid1"
## [97] "darkorchid2" "darkorchid3" "darkorchid4"
## [100] "darkred" "darksalmon" "darkseagreen"
## [103] "darkseagreen1" "darkseagreen2" "darkseagreen3"
## [106] "darkseagreen4" "darkslateblue" "darkslategray"
## [109] "darkslategray1" "darkslategray2" "darkslategray3"
## [112] "darkslategray4" "darkslategrey" "darkturquoise"
## [115] "darkviolet" "deeppink" "deeppink1"
## [118] "deeppink2" "deeppink3" "deeppink4"
## [121] "deepskyblue" "deepskyblue1" "deepskyblue2"
## [124] "deepskyblue3" "deepskyblue4" "dimgray"
## [127] "dimgrey" "dodgerblue" "dodgerblue1"
## [130] "dodgerblue2" "dodgerblue3" "dodgerblue4"
## [133] "firebrick" "firebrick1" "firebrick2"
## [136] "firebrick3" "firebrick4" "floralwhite"
## [139] "forestgreen" "gainsboro" "ghostwhite"
## [142] "gold" "gold1" "gold2"
## [145] "gold3" "gold4" "goldenrod"
## [148] "goldenrod1" "goldenrod2" "goldenrod3"
## [151] "goldenrod4" "gray" "gray0"
## [154] "gray1" "gray2" "gray3"
## [157] "gray4" "gray5" "gray6"
## [160] "gray7" "gray8" "gray9"
## [163] "gray10" "gray11" "gray12"
## [166] "gray13" "gray14" "gray15"
## [169] "gray16" "gray17" "gray18"
## [172] "gray19" "gray20" "gray21"
## [175] "gray22" "gray23" "gray24"
## [178] "gray25" "gray26" "gray27"
## [181] "gray28" "gray29" "gray30"
## [184] "gray31" "gray32" "gray33"
## [187] "gray34" "gray35" "gray36"
## [190] "gray37" "gray38" "gray39"
## [193] "gray40" "gray41" "gray42"
## [196] "gray43" "gray44" "gray45"
## [199] "gray46" "gray47" "gray48"
## [202] "gray49" "gray50" "gray51"
## [205] "gray52" "gray53" "gray54"
## [208] "gray55" "gray56" "gray57"
## [211] "gray58" "gray59" "gray60"
## [214] "gray61" "gray62" "gray63"
## [217] "gray64" "gray65" "gray66"
## [220] "gray67" "gray68" "gray69"
## [223] "gray70" "gray71" "gray72"
## [226] "gray73" "gray74" "gray75"
## [229] "gray76" "gray77" "gray78"
## [232] "gray79" "gray80" "gray81"
## [235] "gray82" "gray83" "gray84"
## [238] "gray85" "gray86" "gray87"
## [241] "gray88" "gray89" "gray90"
## [244] "gray91" "gray92" "gray93"
## [247] "gray94" "gray95" "gray96"
## [250] "gray97" "gray98" "gray99"
## [253] "gray100" "green" "green1"
## [256] "green2" "green3" "green4"
## [259] "greenyellow" "grey" "grey0"
## [262] "grey1" "grey2" "grey3"
## [265] "grey4" "grey5" "grey6"
## [268] "grey7" "grey8" "grey9"
## [271] "grey10" "grey11" "grey12"
## [274] "grey13" "grey14" "grey15"
## [277] "grey16" "grey17" "grey18"
## [280] "grey19" "grey20" "grey21"
## [283] "grey22" "grey23" "grey24"
## [286] "grey25" "grey26" "grey27"
## [289] "grey28" "grey29" "grey30"
## [292] "grey31" "grey32" "grey33"
## [295] "grey34" "grey35" "grey36"
## [298] "grey37" "grey38" "grey39"
## [301] "grey40" "grey41" "grey42"
## [304] "grey43" "grey44" "grey45"
## [307] "grey46" "grey47" "grey48"
## [310] "grey49" "grey50" "grey51"
## [313] "grey52" "grey53" "grey54"
## [316] "grey55" "grey56" "grey57"
## [319] "grey58" "grey59" "grey60"
## [322] "grey61" "grey62" "grey63"
## [325] "grey64" "grey65" "grey66"
## [328] "grey67" "grey68" "grey69"
## [331] "grey70" "grey71" "grey72"
## [334] "grey73" "grey74" "grey75"
## [337] "grey76" "grey77" "grey78"
## [340] "grey79" "grey80" "grey81"
## [343] "grey82" "grey83" "grey84"
## [346] "grey85" "grey86" "grey87"
## [349] "grey88" "grey89" "grey90"
## [352] "grey91" "grey92" "grey93"
## [355] "grey94" "grey95" "grey96"
## [358] "grey97" "grey98" "grey99"
## [361] "grey100" "honeydew" "honeydew1"
## [364] "honeydew2" "honeydew3" "honeydew4"
## [367] "hotpink" "hotpink1" "hotpink2"
## [370] "hotpink3" "hotpink4" "indianred"
## [373] "indianred1" "indianred2" "indianred3"
## [376] "indianred4" "ivory" "ivory1"
## [379] "ivory2" "ivory3" "ivory4"
## [382] "khaki" "khaki1" "khaki2"
## [385] "khaki3" "khaki4" "lavender"
## [388] "lavenderblush" "lavenderblush1" "lavenderblush2"
## [391] "lavenderblush3" "lavenderblush4" "lawngreen"
## [394] "lemonchiffon" "lemonchiffon1" "lemonchiffon2"
## [397] "lemonchiffon3" "lemonchiffon4" "lightblue"
## [400] "lightblue1" "lightblue2" "lightblue3"
## [403] "lightblue4" "lightcoral" "lightcyan"
## [406] "lightcyan1" "lightcyan2" "lightcyan3"
## [409] "lightcyan4" "lightgoldenrod" "lightgoldenrod1"
## [412] "lightgoldenrod2" "lightgoldenrod3" "lightgoldenrod4"
## [415] "lightgoldenrodyellow" "lightgray" "lightgreen"
## [418] "lightgrey" "lightpink" "lightpink1"
## [421] "lightpink2" "lightpink3" "lightpink4"
## [424] "lightsalmon" "lightsalmon1" "lightsalmon2"
## [427] "lightsalmon3" "lightsalmon4" "lightseagreen"
## [430] "lightskyblue" "lightskyblue1" "lightskyblue2"
## [433] "lightskyblue3" "lightskyblue4" "lightslateblue"
## [436] "lightslategray" "lightslategrey" "lightsteelblue"
## [439] "lightsteelblue1" "lightsteelblue2" "lightsteelblue3"
## [442] "lightsteelblue4" "lightyellow" "lightyellow1"
## [445] "lightyellow2" "lightyellow3" "lightyellow4"
## [448] "limegreen" "linen" "magenta"
## [451] "magenta1" "magenta2" "magenta3"
## [454] "magenta4" "maroon" "maroon1"
## [457] "maroon2" "maroon3" "maroon4"
## [460] "mediumaquamarine" "mediumblue" "mediumorchid"
## [463] "mediumorchid1" "mediumorchid2" "mediumorchid3"
## [466] "mediumorchid4" "mediumpurple" "mediumpurple1"
## [469] "mediumpurple2" "mediumpurple3" "mediumpurple4"
## [472] "mediumseagreen" "mediumslateblue" "mediumspringgreen"
## [475] "mediumturquoise" "mediumvioletred" "midnightblue"
## [478] "mintcream" "mistyrose" "mistyrose1"
## [481] "mistyrose2" "mistyrose3" "mistyrose4"
## [484] "moccasin" "navajowhite" "navajowhite1"
## [487] "navajowhite2" "navajowhite3" "navajowhite4"
## [490] "navy" "navyblue" "oldlace"
## [493] "olivedrab" "olivedrab1" "olivedrab2"
## [496] "olivedrab3" "olivedrab4" "orange"
## [499] "orange1" "orange2" "orange3"
## [502] "orange4" "orangered" "orangered1"
## [505] "orangered2" "orangered3" "orangered4"
## [508] "orchid" "orchid1" "orchid2"
## [511] "orchid3" "orchid4" "palegoldenrod"
## [514] "palegreen" "palegreen1" "palegreen2"
## [517] "palegreen3" "palegreen4" "paleturquoise"
## [520] "paleturquoise1" "paleturquoise2" "paleturquoise3"
## [523] "paleturquoise4" "palevioletred" "palevioletred1"
## [526] "palevioletred2" "palevioletred3" "palevioletred4"
## [529] "papayawhip" "peachpuff" "peachpuff1"
## [532] "peachpuff2" "peachpuff3" "peachpuff4"
## [535] "peru" "pink" "pink1"
## [538] "pink2" "pink3" "pink4"
## [541] "plum" "plum1" "plum2"
## [544] "plum3" "plum4" "powderblue"
## [547] "purple" "purple1" "purple2"
## [550] "purple3" "purple4" "red"
## [553] "red1" "red2" "red3"
## [556] "red4" "rosybrown" "rosybrown1"
## [559] "rosybrown2" "rosybrown3" "rosybrown4"
## [562] "royalblue" "royalblue1" "royalblue2"
## [565] "royalblue3" "royalblue4" "saddlebrown"
## [568] "salmon" "salmon1" "salmon2"
## [571] "salmon3" "salmon4" "sandybrown"
## [574] "seagreen" "seagreen1" "seagreen2"
## [577] "seagreen3" "seagreen4" "seashell"
## [580] "seashell1" "seashell2" "seashell3"
## [583] "seashell4" "sienna" "sienna1"
## [586] "sienna2" "sienna3" "sienna4"
## [589] "skyblue" "skyblue1" "skyblue2"
## [592] "skyblue3" "skyblue4" "slateblue"
## [595] "slateblue1" "slateblue2" "slateblue3"
## [598] "slateblue4" "slategray" "slategray1"
## [601] "slategray2" "slategray3" "slategray4"
## [604] "slategrey" "snow" "snow1"
## [607] "snow2" "snow3" "snow4"
## [610] "springgreen" "springgreen1" "springgreen2"
## [613] "springgreen3" "springgreen4" "steelblue"
## [616] "steelblue1" "steelblue2" "steelblue3"
## [619] "steelblue4" "tan" "tan1"
## [622] "tan2" "tan3" "tan4"
## [625] "thistle" "thistle1" "thistle2"
## [628] "thistle3" "thistle4" "tomato"
## [631] "tomato1" "tomato2" "tomato3"
## [634] "tomato4" "turquoise" "turquoise1"
## [637] "turquoise2" "turquoise3" "turquoise4"
## [640] "violet" "violetred" "violetred1"
## [643] "violetred2" "violetred3" "violetred4"
## [646] "wheat" "wheat1" "wheat2"
## [649] "wheat3" "wheat4" "whitesmoke"
## [652] "yellow" "yellow1" "yellow2"
## [655] "yellow3" "yellow4" "yellowgreen"
# List the objects currently defined in the workspace.
ls()
## [1] "a" "air" "ajay" "ajay3" "ajay4"
## [6] "ajay5" "ajay6" "diamonds" "diamonds2" "mtcars"
# Remove every object that ls() reports, then run the garbage collector;
# gc() also prints the memory-usage table captured below.
rm(list=ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 518944 27.8 1731918 92.5 1347784 72.0
## Vcells 1450453 11.1 17228173 131.5 21469488 163.8
# Show the current working directory, then move to the folder that holds the
# diamonds data.
# NOTE(review): despite the ".csv" suffix the setwd() target appears to be a
# directory -- the dir() listing below shows files inside it. The path is
# specific to the original machine; confirm it before running elsewhere.
getwd()
## [1] "C:/Users/Dell/Documents/R/revisionofbasics"
setwd("C:/Users/Dell/Downloads/BigDiamonds.csv" )
# List the entries of the new working directory whose names match "csv".
dir(pattern = "csv")
## [1] "BigDiamonds.csv" "BigDiamonds.csv.Rproj"
#library(benchmark)
# Print today's date and the current date-time.
Sys.Date()
## [1] "2017-05-15"
Sys.time()
## [1] "2017-05-15 16:11:28 IST"
# Time three ways of reading the same CSV file: base read.csv(),
# data.table's fread() and readr's read_csv(). Each call is wrapped in
# system.time(), so the parsed data is discarded and only the timing is kept.
library(data.table)
library(readr)
system.time(read.csv("BigDiamonds.csv"))
## user system elapsed
## 26.02 0.34 26.39
# The progress messages fread() printed during the original run are recorded
# as "##" output comments; left as bare text they are not valid R and would
# stop the script from parsing.
system.time(fread("BigDiamonds.csv"))
##
## Read 23.4% of 598024 rows
## Read 36.8% of 598024 rows
## Read 48.5% of 598024 rows
## Read 61.9% of 598024 rows
## Read 73.6% of 598024 rows
## Read 85.3% of 598024 rows
## Read 95.3% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:09
## user system elapsed
## 8.65 0.06 8.74
# read_csv() reports the column types it guessed and names the unnamed first
# column X1, as the captured output shows.
system.time(read_csv("BigDiamonds.csv"))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_integer(),
## carat = col_double(),
## cut = col_character(),
## color = col_character(),
## clarity = col_character(),
## table = col_double(),
## depth = col_double(),
## cert = col_character(),
## measurements = col_character(),
## price = col_integer(),
## x = col_double(),
## y = col_double(),
## z = col_double()
## )
## user system elapsed
## 3.76 0.29 4.06
# Histograms filled with RColorBrewer palettes. brewer.pal(n, name) supplies
# n colours from the named palette; `breaks` suggests the number of bins.
library(RColorBrewer)
hist(mtcars$mpg, col = brewer.pal(n = 4, name = "Set1"))

hist(mtcars$mpg, breaks = 8, col = brewer.pal(n = 3, name = "Set2"))

hist(iris$Sepal.Length, breaks = 10, col = brewer.pal(n = 8, name = "Blues"))

hist(iris$Sepal.Length, breaks = 5, col = brewer.pal(n = 8, name = "Blues"))

# Same variable again with an explicit title, axis labels and a fixed y range.
hist(
  iris$Sepal.Length,
  breaks = 15,
  col = brewer.pal(n = 8, name = "Greens"),
  main = "Title of my graph",
  xlab = "Sepal Length",
  ylab = "Frequency",
  ylim = c(0, 20)
)

# Redraw the five histograms on one page: a 2 x 3 grid of panels on a grey
# background (five plots, so the sixth panel stays empty). Both graphical
# parameters are set in a single par() call.
par(mfrow = c(2, 3), bg = "grey")
hist(mtcars$mpg, col = brewer.pal(n = 4, name = "Set1"))
hist(mtcars$mpg, breaks = 8, col = brewer.pal(n = 3, name = "Set2"))
hist(iris$Sepal.Length, breaks = 10, col = brewer.pal(n = 8, name = "Blues"))
hist(iris$Sepal.Length, breaks = 5, col = brewer.pal(n = 8, name = "Blues"))
hist(
  iris$Sepal.Length,
  breaks = 15,
  col = brewer.pal(n = 8, name = "Greens"),
  main = "Title of my graph",
  xlab = "Sepal Length",
  ylab = "Frequency",
  ylim = c(0, 20)
)
# Print the palette catalogue: maximum colours, category and colour-blind
# flag for every palette.
brewer.pal.info
## maxcolors category colorblind
## BrBG 11 div TRUE
## PiYG 11 div TRUE
## PRGn 11 div TRUE
## PuOr 11 div TRUE
## RdBu 11 div TRUE
## RdGy 11 div FALSE
## RdYlBu 11 div TRUE
## RdYlGn 11 div FALSE
## Spectral 11 div FALSE
## Accent 8 qual FALSE
## Dark2 8 qual TRUE
## Paired 12 qual TRUE
## Pastel1 9 qual FALSE
## Pastel2 8 qual FALSE
## Set1 9 qual FALSE
## Set2 8 qual TRUE
## Set3 12 qual FALSE
## Blues 9 seq TRUE
## BuGn 9 seq TRUE
## BuPu 9 seq TRUE
## GnBu 9 seq TRUE
## Greens 9 seq TRUE
## Greys 9 seq TRUE
## Oranges 9 seq TRUE
## OrRd 9 seq TRUE
## PuBu 9 seq TRUE
## PuBuGn 9 seq TRUE
## PuRd 9 seq TRUE
## Purples 9 seq TRUE
## RdPu 9 seq TRUE
## Reds 9 seq TRUE
## YlGn 9 seq TRUE
## YlGnBu 9 seq TRUE
## YlOrBr 9 seq TRUE
## YlOrRd 9 seq TRUE
#brewer.show
# Today's date as a Date object.
Sys.Date()
## [1] "2017-05-15"
# Subtracting two Dates gives the elapsed time in days.
dob <- as.Date("1997-04-01")
Sys.Date() - dob
## Time difference of 7349 days
# Date strings in mixed formats, used by the parsing examples that follow.
# The second vector holds "12-30-2017", which cannot be a day-month-year date.
fundates <- c("1Apr2017", "01-06-2017", "1/6/17")
fundates2 <- c("1Apr2017", "12-30-2017", "1/6/17")
# lubridate parsers are named after the order of the date parts they expect.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
# Day-month-year reading: all three strings in `fundates` parse.
fun2 <- dmy(fundates)
fun2
## [1] "2017-04-01" "2017-06-01" "2017-06-01"
# The same reading of `fundates2` fails on "12-30-2017" and yields NA there.
fun3 <- dmy(fundates2)
## Warning: 1 failed to parse.
fun3
## [1] "2017-04-01" NA "2017-06-01"
# Month-day-year reading: "12-30-2017" now parses, "1Apr2017" does not, and
# "1/6/17" is read as 6 January rather than 1 June.
fun4 <- mdy(fundates2)
## Warning: 1 failed to parse.
fun4
## [1] NA "2017-12-30" "2017-01-06"
# Combine both readings: take the day-month-year parse where it succeeds and
# fall back to month-day-year only for the entries it could not parse.
# The NA slots are filled by subsetting so the result stays a Date vector;
# ifelse() drops the Date class, which is why the earlier version printed bare
# day counts (17257 17530 17318) instead of dates.
fun4 <- dmy(fundates2)
## Warning: 1 failed to parse.
fun4[is.na(fun4)] <- mdy(fundates2[is.na(fun4)])
fun4
## [1] "2017-04-01" "2017-12-30" "2017-06-01"
# Show what the date examples left behind, then clear the workspace.
ls()
## [1] "dob" "fun2" "fun3" "fun4" "fundates" "fundates2"
rm(list = ls())
# Load the diamonds file as a data.table. The progress messages fread() printed
# during the original run are recorded as "##" output comments; left as bare
# text they are not valid R and would stop the script from parsing.
df <- fread("BigDiamonds.csv")
##
## Read 20.1% of 598024 rows
## Read 33.4% of 598024 rows
## Read 45.1% of 598024 rows
## Read 56.9% of 598024 rows
## Read 68.6% of 598024 rows
## Read 80.3% of 598024 rows
## Read 92.0% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:09
# Five-number summary of price; the output shows 713 missing values.
summary(df$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 300 1220 3503 8753 11174 99990 713
# zoo provides na.aggregate(). Attaching it masks base as.Date, as the
# start-up message below reports.
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# New column price2: price with its missing values replaced by the median
# price (FUN = median). The original price column is left untouched.
df$price2=na.aggregate(df$price, FUN = median)
# sqldf lets the script query a data frame with SQL.
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
# Return the mtcars rows whose mpg is above the overall average mpg; the
# average is computed by the scalar subquery in the WHERE clause.
sqldf("select *
from mtcars
where mpg > (
select avg(mpg)
from mtcars
);")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 6 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 7 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 8 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 9 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 10 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 11 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 12 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 13 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 14 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Copy the car names out of the row names into a regular column so they can
# be processed with the string functions below.
mtcars$brand <- row.names(mtcars)
mtcars$brand
## [1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
## [4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
## [7] "Duster 360" "Merc 240D" "Merc 230"
## [10] "Merc 280" "Merc 280C" "Merc 450SE"
## [13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
## [16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
## [19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
## [22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
## [25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
## [28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
## [31] "Maserati Bora" "Volvo 142E"
# Number of characters in each name.
nchar(mtcars$brand)
## [1] 9 13 10 14 17 7 10 9 8 8 9 10 10 11 18 19 17 8 11 14 13 16 11
## [24] 10 16 9 13 12 14 12 13 10
# grepl(): TRUE/FALSE per name -- does it contain a space?
grepl(" ", mtcars$brand)
## [1] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [23] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# grep(): the positions of the names that contain a space ...
grep(" ", mtcars$brand)
## [1] 1 2 3 4 5 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [24] 25 26 27 28 29 30 31 32
# ... or, with value = TRUE, the matching names themselves. TRUE is spelled
# out because the shorthand T is an ordinary variable that can be reassigned.
grep(" ", mtcars$brand, value = TRUE)
## [1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
## [4] "Hornet 4 Drive" "Hornet Sportabout" "Duster 360"
## [7] "Merc 240D" "Merc 230" "Merc 280"
## [10] "Merc 280C" "Merc 450SE" "Merc 450SL"
## [13] "Merc 450SLC" "Cadillac Fleetwood" "Lincoln Continental"
## [16] "Chrysler Imperial" "Fiat 128" "Honda Civic"
## [19] "Toyota Corolla" "Toyota Corona" "Dodge Challenger"
## [22] "AMC Javelin" "Camaro Z28" "Pontiac Firebird"
## [25] "Fiat X1-9" "Porsche 914-2" "Lotus Europa"
## [28] "Ford Pantera L" "Ferrari Dino" "Maserati Bora"
## [31] "Volvo 142E"
# gregexpr(): for each name, the positions of every space (-1 when none).
gregexpr(" ", mtcars$brand)
## [[1]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] 6 10
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
##
## [[3]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[4]]
## [1] 7 9
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
##
## [[5]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[6]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"useBytes")
## [1] TRUE
##
## [[7]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[8]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[9]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[10]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[11]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[12]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[13]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[14]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[15]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[16]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[17]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[18]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[19]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[20]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[21]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[22]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[23]]
## [1] 4
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[24]]
## [1] 7
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[25]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[26]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[27]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[28]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[29]]
## [1] 5 13
## attr(,"match.length")
## [1] 1 1
## attr(,"useBytes")
## [1] TRUE
##
## [[30]]
## [1] 8
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[31]]
## [1] 9
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
##
## [[32]]
## [1] 6
## attr(,"match.length")
## [1] 1
## attr(,"useBytes")
## [1] TRUE
# regexpr(): position of the first space in every name; -1 marks a name
# without one.
regexpr(pattern = " ", text = mtcars$brand)
## [1] 6 6 7 7 7 -1 7 5 5 5 5 5 5 5 9 8 9 5 6 7 7 6 4
## [24] 7 8 5 8 6 5 8 9 6
## attr(,"match.length")
## [1] 1 1 1 1 1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [24] 1 1 1 1 1 1 1 1 1
## attr(,"useBytes")
## [1] TRUE
# First attempt at extracting the make: keep the first six characters. The
# printout shows why a fixed width falls short ("Valian", "Merc 2").
mtcars$brand2 <- substr(mtcars$brand, start = 1, stop = 6)
dim(mtcars)
## [1] 32 13
# brand and brand2 are columns 12 and 13, selected here by name.
mtcars[1:10, c("brand", "brand2")]
## brand brand2
## Mazda RX4 Mazda RX4 Mazda
## Mazda RX4 Wag Mazda RX4 Wag Mazda
## Datsun 710 Datsun 710 Datsun
## Hornet 4 Drive Hornet 4 Drive Hornet
## Hornet Sportabout Hornet Sportabout Hornet
## Valiant Valiant Valian
## Duster 360 Duster 360 Duster
## Merc 240D Merc 240D Merc 2
## Merc 230 Merc 230 Merc 2
## Merc 280 Merc 280 Merc 2
# Position of the first space in each name (-1 when there is none).
regexpr(" ", mtcars$brand)
## [1] 6 6 7 7 7 -1 7 5 5 5 5 5 5 5 9 8 9 5 6 7 7 6 4
## [24] 7 8 5 8 6 5 8 9 6
## attr(,"match.length")
## [1] 1 1 1 1 1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [24] 1 1 1 1 1 1 1 1 1
## attr(,"useBytes")
## [1] TRUE
# Extract the make: everything before the first space. Stopping one character
# short of the match keeps the separator out of the result; ending at the
# match itself would store "Mazda " with a trailing space. Names without a
# space have position -1, so substr() returns "" for them (see "Valiant").
mtcars$brand3 <- substr(mtcars$brand, 1L, regexpr(" ", mtcars$brand) - 1L)
mtcars[1:10, 12:14]
## brand brand2 brand3
## Mazda RX4 Mazda RX4 Mazda Mazda
## Mazda RX4 Wag Mazda RX4 Wag Mazda Mazda
## Datsun 710 Datsun 710 Datsun Datsun
## Hornet 4 Drive Hornet 4 Drive Hornet Hornet
## Hornet Sportabout Hornet Sportabout Hornet Hornet
## Valiant Valiant Valian
## Duster 360 Duster 360 Duster Duster
## Merc 240D Merc 240D Merc 2 Merc
## Merc 230 Merc 230 Merc 2 Merc
## Merc 280 Merc 280 Merc 2 Merc
# Final make column: use brand3 where it is non-empty, otherwise fall back to
# the full name (single-word names such as "Valiant" have an empty brand3).
mtcars$brand4 <- ifelse(nzchar(mtcars$brand3), mtcars$brand3, mtcars$brand)
# Columns 12 to 15 are the four brand columns, selected here by name.
mtcars[1:10, c("brand", "brand2", "brand3", "brand4")]
## brand brand2 brand3 brand4
## Mazda RX4 Mazda RX4 Mazda Mazda Mazda
## Mazda RX4 Wag Mazda RX4 Wag Mazda Mazda Mazda
## Datsun 710 Datsun 710 Datsun Datsun Datsun
## Hornet 4 Drive Hornet 4 Drive Hornet Hornet Hornet
## Hornet Sportabout Hornet Sportabout Hornet Hornet Hornet
## Valiant Valiant Valian Valiant
## Duster 360 Duster 360 Duster Duster Duster
## Merc 240D Merc 240D Merc 2 Merc Merc
## Merc 230 Merc 230 Merc 2 Merc Merc
## Merc 280 Merc 280 Merc 2 Merc Merc
# Turn currency strings into numbers. c() coerces the bare 50000 to the
# string "50000", so every element starts out as character.
money <- c("50000", "50,000", "$50000", 50000, "$50,000")
# Step 1: drop the thousands separators.
money2 <- gsub(pattern = ",", replacement = "", x = money)
money2
## [1] "50000" "50000" "$50000" "50000" "$50000"
# Step 2: drop the dollar signs; "$" is escaped because it is a regex anchor.
money3 <- gsub(pattern = "\\$", replacement = "", x = money2)
money3
## [1] "50000" "50000" "50000" "50000" "50000"
# Step 3: convert the cleaned strings to numeric so they can be averaged.
money4 <- as.numeric(money3)
money4
## [1] 50000 50000 50000 50000 50000
mean(money4)
## [1] 50000
# Objects currently in the workspace.
ls()
## [1] "df" "money" "money2" "money3" "money4" "mtcars"
# Column-by-column summary of the diamonds table, including the imputed
# price2 column; the NA counts per column appear in the output below.
summary(df)
## V1 carat cut color
## Length:598024 Min. :0.200 Length:598024 Length:598024
## Class :character 1st Qu.:0.500 Class :character Class :character
## Mode :character Median :0.900 Mode :character Mode :character
## Mean :1.071
## 3rd Qu.:1.500
## Max. :9.250
##
## clarity table depth cert
## Length:598024 Min. : 0.00 Min. : 0.00 Length:598024
## Class :character 1st Qu.:56.00 1st Qu.:61.00 Class :character
## Mode :character Median :58.00 Median :62.10 Mode :character
## Mean :57.63 Mean :61.06
## 3rd Qu.:59.00 3rd Qu.:62.70
## Max. :75.90 Max. :81.30
##
## measurements price x y
## Length:598024 Min. : 300 Min. : 0.150 Min. : 1.000
## Class :character 1st Qu.: 1220 1st Qu.: 4.740 1st Qu.: 4.970
## Mode :character Median : 3503 Median : 5.780 Median : 6.050
## Mean : 8753 Mean : 5.991 Mean : 6.199
## 3rd Qu.:11174 3rd Qu.: 6.970 3rd Qu.: 7.230
## Max. :99990 Max. :13.890 Max. :13.890
## NA's :713 NA's :1815 NA's :1852
## z price2
## Min. : 0.040 Min. : 300
## 1st Qu.: 3.120 1st Qu.: 1220
## Median : 3.860 Median : 3503
## Mean : 4.033 Mean : 8747
## 3rd Qu.: 4.610 3rd Qu.:11151
## Max. :13.180 Max. :99990
## NA's :2544
# data.table overview of the tables in memory: rows, columns, size in MB,
# column names and key.
tables()
## NAME NROW NCOL MB
## [1,] df 598,024 14 104
## COLS
## [1,] V1,carat,cut,color,clarity,table,depth,cert,measurements,price,x,y,z,price2
## KEY
## [1,]
## Total: 104MB
# First rows of df; price is NA in all of them and price2 holds the median.
head(df)
## V1 carat cut color clarity table depth cert measurements price
## 1: 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NA
## 2: 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NA
## 3: 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NA
## 4: 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NA
## 5: 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NA
## 6: 6 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NA
## x y z price2
## 1: 3.96 3.95 2.52 3503
## 2: 4.00 4.05 2.30 3503
## 3: 4.56 4.53 2.67 3503
## 4: 3.80 3.82 2.31 3503
## 5: 4.35 4.26 2.68 3503
## 6: 3.74 3.67 2.38 3503
# Rows that have a recorded price.
df22 <- df[!is.na(price)]
# Median price per colour grade, ignoring missing prices. The unnamed
# aggregate comes back as column V1, as the printout shows.
df3 <- df[, median(price, na.rm = TRUE), by = color]
df3
## color V1
## 1: K 4418
## 2: G 3720
## 3: J 4697
## 4: D 2690
## 5: F 2966
## 6: E 2342
## 7: H 4535
## 8: I 4717
## 9: L 3017
# Give the aggregate a meaningful name, then list the medians by colour.
names(df3) <- c("color", "price")
df3[order(color)]
## color price
## 1: D 2690
## 2: E 2342
## 3: F 2966
## 4: G 3720
## 5: H 4535
## 6: I 4717
## 7: J 4697
## 8: K 4418
## 9: L 3017
# Rows whose price is missing; these are the ones to be filled in.
df2 <- df[is.na(price)]
df2
## V1 carat cut color clarity table depth cert measurements
## 1: 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52
## 2: 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30
## 3: 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67
## 4: 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31
## 5: 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68
## ---
## 709: 525569 0.20 Good E VS2 62 60.4 GIA 3.81 x 3.74 x 2.28
## 710: 525570 0.29 V.Good G I1 58 64.0 GIA 4.17 x 4.19 x 2.67
## 711: 525571 0.31 V.Good F I1 59 62.8 EGL 4.33 x 4.27 x 2.67
## 712: 525572 0.21 Good E SI1 66 62.5 GIA 3.81 x 3.73 x 2.35
## 713: 525573 0.22 Good D VS2 61 63.7 GIA 3.77 x 3.73 x 2.39
## price x y z price2
## 1: NA 3.96 3.95 2.52 3503
## 2: NA 4.00 4.05 2.30 3503
## 3: NA 4.56 4.53 2.67 3503
## 4: NA 3.80 3.82 2.31 3503
## 5: NA 4.35 4.26 2.68 3503
## ---
## 709: NA 3.81 3.74 2.28 3503
## 710: NA 4.17 4.19 2.67 3503
## 711: NA 4.33 4.27 2.67 3503
## 712: NA 3.81 3.73 2.35 3503
## 713: NA 3.77 3.73 2.39 3503
# The same rows, ordered by colour grade.
df2[order(color)]
## V1 carat cut color clarity table depth cert measurements
## 1: 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31
## 2: 8 0.22 V.Good D I1 61 59.2 GIA 3.95 x 3.97 x 2.34
## 3: 13 0.22 V.Good D SI2 57 59.7 GIA 3.94 x 3.93 x 2.35
## 4: 22 0.22 Good D VS2 61 63.7 GIA 3.77 x 3.73 x 2.39
## 5: 24 0.21 Good D SI2 62 64.4 IGI 3.73 x 3.78 x 2.42
## ---
## 709: 178 0.32 Good L VVS1 64 58.9 GIA 4.52 x 4.45 x 2.64
## 710: 223 0.23 V.Good L VVS2 61 59.3 GIA 3.99 x 3.96 x 2.36
## 711: 227 0.30 Good L VS2 63 58.1 GIA 4.45 x 4.39 x 2.57
## 712: 337 0.41 V.Good L VS2 58 60.5 GIA 4.86 x 4.81 x 2.92
## 713: 435 0.30 Good L VS1 57 66.2 GIA 4.21 x 4.13 x 2.76
## price x y z price2
## 1: NA 3.80 3.82 2.31 3503
## 2: NA 3.95 3.97 2.34 3503
## 3: NA 3.94 3.93 2.35 3503
## 4: NA 3.77 3.73 2.39 3503
## 5: NA 3.73 3.78 2.42 3503
## ---
## 709: NA 4.52 4.45 2.64 3503
## 710: NA 3.99 3.96 2.36 3503
## 711: NA 4.45 4.39 2.57 3503
## 712: NA 4.86 4.81 2.92 3503
## 713: NA 4.21 4.13 2.76 3503
# Key both tables on colour so merge() joins on that column.
setkey(df3, color)
setkey(df2, color)
#joined <- df3[df2]
# head(joined)
# Attach each colour's median price (df3) to the rows with a missing price
# (df2). Both inputs carry a price column, so the merge returns two suffixed
# copies: the all-NA one from df2 (price.y) is dropped and the median, which
# sits in column 2, is renamed back to "price".
df4 <- merge(df3, df2)
df4$price.y <- NULL
names(df4)[2L] <- "price"
df4
## color price V1 carat cut clarity table depth cert
## 1: D 2690 4 0.21 V.Good I1 60 60.6 GIA
## 2: D 2690 8 0.22 V.Good I1 61 59.2 GIA
## 3: D 2690 13 0.22 V.Good SI2 57 59.7 GIA
## 4: D 2690 22 0.22 Good VS2 61 63.7 GIA
## 5: D 2690 24 0.21 Good SI2 62 64.4 IGI
## ---
## 709: L 3017 178 0.32 Good VVS1 64 58.9 GIA
## 710: L 3017 223 0.23 V.Good VVS2 61 59.3 GIA
## 711: L 3017 227 0.30 Good VS2 63 58.1 GIA
## 712: L 3017 337 0.41 V.Good VS2 58 60.5 GIA
## 713: L 3017 435 0.30 Good VS1 57 66.2 GIA
## measurements x y z price2
## 1: 3.80 x 3.82 x 2.31 3.80 3.82 2.31 3503
## 2: 3.95 x 3.97 x 2.34 3.95 3.97 2.34 3503
## 3: 3.94 x 3.93 x 2.35 3.94 3.93 2.35 3503
## 4: 3.77 x 3.73 x 2.39 3.77 3.73 2.39 3503
## 5: 3.73 x 3.78 x 2.42 3.73 3.78 2.42 3503
## ---
## 709: 4.52 x 4.45 x 2.64 4.52 4.45 2.64 3503
## 710: 3.99 x 3.96 x 2.36 3.99 3.96 2.36 3503
## 711: 4.45 x 4.39 x 2.57 4.45 4.39 2.57 3503
## 712: 4.86 x 4.81 x 2.92 4.86 4.81 2.92 3503
## 713: 4.21 x 4.13 x 2.76 4.21 4.13 2.76 3503
