dap22.R

 23%/%3 #Quotient (integer division)

## [1] 7

 23%%3 #Remainder

## [1] 2

 log(20)

## [1] 2.995732

 exp(3)

## [1] 20.08554

pi

## [1] 3.141593

 ls()

## character(0)

 rm(df4)

## Warning in rm(df4): object 'df4' not found

 ls()

## character(0)

 # memory.limit() and memory.size() are Windows-only helpers; the values are in Mb.
 # NOTE(review): from R 4.2.0 both are stubs that return Inf with a warning, so
 # the figures recorded below will not be reproduced on a current R — confirm the
 # R version this material targets.
 memory.limit()

## [1] 8096

 memory.size()

## [1] 30.92

 rm(list=ls())
 gc()

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 364609 19.5     592000 31.7   460000 24.6
## Vcells 554124  4.3    1023718  7.9   865841  6.7

 # Draw 100 values from a normal distribution with mean 5 and standard deviation 4.
 # No set.seed() call appears earlier in this script, so the values (and the
 # mean/sd/median computed from them) change on every run.
 ajay=rnorm(100,5,4)
 ajay

##   [1]  7.97831982  8.73351901  6.35486362  3.48622823  7.33964473
##   [6]  7.46588916  4.93476827  5.59195231  6.58478724  4.11061756
##  [11]  8.95905388 10.13553609  0.31398416  8.35218864  6.05780353
##  [16]  2.98587615  0.38028755  3.52000967  1.74655741 12.84393134
##  [21]  6.88188131  2.39541653  3.12256473 -4.72895030  2.76463823
##  [26]  6.94905001  7.51479591  9.82915386  7.21735632 -0.22684182
##  [31] -1.51135703  5.84603356  9.59961122  9.39143203  4.69156280
##  [36]  2.75666575  3.32686797 10.51650054  9.54072796  3.25834653
##  [41]  2.16381246  7.24954237  9.23868210  7.36384357 -0.12851419
##  [46] -4.97217531  7.70074822 -1.03606390 -0.88556803  2.37607916
##  [51]  0.61407862  4.95359648  2.21753811  4.51628850  1.72295860
##  [56]  7.01420028  6.09329191  2.74894689  8.32787863  4.90246809
##  [61]  5.14501540  2.30755002 11.89715225  3.28142428  0.27585514
##  [66]  3.51953534  2.57318265 -1.95243691  6.43684527  5.04221738
##  [71]  4.43817254 -2.16480655 -2.94557226  6.30719684  5.58755266
##  [76]  5.17709606  6.12269303  7.71872310  7.43588537 -4.30490849
##  [81]  4.47340078  4.37390768  5.48750515  2.22941375  5.32100861
##  [86]  4.87644501  8.70227040  7.22366451  7.85923406  0.11552376
##  [91]  5.90232071  5.69271386  7.14506196 10.80115895  2.69489595
##  [96]  0.60079530  0.04542529  7.17584157  1.76121144  2.17007918

 mean(ajay)

## [1] 4.557432

 sd(ajay)

## [1] 3.704212

 median(ajay)

## [1] 4.944182

 ajay=c(10,70,65,35,30,40,55,52)
 vijay=c(1,2,3,4,5)
 # ajay has 8 elements and vijay has 5, so `*` recycles vijay as 1,2,3,4,5,1,2,3
 # and warns because 8 is not a multiple of 5.
 ajay*vijay

## Warning in ajay * vijay: longer object length is not a multiple of shorter
## object length

## [1]  10 140 195 140 150  40 110 156

 class(vijay)

## [1] "numeric"

 mean(ajay)

## [1] 44.625

 sort(ajay)

## [1] 10 30 35 40 52 55 65 70

 # Print the integers 1 through 15, one per line.
 for (number in seq_len(15L)) {
   print(number)
 }

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15

 # Print the first fifteen even numbers (2, 4, ..., 30), one per line.
 for (i in seq_len(15L)) {
   print(i * 2)
 }

## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
## [1] 22
## [1] 24
## [1] 26
## [1] 28
## [1] 30

 # Print the squares of 1 through 15, one per line.
 for (i in seq_len(15L)) {
   print(i^2)
 }

## [1] 1
## [1] 4
## [1] 9
## [1] 16
## [1] 25
## [1] 36
## [1] 49
## [1] 64
## [1] 81
## [1] 100
## [1] 121
## [1] 144
## [1] 169
## [1] 196
## [1] 225

 # Evaluate the cubic polynomial x^3 + 43*x + 25 at x.
 namefunction <- function(x) {
   x^3 + 43 * x + 25
 }
 namefunction(10)

## [1] 1455

 namefunction(100)

## [1] 1004325

 # Evaluate the cubic polynomial x^3 + 143*x + 25 at x.
 namefunction2 <- function(x) {
   x^3 + 143 * x + 25
 }
 namefunction2(10)

## [1] 2455

 # namefunction3(x, y): returns x^3 + 143*x*y + 25.
 # `**` is an alternate spelling of `^` that R's parser accepts.
 namefunction3=function(x,y)(x**3+143*x*y+25)
 namefunction3(10,5)

## [1] 8175

 namefunction3

## function(x,y)(x**3+143*x*y+25)

 kmeans

## function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong", 
##     "Lloyd", "Forgy", "MacQueen"), trace = FALSE) 
## {
##     .Mimax <- .Machine$integer.max
##     do_one <- function(nmeth) {
##         switch(nmeth, {
##             isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
##             iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
##             Z <- .Fortran(C_kmns, x, m, p, centers = centers, 
##                 as.integer(k), c1 = integer(m), c2 = integer(m), 
##                 nc = integer(k), double(k), double(k), ncp = integer(k), 
##                 D = double(m), iTran = iTran, live = integer(k), 
##                 iter = iter.max, wss = double(k), ifault = as.integer(trace))
##             switch(Z$ifault, stop("empty cluster: try a better set of initial centers", 
##                 call. = FALSE), Z$iter <- max(Z$iter, iter.max + 
##                 1L), stop("number of cluster centres must lie between 1 and nrow(x)", 
##                 call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)", 
##                 isteps.Qtran), call. = FALSE))
##         }, {
##             Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers, 
##                 k, c1 = integer(m), iter = iter.max, nc = integer(k), 
##                 wss = double(k))
##         }, {
##             Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers), 
##                 k, c1 = integer(m), iter = iter.max, nc = integer(k), 
##                 wss = double(k))
##         })
##         if (m23 <- any(nmeth == c(2L, 3L))) {
##             if (any(Z$nc == 0)) 
##                 warning("empty cluster: try a better set of initial centers", 
##                   call. = FALSE)
##         }
##         if (Z$iter > iter.max) {
##             warning(sprintf(ngettext(iter.max, "did not converge in %d iteration", 
##                 "did not converge in %d iterations"), iter.max), 
##                 call. = FALSE, domain = NA)
##             if (m23) 
##                 Z$ifault <- 2L
##         }
##         if (nmeth %in% c(2L, 3L)) {
##             if (any(Z$nc == 0)) 
##                 warning("empty cluster: try a better set of initial centers", 
##                   call. = FALSE)
##         }
##         Z
##     }
##     x <- as.matrix(x)
##     m <- as.integer(nrow(x))
##     if (is.na(m)) 
##         stop("invalid nrow(x)")
##     p <- as.integer(ncol(x))
##     if (is.na(p)) 
##         stop("invalid ncol(x)")
##     if (missing(centers)) 
##         stop("'centers' must be a number or a matrix")
##     nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L, 
##         Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
##     storage.mode(x) <- "double"
##     if (length(centers) == 1L) {
##         k <- centers
##         if (nstart == 1L) 
##             centers <- x[sample.int(m, k), , drop = FALSE]
##         if (nstart >= 2L || any(duplicated(centers))) {
##             cn <- unique(x)
##             mm <- nrow(cn)
##             if (mm < k) 
##                 stop("more cluster centers than distinct data points.")
##             centers <- cn[sample.int(mm, k), , drop = FALSE]
##         }
##     }
##     else {
##         centers <- as.matrix(centers)
##         if (any(duplicated(centers))) 
##             stop("initial centers are not distinct")
##         cn <- NULL
##         k <- nrow(centers)
##         if (m < k) 
##             stop("more cluster centers than data points")
##     }
##     k <- as.integer(k)
##     if (is.na(k)) 
##         stop("'invalid value of 'k'")
##     if (k == 1L) 
##         nmeth <- 3L
##     iter.max <- as.integer(iter.max)
##     if (is.na(iter.max) || iter.max < 1L) 
##         stop("'iter.max' must be positive")
##     if (ncol(x) != ncol(centers)) 
##         stop("must have same number of columns in 'x' and 'centers'")
##     storage.mode(centers) <- "double"
##     Z <- do_one(nmeth)
##     best <- sum(Z$wss)
##     if (nstart >= 2L && !is.null(cn)) 
##         for (i in 2:nstart) {
##             centers <- cn[sample.int(mm, k), , drop = FALSE]
##             ZZ <- do_one(nmeth)
##             if ((z <- sum(ZZ$wss)) < best) {
##                 Z <- ZZ
##                 best <- z
##             }
##         }
##     centers <- matrix(Z$centers, k)
##     dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
##     cluster <- Z$c1
##     if (!is.null(rn <- rownames(x))) 
##         names(cluster) <- rn
##     totss <- sum(scale(x, scale = FALSE)^2)
##     structure(list(cluster = cluster, centers = centers, totss = totss, 
##         withinss = Z$wss, tot.withinss = best, betweenss = totss - 
##             best, size = Z$nc, iter = Z$iter, ifault = Z$ifault), 
##         class = "kmeans")
## }
## <bytecode: 0x000000000a74ac30>
## <environment: namespace:stats>

 getwd()

## [1] "C:/Users/Dell/Documents"

 # Machine-specific absolute path: the dir(), read.csv() and fread() calls that
 # follow use relative file names and therefore depend on this working directory.
 setwd("C:/Users/Dell/Desktop")
 dir()

##  [1] "16508797_10155115909410362_414170078812994931_n.jpg"                                                      
##  [2] "27032014_Duplicate_Statement.pdf"                                                                         
##  [3] "30072015_form_du-degree.pdf"                                                                              
##  [4] "3e8d73f.jpg"                                                                                              
##  [5] "41ZMN2X1gsL._SX330_BO1,204,203,200_.jpg"                                                                  
##  [6] "a7110dd7e981c0f970736cc5f52f9b717fde51e2.png"                                                             
##  [7] "ACK.html"                                                                                                 
##  [8] "ACK_files"                                                                                                
##  [9] "adult.data.txt"                                                                                           
## [10] "AJAY.xps"                                                                                                 
## [11] "An introduction to text analysis with Python, Part 1 _ Neal Caren.html"                                   
## [12] "An introduction to text analysis with Python, Part 1 _ Neal Caren_files"                                  
## [13] "Basics of SQL & RDBMS _ Must Skills For Data Science Professionals.html"                                  
## [14] "Basics of SQL & RDBMS _ Must Skills For Data Science Professionals_files"                                 
## [15] "BigDiamonds (2).csv"                                                                                      
## [16] "BigDiamonds.csv"                                                                                          
## [17] "BigDiamonds.csv (2).zip"                                                                                  
## [18] "BigDiamonds2.csv"                                                                                         
## [19] "BLOOD REPORT.pdf"                                                                                         
## [20] "Book1.xlsx"                                                                                               
## [21] "CAM- Ajay Ohri.pdf"                                                                                       
## [22] "cam.xps"                                                                                                  
## [23] "cam2.pdf"                                                                                                 
## [24] "cdo.jpeg"                                                                                                 
## [25] "CHAP 1-6 Python for R Users_ An approach for Data Science - Google Docs.pdf"                              
## [26] "clustersas.html"                                                                                          
## [27] "Complete guide to create a Time Series Forecast (with Codes in Python).html"                              
## [28] "Complete guide to create a Time Series Forecast (with Codes in Python)_files"                             
## [29] "dap class 4.R"                                                                                            
## [30] "dap_class_4.html"                                                                                         
## [31] "desktop.ini"                                                                                              
## [32] "Dive Into NLTK, Part I_ Getting Started with NLTK  Text Mining Online.html"                              
## [33] "Dive Into NLTK, Part I_ Getting Started with NLTK  Text Mining Online_files"                             
## [34] "Dropbox.lnk"                                                                                              
## [35] "dupform.pdf"                                                                                              
## [36] "DVD.csv"                                                                                                  
## [37] "GermanCredit.csv"                                                                                         
## [38] "Git Shell.lnk"                                                                                            
## [39] "GitHub.appref-ms"                                                                                         
## [40] "GoToMeeting.lnk"                                                                                          
## [41] "groceries.csv"                                                                                            
## [42] "Guidelines-CBSE.html"                                                                                     
## [43] "IMS proschool"                                                                                            
## [44] "iris2.csv"                                                                                                
## [45] "iris3.csv"                                                                                                
## [46] "Lal Pathlabs Report.pdf"                                                                                  
## [47] "logistic regression - script for ppt.R"                                                                   
## [48] "OnlineCardNSR.pdf"                                                                                        
## [49] "PaymentForm.pdf"                                                                                          
## [50] "Program 1-results.rtf"                                                                                    
## [51] "Rdatasets"                                                                                                
## [52] "Results_ Modeling and Forecasting.html"                                                                   
## [53] "Results_ Program 5.sas.html"                                                                              
## [54] "Results_ Time Series Exploration.ctk.html"                                                                
## [55] "Rplot.png"                                                                                                
## [56] "Rplot01.pdf"                                                                                              
## [57] "Rplot02.pdf"                                                                                              
## [58] "Rplot03.png"                                                                                              
## [59] "rsconnect"                                                                                                
## [60] "sas-university-edition-107140.pdf"                                                                        
## [61] "seanabu.github.io_Seasonal_ARIMA_model_Portland_transit.ipynb at master · seanabu_seanabu.github.io.html" 
## [62] "seanabu.github.io_Seasonal_ARIMA_model_Portland_transit.ipynb at master · seanabu_seanabu.github.io_files"
## [63] "SQL-1.png"                                                                                                
## [64] "sql.jpg"                                                                                                  
## [65] "sqlcheatsheet.jpg"                                                                                        
## [66] "sqljoins_cheatsheet.png"                                                                                  
## [67] "Sunstone - Google Docs.pdf"                                                                               
## [68] "test"                                                                                                     
## [69] "Text Mining in R and Python_ 8 Tips To Get Started _ R-bloggers.html"                                     
## [70] "Text Mining in R and Python_ 8 Tips To Get Started _ R-bloggers_files"                                    
## [71] "Trarscript_Form.pdf"

 # `pattern` is a regular expression, so a bare "csv" also matches names such as
 # "BigDiamonds.csv (2).zip"; anchor on the extension to list only .csv files.
 dir(pattern = "\\.csv$")

## [1] "BigDiamonds (2).csv" "BigDiamonds.csv"     "BigDiamonds2.csv"   
## [4] "DVD.csv"             "GermanCredit.csv"    "groceries.csv"      
## [7] "iris2.csv"           "iris3.csv"

 diamonds=read.csv("BigDiamonds.csv")
 head(diamonds)

##   X carat    cut color clarity table depth cert       measurements price
## 1 1  0.25 V.Good     K      I1    59  63.7  GIA 3.96 x 3.95 x 2.52    NA
## 2 2  0.23   Good     G      I1    61  58.1  GIA 4.00 x 4.05 x 2.30    NA
## 3 3  0.34   Good     J      I2    58  58.7  GIA 4.56 x 4.53 x 2.67    NA
## 4 4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31    NA
## 5 5  0.31 V.Good     K      I1    59  62.2  EGL 4.35 x 4.26 x 2.68    NA
## 6 6  0.20   Good     G     SI2    60  64.4  GIA 3.74 x 3.67 x 2.38    NA
##      x    y    z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38

 str(diamonds)

## 'data.frame':    598024 obs. of  13 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
##  $ color       : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
##  $ clarity     : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
##  $ measurements: Factor w/ 241453 levels "","  3.99  x   3.95  x   2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...

 diamonds$X=NULL #(dropped a variable)
 head(diamonds) #first 6 rows

##   carat    cut color clarity table depth cert       measurements price
## 1  0.25 V.Good     K      I1    59  63.7  GIA 3.96 x 3.95 x 2.52    NA
## 2  0.23   Good     G      I1    61  58.1  GIA 4.00 x 4.05 x 2.30    NA
## 3  0.34   Good     J      I2    58  58.7  GIA 4.56 x 4.53 x 2.67    NA
## 4  0.21 V.Good     D      I1    60  60.6  GIA 3.80 x 3.82 x 2.31    NA
## 5  0.31 V.Good     K      I1    59  62.2  EGL 4.35 x 4.26 x 2.68    NA
## 6  0.20   Good     G     SI2    60  64.4  GIA 3.74 x 3.67 x 2.38    NA
##      x    y    z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38

 #install.packages("data.table")
 library(data.table)
 diamonds2=fread("BigDiamonds.csv")

## 
Read 15.0% of 598024 rows
Read 25.1% of 598024 rows
Read 35.1% of 598024 rows
Read 45.1% of 598024 rows
Read 55.2% of 598024 rows
Read 65.2% of 598024 rows
Read 76.9% of 598024 rows
Read 88.6% of 598024 rows
Read 98.7% of 598024 rows
Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:12

 # NOTE(review): this inspects `diamonds` (the read.csv() data frame), not the
 # `diamonds2` object just created by fread() — confirm which one was intended.
 str(diamonds)

## 'data.frame':    598024 obs. of  12 variables:
##  $ carat       : num  0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
##  $ cut         : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
##  $ color       : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
##  $ clarity     : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
##  $ table       : num  59 61 58 60 59 60 63 61 57.5 65 ...
##  $ depth       : num  63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
##  $ cert        : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
##  $ measurements: Factor w/ 241453 levels "","  3.99  x   3.95  x   2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
##  $ price       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x           : num  3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
##  $ y           : num  3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
##  $ z           : num  2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...

 summary(diamonds)

##      carat           cut             color          clarity      
##  Min.   :0.200   Good  : 59680   G      :96204   SI1    :116631  
##  1st Qu.:0.500   Ideal :369448   F      :93573   VS2    :111082  
##  Median :0.900   V.Good:168896   E      :93483   SI2    :104300  
##  Mean   :1.071                   H      :86619   VS1    : 97730  
##  3rd Qu.:1.500                   D      :73630   VVS2   : 65500  
##  Max.   :9.250                   I      :70282   VVS1   : 54798  
##                                  (Other):84233   (Other): 47983  
##      table           depth               cert       
##  Min.   : 0.00   Min.   : 0.00   GIA       :463555  
##  1st Qu.:56.00   1st Qu.:61.00   IGI       : 43667  
##  Median :58.00   Median :62.10   EGL       : 33814  
##  Mean   :57.63   Mean   :61.06   EGL USA   : 16079  
##  3rd Qu.:59.00   3rd Qu.:62.70   EGL Intl. : 11447  
##  Max.   :75.90   Max.   :81.30   EGL ISRAEL: 11301  
##                                  (Other)   : 18161  
##                  measurements        price             x         
##  0.00  x  0.00  x  0.00:   425   Min.   :  300   Min.   : 0.150  
##  0.00 x 0.00 x 0.00    :   222   1st Qu.: 1220   1st Qu.: 4.740  
##  4.3 x 4.27 x 2.67     :    97   Median : 3503   Median : 5.780  
##  4.31 x 4.29 x 2.68    :    87   Mean   : 8753   Mean   : 5.991  
##  4.29 x 4.26 x 2.67    :    86   3rd Qu.:11174   3rd Qu.: 6.970  
##  4.3 x 4.28 x 2.67     :    84   Max.   :99990   Max.   :13.890  
##  (Other)               :597023   NA's   :713     NA's   :1815    
##        y                z         
##  Min.   : 1.000   Min.   : 0.040  
##  1st Qu.: 4.970   1st Qu.: 3.120  
##  Median : 6.050   Median : 3.860  
##  Mean   : 6.199   Mean   : 4.033  
##  3rd Qu.: 7.230   3rd Qu.: 4.610  
##  Max.   :13.890   Max.   :13.180  
##  NA's   :1852     NA's   :2544

 table(diamonds$cut,diamonds$color)

##         
##              D     E     F     G     H     I     J     K     L
##   Good    6604  9733  9141  8923  7600  7380  5357  3467  1475
##   Ideal  45435 55547 58148 62067 56026 43000 29440 14729  5056
##   V.Good 21591 28203 26284 25214 22993 19902 13912  7672  3125

 # Keep only the rows with no missing values, then derive the price per carat.
 diamonds5 <- na.omit(diamonds)
 diamonds5[["ppc"]] <- diamonds5[["price"]] / diamonds5[["carat"]]
 head(diamonds5)

##     carat    cut color clarity table depth cert       measurements price
## 494  0.24 V.Good     G     SI1  61.0  58.9  GIA 4.09 x 4.10 x 2.41   300
## 495  0.31 V.Good     K     SI2  59.0  60.2  GIA 4.40 x 4.42 x 2.65   300
## 496  0.26   Good     J     VS2  56.5  64.1  IGI 4.01 x 4.05 x 2.58   300
## 497  0.24  Ideal     G     SI1  55.0  61.3  GIA 4.01 x 4.03 x 2.47   300
## 498  0.30   Good     H      I1  57.0  62.2  GIA 4.21 x 4.24 x 2.63   300
## 499  0.34   Good     F      I1  66.0  55.0  GIA 4.75 x 4.61 x 2.57   300
##        x    y    z       ppc
## 494 4.09 4.10 2.41 1250.0000
## 495 4.40 4.42 2.65  967.7419
## 496 4.01 4.05 2.58 1153.8462
## 497 4.01 4.03 2.47 1250.0000
## 498 4.21 4.24 2.63 1000.0000
## 499 4.75 4.61 2.57  882.3529

 summary(diamonds5$ppc)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     525    2667    4173    5789    7437   49520

dap22.R

Dell

Sat Mar 25 15:56:59 2017