# Integer division: %/% returns the whole-number quotient (not the numerator).
23 %/% 3
## [1] 7
# Modulo: %% returns the remainder left after integer division.
23 %% 3
## [1] 2
# Natural logarithm of 20.
log(20)
## [1] 2.995732
# e raised to the power 3.
exp(3)
## [1] 20.08554
# Built-in constant pi.
pi
## [1] 3.141593
# List the objects currently defined in the workspace.
ls()
## character(0)
# Remove df4 only when it is actually defined here; a bare rm(df4) on a
# missing object emits "object 'df4' not found".
if (exists("df4", inherits = FALSE)) rm(df4)
# The workspace listing is unchanged.
ls()
## character(0)
# Query the session's memory limit and current memory use.
# NOTE(review): these two calls appear to be Windows-specific and are believed
# to be non-functional stubs in R >= 4.2 -- confirm before relying on them.
memory.limit()
## [1] 8096
memory.size()
## [1] 30.92
# Remove every object returned by ls(), then trigger garbage collection;
# gc() prints the memory-usage table that follows.
rm(list = ls(all.names = FALSE))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 364609 19.5 592000 31.7 460000 24.6
## Vcells 554124 4.3 1023718 7.9 865841 6.7
# Draw 100 values from a normal distribution with mean 5 and sd 4, then show
# them. No seed is set, so the numbers differ from run to run.
ajay <- rnorm(n = 100, mean = 5, sd = 4)
print(ajay)
## [1] 7.97831982 8.73351901 6.35486362 3.48622823 7.33964473
## [6] 7.46588916 4.93476827 5.59195231 6.58478724 4.11061756
## [11] 8.95905388 10.13553609 0.31398416 8.35218864 6.05780353
## [16] 2.98587615 0.38028755 3.52000967 1.74655741 12.84393134
## [21] 6.88188131 2.39541653 3.12256473 -4.72895030 2.76463823
## [26] 6.94905001 7.51479591 9.82915386 7.21735632 -0.22684182
## [31] -1.51135703 5.84603356 9.59961122 9.39143203 4.69156280
## [36] 2.75666575 3.32686797 10.51650054 9.54072796 3.25834653
## [41] 2.16381246 7.24954237 9.23868210 7.36384357 -0.12851419
## [46] -4.97217531 7.70074822 -1.03606390 -0.88556803 2.37607916
## [51] 0.61407862 4.95359648 2.21753811 4.51628850 1.72295860
## [56] 7.01420028 6.09329191 2.74894689 8.32787863 4.90246809
## [61] 5.14501540 2.30755002 11.89715225 3.28142428 0.27585514
## [66] 3.51953534 2.57318265 -1.95243691 6.43684527 5.04221738
## [71] 4.43817254 -2.16480655 -2.94557226 6.30719684 5.58755266
## [76] 5.17709606 6.12269303 7.71872310 7.43588537 -4.30490849
## [81] 4.47340078 4.37390768 5.48750515 2.22941375 5.32100861
## [86] 4.87644501 8.70227040 7.22366451 7.85923406 0.11552376
## [91] 5.90232071 5.69271386 7.14506196 10.80115895 2.69489595
## [96] 0.60079530 0.04542529 7.17584157 1.76121144 2.17007918
# Mean, standard deviation and median of the simulated sample.
mean(x = ajay)
## [1] 4.557432
sd(x = ajay)
## [1] 3.704212
median(x = ajay)
## [1] 4.944182
# Two numeric vectors of different lengths (8 and 5 elements).
ajay <- c(10, 70, 65, 35, 30, 40, 55, 52)
vijay <- as.numeric(1:5)
# Element-wise product: the shorter vector is recycled to length 8, and R
# warns because 8 is not a multiple of 5.
ajay * vijay
## Warning in ajay * vijay: longer object length is not a multiple of shorter
## object length
## [1] 10 140 195 140 150 40 110 156
# Storage class of the shorter vector.
class(vijay)
## [1] "numeric"
# Average of the eight values.
mean(x = ajay)
## [1] 44.625
# Values in ascending order (ajay itself is not modified).
sort(x = ajay)
## [1] 10 30 35 40 52 55 65 70
# Print the integers 1 through 15, one per line.
for (number in seq_len(15)) {
  print(number)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
# Print twice each integer from 1 through 15.
for (i in seq_len(15)) {
  print(2 * i)
}
## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
## [1] 22
## [1] 24
## [1] 26
## [1] 28
## [1] 30
# Print the square of each integer from 1 through 15
# (`^` is the canonical spelling of the `**` power operator).
for (i in seq_len(15)) {
  print(i^2)
}
## [1] 1
## [1] 4
## [1] 9
## [1] 16
## [1] 25
## [1] 36
## [1] 49
## [1] 64
## [1] 81
## [1] 100
## [1] 121
## [1] 144
## [1] 169
## [1] 196
## [1] 225
# Evaluate the polynomial x^3 + 43 * x + 25.
# x: numeric value (or vector); the result has the same length as x.
namefunction <- function(x) {
  x^3 + 43 * x + 25
}
namefunction(x = 10)
## [1] 1455
namefunction(x = 100)
## [1] 1004325
# Evaluate the polynomial x^3 + 143 * x + 25.
# x: numeric value (or vector); the result has the same length as x.
namefunction2 <- function(x) {
  x^3 + 143 * x + 25
}
namefunction2(x = 10)
## [1] 2455
# Two-argument polynomial: x cubed, plus 143 times x times y, plus 25.
# The body is a parenthesised expression; `**` is the power operator.
namefunction3=function(x,y)(x**3+143*x*y+25)
# Example call with x = 10 and y = 5.
namefunction3(10,5)
## [1] 8175
# Entering a function's name without parentheses prints its definition.
namefunction3
## function(x,y)(x**3+143*x*y+25)
# Display the definition of kmeans (printed below, from the stats namespace).
print(kmeans)
## function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong",
## "Lloyd", "Forgy", "MacQueen"), trace = FALSE)
## {
## .Mimax <- .Machine$integer.max
## do_one <- function(nmeth) {
## switch(nmeth, {
## isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
## iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
## Z <- .Fortran(C_kmns, x, m, p, centers = centers,
## as.integer(k), c1 = integer(m), c2 = integer(m),
## nc = integer(k), double(k), double(k), ncp = integer(k),
## D = double(m), iTran = iTran, live = integer(k),
## iter = iter.max, wss = double(k), ifault = as.integer(trace))
## switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
## call. = FALSE), Z$iter <- max(Z$iter, iter.max +
## 1L), stop("number of cluster centres must lie between 1 and nrow(x)",
## call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
## isteps.Qtran), call. = FALSE))
## }, {
## Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
## k, c1 = integer(m), iter = iter.max, nc = integer(k),
## wss = double(k))
## }, {
## Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
## k, c1 = integer(m), iter = iter.max, nc = integer(k),
## wss = double(k))
## })
## if (m23 <- any(nmeth == c(2L, 3L))) {
## if (any(Z$nc == 0))
## warning("empty cluster: try a better set of initial centers",
## call. = FALSE)
## }
## if (Z$iter > iter.max) {
## warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
## "did not converge in %d iterations"), iter.max),
## call. = FALSE, domain = NA)
## if (m23)
## Z$ifault <- 2L
## }
## if (nmeth %in% c(2L, 3L)) {
## if (any(Z$nc == 0))
## warning("empty cluster: try a better set of initial centers",
## call. = FALSE)
## }
## Z
## }
## x <- as.matrix(x)
## m <- as.integer(nrow(x))
## if (is.na(m))
## stop("invalid nrow(x)")
## p <- as.integer(ncol(x))
## if (is.na(p))
## stop("invalid ncol(x)")
## if (missing(centers))
## stop("'centers' must be a number or a matrix")
## nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L,
## Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
## storage.mode(x) <- "double"
## if (length(centers) == 1L) {
## k <- centers
## if (nstart == 1L)
## centers <- x[sample.int(m, k), , drop = FALSE]
## if (nstart >= 2L || any(duplicated(centers))) {
## cn <- unique(x)
## mm <- nrow(cn)
## if (mm < k)
## stop("more cluster centers than distinct data points.")
## centers <- cn[sample.int(mm, k), , drop = FALSE]
## }
## }
## else {
## centers <- as.matrix(centers)
## if (any(duplicated(centers)))
## stop("initial centers are not distinct")
## cn <- NULL
## k <- nrow(centers)
## if (m < k)
## stop("more cluster centers than data points")
## }
## k <- as.integer(k)
## if (is.na(k))
## stop("'invalid value of 'k'")
## if (k == 1L)
## nmeth <- 3L
## iter.max <- as.integer(iter.max)
## if (is.na(iter.max) || iter.max < 1L)
## stop("'iter.max' must be positive")
## if (ncol(x) != ncol(centers))
## stop("must have same number of columns in 'x' and 'centers'")
## storage.mode(centers) <- "double"
## Z <- do_one(nmeth)
## best <- sum(Z$wss)
## if (nstart >= 2L && !is.null(cn))
## for (i in 2:nstart) {
## centers <- cn[sample.int(mm, k), , drop = FALSE]
## ZZ <- do_one(nmeth)
## if ((z <- sum(ZZ$wss)) < best) {
## Z <- ZZ
## best <- z
## }
## }
## centers <- matrix(Z$centers, k)
## dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
## cluster <- Z$c1
## if (!is.null(rn <- rownames(x)))
## names(cluster) <- rn
## totss <- sum(scale(x, scale = FALSE)^2)
## structure(list(cluster = cluster, centers = centers, totss = totss,
## withinss = Z$wss, tot.withinss = best, betweenss = totss -
## best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
## class = "kmeans")
## }
## <bytecode: 0x000000000a74ac30>
## <environment: namespace:stats>
# Show the current working directory.
getwd()
## [1] "C:/Users/Dell/Documents"
# Switch to the Desktop folder and list everything it contains.
setwd(dir = "C:/Users/Dell/Desktop")
list.files()
## [1] "16508797_10155115909410362_414170078812994931_n.jpg"
## [2] "27032014_Duplicate_Statement.pdf"
## [3] "30072015_form_du-degree.pdf"
## [4] "3e8d73f.jpg"
## [5] "41ZMN2X1gsL._SX330_BO1,204,203,200_.jpg"
## [6] "a7110dd7e981c0f970736cc5f52f9b717fde51e2.png"
## [7] "ACK.html"
## [8] "ACK_files"
## [9] "adult.data.txt"
## [10] "AJAY.xps"
## [11] "An introduction to text analysis with Python, Part 1 _ Neal Caren.html"
## [12] "An introduction to text analysis with Python, Part 1 _ Neal Caren_files"
## [13] "Basics of SQL & RDBMS _ Must Skills For Data Science Professionals.html"
## [14] "Basics of SQL & RDBMS _ Must Skills For Data Science Professionals_files"
## [15] "BigDiamonds (2).csv"
## [16] "BigDiamonds.csv"
## [17] "BigDiamonds.csv (2).zip"
## [18] "BigDiamonds2.csv"
## [19] "BLOOD REPORT.pdf"
## [20] "Book1.xlsx"
## [21] "CAM- Ajay Ohri.pdf"
## [22] "cam.xps"
## [23] "cam2.pdf"
## [24] "cdo.jpeg"
## [25] "CHAP 1-6 Python for R Users_ An approach for Data Science - Google Docs.pdf"
## [26] "clustersas.html"
## [27] "Complete guide to create a Time Series Forecast (with Codes in Python).html"
## [28] "Complete guide to create a Time Series Forecast (with Codes in Python)_files"
## [29] "dap class 4.R"
## [30] "dap_class_4.html"
## [31] "desktop.ini"
## [32] "Dive Into NLTK, Part I_ Getting Started with NLTK Text Mining Online.html"
## [33] "Dive Into NLTK, Part I_ Getting Started with NLTK Text Mining Online_files"
## [34] "Dropbox.lnk"
## [35] "dupform.pdf"
## [36] "DVD.csv"
## [37] "GermanCredit.csv"
## [38] "Git Shell.lnk"
## [39] "GitHub.appref-ms"
## [40] "GoToMeeting.lnk"
## [41] "groceries.csv"
## [42] "Guidelines-CBSE.html"
## [43] "IMS proschool"
## [44] "iris2.csv"
## [45] "iris3.csv"
## [46] "Lal Pathlabs Report.pdf"
## [47] "logistic regression - script for ppt.R"
## [48] "OnlineCardNSR.pdf"
## [49] "PaymentForm.pdf"
## [50] "Program 1-results.rtf"
## [51] "Rdatasets"
## [52] "Results_ Modeling and Forecasting.html"
## [53] "Results_ Program 5.sas.html"
## [54] "Results_ Time Series Exploration.ctk.html"
## [55] "Rplot.png"
## [56] "Rplot01.pdf"
## [57] "Rplot02.pdf"
## [58] "Rplot03.png"
## [59] "rsconnect"
## [60] "sas-university-edition-107140.pdf"
## [61] "seanabu.github.io_Seasonal_ARIMA_model_Portland_transit.ipynb at master · seanabu_seanabu.github.io.html"
## [62] "seanabu.github.io_Seasonal_ARIMA_model_Portland_transit.ipynb at master · seanabu_seanabu.github.io_files"
## [63] "SQL-1.png"
## [64] "sql.jpg"
## [65] "sqlcheatsheet.jpg"
## [66] "sqljoins_cheatsheet.png"
## [67] "Sunstone - Google Docs.pdf"
## [68] "test"
## [69] "Text Mining in R and Python_ 8 Tips To Get Started _ R-bloggers.html"
## [70] "Text Mining in R and Python_ 8 Tips To Get Started _ R-bloggers_files"
## [71] "Trarscript_Form.pdf"
# List only the CSV files in the working directory. `pattern` is a regular
# expression, so it is anchored to the ".csv" extension; a bare "csv" also
# matches names such as "BigDiamonds.csv (2).zip".
dir(pattern = "\\.csv$")
## [1] "BigDiamonds (2).csv" "BigDiamonds.csv" "BigDiamonds2.csv"
## [4] "DVD.csv" "GermanCredit.csv" "groceries.csv"
## [7] "iris2.csv" "iris3.csv"
# Load the diamonds data set from the working directory.
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0.0; the str(), summary() and table() results shown later in this file
# depend on the text columns being read as factors.
diamonds <- read.csv("BigDiamonds.csv", stringsAsFactors = TRUE)
# Preview the first 6 rows.
head(diamonds)
## X carat cut color clarity table depth cert measurements price
## 1 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NA
## 2 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NA
## 3 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NA
## 4 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NA
## 5 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NA
## 6 6 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NA
## x y z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38
# Compact overview of the data frame: dimensions, column types, first values.
str(object = diamonds)
## 'data.frame': 598024 obs. of 13 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
## $ clarity : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
## $ measurements: Factor w/ 241453 levels ""," 3.99 x 3.95 x 2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
# Drop the X column by assigning NULL to it, then preview the first 6 rows.
diamonds[["X"]] <- NULL
head(x = diamonds, n = 6L)
## carat cut color clarity table depth cert measurements price
## 1 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NA
## 2 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NA
## 3 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NA
## 4 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NA
## 5 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NA
## 6 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NA
## x y z
## 1 3.96 3.95 2.52
## 2 4.00 4.05 2.30
## 3 4.56 4.53 2.67
## 4 3.80 3.82 2.31
## 5 4.35 4.26 2.68
## 6 3.74 3.67 2.38
#install.packages("data.table")
library(data.table)
# Read the same CSV again with data.table's fread, storing it as diamonds2.
diamonds2 <- fread(input = "BigDiamonds.csv")
##
## Read 15.0% of 598024 rows
## Read 25.1% of 598024 rows
## Read 35.1% of 598024 rows
## Read 45.1% of 598024 rows
## Read 55.2% of 598024 rows
## Read 65.2% of 598024 rows
## Read 76.9% of 598024 rows
## Read 88.6% of 598024 rows
## Read 98.7% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:12
# Structure of the read.csv data frame (12 variables now that X is gone).
# NOTE(review): diamonds2, the fread result, is never inspected -- confirm
# whether str(diamonds2) was intended here.
str(object = diamonds)
## 'data.frame': 598024 obs. of 12 variables:
## $ carat : num 0.25 0.23 0.34 0.21 0.31 0.2 0.2 0.22 0.23 0.2 ...
## $ cut : Factor w/ 3 levels "Good","Ideal",..: 3 1 1 3 3 1 1 3 3 1 ...
## $ color : Factor w/ 9 levels "D","E","F","G",..: 8 4 7 1 8 4 4 1 8 3 ...
## $ clarity : Factor w/ 9 levels "I1","I2","IF",..: 1 1 2 1 1 5 5 1 5 4 ...
## $ table : num 59 61 58 60 59 60 63 61 57.5 65 ...
## $ depth : num 63.7 58.1 58.7 60.6 62.2 64.4 62.6 59.2 63.6 54.9 ...
## $ cert : Factor w/ 9 levels "AGS","EGL","EGL Intl.",..: 6 6 6 6 2 6 6 6 8 6 ...
## $ measurements: Factor w/ 241453 levels ""," 3.99 x 3.95 x 2.44",..: 19960 21917 48457 15701 37341 14661 14400 19642 17115 16177 ...
## $ price : int NA NA NA NA NA NA NA NA NA NA ...
## $ x : num 3.96 4 4.56 3.8 4.35 3.74 3.72 3.95 3.87 3.83 ...
## $ y : num 3.95 4.05 4.53 3.82 4.26 3.67 3.65 3.97 3.9 4 ...
## $ z : num 2.52 2.3 2.67 2.31 2.68 2.38 2.31 2.34 2.47 2.14 ...
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(object = diamonds)
## carat cut color clarity
## Min. :0.200 Good : 59680 G :96204 SI1 :116631
## 1st Qu.:0.500 Ideal :369448 F :93573 VS2 :111082
## Median :0.900 V.Good:168896 E :93483 SI2 :104300
## Mean :1.071 H :86619 VS1 : 97730
## 3rd Qu.:1.500 D :73630 VVS2 : 65500
## Max. :9.250 I :70282 VVS1 : 54798
## (Other):84233 (Other): 47983
## table depth cert
## Min. : 0.00 Min. : 0.00 GIA :463555
## 1st Qu.:56.00 1st Qu.:61.00 IGI : 43667
## Median :58.00 Median :62.10 EGL : 33814
## Mean :57.63 Mean :61.06 EGL USA : 16079
## 3rd Qu.:59.00 3rd Qu.:62.70 EGL Intl. : 11447
## Max. :75.90 Max. :81.30 EGL ISRAEL: 11301
## (Other) : 18161
## measurements price x
## 0.00 x 0.00 x 0.00: 425 Min. : 300 Min. : 0.150
## 0.00 x 0.00 x 0.00 : 222 1st Qu.: 1220 1st Qu.: 4.740
## 4.3 x 4.27 x 2.67 : 97 Median : 3503 Median : 5.780
## 4.31 x 4.29 x 2.68 : 87 Mean : 8753 Mean : 5.991
## 4.29 x 4.26 x 2.67 : 86 3rd Qu.:11174 3rd Qu.: 6.970
## 4.3 x 4.28 x 2.67 : 84 Max. :99990 Max. :13.890
## (Other) :597023 NA's :713 NA's :1815
## y z
## Min. : 1.000 Min. : 0.040
## 1st Qu.: 4.970 1st Qu.: 3.120
## Median : 6.050 Median : 3.860
## Mean : 6.199 Mean : 4.033
## 3rd Qu.: 7.230 3rd Qu.: 4.610
## Max. :13.890 Max. :13.180
## NA's :1852 NA's :2544
# Cross-tabulate cut (rows) against color (columns).
table(diamonds[["cut"]], diamonds[["color"]])
##
## D E F G H I J K L
## Good 6604 9733 9141 8923 7600 7380 5357 3467 1475
## Ideal 45435 55547 58148 62067 56026 43000 29440 14729 5056
## V.Good 21591 28203 26284 25214 22993 19902 13912 7672 3125
# Keep only rows with no missing values, then derive price per carat (ppc)
# and preview the first 6 rows.
diamonds5 <- na.omit(object = diamonds)
diamonds5[["ppc"]] <- diamonds5[["price"]] / diamonds5[["carat"]]
head(x = diamonds5, n = 6L)
## carat cut color clarity table depth cert measurements price
## 494 0.24 V.Good G SI1 61.0 58.9 GIA 4.09 x 4.10 x 2.41 300
## 495 0.31 V.Good K SI2 59.0 60.2 GIA 4.40 x 4.42 x 2.65 300
## 496 0.26 Good J VS2 56.5 64.1 IGI 4.01 x 4.05 x 2.58 300
## 497 0.24 Ideal G SI1 55.0 61.3 GIA 4.01 x 4.03 x 2.47 300
## 498 0.30 Good H I1 57.0 62.2 GIA 4.21 x 4.24 x 2.63 300
## 499 0.34 Good F I1 66.0 55.0 GIA 4.75 x 4.61 x 2.57 300
## x y z ppc
## 494 4.09 4.10 2.41 1250.0000
## 495 4.40 4.42 2.65 967.7419
## 496 4.01 4.05 2.58 1153.8462
## 497 4.01 4.03 2.47 1250.0000
## 498 4.21 4.24 2.63 1000.0000
## 499 4.75 4.61 2.57 882.3529
# Distribution of price per carat across the complete rows.
summary(object = diamonds5[["ppc"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 525 2667 4173 5789 7437 49520