# Cubic polynomial x^3 + 3x, evaluated element-wise on a numeric vector x.
pfunction <- function(x) {
  x^3 + 3 * x
}
pfunction(10)
## [1] 1030
pfunction(20)
## [1] 8060
# A bare function name (no parentheses) prints the function's source.
pfunction
## function(x) {
##   x^3 + 3 * x
## }
# Entering a function's name without parentheses prints its definition
# instead of calling it; the lines below are the source of kmeans from the
# stats namespace as echoed by R.
kmeans
## function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong",
## "Lloyd", "Forgy", "MacQueen"), trace = FALSE)
## {
## .Mimax <- .Machine$integer.max
## do_one <- function(nmeth) {
## switch(nmeth, {
## isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
## iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
## Z <- .Fortran(C_kmns, x, m, p, centers = centers,
## as.integer(k), c1 = integer(m), c2 = integer(m),
## nc = integer(k), double(k), double(k), ncp = integer(k),
## D = double(m), iTran = iTran, live = integer(k),
## iter = iter.max, wss = double(k), ifault = as.integer(trace))
## switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
## call. = FALSE), Z$iter <- max(Z$iter, iter.max +
## 1L), stop("number of cluster centres must lie between 1 and nrow(x)",
## call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
## isteps.Qtran), call. = FALSE))
## }, {
## Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
## k, c1 = integer(m), iter = iter.max, nc = integer(k),
## wss = double(k))
## }, {
## Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
## k, c1 = integer(m), iter = iter.max, nc = integer(k),
## wss = double(k))
## })
## if (m23 <- any(nmeth == c(2L, 3L))) {
## if (any(Z$nc == 0))
## warning("empty cluster: try a better set of initial centers",
## call. = FALSE)
## }
## if (Z$iter > iter.max) {
## warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
## "did not converge in %d iterations"), iter.max),
## call. = FALSE, domain = NA)
## if (m23)
## Z$ifault <- 2L
## }
## if (nmeth %in% c(2L, 3L)) {
## if (any(Z$nc == 0))
## warning("empty cluster: try a better set of initial centers",
## call. = FALSE)
## }
## Z
## }
## x <- as.matrix(x)
## m <- as.integer(nrow(x))
## if (is.na(m))
## stop("invalid nrow(x)")
## p <- as.integer(ncol(x))
## if (is.na(p))
## stop("invalid ncol(x)")
## if (missing(centers))
## stop("'centers' must be a number or a matrix")
## nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L,
## Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
## storage.mode(x) <- "double"
## if (length(centers) == 1L) {
## k <- centers
## if (nstart == 1L)
## centers <- x[sample.int(m, k), , drop = FALSE]
## if (nstart >= 2L || any(duplicated(centers))) {
## cn <- unique(x)
## mm <- nrow(cn)
## if (mm < k)
## stop("more cluster centers than distinct data points.")
## centers <- cn[sample.int(mm, k), , drop = FALSE]
## }
## }
## else {
## centers <- as.matrix(centers)
## if (any(duplicated(centers)))
## stop("initial centers are not distinct")
## cn <- NULL
## k <- nrow(centers)
## if (m < k)
## stop("more cluster centers than data points")
## }
## k <- as.integer(k)
## if (is.na(k))
## stop("'invalid value of 'k'")
## if (k == 1L)
## nmeth <- 3L
## iter.max <- as.integer(iter.max)
## if (is.na(iter.max) || iter.max < 1L)
## stop("'iter.max' must be positive")
## if (ncol(x) != ncol(centers))
## stop("must have same number of columns in 'x' and 'centers'")
## storage.mode(centers) <- "double"
## Z <- do_one(nmeth)
## best <- sum(Z$wss)
## if (nstart >= 2L && !is.null(cn))
## for (i in 2:nstart) {
## centers <- cn[sample.int(mm, k), , drop = FALSE]
## ZZ <- do_one(nmeth)
## if ((z <- sum(ZZ$wss)) < best) {
## Z <- ZZ
## best <- z
## }
## }
## centers <- matrix(Z$centers, k)
## dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
## cluster <- Z$c1
## if (!is.null(rn <- rownames(x)))
## names(cluster) <- rn
## totss <- sum(scale(x, scale = FALSE)^2)
## structure(list(cluster = cluster, centers = centers, totss = totss,
## withinss = Z$wss, tot.withinss = best, betweenss = totss -
## best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
## class = "kmeans")
## }
## <bytecode: 0x0000000007439420>
## <environment: namespace:stats>
# Load the two built-in example data sets used in this script.
data("iris", package = "datasets")
data("mtcars")
# Show the first ten rows of mtcars.
head(mtcars, n = 10)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
# Compact listing of every column and its type.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Store the car names (currently the row names) as an ordinary column too.
mtcars[["model"]] <- rownames(mtcars)
# Class of the whole object, then of a single column.
class(mtcars)
## [1] "data.frame"
class(mtcars[["mpg"]])
## [1] "numeric"
# Per-column summaries; the new character column is reported by length/class.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb model
## Min. :0.0000 Min. :3.000 Min. :1.000 Length:32
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000 Class :character
## Median :0.0000 Median :4.000 Median :2.000 Mode :character
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# cyl is already stored as numeric (see the str() output), so this coercion
# leaves the values unchanged.
mtcars[["cyl"]] <- as.numeric(mtcars[["cyl"]])
summary(mtcars[["cyl"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.000 4.000 6.000 6.188 8.000 8.000
# Frequency of each cylinder count.
table(mtcars[["cyl"]])
##
## 4 6 8
## 11 7 14
# Reload the original data set; this replaces the working copy, so columns
# added to it earlier in the session (such as `model`) are gone afterwards.
data("mtcars")
# Frequency tables for the discrete columns, then for mpg.
table(mtcars[["cyl"]])
##
## 4 6 8
## 11 7 14
table(mtcars[["gear"]])
##
## 3 4 5
## 15 12 5
table(mtcars[["am"]])
##
## 0 1
## 19 13
table(mtcars[["carb"]])
##
## 1 2 3 4 6 8
## 7 10 3 10 1 1
table(mtcars[["mpg"]])
##
## 10.4 13.3 14.3 14.7 15 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.7
## 2 1 1 1 1 2 1 1 1 1 1 1 1 2 1
## 21 21.4 21.5 22.8 24.4 26 27.3 30.4 32.4 33.9
## 2 2 1 2 1 1 1 2 1 1
# Refer to the columns through the data frame instead of attach()-ing it.
# An attached copy stays on the search path for the rest of the session and
# its columns can be silently masked by later packages (a dataset also named
# `mpg` masks it once ggplot2 is loaded further down).
# Distribution of mpg over all cars.
boxplot(mtcars$mpg)

# mpg split by the `am` column.
boxplot(mpg ~ am, data = mtcars)

# mpg split by number of cylinders.
boxplot(mpg ~ cyl, data = mtcars)
# Cross-tabulation of cylinders against `am`; with() keeps the bare column
# names so the table is still labelled "cyl" and "am".
with(mtcars, table(cyl, am))
## am
## cyl 0 1
## 4 3 8
## 6 4 3
## 8 12 2
# sqldf lets the data frames in this session be queried with SQL statements.
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.3.3
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.3.3
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.3.3
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.3.3
# First ten rows of mtcars, fetched through SQL.
a1 <- sqldf("select * from mtcars limit 10")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
a1
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
# Average mpg for each cylinder count.
a2 <- sqldf("select avg(mpg),cyl from mtcars group by cyl")
a2
## avg(mpg) cyl
## 1 26.66364 4
## 2 19.74286 6
## 3 15.10000 8
# Average mpg for each gear count.
a3 <- sqldf("select avg(mpg),gear from mtcars group by gear")
a3
## avg(mpg) gear
## 1 16.10667 3
## 2 24.53333 4
## 3 21.38000 5
# Hmisc is loaded for the describe(), summarize() and llist() calls below.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'mtcars':
##
## mpg
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units

# Always spell out mtcars$mpg from here on: a bare `mpg` is masked (see the
# loading messages above).
summary(mtcars[["mpg"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.40 15.42 19.20 20.09 22.80 33.90
boxplot(mtcars[["mpg"]])

# The argument expression is kept as written because it is echoed as the
# heading of the report.
describe(mtcars$mpg)
## mtcars$mpg
## n missing distinct Info Mean Gmd .05 .10
## 32 0 25 0.999 20.09 6.796 12.00 14.34
## .25 .50 .75 .90 .95
## 15.43 19.20 22.80 30.09 31.30
##
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9
# Mean mpg for each cylinder count.
summarize(mtcars$mpg, by = mtcars$cyl, FUN = mean)
## mtcars$cyl mtcars$mpg
## 1 4 26.66364
## 2 6 19.74286
## 3 8 15.10000
# Mean mpg for each (cylinder, gear) combination that occurs in the data.
summarize(mtcars$mpg, by = llist(mtcars$cyl, mtcars$gear), FUN = mean)
## mtcars$cyl mtcars$gear mtcars$mpg
## 1 4 3 21.500
## 2 4 4 26.925
## 3 4 5 28.200
## 4 6 3 19.750
## 5 6 4 19.750
## 6 6 5 19.700
## 7 8 3 15.050
## 8 8 5 15.400
# Load the built-in iris measurements and look at the first rows.
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Sepal length by species. The data frame is passed via `data =` instead of
# attach(): an attached copy stays on the search path and does not pick up
# later changes made to `iris`.
boxplot(Sepal.Length ~ Species, data = iris)

# Copy Petal.Length to a column name without a dot -- presumably so it can be
# referenced unquoted in SQL (NOTE(review): confirm this was the intent).
iris$PetalLength <- iris$Petal.Length
# Aggregate the column explicitly. Selecting a bare, non-aggregated
# PetalLength together with GROUP BY returns the value from one arbitrary row
# of each species rather than a per-species summary.
sqldf("select avg(PetalLength),Species from iris group by Species")
## avg(PetalLength) Species
## 1 1.462 setosa
## 2 4.260 versicolor
## 3 5.552 virginica