introduzione a R

R

“To some people R is just the 18th letter of the alphabet. […] R is also the name of a popular programming language used by a growing number of data analysts inside corporations and academia. It is becoming their lingua franca partly because data mining has entered a golden age, whether being used to set ad prices, find new drugs more quickly or fine-tune financial models. Companies as diverse as Google, Pfizer, Merck, Bank of America, the InterContinental Hotels Group and Shell use it.”
[The New York Times]

Se scrivo 3 + 4 ottengo come risultato 7. [NB: nel primo riquadro mostro il comando, nel secondo il risultato di quel comando.]

# Evaluate a sum at the prompt; the value of a top-level expression is
# printed automatically.
3 + 4
## [1] 7

Il risultato lo posso assegnare a un oggetto.

# Store intermediate results in named objects and combine them.
a <- 3 + 4
b <- 11
# The sum is stored as `somma` rather than `c`, so that the base function
# c() (used further on to build vectors) is not masked by a variable.
somma <- a + b
print(somma)
## [1] 18

sequenze, vettori

# Descending integer sequence from 50 down to 4, built with the colon operator.
a <- 50:4
print(a)
##  [1] 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28
## [24] 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5
## [47]  4
# 20 evenly spaced values running from 0.4 to 18, both endpoints included.
b <- seq(from = 0.4, to = 18, length.out = 20)
print(b)
##  [1]  0.400  1.326  2.253  3.179  4.105  5.032  5.958  6.884  7.811  8.737
## [11]  9.663 10.589 11.516 12.442 13.368 14.295 15.221 16.147 17.074 18.000

matrici

# Build three numeric vectors of equal length (9 elements each) to be used
# as the rows of a matrix.
riga1 <- c(1, 3:10)
riga2 <- c(2:5, 6, 11:8)
riga3 <- riga1/2  # element-wise division: every entry of riga1 halved
# Stack the vectors row by row; the variable names become the row names.
matrice <- rbind(riga1, riga2, riga3)
print(matrice)
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## riga1  1.0  3.0    4  5.0    6  7.0    8  9.0   10
## riga2  2.0  3.0    4  5.0    6 11.0   10  9.0    8
## riga3  0.5  1.5    2  2.5    3  3.5    4  4.5    5
# Summary statistics computed column by column; the unnamed matrix columns
# are reported as V1 ... V9.
summary(matrice)
##        V1             V2             V3             V4             V5     
##  Min.   :0.50   Min.   :1.50   Min.   :2.00   Min.   :2.50   Min.   :3.0  
##  1st Qu.:0.75   1st Qu.:2.25   1st Qu.:3.00   1st Qu.:3.75   1st Qu.:4.5  
##  Median :1.00   Median :3.00   Median :4.00   Median :5.00   Median :6.0  
##  Mean   :1.17   Mean   :2.50   Mean   :3.33   Mean   :4.17   Mean   :5.0  
##  3rd Qu.:1.50   3rd Qu.:3.00   3rd Qu.:4.00   3rd Qu.:5.00   3rd Qu.:6.0  
##  Max.   :2.00   Max.   :3.00   Max.   :4.00   Max.   :5.00   Max.   :6.0  
##        V6              V7              V8             V9       
##  Min.   : 3.50   Min.   : 4.00   Min.   :4.50   Min.   : 5.00  
##  1st Qu.: 5.25   1st Qu.: 6.00   1st Qu.:6.75   1st Qu.: 6.50  
##  Median : 7.00   Median : 8.00   Median :9.00   Median : 8.00  
##  Mean   : 7.17   Mean   : 7.33   Mean   :7.50   Mean   : 7.67  
##  3rd Qu.: 9.00   3rd Qu.: 9.00   3rd Qu.:9.00   3rd Qu.: 9.00  
##  Max.   :11.00   Max.   :10.00   Max.   :9.00   Max.   :10.00

grafici

Un grafico spartano.

# Start a new plot: riga1 against its index, drawn as vertical bars
# (type = "h").
plot(riga1, type = "h")
# Overlay riga2 on the existing plot as a red line.
lines(riga2, col = "red")
# Overlay riga3 on the existing plot as green point markers.
points(riga3, col = "green")

plot of chunk unnamed-chunk-7

Introduciamo alcuni controlli in più e la legenda.

# Line chart of the built-in Orange data set: one growth curve
# (circumference against age) per tree, with a legend.

# Convert the tree identifier to numeric. Tree is a factor, and as.numeric()
# applied directly to a factor returns the internal level codes rather than
# the labels; in Orange the levels are not stored in numeric order, so the
# codes would not match the tree numbers. Going through as.character() makes
# legend entry k refer to the tree labelled k.
# Note: this assigns a modified copy of Orange in the current environment.
Orange$Tree <- as.numeric(as.character(Orange$Tree))
ntrees <- max(Orange$Tree)

# Axis ranges covering every tree.
xrange <- range(Orange$age)
yrange <- range(Orange$circumference)

# Set up an empty plot region (type = "n"); the curves are added below.
plot(xrange, yrange, type = "n",
     xlab = "Age (days)", ylab = "Circumference (mm)")

# One colour, line type and plotting symbol per tree. The symbols start at
# pch 18; length.out keeps the vector at exactly ntrees entries so that it
# lines up with the legend labels.
colors <- rainbow(ntrees)
linetype <- seq_len(ntrees)
plotchar <- seq(18, length.out = ntrees)

# Add one curve per tree.
for (i in seq_len(ntrees)) {
    tree <- Orange[Orange$Tree == i, ]
    lines(tree$age, tree$circumference, type = "b", lwd = 1.5,
          lty = linetype[i], col = colors[i], pch = plotchar[i])
}

# Add a title and subtitle.
title("Tree Growth", "example of line plot")

# Add a legend anchored at the top-left corner of the data range.
legend(xrange[1], yrange[2], seq_len(ntrees), cex = 0.8, col = colors,
       pch = plotchar, lty = linetype, title = "Tree")

plot of chunk unnamed-chunk-8

accesso a dati remoti

# Read a "|"-separated text file from a remote server into the data frame A.
url <- "http://www.sr.bham.ac.uk/~ajrs"
file <- "R/datasets/a85_extended_NEDsearch.txt"
# The first 20 lines of the file are skipped and the next one supplies the
# column names. read.table() accepts a URL directly and opens and closes its
# own connection, so no explicit close() is needed afterwards (calling
# close(url(...)) would only create a second, never-opened connection and
# close that one).
A <- read.table(paste(url, file, sep = "/"), sep = "|", skip = 20,
                header = TRUE)
dim(A)  # Show dimensions of data frame: rows columns
## [1] 5077   16

grafico a barre

# Give columns 2 to 5 of A short names.
names(A)[2:5] <- c("name", "ra", "dec", "type")
# Count the rows falling into each value of the type column.
table(A[["type"]])
## 
##      *      G GClstr  GPair    QSO RadioS   VisS  XrayS 
##     24   4976      3      2      2     54      2     14
# Bar chart of those counts, largest first, with a logarithmic count axis.
barplot(sort(table(A[["type"]]), decreasing = TRUE), log = "y")

plot of chunk unnamed-chunk-10

boxplot

# Box plots of Redshift for each object type, on a logarithmic y axis.
# boxplot() is called explicitly because plot(Redshift ~ type) only draws box
# plots when type is a factor; when the column is character (the read.table()
# default since R 4.0.0) that call fails. The axis labels are set by hand to
# match what the formula plot would show.
boxplot(Redshift ~ type, data = A, log = "y",
        xlab = "type", ylab = "Redshift")
abline(h = 0.055, col = "red")  # Show redshift of the cluster Abell 85

plot of chunk unnamed-chunk-11