q()
: Exit the program.

save.image()
: Save the work environment.

ls()
: List the objects in memory.

example()
: Run the examples for the selected function.

edit()
: Edit the object's data manually, opening a spreadsheet-like editor.

rank()
: Return the rank of each element, i.e. its position in ascending order.

sort()
: Return the same vector sorted in ascending order.

order()
: Return the indices that would sort the vector in ascending order.

search()
: List the packages currently attached to the search path.

vignette()
: View long-form documentation about a specific package.

cat()
: Prints the arguments. Useful for producing output in user defined functions.

# Natural logarithm of the integers 1..100, drawn as a line chart.
# seq_len(100) yields the same integer vector as the former seq(1:100).
log_series <- tibble(Sequence = seq_len(100)) %>%
  mutate(log_sq = log(Sequence))

ggplot(log_series, aes(x = Sequence, y = log_sq)) +
  geom_line() +
  theme_minimal() +
  ggtitle("Logarithmic Series") +
  # hjust = 0.5 centres the plot title.
  theme(plot.title = element_text(hjust = 0.5))
# Base-10 logarithm of the integers 1..100, drawn as a line chart.
log_series <- mutate(
  tibble(Sequence = seq_len(100)),
  log_10 = log10(Sequence)
)
ggplot(data = log_series, mapping = aes(x = Sequence, y = log_10)) +
  geom_line() +
  ggtitle("10 Base Logarithmic Series") +
  theme_minimal() +
  # Centre the title on top of the minimal theme.
  theme(plot.title = element_text(hjust = 0.5))
# Square root of the integers 1..100, drawn as a line chart.
# The result column is itself called `sqrt`; the call sqrt(Sequence) still
# resolves to the function.
log_series <- mutate(
  tibble(Sequence = seq_len(100)),
  sqrt = sqrt(Sequence)
)
ggplot(data = log_series, mapping = aes(x = Sequence, y = sqrt)) +
  geom_line() +
  ggtitle("Square root Series") +
  theme_minimal() +
  # Centre the title on top of the minimal theme.
  theme(plot.title = element_text(hjust = 0.5))
# Sine, cosine and tangent of the integers 1..50.
# Only the sine and cosine columns are drawn below; Tan is computed but not
# plotted.
log_series <- mutate(
  tibble(Sequence = seq_len(50)),
  sin = sin(Sequence),
  cos = cos(Sequence),
  Tan = tan(Sequence)
)
ggplot(data = log_series) +
  # Sine curve.
  geom_line(mapping = aes(x = Sequence, y = sin), colour = "#ce2e6c") +
  # Cosine curve, drawn on top of the sine curve.
  geom_line(mapping = aes(x = Sequence, y = cos), colour = "#52de97") +
  ggtitle("Trigonometric Series") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
# Three numeric vectors that become the columns of a 3 x 3 matrix.
col1 <- c(2, 1, 0)
col2 <- c(1, 3, 1)
col3 <- c(1, 1, 2)

# Fill the matrix column by column; the columns are named, the rows are not.
mat_a <- matrix(
  c(col1, col2, col3),
  nrow = 3,
  dimnames = list(NULL, c("col1", "col2", "col3"))
)

# Transpose: the named columns become named rows.
t(mat_a)
## Output of t(mat_a):
## [,1] [,2] [,3]
## col1 2 1 0
## col2 1 3 1
## col3 1 1 2
## Output of the matrix product t(mat_a) %*% mat_a:
## col1 col2 col3
## col1 5 5 3
## col2 5 11 6
## col3 3 6 6
## Output of the matrix inverse, solve(mat_a):
## [,1] [,2] [,3]
## col1 0.5555556 -0.1111111 -0.2222222
## col2 -0.2222222 0.4444444 -0.1111111
## col3 0.1111111 -0.2222222 0.5555556
The mean is the arithmetic average of a list of numbers: the sum of the values divided by how many there are (calculated with the mean() function):
\[\bar{x} = \frac{\sum_{i=1}^n x_{i}}{n}\]
The median is the value that separates the higher half from the lower half of a sorted list of numbers. The median() function in R computes it for a list of numbers.
If the count of numbers n is odd, the median is the value at position (n+1)/2 of the sorted list:
\[ \tilde{x} = x_{\left(\frac{n+1}{2}\right)} \]
If n is even, the median is the average of the two middle values:
\[ \tilde{x} = \frac{x_{\left(\frac{n}{2}\right)} + x_{\left(\frac{n}{2}+1\right)}}{2} \]
The variance measures how far a set of numbers is spread out from its average value. Use the var() function in R.
\[ Var(X) = \frac{\sum_{i=1}^n(X_{i}-\mu_{x})^2}{n-1} \]
The standard deviation is the square root of the variance. Use the sd() function to compute it.
\[ \sigma_{x} = \sqrt{Var(X)} \]

### Mode
The mode is the most frequent value in a set of data. Note that R's mode() function returns an object's storage mode, not the statistical mode.
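The range is the total amplitude of a list of numbers, i.e. the difference between its largest and smallest values. Note that range() in R returns the minimum and the maximum themselves; diff(range(x)) gives the amplitude.
\[ \max(x) - \min(x) \]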
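The standard error of the mean is the standard deviation divided by the square root of the sample size:
\[\frac{\sigma}{\sqrt{n}}\]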
The coefficient of variation expresses the dispersion as a proportion of the mean; it is a way to compare variability between variables measured on different scales.
\[\frac{\sigma}{\mu}\]
Covariance is a measure of the joint variability of two random variables. cov() is the function used to calculate it.
\[ cov(X,Y) = \frac{\sum_{i=1}^n (X_{i}-\mu_{x})(Y_{i}-\mu_{y})}{n-1} \]
The correlation measures the strength of the relationship between two variables. cor() is the R function used to calculate it.
\[ \rho(X,Y) = \frac{cov(X,Y)}{\sigma_{x}\sigma_{y}} \]
The best-known probability distributions.
# Fix the RNG seed, draw 10000 values from a binomial distribution
# (size = 1000, prob = 0.4) and build a histogram of the draws.
set.seed(123)
random_binomial_dist <- tibble(
  sequence = seq_len(10000),
  Value = rbinom(n = 10000, size = 1000, prob = 0.4)
)
bin1 <- ggplot(data = random_binomial_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 2, fill = "#3c9d9b", colour = "#ffffff") +
  theme_minimal() +
  # Centre the title on top of the minimal theme.
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Random Binomial distribution")
## CDF, density and quantile distribution functions
#curve(dbinom(size = 1000,x, prob = 0.5),-1,100)
#qbinom()
## Cumulative distribution function
# Binomial CDF P(X <= k) for k = 1..10000 with size = 10000 and prob = 0.5.
# pbinom() is vectorised over its `q` argument, so the whole series is
# computed in one call instead of an element-by-element loop.
# NOTE(review): these parameters differ from the rbinom() sample drawn
# earlier (size = 1000, prob = 0.4) -- confirm that this is intended.
q <- 1:10000
binomial_series <- pbinom(q = q, size = 10000, prob = 0.5)
# Append the CDF values as an extra column and plot them against the row
# index stored in `sequence`.
random_binomial_dist <- cbind(random_binomial_dist, binomial_series)
bin2 <- ggplot(
  data = random_binomial_dist,
  mapping = aes(x = sequence, y = binomial_series)
) +
  geom_line(colour = "#730068") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Cumulative Binomial distribution")

# Show the histogram and the CDF side by side in a single row.
grid.arrange(bin1, bin2, nrow = 1)
# Draw 10000 values from an exponential distribution with rate = 2 and build
# a histogram of the draws.
random_exponential_dist <- tibble(
  sequence = seq_len(10000),
  Value = rexp(n = 10000, rate = 2)
)
exp1 <- ggplot(data = random_exponential_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 0.05, fill = "#caadde", colour = "#ffffff") +
  theme_minimal() +
  # Centre the title on top of the minimal theme.
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Random Exponential distribution")
# Exponential CDF (rate = 2) evaluated on an evenly spaced grid over [0, 1].
# pexp() is vectorised, so the grid is passed in a single call. The former
# `for (i in q)` loop walked over the *values* of q and used them as
# subscripts; values below 1 truncate to subscript 0, so the series was left
# equal to the grid itself rather than to the CDF.
q <- seq(0, 1, by = 0.0001)
exp_series <- pexp(q, rate = 2)
# Drop the point at q = 0 so that 10000 values remain, one per row of
# random_exponential_dist.
exp_series <- exp_series[-1]
# Append the CDF values as an extra column and plot them against the row
# index stored in `sequence`.
random_exponential_dist <- cbind(random_exponential_dist, exp_series)
exp2 <- ggplot(
  data = random_exponential_dist,
  mapping = aes(x = sequence, y = exp_series)
) +
  geom_line(colour = "#730068") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Cumulative Exponential distribution")

# Show the histogram and the CDF side by side in a single row.
grid.arrange(exp1, exp2, nrow = 1)
# Draw 10000 values from a normal distribution (mean = 0, sd = 10) and build
# a histogram of the draws.
random_normal_dist <- tibble(
  sequence = seq_len(10000),
  Value = rnorm(n = 10000, mean = 0, sd = 10)
)
norm1 <- ggplot(data = random_normal_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 10, fill = "#c70d3a", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Normal distribution")
# Standard normal CDF on an evenly spaced grid over (-5, 5]; the first grid
# point (-5) is dropped so that 10000 values remain.
# pnorm() is vectorised, so the grid is passed in a single call. The former
# `for (i in q)` loop used the grid *values* as subscripts instead of
# positions and only reached the right values through negative-subscript
# exclusion, recomputing almost the whole vector on many iterations.
q <- seq(-5, 5, by = 0.001)[-1]
norm_series <- pnorm(q)
# Append the CDF values as an extra column and plot them against the row
# index stored in `sequence`.
random_normal_dist <- cbind(random_normal_dist, norm_series)
norm2 <- ggplot(
  data = random_normal_dist,
  mapping = aes(x = sequence, y = norm_series)
) +
  geom_line(colour = "#730068") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Cumulative Normal distribution")

# Show the histogram and the CDF side by side in a single row.
grid.arrange(norm1, norm2, nrow = 1)
# Histogram of 10000 draws from a geometric distribution with prob = 0.2.
random_geom_dist <- tibble(Value = rgeom(n = 10000, prob = 0.2))
ggplot(data = random_geom_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 1, fill = "#32dbc6", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Geometric distribution")
# Histogram of 100000 draws from a hypergeometric distribution. The arguments
# are spelled out by name: nn draws, with m = 100, n = 80 and k = 70.
random_hyper_dist <- tibble(
  Value = rhyper(nn = 100000, m = 100, n = 80, k = 70)
)
ggplot(data = random_hyper_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 1, fill = "#ed0cef", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random HyperGeometric distribution")
# Histogram of 10000 draws from a log-normal distribution, using rlnorm()'s
# default parameters.
random_lognormal_dist <- tibble(Value = rlnorm(n = 10000))
ggplot(data = random_lognormal_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 1, fill = "#4f81c7", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Log-Normal distribution")
# Histogram of 10000 draws from a Poisson distribution with lambda = 10.
random_poisson_dist <- tibble(Value = rpois(n = 10000, lambda = 10))
ggplot(data = random_poisson_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 1, fill = "#d6e4aa", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Poisson distribution")
# Histogram of 10000 draws from a chi-squared distribution with df = 10.
random_chi2_dist <- tibble(Value = rchisq(n = 10000, df = 10))
ggplot(data = random_chi2_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 0.7, fill = "#eda593", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Chi-Squared distribution")
# Histogram of 10000 draws from a Student t distribution with df = 10.
random_student_dist <- tibble(Value = rt(n = 10000, df = 10))
ggplot(data = random_student_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 0.5, fill = "#a0c334", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Student distribution")
# Histogram of 10000 draws from a uniform distribution on [0, 100].
random_uniform_dist <- tibble(Value = runif(n = 10000, min = 0, max = 100))
ggplot(data = random_uniform_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 1, fill = "#979797", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random Uniform distribution")
# Histogram of 10000 draws from an F distribution with df1 = 10 and df2 = 400.
random_fisher_dist <- tibble(Value = rf(n = 10000, df1 = 10, df2 = 400))
ggplot(data = random_fisher_dist, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 0.1, fill = "#1089ff", colour = "#ffffff") +
  theme_minimal() +
  ggtitle("Random fisher distribution")