PROBABILITY DISTRIBUTION

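Distribusi binomial: hasilnya hanya ada dua kemungkinan (ya/tidak, sukses/gagal). Sebagai permisalan, ada 10 saham dengan peluang sukses masing-masing 0,3, lalu kita ingin mencari probabilitas tepat 2 saham yang sukses (2 saham terbaik).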

# Exact P(X = 2) for X ~ Binomial(size = 10, prob = 0.3).
dbinom(2, size = 10, prob = 0.3)
## [1] 0.2334744

Kalau pengambilan sampelnya diulang berkali-kali (simulasi):

# Simulation estimate of P(X = 2): the share of 10,000 Binomial(10, 0.3)
# draws that equal 2. No seed is set, so the value changes slightly per run.
mean(rbinom(n = 10000, size = 10, prob = 0.3) == 2)
## [1] 0.2304

Artinya: probabilitasnya memang sekitar segitu, berapa kali pun pengambilannya diulang. Ini seperti mengambil sampel dari berbagai daerah, sehingga kita memakai cara pengambilan berulang. Bisakah sampelnya hanya dari satu daerah? Bisa, tetapi hasilnya tidak dapat digeneralisasikan ke daerah lain. Kalau ingin menggeneralisasikan, kita perlu melihat faktor seperti iklim, pH air, dan lain-lain.

Kalau meneliti berdasarkan pengalaman, maka harus dilakukan survei; hasilnya berdasarkan survei (bukan sebuah penelitian), sehingga mirip dengan penelitian kualitatif. Penelitian yang bagus adalah kombinasi penelitian kualitatif dan kuantitatif.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
# Bar chart of the Binomial(10, 0.3) pmf with the bar for x = 2 highlighted.
# The fill column keeps the name `Heads` because it is shown as the legend title.
data.frame(
  successes = 0:10,
  prob = dbinom(0:10, size = 10, prob = 0.3),
  Heads = ifelse(0:10 == 2, "2", "other")
) %>%
  ggplot(aes(x = factor(successes), y = prob, fill = Heads)) +
  geom_col() +
  # Print each probability (2 decimals) just above its bar.
  geom_text(
    aes(label = round(prob, 2), y = prob + 0.01),
    position = position_dodge(0.9),
    size = 3,
    vjust = 0
  ) +
  labs(
    title = "Probability of X = 2 successes",
    subtitle = "b(10, .3)",
    x = "Successes (x)",
    y = "probability"
  ) +
  theme_minimal()

Kasus 2: probabilitas paling banyak 5 sukses, P(X <= 5).

# P(X <= 5) for X ~ Binomial(size = 10, prob = 0.3), computed four ways.
sum(dbinom(0:5, size = 10, prob = 0.3))                  # manual: add the pmf over x = 0..5
## [1] 0.952651
pbinom(5, size = 10, prob = 0.3)                         # cumulative distribution function
## [1] 0.952651
pbinom(q = 5, size = 10, prob = 0.3, lower.tail = TRUE)  # same call with every argument named in full
## [1] 0.952651
mean(rbinom(n = 10000, size = 10, prob = 0.3) <= 5)      # simulated; unseeded, so it varies per run
## [1] 0.9495
library(dplyr)
library(ggplot2)
# Bar chart of the Binomial(10, 0.3) cumulative probabilities P(X <= x);
# bars with x <= 5 are highlighted. `pmf` is carried along but not plotted.
data.frame(
  successes = 0:10,
  pmf = dbinom(0:10, size = 10, prob = 0.3),
  cdf = pbinom(0:10, size = 10, prob = 0.3, lower.tail = TRUE),
  Heads = ifelse(0:10 <= 5, "<=5", "other")
) %>%
  ggplot(aes(x = factor(successes), y = cdf, fill = Heads)) +
  geom_col() +
  # Print each cumulative probability (2 decimals) just above its bar.
  geom_text(
    aes(label = round(cdf, 2), y = cdf + 0.01),
    position = position_dodge(0.9),
    size = 3,
    vjust = 0
  ) +
  labs(
    title = "Probability of X <= 5 successes",
    subtitle = "b(10, .3)",
    x = "Successes (x)",
    y = "probability"
  ) +
  theme_minimal()

Kasus 3: probabilitas paling sedikit 5 sukses, P(X >= 5).

library(dplyr)
library(ggplot2)
# Bar chart of the Binomial(10, 0.3) upper-tail probabilities P(X >= x);
# bars with x >= 5 are highlighted. `pmf` is carried along but not plotted.
data.frame(
  successes = 0:10,
  pmf = dbinom(0:10, size = 10, prob = 0.3),
  # With lower.tail = FALSE pbinom returns P(X > q), so q is shifted down by
  # one (-1..9) to obtain P(X >= x) for x = 0..10.
  cdf = pbinom(-1:9, size = 10, prob = 0.3, lower.tail = FALSE),
  Heads = ifelse(0:10 >= 5, ">=5", "other")
) %>%
  ggplot(aes(x = factor(successes), y = cdf, fill = Heads)) +
  geom_col() +
  # Print each tail probability (2 decimals) just above its bar.
  geom_text(
    aes(label = round(cdf, 2), y = cdf + 0.01),
    position = position_dodge(0.9),
    size = 3,
    vjust = 0
  ) +
  labs(
    title = "Probability of X >= 5 successes",
    subtitle = "b(10, .3)",
    x = "Successes (x)",
    y = "probability"
  ) +
  theme_minimal()

Kasus 4: nilai harapan dan varians banyaknya sukses dalam 25 percobaan dengan peluang sukses 0,3.

# Mean and variance of X ~ Binomial(size = 25, prob = 0.3):
# closed-form values next to estimates from 10,000 unseeded simulated draws.
25 * 0.3                                        # theoretical mean, n * p
## [1] 7.5
mean(rbinom(n = 10000, size = 25, prob = 0.3))  # simulated mean
## [1] 7.4993
25 * 0.3 * (1 - 0.3)                            # theoretical variance, n * p * (1 - p)
## [1] 5.25
var(rbinom(n = 10000, size = 25, prob = 0.3))   # simulated variance
## [1] 5.163081
library(dplyr)
library(ggplot2)
# Bar chart of the Binomial(25, 0.3) pmf with the bar for x = 7 highlighted.
# The fill column keeps the name `Heads` because it is shown as the legend title.
data.frame(
  successes = 0:25,
  pmf = dbinom(0:25, size = 25, prob = 0.3),
  Heads = ifelse(0:25 == 7, "7", "other")
) %>%
  ggplot(aes(x = factor(successes), y = pmf, fill = Heads)) +
  geom_col() +
  # Print each probability (2 decimals) just above its bar.
  geom_text(
    aes(label = round(pmf, 2), y = pmf + 0.01),
    position = position_dodge(0.9),
    size = 3,
    vjust = 0
  ) +
  labs(
    title = "Probability of X = x successes.",
    subtitle = "b(25, .3)",
    x = "Successes (x)",
    y = "probability"
  ) +
  theme_minimal()

kasus 5:

POISSON DISTRIBUTION

Kasus 5: jika kita tahu rata-rata penjualannya 3 per minggu, berapa probabilitas terjadi 2 sampai 4 penjualan dalam seminggu?

library(ggplot2)
library(dplyr)
# P(2 <= X <= 4) for X ~ Poisson(lambda = 3), using cumulative probabilities.
# X is discrete, so the lower bound is subtracted at q = 1: F(4) - F(1) keeps
# x = 2 inside the interval, whereas F(4) - F(2) would cover only x = 3 and 4
# (that narrower probability is 0.3920732).
ppois(q = 4, lambda = 3, lower.tail = TRUE) -
  ppois(q = 1, lambda = 3, lower.tail = TRUE)
## [1] 0.616115

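Kasus 6: seorang pemukul dengan rata-rata pukulan 0,300 memukul bola 500 kali; berapa probabilitas ia menghasilkan paling banyak 150 pukulan, tepat 150 pukulan, atau lebih dari 150 pukulan?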

library(ggplot2)
library(dplyr)
# Poisson probabilities around x = 150 with rate lambda = 0.300 * 500.
ppois(150, lambda = 0.300 * 500, lower.tail = TRUE)   # P(X <= 150)
## [1] 0.5216972
dpois(150, lambda = 0.300 * 500)                      # P(X = 150)
## [1] 0.03255541
ppois(150, lambda = 0.300 * 500, lower.tail = FALSE)  # P(X > 150)
## [1] 0.4783028

CONTINUOUS UNIFORM DISTRIBUTION

Sebaran datanya merata: setiap nilai dalam interval memiliki peluang yang mirip (tidak beda jauh).

# Interval bounds of the continuous uniform distribution used below.
a <- -3
b <- 5

# Draw 100 random numbers from Uniform(a, b) (unseeded).
rand.unif <- runif(100, min = a, max = b)

# First look: density-scaled histogram of the sample with default axes/title.
hist(
  rand.unif,
  col = "cornflowerblue",
  freq = FALSE,
  xlab = "x",
  density = 20
)

# Second plot: the same histogram on fixed axes, so the theoretical density
# can be drawn over it.
hist(
  rand.unif,
  freq = FALSE,
  col = "azure4",
  xlab = "x",
  ylim = c(0, 0.2),
  xlim = c(-4, 6),
  density = 20,
  main = "Uniform distribution for the interval [-3,5]"
)

# Overlay the Uniform(a, b) density; the fine grid (n = 100000) keeps the
# jumps at a and b looking vertical.
curve(
  dunif(x, min = a, max = b),
  from = -4, to = 6,
  n = 100000,
  col = "green",
  lwd = 2,
  add = TRUE
)

Kalau datanya uniform, tidak perlu memakai uji hipotesis.

NORMAL DISTRIBUTION

Bentuk kurvanya seperti lonceng, simetris di sekitar rata-rata.

# Draw 10,000 values from Normal(mean = 100, sd = 5) (unseeded) and check
# that the sample mean lands close to 100.
mydata <- rnorm(10000, mean = 100, sd = 5)
mean(mydata)
## [1] 100.053

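KESIMPULAN: yang akan banyak digunakan yaitu distribusi normal; yang lainnya cukup dipahami. Chi-square: jumlah kuadrat dari x1, x2, …, xn yang masing-masing berdistribusi normal baku. Distribusi t: dipakai ketika satu peubah berdistribusi normal baku dibandingkan (dibagi) dengan akar dari chi-square per derajat bebasnya. Distribusi F: dipakai ketika membandingkan dua sebaran chi-square (rasio keduanya, masing-masing dibagi derajat bebasnya).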

Tugas: kerjakan setiap latihan yang ada (bab 1); kerjakan langsung di RPubs, cukup soalnya saja.