title: '202330' author: '.' date: "2025-11-01" output: htmldocument: default pdfdocument: default ---
```{r balogluhist, echo=TRUE, fig.width=6, fig.height=4} hist(balogludata, main = "Histogram of baloglu_data", xlab = "Values", col = "lightblue", border = "white")
```
balogluseed <- sample(1:10000, 1, replace = TRUE) balogluseed
set.seed(baloglu_seed)
n <- sample(35:4000, 1, replace = TRUE) n # Display the sample size
set.seed(baloglu_seed) mu <- rnorm(1, 2.5, 4) # Draw one mean value from N(2.5, 4) sd <- abs(rnorm(1, 2.5, 4)) # Draw one sd value and take abs() if negative
mu # Display the mean sd # Display the standard deviation
set.seed(balogluseed) balogludata <- rnorm(n, mu, sd)
head(baloglu_data) # Display the first few observations
meanval <- mean(balogludata) varval <- var(balogludata) sdval <- sd(balogludata) medianval <- median(balogludata)
get_mode <- function(x) { uniqx <- unique(x) uniqx[which.max(tabulate(match(x, uniqx)))] }
modeval <- getmode(round(baloglu_data, 2))
meanval varval sdval medianval mode_val
hist(balogludata, main = "Histogram of balogludata", xlab = "Values", col = "lightblue", border = "white")
#Exercise 2.2
baloglu_seed # Your seed number n # Sample size mu # Mean sd # Original standard deviation
sdnew <- sd * 2 # Double the standard deviation set.seed(balogluseed) balogludatanew <- rnorm(n, mu, sd_new)
hist(balogludata, main = "Comparison of Two Distributions" , xlab = "Values", ylab = "Frequency", col = rgb(0.2, 0.4, 0.8, 0.4), # transparent blue border="white", xlim = range(c(balogludata, balogludatanew)), breaks = 30)
hist(balogludatanew, col = rgb(1, 0.2, 0.2, 0.4), # transparent red border = "white", add = TRUE, breaks = 30)
legend("topright", legend = c("Original SD", "Doubled SD"), fill = c(rgb(0.2, 0.4, 0.8, 0.4), rgb(1, 0.2, 0.2, 0.4)), border = "white")
necessary_packages <- c("pwt10","ggplot2","dplyr")
for(package in necessary_packages){ if (!require(package, character.only = TRUE)) { install.packages(package, dependencies = TRUE, repos= "https://cran.r-project.org") library(package, character.only = TRUE) } }
library(pwt10) data("pwt10.01")
library(dplyr)
country_data <- pwt10.01 %>% filter(country %in% c("Turkey", "Spain"), year >= 1987, year <= 2022) %>% select(country, year, rgdpe, pop)
countrydata <- countrydata %>% mutate(gdppercapita = rgdpe / pop)
countrydata <- countrydata %>% groupby(country) %>% arrange(year) %>% mutate(growthrate = (gdppercapita / lag(gdppercapita) - 1) * 100)
library(ggplot2)
ggplot(countrydata, aes(x = year, y = growthrate, color = country)) + geomline(linewidth = 1.1) + geompoint(linewidth = 2) + labs(title = "GDP per Capita Growth Rate: Turkey vs Spain (1987–2022)", x = "Year", y = "Growth Rate (%)", color = "Country") + theme_minimal()
macro_data <- pwt10.01 %>% filter(country %in% c("Germany", "France"), year >= 1987, year <= 2022) %>% select(country, year, labsh)
ggplot(macrodata, aes(x = year, y = labsh, color = country)) + geomline(linewidth = 1.1) + geompoint(linewidth = 2) + labs(title = "Labor Share: Germany vs France (1987–2022)", x = "Year", y = "Labor Share (Share of GDP)", color = "Country") + thememinimal()
similar_countries <- c("Turkey", "Greece", "Portugal", "Poland", "Hungary", "Romania", "Bulgaria", "Croatia", "Serbia", "Czech Republic")
rankdata <- pwt10.01 %>% filter(country %in% similarcountries, year >= 1987, year <= 2022) %>% select(country, year, rgdpe, pop) %>% mutate(gdppercapita = rgdpe / pop) %>% groupby(country) %>% arrange(year) %>% mutate(growthrate = (gdppercapita / lag(gdppercapita) - 1) * 100) %>% ungroup()
ranking <- rankdata %>% groupby(year) %>% arrange(desc(growthrate)) %>% mutate(rank = rownumber()) %>% filter(country == "Turkey")
ggplot(ranking, aes(x = year, y = rank)) + geomline(color = "darkblue", size = 1.2) + geompoint(color = "orange", size = 2) + scaleyreverse(breaks = 1:10) + labs(title = "Turkey’s Rank in GDP per Capita Growth Among 10 Similar Countries", subtitle = "Data Source: Penn World Table 10.01", x = "Year", y = "Rank (1 = Highest Growth)",
title: "LVMH and Dior Financial Analysis" output: html_document ---
```{r setup, include=FALSE} necessarypackages <- c("quantmod","dplyr","ggplot2","zoo","TTR","tidyr","scales") for(pkg in necessarypackages){ if(!require(pkg, character.only = TRUE)){ install.packages(pkg, dependencies = TRUE) library(pkg, character.only = TRUE) } }
startdate <- "2005-05-05" enddate <- "2025-10-01"
lvmhraw <- suppressWarnings(getSymbols("MC.PA", src = "yahoo", from = startdate, to = enddate, auto.assign = FALSE)) diorraw <- suppressWarnings(getSymbols("CDI.PA", src = "yahoo", from = startdate, to = enddate, auto.assign = FALSE))
lvmhmonth <- to.monthly(lvmhraw, indexAt = "lastof") diormonth <- to.monthly(diorraw, indexAt = "lastof")
lvmhclose <- Cl(lvmhmonth) diorclose <- Cl(diormonth)
summstats <- function(pricexts){ vec <- as.numeric(price_xts) list( max = max(vec, na.rm = TRUE), min = min(vec, na.rm = TRUE), range = diff(range(vec, na.rm = TRUE)), mean = mean(vec, na.rm = TRUE), sd = sd(vec, na.rm = TRUE), cv = sd(vec, na.rm = TRUE)/abs(mean(vec, na.rm = TRUE)) ) }
lvmhstats <- summstats(lvmhclose) diorstats <- summstats(diorclose)
lvmhstats diorstats lvmhret <- diff(log(lvmhclose)) * 100 diorret <- diff(log(diorclose)) * 100 lvmhret <- na.omit(lvmhret) diorret <- na.omit(diorret)
dflvmh <- data.frame(date = index(lvmhret), return = coredata(lvmhret)[,1], stock = "LVMH (MC.PA)") dfdior <- data.frame(date = index(diorret), return = coredata(diorret)[,1], stock = "Dior (CDI.PA)") dfreturns <- dplyr::bindrows(dflvmh, dfdior)
ggplot(dfreturns, aes(x = date, y = return, color = stock)) + geomline(size = 0.7) + geomhline(yintercept = 0, color = "gray50", linetype = "dashed") + labs(title = "Monthly Log Returns (%) — LVMH vs Dior", x = "Year", y = "Return (%)", caption = "Prepared by: Suara Baloglu") + thememinimal() + theme(legend.position = "bottom")
lvmhrollsd <- rollapply(lvmhret, width = 8, FUN = sd, align = "right", fill = NA) diorrollsd <- rollapply(diorret, width = 8, FUN = sd, align = "right", fill = NA)
lvmhrollsdclean <- na.omit(lvmhrollsd) diorrollsdclean <- na.omit(diorrollsd)
lvmhretclean <- lvmhret[index(lvmhrollsdclean)] diorretclean <- diorret[index(diorrollsdclean)]
dfroll <- bindrows( data.frame(date = index(lvmhrollsdclean), rollsd = as.numeric(lvmhrollsdclean), stock = "LVMH (MC.PA)"), data.frame(date = index(diorrollsdclean), rollsd = as.numeric(diorrollsdclean), stock = "Dior (CDI.PA)") )
ggplot(dfroll, aes(x = date, y = rollsd, color = stock)) + geomline(size = 0.7) + labs(title = "8-Month Rolling SD — LVMH vs Dior", x = "Year", y = "Rolling SD (%)", caption = "Prepared by: Suara Baloglu") + thememinimal() + theme(legend.position = "bottom")
lvmhcorr <- cor(lvmhretclean, lvmhrollsdclean) diorcorr <- cor(diorretclean, diorrollsdclean)
lvmhcorr diorcorr
lvmhratio <- lvmhretclean / lvmhrollsdclean diorratio <- diorretclean / diorrollsdclean
dfratio <- data.frame( date = index(lvmhratio), LVMH = coredata(lvmhratio), Dior = coredata(diorratio) ) %>% pivotlonger(cols = c("LVMH","Dior"), namesto = "Stock", values_to = "Ratio")
ggplot(dfratio, aes(x = date, y = Ratio, color = Stock)) + geomline(size = 0.7) + labs(title = "Return-to-Risk Ratio Over Time — LVMH vs Dior", x = "Year", y = "Return / SD", caption = "8-Month Rolling Standard Deviation of Stock Returns — LVMH vs Dior") + theme_minimal() + theme(legend.position = "bottom")
library(plotly)
p <- function(n, x, trials = 10000) { count <- 0 for (i in 1:trials) { birthdays <- sample(1:365, n, replace = TRUE) # assign birthdays randomly if (any(table(birthdays) >= x)) { # check if any birthday is shared by >= x people count <- count + 1 } } return(count / trials) # return estimated probability }
nvals <- 2:50 # class sizes from 2 to 50 xvals <- 2:5 # number of people sharing same birthday
probmatrix <- matrix(0, nrow = length(nvals), ncol = length(x_vals))
for (i in 1:length(nvals)) { for (j in 1:length(xvals)) { if (xvals[j] <= nvals[i]) { probmatrix[i, j] <- p(nvals[i], xvals[j]) } else { probmatrix[i, j] <- NA # impossible cases } } }
fig <- plotly( x = nvals, y = xvals, z = probmatrix, type = "surface" )
fig <- fig %>% layout( scene = list( xaxis = list(title = "Class size (n)"), yaxis = list(title = "People sharing same birthday (x)"), zaxis = list(title = "Probability") ) )
fig
title: "Coin Toss Simulation" output: html_document ---
```{r setup, include=FALSE}
set.seed(123)
trials <- 10000 # total experiments success_count <- numeric(trials) # store cumulative probability
for (i in 1:trials) { toss <- sample(c(0,1), 5, replace = TRUE) # 0 = tails, 1 = heads success <- ifelse(sum(toss) >= 2, 1, 0) if (i == 1) { successcount[i] <- success } else { successcount[i] <- success_count[i-1] + success } }
cumprob <- successcount / (1:trials)
plot(1:trials, cum_prob, type = "l", col = "blue", xlab = "Number of Experiments (N)", ylab = "Cumulative Probability", main = "Probability of >= 2 Heads (Fair Coin)") abline(h = 0.8125, col = "red", lty = 2) # theoretical probability legend("bottomright", legend=c("Simulation","Theoretical"), col=c("blue","red"), lty=c(1,2))
pheads <- 0.7 successcount_unfair <- numeric(trials)
for (i in 1:trials) { toss <- rbinom(5, 1, pheads) # 5 coin tosses with biased coin success <- ifelse(sum(toss) >= 2, 1, 0) if (i == 1) { successcountunfair[i] <- success } else { successcountunfair[i] <- successcount_unfair[i-1] + success } }
cumprobunfair <- successcountunfair / (1:trials)
plot(1:trials, cumprobunfair, type="l", col="green", xlab="Number of Experiments (N)", ylab="Cumulative Probability", main="Probability of >= 2 Heads (Unfair Coin, p=0.7)")
title: "Election Simulation" output: html_document ---
```{r setup, include=FALSE} set.seed(123)
n <- 5 trials <- 10000 p <- c(A=0.4, B=0.35, C=0.25) # same probabilities for each individual
winnercount <- matrix(0, nrow=trials, ncol=3) colnames(winnercount) <- c("A","B","C")
for (i in 1:trials) { votes <- replicate(n, sample(c("A","B","C"), 1, prob=p)) tab <- table(votes) winner <- names(which.max(tab)) winner_count[i,winner] <- 1 }
cumprob <- apply(winnercount, 2, cumsum) / (1:trials)
plot(1:trials, cumprob[,1], type="l", col="red", xlab="Number of Experiments (N)", ylab="Winning Probability", ylim=c(0,1), main="Winning Probability Convergence (Equal Probabilities)") lines(1:trials, cumprob[,2], col="blue") lines(1:trials, cum_prob[,3], col="green") legend("bottomright", legend=c("A","B","C"), col=c("red","blue","green"), lty=1)
pmatrix <- matrix(c( 0.7,0.2,0.1, # voter 1 0.6,0.3,0.1, # voter 2 0.2,0.1,0.7, # voter 3 0.1,0.1,0.8, # voter 4 0.5,0.2,0.3 # voter 5 ), nrow=5, byrow=TRUE) colnames(pmatrix) <- c("A","B","C")
winnercount2 <- matrix(0, nrow=trials, ncol=3) colnames(winnercount2) <- c("A","B","C")
for (i in 1:trials) { votes <- sapply(1:n, function(j) sample(c("A","B","C"), 1, prob=pmatrix[j,])) tab <- table(votes) winner <- names(which.max(tab)) winnercount2[i,winner] <- 1 }
cumprob2 <- apply(winnercount2, 2, cumsum) / (1:trials)
plot(1:trials, cumprob2[,1], type="l", col="red", xlab="Number of Experiments (N)", ylab="Winning Probability", ylim=c(0,1), main="Winning Probability Convergence (Polarized Society)") lines(1:trials, cumprob2[,2], col="blue") lines(1:trials, cum_prob2[,3], col="green") legend("bottomright", legend=c("A","B","C"), col=c("red","blue","green"), lty=1)
title: "Chebyshev's Inequality Simulation" output: html_document ---
```{r setup, include=FALSE} set.seed(123)
n <- 10000 # number of samples k_values <- c(1.5,2,3) # k values for inequality
xuniform <- runif(n, 0, 10) # uniform distribution xnormal <- rnorm(n, mean=5, sd=2) # normal distribution x_exponential <- rexp(n, rate=0.5) # exponential distribution
chebyshevcheck <- function(x, k) { xbar <- mean(x) # sample mean s <- sd(x) # sample standard deviation prop <- sum(abs(x - x_bar) >= k*s)/length(x) return(prop) }
results <- data.frame(k=rep(kvalues, each=3), dist=rep(c("uniform","normal","exponential"), times=length(kvalues)), prop=NA, bound=NA)
for (i in 1:nrow(results)) { results$prop[i] <- chebyshevcheck(get(paste0("x", results$dist[i])), results$k[i]) results$bound[i] <- 1/(results$k[i]^2) }
results
library(ggplot2) ggplot(results, aes(x=factor(k), y=prop, fill=dist)) + geombar(stat="identity", position="dodge") + geomhline(aes(yintercept=bound), linetype="dashed", color="black") + ylab("Proportion of observations |X - mean| >= k*SD") + xlab("k value") + ggtitle("Chebyshev's Inequality Check") + theme_minimal()