title: '202330' author: '.' date: "2025-11-01" output: htmldocument: default pdfdocument: default ---

```{r balogluhist, echo=TRUE, fig.width=6, fig.height=4} hist(balogludata, main = "Histogram of baloglu_data", xlab = "Values", col = "lightblue", border = "white")

```

R Markdown

Step 1: Create a random seed number between 1 and 10000

balogluseed <- sample(1:10000, 1, replace = TRUE) balogluseed

Step 2: Set the random seed

set.seed(baloglu_seed)

Step 3: Choose the number of observations (between 35 and 4000)

n <- sample(35:4000, 1, replace = TRUE) n # Display the sample size

Step 4: Generate random mean (mu) and standard deviation (sd)

set.seed(baloglu_seed) mu <- rnorm(1, 2.5, 4) # Draw one mean value from N(2.5, 4) sd <- abs(rnorm(1, 2.5, 4)) # Draw one sd value and take abs() if negative

mu # Display the mean sd # Display the standard deviation

Step 5: Create your unique random dataset

set.seed(balogluseed) balogludata <- rnorm(n, mu, sd)

head(baloglu_data) # Display the first few observations

Step 6: Calculate descriptive statistics

meanval <- mean(balogludata) varval <- var(balogludata) sdval <- sd(balogludata) medianval <- median(balogludata)

Define a function to calculate the mode

get_mode <- function(x) { uniqx <- unique(x) uniqx[which.max(tabulate(match(x, uniqx)))] }

modeval <- getmode(round(baloglu_data, 2))

Print the results

meanval varval sdval medianval mode_val

Step 7: Plot a histogram of your data

hist(balogludata, main = "Histogram of balogludata", xlab = "Values", col = "lightblue", border = "white")

 #Exercise 2.2

Effect of increasing the standard deviation on the distribution

Step 1: Display current parameters

baloglu_seed # Your seed number n # Sample size mu # Mean sd # Original standard deviation

Step 2: Create a new dataset with doubled standard deviation

sdnew <- sd * 2 # Double the standard deviation set.seed(balogluseed) balogludatanew <- rnorm(n, mu, sd_new)

Step 3: Plot both histograms on the same graph

hist(balogludata, main = "Comparison of Two Distributions" , xlab = "Values", ylab = "Frequency", col = rgb(0.2, 0.4, 0.8, 0.4), # transparent blue border="white", xlim = range(c(balogludata, balogludatanew)), breaks = 30)

hist(balogludatanew, col = rgb(1, 0.2, 0.2, 0.4), # transparent red border = "white", add = TRUE, breaks = 30)

legend("topright", legend = c("Original SD", "Doubled SD"), fill = c(rgb(0.2, 0.4, 0.8, 0.4), rgb(1, 0.2, 0.2, 0.4)), border = "white")

Step 4: Explanation

When the standard deviation is doubled, the new histogram (red)

becomes wider and flatter, indicating that the data are more spread out

around the mean. The mean remains approximately the same, but variability increases.

solution 3

Step 1: Check and install necessary packages

necessary_packages <- c("pwt10","ggplot2","dplyr")

for(package in necessary_packages){ if (!require(package, character.only = TRUE)) { install.packages(package, dependencies = TRUE, repos= "https://cran.r-project.org") library(package, character.only = TRUE) } }

Step 2: Load the Penn World Table data

library(pwt10) data("pwt10.01")

Step 3: Choose two countries and a time window

library(dplyr)

country_data <- pwt10.01 %>% filter(country %in% c("Turkey", "Spain"), year >= 1987, year <= 2022) %>% select(country, year, rgdpe, pop)

Calculate GDP per capita

countrydata <- countrydata %>% mutate(gdppercapita = rgdpe / pop)

Step 4: Calculate yearly GDP per capita growth rate

countrydata <- countrydata %>% groupby(country) %>% arrange(year) %>% mutate(growthrate = (gdppercapita / lag(gdppercapita) - 1) * 100)

Step 5: Plot GDP per capita growth rate for both countries

library(ggplot2)

ggplot(countrydata, aes(x = year, y = growthrate, color = country)) + geomline(linewidth = 1.1) + geompoint(linewidth = 2) + labs(title = "GDP per Capita Growth Rate: Turkey vs Spain (1987–2022)", x = "Year", y = "Growth Rate (%)", color = "Country") + theme_minimal()

Step 6: Choose another macro variable and two new countries

Example: Labor share (labsh) for Germany and France

macro_data <- pwt10.01 %>% filter(country %in% c("Germany", "France"), year >= 1987, year <= 2022) %>% select(country, year, labsh)

ggplot(macrodata, aes(x = year, y = labsh, color = country)) + geomline(linewidth = 1.1) + geompoint(linewidth = 2) + labs(title = "Labor Share: Germany vs France (1987–2022)", x = "Year", y = "Labor Share (Share of GDP)", color = "Country") + thememinimal()

Step 7: Compare Turkey’s GDP per capita growth ranking among 10 similar countries

similar_countries <- c("Turkey", "Greece", "Portugal", "Poland", "Hungary", "Romania", "Bulgaria", "Croatia", "Serbia", "Czech Republic")

rankdata <- pwt10.01 %>% filter(country %in% similarcountries, year >= 1987, year <= 2022) %>% select(country, year, rgdpe, pop) %>% mutate(gdppercapita = rgdpe / pop) %>% groupby(country) %>% arrange(year) %>% mutate(growthrate = (gdppercapita / lag(gdppercapita) - 1) * 100) %>% ungroup()

Rank Turkey among the 10 each year

ranking <- rankdata %>% groupby(year) %>% arrange(desc(growthrate)) %>% mutate(rank = rownumber()) %>% filter(country == "Turkey")

Plot Turkey’s rank evolution

ggplot(ranking, aes(x = year, y = rank)) + geomline(color = "darkblue", size = 1.2) + geompoint(color = "orange", size = 2) + scaleyreverse(breaks = 1:10) + labs(title = "Turkey’s Rank in GDP per Capita Growth Among 10 Similar Countries", subtitle = "Data Source: Penn World Table 10.01", x = "Year", y = "Rank (1 = Highest Growth)",


title: "LVMH and Dior Financial Analysis" output: html_document ---

```{r setup, include=FALSE} necessarypackages <- c("quantmod","dplyr","ggplot2","zoo","TTR","tidyr","scales") for(pkg in necessarypackages){ if(!require(pkg, character.only = TRUE)){ install.packages(pkg, dependencies = TRUE) library(pkg, character.only = TRUE) } }

Tickers: LVMH ve Dior

startdate <- "2005-05-05" enddate <- "2025-10-01"

lvmhraw <- suppressWarnings(getSymbols("MC.PA", src = "yahoo", from = startdate, to = enddate, auto.assign = FALSE)) diorraw <- suppressWarnings(getSymbols("CDI.PA", src = "yahoo", from = startdate, to = enddate, auto.assign = FALSE))

Aylık fiyat

lvmhmonth <- to.monthly(lvmhraw, indexAt = "lastof") diormonth <- to.monthly(diorraw, indexAt = "lastof")

lvmhclose <- Cl(lvmhmonth) diorclose <- Cl(diormonth)

summstats <- function(pricexts){ vec <- as.numeric(price_xts) list( max = max(vec, na.rm = TRUE), min = min(vec, na.rm = TRUE), range = diff(range(vec, na.rm = TRUE)), mean = mean(vec, na.rm = TRUE), sd = sd(vec, na.rm = TRUE), cv = sd(vec, na.rm = TRUE)/abs(mean(vec, na.rm = TRUE)) ) }

lvmhstats <- summstats(lvmhclose) diorstats <- summstats(diorclose)

lvmhstats diorstats lvmhret <- diff(log(lvmhclose)) * 100 diorret <- diff(log(diorclose)) * 100 lvmhret <- na.omit(lvmhret) diorret <- na.omit(diorret)

dflvmh <- data.frame(date = index(lvmhret), return = coredata(lvmhret)[,1], stock = "LVMH (MC.PA)") dfdior <- data.frame(date = index(diorret), return = coredata(diorret)[,1], stock = "Dior (CDI.PA)") dfreturns <- dplyr::bindrows(dflvmh, dfdior)

ggplot(dfreturns, aes(x = date, y = return, color = stock)) + geomline(size = 0.7) + geomhline(yintercept = 0, color = "gray50", linetype = "dashed") + labs(title = "Monthly Log Returns (%) — LVMH vs Dior", x = "Year", y = "Return (%)", caption = "Prepared by: Suara Baloglu") + thememinimal() + theme(legend.position = "bottom")

Rolling SD (8-month)

lvmhrollsd <- rollapply(lvmhret, width = 8, FUN = sd, align = "right", fill = NA) diorrollsd <- rollapply(diorret, width = 8, FUN = sd, align = "right", fill = NA)

lvmhrollsdclean <- na.omit(lvmhrollsd) diorrollsdclean <- na.omit(diorrollsd)

lvmhretclean <- lvmhret[index(lvmhrollsdclean)] diorretclean <- diorret[index(diorrollsdclean)]

df_roll hazırla

dfroll <- bindrows( data.frame(date = index(lvmhrollsdclean), rollsd = as.numeric(lvmhrollsdclean), stock = "LVMH (MC.PA)"), data.frame(date = index(diorrollsdclean), rollsd = as.numeric(diorrollsdclean), stock = "Dior (CDI.PA)") )

ggplot(dfroll, aes(x = date, y = rollsd, color = stock)) + geomline(size = 0.7) + labs(title = "8-Month Rolling SD — LVMH vs Dior", x = "Year", y = "Rolling SD (%)", caption = "Prepared by: Suara Baloglu") + thememinimal() + theme(legend.position = "bottom")

lvmhcorr <- cor(lvmhretclean, lvmhrollsdclean) diorcorr <- cor(diorretclean, diorrollsdclean)

lvmhcorr diorcorr

lvmhratio <- lvmhretclean / lvmhrollsdclean diorratio <- diorretclean / diorrollsdclean

dfratio <- data.frame( date = index(lvmhratio), LVMH = coredata(lvmhratio), Dior = coredata(diorratio) ) %>% pivotlonger(cols = c("LVMH","Dior"), namesto = "Stock", values_to = "Ratio")

ggplot(dfratio, aes(x = date, y = Ratio, color = Stock)) + geomline(size = 0.7) + labs(title = "Return-to-Risk Ratio Over Time — LVMH vs Dior", x = "Year", y = "Return / SD", caption = "8-Month Rolling Standard Deviation of Stock Returns — LVMH vs Dior") + theme_minimal() + theme(legend.position = "bottom")

Install plotly if not already installed

install.packages("plotly")

library(plotly)

Function to calculate the probability that at least x people share the same birthday

p <- function(n, x, trials = 10000) { count <- 0 for (i in 1:trials) { birthdays <- sample(1:365, n, replace = TRUE) # assign birthdays randomly if (any(table(birthdays) >= x)) { # check if any birthday is shared by >= x people count <- count + 1 } } return(count / trials) # return estimated probability }

Set class sizes and x values

nvals <- 2:50 # class sizes from 2 to 50 xvals <- 2:5 # number of people sharing same birthday

Initialize probability matrix

probmatrix <- matrix(0, nrow = length(nvals), ncol = length(x_vals))

Fill probability matrix

for (i in 1:length(nvals)) { for (j in 1:length(xvals)) { if (xvals[j] <= nvals[i]) { probmatrix[i, j] <- p(nvals[i], xvals[j]) } else { probmatrix[i, j] <- NA # impossible cases } } }

Create 3D interactive surface plot

fig <- plotly( x = nvals, y = xvals, z = probmatrix, type = "surface" )

Add axis labels

fig <- fig %>% layout( scene = list( xaxis = list(title = "Class size (n)"), yaxis = list(title = "People sharing same birthday (x)"), zaxis = list(title = "Probability") ) )

Show plot

fig


title: "Coin Toss Simulation" output: html_document ---

```{r setup, include=FALSE}

Set seed for reproducibility

set.seed(123)

Parameters

trials <- 10000 # total experiments success_count <- numeric(trials) # store cumulative probability

Run simulation

for (i in 1:trials) { toss <- sample(c(0,1), 5, replace = TRUE) # 0 = tails, 1 = heads success <- ifelse(sum(toss) >= 2, 1, 0) if (i == 1) { successcount[i] <- success } else { successcount[i] <- success_count[i-1] + success } }

Calculate cumulative probability

cumprob <- successcount / (1:trials)

Plot convergence

plot(1:trials, cum_prob, type = "l", col = "blue", xlab = "Number of Experiments (N)", ylab = "Cumulative Probability", main = "Probability of >= 2 Heads (Fair Coin)") abline(h = 0.8125, col = "red", lty = 2) # theoretical probability legend("bottomright", legend=c("Simulation","Theoretical"), col=c("blue","red"), lty=c(1,2))

Probability of heads

pheads <- 0.7 successcount_unfair <- numeric(trials)

for (i in 1:trials) { toss <- rbinom(5, 1, pheads) # 5 coin tosses with biased coin success <- ifelse(sum(toss) >= 2, 1, 0) if (i == 1) { successcountunfair[i] <- success } else { successcountunfair[i] <- successcount_unfair[i-1] + success } }

cumprobunfair <- successcountunfair / (1:trials)

Plot convergence

plot(1:trials, cumprobunfair, type="l", col="green", xlab="Number of Experiments (N)", ylab="Cumulative Probability", main="Probability of >= 2 Heads (Unfair Coin, p=0.7)")


title: "Election Simulation" output: html_document ---

```{r setup, include=FALSE} set.seed(123)

Parameters

n <- 5 trials <- 10000 p <- c(A=0.4, B=0.35, C=0.25) # same probabilities for each individual

winnercount <- matrix(0, nrow=trials, ncol=3) colnames(winnercount) <- c("A","B","C")

for (i in 1:trials) { votes <- replicate(n, sample(c("A","B","C"), 1, prob=p)) tab <- table(votes) winner <- names(which.max(tab)) winner_count[i,winner] <- 1 }

Cumulative winning probabilities

cumprob <- apply(winnercount, 2, cumsum) / (1:trials)

Plot convergence

plot(1:trials, cumprob[,1], type="l", col="red", xlab="Number of Experiments (N)", ylab="Winning Probability", ylim=c(0,1), main="Winning Probability Convergence (Equal Probabilities)") lines(1:trials, cumprob[,2], col="blue") lines(1:trials, cum_prob[,3], col="green") legend("bottomright", legend=c("A","B","C"), col=c("red","blue","green"), lty=1)

Each individual has different probabilities

pmatrix <- matrix(c( 0.7,0.2,0.1, # voter 1 0.6,0.3,0.1, # voter 2 0.2,0.1,0.7, # voter 3 0.1,0.1,0.8, # voter 4 0.5,0.2,0.3 # voter 5 ), nrow=5, byrow=TRUE) colnames(pmatrix) <- c("A","B","C")

winnercount2 <- matrix(0, nrow=trials, ncol=3) colnames(winnercount2) <- c("A","B","C")

for (i in 1:trials) { votes <- sapply(1:n, function(j) sample(c("A","B","C"), 1, prob=pmatrix[j,])) tab <- table(votes) winner <- names(which.max(tab)) winnercount2[i,winner] <- 1 }

cumprob2 <- apply(winnercount2, 2, cumsum) / (1:trials)

Plot convergence

plot(1:trials, cumprob2[,1], type="l", col="red", xlab="Number of Experiments (N)", ylab="Winning Probability", ylim=c(0,1), main="Winning Probability Convergence (Polarized Society)") lines(1:trials, cumprob2[,2], col="blue") lines(1:trials, cum_prob2[,3], col="green") legend("bottomright", legend=c("A","B","C"), col=c("red","blue","green"), lty=1)


title: "Chebyshev's Inequality Simulation" output: html_document ---

```{r setup, include=FALSE} set.seed(123)

Parameters

n <- 10000 # number of samples k_values <- c(1.5,2,3) # k values for inequality

Sample from different distributions

xuniform <- runif(n, 0, 10) # uniform distribution xnormal <- rnorm(n, mean=5, sd=2) # normal distribution x_exponential <- rexp(n, rate=0.5) # exponential distribution

Function to compute Chebyshev proportion

chebyshevcheck <- function(x, k) { xbar <- mean(x) # sample mean s <- sd(x) # sample standard deviation prop <- sum(abs(x - x_bar) >= k*s)/length(x) return(prop) }

Apply for all distributions and k values

results <- data.frame(k=rep(kvalues, each=3), dist=rep(c("uniform","normal","exponential"), times=length(kvalues)), prop=NA, bound=NA)

for (i in 1:nrow(results)) { results$prop[i] <- chebyshevcheck(get(paste0("x", results$dist[i])), results$k[i]) results$bound[i] <- 1/(results$k[i]^2) }

results

Plot proportions vs Chebyshev bound

library(ggplot2) ggplot(results, aes(x=factor(k), y=prop, fill=dist)) + geombar(stat="identity", position="dodge") + geomhline(aes(yintercept=bound), linetype="dashed", color="black") + ylab("Proportion of observations |X - mean| >= k*SD") + xlab("k value") + ggtitle("Chebyshev's Inequality Check") + theme_minimal()