# 1) CREATING YOUR OWN DATA SET
# Draw a random seed in 1..10000. No set.seed() call is visible before this
# line, so each fresh run picks a different seed; the value printed below is
# from one particular run. `replace = TRUE` has no effect for a single draw.
eylul_seed <- sample(1:10000, 1, replace = TRUE)
eylul_seed
## [1] 9885
# From here on every draw is reproducible given eylul_seed.
set.seed(eylul_seed)
# Sample size: one integer from 35..4000.
n <- sample(35:4000, 1, replace = TRUE)
n
## [1] 949
# Population mean: one draw from a normal with mean 2 and sd 4.
mu <- rnorm(1, 2, 4)
mu
## [1] -1.416033
# Population standard deviation: absolute value of another such draw, so it
# is non-negative. NOTE(review): this variable shares its name with the
# sd() function; calls such as sd(x) still resolve to the function, but a
# distinct name would be clearer.
sd <- abs(rnorm(1, 2, 4))
sd
## [1] 2.538567
# Re-seed with the same seed before generating the data, so the sample is
# drawn from the start of the same random number stream that produced n, mu
# and sd above.
set.seed(eylul_seed)
soyata_data <- rnorm(n, mu, sd)
soyata_data
##   [1] -0.0741898655 -4.3347522166 -0.2442388589 -0.2805825817 -2.4273621225
##   [6]  2.3574796038  2.3208110098 -2.5273018606 -4.2285782119  0.0198626013
##  [11] -2.6943971275  3.0438653202 -3.2876803724 -8.0749134074  2.1886666294
##  [16] -5.8272716029 -4.4354998413 -2.0607967888 -3.7225770076 -0.3549321005
##  [21] -5.6726512588 -2.1253043309 -3.4478053564  1.2753638520 -1.0866339353
##  [26] -1.9503734736 -3.8055708478 -3.7631072661 -0.8128291794 -5.0395740469
##  [31]  0.5016962976 -2.3332650724 -0.5607504601 -0.8358769603 -1.8971385621
##  [36]  1.0685014384 -0.7079548212  1.0964660388  0.3362959992 -1.5238592032
##  [41] -2.2125306960 -4.0094122935  0.5493495467  3.8712798020  1.7929807484
##  [46] -1.4231163585  1.1683783794 -2.3629047217 -3.9295482811 -4.4025400405
##  [51] -4.4019752710  1.9152221774 -0.3707473390  0.7417688452  0.2121626684
##  [56] -5.4114032960 -6.5289160692 -1.8515574388 -1.0042794610 -0.3894878460
##  [61] -4.0378719265 -3.4718274441 -1.6576191721 -3.1662057414 -1.0971662648
##  [66] -1.4267979469 -2.6409150437  0.4375141191  1.1526027916  1.8547219294
##  [71]  0.6866303591 -2.3598961798 -2.9514406049  1.6687554802 -0.2807945964
##  [76] -2.2105326402 -1.9039775100 -0.5220459274 -2.4469826829 -1.2750082692
##  [81]  0.7180113742 -0.1018045646 -1.9035621682 -0.5940203679 -3.2556875683
##  [86] -4.4620430636 -0.0575667682 -1.2058678440 -1.5370837605 -4.6086276895
##  [91] -2.3540981710  0.4142086848  0.2108050582 -4.6311510272  0.4615287981
##  [96] -4.5346691449 -0.6767160907  2.3021359806 -2.5637252382 -4.7907869815
## [101] -1.2134812747 -1.1650534519 -1.3135050830  1.0007860697  1.1731873183
## [106] -1.9074604270 -2.0847076745 -4.1857832123  0.5970542423 -4.9065321615
## [111] -1.6566178797  0.5912682995  1.8638084658 -2.7486793148 -0.4739845344
## [116]  0.9058886941 -0.2673322837 -2.3147550877 -0.0368328789 -2.0156570545
## [121] -0.9678960104 -3.3759035663 -1.6923763090  2.7794535671 -0.6015662979
## [126] -0.8565105559  3.6968850106 -1.6996740571  0.2926900416  1.5088937404
## [131]  0.3564721743 -4.5329555508 -2.2118352388 -2.8065912467 -1.1776009774
## [136]  0.7669137937  4.6636670097 -4.3025303071 -3.3578013617  2.2083278293
## [141] -3.3241285138 -6.2942576138 -3.3494325057 -2.5573064549  1.5914223303
## [146] -0.7653756199  0.2831164943 -1.8990563861 -0.6375282363 -1.2293798171
## [151] -2.6738909848  0.2071665900 -2.8508735745 -3.9774873183 -2.6067528353
## [156]  1.0364093872  1.1389772574  2.2824324868 -3.6727073942  4.9135085939
## [161]  0.8761086339 -3.9679958554 -3.7836747835  0.1721904438 -2.0055266451
## [166] -1.5363436644 -3.6138897585 -1.7049957844  1.5318147766 -4.0136598190
## [171]  0.0603370175  2.6932792095 -0.2513752855 -4.0395885553 -0.8471512213
## [176] -0.7552471565  2.0535910905  1.6620866364 -3.1512365621 -4.9529953319
## [181] -5.2559855960 -0.3746490322 -3.9388749682 -6.2488372495 -0.4044778390
## [186] -4.5735507616 -2.2538483804  0.8199475731 -0.8268153695 -1.3239989604
## [191]  1.9659719672 -3.5959276817  0.9252426945  1.9748873178 -3.5132639656
## [196] -4.8783346937 -0.3582310772 -3.9628538215  2.0417424099 -0.3712852387
## [201] -7.3797666854 -5.0969469150 -3.5131275082 -1.8058469578 -5.0756836647
## [206]  1.3829363709 -3.0948649386 -2.3103013645  1.4000231212  1.7928307061
## [211] -0.1530607587 -4.8980536260 -3.2175568577 -3.5617869789 -2.7243271574
## [216] -4.3469140103  0.0211051236 -1.6419362366 -0.4621572450  0.8148867487
## [221] -2.9711291600  0.8831664864 -1.3146347220 -0.8128644115  0.9755045911
## [226] -6.3734646291 -2.5540575187 -1.9526918862 -6.2468100780 -1.2845921718
## [231] -3.8264427785 -5.2678208689  0.7253510369 -4.0711471902  0.9117955540
## [236] -2.2997010492  2.2381789169  5.4444290944 -4.4033144753 -5.1334415751
## [241]  1.3185846460 -0.1488777707  0.3081272999 -2.9909227220  0.5213226734
## [246] -0.4373182062 -1.9351999774  1.1295016988 -1.0652802441 -4.2330552538
## [251] -3.4916740873 -0.7782907099  0.7704497362  1.3664918144 -3.0394507279
## [256] -0.1349699604 -1.1142805058 -1.3357774634  2.3591175475 -3.3208667484
## [261] -0.1505181212  0.7515320090 -6.0881451856 -7.5735289896 -3.2499573235
## [266]  0.4615369095 -0.9781473274 -2.4868031058 -0.2134602265  1.3674494686
## [271] -2.5387646405  1.4048441827 -3.3584152863  0.4036241248  0.7914871049
## [276] -0.8769268050 -3.6220055652 -5.3196665961  0.8177566388 -0.7553416368
## [281]  0.1006469233  0.0047385579  1.9873702739 -4.3402619862  4.1630199948
## [286] -1.7966508455  1.5526711867 -0.5180066295 -2.8674883114 -2.9185797729
## [291] -1.7167175741 -2.1106217820 -3.6345475637  1.4936577993 -0.3849715588
## [296]  3.1977578494 -3.8203572443 -4.8218137833 -1.7662431428 -5.6047680730
## [301]  1.3574031881  4.1060337768 -0.0466601744  0.3294021250 -0.5137540911
## [306] -1.5956720174 -4.2592211016  0.5436608630 -6.1416358926  2.1323558987
## [311] -3.3405949758 -2.5273619855 -4.5519016909 -3.6013659653 -2.9317271151
## [316] -3.3663223394 -4.2001478095  0.8538761763  1.2185187631 -0.4232712828
## [321]  0.0423297904 -2.5956102479 -1.3596386284 -2.5028097989 -1.8665844786
## [326]  1.1385657107 -2.4323440368 -3.5910547734 -0.1941872024 -1.5313947016
## [331]  2.0839235550 -1.2904040223  1.4984657107 -4.4319411249 -1.0283520499
## [336] -6.8648405994 -3.7204797679  1.3157636968 -1.2213654468 -5.6668659485
## [341] -4.1881593683 -0.2023726957 -1.0983001111 -1.6989147521 -2.6749479784
## [346] -0.3579584461 -4.7085609193 -0.8575716683 -6.3826599824 -0.5187833119
## [351]  1.7745242820 -2.6275640883 -0.5429058098 -3.4592494235  2.1110298214
## [356] -3.7155073851 -1.9726695904 -4.6993151971 -2.8542707725  2.4898218212
## [361] -0.0855747229  0.9944352150 -1.9452460007 -0.3315441381 -3.5678861809
## [366]  3.1026906656  1.0709871817  0.7478136938 -0.7647480311 -2.0671722125
## [371]  1.7976460071 -4.1985583926 -1.0584287134 -0.7049197961 -0.2966441487
## [376] -1.0506347738 -1.1683722685 -2.9093073636 -2.8302763955 -4.7326464198
## [381]  1.0994657310  0.5137138395 -1.4113002990 -0.7421140669 -4.1644534807
## [386] -1.0211037047 -2.4196931794 -3.7287783808 -3.4087010384 -0.2797467438
## [391] -2.4997087171 -0.9039904309 -8.7235554344 -0.9578463782 -3.4111483933
## [396] -0.5205125300  1.3458552779 -1.6993568381 -2.0160264174  3.2265180470
## [401]  0.2191688221 -1.6876578935  1.4418288081 -0.2288744021 -5.9258324813
## [406] -1.5136562390 -4.0855788648 -1.6038823639 -2.9767862696 -1.7437969693
## [411] -1.2157339265 -2.7970559319 -1.2485795890 -1.3217557473 -3.4996119974
## [416]  1.0473706706 -1.6366866892  5.9779790912 -2.3829318168  1.0680405474
## [421] -4.3192418802  0.4189276357 -3.5115073845  0.2088509132 -1.1370572738
## [426] -2.7648134580  0.3960678496 -2.7675834187 -1.3405932190 -1.7614849356
## [431] -0.5213566997 -0.3938617250  2.8779375564  1.8260377116  1.1436789396
## [436] -0.2343287697  0.9881947426 -2.4800920179 -2.9149262092 -0.1782504477
## [441]  1.2516421025 -1.4302053090  0.8849845754  0.1822539787 -2.4774692288
## [446]  0.9016113253 -5.0647550336  0.3266631401 -2.3998333904 -0.0538699576
## [451] -1.0001419896 -3.3844119664 -2.7243270643  1.4140612868  3.6326233324
## [456] -1.8684892607 -2.0214284940 -1.6337028888  2.1133343854 -3.5557102781
## [461] -2.2466401309  1.3357573251 -3.1081637618 -0.6384644963  2.8604068137
## [466]  2.2540195457 -5.8330518267 -0.7910731701 -1.1964164261 -1.9753511852
## [471] -5.1013752661 -4.1130485318 -3.2692336021 -1.1912741551  1.3930921963
## [476] -2.7161087236 -5.7434199890  3.0334557431 -0.0303136847  0.2794512041
## [481]  1.6677570991  1.1614266547 -1.1188748513 -1.3608751199  4.1501696258
## [486] -0.0932960993  2.7597910497 -2.5251835539 -5.2502361653 -2.1460837941
## [491] -1.4594166465 -3.0971121065 -2.6875459026  0.9953591312  1.4366183378
## [496] -1.6198850590 -1.6390818594 -0.1864727857  0.8646129879 -4.3892397875
## [501]  2.2764873125 -5.8060897517  0.8238985423  0.1384004173  1.8580879668
## [506]  5.5587457267 -2.5407933127 -5.8400630754 -4.9105656324 -0.3550319544
## [511]  2.7316355055 -1.4282868686 -2.4114462013 -0.4980737688 -5.9776637261
## [516]  3.5137037766  0.1522332559 -0.1337368378  0.1976968288  0.6529471732
## [521] -0.3112426346 -1.1456684321  0.7188697230 -3.7925731669 -3.4784740739
## [526] -4.3782210091 -0.8825818298 -4.0482512353 -0.5622431306 -0.0132112354
## [531]  2.4738016254  3.8366947072 -0.3103124810  0.2590705275 -2.3061740769
## [536]  0.0161296796  0.6425855164 -4.8298676622 -3.5402577628 -0.9381746977
## [541] -1.8529918781  2.2765103921 -4.8723970045 -2.8233537950 -2.2038896075
## [546] -4.0100111680 -0.3423938851 -3.1195051506 -2.6551069607 -2.6969737897
## [551] -2.6651536969  0.4055576560 -1.6203276343 -2.9688784062 -0.9461843568
## [556] -0.7904102416 -3.2141578343 -0.6873418956 -0.7918778694 -5.4835413739
## [561] -3.6606572001 -0.5040819012 -4.0062660603 -3.6101060789 -3.9177738132
## [566] -2.6723655354 -5.2050922722 -1.7790351204 -2.0962187215  2.1441548424
## [571] -5.5324302796  1.3347257357 -0.3841946449  0.9960289609 -6.0513619182
## [576]  0.6178207136 -1.5490260027 -3.9103725923 -1.0903653341  1.4161728816
## [581]  1.0468155352  2.9626959007 -3.7510025865 -2.8551999691 -0.9319421523
## [586]  3.0885433396 -2.1993934204 -1.9626346661 -2.2235410973 -1.7665198033
## [591] -5.2707416556  0.7982078065 -2.2746739204 -0.6904084295  4.8080531142
## [596] -3.0726964572  0.8913499277  0.6808077323  4.5484816106 -5.0026267386
## [601]  4.2236530955 -1.0442155359  4.3585944540 -3.5075396620  2.4516197295
## [606] -0.7652655921 -0.9159451822 -2.1454333456 -1.9039583444 -1.5477282846
## [611]  3.7068943973 -3.3344924618 -0.9897026233  2.9437628676 -0.9351173815
## [616] -0.1732697479 -6.9012827783 -2.5691841255 -6.0727754876 -0.8806094165
## [621] -0.7939847665 -3.3619615641 -1.0280627275  0.6981777498  0.0349054747
## [626] -1.2274848555 -0.7401420162  0.7866948945 -2.0530843368  2.1143759199
## [631] -1.7645167488 -3.6689032088 -4.6794705625  0.7946282129 -1.9729845944
## [636] -3.0853332137  0.1246246641 -0.4838822817 -5.2571578061  0.3099892150
## [641] -2.7128585543 -1.5165642678 -5.2073670265  1.5128996366 -4.0207239381
## [646] -2.5169147775 -2.1851057560  1.2541321368  0.9597231175 -2.6105098239
## [651]  4.2938401218  2.4062580828 -4.6159601919 -3.1771521373  0.1268226958
## [656] -0.5877500366  0.3087221324  0.8950537696  0.1097215768 -0.8578221801
## [661] -0.5192809088 -2.0531396239 -1.2598948387 -2.2826908566 -1.3956742125
## [666] -3.2201695342 -0.4382672385 -3.6863769673  1.1518912281  1.3775463137
## [671] -1.4187141417 -3.7889916050 -2.5387712381 -3.5091822465 -5.6490053534
## [676] -1.9103917383 -0.0608373778  3.0842270505  1.1341181017  1.6032234320
## [681] -0.5680986745 -6.6858357453 -0.0361419533 -0.1788190338 -3.0787245658
## [686] -2.0193187401  0.3163090473 -4.6173174774 -2.4449891621 -1.1308838630
## [691] -2.6188987135 -3.5350630007 -3.6584015046  1.9178444545  0.5534462555
## [696]  0.0324914003 -1.0619583550  0.9572644518 -6.7589582833 -2.5432559042
## [701]  2.4809004992 -0.0302462239 -0.9894981334 -0.4594888721 -3.7990178114
## [706] -2.2289002198 -7.5038301976  4.1293280634 -1.8226867126 -5.8372974995
## [711] -1.9244551161 -1.6912771464 -9.2062690936 -2.5748647364 -2.7998744197
## [716] -4.3710310048 -5.3528856026 -4.8008124462  1.0583145203  1.9553093190
## [721] -1.3413111190 -2.0069906447  2.4746990377 -0.9532276066  2.2760835437
## [726] -5.0743501309  1.5502140997  0.2912357403 -2.1096239309 -4.7903804694
## [731] -1.0050844166 -2.5067031400 -3.5222401849 -1.4484246154 -3.8636103906
## [736] -0.6601622152 -5.9554267484  2.2469853030  1.7851086501  3.3359133955
## [741] -0.0575359931 -2.6075979040 -4.7612601540 -5.5010756012 -1.1920379054
## [746] -0.8263436795 -0.3964816607 -3.5427063741  0.3294521146  3.0709150673
## [751] -5.7075747718  0.1996908121 -3.2326972717 -5.7801954535 -2.8941807561
## [756] -5.7268339089 -0.0444210654 -2.5213936476 -2.9451424725 -2.5364758567
## [761]  0.1121540709  1.3021723745 -3.5765150737  2.7514925262 -2.4993020991
## [766] -1.7289181674 -0.0339060201 -4.4375642257 -5.3116178531 -1.6835877830
## [771] -2.3142599433 -6.3054760229  4.8777025140 -0.8700751976 -1.7129691199
## [776] -2.1721200705  2.2618620869 -0.8965498125 -1.8744219410  1.5680188968
## [781] -0.3106875616 -0.4400239652  0.6772582931 -1.4903156553 -0.9305192616
## [786] -3.2545711655  1.0603281043 -6.2282474282 -2.5230948974  0.2754533456
## [791] -0.6663509980 -5.9689963968 -1.9788759233 -2.3715227621 -1.2531309017
## [796] -1.3879269547 -2.1879838753 -0.7844748480 -5.0385888853 -2.6216728929
## [801]  3.0429566657 -3.4839530006 -4.1229490896  2.2815558071  0.5687262533
## [806]  2.2444465346  0.4172607235 -0.9441754160 -1.8692499752  0.4704568607
## [811] -4.8456228122  1.2809717133 -0.0247696477 -1.6355994543 -0.8119389881
## [816]  5.8258850959 -2.5800408317 -1.6668063953 -0.4584884843 -0.7678785340
## [821] -1.7523600543 -0.1634249998  1.3833558646 -1.7380951200 -5.6400941261
## [826]  3.3746793045  1.7508628387 -2.9694629344 -1.6650931237 -5.2398832436
## [831] -3.2743022983 -3.7125258925 -6.3734033391  2.2506709440  4.0453785993
## [836]  1.0055709058  3.0100856202 -0.1583412919 -4.3930908083 -6.5735690336
## [841]  0.2166931649 -2.2290782258 -3.5248621376 -1.9115795703 -4.0209837032
## [846] -1.3672565117 -1.5478082348 -2.9869554504  0.5671241762  0.7910441560
## [851] -0.6983832726 -0.0809400422  0.3064109331  1.3986802446 -0.7142649326
## [856] -3.9508465871  1.4518693691 -1.7764214377 -1.3131121158 -5.7684336022
## [861] -0.0412095215  0.7233028394 -0.8616606832 -4.7950095381 -1.9922211661
## [866] -2.1232741554  1.6537448894 -2.9804412432 -5.4886970847 -2.3971237855
## [871]  0.8519829282  0.3766244161 -1.7066498877 -4.3410647278 -0.7868019970
## [876] -3.3898312024 -3.8996458862 -2.7708400550 -1.2485950352 -3.6770202284
## [881] -2.5861758029 -3.2264141223 -1.3723722468  0.3501116639 -0.1366650793
## [886] -3.8047079238 -0.2690135200 -3.9303994629 -4.1324795963 -0.6923134221
## [891] -2.4926276538  0.5303767897  1.3168669477  0.7538898635 -6.5590171137
## [896]  1.7294170929 -0.4196286198 -2.9016613081  1.2016815229 -3.9117097037
## [901]  2.2237266317 -0.2351335737  0.2800391042 -0.6722432199 -4.4732660997
## [906]  2.2196233792 -2.9775960869 -3.1881618712  4.6437996330 -0.8790840436
## [911]  3.8028913791 -3.4338051482 -1.3701037143  0.1327170091 -3.0386593177
## [916]  0.0001287941 -1.2445747478  0.6381993448 -2.3809717229 -4.2498826484
## [921] -1.6794248097 -1.5634401868 -1.1799962879 -2.7917884286  1.9786600573
## [926]  0.2551039518 -0.4819043415 -3.8036111393 -3.1111381270 -3.4073310012
## [931] -3.7837022965 -2.3819131740  1.0400902116 -5.3641590573  2.5572762179
## [936] -2.6039091358  0.1149864841  1.1050013952 -6.1774130355  2.2362190705
## [941] -7.6403283252 -5.5856474942 -1.2084070231  0.8138711133 -0.8267192056
## [946] -3.4065576631 -2.2157838173 -2.3634280834  0.8884479844
# 2) INTRODUCTION TO STATISTICS
## 2.1
# Return the most frequent value of a vector.
#
# Args:
#   v: a vector (numeric, character, ...).
#
# Returns:
#   The value that occurs most often in `v`. When several values share the
#   highest count, the one that appears first in `v` is returned; in
#   particular, if every value is distinct the result is simply the first
#   element of `v`.
calculate_mode <- function(v) {
  distinct <- unique(v)
  # Position of each element of v within `distinct`, then count per position.
  freq <- tabulate(match(v, distinct))
  distinct[which.max(freq)]
}
data_mode <- calculate_mode(soyata_data)
data_mean <- mean(soyata_data)
data_sd <- sd(soyata_data)
data_median <- median(soyata_data)
data_variance <- var(soyata_data)
### Yes, the mean and the variance will almost certainly differ from the population parameter values that were randomly chosen by the computer.
cat("seed number (s):", eylul_seed, "\n")
## seed number (s): 9885
cat("number of observations (n):", n, "\n")
## number of observations (n): 949
cat("Mean: ", data_mean, "\n")
## Mean:  -1.357188
cat("Variance: ", data_variance, "\n")
## Variance:  6.165684
cat("Standard Deviation (s): ", data_sd, "\n")
## Standard Deviation (s):  2.483079
cat("Median: ", data_median, "\n")
## Median:  -1.313112
cat("Mode: ", data_mode, "\n")
## Mode:  -0.07418987
cat("Population Mean (mu): ", mu, "\n")
## Population Mean (mu):  -1.416033
cat("Population Standard Deviation (sd): ", sd, "\n")
## Population Standard Deviation (sd):  2.538567
cat("Population Variance (sigma^2): ", sd^2, "\n")
## Population Variance (sigma^2):  6.444324
## 2.2
# Packages required by the rest of the script.
necessary_packages <- c("dplyr", "ggplot2", "pwt10", "quantmod", "PerformanceAnalytics", "zoo")

# Install any package that is not available yet, then attach each one.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the package still cannot be loaded, instead of
# silently returning FALSE as require() does.
for (package in necessary_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    # Status message (Turkish for "Loading package:").
    cat("Paket yükleniyor:", package, "\n")
    install.packages(package, dependencies = TRUE)
  }
  library(package, character.only = TRUE)
}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2
## Loading required package: pwt10
## Loading required package: quantmod
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: PerformanceAnalytics
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(dplyr)
library(ggplot2)
sd2x <- sd * 2 
set.seed(eylul_seed)
soyata_data_2x <- rnorm(n, mu, sd2x) 
data_original <- data.frame(value = soyata_data, group = "Original SD (σ)")
data_doubled <- data.frame(value = soyata_data_2x, group = "SD Multiplied by 2 (2σ)")
combined_data <- bind_rows(data_original, data_doubled)
STUDENT_ID <- "2022300003"
graph_title <- "Effect of Standard Deviation Change on Data Distribution"
subtitle_info <- paste("Student ID:", STUDENT_ID, " | Seed (s):", eylul_seed, " | n:", n)
xlabel <- "Data Set Values"
ylabel <- "Density"
# Overlaid density-scaled histograms of the original and the doubled-SD
# samples, each with its kernel density curve on top.
comparison_histogram <- ggplot(combined_data, aes(x = value, fill = group)) +
  # Histograms are drawn on the density scale so they are comparable with
  # the density curves; the wider (2σ) group is made more transparent so the
  # original group stays visible underneath it.
  geom_histogram(
    aes(
      y = after_stat(density),
      alpha = ifelse(group == "SD Multiplied by 2 (2σ)", 0.3, 0.7)
    ),
    bins = 50,
    position = "identity"
  ) +
  # fill = NA: without it the density layer inherits the plot-level
  # `fill = group` mapping and paints opaque areas over the histograms.
  geom_density(aes(color = group), fill = NA, linewidth = 1) +
  scale_fill_manual(values = c("Original SD (σ)" = "blue", "SD Multiplied by 2 (2σ)" = "red")) +
  scale_color_manual(values = c("Original SD (σ)" = "blue", "SD Multiplied by 2 (2σ)" = "red")) +
  # The alpha values computed above are used as-is (no alpha legend).
  scale_alpha_identity() +
  labs(
    title = graph_title,
    subtitle = subtitle_info,
    x = xlabel,
    y = ylabel,
    fill = "Data Group",
    color = "Data Group"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(comparison_histogram)

### Doubling the standard deviation while keeping the mean constant leaves the centre of the distribution unchanged but spreads the values out more widely, so the peak of the density becomes lower.
# 3) DOWNLOADING DATA , SUMMARIZING DATA, VISUALIZATION
## 3.1
library(pwt10)
library(dplyr)
library(ggplot2)
data("pwt10.01") 
pwt_data <- pwt10.01
# Annual growth of real GDP per capita for Germany and Turkey.
countries_gdp <- c("DEU", "TUR")
gdp_comparison_data <- pwt_data %>%
  # filter() and lag() also exist in package stats; they are qualified here
  # so the result does not depend on the order in which packages were
  # attached.
  dplyr::filter(isocode %in% countries_gdp, year >= 1990, year <= 2019) %>%
  select(country, year, rgdpe, pop) %>%
  # NOTE(review): the factor 1000 only rescales the level and cancels out in
  # the log difference below; confirm the intended unit of rgdpc against the
  # units of rgdpe and pop in the PWT documentation.
  mutate(rgdpc = rgdpe / pop * 1000) %>%
  group_by(country) %>%
  # Log difference times 100 = approximate percentage growth. order_by makes
  # the lag follow calendar order regardless of row order.
  mutate(gdp_growth = (log(rgdpc) - dplyr::lag(log(rgdpc), order_by = year)) * 100) %>%
  ungroup() %>%
  # The first year of each country has no previous value, so its growth is
  # NA and the row is dropped here.
  na.omit()
gdp_growth_plot <- ggplot(gdp_comparison_data, aes(x = year, y = gdp_growth, color = country)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Annual Real GDP per Capita Growth Rate: Turkey vs Germany (1990-2019)",
    x = "Year",
    y = "Growth Rate (%)",
    color = "Country"
  ) +
  theme_minimal()
print(gdp_growth_plot)

# TFP level (column ctfp) for Spain and Italy, 1990-2019; rows with missing
# values are dropped.
countries_tfp <- c("ESP", "ITA")
tfp_comparison_data <- pwt_data %>%
  # Qualified because stats also has a filter() function.
  dplyr::filter(isocode %in% countries_tfp, year >= 1990, year <= 2019) %>%
  select(country, year, ctfp) %>%
  na.omit()
plot(
  x = tfp_comparison_data$year, 
  y = tfp_comparison_data$ctfp, 
  type = "n",
  main = "Total Factor Productivity (CTFP) Index: Spain vs Italy (1990-2019)",
  xlab = "Year",
  ylab = "CTFP Index"
)
lines(
  x = tfp_comparison_data$year[tfp_comparison_data$country == "Spain"], 
  y = tfp_comparison_data$ctfp[tfp_comparison_data$country == "Spain"],
  col = "blue",
  lwd = 2
)
lines(
  x = tfp_comparison_data$year[tfp_comparison_data$country == "Italy"], 
  y = tfp_comparison_data$ctfp[tfp_comparison_data$country == "Italy"],
  col = "red",
  lwd = 2
)
legend("bottomright", legend = c("Spain", "Italy"), col = c("blue", "red"), lty = 1, lwd = 2)

# Annual growth of real GDP per capita for Turkey and a set of comparison
# countries; used to rank Turkey's growth within the group each year.
similar_countries <- c("TUR", "GRC", "POL", "HUN", "MEX", "BRA", "RUS", "PRT", "ESP", "KOR", "IDN")
ranking_data_full <- pwt_data %>%
  # filter() and lag() also exist in package stats; they are qualified here
  # so the result does not depend on the order in which packages were
  # attached.
  dplyr::filter(isocode %in% similar_countries, year >= 1990, year <= 2019) %>%
  select(country, year, rgdpe, pop, isocode) %>%
  # NOTE(review): the factor 1000 cancels out in the log difference below;
  # confirm the intended unit of rgdpc against the PWT documentation.
  mutate(rgdpc = rgdpe / pop * 1000) %>%
  group_by(country) %>%
  # Log difference times 100 = approximate percentage growth, lagged in
  # calendar order within each country.
  mutate(gdp_growth = (log(rgdpc) - dplyr::lag(log(rgdpc), order_by = year)) * 100) %>%
  ungroup() %>%
  # Drops each country's first year (no previous value) and any row with
  # missing data, so a given year may contain fewer countries than listed.
  na.omit()
turkey_ranking <- ranking_data_full %>%
  group_by(year) %>%
  mutate(rank = rank(-gdp_growth, ties.method = "min")) %>%
  ungroup() %>%
  filter(isocode == "TUR") 
ranking_plot <- ggplot(turkey_ranking, aes(x = year, y = rank)) +
  geom_line(linewidth = 1.2, color = "darkblue") +
  geom_point(color = "red") +
  scale_y_reverse(breaks = seq(1, length(similar_countries), by = 1)) + 
  labs(
    title = paste("Evolution of Turkey's Rank in GDP per Capita Growth (Among", length(similar_countries), "Countries)"),
    x = "Year",
    y = "Rank (1 = Fastest Growth)"
  ) +
  theme_minimal()
print(ranking_plot)

## 3.2
library(quantmod)
library(PerformanceAnalytics)
library(zoo)
stocks <- c("AAPL", "GOOG")
start_date <- "2005-05-05"
end_date <- "2025-10-01"
getSymbols(
  stocks, 
  src = "yahoo", 
  from = start_date, 
  to = end_date,
  periodicity = "monthly", 
  auto.assign = TRUE
)
## [1] "AAPL" "GOOG"
price_data <- merge(AAPL$AAPL.Close, GOOG$GOOG.Close)
names(price_data) <- stocks
plot(price_data, 
     main = paste("Monthly Closing Prices (", start_date, " - ", end_date, ")"),
     legend.loc = "topleft", 
     ylab = "Price ($)",
     col = c("blue", "red")) 
legend("topleft", legend = stocks, col = c("blue", "red"), lty = 1, cex = 0.8)

# Summarise a price series with basic descriptive statistics.
#
# Args:
#   prices: numeric price series; missing values are ignored.
#
# Returns:
#   A one-row data frame with columns Max, Min, Range (Max - Min), Average,
#   SD and CV (SD divided by Average).
stock_stats <- function(prices) {
  highest <- max(prices, na.rm = TRUE)
  lowest <- min(prices, na.rm = TRUE)
  average <- mean(prices, na.rm = TRUE)
  spread <- sd(prices, na.rm = TRUE)

  data.frame(
    Max = highest,
    Min = lowest,
    Range = highest - lowest,
    Average = average,
    SD = spread,
    CV = spread / average
  )
}
stats_aapl <- stock_stats(price_data$AAPL)
stats_goog <- stock_stats(price_data$GOOG)
cat("\nAAPL Price Statistics:\n")
## 
## AAPL Price Statistics:
print(stats_aapl)
##      Max      Min    Range  Average       SD       CV
## 1 254.63 1.314643 253.3154 61.06091 70.78066 1.159181
cat("\nGOOG Price Statistics:\n")
## 
## GOOG Price Statistics:
print(stats_goog)
##      Max    Min    Range  Average       SD        CV
## 1 243.55 7.1233 236.4267 54.90201 53.09748 0.9671319
### Which stock is riskier?
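### The stock with the higher CV is riskier, because it has a higher standard deviation relative to its average price. Here AAPL (CV ≈ 1.16) has a higher CV than GOOG (CV ≈ 0.97), so AAPL is the riskier stock by this measure.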
returns <- na.omit(PerformanceAnalytics::CalculateReturns(price_data, method = "log")) 
names(returns) <- paste0(stocks, "_Return")
plot(returns, 
     main = "Monthly Logarithmic Returns", 
     ylab = "Return",
     col = c("blue", "red"))
legend("bottomleft", legend = c("AAPL Return", "GOOG Return"), col = c("blue", "red"), lty = 1, cex = 0.8)

roll_sd_8 <- rollapply(returns, 
                       width = 8, 
                       FUN = sd, 
                       align = "right",
                       fill = NULL)
names(roll_sd_8) <- paste0(stocks, "_RollingSD8")
plot(returns$AAPL_Return, 
     main = "AAPL Return and Volatility",
     type = "l", 
     col = "blue",
     ylab = "Value")

lines(roll_sd_8$AAPL_RollingSD8, col = "red")
legend("topright", legend = c("Return", "Rolling SD (8-month)"), col = c("blue", "red"), lty = 1, cex = 0.8)

### Is there a change over time?
### A higher rolling SD indicates periods of high risk. 
plot(returns$GOOG_Return, 
     main = "GOOG Return and Volatility",
     type = "l", 
     col = "blue",
     ylab = "Value")

lines(roll_sd_8$GOOG_RollingSD8, col = "red")
legend("topright", legend = c("Return", "Rolling SD (8-month)"), col = c("blue", "red"), lty = 1, cex = 0.8)

returns_matched <- returns[index(roll_sd_8)]
correlation_aapl <- cor(returns_matched$AAPL_Return, roll_sd_8$AAPL_RollingSD8, use = "complete.obs")
correlation_goog <- cor(returns_matched$GOOG_Return, roll_sd_8$GOOG_RollingSD8, use = "complete.obs")
cat("\nAAPL Return and Rolling SD Correlation:", correlation_aapl, "\n")
## 
## AAPL Return and Rolling SD Correlation: -0.09567905
cat("GOOG Return and Rolling SD Correlation:", correlation_goog, "\n")
## GOOG Return and Rolling SD Correlation: -0.112434
### Is there a link?
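### A positive correlation would suggest that high returns coincide with high volatility, which is often expected in financial markets. Here, however, both correlations are weakly negative (about -0.10 for AAPL and -0.11 for GOOG), so there is no strong linear link; if anything, months with higher rolling volatility tend to have slightly lower returns.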
return_to_risk <- returns / roll_sd_8
names(return_to_risk) <- paste0(stocks, "_RtR")
plot(return_to_risk, 
     main = "Return-to-Risk Ratio (Return/SD) Over Time", 
     ylab = "Ratio",
     col = c("blue", "red"))
legend("bottomleft", legend = c("AAPL RtR", "GOOG RtR"), col = c("blue", "red"), lty = 1, cex = 0.8)

### Which one would you suggest as an investment advisor to risk averse person?
### For a risk-averse person, the stock with the highest and most stable Return-to-Risk ratio over time is the suggested investment.
# 4. PROBABILITY CALCULATIONS
## 4.1
# Monte Carlo estimate of the probability that, in a class of `n` people, at
# least `x` of them share the same birthday.
#
# Args:
#   n:        class size (number of people).
#   x:        minimum number of people who must share one birthday.
#   num_sims: number of simulated classes (at least 1).
#
# Returns:
#   The share of simulated classes in which some day is the birthday of at
#   least `x` people. Birthdays are drawn uniformly from 365 days.
simulate_birthday_prob <- function(n, x, num_sims = 5000) {
  stopifnot(is.numeric(num_sims), length(num_sims) == 1, num_sims >= 1)

  hits <- vapply(
    seq_len(num_sims),
    function(sim) {
      birthdays <- sample(1:365, n, replace = TRUE)
      # Count people per day; with nbins fixed an empty class gives all
      # zeros instead of a warning from max() on an empty table.
      max(tabulate(birthdays, nbins = 365)) >= x
    },
    logical(1)
  )

  sum(hits) / num_sims
}
# Grid to evaluate: class sizes (rows) and minimum group sizes (columns).
n_values <- seq(10, 50, by = 5)
x_values <- 2:5
# Result matrix, labelled by the grid values.
prob_matrix <- matrix(
  NA,
  nrow = length(n_values),
  ncol = length(x_values),
  dimnames = list(as.character(n_values), as.character(x_values))
)
set.seed(42)
# Fill row by row (class size outer, group size inner); the order matters
# because every cell consumes random numbers from the same stream.
for (i in seq_along(n_values)) {
  for (j in seq_along(x_values)) {
    prob_matrix[i, j] <- simulate_birthday_prob(
      n_values[i],
      x_values[j],
      num_sims = 1000
    )
  }
}
persp(
  x = n_values, 
  y = x_values, 
  z = prob_matrix, 
  theta = 30, 
  phi = 30, 
  expand = 0.5, 
  col = "lightblue",
  main = "Probability of at least X People Sharing a Birthday in a Class of N",
  xlab = "Class Size (N)",
  ylab = "Min Shared Birthdays (X)",
  zlab = "Probability P(N, X)",
  ticktype = "detailed"
)

## 4.2
# Estimate P(at least 2 heads in 5 tosses) by repeated simulation and plot
# how the running relative frequency approaches the analytical value.
#
# Args:
#   p_heads:    probability of heads on a single toss (between 0 and 1).
#   max_trials: number of 5-toss experiments to simulate (at least 1).
#
# Returns:
#   Numeric vector with one element per experiment; element N is the share
#   of the first N experiments that produced at least two heads. As side
#   effects the function draws the convergence plot and prints the
#   analytical probability.
toss_coin_experiment <- function(p_heads, max_trials = 10000) {
  stopifnot(
    is.numeric(p_heads), length(p_heads) == 1, p_heads >= 0, p_heads <= 1,
    is.numeric(max_trials), length(max_trials) == 1, max_trials >= 1
  )
  trial_ids <- seq_len(max_trials)

  # One experiment = 5 tosses. Experiments are drawn one at a time so the
  # random number stream is consumed in the same order as a per-trial loop.
  is_success <- vapply(
    trial_ids,
    function(trial) sum(rbinom(5, 1, p_heads)) >= 2,
    logical(1)
  )
  # Running relative frequency n/N after each experiment.
  prob_vec <- cumsum(is_success) / trial_ids

  # Exact value: 1 - P(0 heads) - P(1 head) for a Binomial(5, p_heads).
  analytical_prob <- 1 - dbinom(0, 5, p_heads) - dbinom(1, 5, p_heads)

  plot(
    trial_ids,
    prob_vec,
    type = "l",
    ylim = c(0, 1),
    xlab = "Number of Experiments (N)",
    ylab = "Frequency (n/N)",
    main = paste("Convergence of P(At least 2 Heads) | p(Heads) =", p_heads),
    col = "darkgreen"
  )
  abline(h = analytical_prob, col = "red", lty = 2)
  legend(
    "topright",
    legend = c("Simulated Frequency (n/N)", "Analytical Probability"),
    col = c("darkgreen", "red"),
    lty = c(1, 2)
  )

  cat(paste("Analytical Probability (Convergence Value):", analytical_prob, "\n"))
  prob_vec
}
cat("\n--- 4.2.i-iv) Fair Coin (p=0.5) ---\n")
## 
## --- 4.2.i-iv) Fair Coin (p=0.5) ---
set.seed(43)
fair_coin_results <- toss_coin_experiment(p_heads = 0.5, max_trials = 10000)

## Analytical Probability (Convergence Value): 0.8125
cat("\n--- 4.2.v) Unfair Coin (p=0.7) ---\n")
## 
## --- 4.2.v) Unfair Coin (p=0.7) ---
set.seed(43)
unfair_coin_results <- toss_coin_experiment(p_heads = 0.7, max_trials = 10000)

## Analytical Probability (Convergence Value): 0.96922
### Does it converge to a number?
### Yes, the frequency converges to a number. This convergence is guaranteed by the Law of Large Numbers.
## 4.3
# Simulate repeated three-candidate elections (A, B, C) and report how often
# each candidate wins.
#
# Args:
#   n_individuals: number of voters in each election.
#   probs_list:    either a vector of three vote probabilities (for A, B, C)
#                  shared by all voters, or a matrix with two rows of such
#                  probabilities: row 1 is used for the first
#                  floor(n_individuals / 2) voters and row 2 for the rest.
#   max_elections: number of elections to simulate.
#
# Returns:
#   Named numeric vector (A, B, C) of winning frequencies. When several
#   candidates tie for the most votes, the win is split equally among them,
#   so no candidate is favoured by its position in the alphabet. An election
#   without any votes has no winner.
simulate_elections <- function(n_individuals, probs_list, max_elections = 5000) {
  candidates <- c("A", "B", "C")
  winner_counts <- c(A = 0, B = 0, C = 0)

  # A plain vector means one probability set for everybody; anything else is
  # treated as the two-row (polarized) case.
  shared_probs <- is.vector(probs_list)
  n_first <- floor(n_individuals / 2)
  n_second <- n_individuals - n_first

  for (election in seq_len(max_elections)) {
    if (shared_probs) {
      all_votes <- sample(candidates, n_individuals, replace = TRUE, prob = probs_list)
    } else {
      # Draw each half of the electorate in one call instead of growing the
      # vote vector one voter at a time.
      all_votes <- c(
        sample(candidates, n_first, replace = TRUE, prob = probs_list[1, ]),
        sample(candidates, n_second, replace = TRUE, prob = probs_list[2, ])
      )
    }

    # Votes per candidate, always in the order A, B, C (zero if no votes).
    vote_counts <- tabulate(match(all_votes, candidates), nbins = length(candidates))
    if (sum(vote_counts) == 0) {
      next
    }

    leaders <- which(vote_counts == max(vote_counts))
    winner_counts[leaders] <- winner_counts[leaders] + 1 / length(leaders)
  }

  winner_counts / max_elections
}
probs_fixed <- c(A = 0.4, B = 0.3, C = 0.3)
set.seed(44)
win_freq_n5 <- simulate_elections(n_individuals = 5, probs_list = probs_fixed)
set.seed(44)
win_freq_n100 <- simulate_elections(n_individuals = 100, probs_list = probs_fixed)
cat("\n--- 4.3.a: Fixed Probabilities ---\n")
## 
## --- 4.3.a: Fixed Probabilities ---
cat("Expected Individual Probabilities (p_A, p_B, p_C):", probs_fixed, "\n")
## Expected Individual Probabilities (p_A, p_B, p_C): 0.4 0.3 0.3
cat("Winning Frequencies (n=5):", win_freq_n5, "\n")
## Winning Frequencies (n=5): 0.5844 0.2518 0.1638
cat("Winning Frequencies (n=100):", win_freq_n100, "\n")
## Winning Frequencies (n=100): 0.826 0.0856 0.0884
probs_polarized <- matrix(c(0.6, 0.1, 0.3, 0.2, 0.2, 0.6), nrow = 2, byrow = TRUE)
avg_probs <- c(0.4, 0.15, 0.45)
set.seed(44)
win_freq_polarized <- simulate_elections(n_individuals = 100, probs_list = probs_polarized)
cat("\n--- 4.3.b: Polarized Probabilities ---\n")
## 
## --- 4.3.b: Polarized Probabilities ---
cat("Average Individual Probabilities:", avg_probs, "\n")
## Average Individual Probabilities: 0.4 0.15 0.45
cat("Winning Frequencies (n=100):", win_freq_polarized, "\n")
## Winning Frequencies (n=100): 0.3142 0 0.6858
### Is there a difference? 
### Yes. The winning frequency does not converge to the individual vote probability ($P_i$); instead, as the number of voters grows, the candidate with the highest average vote probability across the whole population wins with a frequency that approaches 1. This holds whether the probabilities are fixed or polarized across individuals, provided the number of voters is large.
### What other types of analysis can be done using this kind of simulation?
### This type of Monte Carlo simulation can be effectively used in economic and political analysis to calculate margins of error and certainty intervals in election forecasts, and to model the impacts of undecided voters or differing turnout rates on the final election outcome.
## 4.4
# Compare the empirical tail frequency of a sample with Chebyshev's bound.
#
# Args:
#   data:     numeric vector of observations.
#   k_values: numeric vector of multipliers k (numbers of standard
#             deviations).
#
# Returns:
#   A data frame with one row per k and the columns
#     k                - the multiplier,
#     Empirical_Prob   - share of observations with |x_i - x_bar| >= k * S,
#     Chebyshev_Bound  - 1 / k^2,
#     Inequality_Holds - TRUE when Empirical_Prob <= Chebyshev_Bound.
#   An empty `k_values` gives a data frame with zero rows.
check_chebyshev <- function(data, k_values) {
  # Sample mean (x_bar), sample standard deviation (S) and sample size.
  x_bar <- mean(data)
  S <- sd(data)
  n <- length(data)

  # Absolute deviations are the same for every k, so compute them once.
  abs_dev <- abs(data - x_bar)
  empirical_prob <- vapply(
    k_values,
    function(k) sum(abs_dev >= k * S) / n,
    numeric(1)
  )
  chebyshev_bound <- 1 / k_values^2

  data.frame(
    k = k_values,
    Empirical_Prob = empirical_prob,
    Chebyshev_Bound = chebyshev_bound,
    Inequality_Holds = empirical_prob <= chebyshev_bound
  )
}
k_values_check <- c(1.5, 2, 3)
n_obs <- 10000
### 1. Distribution 1: Normal Distribution
set.seed(45)
normal_data <- rnorm(n = n_obs, mean = 50, sd = 10)
cat("\n--- 4.4 Chebyshev Check: Normal Distribution ---\n")
## 
## --- 4.4 Chebyshev Check: Normal Distribution ---
print(check_chebyshev(normal_data, k_values = k_values_check))
##     k Empirical_Prob Chebyshev_Bound Inequality_Holds
## 1 1.5         0.1320       0.4444444             TRUE
## 2 2.0         0.0455       0.2500000             TRUE
## 3 3.0         0.0027       0.1111111             TRUE
### 2. Distribution 2: Uniform Distribution
set.seed(45)
uniform_data <- runif(n = n_obs, min = 0, max = 100)
cat("\n--- 4.4 Chebyshev Check: Uniform Distribution ---\n")
## 
## --- 4.4 Chebyshev Check: Uniform Distribution ---
print(check_chebyshev(uniform_data, k_values = k_values_check))
##     k Empirical_Prob Chebyshev_Bound Inequality_Holds
## 1 1.5         0.1349       0.4444444             TRUE
## 2 2.0         0.0000       0.2500000             TRUE
## 3 3.0         0.0000       0.1111111             TRUE
### 3. Distribution 3: Exponential Distribution
set.seed(45)
exponential_data <- rexp(n = n_obs, rate = 0.5)
cat("\n--- 4.4 Chebyshev Check: Exponential Distribution ---\n")
## 
## --- 4.4 Chebyshev Check: Exponential Distribution ---
print(check_chebyshev(exponential_data, k_values = k_values_check))
##     k Empirical_Prob Chebyshev_Bound Inequality_Holds
## 1 1.5         0.0836       0.4444444             TRUE
## 2 2.0         0.0519       0.2500000             TRUE
## 3 3.0         0.0194       0.1111111             TRUE