EX. 1

Create a data frame with x and y values

# Six observations: numeric predictor x and binary (0/1) response y
data <- data.frame(
  x = c(0.1, 0.5, 1, 1.5, 2.0, 2.5),
  y = c(0, 0, 1, 1, 1, 0)
)

Define the logistic regression model with the formula

# Fit a logistic regression of y on x (binomial family with a logit link)
model <- glm(
  formula = y ~ x,
  family = binomial(link = "logit"),
  data = data
)

EX. 2

Load the data. To prepare it for PCA, scale the variables so that each has zero mean and unit variance

# Load the built-in mtcars data set into the workspace
data(mtcars)

# View() opens a data-viewer window, so only call it when a user is present;
# this keeps the script runnable in batch mode (e.g. Rscript or rendering)
if (interactive()) {
  View(mtcars)
}

# Per-column summary statistics (min, quartiles, mean, max)
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Standardize every column: subtract its mean, then divide by its standard
# deviation (both options spelled out; they are scale()'s defaults)
scaled_mtcars <- scale(mtcars, center = TRUE, scale = TRUE)

PCA analysis on the scaled data

# scaled_mtcars has already been centered and scaled to unit variance, so
# prcomp() must not standardize it a second time; both options are switched
# off explicitly to make that clear. The components are unchanged.
pca_mtcars <- prcomp(scaled_mtcars, center = FALSE, scale. = FALSE)

Summary of PCA analysis

# Report each component's standard deviation together with its individual
# and cumulative proportion of the total variance
summary(pca_mtcars)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.5707 1.6280 0.79196 0.51923 0.47271 0.46000 0.3678
## Proportion of Variance 0.6008 0.2409 0.05702 0.02451 0.02031 0.01924 0.0123
## Cumulative Proportion  0.6008 0.8417 0.89873 0.92324 0.94356 0.96279 0.9751
##                            PC8    PC9    PC10   PC11
## Standard deviation     0.35057 0.2776 0.22811 0.1485
## Proportion of Variance 0.01117 0.0070 0.00473 0.0020
## Cumulative Proportion  0.98626 0.9933 0.99800 1.0000

Plot PCA results

# Draw a biplot of the PCA result. scale = 0 is forwarded to the biplot
# method's scaling option (see ?biplot.prcomp for its exact effect).
biplot(pca_mtcars, scale = 0)

EX. 3

# Fix the RNG seed, draw 4 * 5 = 20 standard-normal values and arrange them
# column by column into a 4 x 5 matrix
set.seed(123)
A <- matrix(rnorm(4 * 5), nrow = 4, ncol = 5)
print(A)
##             [,1]       [,2]       [,3]       [,4]       [,5]
## [1,] -0.56047565  0.1292877 -0.6868529  0.4007715  0.4978505
## [2,] -0.23017749  1.7150650 -0.4456620  0.1106827 -1.9666172
## [3,]  1.55870831  0.4609162  1.2240818 -0.5558411  0.7013559
## [4,]  0.07050839 -1.2650612  0.3598138  1.7869131 -0.4727914
# Singular value decomposition of A; the result holds the singular values
# ($d) and the left ($u) and right ($v) singular vectors
SVD <- svd(A)
print(SVD)
## $d
## [1] 2.8293464 2.5685461 1.8185128 0.6833783
## 
## $u
##             [,1]       [,2]       [,3]      [,4]
## [1,] -0.05111079 -0.1943683  0.4165550 0.8866175
## [2,] -0.89670108  0.1370951 -0.3888386 0.1610486
## [3,]  0.39412497  0.6455621 -0.5133608 0.4054329
## [4,]  0.19488550 -0.7257241 -0.6416753 0.1536131
## 
## $v
##              [,1]       [,2]        [,3]       [,4]
## [1,]  0.305057630  0.4019612 -0.54406558  0.1591875
## [2,] -0.568821182  0.5550350 -0.02083334  0.5610044
## [3,]  0.348948041  0.2341790 -0.53455804 -0.1890518
## [4,]  0.003336294 -0.6690007 -0.40547715  0.6179494
## [5,]  0.679416249  0.1672171  0.50338314  0.4922717

EX. 4

# Set seed for reproducibility
set.seed(123)

# Number of simulated observations; defined once so that every variable and
# the printed table always agree on the sample size
n <- 100

# x1 and x2: n draws each, uniformly distributed on [1, 2]
x1 <- runif(n, min = 1, max = 2)
x2 <- runif(n, min = 1, max = 2)

# x3 and x4: n draws each, normally distributed with zero mean and unit
# variance
x3 <- rnorm(n, mean = 0, sd = 1)
x4 <- rnorm(n, mean = 0, sd = 1)

# y is an exact linear combination of the four variables (no noise term)
y <- 5 * x1 + 2 * x2 + 2 * x3 + x4

# Print all n simulated rows; the vectors already have length n, so no
# subsetting is needed
print(data.frame(x1, x2, x3, x4, y))
##           x1       x2          x3          x4         y
## 1   1.287578 1.599989 -0.71040656  2.19881035 10.415863
## 2   1.788305 1.332824  0.25688371  1.31241298 13.433353
## 3   1.408977 1.488613 -0.24669188 -0.26514506  9.263582
## 4   1.883017 1.954474 -0.34754260  0.54319406 13.172144
## 5   1.940467 1.482902 -0.95161857 -0.41433995 10.350564
## 6   1.045556 1.890350 -0.04502772 -0.47624689  8.442181
## 7   1.528105 1.914438 -0.78490447 -0.78860284  9.110992
## 8   1.892419 1.608735 -1.66794194 -0.59461727  8.749064
## 9   1.551435 1.410690 -0.38022652  1.65090747 11.469009
## 10  1.456615 1.147095  0.91899661 -0.05402813 11.361228
## 11  1.956833 1.935300 -0.57534696  0.11924524 12.623318
## 12  1.453334 1.301229  0.60796432  0.24368743 11.328745
## 13  1.677571 1.060721 -1.61788271  1.23247588  8.506005
## 14  1.572633 1.947727 -0.05556197 -0.51606383 11.131433
## 15  1.102925 1.720596  0.51940720 -0.99250715  9.002123
## 16  1.899825 1.142294  0.30115336  1.67569693 14.061717
## 17  1.246088 1.549285  0.10567619 -0.44116322  9.099197
## 18  1.042060 1.954091 -0.64070601 -0.72306597  7.114002
## 19  1.327921 1.585483 -0.84970435 -1.23627312  6.874888
## 20  1.954504 1.404510 -1.02412879 -1.28471572  9.248566
## 21  1.889539 1.647893  0.11764660 -0.57397348 12.404803
## 22  1.692803 1.319821 -0.94747461  0.61798582  9.826695
## 23  1.640507 1.307720 -0.49055744  1.10984814 10.946707
## 24  1.994270 1.219768 -0.25609219  0.70758835 12.606288
## 25  1.655706 1.369489  1.84386201 -0.36365730 14.341573
## 26  1.708530 1.984219 -0.65194990  0.05974994 11.266941
## 27  1.544066 1.154202  0.23538657 -0.70459646  9.794911
## 28  1.594142 1.091044  0.07796085 -0.71721816  9.591502
## 29  1.289160 1.141907 -0.96185663  0.88465050  7.690550
## 30  1.147114 1.690007 -0.07130809 -1.01559258  7.957374
## 31  1.963024 1.619256  1.44455086  1.95529397 17.898030
## 32  1.902299 1.891394  0.45150405 -0.09031959 14.106972
## 33  1.690705 1.672999  0.04123292  0.21453883 12.096529
## 34  1.795467 1.737078 -0.42249683 -0.73852770 10.867971
## 35  1.024614 1.521136 -2.05324722 -0.57438869  3.484457
## 36  1.477796 1.659838  1.13133721 -1.31701613 11.654315
## 37  1.758460 1.821805 -1.46064007 -0.18292539  9.331703
## 38  1.216408 1.786282  0.73994751  0.41898240 11.553480
## 39  1.318181 1.979822  1.90910357  0.32430434 14.693060
## 40  1.231626 1.439432 -1.44389316 -0.78153649  5.367669
## 41  1.142800 1.311702  0.70178434 -0.78862197  8.952351
## 42  1.414546 1.409475 -0.26219749 -0.50219872  8.865088
## 43  1.413724 1.010467 -1.57214416  1.49606067  7.441328
## 44  1.368845 1.183850 -1.51466765 -1.13730362  5.045287
## 45  1.152445 1.842729 -1.60153617 -0.17905159  6.065558
## 46  1.138806 1.231162 -0.53090652  1.90236182  8.996903
## 47  1.233034 1.239100 -1.46175558 -0.10097489  5.618884
## 48  1.465962 1.076691  0.68791677 -1.35984070  9.499187
## 49  1.265973 1.245724  2.10010894 -0.66476944 12.356759
## 50  1.857828 1.732135 -1.28703048  0.48545998 10.664808
## 51  1.045831 1.847453  0.78773885 -0.37560287 10.123937
## 52  1.442200 1.497527  0.76904224 -0.56187636 11.182263
## 53  1.798925 1.387909  0.33220258 -0.34391723 12.090930
## 54  1.121899 1.246449 -1.00837661  0.09049665  6.176138
## 55  1.560948 1.111096 -0.11945261  1.59850877 11.386536
## 56  1.206531 1.389994 -0.28039534 -0.08856511  8.163290
## 57  1.127532 1.571935  0.56298953  1.08079950 10.988307
## 58  1.753308 1.216893 -0.37243876  0.63075412 11.086201
## 59  1.895045 1.444768  0.97697339 -0.11363990 14.205070
## 60  1.374463 1.217991 -0.37458086 -1.53290200  7.026231
## 61  1.665115 1.502300  1.05271147 -0.52111732 12.914481
## 62  1.094841 1.353905 -1.04917701 -0.48987045  5.593788
## 63  1.383970 1.649985 -1.26015524  0.04715443  7.746662
## 64  1.274384 1.374714  3.24103993  1.30019868 16.903625
## 65  1.814640 1.355445 -0.41685759  2.29307897 13.243455
## 66  1.448516 1.533688  0.29822759  1.54758106 12.453994
## 67  1.810064 1.740334  0.63656967 -0.13315096 13.670979
## 68  1.812390 1.221103 -0.48378063 -1.75652740  8.780065
## 69  1.794342 1.412746  0.51686204 -0.38877986 12.442148
## 70  1.439832 1.265687  0.36896453  0.08920722 10.557668
## 71  1.754475 1.629973 -0.21538051  0.84501300 12.446574
## 72  1.629221 1.183828  0.06529303  0.96252797 11.606877
## 73  1.710182 1.863644 -0.03406725  0.68430943 12.894375
## 74  1.000625 1.746568  2.12845190 -1.39527435 11.357889
## 75  1.475317 1.668285 -0.74133610  0.84964305 10.080123
## 76  1.220119 1.618018 -1.09599627 -0.44655722  6.698080
## 77  1.379817 1.372238  0.03778840  0.17480270  9.893938
## 78  1.612771 1.529836  0.31048075  0.07455118 11.819039
## 79  1.351798 1.874682  0.43652348  0.42816676 11.809568
## 80  1.111135 1.581750 -0.45836533  0.02467498  7.827122
## 81  1.243619 1.839768 -1.06332613 -1.66747510  6.103506
## 82  1.668056 1.312448  1.26318518  0.73649596 14.228041
## 83  1.417647 1.708290 -0.34965039  0.38602657 10.191540
## 84  1.788196 1.265018 -0.86551286 -0.26565163  9.474337
## 85  1.102865 1.594343 -0.23627957  0.11814451  8.348595
## 86  1.434893 1.481290 -0.19717589  0.13403865  9.876730
## 87  1.984957 1.265033  1.10992029  0.22101947 14.895710
## 88  1.893051 1.564590  0.08473729  1.64084617 14.404757
## 89  1.886469 1.913188  0.75405379 -0.21905038 14.547779
## 90  1.175053 1.901874 -0.49929202  0.16806538  8.848493
## 91  1.130696 1.274167  0.21444531  1.16838387  9.799086
## 92  1.653102 1.321483 -0.32468591  1.05418102 11.313284
## 93  1.343516 1.985641  0.09458353  1.14526311 12.023294
## 94  1.656758 1.619993 -0.89536336 -0.57746800  9.155583
## 95  1.320373 1.937314 -1.31080153  2.00248273  9.857374
## 96  1.187691 1.466533  1.99721338  0.06670087 12.932649
## 97  1.782294 1.406833  0.60070882  1.86685184 14.793406
## 98  1.093595 1.659230 -1.25127136 -1.35090269  4.932990
## 99  1.466779 1.152347 -0.61116592  0.02098359  8.437240
## 100 1.511505 1.572867 -1.18548008  1.24991457  9.582216
# Gather the four simulated variables and y into one table, then run PCA on
# it with each column rescaled to unit variance
pca_input <- data.frame(x1, x2, x3, x4, y)
pca <- prcomp(pca_input, scale. = TRUE)

# Show each component's standard deviation and share of the total variance
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4       PC5
## Standard deviation     1.4547 1.0701 0.9667 0.8968 4.539e-16
## Proportion of Variance 0.4232 0.2290 0.1869 0.1608 0.000e+00
## Cumulative Proportion  0.4232 0.6522 0.8391 1.0000 1.000e+00

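Yes, the results of the principal component analysis (PCA) on the simulated data are what we expect from the formula used to generate the data. Because y = 5*x1 + 2*x2 + 2*x3 + x4 is an exact linear combination of the other four variables (there is no noise term), the five variables span only four dimensions. Accordingly, the fifth principal component has a standard deviation of essentially zero (4.539e-16, i.e. floating-point rounding error) and explains none of the variance, while the first four components together account for 100% of it.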