This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Template chunk: plot whatever `cars` refers to at this point. Nothing in
# this file has assigned `cars` yet, so this presumably draws R's built-in
# `cars` dataset -- TODO confirm that is intended.
plot(cars)

# Read the Cars data file into a data frame, keeping text columns as
# character vectors instead of factors.
cars_file <- "C:/Users/Kadeem Green/Downloads/cars.txt"
cars <- read.csv(cars_file, stringsAsFactors = FALSE)

# Preview the first six records.
head(cars, n = 6L)

# Generic plot of the whole data frame.
plot(cars)

# Small working subset: records 1 to 5, columns 1, 3, 4 and 8.
keep_rows <- 1:5
keep_cols <- c(1, 3, 4, 8)
cars_tiny <- cars[keep_rows, keep_cols]
cars_tiny
# --- Replace missing values with constants ---
# Simulate missing data by blanking two cells of the subset:
# row 2 of column 2 and row 4 of column 4.
cars_tiny[[2]][2] <- NA
cars_tiny[[4]][4] <- NA
# Fill each blank with a fixed stand-in value: 0 for the cell in column 2,
# the text "Missing" for the cell in column 4.
cars_tiny[[2]][2] <- 0
cars_tiny[[4]][4] <- "Missing"
# Show the patched subset.
cars_tiny
# --- Replace missing values using field mean or mode ---
# Recreate the missing value table
cars_tiny[2, 2] <- NA
cars_tiny[4, 4] <- NA
# Replace cars_tiny[2,2] with the mean of "cubicinches" (ignoring NAs)
cars_tiny[2, 2] <- mean(cars_tiny$cubicinches, na.rm = TRUE)
# Replace cars_tiny[4,4] with the mode of "brand".
# table() leaves NA out of the counts. which.max() returns the position of
# the FIRST maximum, so exactly one label is selected even when several
# brands are tied for most frequent; a vector of tied labels could not be
# assigned into a single cell.
our_table <- table(cars_tiny$brand)
our_mode <- names(our_table)[which.max(our_table)]
cars_tiny[4, 4] <- our_mode
cars_tiny
# --- Replace missing values with random observed values ---
# Recreate the missing value table
cars_tiny[2, 2] <- NA
cars_tiny[4, 4] <- NA
# Collect the values that are actually observed in each column.
observed_brand <- cars_tiny$brand[!is.na(cars_tiny$brand)]
observed_cubicinches <- cars_tiny$cubicinches[!is.na(cars_tiny$cubicinches)]
# Draw one random observation from each observed distribution.
# A random INDEX is sampled rather than calling sample() on the values:
# sample(x, 1) treats a single number x >= 1 as the range 1:x, which would
# return a value that was never observed if only one observation remained.
# The draw order (brand first, then cubicinches) is kept as before.
obs_brand <- observed_brand[sample.int(length(observed_brand), 1)]
obs_cubicinches <-
  observed_cubicinches[sample.int(length(observed_cubicinches), 1)]
# Replace the missing values with these random observations
cars_tiny[2, 2] <- obs_cubicinches
cars_tiny[4, 4] <- obs_brand
cars_tiny
# --- Summary statistics for weight ---
# NOTE(review): `$` partially matches column names on a data frame; if the
# column is really named something longer (e.g. `weightlbs`), this lookup
# depends on that behaviour -- confirm the actual column name.
weight_values <- cars$weight
# Minimum, quartiles, median, mean and maximum of the weight values.
summary(weight_values)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1613 2246 2835 3005 3664 4997
# Number of weight values.
length(weight_values)
## [1] 261
# --- Min-Max Normalization and Z-score ---
# Min-max normalization: shift weight by its minimum and divide by the
# spread (maximum minus minimum).
weight_limits <- range(cars$weight)
mmnorm.weight <- (cars$weight - weight_limits[1]) / diff(weight_limits)
mmnorm.weight
## [1] 0.76713948 0.09219858 0.54255319 0.63475177 0.12943262 0.67582742
## [7] 0.81264775 0.79757683 0.56648936 0.12913712 0.18676123 0.16991726
## [13] 0.74704492 0.60608747 0.52245863 0.81176123 0.12027187 0.53782506
## [19] 0.19651300 0.24143026 0.11997636 0.29166667 0.41341608 0.72133570
## [25] 0.10490544 0.05378251 0.04728132 0.89952719 0.57949173 0.22665485
## [31] 0.15277778 0.49202128 0.26388889 0.30585106 0.10992908 0.53250591
## [37] 0.06560284 0.32712766 0.69208038 0.48817967 0.67494090 0.89391253
## [43] 0.60047281 0.56353428 0.43528369 0.30200946 0.63297872 0.23108747
## [49] 0.09663121 0.42760047 0.84249409 0.46601655 0.58983452 0.16105201
## [55] 0.81767139 0.78752955 0.38918440 0.13652482 0.09574468 0.65248227
## [61] 0.39361702 0.22635934 0.38327423 0.14686761 0.77541371 0.56501182
## [67] 0.09958629 0.10401891 0.06264775 0.66991726 0.44001182 0.30348700
## [73] 0.19946809 0.27541371 0.17907801 0.41430260 0.43498818 0.26654846
## [79] 0.19267139 0.21778960 0.80141844 0.98670213 0.53546099 0.31944444
## [85] 0.17937352 0.00000000 0.34308511 0.54728132 0.15130024 0.32712766
## [91] 0.10697400 0.20301418 0.73492908 0.50709220 0.18971631 0.67789598
## [97] 0.85283688 0.45094563 0.65514184 0.64066194 1.00000000 0.97310875
## [103] 0.15159574 0.17346336 0.19267139 0.30200946 0.21335697 0.13356974
## [109] 0.31264775 0.55880615 0.19680851 0.39007092 0.80437352 0.05378251
## [115] 0.15277778 0.64184397 0.33303783 0.42139480 0.12765957 0.80614657
## [121] 0.17405437 0.10342790 0.90277778 0.33451537 0.06530733 0.28132388
## [127] 0.89864066 0.18232861 0.28634752 0.05526005 0.20301418 0.31471631
## [133] 0.98640662 0.60845154 0.83540189 0.76152482 0.10106383 0.45981087
## [139] 0.39361702 0.40366430 0.32121749 0.16489362 0.31087470 0.04343972
## [145] 0.58865248 0.39716312 0.51625296 0.33156028 0.61465721 0.47044917
## [151] 0.27452719 0.27984634 0.83096927 0.45656028 0.25147754 0.49911348
## [157] 0.15100473 0.15425532 0.30171395 0.53102837 0.41075650 0.72901891
## [163] 0.40514184 0.38150118 0.18942080 0.51773050 0.53959811 0.28723404
## [169] 0.35283688 0.15130024 0.20005910 0.35963357 0.07505910 0.28427896
## [175] 0.18114657 0.17021277 0.81914894 0.60845154 0.57830969 0.12765957
## [181] 0.40691489 0.15721040 0.76891253 0.53871158 0.23108747 0.43055556
## [187] 0.54550827 0.68764775 0.74349882 0.92257683 0.29757683 0.31235225
## [193] 0.83008274 0.72960993 0.72813239 0.15691489 0.25916076 0.10992908
## [199] 0.09574468 0.09810875 0.18705674 0.30319149 0.53398345 0.59604019
## [205] 0.23286052 0.66105201 0.14686761 0.41607565 0.73433806 0.38031915
## [211] 0.35520095 0.54137116 0.04196217 0.13622931 0.47872340 0.29609929
## [217] 0.10697400 0.76891253 0.29018913 0.79255319 0.46690307 0.34929078
## [223] 0.84042553 0.65218676 0.71778960 0.11524823 0.88711584 0.01063830
## [229] 0.28782506 0.34781324 0.57624113 0.29905437 0.28930260 0.85254137
## [235] 0.45419622 0.15277778 0.73374704 0.39952719 0.07742317 0.17937352
## [241] 0.72163121 0.44562648 0.77039007 0.31294326 0.80998818 0.52777778
## [247] 0.62706856 0.13947991 0.16016548 0.26950355 0.14982270 0.30939716
## [253] 0.39509456 0.70183215 0.32860520 0.22429078 0.65809693 0.05526005
## [259] 0.36111111 0.49497636 0.51418440
# Z-score standardization: subtract the mean of weight and divide by its
# standard deviation.
weight_center <- mean(cars$weight)
weight_spread <- sd(cars$weight)
zscore.weight <- (cars$weight - weight_center) / weight_spread
zscore.weight
## [1] 1.411500375 -1.267221019 0.520157004 0.886076914 -1.119445671
## [6] 1.049098925 1.592114690 1.532300858 0.615155442 -1.120618491
## [11] -0.891918547 -0.958769300 1.331748600 0.772313352 0.440405229
## [16] 1.588596229 -1.155803098 0.501391880 -0.853215480 -0.674946806
## [21] -1.156975918 -0.475567367 0.007634565 1.229713240 -1.216789750
## [26] -1.419687649 -1.445489694 1.936923836 0.666759532 -0.733587817
## [31] -1.026792873 0.319604745 -0.585812469 -0.419271997 -1.196851806
## [36] 0.480281116 -1.372774840 -0.334828940 1.113604038 0.304358082
## [41] 1.045580465 1.914640252 0.750029768 0.603427240 0.094423262
## [46] -0.434518660 0.879039993 -0.715995514 -1.249628716 0.063929936
## [51] 1.710569532 0.216396565 0.707808240 -0.993953907 1.612052634
## [56] 1.492424971 -0.088536693 -1.091297986 -1.253147177 0.956446128
## [61] -0.070944390 -0.734760637 -0.111993098 -1.050249278 1.444339341
## [66] 0.609291341 -1.237900514 -1.220308210 -1.384503042 1.025642521
## [71] 0.113188386 -0.428654558 -0.841487278 -0.540072480 -0.922411873
## [76] 0.011153026 0.093250442 -0.575257087 -0.868462143 -0.768772424
## [81] 1.547547521 2.282905802 0.492009318 -0.365322266 -0.921239053
## [86] -1.633140930 -0.271496648 0.538922127 -1.032656974 -0.334828940
## [91] -1.208580008 -0.827413435 1.283662970 0.379418577 -0.880190345
## [96] 1.057308667 1.751618240 0.156582734 0.967001510 0.909533319
## [101] 2.335682713 2.228956072 -1.031484154 -0.944695458 -0.868462143
## [106] -0.434518660 -0.786364727 -1.103026188 -0.392297131 0.584662116
## [111] -0.852042660 -0.085018232 1.559275723 -1.419687649 -1.026792873
## [116] 0.914224600 -0.311372536 0.039300711 -1.126482592 1.566312645
## [121] -0.942349817 -1.222653851 1.949824858 -0.305508435 -1.373947660
## [126] -0.516616075 1.933405375 -0.909510851 -0.496678131 -1.413823548
## [131] -0.827413435 -0.384087390 2.281732982 0.781695914 1.682421847
## [136] 1.389216791 -1.232036413 0.191767341 -0.070944390 -0.031068502
## [141] -0.358285345 -0.978707244 -0.399334053 -1.460736357 0.703116959
## [146] -0.056870547 0.415776004 -0.317236637 0.806325139 0.233988869
## [151] -0.543590941 -0.522480176 1.664829544 0.178866318 -0.635070918
## [156] 0.347752431 -1.033829795 -1.020928772 -0.435691480 0.474417015
## [161] -0.002920817 1.260206566 -0.025204401 -0.119030019 -0.881363165
## [166] 0.421640105 0.508428802 -0.493159671 -0.232793581 -1.032656974
## [171] -0.839141637 -0.205818716 -1.335244593 -0.504887873 -0.914202132
## [176] -0.957596480 1.617916735 0.781695914 0.662068251 -1.126482592
## [181] -0.018167480 -1.009200570 1.418537296 0.504910341 -0.715995514
## [186] 0.075658138 0.531885206 1.096011734 1.317674757 2.028403814
## [191] -0.452110963 -0.393469952 1.661311083 1.262552206 1.256688105
## [196] -1.010373390 -0.604577592 -1.196851806 -1.253147177 -1.243764615
## [201] -0.890745727 -0.429827379 0.486145217 0.732437465 -0.708958592
## [206] 0.990457914 -1.050249278 0.018189947 1.281317330 -0.123721300
## [211] -0.223411019 0.515465723 -1.466600458 -1.092470806 0.266827835
## [216] -0.457975064 -1.208580008 1.418537296 -0.481431469 1.512362914
## [221] 0.219915026 -0.246867424 1.702359791 0.955273307 1.215639397
## [226] -1.175741042 1.887665386 -1.590919402 -0.490814030 -0.252731525
## [231] 0.653858510 -0.446246862 -0.484949929 1.750445420 0.169483756
## [236] -1.026792873 1.278971690 -0.047487985 -1.325862031 -0.921239053
## [241] 1.230886060 0.135471970 1.424401398 -0.391124311 1.581559308
## [246] 0.461515993 0.855583588 -1.079569783 -0.997472368 -0.563528884
## [251] -1.038521076 -0.405198154 -0.065080289 1.152307105 -0.328964839
## [256] -0.742970379 0.978729712 -1.413823548 -0.199954614 0.331332948
## [261] 0.407566262
# --- Histogram using the cars2 dataset ---
# Read the data file again, this time converting text columns to factors.
cars2 <- read.csv("C:/Users/Kadeem Green/Downloads/cars.txt",
                  stringsAsFactors = TRUE)
# Use a single plotting panel.
par(mfrow = c(1, 1))
# Histogram of weight with fixed axis limits and 30 requested breaks.
hist(cars2$weight,
     main = "Histogram of Car Weights",
     xlab = "Weight",
     ylab = "Counts",
     xlim = c(0, 5000),
     ylim = c(0, 40),
     breaks = 30,
     col = "blue",
     border = "black")
# Draw a solid black frame around the plot region.
box(which = "plot", lty = "solid", col = "black")
# --- Scatterplot of MPG by Weight ---
# Filled blue points (pch 16) of mpg against weight.
# NOTE(review): the y-axis runs to 600, which looks very large for an MPG
# axis -- confirm the data really contain values that high.
plot(cars2$weight, cars2$mpg,
     main = "Scatterplot of MPG by Weight",
     xlab = "Weight",
     ylab = "MPG",
     xlim = c(0, 5000),
     ylim = c(0, 600),
     type = "p",
     pch = 16,
     col = "blue")
# Overlay the same points again in black using the default plotting symbol.
points(cars2$weight, cars2$mpg,
       col = "black",
       type = "p")
# --- Transformations ---
# Natural logarithm of every weight value.
weight_for_log <- cars$weight
natlog_weight <- log(weight_for_log)
print(natlog_weight)
## [1] 8.344980 7.562681 8.145840 8.232440 7.626083 8.268732 8.380915 8.369157
## [9] 8.169053 7.625595 7.716461 7.690743 8.328693 8.206311 8.125927 8.380227
## [17] 7.610853 8.141190 7.731053 7.795647 7.610358 7.863267 8.010360 8.307459
## [25] 7.584773 7.492760 7.480428 8.446127 8.181441 7.774856 7.663877 8.094989
## [33] 7.826443 7.881560 7.593374 8.135933 7.514800 7.908387 8.282736 8.091015
## [41] 8.267962 8.442039 8.201112 8.166216 8.034631 7.876638 8.230844 7.781139
## [49] 7.570443 8.026170 8.403801 8.067776 8.191186 7.676937 8.384804 8.361241
## [57] 7.982758 7.637716 7.568896 8.248267 7.987864 7.774436 7.975908 7.654443
## [65] 8.351611 8.167636 7.575585 7.583248 7.509335 8.263590 8.039802 7.878534
## [73] 7.735433 7.841886 7.704812 8.011355 8.034307 7.830028 7.725330 7.762171
## [81] 8.372167 8.507547 8.138857 7.898782 7.705262 7.385851 7.928046 8.150468
## [89] 7.661527 7.908387 7.588324 7.740664 8.318742 8.110427 7.720905 8.270525
## [97] 8.411610 8.051660 8.250620 8.237744 8.516593 8.498214 7.661998 7.696213
## [105] 7.725330 7.876638 7.755767 7.632886 7.890208 8.161660 7.731492 7.983781
## [113] 8.374477 7.492760 7.663877 8.238801 7.915713 8.019284 7.623153 8.375860
## [121] 7.697121 7.582229 8.448486 7.917536 7.514255 7.849714 8.445482 7.709757
## [129] 7.856320 7.495542 7.740664 7.892826 8.507345 8.208492 8.398410 8.340456
## [137] 7.578145 8.061171 7.987864 7.999343 7.901007 7.682943 7.887959 7.473069
## [145] 8.190077 7.991931 8.119696 7.913887 8.214194 8.072467 7.840706 7.847763
## [153] 8.395026 8.057694 7.809541 8.102284 7.661056 7.666222 7.876259 8.134468
## [161] 8.007367 8.313852 8.001020 7.973844 7.720462 8.121183 8.142936 7.857481
## [169] 7.939872 7.661527 7.736307 7.948032 7.532088 7.853605 7.707962 7.691200
## [177] 8.385945 8.208492 8.180321 7.623153 8.003029 7.670895 8.346405 8.142063
## [185] 7.781139 8.029433 8.148735 8.278936 8.325791 8.462737 7.870930 7.889834
## [193] 8.394347 8.314342 8.313117 7.670429 7.820038 7.593374 7.568896 7.573017
## [201] 7.716906 7.878155 8.137396 8.196988 7.783641 8.255828 7.654443 8.013343
## [209] 8.318254 7.972466 7.942718 8.144679 7.470224 7.637234 8.081166 7.869019
## [217] 7.588324 8.346405 7.861342 8.365207 8.068716 7.935587 8.402231 8.248006
## [225] 8.304495 7.602401 8.437067 7.407924 7.858254 7.933797 8.178358 7.872836
## [233] 7.860185 8.411388 8.055158 7.663877 8.317766 7.994632 7.536364 7.705262
## [241] 8.307706 8.045909 8.347590 7.890583 8.378850 8.131237 8.225503 7.642524
## [249] 7.675546 7.833996 7.659171 7.886081 7.989560 8.291045 7.910224 7.771489
## [257] 8.253228 7.495542 7.949797 8.098035 8.117611
# Inverse square root of every weight value: 1 / sqrt(weight).
weight_root <- sqrt(cars$weight)
invsqrt_weight <- 1 / weight_root
print(invsqrt_weight)
## [1] 0.01541383 0.02279212 0.01702760 0.01630603 0.02208092 0.01601282
## [7] 0.01513936 0.01522862 0.01683111 0.02208631 0.02110531 0.02137846
## [13] 0.01553987 0.01652047 0.01719798 0.01514456 0.02224971 0.01706723
## [19] 0.02095189 0.02028602 0.02225522 0.01961161 0.01822101 0.01570573
## [25] 0.02254174 0.02360303 0.02374902 0.01465369 0.01672718 0.02049800
## [31] 0.02166757 0.01746608 0.01997604 0.01943305 0.02244501 0.01711215
## [37] 0.02334436 0.01917412 0.01590108 0.01750082 0.01601898 0.01468367
## [43] 0.01656347 0.01685500 0.01800122 0.01948093 0.01631906 0.02043371
## [49] 0.02270383 0.01807754 0.01496711 0.01770536 0.01664587 0.02152654
## [55] 0.01510995 0.01528902 0.01847422 0.02195285 0.02272141 0.01617750
## [61] 0.01842712 0.02050231 0.01853760 0.02177002 0.01536281 0.01684304
## [67] 0.02264554 0.02255894 0.02340823 0.01605403 0.01795474 0.01946247
## [73] 0.02090605 0.01982239 0.02122860 0.01821195 0.01800414 0.01994027
## [79] 0.02101193 0.02062842 0.01520572 0.01421051 0.01708715 0.01926643
## [85] 0.02122382 0.02489905 0.01898658 0.01698824 0.02169305 0.01917412
## [91] 0.02250176 0.02085144 0.01561738 0.01733178 0.02105847 0.01599846
## [97] 0.01490878 0.01784861 0.01615848 0.01626285 0.01414638 0.01427698
## [103] 0.02168794 0.02132007 0.02101193 0.01948093 0.02069458 0.02200594
## [109] 0.01934920 0.01689343 0.02094729 0.01846477 0.01518817 0.02360303
## [115] 0.02166757 0.01625425 0.01910402 0.01813989 0.02211329 0.01517767
## [121] 0.02131039 0.02257043 0.01463641 0.01908661 0.02335072 0.01974496
## [127] 0.01465841 0.02117618 0.01967985 0.02357023 0.02085144 0.01932390
## [133] 0.01421195 0.01650246 0.01500751 0.01544874 0.02261656 0.01776392
## [139] 0.01842712 0.01832166 0.01924501 0.02146199 0.01937097 0.02383656
## [145] 0.01665510 0.01838969 0.01725164 0.01912147 0.01645547 0.01766388
## [151] 0.01983409 0.01976424 0.01503292 0.01779483 0.02014557 0.01740249
## [157] 0.02169815 0.02164218 0.01948463 0.01712469 0.01824830 0.01565561
## [163] 0.01830630 0.01855674 0.02106314 0.01723882 0.01705234 0.01966843
## [169] 0.01887465 0.02169305 0.02089692 0.01879779 0.02314344 0.01970659
## [175] 0.02119520 0.02137357 0.01510133 0.01650246 0.01673655 0.02211329
## [181] 0.01828792 0.02159168 0.01540285 0.01705978 0.02043371 0.01804807
## [187] 0.01700297 0.01593132 0.01556244 0.01453249 0.01953662 0.01935282
## [193] 0.01503802 0.01565177 0.01566137 0.02159671 0.02004012 0.02244501
## [199] 0.02272141 0.02267463 0.02110062 0.01946616 0.01709964 0.01659765
## [205] 0.02040816 0.01611646 0.02177002 0.01819385 0.01562119 0.01856953
## [211] 0.01884781 0.01703748 0.02387050 0.02195814 0.01758722 0.01955529
## [217] 0.02250176 0.01540285 0.01963050 0.01525873 0.01769704 0.01891512
## [223] 0.01497886 0.01617962 0.01572903 0.02234393 0.01472021 0.02462576
## [229] 0.01966083 0.01893206 0.01675298 0.01951800 0.01964186 0.01491043
## [235] 0.01781742 0.02166757 0.01562500 0.01836486 0.02309401 0.02122382
## [241] 0.01570379 0.01790000 0.01539373 0.01934558 0.01515499 0.01715238
## [247] 0.01636269 0.02190014 0.02154152 0.01990074 0.02171861 0.01938917
## [253] 0.01841149 0.01583516 0.01915653 0.02053254 0.01613743 0.02357023
## [259] 0.01878121 0.01743950 0.01726964
# --- Skewness calculations ---
# Skewness statistic used below: 3 * (mean - median) / standard deviation.
skew_stat <- function(values) {
  (3 * (mean(values) - median(values))) / sd(values)
}
# Skewness of the raw weights and of their z-scores.
weight_skew <- skew_stat(cars$weight)
zscore.weight_skew <- skew_stat(zscore.weight)
weight_skew
## [1] 0.5998638
zscore.weight_skew
## [1] 0.5998638
# Skewness of the natural-log-transformed weights.
lnweight_skew <- skew_stat(natlog_weight)
lnweight_skew
## [1] 0.1955832
# --- Side-by-Side Histograms of Weight and Z-score of Weight ---
# Split the device into one row of two panels.
par(mfrow = c(1, 2))
# Left panel: raw weights.
hist(cars$weight,
     main = "Histogram of Weight",
     xlab = "Weight",
     ylab = "Counts",
     xlim = c(1000, 5000),
     breaks = 30)
box(which = "plot", lty = "solid", col = "black")
# Right panel: z-scores of weight.
hist(zscore.weight,
     main = "Histogram of Z-score of Weight",
     xlab = "Z-score of Weight",
     ylab = "Counts",
     xlim = c(-2, 3),
     breaks = 30)
box(which = "plot", lty = "solid", col = "black")
# --- Normal Probability Plot of Inverse Square Root of Weight ---
# Back to a single plotting panel.
par(mfrow = c(1, 1))
# Points in red, reference line in blue; both calls pass datax = TRUE so
# the line is drawn in the same orientation as the points.
qqnorm(invsqrt_weight,
       main = "Normal Q-Q Plot of Inverse Square Root of Weight",
       ylim = c(0.01, 0.03),
       col = "red",
       datax = TRUE)
qqline(invsqrt_weight, datax = TRUE, col = "blue")
# Skewness of the transformed values: 3 * (mean - median) / sd.
invsqrt_center_gap <- mean(invsqrt_weight) - median(invsqrt_weight)
invsqweight_skew <- (3 * invsqrt_center_gap) / sd(invsqrt_weight)
invsqweight_skew
## [1] 0.01543358
# --- Histogram with Fitted Normal Distribution ---
# Simulate one million draws from a Normal distribution that has the same
# mean and standard deviation as the transformed weights. The draws are
# random (no seed is set here), so the overlaid curve varies slightly from
# run to run.
x_sim <- rnorm(1000000, mean = mean(invsqrt_weight), sd = sd(invsqrt_weight))
par(mfrow = c(1, 1))
# Density-scaled histogram, so its heights are comparable with the density
# curve added below. The argument is spelled out in full (`probability`)
# instead of relying on partial matching of `prob`.
hist(invsqrt_weight,
     breaks = 30,
     xlim = c(0.0125, 0.0275),
     col = "lightblue",
     probability = TRUE,
     border = "black",
     xlab = "Inverse Square Root of Weight",
     ylab = "Density",
     main = "Histogram of Inverse Square Root of Weight")
box(which = "plot", lty = "solid", col = "black")
# Overlay the estimated density of the simulated draws in red.
lines(density(x_sim), col = "red")
# --- Creating Flag Variables for Regions ---
# Region label for each of the ten observations.
region <- c(rep(c("north", "south", "east", "west"), 2), "north", "south")
# Flag values: 1 if the region matches, 0 otherwise.
# The comparisons are vectorised, so every flag vector always has exactly
# one entry per element of `region`; no placeholder values or hard-coded
# lengths are needed. No flag is created for "west": those observations are
# the ones where all three flags are 0.
north_flag <- as.numeric(region == "north")
east_flag <- as.numeric(region == "east")
south_flag <- as.numeric(region == "south")
north_flag
## [1] 1 0 0 0 1 0 0 0 1 0
east_flag
## [1] 0 0 1 0 0 0 1 0 0 0
south_flag
## [1] 0 1 0 0 0 1 0 0 0 1
# --- Data Transformation Example ---
# Take the first weight value as the worked example.
x_val <- cars$weight[1]
print(x_val)
## [1] 4209
# Forward transformation: y = 1 / sqrt(x)
y_val <- 1 / sqrt(x_val)
print(y_val)
## [1] 0.01541383
# Inverse transformation: x = 1 / y^2 recovers the starting value
detransformedx <- 1 / y_val^2
print(detransformedx)
## [1] 4209
# --- Duplicate Record Detection ---
# Index of the first record that repeats an earlier one (0 means none).
print(anyDuplicated(cars))
## [1] 0
# One logical per record: TRUE where the record repeats an earlier one.
print(duplicated(cars))
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Build a new dataset by appending a copy of the first record to the end.
first_record <- cars[1, ]
new.cars <- rbind(cars, first_record)
# The appended copy is now reported as the first duplicate.
print(anyDuplicated(new.cars))
## [1] 262
print(duplicated(new.cars))
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
# --- Creating an Index Field ---
# Data frames can be reordered by an existing column: compute the row order
# that sorts mpg ascending, then display the records in that order.
mpg_order <- order(cars$mpg)
cars[mpg_order, ]
# For vectors/matrices: add a column to act as an index
x_vec <- c(1, 1, 3:1, 1:4, 3)
y_vec <- c(9, 9:1)
z_vec <- c(2, 1:9)
# Bind the three vectors as the columns of one matrix (column names are
# taken from the variable names).
mat <- cbind(x_vec, y_vec, z_vec)
mat
## x_vec y_vec z_vec
## [1,] 1 9 2
## [2,] 1 9 1
## [3,] 3 8 2
## [4,] 2 7 3
## [5,] 1 6 4
## [6,] 1 5 5
## [7,] 2 4 6
## [8,] 3 3 7
## [9,] 4 2 8
## [10,] 3 1 9
# Prepend an Index column holding each row's original position.
# seq_len(nrow(mat)) ties the index to the matrix itself and stays correct
# (empty) for a zero-row matrix, unlike 1:length(...).
indexed_m <- cbind(Index = seq_len(nrow(mat)), mat)
indexed_m
## Index x_vec y_vec z_vec
## [1,] 1 1 9 2
## [2,] 2 1 9 1
## [3,] 3 3 8 2
## [4,] 4 2 7 3
## [5,] 5 1 6 4
## [6,] 6 1 5 5
## [7,] 7 2 4 6
## [8,] 8 3 3 7
## [9,] 9 4 2 8
## [10,] 10 3 1 9
# Sort the rows by the matrix's own z_vec column; the Index column still
# records where each row came from.
indexed_m[order(indexed_m[, "z_vec"]), ]
## Index x_vec y_vec z_vec
## [1,] 2 1 9 1
## [2,] 1 1 9 2
## [3,] 3 3 8 2
## [4,] 4 2 7 3
## [5,] 5 1 6 4
## [6,] 6 1 5 5
## [7,] 7 2 4 6
## [8,] 8 3 3 7
## [9,] 9 4 2 8
## [10,] 10 3 1 9
# --- Binning ---
# Define the dataset for binning
xdata <- c(1, 1, 1, 1, 1, 2, 2, 11, 11, 12, 12, 44)
n <- length(xdata)
nbins <- 3
whichbin <- rep(0, n)
# Equal frequency binning
# Target number of observations per bin.
freq <- n / nbins
# Assign bins by RANK rather than by position in xdata: the k-th smallest
# value goes to bin ceiling(k * nbins / n). order() supplies the positions
# from smallest to largest, so the result is also correct when xdata is not
# already sorted; tied values are split in their original order.
whichbin[order(xdata)] <- ceiling(seq_len(n) * nbins / n)
whichbin
## [1] 1 1 1 1 2 2 2 2 3 3 3 3
# K-means binning
# Cluster the values into nbins groups and use each value's cluster number
# as its bin. The starting centres are chosen at random when only a count is
# given, so the numbering of the clusters can differ between runs.
kmeansclustering <- kmeans(x = xdata, centers = nbins)
whichbin <- kmeansclustering[["cluster"]]
print(whichbin)
## [1] 3 3 3 3 3 3 3 1 1 1 1 2
# Equal width binning
# The covered range runs from just below the minimum up to the maximum
# (max - min + 1), split into nbins intervals of equal width.
range_xdata <- max(xdata) - min(xdata) + 1
binwidth <- range_xdata / nbins
# Shift every value so the smallest becomes 1, then find which interval it
# falls into. Offsetting by min(xdata) keeps the bins correct when the data
# do not start at 1, and computing all bins at once means no entry can keep
# a stale label from the k-means step.
whichbin <- ceiling((xdata - min(xdata) + 1) * nbins / range_xdata)
whichbin
## [1] 1 1 1 1 1 1 1 1 1 1 1 3
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.