# Data types: dates and date-times
# Parse a year-month-day string into a Date (calendar day, no time of day).
as.Date("2024-07-23", format = "%Y-%m-%d")
## [1] "2024-07-23"
# Date and time: no `tz` is given, so the string is read in the session's
# local time zone (the recorded output shows a -05 offset).
as.POSIXct("2024-07-23 18:43:00", format = "%Y-%m-%d %H:%M:%S")
## [1] "2024-07-23 18:43:00 -05"
# Current instant and current calendar day as reported by the system clock.
Sys.time()
## [1] "2024-07-28 21:50:15 -05"
Sys.Date()
## [1] "2024-07-28"
# Character vector used to illustrate counting and indexing.
nombre <- c("Juan", "Carlos", "Alberto")
# Frequency of each distinct value; every name appears exactly once.
table(nombre)
## nombre
## Alberto Carlos Juan
## 1 1 1
# The vector was built without element names, so names() yields NULL.
names(nombre)
## NULL
# Specific positions
nombre[2L]
## [1] "Carlos"
nombre[c(1L, 3L)]
## [1] "Juan" "Alberto"
# Indexing by name finds nothing because the vector is unnamed: result is NA.
nombre["Juan"]
## [1] NA
class(nombre)
## [1] "character"
# Functions for generating vectors
# seq_len(n) is the idiomatic way to get 1..n. The previous seq(1:10) passed a
# whole vector as `from` and only produced 1..10 because a single
# non-scalar argument makes seq() behave like seq_along().
seq_len(10)
## [1] 1 2 3 4 5 6 7 8 9 10
# Open the documentation page for seq().
help("seq")
## starting httpd help server ... done
seq(1, 10, by = 2)
## [1] 1 3 5 7 9
# The first argument is the starting value and `by` is the step: starting at 1
# with a step of 2 yields the odd numbers; starting at 2 would yield the even
# numbers.
seq(0, 100, length.out = 5)
## [1] 0 25 50 75 100
# Atomic vectors can only hold values of a single type, so mixing numbers and
# text coerces every element to character: "1" "a" "3.14".
a <- c(1, "a", 3.14)
# Matrices
# matrix() fills column by column unless byrow = TRUE is given.
mat <- matrix(1:9, nrow = 3)
mat
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
mat <- matrix(1:9, nrow = 3, byrow = TRUE)
mat
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# A 3 x 4 matrix needs 12 values. Supplying only 1:9 made matrix() recycle the
# data and warn that the data length is "not a sub-multiple or multiple of the
# number of columns". Writing the wrapped-around values out explicitly builds
# the same matrix without the warning.
mat <- matrix(c(1:9, 1:3), nrow = 3, ncol = 4, byrow = TRUE)
mat
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 1 2 3
rownames(mat) <- c("F1", "F2", "F3")
colnames(mat) <- c("C1", "C2", "C3", "C4")
# Indexing is mat[row, column]: mat[2, 3] is a single cell and mat[3, ] is the
# whole third row. mat[, 6] would fail with a subscript-out-of-bounds error
# because the matrix has only four columns.
mat[1:2, ]
## C1 C2 C3 C4
## F1 1 2 3 4
## F2 5 6 7 8
# Lists
# A list can mix element types: here strings and a numeric vector of grades.
Calificaciones <- list(
  name = "Atrid",
  student_id = "1234",
  grade = c(5, 4.8),
  final = "A"
)
# Accessing elements: by `$name`, by `[["name"]]`, or by position. All three
# calls below return the same element.
Calificaciones$student_id
## [1] "1234"
Calificaciones[["student_id"]]
## [1] "1234"
Calificaciones[[2]]
## [1] "1234"
# Data frames
# Convert the matrix to a data frame; as the printed result shows, the matrix
# row names (F1..F3) and column names (C1..C4) carry over.
df <- as.data.frame(mat)
df
## C1 C2 C3 C4
## F1 1 2 3 4
## F2 5 6 7 8
## F3 9 1 2 3
# Columns are the variables; rows are the records (observations) of those
# variables.
df1 <- data.frame(
  nombres = c("Juliana", "Andres", "Maria"),
  edades = c(23, 35, 34)
)
df1
## nombres edades
## 1 Juliana 23
## 2 Andres 35
## 3 Maria 34
# Build a data frame from two parallel vectors: one city per temperature
# reading (the column name suggests degrees Celsius).
city <- c("Bogota", "Cali", "Medellin")
temp_c <- c(14, 34, 28)
DFT <- data.frame(city = city, temp_c = temp_c)
DFT
## city temp_c
## 1 Bogota 14
## 2 Cali 34
## 3 Medellin 28
# Celsius-to-Fahrenheit conversion. The same result could be computed inline
# as temp_f <- (temp_c * 9 / 5) + 32; the helper below makes it reusable.
#
# x: numeric vector of temperatures in degrees Celsius.
# Returns a numeric vector of the same length in degrees Fahrenheit
# (the arithmetic is vectorized, so each element is converted independently).
CaF <- function(x) {
  (9 * x) / 5 + 32
}
# Convert the Celsius readings and store them as a new column of DFT.
temp_f1 <- CaF(temp_c)
DFT$temp_f1 <- temp_f1
# Keep the rows whose temperature is below 30. The mask is built from the data
# frame's own column rather than the free-standing temp_c vector, so the
# filter stays aligned with the rows even if DFT is later reordered or subset.
DFT[DFT$temp_c < 30, ]
## city temp_c temp_f1
## 1 Bogota 14 57.2
## 3 Medellin 28 82.4
#ff
# Fix a seed so the random draws below are reproducible. The set.seed() call
# used to sit on the comment line, so it never actually ran.
# NOTE(review): the printed vector recorded after this block was presumably
# produced by an unseeded run and will not match the seeded output;
# regenerate it when the script is next knitted.
set.seed(123)
vector <- sample(1:1000, 1000, replace = TRUE)
# Randomly replace some of the values with NA
num_na <- 100 # number of NA values wanted
# Draw the positions from the vector's own index range instead of a second
# hard-coded 1:1000, so the two cannot drift apart.
na_indices <- sample(seq_along(vector), num_na)
vector[na_indices] <- NA
print(vector)
## [1] 917 394 NA 623 467 835 974 867 726 694 1 639 366 NA 463 168 812 418
## [19] NA 874 565 867 962 105 129 433 90 NA 205 NA 934 388 672 476 782 868
## [37] 364 556 289 24 380 411 791 141 823 90 985 975 879 228 279 647 829 109
## [55] 227 818 634 297 747 743 316 616 NA 424 NA 766 723 NA 293 192 335 297
## [73] 764 241 NA 872 968 NA 122 860 301 753 79 158 463 664 792 144 286 801
## [91] 709 348 227 NA 382 316 79 682 704 399 44 999 NA 282 332 NA 323 550
## [109] 377 527 776 424 145 562 614 235 957 26 122 606 331 138 40 332 994 319
## [127] 650 505 883 453 NA 470 64 271 518 778 960 724 656 177 613 651 85 172
## [145] 644 402 446 579 383 379 48 742 454 684 699 755 875 NA 436 545 317 169
## [163] 277 500 NA 659 337 888 547 3 554 682 832 NA 558 781 194 530 859 636
## [181] 792 496 387 994 568 386 846 498 672 554 926 676 145 920 184 887 226 986
## [199] 212 965 255 49 324 996 93 387 814 688 129 82 NA 913 642 227 549 328
## [217] 594 621 NA 678 357 867 468 70 642 NA 793 701 NA 643 294 665 333 499
## [235] 328 927 272 620 860 870 NA 240 NA 874 15 242 270 705 516 NA 287 29
## [253] 354 881 315 477 975 359 672 NA 847 301 NA 334 420 688 332 623 167 704
## [271] 208 114 344 401 449 540 384 NA NA 856 721 903 741 625 244 142 56 972
## [289] 276 769 509 377 796 61 367 583 922 105 725 595 755 667 862 409 832 503
## [307] 956 504 363 41 492 553 537 622 316 207 11 34 976 158 110 NA 470 803
## [325] 335 43 824 309 19 869 474 625 89 506 44 437 NA 208 918 411 141 345
## [343] 932 33 751 502 248 242 159 76 970 130 996 821 328 977 741 367 416 613
## [361] 202 438 544 689 278 14 94 933 670 752 503 292 427 882 96 NA 257 146
## [379] NA 841 681 338 817 NA 255 644 460 119 203 286 NA 370 736 659 412 NA
## [397] 292 851 NA NA NA 840 130 635 234 673 134 368 719 NA 514 155 338 128
## [415] 890 82 990 423 825 187 394 268 399 176 689 741 756 734 NA 977 826 248
## [433] 821 NA 880 NA NA NA 490 209 NA 985 549 344 852 75 988 NA 841 675
## [451] NA 545 607 718 437 212 924 436 480 NA 229 156 57 NA NA 826 773 726
## [469] 457 634 355 636 7 255 427 158 252 866 727 977 NA 854 115 87 625 163
## [487] NA 496 217 740 21 569 720 228 306 318 NA 804 48 282 116 NA 631 100
## [505] 987 357 502 838 775 NA 362 670 607 27 517 833 657 204 17 NA 183 NA
## [523] 448 843 112 NA 966 327 91 878 686 NA 546 229 732 177 NA 881 NA 535
## [541] 333 846 57 666 589 769 829 522 5 353 710 794 795 69 724 384 420 521
## [559] 624 711 868 767 609 60 46 965 932 252 553 964 986 882 382 506 724 564
## [577] 353 175 840 NA 440 NA 593 232 861 156 775 285 121 679 54 NA 205 238
## [595] 789 351 282 732 860 501 263 182 549 739 893 41 NA 795 638 514 465 112
## [613] 97 615 751 NA 864 912 831 952 541 537 990 732 531 954 865 767 807 268
## [631] 297 227 210 613 596 498 855 942 110 707 330 354 711 464 178 976 NA 252
## [649] NA 514 NA 174 886 NA 442 877 367 572 18 732 NA 65 823 297 840 401
## [667] 426 163 94 140 NA 655 409 856 NA 448 584 NA 116 877 143 478 644 664
## [685] NA 76 757 203 996 265 254 51 414 28 501 10 710 629 490 871 595 255
## [703] 780 903 260 296 148 92 551 NA NA 64 158 714 151 130 753 545 76 61
## [721] 945 592 34 466 553 660 729 639 200 NA 594 945 368 602 273 231 650 266
## [739] 383 78 865 365 190 184 57 522 723 353 773 392 703 381 139 493 126 917
## [757] 98 41 996 330 606 855 610 109 527 846 NA 723 529 338 712 777 661 307
## [775] 57 283 424 587 476 15 220 261 723 NA 600 17 452 959 833 398 991 572
## [793] 71 189 669 543 140 781 470 28 866 112 18 528 914 848 NA 285 640 379
## [811] 826 NA 405 746 653 527 629 607 68 840 957 222 142 847 188 261 227 719
## [829] 163 358 33 255 290 821 217 517 225 160 783 590 86 300 838 855 NA 944
## [847] 480 158 674 463 NA 274 408 784 535 458 754 253 918 560 392 676 210 673
## [865] 103 677 152 856 821 NA 636 477 231 734 4 406 918 973 NA 555 613 708
## [883] NA 563 86 517 747 101 749 703 304 100 165 225 NA 759 229 500 385 176
## [901] 962 382 26 368 25 620 492 3 NA 649 828 220 NA 220 NA 134 276 52
## [919] 754 9 344 451 865 429 46 88 106 240 803 152 833 976 860 791 946 NA
## [937] 777 NA NA 664 308 82 829 331 562 683 321 380 391 279 701 914 566 582
## [955] 83 761 203 932 819 682 521 259 445 739 275 278 NA 221 933 79 209 727
## [973] NA 801 206 NA 827 558 524 256 493 386 438 700 179 101 525 178 976 488
## [991] 213 625 343 419 NA NA 844 164 558 NA
# is.na() returns a logical vector that is TRUE where a value is missing (NA) and FALSE elsewhere; e.g. sum(is.na(vector)) counts the missing values.
# Load the dslabs package and its `murders` example data set.
library(dslabs)
data("murders", package = "dslabs")
# Number of levels of the `region` column (a factor, per the str() output
# recorded further down).
nlevels(murders$region)
## [1] 4
# Print the whole data frame.
murders
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
## 4 Arkansas AR South 2915918 93
## 5 California CA West 37253956 1257
## 6 Colorado CO West 5029196 65
## 7 Connecticut CT Northeast 3574097 97
## 8 Delaware DE South 897934 38
## 9 District of Columbia DC South 601723 99
## 10 Florida FL South 19687653 669
## 11 Georgia GA South 9920000 376
## 12 Hawaii HI West 1360301 7
## 13 Idaho ID West 1567582 12
## 14 Illinois IL North Central 12830632 364
## 15 Indiana IN North Central 6483802 142
## 16 Iowa IA North Central 3046355 21
## 17 Kansas KS North Central 2853118 63
## 18 Kentucky KY South 4339367 116
## 19 Louisiana LA South 4533372 351
## 20 Maine ME Northeast 1328361 11
## 21 Maryland MD South 5773552 293
## 22 Massachusetts MA Northeast 6547629 118
## 23 Michigan MI North Central 9883640 413
## 24 Minnesota MN North Central 5303925 53
## 25 Mississippi MS South 2967297 120
## 26 Missouri MO North Central 5988927 321
## 27 Montana MT West 989415 12
## 28 Nebraska NE North Central 1826341 32
## 29 Nevada NV West 2700551 84
## 30 New Hampshire NH Northeast 1316470 5
## 31 New Jersey NJ Northeast 8791894 246
## 32 New Mexico NM West 2059179 67
## 33 New York NY Northeast 19378102 517
## 34 North Carolina NC South 9535483 286
## 35 North Dakota ND North Central 672591 4
## 36 Ohio OH North Central 11536504 310
## 37 Oklahoma OK South 3751351 111
## 38 Oregon OR West 3831074 36
## 39 Pennsylvania PA Northeast 12702379 457
## 40 Rhode Island RI Northeast 1052567 16
## 41 South Carolina SC South 4625364 207
## 42 South Dakota SD North Central 814180 8
## 43 Tennessee TN South 6346105 219
## 44 Texas TX South 25145561 805
## 45 Utah UT West 2763885 22
## 46 Vermont VT Northeast 625741 2
## 47 Virginia VA South 8001024 250
## 48 Washington WA West 6724540 93
## 49 West Virginia WV South 1852994 27
## 50 Wisconsin WI North Central 5686986 97
## 51 Wyoming WY West 563626 5
# First three rows only.
head(murders, n = 3)
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
# Compact overview: number of rows/columns and the type of every column.
str(murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
class(murders)
## [1] "data.frame"
names(murders)
## [1] "state" "abb" "region" "population" "total"
# sort() returns the values themselves in increasing order.
sort(murders$total)
## [1] 2 4 5 5 7 8 11 12 12 16 19 21 22 27 32
## [16] 36 38 53 63 65 67 84 93 93 97 97 99 111 116 118
## [31] 120 135 142 207 219 232 246 250 286 293 310 321 351 364 376
## [46] 413 457 517 669 805 1257
# order() returns row positions instead of values; with decreasing = TRUE the
# position of the largest total comes first and the smallest last.
order(murders$total, decreasing = TRUE)
## [1] 5 44 10 33 39 23 11 14 19 26 36 21 34 47 31 3 43 41 15 1 25 22 18 37 9
## [26] 7 50 4 48 29 32 6 17 24 8 38 28 49 45 16 2 40 13 27 20 42 12 30 51 35
## [51] 46
# Positions 5 and 46 are the first and last entries of that ordering; look up
# which states they are.
murders$state[5]
## [1] "California"
murders$state[46]
## [1] "Vermont"
# attach() puts the columns of `murders` on the search path so they can be
# referenced by bare name (state, total, ...) in the lines below.
# NOTE(review): attach() is fragile (names can be masked by globals and the
# attachment persists for the rest of the session); consider murders$total /
# with(murders, ...) once it is confirmed nothing later relies on it.
attach(murders)
state[5]
## [1] "California"
# Largest total and the row position where it occurs.
max(total)
## [1] 1257
pos_max <- which.max(total)
pos_max
## [1] 5
# Smallest total and the row position where it occurs.
min(total)
## [1] 2
pos_min <- which.min(total)
pos_min
## [1] 46
# Use those positions to look up the corresponding state names.
state[pos_max]
## [1] "California"
state[pos_min]
## [1] "Vermont"