library(dplyr)
library(forecast)
library(maps)
library(readxl)
# Read the state population workbook and rename its three columns to the
# labels the rest of the script relies on.
datos <- read_excel("historical_state_population_by_year.xlsx")
# View(datos)
names(datos) <- c("Estado", "Año", "Poblacion")

# Coerce the population column to numeric and show its structure.
datos[["Poblacion"]] <- as.numeric(datos[["Poblacion"]])
str(datos[["Poblacion"]])
## num [1:6020] 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...

# Massachusetts: keep only the population column and wrap it in a
# frequency-1 time series whose first observation is labelled 1900.
df <- filter(datos, Estado == "MA")
df2 <- select(df, Poblacion)
ts1 <- ts(df2, start = 1900, frequency = 1)
print(ts1)
## Time Series:
## Start = 1900
## End = 2019
## Frequency = 1
## Poblacion
## [1,] 2788000
## [2,] 2794000
## [3,] 2856000
## [4,] 2924000
## [5,] 2969000
## [6,] 3018000
## [7,] 3107000
## [8,] 3183000
## [9,] 3251000
## [10,] 3316000
## [11,] 3365000
## [12,] 3383000
## [13,] 3440000
## [14,] 3534000
## [15,] 3636000
## [16,] 3705000
## [17,] 3726000
## [18,] 3738000
## [19,] 3695000
## [20,] 3785000
## [21,] 3882000
## [22,] 3951000
## [23,] 4010000
## [24,] 4057000
## [25,] 4102000
## [26,] 4154000
## [27,] 4194000
## [28,] 4223000
## [29,] 4217000
## [30,] 4229000
## [31,] 4250000
## [32,] 4248000
## [33,] 4259000
## [34,] 4282000
## [35,] 4305000
## [36,] 4343000
## [37,] 4355000
## [38,] 4358000
## [39,] 4365000
## [40,] 4347000
## [41,] 4318000
## [42,] 4389000
## [43,] 4370000
## [44,] 4257000
## [45,] 4192000
## [46,] 4201000
## [47,] 4494000
## [48,] 4580000
## [49,] 4674000
## [50,] 4741000
## [51,] 4686000
## [52,] 4654000
## [53,] 4650000
## [54,] 4806000
## [55,] 4910000
## [56,] 4882000
## [57,] 4891000
## [58,] 4929000
## [59,] 5010000
## [60,] 5117000
## [61,] 5160000
## [62,] 5219000
## [63,] 5263000
## [64,] 5344000
## [65,] 5448000
## [66,] 5502000
## [67,] 5535000
## [68,] 5594000
## [69,] 5618000
## [70,] 5650000
## [71,] 5689170
## [72,] 5737580
## [73,] 5760302
## [74,] 5781172
## [75,] 5773548
## [76,] 5757756
## [77,] 5743672
## [78,] 5738199
## [79,] 5736469
## [80,] 5738404
## [81,] 5746075
## [82,] 5768685
## [83,] 5771222
## [84,] 5799407
## [85,] 5840773
## [86,] 5880733
## [87,] 5902678
## [88,] 5935204
## [89,] 5979982
## [90,] 6015478
## [91,] 6018664
## [92,] 5998652
## [93,] 5993474
## [94,] 6010884
## [95,] 6031352
## [96,] 6062335
## [97,] 6085393
## [98,] 6115476
## [99,] 6144407
## [100,] 6175169
## [101,] 6361104
## [102,] 6397634
## [103,] 6417206
## [104,] 6422565
## [105,] 6412281
## [106,] 6403290
## [107,] 6410084
## [108,] 6431559
## [109,] 6468967
## [110,] 6517613
## [111,] 6566307
## [112,] 6613583
## [113,] 6663005
## [114,] 6713315
## [115,] 6762596
## [116,] 6794228
## [117,] 6823608
## [118,] 6859789
## [119,] 6882635
## [120,] 6892503
# Massachusetts: let auto.arima() pick the ARIMA specification for ts1 and
# show the fitted model.
arimaMA <- auto.arima(ts1)
print(arimaMA)
## Series: ts1
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.4026 34308.594
## s.e. 0.0838 5577.936
##
## sigma^2 = 1.923e+09: log likelihood = -1439.88
## AIC=2885.77 AICc=2885.98 BIC=2894.1

# Same fit, followed by the training-set error measures.
summary(arimaMA)
## Series: ts1
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.4026 34308.594
## s.e. 0.0838 5577.936
##
## sigma^2 = 1.923e+09: log likelihood = -1439.88
## AIC=2885.77 AICc=2885.98 BIC=2894.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 111.988 43304.41 26800.86 0.02741526 0.5808001 0.6240271
## ACF1
## Training set 0.01917487

# Project 51 steps ahead with a 95% prediction interval and print the table.
pronosticoMA <- forecast(arimaMA, h = 51, level = 95)
print(pronosticoMA)
## Point Forecast Lo 95 Hi 95
## 2020 6919008 6833052 7004965
## 2021 6953317 6805247 7101387
## 2022 6987625 6796678 7178573
## 2023 7021934 6796108 7247760
## 2024 7056243 6800248 7312238
## 2025 7090551 6807585 7373517
## 2026 7124860 6817279 7432440
## 2027 7159168 6828802 7489535
## 2028 7193477 6841798 7545156
## 2029 7227786 6856013 7599558
## 2030 7262094 6871261 7652928
## 2031 7296403 6887395 7705410
## 2032 7330711 6904304 7757119
## 2033 7365020 6921895 7808145
## 2034 7399329 6940095 7858562
## 2035 7433637 6958840 7908434
## 2036 7467946 6978080 7957811
## 2037 7502254 6997770 8006739
## 2038 7536563 7017872 8055254
## 2039 7570872 7038352 8103391
## 2040 7605180 7059183 8151177
## 2041 7639489 7080338 8198639
## 2042 7673797 7101796 8245799
## 2043 7708106 7123536 8292676
## 2044 7742414 7145541 8339288
## 2045 7776723 7167794 8385652
## 2046 7811032 7190282 8431782
## 2047 7845340 7212990 8477690
## 2048 7879649 7235908 8523390
## 2049 7913957 7259023 8568892
## 2050 7948266 7282327 8614205
## 2051 7982575 7305809 8659340
## 2052 8016883 7329462 8704304
## 2053 8051192 7353278 8749106
## 2054 8085500 7377249 8793752
## 2055 8119809 7401369 8838249
## 2056 8154118 7425631 8882604
## 2057 8188426 7450030 8926822
## 2058 8222735 7474560 8970909
## 2059 8257043 7499217 9014870
## 2060 8291352 7523994 9058710
## 2061 8325661 7548889 9102432
## 2062 8359969 7573897 9146042
## 2063 8394278 7599013 9189543
## 2064 8428586 7624234 9232939
## 2065 8462895 7649557 9276233
## 2066 8497204 7674978 9319429
## 2067 8531512 7700494 9362530
## 2068 8565821 7726102 9405539
## 2069 8600129 7751799 9448459
## 2070 8634438 7777583 9491293
# Draw the Massachusetts forecast object with its title.
plot(pronosticoMA, main = "Poblacion MA")

# New York: re-apply the numeric coercion (str() reports the column as num),
# then build the frequency-1 series labelled from 1900.
datos[["Poblacion"]] <- as.numeric(datos[["Poblacion"]])
str(datos[["Poblacion"]])
## num [1:6020] 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
df <- filter(datos, Estado == "NY")
df2 <- select(df, Poblacion)
ts1 <- ts(df2, start = 1900, frequency = 1)
print(ts1)
## Time Series:
## Start = 1900
## End = 2019
## Frequency = 1
## Poblacion
## [1,] 7283000
## [2,] 7449000
## [3,] 7612000
## [4,] 7771000
## [5,] 7927000
## [6,] 8084000
## [7,] 8289000
## [8,] 8499000
## [9,] 8714000
## [10,] 8935000
## [11,] 9137000
## [12,] 9249000
## [13,] 9361000
## [14,] 9473000
## [15,] 9585000
## [16,] 9700000
## [17,] 9848000
## [18,] 9993000
## [19,] 9936000
## [20,] 10252000
## [21,] 10282000
## [22,] 10416000
## [23,] 10589000
## [24,] 10752000
## [25,] 10953000
## [26,] 11186000
## [27,] 11257000
## [28,] 11174000
## [29,] 11599000
## [30,] 12171000
## [31,] 12647000
## [32,] 12848000
## [33,] 13001000
## [34,] 13126000
## [35,] 13253000
## [36,] 13375000
## [37,] 13481000
## [38,] 13511000
## [39,] 13512000
## [40,] 13523000
## [41,] 13456000
## [42,] 13267000
## [43,] 13002000
## [44,] 12807000
## [45,] 12628000
## [46,] 12495000
## [47,] 13398000
## [48,] 13982000
## [49,] 14497000
## [50,] 14892000
## [51,] 14865000
## [52,] 14890000
## [53,] 15192000
## [54,] 15527000
## [55,] 15814000
## [56,] 15966000
## [57,] 16112000
## [58,] 16374000
## [59,] 16601000
## [60,] 16685000
## [61,] 16838000
## [62,] 17061000
## [63,] 17301000
## [64,] 17461000
## [65,] 17589000
## [66,] 17734000
## [67,] 17843000
## [68,] 17935000
## [69,] 18051000
## [70,] 18105000
## [71,] 18241391
## [72,] 18357982
## [73,] 18339400
## [74,] 18177063
## [75,] 18049775
## [76,] 18003485
## [77,] 17940541
## [78,] 17812602
## [79,] 17680589
## [80,] 17583838
## [81,] 17566754
## [82,] 17567734
## [83,] 17589738
## [84,] 17686905
## [85,] 17745684
## [86,] 17791672
## [87,] 17833419
## [88,] 17868848
## [89,] 17941309
## [90,] 17983086
## [91,] 18002855
## [92,] 18029532
## [93,] 18082032
## [94,] 18140894
## [95,] 18156652
## [96,] 18150928
## [97,] 18143805
## [98,] 18143184
## [99,] 18159175
## [100,] 18196601
## [101,] 19001780
## [102,] 19082838
## [103,] 19137800
## [104,] 19175939
## [105,] 19171567
## [106,] 19132610
## [107,] 19104631
## [108,] 19132335
## [109,] 19212436
## [110,] 19307066
## [111,] 19399878
## [112,] 19499241
## [113,] 19572932
## [114,] 19624447
## [115,] 19651049
## [116,] 19654666
## [117,] 19633428
## [118,] 19589572
## [119,] 19530351
## [120,] 19453561
# New York: fit an automatically selected ARIMA model to ts1. The object
# names keep their "MA" suffix even though ts1 now holds the NY series.
arimaMA <- auto.arima(ts1)
print(arimaMA)
## Series: ts1
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## 0.5065 -0.9697
## s.e. 0.0855 0.0271
##
## sigma^2 = 2.326e+10: log likelihood = -1575.62
## AIC=3157.25 AICc=3157.46 BIC=3165.56

# Same fit, followed by the training-set error measures.
summary(arimaMA)
## Series: ts1
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## 0.5065 -0.9697
## s.e. 0.0855 0.0271
##
## sigma^2 = 2.326e+10: log likelihood = -1575.62
## AIC=3157.25 AICc=3157.46 BIC=3165.56
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -14563.74 149933.4 82036.53 -0.08319142 0.5752184 0.5840548
## ACF1
## Training set -0.009336835

# Project 51 steps ahead with a 95% prediction interval and print the table.
pronosticoMA <- forecast(arimaMA, h = 51, level = 95)
print(pronosticoMA)
## Point Forecast Lo 95 Hi 95
## 2020 19441372 19142479 19740265
## 2021 19461901 18913884 20009918
## 2022 19499001 18722834 20275168
## 2023 19544494 18560730 20528257
## 2024 19594237 18419928 20768546
## 2025 19646134 18294652 20997616
## 2026 19699121 18180758 21217483
## 2027 19752660 18075313 21430006
## 2028 19806478 17976225 21636732
## 2029 19860439 17881980 21838897
## 2030 19914471 17791466 22037476
## 2031 19968539 17703845 22233234
## 2032 20022626 17618478 22426774
## 2033 20076722 17534869 22618576
## 2034 20130823 17452622 22809025
## 2035 20184927 17371422 22998431
## 2036 20239031 17291010 23187052
## 2037 20293136 17211176 23375097
## 2038 20347242 17131741 23562742
## 2039 20401347 17052559 23750136
## 2040 20455453 16973503 23937403
## 2041 20509559 16894467 24124650
## 2042 20563664 16815359 24311970
## 2043 20617770 16736100 24499440
## 2044 20671876 16656621 24687131
## 2045 20725982 16576862 24875101
## 2046 20780087 16496771 25063404
## 2047 20834193 16416303 25252084
## 2048 20888299 16335416 25441182
## 2049 20942405 16254076 25630733
## 2050 20996510 16172251 25820770
## 2051 21050616 16089912 26011320
## 2052 21104722 16007036 26202407
## 2053 21158828 15923601 26394055
## 2054 21212933 15839586 26586281
## 2055 21267039 15754974 26779104
## 2056 21321145 15669750 26972540
## 2057 21375251 15583900 27166601
## 2058 21429356 15497412 27361301
## 2059 21483462 15410274 27556650
## 2060 21537568 15322477 27752659
## 2061 21591674 15234012 27949335
## 2062 21645780 15144871 28146688
## 2063 21699885 15055048 28344722
## 2064 21753991 14964536 28543446
## 2065 21808097 14873330 28742864
## 2066 21862203 14781425 28942980
## 2067 21916308 14688817 29143799
## 2068 21970414 14595503 29345325
## 2069 22024520 14501479 29547561
## 2070 22078626 14406742 29750509
# Draw the New York forecast object with its title.
plot(pronosticoMA, main = "Poblacion NY")

# California: re-apply the numeric coercion (str() reports the column as
# num), then build the frequency-1 series labelled from 1900.
datos[["Poblacion"]] <- as.numeric(datos[["Poblacion"]])
str(datos[["Poblacion"]])
## num [1:6020] 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
df <- filter(datos, Estado == "CA")
df2 <- select(df, Poblacion)
ts1 <- ts(df2, start = 1900, frequency = 1)
print(ts1)
## Time Series:
## Start = 1900
## End = 2019
## Frequency = 1
## Poblacion
## [1,] 1490000
## [2,] 1550000
## [3,] 1623000
## [4,] 1702000
## [5,] 1792000
## [6,] 1893000
## [7,] 1976000
## [8,] 2054000
## [9,] 2161000
## [10,] 2282000
## [11,] 2406000
## [12,] 2534000
## [13,] 2668000
## [14,] 2811000
## [15,] 2934000
## [16,] 3008000
## [17,] 3071000
## [18,] 3171000
## [19,] 3262000
## [20,] 3339000
## [21,] 3554000
## [22,] 3795000
## [23,] 3991000
## [24,] 4270000
## [25,] 4541000
## [26,] 4730000
## [27,] 4929000
## [28,] 5147000
## [29,] 5344000
## [30,] 5531000
## [31,] 5711000
## [32,] 5824000
## [33,] 5894000
## [34,] 5963000
## [35,] 6060000
## [36,] 6175000
## [37,] 6341000
## [38,] 6528000
## [39,] 6656000
## [40,] 6785000
## [41,] 6950000
## [42,] 7237000
## [43,] 7735000
## [44,] 8506000
## [45,] 8945000
## [46,] 9344000
## [47,] 9559000
## [48,] 9832000
## [49,] 10064000
## [50,] 10337000
## [51,] 10677000
## [52,] 11134000
## [53,] 11635000
## [54,] 12251000
## [55,] 12746000
## [56,] 13133000
## [57,] 13713000
## [58,] 14264000
## [59,] 14880000
## [60,] 15467000
## [61,] 15870000
## [62,] 16497000
## [63,] 17072000
## [64,] 17668000
## [65,] 18151000
## [66,] 18585000
## [67,] 18858000
## [68,] 19176000
## [69,] 19394000
## [70,] 19711000
## [71,] 19971069
## [72,] 20345939
## [73,] 20585469
## [74,] 20868728
## [75,] 21173865
## [76,] 21537849
## [77,] 21935909
## [78,] 22352396
## [79,] 22835958
## [80,] 23256880
## [81,] 23800800
## [82,] 24285933
## [83,] 24820009
## [84,] 25360026
## [85,] 25844393
## [86,] 26441109
## [87,] 27102237
## [88,] 27777158
## [89,] 28464249
## [90,] 29218164
## [91,] 29950111
## [92,] 30414114
## [93,] 30875920
## [94,] 31147208
## [95,] 31317179
## [96,] 31493525
## [97,] 31780829
## [98,] 32217708
## [99,] 32682794
## [100,] 33145121
## [101,] 33987977
## [102,] 34479458
## [103,] 34871843
## [104,] 35253159
## [105,] 35574576
## [106,] 35827943
## [107,] 36021202
## [108,] 36250311
## [109,] 36604337
## [110,] 36961229
## [111,] 37319502
## [112,] 37638369
## [113,] 37948800
## [114,] 38260787
## [115,] 38596972
## [116,] 38918045
## [117,] 39167117
## [118,] 39358497
## [119,] 39461588
## [120,] 39512223
# California: fit an automatically selected ARIMA model to ts1. The object
# names keep their "MA" suffix even though ts1 now holds the CA series.
arimaMA <- auto.arima(ts1)
print(arimaMA)
## Series: ts1
## ARIMA(0,2,0)
##
## sigma^2 = 9.971e+09: log likelihood = -1525.79
## AIC=3053.57 AICc=3053.61 BIC=3056.34

# Same fit, followed by the training-set error measures.
summary(arimaMA)
## Series: ts1
## ARIMA(0,2,0)
##
## sigma^2 = 9.971e+09: log likelihood = -1525.79
## AIC=3053.57 AICc=3053.61 BIC=3056.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -88.02939 99017.45 65793.72 0.06388803 0.560492 0.2059178
## ACF1
## Training set -0.1174913

# Project 51 steps ahead with a 95% prediction interval and print the table.
pronosticoMA <- forecast(arimaMA, h = 51, level = 95)
print(pronosticoMA)
## Point Forecast Lo 95 Hi 95
## 2020 39562858 39367149.6 39758566
## 2021 39613493 39175875.8 40051110
## 2022 39664128 38931854.3 40396402
## 2023 39714763 38642824.0 40786702
## 2024 39765398 38313985.8 41216810
## 2025 39816033 37949094.0 41682972
## 2026 39866668 37551015.2 42182321
## 2027 39917303 37122028.2 42712578
## 2028 39967938 36664000.2 43271876
## 2029 40018573 36178497.2 43858649
## 2030 40069208 35666856.8 44471559
## 2031 40119843 35130238.7 45109447
## 2032 40170478 34569661.1 45771295
## 2033 40221113 33986027.0 46456199
## 2034 40271748 33380144.7 47163351
## 2035 40322383 32752743.0 47892023
## 2036 40373018 32104483.6 48641552
## 2037 40423653 31435970.6 49411335
## 2038 40474288 30747758.6 50200817
## 2039 40524923 30040359.0 51009487
## 2040 40575558 29314245.3 51836871
## 2041 40626193 28569857.7 52682528
## 2042 40676828 27807606.7 53546049
## 2043 40727463 27027876.2 54427050
## 2044 40778098 26231026.6 55325169
## 2045 40828733 25417396.8 56240069
## 2046 40879368 24587306.3 57171430
## 2047 40930003 23741057.1 58118949
## 2048 40980638 22878935.0 59082341
## 2049 41031273 22001211.4 60061335
## 2050 41081908 21108144.0 61055672
## 2051 41132543 20199978.4 62065108
## 2052 41183178 19276948.5 63089407
## 2053 41233813 18339277.9 64128348
## 2054 41284448 17387180.2 65181716
## 2055 41335083 16420860.1 66249306
## 2056 41385718 15440513.7 67330922
## 2057 41436353 14446329.2 68426377
## 2058 41486988 13438487.6 69535488
## 2059 41537623 12417162.7 70658083
## 2060 41588258 11382522.0 71793994
## 2061 41638893 10334727.0 72943059
## 2062 41689528 9273933.2 74105123
## 2063 41740163 8200291.0 75280035
## 2064 41790798 7113945.5 76467651
## 2065 41841433 6015037.0 77667829
## 2066 41892068 4903701.4 78880435
## 2067 41942703 3780070.2 80105336
## 2068 41993338 2644270.7 81342405
## 2069 42043973 1496426.6 82591519
## 2070 42094608 336657.5 83852558
# Draw the California forecast object with its title.
plot(pronosticoMA, main = "Poblacion CA")

# Texas: re-apply the numeric coercion (str() reports the column as num),
# then build the frequency-1 series labelled from 1900.
datos[["Poblacion"]] <- as.numeric(datos[["Poblacion"]])
str(datos[["Poblacion"]])
## num [1:6020] 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
df <- filter(datos, Estado == "TX")
df2 <- select(df, Poblacion)
ts1 <- ts(df2, start = 1900, frequency = 1)
print(ts1)
## Time Series:
## Start = 1900
## End = 2019
## Frequency = 1
## Poblacion
## [1,] 3055000
## [2,] 3132000
## [3,] 3210000
## [4,] 3291000
## [5,] 3374000
## [6,] 3459000
## [7,] 3546000
## [8,] 3636000
## [9,] 3727000
## [10,] 3821000
## [11,] 3922000
## [12,] 4016000
## [13,] 4107000
## [14,] 4207000
## [15,] 4300000
## [16,] 4368000
## [17,] 4444000
## [18,] 4563000
## [19,] 4666000
## [20,] 4631000
## [21,] 4723000
## [22,] 4853000
## [23,] 4955000
## [24,] 5077000
## [25,] 5210000
## [26,] 5332000
## [27,] 5453000
## [28,] 5577000
## [29,] 5675000
## [30,] 5762000
## [31,] 5844000
## [32,] 5907000
## [33,] 5961000
## [34,] 6014000
## [35,] 6053000
## [36,] 6123000
## [37,] 6192000
## [38,] 6250000
## [39,] 6301000
## [40,] 6360000
## [41,] 6425000
## [42,] 6585000
## [43,] 6711000
## [44,] 7012000
## [45,] 6876000
## [46,] 6826000
## [47,] 7197000
## [48,] 7388000
## [49,] 7626000
## [50,] 7623000
## [51,] 7776000
## [52,] 8111000
## [53,] 8314000
## [54,] 8336000
## [55,] 8382000
## [56,] 8660000
## [57,] 8830000
## [58,] 9070000
## [59,] 9252000
## [60,] 9405000
## [61,] 9624000
## [62,] 9820000
## [63,] 10053000
## [64,] 10159000
## [65,] 10270000
## [66,] 10378000
## [67,] 10492000
## [68,] 10599000
## [69,] 10819000
## [70,] 11045000
## [71,] 11198655
## [72,] 11509848
## [73,] 11759148
## [74,] 12019543
## [75,] 12268629
## [76,] 12568843
## [77,] 12904089
## [78,] 13193050
## [79,] 13500429
## [80,] 13888371
## [81,] 14338208
## [82,] 14746318
## [83,] 15331415
## [84,] 15751676
## [85,] 16007086
## [86,] 16272734
## [87,] 16561113
## [88,] 16621791
## [89,] 16667022
## [90,] 16806735
## [91,] 17044714
## [92,] 17339904
## [93,] 17650479
## [94,] 17996764
## [95,] 18338319
## [96,] 18679706
## [97,] 19006240
## [98,] 19355427
## [99,] 19712389
## [100,] 20044141
## [101,] 20944499
## [102,] 21319622
## [103,] 21690325
## [104,] 22030931
## [105,] 22394023
## [106,] 22778123
## [107,] 23359580
## [108,] 23831983
## [109,] 24309039
## [110,] 24801761
## [111,] 25241971
## [112,] 25645629
## [113,] 26084481
## [114,] 26480266
## [115,] 26964333
## [116,] 27470056
## [117,] 27914410
## [118,] 28295273
## [119,] 28628666
## [120,] 28995881
# Texas: fit an automatically selected ARIMA model to ts1. The object names
# keep their "MA" suffix even though ts1 now holds the TX series.
arimaMA <- auto.arima(ts1)
print(arimaMA)
## Series: ts1
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.5950 -0.1798
## s.e. 0.0913 0.0951
##
## sigma^2 = 1.031e+10: log likelihood = -1527.14
## AIC=3060.28 AICc=3060.5 BIC=3068.6

# Same fit, followed by the training-set error measures.
summary(arimaMA)
## Series: ts1
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.5950 -0.1798
## s.e. 0.0913 0.0951
##
## sigma^2 = 1.031e+10: log likelihood = -1527.14
## AIC=3060.28 AICc=3060.5 BIC=3068.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 12147.62 99818.31 59257.39 0.1046163 0.5686743 0.2672197
## ACF1
## Training set -0.02136734

# Project 51 steps ahead with a 95% prediction interval and print the table.
pronosticoMA <- forecast(arimaMA, h = 51, level = 95)
print(pronosticoMA)
## Point Forecast Lo 95 Hi 95
## 2020 29398472 29199487 29597457
## 2021 29806827 29463665 30149990
## 2022 30215183 29742956 30687410
## 2023 30623538 30024100 31222977
## 2024 31031894 30303359 31760429
## 2025 31440249 30579246 32301253
## 2026 31848605 30851090 32846119
## 2027 32256960 31118581 33395339
## 2028 32665316 31381587 33949044
## 2029 33073671 31640070 34507272
## 2030 33482027 31894047 35070007
## 2031 33890382 32143561 35637204
## 2032 34298738 32388674 36208801
## 2033 34707093 32629456 36784730
## 2034 35115449 32865983 37364914
## 2035 35523804 33098330 37949278
## 2036 35932160 33326573 38537746
## 2037 36340515 33550788 39130242
## 2038 36748871 33771046 39726695
## 2039 37157226 33987418 40327034
## 2040 37565581 34199972 40931191
## 2041 37973937 34408774 41539100
## 2042 38382292 34613887 42150698
## 2043 38790648 34815371 42765925
## 2044 39199003 35013284 43384723
## 2045 39607359 35207682 44007036
## 2046 40015714 35398618 44632810
## 2047 40424070 35586145 45261995
## 2048 40832425 35770311 45894540
## 2049 41240781 35951163 46530399
## 2050 41649136 36128748 47169524
## 2051 42057492 36303110 47811874
## 2052 42465847 36474290 48457405
## 2053 42874203 36642330 49106076
## 2054 43282558 36807269 49757848
## 2055 43690914 36969145 50412683
## 2056 44099269 37127994 51070544
## 2057 44507625 37283853 51731396
## 2058 44915980 37436755 52395205
## 2059 45324336 37586734 53061937
## 2060 45732691 37733822 53731560
## 2061 46141047 37878050 54404044
## 2062 46549402 38019447 55079357
## 2063 46957758 38158044 55757471
## 2064 47366113 38293868 56438358
## 2065 47774469 38426948 57121989
## 2066 48182824 38557310 57808338
## 2067 48591180 38684979 58497380
## 2068 48999535 38809982 59189088
## 2069 49407891 38932343 59883438
## 2070 49816246 39052086 60580406
# Draw the Texas forecast object with its title.
plot(pronosticoMA, main = "Poblacion TX")

# Nevada: re-apply the numeric coercion (str() reports the column as num),
# then build the frequency-1 series labelled from 1900.
datos[["Poblacion"]] <- as.numeric(datos[["Poblacion"]])
str(datos[["Poblacion"]])
## num [1:6020] 135000 158000 189000 205000 215000 222000 224000 231000 224000 224000 ...
df <- filter(datos, Estado == "NV")
df2 <- select(df, Poblacion)
ts1 <- ts(df2, start = 1900, frequency = 1)
print(ts1)
## Time Series:
## Start = 1900
## End = 2019
## Frequency = 1
## Poblacion
## [1,] 43000
## [2,] 45000
## [3,] 49000
## [4,] 52000
## [5,] 56000
## [6,] 59000
## [7,] 64000
## [8,] 68000
## [9,] 73000
## [10,] 78000
## [11,] 82000
## [12,] 81000
## [13,] 81000
## [14,] 82000
## [15,] 83000
## [16,] 82000
## [17,] 82000
## [18,] 81000
## [19,] 77000
## [20,] 77000
## [21,] 78000
## [22,] 80000
## [23,] 81000
## [24,] 82000
## [25,] 84000
## [26,] 85000
## [27,] 87000
## [28,] 88000
## [29,] 89000
## [30,] 90000
## [31,] 92000
## [32,] 94000
## [33,] 96000
## [34,] 96000
## [35,] 98000
## [36,] 100000
## [37,] 101000
## [38,] 103000
## [39,] 105000
## [40,] 107000
## [41,] 113000
## [42,] 120000
## [43,] 137000
## [44,] 151000
## [45,] 153000
## [46,] 149000
## [47,] 143000
## [48,] 149000
## [49,] 156000
## [50,] 157000
## [51,] 162000
## [52,] 168000
## [53,] 181000
## [54,] 195000
## [55,] 213000
## [56,] 237000
## [57,] 250000
## [58,] 260000
## [59,] 269000
## [60,] 279000
## [61,] 291000
## [62,] 315000
## [63,] 352000
## [64,] 397000
## [65,] 426000
## [66,] 444000
## [67,] 446000
## [68,] 449000
## [69,] 464000
## [70,] 480000
## [71,] 488738
## [72,] 520018
## [73,] 546789
## [74,] 568991
## [75,] 596822
## [76,] 619972
## [77,] 646975
## [78,] 678333
## [79,] 719436
## [80,] 765367
## [81,] 810215
## [82,] 847655
## [83,] 881537
## [84,] 901977
## [85,] 924922
## [86,] 951030
## [87,] 980613
## [88,] 1023376
## [89,] 1075022
## [90,] 1137382
## [91,] 1218629
## [92,] 1285046
## [93,] 1330694
## [94,] 1380197
## [95,] 1456388
## [96,] 1525777
## [97,] 1596476
## [98,] 1675581
## [99,] 1743772
## [100,] 1809253
## [101,] 2018741
## [102,] 2098399
## [103,] 2173791
## [104,] 2248850
## [105,] 2346222
## [106,] 2432143
## [107,] 2522658
## [108,] 2601072
## [109,] 2653630
## [110,] 2684665
## [111,] 2702405
## [112,] 2712730
## [113,] 2743996
## [114,] 2775970
## [115,] 2817628
## [116,] 2866939
## [117,] 2917563
## [118,] 2969905
## [119,] 3027341
## [120,] 3080156
# Nevada: fit an automatically selected ARIMA model to ts1. The object names
# keep their "MA" suffix even though ts1 now holds the NV series.
arimaMA <- auto.arima(ts1)
print(arimaMA)
## Series: ts1
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.5476
## s.e. 0.0879
##
## sigma^2 = 312757986: log likelihood = -1321.21
## AIC=2646.41 AICc=2646.52 BIC=2651.95

# Same fit, followed by the training-set error measures.
summary(arimaMA)
## Series: ts1
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.5476
## s.e. 0.0879
##
## sigma^2 = 312757986: log likelihood = -1321.21
## AIC=2646.41 AICc=2646.52 BIC=2651.95
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 927.0664 17462.5 8548.911 0.2684665 1.668587 0.33125 0.04030084

# Project 51 steps ahead with a 95% prediction interval and print the table.
pronosticoMA <- forecast(arimaMA, h = 51, level = 95)
print(pronosticoMA)
## Point Forecast Lo 95 Hi 95
## 2020 3132873 3098211 3167535
## 2021 3185590 3124468 3246713
## 2022 3238308 3148333 3328282
## 2023 3291025 3169486 3412564
## 2024 3343742 3187997 3499487
## 2025 3396459 3203996 3588922
## 2026 3449176 3217617 3680736
## 2027 3501894 3228980 3774807
## 2028 3554611 3238193 3871028
## 2029 3607328 3245352 3969304
## 2030 3660045 3250543 4069548
## 2031 3712762 3253840 4171685
## 2032 3765480 3255312 4275647
## 2033 3818197 3255020 4381373
## 2034 3870914 3253021 4488807
## 2035 3923631 3249365 4597897
## 2036 3976348 3244099 4708597
## 2037 4029066 3237267 4820865
## 2038 4081783 3228907 4934659
## 2039 4134500 3219057 5049943
## 2040 4187217 3207752 5166682
## 2041 4239934 3195024 5284845
## 2042 4292652 3180902 5404401
## 2043 4345369 3165415 5525323
## 2044 4398086 3148590 5647582
## 2045 4450803 3130452 5771154
## 2046 4503520 3111025 5896016
## 2047 4556238 3090332 6022144
## 2048 4608955 3068393 6149517
## 2049 4661672 3045229 6278115
## 2050 4714389 3020860 6407918
## 2051 4767106 2995304 6538909
## 2052 4819824 2968579 6671068
## 2053 4872541 2940702 6804380
## 2054 4925258 2911689 6938828
## 2055 4977975 2881555 7074396
## 2056 5030692 2850316 7211069
## 2057 5083410 2817985 7348834
## 2058 5136127 2784578 7487676
## 2059 5188844 2750106 7627582
## 2060 5241561 2714583 7768540
## 2061 5294278 2678021 7910536
## 2062 5346996 2640432 8053559
## 2063 5399713 2601828 8197598
## 2064 5452430 2562219 8342641
## 2065 5505147 2521617 8488677
## 2066 5557864 2480032 8635697
## 2067 5610582 2437475 8783689
## 2068 5663299 2393954 8932644
## 2069 5716016 2349480 9082552
## 2070 5768733 2304061 9233405
# Draw the Nevada forecast object with its title.
plot(pronosticoMA, main = "Poblacion NV")

# Forecast 51 years ahead for each selected state and average the point
# forecasts per decade.
#
# Corrections relative to the previous version of this chunk:
#   * Forecast years are read from the forecast's own time index instead of
#     the hard-coded 2023:2073, which did not match the 2020 start shown in
#     the recorded per-state forecast tables and shifted every value into
#     the wrong year.
#   * Decade bins are closed on the left ([2020, 2030), ...) so the first
#     forecast year is no longer excluded, and the last bin also contains its
#     upper break (2070) so no row ends up in an NA decade.
#   * Per-state results are collected in a preallocated list and bound once.
#   * The summary is returned ungrouped (`.groups = "drop"`).
selected_states <- c("MA", "NY", "CA", "TX", "NV")

forecast_pieces <- vector("list", length(selected_states))
names(forecast_pieces) <- selected_states

for (state in selected_states) {
  # ts() labels observations by position, so the rows must be in year order.
  df <- datos %>%
    filter(Estado == state) %>%
    arrange(Año)
  df2 <- df %>% select(Año, Poblacion)
  ts1 <- ts(data = df2$Poblacion, start = min(df2$Año), frequency = 1)
  arima_model <- auto.arima(ts1)
  forecast_result <- forecast(arima_model, level = c(95), h = 51)
  forecast_data <- data.frame(
    Estado = state,
    # The point forecast is itself a time series; its index holds the years.
    Year = as.numeric(time(forecast_result$mean)),
    Forecasted_Population = as.numeric(forecast_result$mean)
  )
  forecast_pieces[[state]] <- forecast_data
}
forecasted_population <- bind_rows(forecast_pieces)

average_population <- forecasted_population %>%
  mutate(
    Decade = as.numeric(cut(
      Year,
      breaks = seq(2020, 2070, by = 10),
      labels = FALSE,
      right = FALSE,
      include.lowest = TRUE
    ))
  ) %>%
  group_by(Estado, Decade) %>%
  summarise(
    Avg_Population = mean(Forecasted_Population, na.rm = TRUE),
    .groups = "drop"
  )
print(average_population)
# NOTE: the tibble previously recorded here was produced with the mislabelled
# years (it contained an NA decade); re-run this chunk to regenerate it.
# Split the decade averages into one table per decade and expose each one as
# an object named decade_<label> (decade_1, decade_2, ...).
decade_datasets <- split(average_population, f = average_population$Decade)

# Name each object after the decade label that split() attached, not after
# its position in the list: if a decade were ever absent, positional naming
# would silently shift every later decade onto the wrong object name.
for (decade_label in names(decade_datasets)) {
  assign(paste0("decade_", decade_label), decade_datasets[[decade_label]])
}

# Unnamed, position-indexed copy of the same per-decade tables.
decade_datasets_list <- unname(decade_datasets)
# Map for decade 1: outline all states, then fill each forecast state with a
# heat colour chosen from its average population.
#
# Corrections relative to the previous version of this chunk:
#   * The bare text line "Mapa decada 1" was not valid R; it is now this
#     comment.
#   * heat.colors() expects the NUMBER of colours; it was being handed the
#     population values themselves.
#   * The largest population fell on the upper break and indexed colour n + 1
#     (NA), leaving that state unfilled; `all.inside = TRUE` keeps the index
#     within 1..n.
abbreviation_to_fullname <- c(
  "MA" = "Massachusetts", "NV" = "Nevada", "NY" = "New York",
  "CA" = "California", "TX" = "Texas"
)
# map() is given full state names, so replace the abbreviations.
decade_1$Estado <- unname(abbreviation_to_fullname[decade_1$Estado])

map(database = "state")
color_vector <- heat.colors(length(unique(decade_1$Avg_Population)))
# Equal-width population bins, one per colour.
color_breaks <- seq(
  min(decade_1$Avg_Population),
  max(decade_1$Avg_Population),
  length.out = length(color_vector) + 1
)
grupo1 <- unique(decade_1$Estado)
for (i in seq_along(grupo1)) {
  state <- grupo1[i]
  population <- max(decade_1$Avg_Population[decade_1$Estado == state])
  color <- color_vector[
    findInterval(population, color_breaks, all.inside = TRUE)
  ]
  map(database = "state", regions = state, col = color, fill = TRUE, add = TRUE)
}

# Map for decade 2: outline all states, then fill each forecast state with a
# heat colour chosen from its average population.
#
# Corrections relative to the previous version of this chunk:
#   * The bare text line "Mapa decada 2" was not valid R; it is now this
#     comment.
#   * The loop iterated over grupo1 (the decade-1 states) instead of grupo2.
#   * The largest population fell on the upper break and indexed colour n + 1
#     (NA), leaving that state unfilled; `all.inside = TRUE` keeps the index
#     within 1..n.
abbreviation_to_fullname_2 <- c(
  "MA" = "Massachusetts", "NV" = "Nevada", "NY" = "New York",
  "CA" = "California", "TX" = "Texas"
)
# map() is given full state names, so replace the abbreviations.
decade_2$Estado <- unname(abbreviation_to_fullname_2[decade_2$Estado])

map(database = "state")
color_vector <- heat.colors(length(unique(decade_2$Avg_Population)))
# Equal-width population bins, one per colour.
color_breaks <- seq(
  min(decade_2$Avg_Population),
  max(decade_2$Avg_Population),
  length.out = length(color_vector) + 1
)
grupo2 <- unique(decade_2$Estado)
for (i in seq_along(grupo2)) {
  state <- grupo2[i]
  population <- max(decade_2$Avg_Population[decade_2$Estado == state])
  color <- color_vector[
    findInterval(population, color_breaks, all.inside = TRUE)
  ]
  map(database = "state", regions = state, col = color, fill = TRUE, add = TRUE)
}

# Map for decade 3: outline all states, then fill each forecast state with a
# heat colour chosen from its average population.
#
# Corrections relative to the previous version of this chunk:
#   * The bare text line "Mapa decada 3" was not valid R; it is now this
#     comment.
#   * The largest population fell on the upper break and indexed colour n + 1
#     (NA), leaving that state unfilled; `all.inside = TRUE` keeps the index
#     within 1..n.
abbreviation_to_fullname_3 <- c(
  "MA" = "Massachusetts", "NV" = "Nevada", "NY" = "New York",
  "CA" = "California", "TX" = "Texas"
)
# map() is given full state names, so replace the abbreviations.
decade_3$Estado <- unname(abbreviation_to_fullname_3[decade_3$Estado])

map(database = "state")
color_vector <- heat.colors(length(unique(decade_3$Avg_Population)))
# Equal-width population bins, one per colour.
color_breaks <- seq(
  min(decade_3$Avg_Population),
  max(decade_3$Avg_Population),
  length.out = length(color_vector) + 1
)
grupo3 <- unique(decade_3$Estado)
for (i in seq_along(grupo3)) {
  state <- grupo3[i]
  population <- max(decade_3$Avg_Population[decade_3$Estado == state])
  color <- color_vector[
    findInterval(population, color_breaks, all.inside = TRUE)
  ]
  map(database = "state", regions = state, col = color, fill = TRUE, add = TRUE)
}

# Map for decade 4: outline all states, then fill each forecast state with a
# heat colour chosen from its average population.
#
# Corrections relative to the previous version of this chunk:
#   * The bare text line "Mapa decada 4" was not valid R; it is now this
#     comment.
#   * The largest population fell on the upper break and indexed colour n + 1
#     (NA), leaving that state unfilled; `all.inside = TRUE` keeps the index
#     within 1..n.
abbreviation_to_fullname_4 <- c(
  "MA" = "Massachusetts", "NV" = "Nevada", "NY" = "New York",
  "CA" = "California", "TX" = "Texas"
)
# map() is given full state names, so replace the abbreviations.
decade_4$Estado <- unname(abbreviation_to_fullname_4[decade_4$Estado])

map(database = "state")
color_vector <- heat.colors(length(unique(decade_4$Avg_Population)))
# Equal-width population bins, one per colour.
color_breaks <- seq(
  min(decade_4$Avg_Population),
  max(decade_4$Avg_Population),
  length.out = length(color_vector) + 1
)
grupo4 <- unique(decade_4$Estado)
for (i in seq_along(grupo4)) {
  state <- grupo4[i]
  population <- max(decade_4$Avg_Population[decade_4$Estado == state])
  color <- color_vector[
    findInterval(population, color_breaks, all.inside = TRUE)
  ]
  map(database = "state", regions = state, col = color, fill = TRUE, add = TRUE)
}

# Map for decade 5: outline all states, then fill each forecast state with a
# heat colour chosen from its average population.
#
# Corrections relative to the previous version of this chunk:
#   * The bare text line "Mapa decada 5" was not valid R; it is now this
#     comment.
#   * The state list, the loop and the population lookup all read decade_4 /
#     grupo4, so this map was coloured from decade-4 values; they now read
#     decade_5 / grupo5 throughout.
#   * The largest population fell on the upper break and indexed colour n + 1
#     (NA), leaving that state unfilled; `all.inside = TRUE` keeps the index
#     within 1..n.
abbreviation_to_fullname_5 <- c(
  "MA" = "Massachusetts", "NV" = "Nevada", "NY" = "New York",
  "CA" = "California", "TX" = "Texas"
)
# map() is given full state names, so replace the abbreviations.
decade_5$Estado <- unname(abbreviation_to_fullname_5[decade_5$Estado])

map(database = "state")
color_vector <- heat.colors(length(unique(decade_5$Avg_Population)))
# Equal-width population bins, one per colour.
color_breaks <- seq(
  min(decade_5$Avg_Population),
  max(decade_5$Avg_Population),
  length.out = length(color_vector) + 1
)
grupo5 <- unique(decade_5$Estado)
for (i in seq_along(grupo5)) {
  state <- grupo5[i]
  population <- max(decade_5$Avg_Population[decade_5$Estado == state])
  color <- color_vector[
    findInterval(population, color_breaks, all.inside = TRUE)
  ]
  map(database = "state", regions = state, col = color, fill = TRUE, add = TRUE)
}

