# Rename all 80 columns of ICRISAT to snake_case names, assigned by position.
# Layout: 5 identifier columns, then an area / production / yield triplet for
# each of 23 crops, then 6 columns for crops that only report an area.
colnames(ICRISAT) <- local({
  id_cols <- c("dist_code", "year", "state_code", "state_name", "district_name")

  # Crops reported as an (area, production, yield) triplet, in column order.
  triplet_crops <- c(
    "rice", "wheat", "kharif_sorghum", "rabi_sorghum", "sorghum",
    "pearl_millet", "maize", "finger_millet", "barley", "chickpea",
    "pigeonpea", "minor_pulses", "groundnut", "sesamum", "rapeseed_mustard",
    "safflower", "castor", "linseed", "sunflower", "soyabean", "oilseeds",
    "sugarcane", "cotton"
  )
  measures <- c("area_1000ha", "production_1000tons", "yield_kg_per_ha")
  # rep(each = ...) repeats every crop once per measure; the measure suffixes
  # are recycled alongside, giving crop1 x 3 measures, crop2 x 3 measures, ...
  triplet_cols <- paste(
    rep(triplet_crops, each = length(measures)),
    measures,
    sep = "_"
  )

  # Crops for which only the cultivated area is reported.
  area_only_crops <- c(
    "fruits", "vegetables", "fruits_vegetables",
    "potatoes", "onion", "fodder"
  )
  area_only_cols <- paste0(area_only_crops, "_area_1000ha")

  c(id_cols, triplet_cols, area_only_cols)
})
colnames(ICRISAT)## [1] "dist_code"
## [2] "year"
## [3] "state_code"
## [4] "state_name"
## [5] "district_name"
## [6] "rice_area_1000ha"
## [7] "rice_production_1000tons"
## [8] "rice_yield_kg_per_ha"
## [9] "wheat_area_1000ha"
## [10] "wheat_production_1000tons"
## [11] "wheat_yield_kg_per_ha"
## [12] "kharif_sorghum_area_1000ha"
## [13] "kharif_sorghum_production_1000tons"
## [14] "kharif_sorghum_yield_kg_per_ha"
## [15] "rabi_sorghum_area_1000ha"
## [16] "rabi_sorghum_production_1000tons"
## [17] "rabi_sorghum_yield_kg_per_ha"
## [18] "sorghum_area_1000ha"
## [19] "sorghum_production_1000tons"
## [20] "sorghum_yield_kg_per_ha"
## [21] "pearl_millet_area_1000ha"
## [22] "pearl_millet_production_1000tons"
## [23] "pearl_millet_yield_kg_per_ha"
## [24] "maize_area_1000ha"
## [25] "maize_production_1000tons"
## [26] "maize_yield_kg_per_ha"
## [27] "finger_millet_area_1000ha"
## [28] "finger_millet_production_1000tons"
## [29] "finger_millet_yield_kg_per_ha"
## [30] "barley_area_1000ha"
## [31] "barley_production_1000tons"
## [32] "barley_yield_kg_per_ha"
## [33] "chickpea_area_1000ha"
## [34] "chickpea_production_1000tons"
## [35] "chickpea_yield_kg_per_ha"
## [36] "pigeonpea_area_1000ha"
## [37] "pigeonpea_production_1000tons"
## [38] "pigeonpea_yield_kg_per_ha"
## [39] "minor_pulses_area_1000ha"
## [40] "minor_pulses_production_1000tons"
## [41] "minor_pulses_yield_kg_per_ha"
## [42] "groundnut_area_1000ha"
## [43] "groundnut_production_1000tons"
## [44] "groundnut_yield_kg_per_ha"
## [45] "sesamum_area_1000ha"
## [46] "sesamum_production_1000tons"
## [47] "sesamum_yield_kg_per_ha"
## [48] "rapeseed_mustard_area_1000ha"
## [49] "rapeseed_mustard_production_1000tons"
## [50] "rapeseed_mustard_yield_kg_per_ha"
## [51] "safflower_area_1000ha"
## [52] "safflower_production_1000tons"
## [53] "safflower_yield_kg_per_ha"
## [54] "castor_area_1000ha"
## [55] "castor_production_1000tons"
## [56] "castor_yield_kg_per_ha"
## [57] "linseed_area_1000ha"
## [58] "linseed_production_1000tons"
## [59] "linseed_yield_kg_per_ha"
## [60] "sunflower_area_1000ha"
## [61] "sunflower_production_1000tons"
## [62] "sunflower_yield_kg_per_ha"
## [63] "soyabean_area_1000ha"
## [64] "soyabean_production_1000tons"
## [65] "soyabean_yield_kg_per_ha"
## [66] "oilseeds_area_1000ha"
## [67] "oilseeds_production_1000tons"
## [68] "oilseeds_yield_kg_per_ha"
## [69] "sugarcane_area_1000ha"
## [70] "sugarcane_production_1000tons"
## [71] "sugarcane_yield_kg_per_ha"
## [72] "cotton_area_1000ha"
## [73] "cotton_production_1000tons"
## [74] "cotton_yield_kg_per_ha"
## [75] "fruits_area_1000ha"
## [76] "vegetables_area_1000ha"
## [77] "fruits_vegetables_area_1000ha"
## [78] "potatoes_area_1000ha"
## [79] "onion_area_1000ha"
## [80] "fodder_area_1000ha"
## spc_tbl_ [16,146 × 80] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ dist_code : num [1:16146] 1 1 1 1 1 1 1 1 1 1 ...
## $ year : num [1:16146] 1966 1967 1968 1969 1970 ...
## $ state_code : num [1:16146] 14 14 14 14 14 14 14 14 14 14 ...
## $ state_name : chr [1:16146] "Chhattisgarh" "Chhattisgarh" "Chhattisgarh" "Chhattisgarh" ...
## $ district_name : chr [1:16146] "Durg" "Durg" "Durg" "Durg" ...
## $ rice_area_1000ha : num [1:16146] 548 547 556 563 572 ...
## $ rice_production_1000tons : num [1:16146] 185 409 468 401 474 ...
## $ rice_yield_kg_per_ha : num [1:16146] 338 748 841 711 829 ...
## $ wheat_area_1000ha : num [1:16146] 44 50 53.7 49.4 44.2 44.4 39.6 37.3 36.5 49.2 ...
## $ wheat_production_1000tons : num [1:16146] 20 26 30 26.5 29 25.8 20.6 18.6 22.4 27.8 ...
## $ wheat_yield_kg_per_ha : num [1:16146] 455 520 559 536 656 ...
## $ kharif_sorghum_area_1000ha : num [1:16146] 0.6 1.1 0.5 0.8 0.9 0.3 0.3 0.2 0.5 0.2 ...
## $ kharif_sorghum_production_1000tons : num [1:16146] 0.4 0.9 0.4 0.6 0.6 0.2 0.3 0.2 0.4 0.2 ...
## $ kharif_sorghum_yield_kg_per_ha : num [1:16146] 667 818 800 750 667 ...
## $ rabi_sorghum_area_1000ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ rabi_sorghum_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ rabi_sorghum_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ sorghum_area_1000ha : num [1:16146] 0.6 1.1 0.5 0.8 0.9 0.3 0.3 0.2 0.5 0.2 ...
## $ sorghum_production_1000tons : num [1:16146] 0.4 0.9 0.4 0.6 0.6 0.2 0.3 0.2 0.4 0.2 ...
## $ sorghum_yield_kg_per_ha : num [1:16146] 667 818 800 750 667 ...
## $ pearl_millet_area_1000ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ pearl_millet_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ pearl_millet_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ maize_area_1000ha : num [1:16146] 3 3 2.8 2.7 2.5 2.7 2.8 2.9 2.9 2.9 ...
## $ maize_production_1000tons : num [1:16146] 2 3 2 2.3 3.3 3.1 3.2 2.7 2.9 2.9 ...
## $ maize_yield_kg_per_ha : num [1:16146] 667 1000 714 852 1320 ...
## $ finger_millet_area_1000ha : num [1:16146] 0.8 0.9 0.8 0.8 0.8 0.9 0.8 0.8 0.8 0.8 ...
## $ finger_millet_production_1000tons : num [1:16146] 0.2 0.2 0.2 0.2 0.2 0.2 0.1 0.2 0.3 0.2 ...
## $ finger_millet_yield_kg_per_ha : num [1:16146] 250 222 250 250 250 ...
## $ barley_area_1000ha : num [1:16146] 0.1 0.2 0.2 0.2 0.1 0.2 0.1 0.1 0.2 0.2 ...
## $ barley_production_1000tons : num [1:16146] 0 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.2 ...
## $ barley_yield_kg_per_ha : num [1:16146] 0 500 500 500 1000 500 1000 1000 500 1000 ...
## $ chickpea_area_1000ha : num [1:16146] 54 52 51.3 52.6 53.3 50.3 52.3 56.4 54.9 58.3 ...
## $ chickpea_production_1000tons : num [1:16146] 27 15 23 23.1 32.6 32.8 32.6 32.4 41 30.2 ...
## $ chickpea_yield_kg_per_ha : num [1:16146] 500 288 448 439 612 ...
## $ pigeonpea_area_1000ha : num [1:16146] 37 36 35.7 35.7 33.3 33.9 32.8 32.2 32.5 34.2 ...
## $ pigeonpea_production_1000tons : num [1:16146] 15 26 28 27.3 35.2 34.6 42.4 22.8 37.6 26.7 ...
## $ pigeonpea_yield_kg_per_ha : num [1:16146] 405 722 784 765 1057 ...
## $ minor_pulses_area_1000ha : num [1:16146] 115 270 289 298 343 ...
## $ minor_pulses_production_1000tons : num [1:16146] -1 -1 -1 98.3 142.9 ...
## $ minor_pulses_yield_kg_per_ha : num [1:16146] -1 -1 -1 330 417 ...
## $ groundnut_area_1000ha : num [1:16146] 0.2 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0 0 ...
## $ groundnut_production_1000tons : num [1:16146] 0.1 0.1 0.1 0.1 0.1 0 0 0 0 0 ...
## $ groundnut_yield_kg_per_ha : num [1:16146] 500 1000 1000 1000 1000 0 0 0 0 0 ...
## $ sesamum_area_1000ha : num [1:16146] 3 11.6 11.3 11.4 10.8 10.8 10.4 9.6 9.9 8.6 ...
## $ sesamum_production_1000tons : num [1:16146] 0.3 0.9 1.6 1.4 1.1 1 1.9 1.3 1.4 0.9 ...
## $ sesamum_yield_kg_per_ha : num [1:16146] 100 77.6 141.6 122.8 101.8 ...
## $ rapeseed_mustard_area_1000ha : num [1:16146] 1 1.1 1.2 1.2 1.1 1.1 1.2 1.2 1.4 1.6 ...
## $ rapeseed_mustard_production_1000tons: num [1:16146] 0.2 0.3 0.5 0.4 0.4 0.4 0.7 0.7 0.8 0.9 ...
## $ rapeseed_mustard_yield_kg_per_ha : num [1:16146] 200 273 417 333 364 ...
## $ safflower_area_1000ha : num [1:16146] 0.07 0.01 0.02 0.02 0.02 0.07 0 0 0 0.1 ...
## $ safflower_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ safflower_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ castor_area_1000ha : num [1:16146] 0.7 0.5 0.6 0.5 0.4 0.3 0.3 0.2 0.2 0.2 ...
## $ castor_production_1000tons : num [1:16146] 0.2 0.1 0.3 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ castor_yield_kg_per_ha : num [1:16146] 286 200 500 200 250 ...
## $ linseed_area_1000ha : num [1:16146] 45.7 100.1 113.3 101.1 114.1 ...
## $ linseed_production_1000tons : num [1:16146] 6.8 25.8 23.8 13 22.7 18.5 22.7 28 21.4 28.7 ...
## $ linseed_yield_kg_per_ha : num [1:16146] 149 258 210 129 199 ...
## $ sunflower_area_1000ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ sunflower_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ sunflower_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ soyabean_area_1000ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ soyabean_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ soyabean_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ oilseeds_area_1000ha : num [1:16146] 50.7 113.5 126.6 114.1 126.4 ...
## $ oilseeds_production_1000tons : num [1:16146] -1 -1 -1 14.9 24.3 20.2 25.4 30.3 23.9 30.8 ...
## $ oilseeds_yield_kg_per_ha : num [1:16146] -1 -1 -1 131 192 ...
## $ sugarcane_area_1000ha : num [1:16146] 0.9 0.8 1 1 0.7 0.5 0.5 0.2 0.8 0.8 ...
## $ sugarcane_production_1000tons : num [1:16146] 1.6 1.2 1 1.9 1.4 1 1 1.2 1.5 1.4 ...
## $ sugarcane_yield_kg_per_ha : num [1:16146] 1778 1500 1000 1900 2000 ...
## $ cotton_area_1000ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ cotton_production_1000tons : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ cotton_yield_kg_per_ha : num [1:16146] 0 0 0 0 0 0 0 0 0 0 ...
## $ fruits_area_1000ha : num [1:16146] 5.95 5.77 5.41 5.52 5.45 5.42 5.48 5.3 5.21 5.11 ...
## $ vegetables_area_1000ha : num [1:16146] 6.64 7.24 7.4 7.16 7.19 7.48 7.53 7.6 7.44 7.86 ...
## $ fruits_vegetables_area_1000ha : num [1:16146] 12.6 13 12.8 12.7 12.6 ...
## $ potatoes_area_1000ha : num [1:16146] 0.01 0.01 0.1 0.01 0.02 0.01 0.01 0.01 0.03 0.05 ...
## $ onion_area_1000ha : num [1:16146] 0.6 0.56 0.58 0.56 0.52 0.54 0.55 0.53 0.45 0.52 ...
## $ fodder_area_1000ha : num [1:16146] 0.47 1.23 1.02 0.84 0.42 0.38 0.26 0.14 0.06 0.08 ...
## - attr(*, "spec")=
## .. cols(
## .. `Dist Code` = col_double(),
## .. Year = col_double(),
## .. `State Code` = col_double(),
## .. `State Name` = col_character(),
## .. `Dist Name` = col_character(),
## .. `RICE AREA (1000 ha)` = col_double(),
## .. `RICE PRODUCTION (1000 tons)` = col_double(),
## .. `RICE YIELD (Kg per ha)` = col_double(),
## .. `WHEAT AREA (1000 ha)` = col_double(),
## .. `WHEAT PRODUCTION (1000 tons)` = col_double(),
## .. `WHEAT YIELD (Kg per ha)` = col_double(),
## .. `KHARIF SORGHUM AREA (1000 ha)` = col_double(),
## .. `KHARIF SORGHUM PRODUCTION (1000 tons)` = col_double(),
## .. `KHARIF SORGHUM YIELD (Kg per ha)` = col_double(),
## .. `RABI SORGHUM AREA (1000 ha)` = col_double(),
## .. `RABI SORGHUM PRODUCTION (1000 tons)` = col_double(),
## .. `RABI SORGHUM YIELD (Kg per ha)` = col_double(),
## .. `SORGHUM AREA (1000 ha)` = col_double(),
## .. `SORGHUM PRODUCTION (1000 tons)` = col_double(),
## .. `SORGHUM YIELD (Kg per ha)` = col_double(),
## .. `PEARL MILLET AREA (1000 ha)` = col_double(),
## .. `PEARL MILLET PRODUCTION (1000 tons)` = col_double(),
## .. `PEARL MILLET YIELD (Kg per ha)` = col_double(),
## .. `MAIZE AREA (1000 ha)` = col_double(),
## .. `MAIZE PRODUCTION (1000 tons)` = col_double(),
## .. `MAIZE YIELD (Kg per ha)` = col_double(),
## .. `FINGER MILLET AREA (1000 ha)` = col_double(),
## .. `FINGER MILLET PRODUCTION (1000 tons)` = col_double(),
## .. `FINGER MILLET YIELD (Kg per ha)` = col_double(),
## .. `BARLEY AREA (1000 ha)` = col_double(),
## .. `BARLEY PRODUCTION (1000 tons)` = col_double(),
## .. `BARLEY YIELD (Kg per ha)` = col_double(),
## .. `CHICKPEA AREA (1000 ha)` = col_double(),
## .. `CHICKPEA PRODUCTION (1000 tons)` = col_double(),
## .. `CHICKPEA YIELD (Kg per ha)` = col_double(),
## .. `PIGEONPEA AREA (1000 ha)` = col_double(),
## .. `PIGEONPEA PRODUCTION (1000 tons)` = col_double(),
## .. `PIGEONPEA YIELD (Kg per ha)` = col_double(),
## .. `MINOR PULSES AREA (1000 ha)` = col_double(),
## .. `MINOR PULSES PRODUCTION (1000 tons)` = col_double(),
## .. `MINOR PULSES YIELD (Kg per ha)` = col_double(),
## .. `GROUNDNUT AREA (1000 ha)` = col_double(),
## .. `GROUNDNUT PRODUCTION (1000 tons)` = col_double(),
## .. `GROUNDNUT YIELD (Kg per ha)` = col_double(),
## .. `SESAMUM AREA (1000 ha)` = col_double(),
## .. `SESAMUM PRODUCTION (1000 tons)` = col_double(),
## .. `SESAMUM YIELD (Kg per ha)` = col_double(),
## .. `RAPESEED AND MUSTARD AREA (1000 ha)` = col_double(),
## .. `RAPESEED AND MUSTARD PRODUCTION (1000 tons)` = col_double(),
## .. `RAPESEED AND MUSTARD YIELD (Kg per ha)` = col_double(),
## .. `SAFFLOWER AREA (1000 ha)` = col_double(),
## .. `SAFFLOWER PRODUCTION (1000 tons)` = col_double(),
## .. `SAFFLOWER YIELD (Kg per ha)` = col_double(),
## .. `CASTOR AREA (1000 ha)` = col_double(),
## .. `CASTOR PRODUCTION (1000 tons)` = col_double(),
## .. `CASTOR YIELD (Kg per ha)` = col_double(),
## .. `LINSEED AREA (1000 ha)` = col_double(),
## .. `LINSEED PRODUCTION (1000 tons)` = col_double(),
## .. `LINSEED YIELD (Kg per ha)` = col_double(),
## .. `SUNFLOWER AREA (1000 ha)` = col_double(),
## .. `SUNFLOWER PRODUCTION (1000 tons)` = col_double(),
## .. `SUNFLOWER YIELD (Kg per ha)` = col_double(),
## .. `SOYABEAN AREA (1000 ha)` = col_double(),
## .. `SOYABEAN PRODUCTION (1000 tons)` = col_double(),
## .. `SOYABEAN YIELD (Kg per ha)` = col_double(),
## .. `OILSEEDS AREA (1000 ha)` = col_double(),
## .. `OILSEEDS PRODUCTION (1000 tons)` = col_double(),
## .. `OILSEEDS YIELD (Kg per ha)` = col_double(),
## .. `SUGARCANE AREA (1000 ha)` = col_double(),
## .. `SUGARCANE PRODUCTION (1000 tons)` = col_double(),
## .. `SUGARCANE YIELD (Kg per ha)` = col_double(),
## .. `COTTON AREA (1000 ha)` = col_double(),
## .. `COTTON PRODUCTION (1000 tons)` = col_double(),
## .. `COTTON YIELD (Kg per ha)` = col_double(),
## .. `FRUITS AREA (1000 ha)` = col_double(),
## .. `VEGETABLES AREA (1000 ha)` = col_double(),
## .. `FRUITS AND VEGETABLES AREA (1000 ha)` = col_double(),
## .. `POTATOES AREA (1000 ha)` = col_double(),
## .. `ONION AREA (1000 ha)` = col_double(),
## .. `FODDER AREA (1000 ha)` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
Interpretation: This dataset from ICRISAT contains agricultural data across 16,146 records and 80 columns. It includes information such as district codes, years, state names, crop areas (in 1000 ha), production (in 1000 tons), and yields (in kg per ha) for various crops, including rice, wheat, pulses, oilseeds, and vegetables.
## dist_code year
## 0 0
## state_code state_name
## 0 0
## district_name rice_area_1000ha
## 0 0
## rice_production_1000tons rice_yield_kg_per_ha
## 0 0
## wheat_area_1000ha wheat_production_1000tons
## 0 0
## wheat_yield_kg_per_ha kharif_sorghum_area_1000ha
## 0 0
## kharif_sorghum_production_1000tons kharif_sorghum_yield_kg_per_ha
## 0 0
## rabi_sorghum_area_1000ha rabi_sorghum_production_1000tons
## 0 0
## rabi_sorghum_yield_kg_per_ha sorghum_area_1000ha
## 0 0
## sorghum_production_1000tons sorghum_yield_kg_per_ha
## 0 0
## pearl_millet_area_1000ha pearl_millet_production_1000tons
## 0 0
## pearl_millet_yield_kg_per_ha maize_area_1000ha
## 0 0
## maize_production_1000tons maize_yield_kg_per_ha
## 0 0
## finger_millet_area_1000ha finger_millet_production_1000tons
## 0 0
## finger_millet_yield_kg_per_ha barley_area_1000ha
## 0 0
## barley_production_1000tons barley_yield_kg_per_ha
## 0 0
## chickpea_area_1000ha chickpea_production_1000tons
## 0 0
## chickpea_yield_kg_per_ha pigeonpea_area_1000ha
## 0 0
## pigeonpea_production_1000tons pigeonpea_yield_kg_per_ha
## 0 0
## minor_pulses_area_1000ha minor_pulses_production_1000tons
## 0 0
## minor_pulses_yield_kg_per_ha groundnut_area_1000ha
## 0 0
## groundnut_production_1000tons groundnut_yield_kg_per_ha
## 0 0
## sesamum_area_1000ha sesamum_production_1000tons
## 0 0
## sesamum_yield_kg_per_ha rapeseed_mustard_area_1000ha
## 0 0
## rapeseed_mustard_production_1000tons rapeseed_mustard_yield_kg_per_ha
## 0 0
## safflower_area_1000ha safflower_production_1000tons
## 0 0
## safflower_yield_kg_per_ha castor_area_1000ha
## 0 0
## castor_production_1000tons castor_yield_kg_per_ha
## 0 0
## linseed_area_1000ha linseed_production_1000tons
## 0 0
## linseed_yield_kg_per_ha sunflower_area_1000ha
## 0 0
## sunflower_production_1000tons sunflower_yield_kg_per_ha
## 0 0
## soyabean_area_1000ha soyabean_production_1000tons
## 0 0
## soyabean_yield_kg_per_ha oilseeds_area_1000ha
## 0 0
## oilseeds_production_1000tons oilseeds_yield_kg_per_ha
## 0 0
## sugarcane_area_1000ha sugarcane_production_1000tons
## 0 0
## sugarcane_yield_kg_per_ha cotton_area_1000ha
## 0 0
## cotton_production_1000tons cotton_yield_kg_per_ha
## 0 0
## fruits_area_1000ha vegetables_area_1000ha
## 0 0
## fruits_vegetables_area_1000ha potatoes_area_1000ha
## 0 0
## onion_area_1000ha fodder_area_1000ha
## 0 0
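Interpretation: The dataset has no NA values in any column, so no rows are dropped by missing-value handling during filtering, grouping, and feature engineering. However, the structure output above shows -1 entries in some columns (for example, minor pulses and oilseeds production and yield), which appear to be placeholders for unavailable data rather than real measurements. These entries should be checked and treated as missing before computing statistics; otherwise averages and totals will be biased. With that caveat, the data can be used for descriptive and predictive analysis.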
# Average yield (kg per ha) of every crop across all records.
# Yield columns are selected by name: every column whose name contains "yield".
yield_columns <- grep("yield", names(ICRISAT), value = TRUE)
yield_values <- ICRISAT[, yield_columns]
# A yield cannot be negative; the -1 entries present in the data (e.g. in the
# minor pulses and oilseeds yield columns) look like "not available"
# placeholders. They are set to NA so that na.rm = TRUE leaves them out of the
# mean instead of letting them pull it down.
# NOTE(review): confirm that -1 is the data source's missing-value code.
yield_values[] <- lapply(yield_values, function(v) replace(v, v < 0, NA))
average_yields <- colMeans(yield_values, na.rm = TRUE)
average_yields## rice_yield_kg_per_ha wheat_yield_kg_per_ha
## 1486.92478 1492.41986
## kharif_sorghum_yield_kg_per_ha rabi_sorghum_yield_kg_per_ha
## 586.03107 225.63576
## sorghum_yield_kg_per_ha pearl_millet_yield_kg_per_ha
## 586.09356 517.91709
## maize_yield_kg_per_ha finger_millet_yield_kg_per_ha
## 1408.76322 354.84912
## barley_yield_kg_per_ha chickpea_yield_kg_per_ha
## 734.24637 630.81819
## pigeonpea_yield_kg_per_ha minor_pulses_yield_kg_per_ha
## 618.02017 453.63263
## groundnut_yield_kg_per_ha sesamum_yield_kg_per_ha
## 765.94765 264.72901
## rapeseed_mustard_yield_kg_per_ha safflower_yield_kg_per_ha
## 497.95363 73.04832
## castor_yield_kg_per_ha linseed_yield_kg_per_ha
## 215.40571 175.54807
## sunflower_yield_kg_per_ha soyabean_yield_kg_per_ha
## 278.44970 242.96065
## oilseeds_yield_kg_per_ha sugarcane_yield_kg_per_ha
## 593.65962 4500.15306
## cotton_yield_kg_per_ha
## 124.64482
Interpretation: The dataset shows sugarcane has the highest yield (4500.15 kg/ha), while safflower has the lowest (73.05 kg/ha). Cereals like rice (1486.92 kg/ha) and wheat (1492.42 kg/ha) yield more than pulses and oilseeds. Rabi sorghum (225.64 kg/ha) yields lower than Kharif sorghum (586.03 kg/ha), highlighting seasonal differences.
# Create a new column Total_Production by adding the production of all crops
# for each record (all production columns are in 1000 tons).
production_columns <- grep("production", names(ICRISAT), value = TRUE)
# sorghum_* is the kharif + rabi total and oilseeds_* is the total over the
# individual oilseed crops, so adding them together with their components
# would count that production twice. Only the component columns are summed.
# NOTE(review): the aggregate nature of these two columns is inferred from the
# column names and the values shown in the data; confirm against the source.
production_columns <- setdiff(
  production_columns,
  c("sorghum_production_1000tons", "oilseeds_production_1000tons")
)
production_values <- ICRISAT[, production_columns]
# Production cannot be negative; the -1 entries look like "not available"
# placeholders, so they are treated as missing instead of being subtracted.
production_values[] <- lapply(production_values, function(v) replace(v, v < 0, NA))
ICRISAT$Total_Production <- rowSums(production_values, na.rm = TRUE)
# aggregate() sums up total production district-wise.
district_production <- aggregate(Total_Production ~ district_name, data = ICRISAT, sum)
# order() sorts districts from highest to lowest, and we pick the top 10.
top_districts <- district_production[order(-district_production$Total_Production), ][1:10, ]
print(top_districts)## district_name Total_Production
## 101 Ferozpur 204482.8
## 120 Hissar 172299.0
## 154 Karnal 155121.1
## 255 Sangrur 135274.1
## 42 Bhatinda 125960.4
## 221 Patiala 110206.3
## 191 Meerut 109794.8
## 193 Midnapur 107328.6
## 16 Amritsar 104968.6
## 102 Ganganagar 102189.8
Interpretation: The top 10 districts with the highest total crop production include Ferozpur (204,482.8), Hissar (172,299.0), and Karnal (155,121.1) as the leading contributors. Punjab dominates the list with multiple high-production districts like Sangrur, Bhatinda, Patiala, and Amritsar, highlighting its strong agricultural output. Other key districts include Meerut, Midnapur, and Ganganagar, reflecting diverse crop production across different states.
# Rank every record by rice production (largest first) and keep the five
# largest, showing only the identifying columns and the production figure.
rice_report_columns <- c("state_name", "district_name", "year", "rice_production_1000tons")
rice_rank <- order(-ICRISAT$rice_production_1000tons)
top_rice_production <- ICRISAT[rice_rank, rice_report_columns][1:5, ]
print(top_rice_production)## # A tibble: 5 × 4
## state_name district_name year rice_production_1000tons
## <chr> <chr> <dbl> <dbl>
## 1 West Bengal Midnapur 2015 3215.
## 2 West Bengal Midnapur 2014 3153.
## 3 West Bengal Midnapur 2017 3002.
## 4 West Bengal Midnapur 2010 2947.
## 5 West Bengal Midnapur 2012 2940.
Interpretation: The top 5 highest rice production records all belong to Midnapur district in West Bengal, indicating it is a major rice-producing region. The highest recorded production was 3,215.01 thousand tons in 2015, with consistently high production in the other listed years (2010, 2012, 2014, and 2017). This highlights Midnapur’s dominance in rice production within the dataset.
# Total minor-pulses area per state, summed over all of the state's records.
pulses_area_by_state <- aggregate(
  minor_pulses_area_1000ha ~ state_name,
  data = ICRISAT,
  FUN = sum
)
# Rank the states by that total, largest first, and keep the first ten.
pulses_rank <- order(-pulses_area_by_state$minor_pulses_area_1000ha)
top_10_states_pulses <- pulses_area_by_state[pulses_rank, ][1:10, ]
print(top_10_states_pulses)## state_name minor_pulses_area_1000ha
## 15 Rajasthan 112277.68
## 12 Maharashtra 85307.95
## 13 Orissa 78237.72
## 11 Madhya Pradesh 73514.80
## 18 Uttar Pradesh 64293.63
## 9 Karnataka 49346.49
## 4 Chhattisgarh 36450.44
## 3 Bihar 32472.02
## 1 Andhra Pradesh 31438.27
## 16 Tamil Nadu 27775.98
Interpretation: Rajasthan has the largest cumulative area under minor pulses, 112,277.68 thousand ha summed over all years in the dataset, followed by Maharashtra (85,307.95), Orissa (Odisha; 78,237.72), and Madhya Pradesh (73,514.80). Uttar Pradesh ranks fifth (64,293.63). Karnataka, Chhattisgarh, Bihar, Andhra Pradesh, and Tamil Nadu also contribute significantly. This indicates Rajasthan’s dominance in minor pulse cultivation by area, with the other states also having substantial area under these crops.
# Create a Month column based on the year.
# NOTE: only the year is available, so each date is built as <year>-01-01 and
# Month is therefore "01" for every record. The count below cannot tell
# sowing months apart; it simply returns the total number of records.
record_dates <- as.Date(paste(ICRISAT$year, "01", "01", sep = "-"))
ICRISAT$Month <- format(record_dates, "%m")
# Count the records per Month and keep the month with the most records.
month_counts <- summarise(group_by(ICRISAT, Month), Count = n())
busiest_month <- head(arrange(month_counts, desc(Count)), 1)
print(busiest_month)## # A tibble: 1 × 2
## Month Count
## <chr> <int>
## 1 01 16146
Interpretation: The result shows Month 01 (January) with 16,146 records, i.e. every record in the dataset. This is an artifact of how the Month column is built: the data contains only a year, and each date is constructed as 1 January of that year, so all records fall in January by construction. The output therefore says nothing about when sowing actually takes place; identifying the busiest sowing month would require data with real sowing dates or crop-season information.
# Calculate average rice yield for each state-year, then the year-over-year
# change in that average within each state.
ICRISAT_yield_summary <- ICRISAT %>%
  group_by(state_name, year) %>%
  summarise(Average_Yield = mean(rice_yield_kg_per_ha, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(state_name, year) %>%
  # Add Yield Drop column. The difference is taken inside each state so that a
  # state's first year gets NA instead of being compared with the last year of
  # the previous state in the table.
  group_by(state_name) %>%
  mutate(Yield_Drop = Average_Yield - dplyr::lag(Average_Yield)) %>%
  ungroup()
# Count how many times each state had a yield drop and find the one with most failures.
# Rows with Yield_Drop = NA (each state's first year) are dropped by filter().
crop_failure_state <- ICRISAT_yield_summary %>%
  filter(Yield_Drop < 0) %>%
  group_by(state_name) %>%
  summarise(Failure_Count = n()) %>%
  arrange(desc(Failure_Count)) %>%
  head(1)
print(crop_failure_state)## # A tibble: 1 × 2
## state_name Failure_Count
## <chr> <int>
## 1 Maharashtra 28
Interpretation: Maharashtra experienced the highest number of crop failures, with 28 instances of yield decline over the recorded years. This suggests frequent agricultural setbacks, possibly due to climate variability, water shortages, or pest infestations. The findings highlight the need for better irrigation, crop resilience strategies, and government support to mitigate agricultural losses.
# Group data by district, calculate average rice yield, and sort from highest to lowest.
colnames(ICRISAT)## [1] "dist_code"
## [2] "year"
## [3] "state_code"
## [4] "state_name"
## [5] "district_name"
## [6] "rice_area_1000ha"
## [7] "rice_production_1000tons"
## [8] "rice_yield_kg_per_ha"
## [9] "wheat_area_1000ha"
## [10] "wheat_production_1000tons"
## [11] "wheat_yield_kg_per_ha"
## [12] "kharif_sorghum_area_1000ha"
## [13] "kharif_sorghum_production_1000tons"
## [14] "kharif_sorghum_yield_kg_per_ha"
## [15] "rabi_sorghum_area_1000ha"
## [16] "rabi_sorghum_production_1000tons"
## [17] "rabi_sorghum_yield_kg_per_ha"
## [18] "sorghum_area_1000ha"
## [19] "sorghum_production_1000tons"
## [20] "sorghum_yield_kg_per_ha"
## [21] "pearl_millet_area_1000ha"
## [22] "pearl_millet_production_1000tons"
## [23] "pearl_millet_yield_kg_per_ha"
## [24] "maize_area_1000ha"
## [25] "maize_production_1000tons"
## [26] "maize_yield_kg_per_ha"
## [27] "finger_millet_area_1000ha"
## [28] "finger_millet_production_1000tons"
## [29] "finger_millet_yield_kg_per_ha"
## [30] "barley_area_1000ha"
## [31] "barley_production_1000tons"
## [32] "barley_yield_kg_per_ha"
## [33] "chickpea_area_1000ha"
## [34] "chickpea_production_1000tons"
## [35] "chickpea_yield_kg_per_ha"
## [36] "pigeonpea_area_1000ha"
## [37] "pigeonpea_production_1000tons"
## [38] "pigeonpea_yield_kg_per_ha"
## [39] "minor_pulses_area_1000ha"
## [40] "minor_pulses_production_1000tons"
## [41] "minor_pulses_yield_kg_per_ha"
## [42] "groundnut_area_1000ha"
## [43] "groundnut_production_1000tons"
## [44] "groundnut_yield_kg_per_ha"
## [45] "sesamum_area_1000ha"
## [46] "sesamum_production_1000tons"
## [47] "sesamum_yield_kg_per_ha"
## [48] "rapeseed_mustard_area_1000ha"
## [49] "rapeseed_mustard_production_1000tons"
## [50] "rapeseed_mustard_yield_kg_per_ha"
## [51] "safflower_area_1000ha"
## [52] "safflower_production_1000tons"
## [53] "safflower_yield_kg_per_ha"
## [54] "castor_area_1000ha"
## [55] "castor_production_1000tons"
## [56] "castor_yield_kg_per_ha"
## [57] "linseed_area_1000ha"
## [58] "linseed_production_1000tons"
## [59] "linseed_yield_kg_per_ha"
## [60] "sunflower_area_1000ha"
## [61] "sunflower_production_1000tons"
## [62] "sunflower_yield_kg_per_ha"
## [63] "soyabean_area_1000ha"
## [64] "soyabean_production_1000tons"
## [65] "soyabean_yield_kg_per_ha"
## [66] "oilseeds_area_1000ha"
## [67] "oilseeds_production_1000tons"
## [68] "oilseeds_yield_kg_per_ha"
## [69] "sugarcane_area_1000ha"
## [70] "sugarcane_production_1000tons"
## [71] "sugarcane_yield_kg_per_ha"
## [72] "cotton_area_1000ha"
## [73] "cotton_production_1000tons"
## [74] "cotton_yield_kg_per_ha"
## [75] "fruits_area_1000ha"
## [76] "vegetables_area_1000ha"
## [77] "fruits_vegetables_area_1000ha"
## [78] "potatoes_area_1000ha"
## [79] "onion_area_1000ha"
## [80] "fodder_area_1000ha"
## [81] "Total_Production"
## [82] "Month"
# Mean rice yield per district over all of its records, best district first.
district_rice_yield <- summarise(
  group_by(ICRISAT, district_name),
  Average_Rice_Yield = mean(rice_yield_kg_per_ha, na.rm = TRUE)
)
rice_yield_rank <- arrange(district_rice_yield, desc(Average_Rice_Yield))
# Print the ranked list
print(rice_yield_rank)## # A tibble: 311 × 2
## district_name Average_Rice_Yield
## <chr> <dbl>
## 1 Ludhiana 3650.
## 2 Sangrur 3533.
## 3 Thirunelveli 3388.
## 4 Bhatinda 3363.
## 5 Madurai 3277.
## 6 Kanyakumari 3241.
## 7 Ferozpur 3234.
## 8 Patiala 3222.
## 9 Jalandhar 3187.
## 10 Salem 3103.
## # ℹ 301 more rows
Interpretation: Ludhiana ranks highest in rice yield (3650 Kg/ha), followed by Sangrur and Thirunelveli. Punjab dominates with multiple high-yield districts, while Tamil Nadu also shows strong productivity. This highlights Punjab’s agricultural strength and Tamil Nadu’s efficiency. Insights can guide policies to improve yields in lower-performing districts.
# Sum maize production for each district (a district is identified together
# with its state).
# Sort from highest to lowest and pick top 5 districts.
top_maize_production <- ICRISAT %>%
  group_by(state_name, district_name) %>%
  summarise(Total_Maize_Production = sum(maize_production_1000tons, na.rm = TRUE)) %>%
  # summarise() only removes the last grouping level, so the result would stay
  # grouped by state_name; drop the grouping so the returned table is a plain
  # ranking and later verbs do not operate per state by accident.
  ungroup() %>%
  arrange(desc(Total_Maize_Production)) %>%
  head(5)
print(top_maize_production)## # A tibble: 5 × 3
## # Groups: state_name [4]
## state_name district_name Total_Maize_Production
## <chr> <chr> <dbl>
## 1 Telangana Karimnagar 15628.
## 2 Karnataka Chitradurga 14105.
## 3 Rajasthan Udaipur 13292.
## 4 Bihar Mungair 12616.
## 5 Karnataka Belgaum 12327.
Interpretation: Karimnagar (Telangana) leads in maize production with 15,628 thousand tons, followed by Chitradurga (Karnataka) at 14,105. Udaipur (Rajasthan), Mungair (Bihar), and Belgaum (Karnataka) also rank high. Karnataka appears twice, highlighting its strong maize cultivation. These districts contribute significantly to India’s maize output.
# Total wheat production per year over all records of that year.
wheat_by_year <- summarise(
  group_by(ICRISAT, year),
  Total_Wheat_Production = sum(wheat_production_1000tons, na.rm = TRUE)
)
# Order the years by total, largest first, and keep only the top one.
highest_wheat_year <- head(arrange(wheat_by_year, desc(Total_Wheat_Production)), 1)
print(highest_wheat_year)## # A tibble: 1 × 2
## year Total_Wheat_Production
## <dbl> <dbl>
## 1 2016 112963.
Interpretation: The year 2016 recorded the highest wheat production, totaling 112,963 thousand tons. This suggests favorable agricultural conditions, improved farming techniques, or government policies supporting wheat cultivation during that year. It highlights 2016 as a peak year for wheat production in the dataset.
# Per-record cereal total: rice + wheat + maize production
# (rowSums skips missing values because of na.rm = TRUE).
cereal_production_columns <- paste0(c("rice", "wheat", "maize"), "_production_1000tons")
ICRISAT$Total_Cereal_Prod <- rowSums(ICRISAT[, cereal_production_columns], na.rm = TRUE)
# Sum the per-record totals by year to follow cereal production over time.
yearly_cereal_prod <- aggregate(
  Total_Cereal_Prod ~ year,
  data = ICRISAT,
  FUN = sum,
  na.rm = TRUE
)
print(yearly_cereal_prod)## year Total_Cereal_Prod
## 1 1966 44954.58
## 2 1967 57150.07
## 3 1968 56562.78
## 4 1969 59850.01
## 5 1970 69127.88
## 6 1971 70218.89
## 7 1972 66637.01
## 8 1973 68160.45
## 9 1974 65860.75
## 10 1975 79810.26
## 11 1976 73582.46
## 12 1977 85933.56
## 13 1978 90613.48
## 14 1979 76682.55
## 15 1980 94703.57
## 16 1981 95932.59
## 17 1982 93599.50
## 18 1983 110096.90
## 19 1984 108056.94
## 20 1985 115284.77
## 21 1986 108756.01
## 22 1987 105658.25
## 23 1988 130323.05
## 24 1989 134700.15
## 25 1990 134381.74
## 26 1991 134645.82
## 27 1992 136320.09
## 28 1993 145503.25
## 29 1994 153130.74
## 30 1995 142510.03
## 31 1996 156588.48
## 32 1997 155965.18
## 33 1998 162776.68
## 34 1999 173505.32
## 35 2000 164759.51
## 36 2001 176081.22
## 37 2002 145805.63
## 38 2003 171237.55
## 39 2004 163090.05
## 40 2005 170248.47
## 41 2006 181915.78
## 42 2007 192037.90
## 43 2008 195046.56
## 44 2009 185118.80
## 45 2010 209859.43
## 46 2011 224350.94
## 47 2012 224326.58
## 48 2013 238360.60
## 49 2014 222151.01
## 50 2015 214763.63
## 51 2016 257759.58
## 52 2017 257462.85
Interpretation: Total cereal production (rice, wheat, and maize combined) increased from 44,954.58 thousand tons in 1966 to 257,462.85 thousand tons in 2017, with notable growth periods between 1980-1990, 2000-2010, and 2013-2016. The increase is not uniform: year-to-year fluctuations are observed, likely due to climatic factors and agricultural improvements.
# Productivity ratio for kharif sorghum: production (1000 tons) divided by
# area (1000 ha), computed for every record.
# Records with a non-positive area get NA instead of the Inf/NaN that a plain
# division would produce, so later summaries of this column are not distorted.
# A missing area or production value also results in NA.
ICRISAT$Productivity_Ratio_Sorghum <- with(
  ICRISAT,
  ifelse(
    kharif_sorghum_area_1000ha > 0,
    kharif_sorghum_production_1000tons / kharif_sorghum_area_1000ha,
    NA_real_
  )
)
print(ICRISAT[, c("year", "Productivity_Ratio_Sorghum")])
## # A tibble: 16,146 × 2
## year Productivity_Ratio_Sorghum
## <dbl> <dbl>
## 1 1966 0.667
## 2 1967 0.818
## 3 1968 0.8
## 4 1969 0.75
## 5 1970 0.667
## 6 1971 0.667
## 7 1972 1
## 8 1973 1
## 9 1974 0.8
## 10 1975 1
## # ℹ 16,136 more rows
Interpretation: The code calculates the productivity ratio of Kharif Sorghum by dividing its production by the area under cultivation for each record (one district in one year), not for each year as a whole. The output shows the ratio for the first ten records, indicating fluctuations in sorghum productivity over time.
# Define the low-yield threshold for pearl millet (500 kg/ha).
low_yield_threshold_pearl_millet <- 500
# Flag every record whose pearl millet yield is below the threshold.
ICRISAT$Low_Yield_Pearl_Millet <-
  ICRISAT$pearl_millet_yield_kg_per_ha < low_yield_threshold_pearl_millet
# Percentage of records flagged as low yield. The mean of a logical vector is
# the share of TRUE values; na.rm = TRUE excludes records with a missing yield
# so that a single NA does not turn the whole percentage into NA.
low_yield_percentage_pearl_millet <-
  mean(ICRISAT$Low_Yield_Pearl_Millet, na.rm = TRUE) * 100
message("Percentage of low yield years for Pearl Millet: ", round(low_yield_percentage_pearl_millet, 2), "%")
Interpretation: The result indicates that 55.03% of the records in the dataset (one record per district and year) had a Pearl Millet yield below the threshold of 500 kg per ha. This means that more than half of the observed records had Pearl Millet yields considered “low” based on the defined threshold.
# Sum rice, wheat and maize production within each state; the result has one
# row per state and one summed column per crop.
state_prod <- aggregate(
  cbind(
    rice_production_1000tons,
    wheat_production_1000tons,
    maize_production_1000tons
  ) ~ state_name,
  data = ICRISAT,
  FUN = sum
)

# Reshape to long format: one row per state/crop pair, with the crop column
# name in `crop_type` and the summed value in `production`.
state_prod_long <- pivot_longer(
  state_prod,
  cols = c(
    rice_production_1000tons,
    wheat_production_1000tons,
    maize_production_1000tons
  ),
  names_to = "crop_type",
  values_to = "production"
)

# Grouped (dodged) bar chart: one bar per crop within each state. State
# labels on the x axis are rotated by 90 degrees.
ggplot(
  state_prod_long,
  aes(x = state_name, y = production, fill = factor(crop_type))
) +
  geom_col(position = "dodge") +
  labs(
    title = "Grouped Bar Chart: Total Crop Production by State",
    x = "State",
    y = "Production (1000 tons)",
    fill = "Crop Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Interpretation :- The chart compares maize, rice, and wheat
production across Indian states, highlighting Uttar Pradesh’s dominance
in wheat, West Bengal and Punjab in rice, and Karnataka and Andhra
Pradesh in maize.
# Histogram of barley yield (kg/ha) over all records, using 30 bins.
ggplot(data = ICRISAT, mapping = aes(x = barley_yield_kg_per_ha)) +
  geom_histogram(bins = 30, fill = "orange", color = "black") +
  labs(
    title = "Distribution of Barley Yield across Districts",
    x = "Barley Yield (kg/ha)",
    y = "Frequency"
  ) +
  theme_minimal()
Interpretation :- Most districts have low barley yields,
resulting in a tall bar on the left. The distribution is right-skewed,
indicating a few districts with very high yields.
# Keep only the records of the state of interest (here: Chhattisgarh).
state_data <- filter(ICRISAT, state_name == "Chhattisgarh")

# Area columns of the three cereals (rice, wheat, maize), summed over all of
# the state's records to give one total per crop.
state_cereal_area <- state_data[
  ,
  c("rice_area_1000ha", "wheat_area_1000ha", "maize_area_1000ha")
]
state_cereal_area_total <- colSums(state_cereal_area)

# Pie chart of the three totals; slices are labelled with the column names
# and coloured with one rainbow() colour per slice.
pie(
  x = state_cereal_area_total,
  labels = names(state_cereal_area_total),
  col = rainbow(length(state_cereal_area_total)),
  main = "Proportion of Cereal Crop Area in Chhattisgarh"
)
Interpretation :- The red segment dominates the chart,
indicating that rice occupies the largest area, while crops like maize
and wheat have relatively smaller shares.
# Area, production and yield columns for rice and wheat: the six variables
# compared against each other in the pair plot.
pair_data <- ICRISAT[, c(
  "rice_area_1000ha",
  "rice_production_1000tons",
  "rice_yield_kg_per_ha",
  "wheat_area_1000ha",
  "wheat_production_1000tons",
  "wheat_yield_kg_per_ha"
)]

# Pairwise plot matrix of the selected columns.
ggpairs(pair_data)
Interpretation :- The pair plot shows a strong positive
correlation between area and production for both rice (0.833) and wheat
(0.911). Yield has a moderate correlation with production for rice
(0.530) and wheat (0.687), but weak correlation with area.
# Keep the state name and the rice yield column.
rice_data <- ICRISAT[, c("state_name", "rice_yield_kg_per_ha")]

# Reshape to long format with pivot_longer(), the current tidyr replacement
# for the superseded gather(). Only one value column is involved, so this
# mainly renames it to `yield` and records its origin in `crop_type`.
rice_data_long <- pivot_longer(
  rice_data,
  cols = rice_yield_kg_per_ha,
  names_to = "crop_type",
  values_to = "yield"
)

# Box plot of rice yield per state; state labels are rotated by 90 degrees.
ggplot(rice_data_long, aes(x = state_name, y = yield, fill = crop_type)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Rice Yield Variability across States",
    x = "State",
    y = "Rice Yield (kg/ha)"
  )
Interpretation :- Rice yield varies widely across states, with
Punjab, Haryana, and West Bengal showing higher medians. Several states
like Madhya Pradesh have lower yields and many outliers, indicating
significant yield fluctuations within states.
# State name and wheat yield for every record.
wheat_data <- ICRISAT[, c("state_name", "wheat_yield_kg_per_ha")]

# One box per state, filled by state; state labels on the x axis are rotated
# by 90 degrees.
ggplot(
  data = wheat_data,
  mapping = aes(
    x = state_name,
    y = wheat_yield_kg_per_ha,
    fill = state_name
  )
) +
  geom_boxplot() +
  labs(
    title = "Wheat Yield (kg/ha) Across States",
    x = "State",
    y = "Wheat Yield (kg/ha)"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Interpretation :- Based on the boxplot, Punjab and Haryana
exhibit higher median wheat yields compared to other states, while
states like Bihar and Tamil Nadu tend to have lower median yields and
greater variability. States like Uttarakhand and Madhya Pradesh have the most
outliers.
# Keep the records for the years 1966 through 2017 (inclusive).
ICRISAT_filtered <- ICRISAT %>%
  filter(year >= 1966 & year <= 2017)

# Per-record total over ten selected crops (extend the vector below to include
# further crops). rowSums(..., na.rm = TRUE) skips missing values, matching
# how Total_Cereal_Prod is computed earlier in this file; a chain of `+`
# would instead turn the whole record (and then its whole year) into NA as
# soon as one crop value is missing.
ICRISAT_filtered$total_production <- rowSums(
  ICRISAT_filtered[, c(
    "rice_production_1000tons",
    "wheat_production_1000tons",
    "barley_production_1000tons",
    "sorghum_production_1000tons",
    "pearl_millet_production_1000tons",
    "maize_production_1000tons",
    "finger_millet_production_1000tons",
    "chickpea_production_1000tons",
    "pigeonpea_production_1000tons",
    "groundnut_production_1000tons"
  )],
  na.rm = TRUE
)

# Sum the per-record totals within each year.
production_by_year <- ICRISAT_filtered %>%
  group_by(year) %>%
  summarize(total_production = sum(total_production))

# Line chart of total production per year. The title now states the range
# that is actually filtered and plotted (1966-2017, not 1996-2017).
ggplot(production_by_year, aes(x = year, y = total_production)) +
  geom_line(color = "blue", size = 1) +
  geom_point(color = "red", size = 2) + # Adds points to highlight each year
  labs(
    title = "Total Crop Production from 1966 to 2017",
    x = "Year",
    y = "Total Production (1000 tons)"
  ) +
  theme_minimal()
Interpretation :- Yes, the line chart illustrates a strong
positive growth trajectory in total crop production between 1966 and
2017.
# One-way ANOVA of wheat yield with state as the grouping factor.
anova_wheat_yield <- aov(
  formula = wheat_yield_kg_per_ha ~ state_name,
  data = wheat_data
)
summary(anova_wheat_yield)
## Df Sum Sq Mean Sq F value Pr(>F)
## state_name 19 1.074e+10 565029057 1119 <2e-16 ***
## Residuals 16126 8.140e+09 504761
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interpretation :- With a very small p-value (< 0.05), reject the null hypothesis. There is a significant difference in wheat yield between states.
# One-way ANOVA of maize yield across years; year is wrapped in factor() so
# it is treated as a categorical grouping variable, not a numeric trend.
anova_maize_yield_year <- aov(
  formula = maize_yield_kg_per_ha ~ factor(year),
  data = ICRISAT
)
summary(anova_maize_yield_year)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(year) 51 4.084e+09 80080912 68.39 <2e-16 ***
## Residuals 16094 1.884e+10 1170869
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interpretation :- With a very small p-value (< 0.05), reject the null hypothesis. There is a significant difference in maize yield across years.
# Simple linear regression: maize production explained by maize area.
simple_regression_maize <- lm(
  formula = maize_production_1000tons ~ maize_area_1000ha,
  data = ICRISAT
)
# Coefficients, residual summary and fit statistics of the model.
summary(simple_regression_maize)
##
## Call:
## lm(formula = maize_production_1000tons ~ maize_area_1000ha, data = ICRISAT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -303.13 -6.08 1.19 1.86 1188.31
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.85684 0.44409 -4.181 2.91e-05 ***
## maize_area_1000ha 1.88400 0.01104 170.598 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.86 on 16144 degrees of freedom
## Multiple R-squared: 0.6432, Adjusted R-squared: 0.6432
## F-statistic: 2.91e+04 on 1 and 16144 DF, p-value: < 2.2e-16
# Scatter plot of maize production against maize area, overlaid with the
# linear fit that geom_smooth(method = "lm") computes from the plotted data.
ggplot(
  data = ICRISAT,
  mapping = aes(x = maize_area_1000ha, y = maize_production_1000tons)
) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red") +
  labs(
    title = "Simple Linear Regression: Maize Production vs. Area",
    x = "Maize Area (1000 ha)",
    y = "Maize Production (1000 tons)"
  ) +
  theme_minimal()
Interpretation: The plot shows a positive
linear relationship between maize area and maize production.
As the Maize Area (x-axis) increases, the Maize Production
(y-axis) also increases — points trend upward. The red
line (regression line) slopes upward, showing a positive
relationship.
# Multiple linear regression: maize yield explained by maize area, maize
# production and state (state_name enters as a categorical predictor).
multiple_regression_maize <- lm(
  formula = maize_yield_kg_per_ha ~
    maize_area_1000ha + maize_production_1000tons + state_name,
  data = ICRISAT
)
# Coefficients, residual summary and fit statistics of the model.
summary(multiple_regression_maize)
##
## Call:
## lm(formula = maize_yield_kg_per_ha ~ maize_area_1000ha + maize_production_1000tons +
## state_name, data = ICRISAT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6330.7 -387.8 -44.3 344.2 20296.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2771.1755 36.7213 75.47 <2e-16 ***
## maize_area_1000ha -14.2577 0.3522 -40.48 <2e-16 ***
## maize_production_1000tons 10.4973 0.1483 70.77 <2e-16 ***
## state_nameAssam -1954.4906 52.9989 -36.88 <2e-16 ***
## state_nameBihar -1291.2931 52.7464 -24.48 <2e-16 ***
## state_nameChhattisgarh -1491.2628 61.5959 -24.21 <2e-16 ***
## state_nameGujarat -1719.7050 46.7365 -36.80 <2e-16 ***
## state_nameHaryana -1786.3246 58.6336 -30.47 <2e-16 ***
## state_nameHimachal Pradesh -1005.4017 53.1183 -18.93 <2e-16 ***
## state_nameJharkhand -1569.1894 62.5631 -25.08 <2e-16 ***
## state_nameKarnataka -587.1615 45.9422 -12.78 <2e-16 ***
## state_nameKerala -2726.8697 53.0948 -51.36 <2e-16 ***
## state_nameMadhya Pradesh -1573.6552 41.9425 -37.52 <2e-16 ***
## state_nameMaharashtra -1588.4516 43.6504 -36.39 <2e-16 ***
## state_nameOrissa -1570.9338 49.7540 -31.57 <2e-16 ***
## state_namePunjab -849.3102 51.8112 -16.39 <2e-16 ***
## state_nameRajasthan -1850.4769 44.6140 -41.48 <2e-16 ***
## state_nameTamil Nadu -1201.1451 50.4540 -23.81 <2e-16 ***
## state_nameTelangana -1063.9191 54.8296 -19.40 <2e-16 ***
## state_nameUttar Pradesh -1605.4503 41.2592 -38.91 <2e-16 ***
## state_nameUttarakhand -1768.1483 56.3370 -31.39 <2e-16 ***
## state_nameWest Bengal -1257.5091 48.1816 -26.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 871.1 on 16124 degrees of freedom
## Multiple R-squared: 0.4664, Adjusted R-squared: 0.4657
## F-statistic: 671 on 21 and 16124 DF, p-value: < 2.2e-16
# Restrict the data to Punjab (this overwrites any earlier `state_data`).
state_data <- filter(ICRISAT, state_name == "Punjab")

# Scatter plot of maize yield against maize area for Punjab.
# NOTE(review): geom_smooth(method = "lm") fits its own straight line of yield
# on area to the plotted Punjab rows; it does not draw predictions from
# `multiple_regression_maize`, although the title mentions multiple regression.
ggplot(
  data = state_data,
  mapping = aes(x = maize_area_1000ha, y = maize_yield_kg_per_ha)
) +
  geom_point(color = "green") +
  geom_smooth(method = "lm", color = "purple") +
  labs(
    title = "Multiple Linear Regression: Maize Yield vs. Area (Punjab)",
    x = "Maize Area (1000 ha)",
    y = "Maize Yield (kg/ha)"
  ) +
  theme_minimal()
Interpretation: The plot shows a negative
linear relationship between maize area and maize yield.
As the Maize Area (x-axis) increases, the Maize Yield (y-axis)
slightly decreases — points trend downward. The purple
line (regression line) slopes downward, showing a negative
relationship.
# Area, production and yield columns for the three cereals
# (rice, wheat, maize).
cereals_data <- ICRISAT[, c(
  "rice_area_1000ha",
  "rice_production_1000tons",
  "rice_yield_kg_per_ha",
  "wheat_area_1000ha",
  "wheat_production_1000tons",
  "wheat_yield_kg_per_ha",
  "maize_area_1000ha",
  "maize_production_1000tons",
  "maize_yield_kg_per_ha"
)]

# Pearson correlation matrix, computed on the rows that have no missing value
# in any of the nine columns.
cor_cereals <- cor(cereals_data, use = "complete.obs", method = "pearson")

# Coloured correlation plot with the coefficients printed in black.
corrplot(
  cor_cereals,
  method = "color",
  addCoef.col = "black",
  number.cex = 0.7
)
Interpretation: There is a strong positive correlation
between area and production for all three crops (rice, wheat, and
maize). Increasing the area under cultivation
significantly boosts total production for each crop.
Cross-crop relationships (e.g., rice vs wheat, wheat vs maize)
show weak correlations. This suggests that production
trends across crops are independent and not strongly linked to each
other.