Loading functions

rm(list = ls())
library(readxl) # reading excel files
library(tidyverse) # data wrangling and visualization
library(mice) # package for data imputation
library(purrr)

library(corrplot) # for correlation plot
# for manual edit in visualization
library(ggrepel) 
library(scales) 
library(ggpubr) 
#library(gganimate) # for manual edut in visualization
#library(plotly) # for interactive plot
library(viridis)

# working with map
library(sf) 
library(raster)
library(rgdal)
library(spdep)

library(plm) # panel regression
library(splm) # spatial panel regression
library(Benchmarking) # performance measurement package

Import data

setwd("C:/Users/MSI GF62 OEM/Desktop/R programing/data")
excel_sheets("rice_data.xlsx")
## [1] "main"   "second"
main <- read_excel("rice_data.xlsx", sheet = "main")
second <- read_excel("rice_data.xlsx", sheet = "second")

Data structure and summary

Structure

Most of the columns are numeric variable except name of province and region.

  • paddy - paddy rice produced

  • land - planted area in rai

  • land_h - harvested area in rai (land >= land_h)

  • hh - number of farmer household in the province

  • seed - amount of seed used in ton

  • fertilizer - amount of chemical fertilizer used in ton

  • seed_l - amount of seed used per planted area

  • fer_l - amount of fertilizer used per planted area

  • fer_f - amount of fertilizer used per planted area counting only area that report to use fertilizer

  • yield_l - yield or land productivity to the planted area

  • yield_h - yield or land productivity to the harvested area

str(main)
## tibble [3,306 x 16] (S3: tbl_df/tbl/data.frame)
##  $ year      : num [1:3306] 2524 2525 2526 2527 2528 ...
##  $ province  : chr [1:3306] "กรุงเทพมหานคร" "กรุงเทพมหานคร" "กรุงเทพมหานคร" "กรุงเทพมหานคร" ...
##  $ province_e: chr [1:3306] "Bangkok Metropolis" "Bangkok Metropolis" "Bangkok Metropolis" "Bangkok Metropolis" ...
##  $ ID        : num [1:3306] 10 10 10 10 10 10 10 10 10 10 ...
##  $ region    : chr [1:3306] "Central" "Central" "Central" "Central" ...
##  $ paddy     : num [1:3306] 108560 100614 44529 72884 95051 ...
##  $ land      : num [1:3306] 279075 298217 209057 220860 267750 ...
##  $ land_h    : num [1:3306] 275539 291257 204164 220474 258300 ...
##  $ hh        : num [1:3306] NA NA NA NA NA ...
##  $ seed      : num [1:3306] NA NA NA NA NA ...
##  $ fertilizer: num [1:3306] NA NA NA NA 7338 ...
##  $ seed_l    : num [1:3306] NA NA NA NA NA ...
##  $ fer_l     : num [1:3306] NA NA NA NA 27.4 ...
##  $ fer_f     : num [1:3306] NA NA NA NA 33.6 ...
##  $ yield_l   : num [1:3306] 389 337 213 330 355 318 266 300 406 143 ...
##  $ yield_h   : num [1:3306] 394 345 218 331 368 318 281 329 406 446 ...
str(second)
## tibble [2,682 x 16] (S3: tbl_df/tbl/data.frame)
##  $ year      : num [1:2682] 2531 2532 2533 2534 2535 ...
##  $ province  : chr [1:2682] "กรุงเทพมหานคร" "กรุงเทพมหานคร" "กรุงเทพมหานคร" "กรุงเทพมหานคร" ...
##  $ province_e: chr [1:2682] "Bangkok Metropolis" "Bangkok Metropolis" "Bangkok Metropolis" "Bangkok Metropolis" ...
##  $ ID        : num [1:2682] 10 10 10 10 10 10 10 10 10 10 ...
##  $ region    : chr [1:2682] "Central" "Central" "Central" "Central" ...
##  $ paddy     : num [1:2682] 26985 40637 26447 35650 24552 ...
##  $ land      : num [1:2682] 50803 71797 66283 55878 46965 ...
##  $ land_h    : num [1:2682] 48974 71797 64256 55721 46965 ...
##  $ hh        : num [1:2682] NA NA NA NA NA ...
##  $ seed      : num [1:2682] NA NA NA 1303 NA ...
##  $ fertilizer: num [1:2682] NA NA NA 2581 2346 ...
##  $ seed_l    : num [1:2682] NA NA NA 23.3 NA ...
##  $ fer_l     : num [1:2682] NA NA NA 46.2 50 ...
##  $ fer_f     : num [1:2682] NA NA NA 46.2 52.6 ...
##  $ yield_l   : num [1:2682] 531 566 399 638 523 653 566 727 707 781 ...
##  $ yield_h   : num [1:2682] 551 566 412 640 523 659 586 728 707 781 ...

Summary by season

  • Production size is relatively bigger in the main rice season.
summary(main)
##       year        province          province_e              ID        
##  Min.   :2524   Length:3306        Length:3306        Min.   : 10.00  
##  1st Qu.:2534   Class :character   Class :character   1st Qu.: 32.00  
##  Median :2544   Mode  :character   Mode  :character   Median : 53.00  
##  Mean   :2544                                         Mean   : 54.55  
##  3rd Qu.:2554                                         3rd Qu.: 76.00  
##  Max.   :2564                                         Max.   :104.00  
##                                                                       
##     region              paddy               land              land_h        
##  Length:3306        Min.   :      31   Min.   :      63   Min.   :      62  
##  Class :character   1st Qu.:   65818   1st Qu.:  157424   1st Qu.:  151811  
##  Mode  :character   Median :  214564   Median :  479547   Median :  454170  
##                     Mean   :  787322   Mean   : 2187786   Mean   : 2042262  
##                     3rd Qu.:  490203   3rd Qu.: 1245492   3rd Qu.: 1165458  
##                     Max.   :27233903   Max.   :65303711   Max.   :60261293  
##                     NA's   :14         NA's   :14         NA's   :14        
##        hh               seed            fertilizer          seed_l     
##  Min.   :      7   Min.   :       0   Min.   :      0   Min.   : 3.05  
##  1st Qu.:  14723   1st Qu.:    1988   1st Qu.:   2828   1st Qu.: 8.64  
##  Median :  37106   Median :    8593   Median :  13354   Median :13.84  
##  Mean   : 140193   Mean   :  297201   Mean   :  53364   Mean   :16.12  
##  3rd Qu.:  79324   3rd Qu.:   21190   3rd Qu.:  31075   3rd Qu.:24.71  
##  Max.   :4548731   Max.   :45026267   Max.   :1955635   Max.   :40.58  
##  NA's   :720       NA's   :721        NA's   :407       NA's   :721    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 52.0   Min.   : 52.0  
##  1st Qu.:16.89   1st Qu.:21.25   1st Qu.:305.0   1st Qu.:322.0  
##  Median :24.51   Median :27.96   Median :369.0   Median :387.0  
##  Mean   :25.96   Mean   :29.77   Mean   :405.6   Mean   :426.3  
##  3rd Qu.:33.98   3rd Qu.:36.63   3rd Qu.:500.2   3rd Qu.:528.0  
##  Max.   :70.85   Max.   :75.64   Max.   :844.0   Max.   :907.0  
##  NA's   :407     NA's   :407     NA's   :14      NA's   :14
summary(second)
##       year        province          province_e              ID        
##  Min.   :2531   Length:2682        Length:2682        Min.   : 10.00  
##  1st Qu.:2540   Class :character   Class :character   1st Qu.: 32.00  
##  Median :2548   Mode  :character   Mode  :character   Median : 52.00  
##  Mean   :2548                                         Mean   : 54.15  
##  3rd Qu.:2556                                         3rd Qu.: 76.00  
##  Max.   :2564                                         Max.   :104.00  
##                                                                       
##     region              paddy               land              land_h        
##  Length:2682        Min.   :       4   Min.   :      12   Min.   :      12  
##  Class :character   1st Qu.:    3730   1st Qu.:    7788   1st Qu.:    7630  
##  Mode  :character   Median :   25594   Median :   47631   Median :   46951  
##                     Mean   :  235767   Mean   :  360409   Mean   :  356090  
##                     3rd Qu.:  129977   3rd Qu.:  211171   3rd Qu.:  210020  
##                     Max.   :12235347   Max.   :18101239   Max.   :17976574  
##                     NA's   :165        NA's   :165        NA's   :165       
##        hh              seed            fertilizer           seed_l     
##  Min.   :     4   Min.   :     0.0   Min.   :     0.0   Min.   : 0.00  
##  1st Qu.:   951   1st Qu.:   143.1   1st Qu.:   298.2   1st Qu.:18.28  
##  Median :  4259   Median :  1298.0   Median :  2245.0   Median :26.39  
##  Mean   : 18608   Mean   : 11344.1   Mean   : 19138.8   Mean   :24.01  
##  3rd Qu.: 11524   3rd Qu.:  6648.8   3rd Qu.: 11915.0   3rd Qu.:29.76  
##  Max.   :804786   Max.   :532966.0   Max.   :891056.0   Max.   :39.61  
##  NA's   :589      NA's   :512        NA's   :444        NA's   :512    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 60.0   Min.   :207.0  
##  1st Qu.:37.23   1st Qu.:38.95   1st Qu.:457.0   1st Qu.:469.0  
##  Median :45.78   Median :46.67   Median :562.0   Median :571.0  
##  Mean   :44.64   Mean   :45.93   Mean   :557.5   Mean   :566.3  
##  3rd Qu.:51.58   3rd Qu.:52.59   3rd Qu.:668.0   3rd Qu.:675.0  
##  Max.   :94.09   Max.   :94.09   Max.   :873.0   Max.   :873.0  
##  NA's   :446     NA's   :446     NA's   :165     NA's   :165

Summary by season & region

  • Main rice is mainly cultivate in the northeastern region while the second rice is mainly cultivate in the central and northern of Thailand.
main %>%
  split(.$region) %>%
  map(summary)
## $Central
##       year        province          province_e              ID       
##  Min.   :2524   Length:1053        Length:1053        Min.   :10.00  
##  1st Qu.:2534   Class :character   Class :character   1st Qu.:16.00  
##  Median :2544   Mode  :character   Mode  :character   Median :22.00  
##  Mean   :2544                                         Mean   :35.53  
##  3rd Qu.:2554                                         3rd Qu.:71.00  
##  Max.   :2564                                         Max.   :77.00  
##                                                                      
##     region              paddy             land             land_h       
##  Length:1053        Min.   :   835   Min.   :   1269   Min.   :   1187  
##  Class :character   1st Qu.: 36262   1st Qu.:  88155   1st Qu.:  85483  
##  Mode  :character   Median :167969   Median : 332736   Median : 319087  
##                     Mean   :185718   Mean   : 391727   Mean   : 370949  
##                     3rd Qu.:239303   3rd Qu.: 625415   3rd Qu.: 586968  
##                     Max.   :960958   Max.   :1637378   Max.   :1499551  
##                                                                         
##        hh             seed            fertilizer        seed_l     
##  Min.   :  101   Min.   :      33   Min.   :    0   Min.   : 7.47  
##  1st Qu.: 4187   1st Qu.:    1436   1st Qu.: 1952   1st Qu.:19.34  
##  Median :15610   Median :    8524   Median :12891   Median :25.68  
##  Mean   :16967   Mean   :  258131   Mean   :13704   Mean   :23.56  
##  3rd Qu.:26764   3rd Qu.:   12856   3rd Qu.:18376   3rd Qu.:28.02  
##  Max.   :63558   Max.   :25636290   Max.   :70897   Max.   :40.58  
##  NA's   :227     NA's   :226        NA's   :126     NA's   :226    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.00   Min.   : 0.00   Min.   :110.0   Min.   :176.0  
##  1st Qu.:25.44   1st Qu.:30.45   1st Qu.:355.0   1st Qu.:371.0  
##  Median :37.53   Median :40.23   Median :474.0   Median :494.0  
##  Mean   :35.45   Mean   :39.18   Mean   :489.9   Mean   :510.5  
##  3rd Qu.:46.26   3rd Qu.:47.88   3rd Qu.:640.0   3rd Qu.:660.0  
##  Max.   :70.85   Max.   :75.64   Max.   :844.0   Max.   :907.0  
##  NA's   :126     NA's   :126                                    
## 
## $Northeastern
##       year        province          province_e              ID       
##  Min.   :2524   Length:777         Length:777         Min.   :30.00  
##  1st Qu.:2535   Class :character   Class :character   1st Qu.:34.00  
##  Median :2545   Mode  :character   Mode  :character   Median :40.00  
##  Mean   :2545                                         Mean   :39.57  
##  3rd Qu.:2555                                         3rd Qu.:45.00  
##  Max.   :2564                                         Max.   :49.00  
##                                                                      
##     region              paddy              land             land_h       
##  Length:777         Min.   :  47585   Min.   : 228429   Min.   : 178753  
##  Class :character   1st Qu.: 267622   1st Qu.:1011736   1st Qu.: 940810  
##  Mode  :character   Median : 466679   Median :1678278   Median :1528638  
##                     Mean   : 511272   Mean   :1776901   Mean   :1648193  
##                     3rd Qu.: 707669   3rd Qu.:2588018   3rd Qu.:2367420  
##                     Max.   :1436095   Max.   :4314831   Max.   :4163693  
##                     NA's   :14        NA's   :14        NA's   :14       
##        hh              seed            fertilizer         seed_l      
##  Min.   : 27632   Min.   :    1449   Min.   :    51   Min.   : 4.240  
##  1st Qu.: 65079   1st Qu.:    7491   1st Qu.: 14096   1st Qu.: 6.957  
##  Median :113779   Median :   14244   Median : 28770   Median : 8.620  
##  Mean   :115223   Mean   :  434907   Mean   : 38105   Mean   :10.147  
##  3rd Qu.:153671   3rd Qu.:   29382   3rd Qu.: 58893   3rd Qu.:11.925  
##  Max.   :291308   Max.   :45026267   Max.   :122898   Max.   :27.110  
##  NA's   :169      NA's   :169        NA's   :101      NA's   :169     
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.12   Min.   :10.07   Min.   : 52.0   Min.   : 52.0  
##  1st Qu.:14.89   1st Qu.:18.50   1st Qu.:250.0   1st Qu.:270.0  
##  Median :18.97   Median :21.86   Median :292.0   Median :318.0  
##  Mean   :19.44   Mean   :22.53   Mean   :290.2   Mean   :312.9  
##  3rd Qu.:24.10   3rd Qu.:26.53   3rd Qu.:331.5   3rd Qu.:360.0  
##  Max.   :43.06   Max.   :44.41   Max.   :461.0   Max.   :543.0  
##  NA's   :101     NA's   :101     NA's   :14      NA's   :14     
## 
## $Northern
##       year        province          province_e              ID       
##  Min.   :2524   Length:697         Length:697         Min.   :50.00  
##  1st Qu.:2534   Class :character   Class :character   1st Qu.:54.00  
##  Median :2544   Mode  :character   Mode  :character   Median :58.00  
##  Mean   :2544                                         Mean   :58.47  
##  3rd Qu.:2554                                         3rd Qu.:63.00  
##  Max.   :2564                                         Max.   :67.00  
##                                                                      
##     region              paddy              land             land_h       
##  Length:697         Min.   :  29889   Min.   :  71347   Min.   :  70088  
##  Class :character   1st Qu.: 132087   1st Qu.: 271821   1st Qu.: 266161  
##  Mode  :character   Median : 272673   Median : 559670   Median : 542852  
##                     Mean   : 356779   Mean   : 774549   Mean   : 725920  
##                     3rd Qu.: 548877   3rd Qu.:1174347   3rd Qu.:1103297  
##                     Max.   :1425335   Max.   :2619800   Max.   :2456481  
##                                                                          
##        hh              seed            fertilizer         seed_l     
##  Min.   : 15019   Min.   :     658   Min.   :     5   Min.   : 6.33  
##  1st Qu.: 40194   1st Qu.:    3372   1st Qu.:  3940   1st Qu.:10.09  
##  Median : 52551   Median :    8869   Median : 10450   Median :13.05  
##  Mean   : 54103   Mean   :  366428   Mean   : 18456   Mean   :16.48  
##  3rd Qu.: 67596   3rd Qu.:   26780   3rd Qu.: 26120   3rd Qu.:24.71  
##  Max.   :123421   Max.   :40697486   Max.   :105947   Max.   :32.49  
##  NA's   :153      NA's   :153        NA's   :85       NA's   :153    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.06   Min.   : 9.65   Min.   :176.0   Min.   :188.0  
##  1st Qu.:12.97   1st Qu.:19.54   1st Qu.:407.0   1st Qu.:422.0  
##  Median :20.55   Median :25.36   Median :482.0   Median :512.0  
##  Mean   :20.98   Mean   :26.09   Mean   :468.3   Mean   :491.7  
##  3rd Qu.:29.25   3rd Qu.:32.97   3rd Qu.:539.0   3rd Qu.:566.0  
##  Max.   :49.38   Max.   :53.09   Max.   :647.0   Max.   :772.0  
##  NA's   :85      NA's   :85                                     
## 
## $Southern
##       year        province          province_e              ID    
##  Min.   :2524   Length:574         Length:574         Min.   :80  
##  1st Qu.:2534   Class :character   Class :character   1st Qu.:83  
##  Median :2544   Mode  :character   Mode  :character   Median :88  
##  Mean   :2544                                         Mean   :88  
##  3rd Qu.:2554                                         3rd Qu.:93  
##  Max.   :2564                                         Max.   :96  
##                                                                   
##     region              paddy             land             land_h       
##  Length:574         Min.   :    31   Min.   :     63   Min.   :     62  
##  Class :character   1st Qu.:  5347   1st Qu.:  18842   1st Qu.:  18404  
##  Mode  :character   Median : 27284   Median :  78304   Median :  75863  
##                     Mean   : 51600   Mean   : 161334   Mean   : 151386  
##                     3rd Qu.: 66621   3rd Qu.: 202378   3rd Qu.: 193775  
##                     Max.   :354126   Max.   :1263299   Max.   :1231705  
##                                                                         
##        hh             seed            fertilizer          seed_l      
##  Min.   :    7   Min.   :       0   Min.   :    1.0   Min.   : 3.050  
##  1st Qu.: 1768   1st Qu.:     112   1st Qu.:  270.5   1st Qu.: 5.572  
##  Median :11230   Median :     457   Median : 1495.5   Median : 7.550  
##  Mean   :16575   Mean   :   74246   Mean   : 3594.8   Mean   : 9.745  
##  3rd Qu.:20628   3rd Qu.:    2104   3rd Qu.: 4145.2   3rd Qu.:12.970  
##  Max.   :81711   Max.   :11645198   Max.   :29578.0   Max.   :29.640  
##  NA's   :126     NA's   :128        NA's   :70        NA's   :128     
##      fer_l           fer_f          yield_l         yield_h   
##  Min.   : 0.12   Min.   :10.85   Min.   :168.0   Min.   :177  
##  1st Qu.:17.20   1st Qu.:21.05   1st Qu.:288.0   1st Qu.:297  
##  Median :23.89   Median :26.75   Median :331.0   Median :342  
##  Mean   :23.17   Mean   :26.54   Mean   :334.9   Mean   :348  
##  3rd Qu.:30.01   3rd Qu.:31.44   3rd Qu.:377.8   3rd Qu.:389  
##  Max.   :42.74   Max.   :44.09   Max.   :562.0   Max.   :584  
##  NA's   :70      NA's   :70
second %>%
  split(.$region) %>%
  map(summary)
## $Central
##       year        province          province_e              ID       
##  Min.   :2531   Length:858         Length:858         Min.   :10.00  
##  1st Qu.:2540   Class :character   Class :character   1st Qu.:16.00  
##  Median :2548   Mode  :character   Mode  :character   Median :22.00  
##  Mean   :2548                                         Mean   :35.71  
##  3rd Qu.:2556                                         3rd Qu.:71.00  
##  Max.   :2564                                         Max.   :77.00  
##                                                                      
##     region              paddy              land             land_h       
##  Length:858         Min.   :     18   Min.   :     35   Min.   :     35  
##  Class :character   1st Qu.:  10518   1st Qu.:  19203   1st Qu.:  18739  
##  Mode  :character   Median :  69564   Median : 111140   Median : 107590  
##                     Mean   : 125098   Mean   : 180855   Mean   : 178628  
##                     3rd Qu.: 190706   3rd Qu.: 268821   3rd Qu.: 267476  
##                     Max.   :1066345   Max.   :1410446   Max.   :1410032  
##                     NA's   :23        NA's   :23        NA's   :23       
##        hh             seed         fertilizer        seed_l     
##  Min.   :    9   Min.   :    0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 1062   1st Qu.:  493   1st Qu.:  827   1st Qu.:27.00  
##  Median : 4496   Median : 3294   Median : 6276   Median :29.21  
##  Mean   : 7272   Mean   : 5624   Mean   :10352   Mean   :28.77  
##  3rd Qu.:11008   3rd Qu.: 8258   3rd Qu.:15300   3rd Qu.:31.21  
##  Max.   :45293   Max.   :46305   Max.   :84542   Max.   :39.61  
##  NA's   :160     NA's   :135     NA's   :113     NA's   :135    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 60.0   Min.   :207.0  
##  1st Qu.:46.19   1st Qu.:47.55   1st Qu.:572.5   1st Qu.:581.0  
##  Median :51.75   Median :52.73   Median :667.0   Median :675.0  
##  Mean   :51.76   Mean   :52.85   Mean   :637.0   Mean   :645.5  
##  3rd Qu.:57.73   3rd Qu.:58.51   3rd Qu.:720.5   3rd Qu.:724.5  
##  Max.   :94.09   Max.   :94.09   Max.   :873.0   Max.   :873.0  
##  NA's   :113     NA's   :113     NA's   :23      NA's   :23     
## 
## $Northeastern
##       year        province          province_e              ID       
##  Min.   :2531   Length:657         Length:657         Min.   :30.00  
##  1st Qu.:2540   Class :character   Class :character   1st Qu.:34.00  
##  Median :2548   Mode  :character   Mode  :character   Median :40.00  
##  Mean   :2548                                         Mean   :39.55  
##  3rd Qu.:2556                                         3rd Qu.:45.00  
##  Max.   :2564                                         Max.   :49.00  
##                                                                      
##     region              paddy             land            land_h      
##  Length:657         Min.   :    22   Min.   :    75   Min.   :    75  
##  Class :character   1st Qu.:  2432   1st Qu.:  5947   1st Qu.:  5898  
##  Mode  :character   Median : 10143   Median : 24131   Median : 22759  
##                     Mean   : 29781   Mean   : 56815   Mean   : 55894  
##                     3rd Qu.: 39672   3rd Qu.: 75601   3rd Qu.: 75005  
##                     Max.   :304030   Max.   :516963   Max.   :516302  
##                     NA's   :17       NA's   :17       NA's   :17      
##        hh               seed            fertilizer          seed_l     
##  Min.   :   11.0   Min.   :    1.00   Min.   :    1.0   Min.   : 5.19  
##  1st Qu.:  802.5   1st Qu.:   87.61   1st Qu.:  206.5   1st Qu.:15.62  
##  Median : 3303.0   Median :  519.00   Median : 1030.0   Median :21.30  
##  Mean   : 5368.3   Mean   : 1513.42   Mean   : 2720.0   Mean   :20.24  
##  3rd Qu.: 7757.5   3rd Qu.: 1950.50   3rd Qu.: 3543.0   3rd Qu.:25.62  
##  Max.   :26675.0   Max.   :13127.00   Max.   :23511.0   Max.   :31.58  
##  NA's   :119       NA's   :102        NA's   :86        NA's   :102    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 1.67   Min.   :14.30   Min.   :213.0   Min.   :229.0  
##  1st Qu.:31.66   1st Qu.:32.99   1st Qu.:392.0   1st Qu.:407.8  
##  Median :39.73   Median :40.58   Median :461.0   Median :472.5  
##  Mean   :38.97   Mean   :40.17   Mean   :455.8   Mean   :466.1  
##  3rd Qu.:45.62   3rd Qu.:46.08   3rd Qu.:520.0   3rd Qu.:526.0  
##  Max.   :86.37   Max.   :86.97   Max.   :659.0   Max.   :666.0  
##  NA's   :86      NA's   :86      NA's   :17      NA's   :17     
## 
## $Northern
##       year        province          province_e              ID       
##  Min.   :2531   Length:563         Length:563         Min.   :50.00  
##  1st Qu.:2540   Class :character   Class :character   1st Qu.:54.00  
##  Median :2548   Mode  :character   Mode  :character   Median :58.00  
##  Mean   :2548                                         Mean   :58.47  
##  3rd Qu.:2556                                         3rd Qu.:63.00  
##  Max.   :2564                                         Max.   :67.00  
##                                                                      
##     region              paddy              land             land_h       
##  Length:563         Min.   :      8   Min.   :     20   Min.   :     20  
##  Class :character   1st Qu.:   4373   1st Qu.:   7949   1st Qu.:   7696  
##  Mode  :character   Median :  25918   Median :  42240   Median :  42007  
##                     Mean   : 126703   Mean   : 192819   Mean   : 190992  
##                     3rd Qu.: 193700   3rd Qu.: 282363   3rd Qu.: 277360  
##                     Max.   :1001796   Max.   :1436319   Max.   :1430339  
##                     NA's   :4         NA's   :4         NA's   :4        
##        hh             seed           fertilizer           seed_l     
##  Min.   :    5   Min.   :    0.2   Min.   :    0.55   Min.   : 8.00  
##  1st Qu.: 1580   1st Qu.:  163.8   1st Qu.:  366.25   1st Qu.:14.94  
##  Median : 5236   Median : 1169.6   Median : 2316.50   Median :26.14  
##  Mean   : 9717   Mean   : 6529.0   Mean   :10001.26   Mean   :23.49  
##  3rd Qu.:16300   3rd Qu.: 9626.2   3rd Qu.:16003.25   3rd Qu.:30.99  
##  Max.   :41211   Max.   :47788.0   Max.   :67286.00   Max.   :38.06  
##  NA's   :101     NA's   :83        NA's   :67         NA's   :83     
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 3.88   Min.   :12.35   Min.   :261.0   Min.   :261.0  
##  1st Qu.:37.33   1st Qu.:39.84   1st Qu.:559.5   1st Qu.:567.5  
##  Median :45.82   Median :46.73   Median :627.0   Median :634.0  
##  Mean   :44.19   Mean   :45.79   Mean   :611.9   Mean   :620.3  
##  3rd Qu.:49.41   3rd Qu.:50.47   3rd Qu.:675.5   3rd Qu.:681.5  
##  Max.   :68.97   Max.   :69.35   Max.   :799.0   Max.   :804.0  
##  NA's   :67      NA's   :67      NA's   :4       NA's   :4      
## 
## $Southern
##       year        province          province_e              ID       
##  Min.   :2531   Length:434         Length:434         Min.   :80.00  
##  1st Qu.:2541   Class :character   Class :character   1st Qu.:83.00  
##  Median :2549   Mode  :character   Mode  :character   Median :90.00  
##  Mean   :2549                                         Mean   :88.35  
##  3rd Qu.:2557                                         3rd Qu.:93.00  
##  Max.   :2564                                         Max.   :96.00  
##                                                                      
##     region              paddy             land            land_h      
##  Length:434         Min.   :     4   Min.   :    12   Min.   :    12  
##  Class :character   1st Qu.:   314   1st Qu.:   771   1st Qu.:   750  
##  Mode  :character   Median :  2500   Median :  5959   Median :  5952  
##                     Mean   : 11068   Mean   : 23045   Mean   : 22584  
##                     3rd Qu.: 13092   3rd Qu.: 30218   3rd Qu.: 30076  
##                     Max.   :104983   Max.   :208126   Max.   :206884  
##                     NA's   :121      NA's   :121      NA's   :121     
##        hh               seed            fertilizer       seed_l     
##  Min.   :    4.0   Min.   :   0.000   Min.   :   0   Min.   : 0.00  
##  1st Qu.:  112.5   1st Qu.:   7.999   1st Qu.:  21   1st Qu.:10.00  
##  Median :  704.0   Median :  76.000   Median : 153   Median :17.62  
##  Mean   : 2071.9   Mean   : 619.232   Mean   :1013   Mean   :18.08  
##  3rd Qu.: 3605.5   3rd Qu.: 708.702   3rd Qu.:1149   3rd Qu.:24.29  
##  Max.   :11675.0   Max.   :6425.000   Max.   :9401   Max.   :38.03  
##  NA's   :179       NA's   :167        NA's   :157    NA's   :167    
##      fer_l           fer_f          yield_l         yield_h     
##  Min.   : 0.00   Min.   : 0.00   Min.   :219.0   Min.   :245.0  
##  1st Qu.:28.55   1st Qu.:30.38   1st Qu.:388.0   1st Qu.:393.0  
##  Median :35.47   Median :37.08   Median :430.0   Median :436.0  
##  Mean   :36.17   Mean   :37.62   Mean   :437.6   Mean   :444.8  
##  3rd Qu.:45.30   3rd Qu.:46.06   3rd Qu.:505.0   3rd Qu.:509.0  
##  Max.   :51.95   Max.   :53.64   Max.   :609.0   Max.   :609.0  
##  NA's   :159     NA's   :159     NA's   :121     NA's   :121

NA for each rice season

main %>% filter(is.na(hh))
## # A tibble: 720 x 16
##     year province     province_e      ID region  paddy   land land_h    hh  seed
##    <dbl> <chr>        <chr>        <dbl> <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1  2524 กรุงเทพมหานคร Bangkok Met~    10 Centr~ 108560 279075 275539    NA    NA
##  2  2525 กรุงเทพมหานคร Bangkok Met~    10 Centr~ 100614 298217 291257    NA    NA
##  3  2526 กรุงเทพมหานคร Bangkok Met~    10 Centr~  44529 209057 204164    NA    NA
##  4  2527 กรุงเทพมหานคร Bangkok Met~    10 Centr~  72884 220860 220474    NA    NA
##  5  2528 กรุงเทพมหานคร Bangkok Met~    10 Centr~  95051 267750 258300    NA    NA
##  6  2529 กรุงเทพมหานคร Bangkok Met~    10 Centr~  83854 263691 263691    NA    NA
##  7  2530 กรุงเทพมหานคร Bangkok Met~    10 Centr~  66385 249567 235956    NA    NA
##  8  2531 กรุงเทพมหานคร Bangkok Met~    10 Centr~  65204 217348 198268    NA    NA
##  9  2564 กรุงเทพมหานคร Bangkok Met~    10 Centr~  55466  80951  80385    NA    NA
## 10  2524 สมุทรปราการ   Samut Prakan    11 Centr~  80675 151930 151930    NA    NA
## # i 710 more rows
## # i 6 more variables: fertilizer <dbl>, seed_l <dbl>, fer_l <dbl>, fer_f <dbl>,
## #   yield_l <dbl>, yield_h <dbl>
main %>% group_by(year) %>% 
  summarize(n_NA = sum(is.na(hh)))
## # A tibble: 41 x 2
##     year  n_NA
##    <dbl> <int>
##  1  2524    77
##  2  2525    78
##  3  2526    78
##  4  2527    78
##  5  2528    78
##  6  2529    78
##  7  2530    78
##  8  2531    78
##  9  2532     0
## 10  2533     0
## # i 31 more rows
main %>% group_by(province) %>% 
  summarize(n_NA = sum(is.na(hh)))
## # A tibble: 82 x 2
##    province      n_NA
##    <chr>        <int>
##  1 กระบี่             9
##  2 กรุงเทพมหานคร     9
##  3 กาญจนบุรี          9
##  4 กาฬสินธุ์           9
##  5 กำแพงเพชร        9
##  6 ขอนแก่น           9
##  7 จันทบุรี            9
##  8 ฉะเชิงเทรา        9
##  9 ชลบุรี             9
## 10 ชัยนาท            9
## # i 72 more rows
second %>% filter(is.na(hh))
## # A tibble: 589 x 16
##     year province     province_e        ID region paddy  land land_h    hh  seed
##    <dbl> <chr>        <chr>          <dbl> <chr>  <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1  2531 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 26985 50803  48974    NA    NA
##  2  2532 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 40637 71797  71797    NA    NA
##  3  2533 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 26447 66283  64256    NA    NA
##  4  2534 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 35650 55878  55721    NA  1303
##  5  2535 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 24552 46965  46965    NA    NA
##  6  2536 กรุงเทพมหานคร Bangkok Metro~    10 Centr~ 38287 58647  58137    NA    NA
##  7  2531 สมุทรปราการ   Samut Prakan      11 Centr~ 45452 86174  84327    NA    NA
##  8  2532 สมุทรปราการ   Samut Prakan      11 Centr~ 45305 65470  65298    NA    NA
##  9  2533 สมุทรปราการ   Samut Prakan      11 Centr~ 34441 59994  59263    NA    NA
## 10  2534 สมุทรปราการ   Samut Prakan      11 Centr~ 39758 68079  67632    NA  1880
## # i 579 more rows
## # i 6 more variables: fertilizer <dbl>, seed_l <dbl>, fer_l <dbl>, fer_f <dbl>,
## #   yield_l <dbl>, yield_h <dbl>
second %>% group_by(year) %>% 
  summarize(n_NA = sum(is.na(hh)))
## # A tibble: 34 x 2
##     year  n_NA
##    <dbl> <int>
##  1  2531    71
##  2  2532    71
##  3  2533    68
##  4  2534    71
##  5  2535    71
##  6  2536    71
##  7  2537     0
##  8  2538     0
##  9  2539     0
## 10  2540     9
## # i 24 more rows
second %>% group_by(province) %>% 
  summarize(n_NA = sum(is.na(hh)))
## # A tibble: 82 x 2
##    province      n_NA
##    <chr>        <int>
##  1 กระบี่            24
##  2 กรุงเทพมหานคร     6
##  3 กาญจนบุรี          6
##  4 กาฬสินธุ์           6
##  5 กำแพงเพชร        6
##  6 ขอนแก่น           6
##  7 จันทบุรี           18
##  8 ฉะเชิงเทรา        6
##  9 ชลบุรี             7
## 10 ชัยนาท            6
## # i 72 more rows

Select data that I will use for the study

Filter out regional value and select the period that have less missing value

  • 2540-2563 for the main rice

  • 2540-2564 for the second rice

main4063 <- main %>% filter(ID %in% c(1:99)) %>% filter(year %in% c(2540:2563))
second4064 <- second %>% filter(ID %in% c(1:99)) %>% filter(year %in% c(2540:2564))

#second4064 <- second %>% filter(ID %in% c(1:99)) %>% filter(between(year,2540,2564))
main4063$season <- "Main rice"
second4064$season <- "Second rice"
all_df <- rbind(main4063,second4064) # combind two farming seasons into the same dataframe
all_df$year <- as.integer(all_df$year)
all_df$ID <- as.character(all_df$ID)

Data Exploratory

Production size

Planted area

  • Production of the main rice is bigger than the second rice.

  • The main rice is mainly cultivate in the northeastern region while the second rice is mainly cultivate in the central and northern regions.

all_df %>% group_by(season, year, region) %>% 
  summarise(total_paddy = sum(paddy, na.rm = T),
            total_land = sum(land, na.rm = T)) %>% 
  ggplot()+
  geom_bar(aes(x = year, y = total_land, fill = region), position = "stack", stat = "identity")+
  scale_y_continuous(labels = scales::comma)+
  scale_x_continuous(labels = as.character(all_df$year), breaks = all_df$year)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  labs(x = "Year", y = "Planted area (rai)", fill = "Region")+
  facet_wrap(~season, ncol = 1, nrow = 2, scales = "free_y")

Paddy produced

all_df %>% group_by(season, year, region) %>% 
  summarise(total_paddy = sum(paddy, na.rm = T),
            total_land = sum(land, na.rm = T)) %>% 
  ggplot()+
  geom_bar(aes(x = year, y = total_paddy, fill = region), position = "stack", stat = "identity")+
  scale_y_continuous(labels = scales::comma)+
  scale_x_continuous(labels = as.character(all_df$year), breaks = all_df$year)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  labs(x = "Year", y = "Planted area (rai)", fill = "Region")+
  facet_wrap(~season, ncol = 1, nrow = 2, scales = "free_y")

Relation between planted area and paddy produced

  • Planted area and paddy rice produced seem to be positively related.
all_df %>% ggplot(aes(x = land, y = paddy, color = region))+
  geom_point(alpha = 0.3)+
  scale_x_continuous(labels = scales::comma)+
  scale_y_continuous(labels = scales::comma)+
  theme_bw()+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "Paddy produced (ton)", y = "Planted area (rai)", color = "Region")

Yield of rice

  • Yield of rice tends to be higher for the second rice.

  • Yield in the central and northern region are relatively higher than others for both farming season.

Histogram

all_df %>% ggplot(aes(x = yield_h)) +
  geom_histogram(aes(group = region, fill = region), color = "white", bins = 50)+
  facet_wrap(~season, ncol = 2, nrow = 1)+
  labs(x = "Yield")

The distributions

all_df %>% ggplot(aes(x = yield_h, y = region, color = region))+
  geom_boxplot()+
  geom_jitter(alpha = 0.2)+
  theme(legend.position = "none")+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  theme_bw()+
  labs(x = "Yield")

Box plot for each province

all_df %>% filter(season == "Main rice") %>% 
  ggplot(aes(x = yield_h, y = province_e, color = region))+
  geom_boxplot()+
  facet_wrap(~region, ncol = 1, scales = "free_y")+
  theme(legend.position = "none")+
  labs(title = "Main rice", x = "Yield", y = "")

all_df %>% filter(season == "Second rice") %>% 
  ggplot(aes(x = yield_h, y = province_e, color = region))+
  geom_boxplot()+
  facet_wrap(~region, ncol = 1, scales = "free_y")+
  theme(legend.position = "none")+
  labs(title = "Second rice", x = "Yield", y = "")

Trend of the yield

  • There are some improvement in yield but there is still some gap between central/northern and northeastern/southern.
all_df %>% group_by(season, region, year) %>% 
  summarize(meanyield = mean(yield_h, na.rm = T)) %>%
  ggplot(aes(x = year, y = meanyield, color = region))+
  geom_line(linewidth = 1, alpha = 0.5)+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  theme_bw()+
  scale_x_continuous(labels = as.character(all_df$year), breaks = all_df$year)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  labs(title = "Average yield", y = "Avg Yield")

Yearly regional median of the yield

all_df %>% filter(season == "Main rice") %>% 
  ggplot(aes(x = factor(year), y = yield_h, color = region))+
  geom_boxplot()+
  geom_jitter(alpha = 0.3)+
  facet_wrap(~region)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position="bottom")+
  labs(title = "Main rice", x = "", y = "Yield")

all_df %>% filter(season == "Second rice") %>% 
  ggplot(aes(x = factor(year), y = yield_h, color = region))+
  geom_boxplot()+
  geom_jitter(alpha = 0.3)+
  facet_wrap(~region)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position="bottom")+
  labs(title = "Second rice", x = "", y = "Yield")

Yield and production size

  • Yield and planted area

    • If look as the pooling data, the relationship seem to be negative for the main rice while positive for the second rice

    • If look at regional level, the relationship seem more flatten and the negative is only found in the northeastern region for the main rice

all_df %>% ggplot(aes(x = land, y = yield_h))+
  geom_point(aes(color = region) , size = 2, alpha = 0.3)+
  facet_wrap(~season, nrow = 2 , ncol = 1)+
  geom_smooth(method = "lm", se = F)+
  scale_x_continuous(labels = scales::comma)+
  scale_y_continuous(breaks = seq(200,1000, 200), labels = c("200","400","600","800","1,000"))+
  theme_bw()+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "Paddy produced", y = "Yield", color = "Region")

all_df %>% ggplot(aes(x = land, y = yield_h, color = region))+
  geom_point(size = 2, alpha = 0.3)+
  facet_wrap(~season, nrow = 2 , ncol = 1)+
  geom_smooth(method = "lm", se = F)+
  scale_x_continuous(labels = scales::comma)+
  scale_y_continuous(breaks = seq(200,1000, 200), labels = c("200","400","600","800","1,000"))+
  theme_bw()+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "Paddy produced", y = "Yield", color = "Region")

Looking at provincial production on the map

Load map and merge with data frame

map76 <- st_read("C:/Users/MSI GF62 OEM/Desktop/QGIS 6.3.20/TH_Provice_Map/TH_Provice_Map.shp", stringsAsFactors = FALSE)
## Reading layer `TH_Provice_Map' from data source 
##   `C:\Users\MSI GF62 OEM\Desktop\QGIS 6.3.20\TH_Provice_Map\TH_Provice_Map.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 76 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 97.34519 ymin: 5.616042 xmax: 105.6391 ymax: 20.46321
## Geodetic CRS:  WGS 84
unique(map76$cwt)
##  [1] 37 15 10 31 24 18 36 22 50 57 20 86 46 62 71 40 81 52 51 42 16 58 49 26 73
## [26] 48 30 60 80 55 39 43 12 13 94 82 93 56 67 76 66 65 14 54 83 25 77 85 70 21
## [51] 45 27 47 74 75 19 91 33 17 90 64 72 84 32 63 92 23 34 41 61 53 95 35 44 96
## [76] 11
length(unique(map76$cwt))
## [1] 76
all_df$ID <- as.numeric(all_df$ID)

all_map <- inner_join(map76, all_df, by = c("cwt" = "ID")) # join by province ID

plot data on the map

Planted area

Main rice

library(ggpubr) # for joint plot
a <- all_map %>% filter(season == "Main rice" & year == 2540) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(100000, 500000, 1000000, 1500000, 2000000, 3000000, 4000000))+
  theme_void()+
  labs(title="2540", fill = "Planted area")
b <- all_map %>% filter(season == "Main rice" & year == 2550) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(100000, 500000, 1000000, 1500000, 2000000, 3000000, 4000000))+
  theme_void()+
  labs(title="2550", fill = "Planted area")
c <- all_map %>% filter(season == "Main rice" & year == 2560) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(100000, 500000, 1000000, 1500000, 2000000, 3000000, 4000000))+
  theme_void()+
  labs(title="2560", fill = "Planted area")
ggarrange(a, b, c, ncol = 3, nrow = 1, common.legend = TRUE, legend = "right")

rm(a,b,c)

Second rice

a <- all_map %>% filter(season == "Second rice" & year == 2540) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(10000, 50000, 150000, 300000, 500000, 1000000, 1500000))+
  theme_void()+
  labs(title="2540", fill = "Planted area")
b <- all_map %>% filter(season == "Second rice" & year == 2550) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(10000, 50000, 150000, 300000, 500000, 1000000, 1500000))+
  theme_void()+
  labs(title="2550", fill = "Planted area")
c <- all_map %>% filter(season == "Second rice" & year == 2560) %>% 
  ggplot(aes(frame = year))+
  geom_sf(aes(fill = land))+
  scale_fill_gradient(low = "white", high = "blue3", guide = "legend", breaks = c(10000, 50000, 150000, 300000, 500000, 1000000, 1500000))+
  theme_void()+
  labs(title="2560", fill = "Planted area")
ggarrange(a, b, c, ncol = 3, nrow = 1, common.legend = TRUE, legend = "right")

rm(a,b,c)

Import irrigation data and merge with the data frame

irri <- read_excel("C:/Users/MSI GF62 OEM/Desktop/R programing/data/irrigation.xlsx")
colnames(irri)
## [1] "province" "ID"       "year"     "year_e"   "YID"      "iri_s"    "iri_m"   
## [8] "iri_l"    "iri_sum"
all_df$YID <- paste(all_df$year, all_df$ID, sep = "") # make unique ID for each province and time
all_df <- left_join(all_df, irri, by = "YID") %>% rename(year = year.x, province = province.x, ID = ID.x)
rm(irri)

Check correlation

Main rice

main_cor <- all_df %>% filter(season == "Main rice") %>% 
  filter(!is.na(hh)) %>% 
  dplyr::select(6:16, starts_with("iri_")) %>% 
  cor()
corrplot(main_cor, type="lower")

Second rice

second_cor <- all_df %>% filter(season == "Second rice") %>% 
  filter(!is.na(hh), !is.na(paddy)) %>% 
  dplyr::select(6:16, starts_with("iri_")) %>% 
  cor()
corrplot(second_cor , type="lower")

Merge values of Bueng Kan back to Nong Khai

main2 <- main4063
main2$paddy[main2$ID == 43 & main2$year >= 2554] <- main2$paddy[main2$ID == 38 & main2$year >= 2554] + main2$paddy[main2$ID == 43 & main2$year >= 2554]
main2$land[main2$ID == 43 & main2$year >= 2554] <- main2$land[main2$ID == 38 & main2$year >= 2554] + main2$land[main2$ID == 43 & main2$year >= 2554]
main2$hh[main2$ID == 43 & main2$year >= 2554] <- main2$hh[main2$ID == 38 & main2$year >= 2554] + main2$hh[main2$ID == 43 & main2$year >= 2554]
main2$seed[main2$ID == 43 & main2$year >= 2554] <- main2$seed[main2$ID == 38 & main2$year >= 2554] + main2$seed[main2$ID == 43 & main2$year >= 2554]
main2$fertilizer[main2$ID == 43 & main2$year >= 2554] <- main2$fertilizer[main2$ID == 38 & main2$year >= 2554] + main2$fertilizer[main2$ID == 43 & main2$year >= 2554]
main2 <- main2 %>% filter(ID != 38)

second2 <- second4064
second2$paddy[second2$ID == 43 & second2$year >= 2555] <- second2$paddy[second2$ID == 38 & second2$year >= 2555] + second2$paddy[second2$ID == 43 & second2$year >= 2555]
second2$land[second2$ID == 43 & second2$year >= 2555] <- second2$land[second2$ID == 38 & second2$year >= 2555] + second2$land[second2$ID == 43 & second2$year >= 2555]
second2$hh[second2$ID == 43 & second2$year >= 2555] <- second2$hh[second2$ID == 38 & second2$year >= 2555] + second2$hh[second2$ID == 43 & second2$year >= 2555]
second2$seed[second2$ID == 43 & second2$year >= 2555] <- second2$seed[second2$ID == 38 & second2$year >= 2555] + second2$seed[second2$ID == 43 & second2$year >= 2555]
second2$fertilizer[second2$ID == 43 & second2$year >= 2555] <- second2$fertilizer[second2$ID == 38 & second2$year >= 2555] + second2$fertilizer[second2$ID == 43 & second2$year >= 2555]
second2 <- second2 %>% filter(ID != 38)

Drop some provinces for the second rice

second2 <- second2 %>% filter(!ID %in% c(22, 81, 82, 83, 85, 92))
unique(second2$ID)
##  [1] 10 11 12 13 14 15 16 17 18 19 20 21 23 24 25 26 27 30 31 32 33 34 35 36 37
## [26] 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 60 61 62 63 64
## [51] 65 66 67 70 71 72 73 74 75 76 77 80 84 86 90 91 93 94 95 96
length(unique(second2$ID))
## [1] 70

Imputation for missing values

##      year province ID paddy land seed fertilizer hh  
## 1823    1        1  1     1    1    1          1  1 0
## 1       1        1  1     1    1    1          1  0 1
##         0        0  0     0    0    0          0  1 1

##      year province ID seed fertilizer paddy land hh    
## 1723    1        1  1    1          1     1    1  1   0
## 6       1        1  1    1          1     1    1  0   1
## 1       1        1  1    1          1     0    0  0   3
## 20      1        1  1    0          0     0    0  0   5
##         0        0  0   20         20    21   21 27 109
## 
##  iter imp variable
##   1   1  hh
##   1   2  hh
##   1   3  hh
##   1   4  hh
##   1   5  hh
##   2   1  hh
##   2   2  hh
##   2   3  hh
##   2   4  hh
##   2   5  hh
##   3   1  hh
##   3   2  hh
##   3   3  hh
##   3   4  hh
##   3   5  hh
##   4   1  hh
##   4   2  hh
##   4   3  hh
##   4   4  hh
##   4   5  hh
##   5   1  hh
##   5   2  hh
##   5   3  hh
##   5   4  hh
##   5   5  hh
## 
##  iter imp variable
##   1   1  hh
##   1   2  hh
##   1   3  hh
##   1   4  hh
##   1   5  hh
##   2   1  hh
##   2   2  hh
##   2   3  hh
##   2   4  hh
##   2   5  hh
##   3   1  hh
##   3   2  hh
##   3   3  hh
##   3   4  hh
##   3   5  hh
##   4   1  hh
##   4   2  hh
##   4   3  hh
##   4   4  hh
##   4   5  hh
##   5   1  hh
##   5   2  hh
##   5   3  hh
##   5   4  hh
##   5   5  hh
##   year province province_e ID  region  paddy   land land_h hh     seed
## 1 2541  สระบุรี   Saraburi 19 Central 166485 401168 398903 NA 6948.486
##   fertilizer seed_l fer_l fer_f yield_l yield_h    season ori  cart    rf
## 1      13853  17.32 34.53 35.99     415     417 Main rice  NA 20711 20392
## 
##  iter imp variable
##   1   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
## 
##  iter imp variable
##   1   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   1   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   2   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   3   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   4   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   1  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   2  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   3  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   4  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h
##   5   5  paddy  land  hh  seed  fertilizer  seed_l  fer_l  fer_f  yield_l  yield_h

Recheck the missing

main3 %>% dplyr::select(year, province, ID, paddy, land, hh, seed, fertilizer) %>% md.pattern()
##  /\     /\
## {  `---'  }
## {  O   O  }
## ==>  V <==  No need for mice. This data set is completely observed.
##  \  \|/  /
##   `-----'

##      year province ID paddy land hh seed fertilizer  
## 1824    1        1  1     1    1  1    1          1 0
##         0        0  0     0    0  0    0          0 0
second3 %>% dplyr::select(year, province, ID, paddy, land, hh, seed, fertilizer) %>% md.pattern()

##      year province ID paddy land hh seed fertilizer    
## 1729    1        1  1     1    1  1    1          1   0
## 21      1        1  1     0    0  0    0          0   5
##         0        0  0    21   21 21   21         21 105
second3 %>% filter(is.na(paddy))
## # A tibble: 21 x 17
##     year province  province_e       ID region     paddy  land land_h    hh  seed
##    <dbl> <chr>     <chr>         <dbl> <chr>      <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1  2540 ตราด      Trat             23 Central       NA    NA     NA    NA    NA
##  2  2541 สระแก้ว    Sa Kaeo          27 Central       NA    NA     NA    NA    NA
##  3  2548 อำนาจเจริญ Amnat Charoen    37 Northeast~    NA    NA     NA    NA    NA
##  4  2548 มุกดาหาร   Mukdahan         49 Northeast~    NA    NA     NA    NA    NA
##  5  2540 แพร่       Phrae            54 Northern      NA    NA     NA    NA    NA
##  6  2540 แม่ฮ่องสอน  Mae Hong Son     58 Northern      NA    NA     NA    NA    NA
##  7  2548 แม่ฮ่องสอน  Mae Hong Son     58 Northern      NA    NA     NA    NA    NA
##  8  2550 แม่ฮ่องสอน  Mae Hong Son     58 Northern      NA    NA     NA    NA    NA
##  9  2541 สตูล       Satun            91 Southern      NA    NA     NA    NA    NA
## 10  2548 สตูล       Satun            91 Southern      NA    NA     NA    NA    NA
## # i 11 more rows
## # i 7 more variables: fertilizer <dbl>, seed_l <dbl>, fer_l <dbl>, fer_f <dbl>,
## #   yield_l <dbl>, yield_h <dbl>, season <chr>

Benchmarking - Performance

In the performance analysis, I calculate technical efficiency score and total factor productivity of rice production using Benchmarking package.

## Period  2 
## Period  3 
## Period  4 
## Period  5 
## Period  6 
## Period  7 
## Period  8 
## Period  9 
## Period  10 
## Period  11 
## Period  12 
## Period  13 
## Period  14 
## Period  15 
## Period  16 
## Period  17 
## Period  18 
## Period  19 
## Period  20 
## Period  21 
## Period  22 
## Period  23 
## Period  24
## Period  2 
## Period  3 
## Period  4 
## Period  5 
## Period  6 
## Period  7 
## Period  8 
## Period  9 
## Period  10 
## Period  11 
## Period  12 
## Period  13 
## Period  14 
## Period  15 
## Period  16 
## Period  17 
## Period  18 
## Period  19 
## Period  20 
## Period  21 
## Period  22 
## Period  23 
## Period  24 
## Period  25

Collect the results

main_mq <- data.frame(
  main3,
  te = 1/mq_main$e11,
  tec = 1/mq_main$ec,
  tcc = 1/mq_main$tc,
  tfpc = 1/mq_main$m)

second_mq <- data.frame(
  second3,
  te = 1/mq_second$e11,
  tec = 1/mq_second$ec,
  tcc = 1/mq_second$tc,
  tfpc = 1/mq_second$m)

second_mq$te[second_mq$te == Inf] <- NA
second_mq$tec[second_mq$tec == Inf] <- NA
second_mq$tec[second_mq$tec == 0.000] <- NA
second_mq$tcc[is.na(second_mq$tcc)] <- NA
second_mq$tfpc[is.na(second_mq$tfpc)] <- NA

all_mq <- rbind(main_mq, second_mq)
all_map <- inner_join(map76, all_mq, by = c("cwt" = "ID")) # join by province ID
#library(xlsx)
#write.xlsx(all_mq, file="C:/Users/MSI GF62 OEM/Desktop/R programing/data/all_mq.xlsx", row.names=FALSE)

Results of TE score

The technical efficiency (TE) score computed in this study is output-oriented. The score has a value between 0 to 1. TE score of 1 is the technical efficient unit and will be used as the point to construct the frontier line.. The interpretation is in percentage terms. If a province in given year has output-oriented TE score 0.75, this means the province should be able to increase output by 25% while using the same amount of inputs to reach the efficient frontier. So, the closer to 1, the more technical efficient unit (province) is in farming.

Create geometric average function

  • I will use the geometric mean instead of arithmetic mean but there is no generic function in R so I need to create one.
geomean <- function(x, ...) {
  prod(x, ...)^(1/sum(!is.na(x)))
}

Top average performance province

Main rice

all_mq %>% filter(season == "Main rice") %>% 
  group_by(region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T)) %>% 
  arrange(desc(avg.te))
## # A tibble: 76 x 3
## # Groups:   region [4]
##    region   province_e         avg.te
##    <chr>    <chr>               <dbl>
##  1 Central  Samut Prakan        1    
##  2 Northern Chiang Mai          1    
##  3 Northern Chiang Rai          0.997
##  4 Central  Pathum Thani        0.984
##  5 Northern Mae Hong Son        0.976
##  6 Northern Lamphun             0.976
##  7 Central  Nonthaburi          0.971
##  8 Northern Phetchabun          0.965
##  9 Central  Bangkok Metropolis  0.960
## 10 Northern Phrae               0.946
## # i 66 more rows

Second rice

all_mq %>% filter(season == "Second rice") %>% 
  group_by(region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T)) %>% 
  arrange(desc(avg.te))
## # A tibble: 70 x 3
## # Groups:   region [4]
##    region   province_e               avg.te
##    <chr>    <chr>                     <dbl>
##  1 Central  Samut Prakan              1    
##  2 Northern Chiang Mai                0.991
##  3 Northern Chiang Rai                0.991
##  4 Central  Nakhon Pathom             0.988
##  5 Northern Lamphun                   0.987
##  6 Central  Phra Nakhon Si Ayutthaya  0.969
##  7 Central  Pathum Thani              0.964
##  8 Central  Nonthaburi                0.963
##  9 Central  Chai Nat                  0.959
## 10 Central  Suphan Buri               0.948
## # i 60 more rows
all_mq %>% group_by(season, region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T)) %>% 
  filter(season == "Main rice") %>%
  ungroup() %>% 
  mutate(province_e = fct_reorder(province_e, avg.te)) %>% 
  ggplot(aes(x = province_e, y = avg.te))+
  geom_segment(aes(xend=province_e, yend=0, color = region))+
  geom_point(aes(color = region), size = 2.5)+
  labs(title = "Main rice", x = "", y = "Average TE score", color = "Region")+
  coord_flip()

all_mq %>% group_by(season, region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T)) %>% 
  filter(season == "Second rice") %>%
  ungroup() %>% 
  mutate(province_e = fct_reorder(province_e, avg.te)) %>% 
  ggplot(aes(x = province_e, y = avg.te))+
  geom_segment(aes(xend=province_e, yend=0, color = region))+
  geom_point(aes(color = region), size = 2.5)+
  labs(title = "Second rice", x = "", y = "Average TE score", color = "Region")+
  coord_flip()

Histogram of TE score

  • Provinces in the Central and North seem to clustered in high TE score.
all_mq %>% ggplot(aes(x = te)) +
  geom_histogram(aes(group = region, fill = region), color = "white", bins = 50)+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "TE score", y = "", fill = "Region")

Average TE score for each province

  • Central and North provinces tend to be more efficient at rice farming.
a <- all_mq %>% filter(season == "Main rice") %>% 
  group_by(ID) %>% 
  summarise(avg.te = geomean(te, na.rm = T))
a <- left_join(map76, a, by = c("cwt" = "ID"))

a %>% ggplot()+
  geom_sf(aes(fill = avg.te))+
  scale_fill_gradient(low = "white",
                      high = "blue3",
                      breaks = c(0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1),
                      labels = c("less than 40%", "40-50%","50-60%","60-70%","70-80%","80-85%","85-90%","90-95%","95-100%"),
                      name="Average TE score",
                      limits=c(0.4, 1),
                      guide = "legend")+
  theme_void() +
  labs(title = "Main rice")+
  theme(plot.title = element_text(hjust = 0.5), legend.text=element_text(size = 8), 
        legend.title=element_text(size=7.8), text=element_text(family="serif"))

b <- all_mq %>% filter(season == "Second rice") %>% 
  group_by(ID) %>% 
  summarise(avg.te = geomean(te, na.rm = T))
b <- left_join(map76, b, by = c("cwt" = "ID"))

b %>% ggplot()+
  geom_sf(aes(fill = avg.te))+
  scale_fill_gradient(low = "white",
                      high = "blue3",
                      breaks = c(0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1),
                      labels = c("less than 40%", "40-50%","50-60%","60-70%","70-80%","80-85%","85-90%","90-95%","95-100%"),
                      name="Average TE score",
                      limits=c(0.4, 1),
                      guide = "legend")+
  theme_void() +
  labs(title = "Second rice")+
  theme(plot.title = element_text(hjust = 0.5), legend.text=element_text(size = 8), 
        legend.title=element_text(size=7.8), text=element_text(family="serif"))

Average TE score for each region

all_mq %>% group_by(season, year, region) %>% 
  summarise(avg.te = geomean(te, na.rm = T),
            avg.tec = geomean(tec, na.rm = T),
            avg.tcc = geomean(tcc, na.rm = T),
            avg.tfpc = geomean(tfpc, na.rm = T)) %>% 
  ggplot()+
  geom_line(aes(x = year, y = avg.te, group = region, color = region),
            size = 1.2, alpha = 0.5)+
  labs(y = "Average TE score", x = "")+
  scale_x_continuous(labels = as.character(all_mq$year), breaks = all_mq$year)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  facet_wrap(~season, nrow = 2, ncol = 1)

Average TE score to the production size

all_mq %>% group_by(season, region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T),
            avg.land = mean(land, na.rm = T),
            avg.paddy = mean(paddy, na.rm = T)) %>% 
  ggplot(aes(x = avg.land, y = avg.te, size = avg.paddy, color = region, label = province_e))+
  geom_point(alpha = 0.4, )+
  geom_text(hjust=0.5, vjust=0.2, size = 3)+
  labs(y = "Average TE score", x = "Planted area")+
  scale_x_continuous(labels = scales::comma)+
  facet_wrap(~season, scales = "free_x", nrow = 2, ncol = 1)

all_mq %>% filter(season == "Main rice") %>% 
  group_by(region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T),
            avg.land = mean(land, na.rm = T),
            avg.paddy = mean(paddy, na.rm = T)) %>% 
  ggplot(aes(x = avg.land, y = avg.te, size = avg.paddy, color = region, label = province_e))+
  geom_point(alpha = 0.4, )+
  geom_text(hjust=0.5, vjust=0.2, size = 3)+
  labs(y = "Average TE score", x = "Planted area", size = "Paddy produced", title = "Main rice")+
  scale_x_continuous(labels = scales::comma)+
  scale_size(breaks = c(50000,100000, 250000, 400000, 600000, 1000000, 2000000), range = c(2, 10), labels = scales::comma)

all_mq %>% filter(season == "Second rice") %>% 
  group_by(region, province_e) %>% 
  summarise(avg.te = geomean(te, na.rm = T),
            avg.land = mean(land, na.rm = T),
            avg.paddy = mean(paddy, na.rm = T)) %>% 
  ggplot(aes(x = avg.land, y = avg.te, size = avg.paddy, color = region, label = province_e))+
  geom_point(alpha = 0.4, )+
  geom_text(hjust=0.5, vjust=0.2, size = 3)+
  labs(y = "Average TE score", x = "Planted area", size = "Paddy produced", title = "Second rice")+
  scale_x_continuous(labels = scales::comma)+
  scale_size(breaks = c(50000,100000, 250000, 400000, 600000, 1000000), range = c(2, 10), labels = scales::comma)

Yield and TE score

  • Yield and TE score seem to have positive relation when look at the data as a whole. But when look at the relation at each region, northeastern and southern regions seems to have negative relationship between yield and TE score.

  • The main rice that cultivated mainly in the Northeast are relatively bigger than other provinces. The farming in this provinces may already in the decreasing return to scale of production.

all_mq %>% ggplot(aes(x = yield_h, y = te))+
  geom_point(aes(color = region), alpha = 0.3)+
  geom_smooth(method = "lm", se = F)+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "Yield", y = "TE score", color = "Region")

all_mq %>% ggplot(aes(x = yield_h, y = te, color = region))+
  geom_point(alpha = 0.3)+
  geom_smooth(method = "lm", se = F)+
  facet_wrap(~season, ncol = 1, nrow = 2)+
  labs(x = "Yield", y = "TE score", color = "Region")

Results of TFP growth

all_mq %>% group_by(season, year, region) %>% 
  summarise(avg.tfpc = (geomean(tfpc, na.rm = T)-1)*100) %>% 
  ggplot()+
  geom_line(aes(x = year, y = avg.tfpc, group = region, color = region))+
  labs(y = "Average TFP growth (%)", x = "")+
  facet_wrap(~season, nrow = 2, ncol = 1)

Make chain index

all_mq$tfpc_a <- all_mq$tfpc
all_mq$tfpc_a[all_mq$year == 2540] <- 1
all_mq$tfpc_a[all_mq$year != 2540 & is.na(all_mq$tfpc_a)] <- 1 # given all NA to be 1
all_mq <- all_mq %>% group_by(season, ID) %>% 
  mutate(chain_TFP = cumprod(tfpc_a)) %>% 
  ungroup()

Top progress provinces

all_mq %>% filter(season == "Main rice" & year == 2563) %>% 
  dplyr::select(province, region, season, chain_TFP) %>% 
  arrange(desc(chain_TFP))
## # A tibble: 76 x 4
##    province      region       season    chain_TFP
##    <chr>         <chr>        <chr>         <dbl>
##  1 พระนครศรีอยุธยา Central      Main rice      1.79
##  2 กำแพงเพชร     Northern     Main rice      1.66
##  3 กาญจนบุรี       Central      Main rice      1.62
##  4 เพชรบุรี        Central      Main rice      1.56
##  5 ระยอง         Central      Main rice      1.44
##  6 นครสวรรค์      Northern     Main rice      1.38
##  7 สมุทรสาคร      Central      Main rice      1.38
##  8 ราชบุรี         Central      Main rice      1.37
##  9 ชัยภูมิ          Northeastern Main rice      1.34
## 10 นครศรีธรรมราช  Southern     Main rice      1.32
## # i 66 more rows
all_mq %>% filter(season == "Second rice" & year == 2563) %>% 
  dplyr::select(province, region, season, chain_TFP) %>% 
  arrange(desc(chain_TFP))
## # A tibble: 70 x 4
##    province    region       season      chain_TFP
##    <chr>       <chr>        <chr>           <dbl>
##  1 ชลบุรี        Central      Second rice      1.86
##  2 อุดรธานี      Northeastern Second rice      1.63
##  3 สระแก้ว      Central      Second rice      1.62
##  4 ประจวบคีรีขันธ์ Central      Second rice      1.48
##  5 ปัตตานี       Southern     Second rice      1.42
##  6 แม่ฮ่องสอน    Northern     Second rice      1.34
##  7 พัทลุง        Southern     Second rice      1.27
##  8 ร้อยเอ็ด      Northeastern Second rice      1.13
##  9 กาญจนบุรี     Central      Second rice      1.12
## 10 นครราชสีมา   Northeastern Second rice      1.11
## # i 60 more rows

Most regress provinces

all_mq %>% filter(season == "Main rice" & year == 2563) %>% 
  dplyr::select(province, region, season, chain_TFP) %>% 
  arrange(chain_TFP)
## # A tibble: 76 x 4
##    province  region       season    chain_TFP
##    <chr>     <chr>        <chr>         <dbl>
##  1 แม่ฮ่องสอน  Northern     Main rice     0.471
##  2 กาฬสินธุ์    Northeastern Main rice     0.620
##  3 ร้อยเอ็ด    Northeastern Main rice     0.669
##  4 ศรีสะเกษ   Northeastern Main rice     0.699
##  5 ตรัง       Southern     Main rice     0.706
##  6 หนองคาย   Northeastern Main rice     0.723
##  7 เลย       Northeastern Main rice     0.766
##  8 น่าน       Northern     Main rice     0.777
##  9 สระแก้ว    Central      Main rice     0.786
## 10 มหาสารคาม Northeastern Main rice     0.790
## # i 66 more rows
all_mq %>% filter(season == "Second rice" & year == 2563) %>% 
  dplyr::select(province, region, season, chain_TFP) %>% 
  arrange(chain_TFP)
## # A tibble: 70 x 4
##    province  region       season      chain_TFP
##    <chr>     <chr>        <chr>           <dbl>
##  1 เลย       Northeastern Second rice     0.517
##  2 นครพนม    Northeastern Second rice     0.553
##  3 มุกดาหาร   Northeastern Second rice     0.578
##  4 ฉะเชิงเทรา Central      Second rice     0.595
##  5 ขอนแก่น    Northeastern Second rice     0.620
##  6 ลำพูน      Northern     Second rice     0.633
##  7 ลำปาง     Northern     Second rice     0.638
##  8 อำนาจเจริญ Northeastern Second rice     0.646
##  9 สตูล       Southern     Second rice     0.661
## 10 นราธิวาส   Southern     Second rice     0.661
## # i 60 more rows
all_mq %>% ggplot()+
  geom_line(aes(x = year, y = chain_TFP, group = province_e, color = region))+
  labs(y = "TFP chain index (%)", x = "")+
  facet_wrap(~season, nrow = 2, ncol = 1)+
  theme(legend.position="none")

Prepare for regression

Import other data

rm(list=setdiff(ls(), c("all_mq"))) 

# separate farming season
main_df <- all_mq %>% filter(season == "Main rice")
second_df <- all_mq %>% filter(season == "Second rice")

# reading temp and total precipitation df
setwd("C:/Users/MSI GF62 OEM/Desktop/R programing/data")
temp_main <- read_excel("tempc_all.xlsx", sheet = "main")
temp_second <- read_excel("tempc_all.xlsx", sheet = "second")
tpre_main <- read_excel("tpre_all.xlsx", sheet = "main")
tpre_second <- read_excel("tpre_all.xlsx", sheet = "second")
temp_main$Year <- temp_main$Year + 543
tpre_main$Year <- tpre_main$Year + 543
temp_second$Year <- temp_second$Year + 543
tpre_second$Year <- tpre_second$Year + 543


# reading irri df 
irri <- read_excel("C:/Users/MSI GF62 OEM/Desktop/R programing/data/irrigation.xlsx")

# reading gpp df
gpp <- read_excel("C:/Users/MSI GF62 OEM/Desktop/R programing/data/GPP.xlsx", sheet = "gpp_76")

# reading high yielding
hy_main <- read_excel("C:/Users/MSI GF62 OEM/Desktop/R programing/data/variety_completed.xlsx", sheet = "main")
hy_second <- read_excel("C:/Users/MSI GF62 OEM/Desktop/R programing/data/variety_completed.xlsx", sheet = "second")

Merge data

main_df <- left_join(main_df, irri, by = c("ID" = "ID", "year" = "year"))
main_df <- left_join(main_df, hy_main, by = c("ID" = "ID", "year" = "year"))
main_df <- left_join(main_df, temp_main, by = c("ID" = "ID", "year" = "Year"))
main_df <- left_join(main_df, tpre_main, by = c("ID" = "ID", "year" = "Year"))
main_df <- left_join(main_df, gpp, by = c("ID" = "ID", "year" = "year_th"))

main_df <- main_df %>% mutate(tfp_g = (tfpc - 1)*100,
                   damage = (1-(land_h/land))*100,
                   irri_p_land = iri_sum/land) %>% 
  dplyr::select(ID, year, province.x, region.x, tfp_g, hy_half_share, 
                irri_p_land, tempc_new_500, tpre_new_500, damage, 
                gpp_cvm, gpp_cvm_agri, gpp_cvm_indus, gpp_cvm_ser) %>% 
  rename("region" = "region.x", 
         "province" = "province.x", 
         "temp" = "tempc_new_500",
         "tpre" = "tpre_new_500")

second_df <- left_join(second_df, irri, by = c("ID" = "ID", "year" = "year"))
second_df <- left_join(second_df, hy_second, by = c("ID" = "ID", "year" = "year"))
second_df <- left_join(second_df, temp_second, by = c("ID" = "ID", "year" = "Year"))
second_df <- left_join(second_df, tpre_second, by = c("ID" = "ID", "year" = "Year"))
second_df <- left_join(second_df, gpp, by = c("ID" = "ID", "year" = "year_th"))

second_df <- second_df %>% mutate(tfp_g = (tfpc - 1)*100,
                   damage = (1-(land_h/land))*100,
                   irri_p_land = iri_sum/land) %>% 
  dplyr::select(ID, year, province.x, region.x, tfp_g, hy_half_share, 
                irri_p_land, tempc_new_500, tpre_new_500, damage, 
                gpp_cvm, gpp_cvm_agri, gpp_cvm_indus, gpp_cvm_ser) %>% 
  rename("region" = "region.x", 
         "province" = "province.x", 
         "temp" = "tempc_new_500",
         "tpre" = "tpre_new_500")

rm(list=setdiff(ls(), c("all_mq", "main_df", "second_df"))) 

Remove observation that has missing values

summary(main_df)
##        ID             year        province            region         
##  Min.   :10.00   Min.   :2540   Length:1824        Length:1824       
##  1st Qu.:30.75   1st Qu.:2546   Class :character   Class :character  
##  Median :50.50   Median :2552   Mode  :character   Mode  :character  
##  Mean   :51.30   Mean   :2552                                        
##  3rd Qu.:72.25   3rd Qu.:2557                                        
##  Max.   :96.00   Max.   :2563                                        
##                                                                      
##      tfp_g          hy_half_share     irri_p_land            temp      
##  Min.   :-46.9774   Min.   :  0.00   Min.   :  0.0000   Min.   :22.35  
##  1st Qu.: -5.1489   1st Qu.: 30.47   1st Qu.:  0.2227   1st Qu.:25.73  
##  Median :  0.2929   Median : 52.78   Median :  0.6540   Median :26.64  
##  Mean   :  0.9224   Mean   : 47.93   Mean   :  3.2000   Mean   :26.66  
##  3rd Qu.:  5.6335   3rd Qu.: 67.95   3rd Qu.:  1.8723   3rd Qu.:27.66  
##  Max.   : 93.4887   Max.   :100.00   Max.   :153.9365   Max.   :29.98  
##  NA's   :76         NA's   :380                                        
##       tpre            damage           gpp_cvm         gpp_cvm_agri    
##  Min.   :0.6363   Min.   : 0.0000   Min.   :   4842   Min.   :  856.5  
##  1st Qu.:1.1311   1st Qu.: 0.9014   1st Qu.:  18151   1st Qu.: 3682.5  
##  Median :1.3307   Median : 2.6235   Median :  34650   Median : 6224.3  
##  Mean   :1.3749   Mean   : 4.8507   Mean   : 102668   Mean   : 7620.9  
##  3rd Qu.:1.5536   3rd Qu.: 6.1795   3rd Qu.:  70297   3rd Qu.:10401.1  
##  Max.   :2.8117   Max.   :59.3525   Max.   :3999343   Max.   :28899.8  
##                                                                        
##  gpp_cvm_indus       gpp_cvm_ser     
##  Min.   :   290.6   Min.   :   3664  
##  1st Qu.:  2139.4   1st Qu.:  10770  
##  Median :  4913.4   Median :  18752  
##  Mean   : 34945.7   Mean   :  60282  
##  3rd Qu.: 20754.9   3rd Qu.:  34359  
##  Max.   :453398.2   Max.   :3558524  
## 
summary(second_df)
##        ID             year        province            region         
##  Min.   :10.00   Min.   :2540   Length:1750        Length:1750       
##  1st Qu.:30.00   1st Qu.:2546   Class :character   Class :character  
##  Median :48.50   Median :2552   Mode  :character   Mode  :character  
##  Mean   :49.34   Mean   :2552                                        
##  3rd Qu.:67.00   3rd Qu.:2558                                        
##  Max.   :96.00   Max.   :2564                                        
##                                                                      
##      tfp_g           hy_half_share     irri_p_land            temp      
##  Min.   :-55.44302   Min.   :  0.00   Min.   :   0.000   Min.   :20.30  
##  1st Qu.: -4.72307   1st Qu.: 49.79   1st Qu.:   1.923   1st Qu.:25.53  
##  Median :  0.09839   Median : 59.36   Median :   5.005   Median :26.69  
##  Mean   :  0.58301   Mean   : 60.17   Mean   :  66.133   Mean   :26.65  
##  3rd Qu.:  5.09089   3rd Qu.: 76.27   3rd Qu.:  17.648   3rd Qu.:28.04  
##  Max.   :150.51898   Max.   :100.00   Max.   :8400.860   Max.   :31.33  
##  NA's   :105         NA's   :21       NA's   :21                        
##       tpre             damage            gpp_cvm         gpp_cvm_agri    
##  Min.   :0.02403   Min.   : 0.00000   Min.   :   4842   Min.   :  856.5  
##  1st Qu.:0.15580   1st Qu.: 0.02686   1st Qu.:  18095   1st Qu.: 3459.5  
##  Median :0.22317   Median : 0.30037   Median :  34525   Median : 5937.3  
##  Mean   :0.27536   Mean   : 1.36972   Mean   : 109446   Mean   : 7362.9  
##  3rd Qu.:0.32219   3rd Qu.: 0.93763   3rd Qu.:  77051   3rd Qu.:10135.0  
##  Max.   :1.93172   Max.   :36.28602   Max.   :3999343   Max.   :28899.8  
##                    NA's   :21                                            
##  gpp_cvm_indus       gpp_cvm_ser     
##  Min.   :   290.6   Min.   :   3664  
##  1st Qu.:  2268.6   1st Qu.:  10844  
##  Median :  5605.7   Median :  18877  
##  Mean   : 38099.2   Mean   :  64159  
##  3rd Qu.: 23706.2   3rd Qu.:  35376  
##  Max.   :453398.2   Max.   :3558524  
## 
main_df <- main_df %>% dplyr::filter(year >= 2545)
second_df <- second_df %>% dplyr::filter(year >= 2541)

second_df <- second_df %>% dplyr::filter(!ID %in% unique(second_df$ID[is.na(second_df$tfp_g)]))

Load shp file

THmap_76 <- readOGR(dsn = "C:/Users/MSI GF62 OEM/Desktop/R programing/data/Thai_map", layer = "TH_Provice_Map")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\MSI GF62 OEM\Desktop\R programing\data\Thai_map", layer: "TH_Provice_Map"
## with 76 features
## It has 4 fields
## Integer64 fields read as strings:  cwt
THmap_76 <- THmap_76[order(THmap_76$ID_1),]

Create spatial weight matrix

queen.TH76 <- poly2nb(THmap_76, row.names = THmap_76$ID_1)
queen.TH76[[66]] <- as.integer(65)
queen.TH76[[65]] <- as.integer(c(64, 66, 67, 68))
queen.TH76.listw <- nb2listw(queen.TH76)
plot(queen.TH76,  coordinates(THmap_76), col = "red")

## 60 provinces of second rice ####
THmap_60 <- THmap_76[THmap_76$ID_1 %in% unique(second_df$ID),]
queen.TH60 <- poly2nb(THmap_60, row.names = THmap_60$ID_1)
queen.TH60.listw <- nb2listw(queen.TH60)
plot(queen.TH60,  coordinates(THmap_60), col = "blue")

rm(THmap_60, THmap_76, queen.TH60, queen.TH76)

TFP growth determinant model

Equation

eq_1 <- tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) + damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)

Main rice

# pooled ols
reg1 <- plm(eq_1, data = main_df, index = c("ID", "year"), model = "pooling")

# fixed effect
reg2 <- plm(eq_1, data = main_df, index = c("ID", "year"), model = "within", effect = "individual")

# random effect
reg3 <- plm(eq_1, data = main_df, index = c("ID", "year"), model = "random", 
            random.method = "walhus", effect = "individual")

# pooled spatial lag
reg4 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"),
             listw = queen.TH76.listw, model = "pooling", lag = TRUE, spatial.error = "none")

# pooled spatial error
reg5 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"),
             listw = queen.TH76.listw, model = "pooling", lag = FALSE, spatial.error = "b")

# fixed effect spatial lag
reg6 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
             listw = queen.TH76.listw, model = "within", effect = "individual", 
             lag = TRUE, spatial.error = "none")

# random effects spatial lag
reg7 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
             listw = queen.TH76.listw, model = "random", lag = TRUE, spatial.error = "none")

# fixed effects spatial error
reg8 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
             listw = queen.TH76.listw, model = "within", effect = "individual", 
             lag = FALSE, spatial.error = "b", quiet = TRUE)

# random effects spatial error
reg9 <- spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
             listw = queen.TH76.listw, model = "random", lag = FALSE, spatial.error = "b")

Tests

  • The results from tests suggest that there are spatial lag and spatial error dependence in the TFP growth determinant model. It means we should employ spatial regression instead of normal OLS or normal panel regression.

  • Test’s results suggest that fixed effect spatial lag and fixed effect spatial error are the most suitable models.

#LM test for random effect VS OLS
plmtest(reg1) # not reject H0 >> OLS is better
## 
##  Lagrange Multiplier Test - (Honda)
## 
## data:  eq_1
## normal = -2.6179, p-value = 0.9956
## alternative hypothesis: significant effects
#LM test for fixed effect VS OLS 
pFtest(reg2, reg1) # not reject H0 >> OLS is better
## 
##  F test for individual effects
## 
## data:  eq_1
## F = 0.82046, df1 = 75, df2 = 1360, p-value = 0.8631
## alternative hypothesis: significant effects
# Hausman Test for fixed effect VS random effect
# H0: alpha is not diff >> reject H0 >> use FE model
phtest(reg2, reg3) # reject H0 >> fixed effect is better
## 
##  Hausman Test
## 
## data:  eq_1
## chisq = 47.289, df = 8, p-value = 1.351e-07
## alternative hypothesis: one model is inconsistent
# spatial test

# moran I
iden_main <- diag(19)
mat_main <- listw2mat(queen.TH76.listw)
matpooled_main <- kronecker(iden_main, mat_main)
matpooled_mainlistw <- mat2listw(matpooled_main)
main_df_m <- main_df[order(main_df$year, main_df$ID),]
reg_main <- lm(eq_1, data = main_df_m)
lm.morantest(model = reg_main, listw = matpooled_mainlistw)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## Moran I statistic standard deviate = 12.898, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.2328977980    -0.0028394323     0.0003340583
lm.LMtests(model = reg_main, listw = matpooled_mainlistw, test = "all")
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## LMerr = 160.94, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## LMlag = 173.73, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## RLMerr = 0.63281, df = 1, p-value = 0.4263
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## RLMlag = 13.423, df = 1, p-value = 0.0002485
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = main_df_m)
## weights: matpooled_mainlistw
## 
## SARMA = 174.37, df = 2, p-value < 2.2e-16
# Locally robust panel Lagrange Multiplier tests for spatial dependence
slmtest(formula = eq_1, data = main_df, listw = queen.TH76.listw,  model="pooling", test = "lme") # reject H0 >> spatial error dependence
## 
##  LM test for spatial error dependence
## 
## data:  formula 
## LM = 160.94, df = 1, p-value < 2.2e-16
## alternative hypothesis: spatial error dependence
slmtest(formula = eq_1, data = main_df, listw = queen.TH76.listw,  model="pooling", test = "lml") # reject H0 >> spatial lag dependence
## 
##  LM test for spatial lag dependence
## 
## data:  formula 
## LM = 173.73, df = 1, p-value < 2.2e-16
## alternative hypothesis: spatial lag dependence
slmtest(formula = eq_1, data = main_df, listw = queen.TH76.listw,  model="pooling", test = "rlme")
## 
##  Locally robust LM test for spatial error dependence sub spatial lag
## 
## data:  formula 
## LM = 0.63281, df = 1, p-value = 0.4263
## alternative hypothesis: spatial error dependence
slmtest(formula = eq_1, data = main_df, listw = queen.TH76.listw,  model="pooling", test = "rlml")
## 
##  Locally robust LM test for spatial lag dependence sub spatial error
## 
## data:  formula 
## LM = 13.423, df = 1, p-value = 0.0002485
## alternative hypothesis: spatial lag dependence
bsktest(x = eq_1, data = main_df, listw = queen.TH76.listw, test = "LM1") # LM for random effect
## 
##  Baltagi, Song and Koh SLM1 marginal test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM1 = -2.6179, p-value = 1.991
## alternative hypothesis: Random effects
bsktest(x = eq_1, data = main_df, listw = queen.TH76.listw, test = "LM2") # LM for spatial error
## 
##  Baltagi, Song and Koh LM2 marginal test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM2 = 12.686, p-value < 2.2e-16
## alternative hypothesis: Spatial autocorrelation
bsktest(x = eq_1, data = main_df, listw = queen.TH76.listw, test = "LMH") # joint LM
## 
##  Baltagi, Song and Koh LM-H one-sided joint test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM-H = 160.94, p-value < 2.2e-16
## alternative hypothesis: Random Regional Effects and Spatial autocorrelation
bsktest(x = eq_1, data = main_df, listw = queen.TH76.listw, test = "CLMlambda") # test for spatial error
## 
##  Baltagi, Song and Koh LM*-lambda conditional LM test (assuming
##  sigma^2_mu >= 0)
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM*-lambda = 12.029, p-value < 2.2e-16
## alternative hypothesis: Spatial autocorrelation
bsktest(x = eq_1, data = main_df, listw = queen.TH76.listw, test = "CLMmu") # test for random effect
## 
##  Baltagi, Song and Koh LM*- mu conditional LM test (assuming lambda may
##  or may not be = 0)
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM*-mu = 2.8943, p-value = 0.003801
## alternative hypothesis: Random regional effects
# spatial Hausman test
sphtest(reg6, reg7) # rejcet H0 >> FE spatial lag is better
## 
##  Hausman test for spatial models
## 
## data:  eq_1
## chisq = 44.396, df = 8, p-value = 4.789e-07
## alternative hypothesis: one model is inconsistent
sphtest(reg8, reg9) # rejcet H0 >> FE spatial error is better
## 
##  Hausman test for spatial models
## 
## data:  eq_1
## chisq = 46.746, df = 8, p-value = 1.714e-07
## alternative hypothesis: one model is inconsistent

Result of the main rice

  • Coefficient of the regressors are different, this is why we should include the spatial effects into consideration.
summary(reg1)
## Pooling Model
## 
## Call:
## plm(formula = eq_1, data = main_df, model = "pooling", index = c("ID", 
##     "year"))
## 
## Balanced Panel: n = 76, T = 19, N = 1444
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -47.34512  -5.49285  -0.98261   4.01578  91.80665 
## 
## Coefficients:
##                       Estimate  Std. Error  t-value  Pr(>|t|)    
## (Intercept)        -62.5468712  22.8500722  -2.7373  0.006272 ** 
## hy_half_share        0.0257145   0.0131657   1.9531  0.050996 .  
## irri_p_land         -0.0083232   0.0225334  -0.3694  0.711904    
## log(temp)           16.7416904   6.8615010   2.4399  0.014810 *  
## log(tpre)            2.1298387   1.6656166   1.2787  0.201206    
## damage              -0.4423133   0.0419070 -10.5546 < 2.2e-16 ***
## log(gpp_cvm_agri)    1.1680844   0.4603891   2.5372  0.011280 *  
## log(gpp_cvm_indus)  -0.1368482   0.3192035  -0.4287  0.668193    
## log(gpp_cvm_ser)    -0.0630511   0.4899998  -0.1287  0.897632    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    180470
## Residual Sum of Squares: 166790
## R-Squared:      0.075824
## Adj. R-Squared: 0.070672
## F-statistic: 14.7168 on 8 and 1435 DF, p-value: < 2.22e-16
summary(reg2)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = eq_1, data = main_df, effect = "individual", model = "within", 
##     index = c("ID", "year"))
## 
## Balanced Panel: n = 76, T = 19, N = 1444
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -47.68590  -5.42909  -0.95905   4.03804  87.66420 
## 
## Coefficients:
##                     Estimate Std. Error  t-value Pr(>|t|)    
## hy_half_share       0.060535   0.031712   1.9089  0.05649 .  
## irri_p_land        -0.063354   0.039202  -1.6161  0.10630    
## log(temp)          -5.853767  22.131626  -0.2645  0.79144    
## log(tpre)           1.710019   2.735987   0.6250  0.53207    
## damage             -0.627918   0.051685 -12.1489  < 2e-16 ***
## log(gpp_cvm_agri)   2.967474   1.761582   1.6845  0.09230 .  
## log(gpp_cvm_indus) -0.573099   1.626805  -0.3523  0.72468    
## log(gpp_cvm_ser)   -0.197350   2.127662  -0.0928  0.92611    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    178000
## Residual Sum of Squares: 159570
## R-Squared:      0.10354
## Adj. R-Squared: 0.048832
## F-statistic: 19.6352 on 8 and 1360 DF, p-value: < 2.22e-16
summary(reg3)
## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = eq_1, data = main_df, effect = "individual", model = "random", 
##     random.method = "walhus", index = c("ID", "year"))
## 
## Balanced Panel: n = 76, T = 19, N = 1444
## 
## Effects:
##                  var std.dev share
## idiosyncratic 118.15   10.87     1
## individual      0.00    0.00     0
## theta: 0
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -47.34512  -5.49285  -0.98261   4.01578  91.80665 
## 
## Coefficients:
##                       Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)        -62.5468712  22.8500722  -2.7373  0.006195 ** 
## hy_half_share        0.0257145   0.0131657   1.9531  0.050802 .  
## irri_p_land         -0.0083232   0.0225334  -0.3694  0.711849    
## log(temp)           16.7416904   6.8615010   2.4399  0.014689 *  
## log(tpre)            2.1298387   1.6656166   1.2787  0.201000    
## damage              -0.4423133   0.0419070 -10.5546 < 2.2e-16 ***
## log(gpp_cvm_agri)    1.1680844   0.4603891   2.5372  0.011175 *  
## log(gpp_cvm_indus)  -0.1368482   0.3192035  -0.4287  0.668129    
## log(gpp_cvm_ser)    -0.0630511   0.4899998  -0.1287  0.897614    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    180470
## Residual Sum of Squares: 166790
## R-Squared:      0.075824
## Adj. R-Squared: 0.070672
## Chisq: 117.734 on 8 DF, p-value: < 2.22e-16
summary(reg6)
## Spatial panel fixed effects lag model
##  
## 
## Call:
## spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
##     listw = queen.TH76.listw, model = "within", effect = "individual", 
##     lag = TRUE, spatial.error = "none")
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -44.88815  -4.92218  -0.77047   3.80581  99.72773 
## 
## Spatial autoregressive coefficient:
##        Estimate Std. Error t-value  Pr(>|t|)    
## lambda 0.358987   0.030155  11.905 < 2.2e-16 ***
## 
## Coefficients:
##                      Estimate Std. Error  t-value Pr(>|t|)    
## hy_half_share        0.052383   0.028916   1.8116  0.07005 .  
## irri_p_land         -0.051082   0.035737  -1.4294  0.15289    
## log(temp)          -10.533531  20.176853  -0.5221  0.60163    
## log(tpre)            1.479205   2.494095   0.5931  0.55313    
## damage              -0.527151   0.047695 -11.0526  < 2e-16 ***
## log(gpp_cvm_agri)    2.713075   1.606615   1.6887  0.09128 .  
## log(gpp_cvm_indus)  -0.428676   1.482994  -0.2891  0.77253    
## log(gpp_cvm_ser)    -0.083710   1.939628  -0.0432  0.96558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(reg8)
## Spatial panel fixed effects error model
##  
## 
## Call:
## spml(formula = eq_1, data = main_df, index = c("ID", "year"), 
##     listw = queen.TH76.listw, model = "within", effect = "individual", 
##     lag = FALSE, spatial.error = "b", quiet = TRUE)
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -48.02280  -5.40831  -0.98785   4.04028  87.99026 
## 
## Spatial error parameter:
##     Estimate Std. Error t-value  Pr(>|t|)    
## rho 0.362171   0.031091  11.649 < 2.2e-16 ***
## 
## Coefficients:
##                      Estimate Std. Error  t-value Pr(>|t|)    
## hy_half_share        0.061462   0.032610   1.8847  0.05946 .  
## irri_p_land         -0.052774   0.036866  -1.4315  0.15228    
## log(temp)          -13.165664  29.785517  -0.4420  0.65848    
## log(tpre)            0.619905   3.464564   0.1789  0.85799    
## damage              -0.551342   0.051469 -10.7120  < 2e-16 ***
## log(gpp_cvm_agri)    3.319982   1.732438   1.9164  0.05532 .  
## log(gpp_cvm_indus)  -0.102942   1.585444  -0.0649  0.94823    
## log(gpp_cvm_ser)     0.374705   2.298313   0.1630  0.87049    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Second rice

# pooled ols
regs1 <- plm(eq_1, data = second_df, index = c("ID", "year"), model = "pooling")

# fixed effect
regs2 <- plm(eq_1, data = second_df, index = c("ID", "year"), model = "within", effect = "individual")

# random effect
regs3 <- plm(eq_1, data = second_df, index = c("ID", "year"), model = "random", 
            random.method = "walhus", effect = "individual")

# pooled spatial lag
regs4 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"),
             listw = queen.TH60.listw, model = "pooling", lag = TRUE, spatial.error = "none")

# pooled spatial error
regs5 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"),
             listw = queen.TH60.listw, model = "pooling", lag = FALSE, spatial.error = "b")

# fixed effect spatial lag
regs6 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
             listw = queen.TH60.listw, model = "within", effect = "individual", 
             lag = TRUE, spatial.error = "none")

# random effects spatial lag
regs7 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
             listw = queen.TH60.listw, model = "random", lag = TRUE, spatial.error = "none")

# fixed effects spatial error
regs8 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
             listw = queen.TH60.listw, model = "within", effect = "individual", 
             lag = FALSE, spatial.error = "b", quiet = TRUE)

# random effects spatial error
regs9 <- spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
             listw = queen.TH60.listw, model = "random", lag = FALSE, spatial.error = "b")

Tests

#LM test for random effect VS OLS
plmtest(regs1) # not reject H0 >> OLS is better
## 
##  Lagrange Multiplier Test - (Honda)
## 
## data:  eq_1
## normal = -2.9021, p-value = 0.9981
## alternative hypothesis: significant effects
#LM test for fixed effect VS OLS 
pFtest(regs2, regs1) # not reject H0 >> OLS is better
## 
##  F test for individual effects
## 
## data:  eq_1
## F = 0.59943, df1 = 59, df2 = 1372, p-value = 0.9931
## alternative hypothesis: significant effects
# Hausman Test for fixed effect VS random effect
# H0: alpha is not diff >> reject H0 >> use FE model
phtest(regs2, regs3) # reject H0 >> fixed effect is better
## 
##  Hausman Test
## 
## data:  eq_1
## chisq = 23.926, df = 8, p-value = 0.002358
## alternative hypothesis: one model is inconsistent
# spatial test

# moran I
iden_second <- diag(24)
mat_second <- listw2mat(queen.TH60.listw)
matpooled_second <- kronecker(iden_second, mat_second)
matpooled_secondlistw <- mat2listw(matpooled_second)
second_df_m <- second_df[order(second_df$year, second_df$ID),]
reg_second <- lm(eq_1, data = second_df_m)
lm.morantest(model = reg_second, listw = matpooled_secondlistw)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## Moran I statistic standard deviate = 6.9864, p-value = 1.411e-12
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.1311211345    -0.0028296716     0.0003676115
lm.LMtests(model = reg_second, listw = matpooled_secondlistw, test = "all")
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## LMerr = 46.465, df = 1, p-value = 9.328e-12
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## LMlag = 45.613, df = 1, p-value = 1.441e-11
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## RLMerr = 1.1522, df = 1, p-value = 0.2831
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## RLMlag = 0.30037, df = 1, p-value = 0.5837
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = eq_1, data = second_df_m)
## weights: matpooled_secondlistw
## 
## SARMA = 46.765, df = 2, p-value = 7e-11
# Locally robust panel Lagrange Multiplier tests for spatial dependence
slmtest(formula = eq_1, data = second_df, listw = queen.TH60.listw,  model="pooling", test = "lme") # reject H0 >> spatial error dependence
## 
##  LM test for spatial error dependence
## 
## data:  formula 
## LM = 46.465, df = 1, p-value = 9.328e-12
## alternative hypothesis: spatial error dependence
slmtest(formula = eq_1, data = second_df, listw = queen.TH60.listw,  model="pooling", test = "lml") # reject H0 >> spatial lag dependence
## 
##  LM test for spatial lag dependence
## 
## data:  formula 
## LM = 45.613, df = 1, p-value = 1.441e-11
## alternative hypothesis: spatial lag dependence
slmtest(formula = eq_1, data = second_df, listw = queen.TH60.listw,  model="pooling", test = "rlme")
## 
##  Locally robust LM test for spatial error dependence sub spatial lag
## 
## data:  formula 
## LM = 1.1522, df = 1, p-value = 0.2831
## alternative hypothesis: spatial error dependence
slmtest(formula = eq_1, data = second_df, listw = queen.TH60.listw,  model="pooling", test = "rlml")
## 
##  Locally robust LM test for spatial lag dependence sub spatial error
## 
## data:  formula 
## LM = 0.30037, df = 1, p-value = 0.5837
## alternative hypothesis: spatial lag dependence
bsktest(x = eq_1, data = second_df, listw = queen.TH60.listw, test = "LM1") # LM for random effect
## 
##  Baltagi, Song and Koh SLM1 marginal test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM1 = -2.9021, p-value = 1.996
## alternative hypothesis: Random effects
bsktest(x = eq_1, data = second_df, listw = queen.TH60.listw, test = "LM2") # LM for spatial error
## 
##  Baltagi, Song and Koh LM2 marginal test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM2 = 6.8165, p-value = 9.328e-12
## alternative hypothesis: Spatial autocorrelation
bsktest(x = eq_1, data = second_df, listw = queen.TH60.listw, test = "LMH") # joint LM
## 
##  Baltagi, Song and Koh LM-H one-sided joint test
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM-H = 46.465, p-value = 2.5e-11
## alternative hypothesis: Random Regional Effects and Spatial autocorrelation
#bsktest(x = eq_1, data = second_df, listw = queen.TH60.listw, test = "CLMlambda") # test for spatial error
bsktest(x = eq_1, data = second_df, listw = queen.TH60.listw, test = "CLMmu") # test for random effect
## 
##  Baltagi, Song and Koh LM*- mu conditional LM test (assuming lambda may
##  or may not be = 0)
## 
## data:  tfp_g ~ hy_half_share + irri_p_land + log(temp) + log(tpre) +     damage + log(gpp_cvm_agri) + log(gpp_cvm_indus) + log(gpp_cvm_ser)
## LM*-mu = 3.118, p-value = 0.001821
## alternative hypothesis: Random regional effects
# spatial Hausman test
sphtest(regs6, regs7) # rejcet H0 >> FE spatial lag is better
## 
##  Hausman test for spatial models
## 
## data:  eq_1
## chisq = 25.609, df = 8, p-value = 0.001225
## alternative hypothesis: one model is inconsistent
sphtest(regs8, regs9) # rejcet H0 >> FE spatial error is better
## 
##  Hausman test for spatial models
## 
## data:  eq_1
## chisq = 29.013, df = 8, p-value = 0.0003155
## alternative hypothesis: one model is inconsistent

Result of the second rice

summary(regs1)
## Pooling Model
## 
## Call:
## plm(formula = eq_1, data = second_df, model = "pooling", index = c("ID", 
##     "year"))
## 
## Balanced Panel: n = 60, T = 24, N = 1440
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -52.2080  -5.2033  -0.7881   3.9492  94.8038 
## 
## Coefficients:
##                       Estimate  Std. Error  t-value  Pr(>|t|)    
## (Intercept)        -28.7297226  16.2360841  -1.7695   0.07702 .  
## hy_half_share       -0.0094057   0.0156499  -0.6010   0.54793    
## irri_p_land          0.0119751   0.0018088   6.6206 5.046e-11 ***
## log(temp)            7.4018725   4.8657703   1.5212   0.12843    
## log(tpre)           -1.0421757   0.6537185  -1.5942   0.11111    
## damage              -0.9483057   0.0895441 -10.5904 < 2.2e-16 ***
## log(gpp_cvm_agri)    0.9203406   0.4829664   1.9056   0.05690 .  
## log(gpp_cvm_indus)  -0.1563359   0.3347778  -0.4670   0.64058    
## log(gpp_cvm_ser)    -0.1531876   0.5156794  -0.2971   0.76646    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    221210
## Residual Sum of Squares: 199770
## R-Squared:      0.096945
## Adj. R-Squared: 0.091897
## F-statistic: 19.2027 on 8 and 1431 DF, p-value: < 2.22e-16
summary(regs2)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = eq_1, data = second_df, effect = "individual", 
##     model = "within", index = c("ID", "year"))
## 
## Balanced Panel: n = 60, T = 24, N = 1440
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -51.35066  -5.01396  -0.77282   4.14941  90.80310 
## 
## Coefficients:
##                      Estimate Std. Error  t-value  Pr(>|t|)    
## hy_half_share      -0.0101276  0.0228841  -0.4426    0.6582    
## irri_p_land         0.0134042  0.0019844   6.7548 2.106e-11 ***
## log(temp)          10.7180768 14.8121409   0.7236    0.4694    
## log(tpre)          -1.8793770  1.2077385  -1.5561    0.1199    
## damage             -1.0553153  0.0967905 -10.9031 < 2.2e-16 ***
## log(gpp_cvm_agri)   1.9614205  1.6581398   1.1829    0.2371    
## log(gpp_cvm_indus) -1.3691797  1.5133087  -0.9048    0.3658    
## log(gpp_cvm_ser)   -1.7671378  2.0503153  -0.8619    0.3889    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    219620
## Residual Sum of Squares: 194750
## R-Squared:      0.11324
## Adj. R-Squared: 0.069938
## F-statistic: 21.901 on 8 and 1372 DF, p-value: < 2.22e-16
summary(regs3)
## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = eq_1, data = second_df, effect = "individual", 
##     model = "random", random.method = "walhus", index = c("ID", 
##         "year"))
## 
## Balanced Panel: n = 60, T = 24, N = 1440
## 
## Effects:
##                  var std.dev share
## idiosyncratic 141.86   11.91     1
## individual      0.00    0.00     0
## theta: 0
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -52.2080  -5.2033  -0.7881   3.9492  94.8038 
## 
## Coefficients:
##                       Estimate  Std. Error  z-value  Pr(>|z|)    
## (Intercept)        -28.7297226  16.2360841  -1.7695   0.07681 .  
## hy_half_share       -0.0094057   0.0156499  -0.6010   0.54784    
## irri_p_land          0.0119751   0.0018088   6.6206 3.578e-11 ***
## log(temp)            7.4018725   4.8657703   1.5212   0.12821    
## log(tpre)           -1.0421757   0.6537185  -1.5942   0.11089    
## damage              -0.9483057   0.0895441 -10.5904 < 2.2e-16 ***
## log(gpp_cvm_agri)    0.9203406   0.4829664   1.9056   0.05670 .  
## log(gpp_cvm_indus)  -0.1563359   0.3347778  -0.4670   0.64051    
## log(gpp_cvm_ser)    -0.1531876   0.5156794  -0.2971   0.76642    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    221210
## Residual Sum of Squares: 199770
## R-Squared:      0.096945
## Adj. R-Squared: 0.091897
## Chisq: 153.621 on 8 DF, p-value: < 2.22e-16
summary(regs6)
## Spatial panel fixed effects lag model
##  
## 
## Call:
## spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
##     listw = queen.TH60.listw, model = "within", effect = "individual", 
##     lag = TRUE, spatial.error = "none")
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -48.87288  -4.63305  -0.71071   3.83415  89.66232 
## 
## Spatial autoregressive coefficient:
##        Estimate Std. Error t-value  Pr(>|t|)    
## lambda 0.184868   0.032629  5.6657 1.464e-08 ***
## 
## Coefficients:
##                      Estimate Std. Error  t-value  Pr(>|t|)    
## hy_half_share      -0.0061542  0.0219744  -0.2801    0.7794    
## irri_p_land         0.0132662  0.0019067   6.9578 3.456e-12 ***
## log(temp)           9.9561360 14.2248670   0.6999    0.4840    
## log(tpre)          -1.3417159  1.1634480  -1.1532    0.2488    
## damage             -1.0117728  0.0931341 -10.8636 < 2.2e-16 ***
## log(gpp_cvm_agri)   1.6048926  1.5926754   1.0077    0.3136    
## log(gpp_cvm_indus) -0.9998154  1.4531535  -0.6880    0.4914    
## log(gpp_cvm_ser)   -1.5982940  1.9695789  -0.8115    0.4171    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(regs8)
## Spatial panel fixed effects error model
##  
## 
## Call:
## spml(formula = eq_1, data = second_df, index = c("ID", "year"), 
##     listw = queen.TH60.listw, model = "within", effect = "individual", 
##     lag = FALSE, spatial.error = "b", quiet = TRUE)
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -50.9239  -5.0076  -0.7479   4.2088  91.0853 
## 
## Spatial error parameter:
##     Estimate Std. Error t-value  Pr(>|t|)    
## rho 0.193253   0.033513  5.7664 8.096e-09 ***
## 
## Coefficients:
##                      Estimate Std. Error  t-value  Pr(>|t|)    
## hy_half_share      -0.0010345  0.0235059  -0.0440    0.9649    
## irri_p_land         0.0136767  0.0019252   7.1040 1.212e-12 ***
## log(temp)          12.5872123 17.2610302   0.7292    0.4659    
## log(tpre)          -1.8090197  1.3869251  -1.3043    0.1921    
## damage             -1.0236948  0.0941451 -10.8736 < 2.2e-16 ***
## log(gpp_cvm_agri)   1.7844062  1.6590669   1.0755    0.2821    
## log(gpp_cvm_indus) -0.6391294  1.5033999  -0.4251    0.6707    
## log(gpp_cvm_ser)   -2.0053501  2.1414263  -0.9365    0.3490    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Conclusion