# Load necessary libraries
pacman::p_load(pacman, readr, dplyr, skimr, naniar, ggplot2, tidyr, patchwork)

# Read VIW_FNT.csv (path is relative to the working directory), decoding the
# file as UTF-8
my_data <- read_csv(
  file = "VIW_FNT.csv",
  locale = locale(encoding = "UTF-8")
)
## Rows: 156291 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (12): WHOREGION, FLUSEASON, HEMISPHERE, ITZ, COUNTRY_CODE, COUNTRY_AREA...
## dbl  (35): ISO_YEAR, ISO_WEEK, MMWR_YEAR, MMWR_WEEK, SPEC_PROCESSED_NB, SPEC...
## date  (2): ISO_WEEKSTARTDATE, MMWR_WEEKSTARTDATE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Record which columns are numeric and which are character.
# vapply() pins the result to exactly one logical per column, so the name
# lookup stays well-defined even for a zero-column table; sapply() would
# return an empty list there, which is not a valid subscript.
numeric_columns <- names(my_data)[
  vapply(my_data, is.numeric, logical(1))
]
character_columns <- names(my_data)[
  vapply(my_data, is.character, logical(1))
]

# Fill every NA in the columns identified above:
#   numeric columns   -> 0
#   character columns -> "" (empty string)
# Columns of any other type are left untouched.
# NOTE(review): this runs before the missing-value checks further down, so
# those checks report 0 missing for the filled columns, and a filled 0 can no
# longer be told apart from a reported 0. Confirm this is intended.
my_data <- my_data %>%
  mutate(
    across(all_of(numeric_columns), function(col) replace_na(col, 0)),
    across(all_of(character_columns), function(col) replace_na(col, ""))
  )

# Base R overview of every column
my_data %>%
  summary()
##   WHOREGION          FLUSEASON          HEMISPHERE            ITZ           
##  Length:156291      Length:156291      Length:156291      Length:156291     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  COUNTRY_CODE       COUNTRY_AREA_TERRITORY ISO_WEEKSTARTDATE       ISO_YEAR   
##  Length:156291      Length:156291          Min.   :1995-01-02   Min.   :1995  
##  Class :character   Class :character       1st Qu.:2012-02-06   1st Qu.:2012  
##  Mode  :character   Mode  :character       Median :2017-02-20   Median :2017  
##                                            Mean   :2015-11-07   Mean   :2015  
##                                            3rd Qu.:2021-02-22   3rd Qu.:2021  
##                                            Max.   :2024-05-13   Max.   :2024  
##     ISO_WEEK     MMWR_WEEKSTARTDATE     MMWR_YEAR      MMWR_WEEK    
##  Min.   : 1.00   Min.   :1995-01-01   Min.   :1995   Min.   : 1.00  
##  1st Qu.:12.00   1st Qu.:2012-02-05   1st Qu.:2012   1st Qu.:12.00  
##  Median :25.00   Median :2017-02-19   Median :2017   Median :25.00  
##  Mean   :25.78   Mean   :2015-11-06   Mean   :2015   Mean   :25.78  
##  3rd Qu.:40.00   3rd Qu.:2021-02-21   3rd Qu.:2021   3rd Qu.:40.00  
##  Max.   :53.00   Max.   :2024-05-12   Max.   :2024   Max.   :53.00  
##  ORIGIN_SOURCE      SPEC_PROCESSED_NB  SPEC_RECEIVED_NB     AH1N12009       
##  Length:156291      Min.   :     0.0   Min.   :     0.0   Min.   :    0.00  
##  Class :character   1st Qu.:     2.0   1st Qu.:     0.0   1st Qu.:    0.00  
##  Mode  :character   Median :    23.0   Median :     0.0   Median :    0.00  
##                     Mean   :   463.2   Mean   :   295.9   Mean   :   11.27  
##                     3rd Qu.:    91.0   3rd Qu.:    13.0   3rd Qu.:    1.00  
##                     Max.   :191785.0   Max.   :191785.0   Max.   :10575.00  
##       AH1                AH3             AH5                AH7N9         
##  Min.   :  0.0000   Min.   :    0   Min.   : 0.000000   Min.   :0.000000  
##  1st Qu.:  0.0000   1st Qu.:    0   1st Qu.: 0.000000   1st Qu.:0.000000  
##  Median :  0.0000   Median :    0   Median : 0.000000   Median :0.000000  
##  Mean   :  0.5522   Mean   :   11   Mean   : 0.002604   Mean   :0.000282  
##  3rd Qu.:  0.0000   3rd Qu.:    1   3rd Qu.: 0.000000   3rd Qu.:0.000000  
##  Max.   :906.0000   Max.   :14991   Max.   :14.000000   Max.   :6.000000  
##   ANOTSUBTYPED      ANOTSUBTYPABLE     AOTHER_SUBTYPE    
##  Min.   :    0.00   Min.   : 0.00000   Min.   : 0.00000  
##  1st Qu.:    0.00   1st Qu.: 0.00000   1st Qu.: 0.00000  
##  Median :    0.00   Median : 0.00000   Median : 0.00000  
##  Mean   :   20.13   Mean   : 0.01832   Mean   : 0.00519  
##  3rd Qu.:    0.00   3rd Qu.: 0.00000   3rd Qu.: 0.00000  
##  Max.   :48835.00   Max.   :58.00000   Max.   :95.00000  
##  AOTHER_SUBTYPE_DETAILS     INF_A            BVIC_2DEL        
##  Length:156291          Min.   :    0.00   Min.   :  0.00000  
##  Class :character       1st Qu.:    0.00   1st Qu.:  0.00000  
##  Mode  :character       Median :    0.00   Median :  0.00000  
##                         Mean   :   42.99   Mean   :  0.01976  
##                         3rd Qu.:    6.00   3rd Qu.:  0.00000  
##                         Max.   :48835.00   Max.   :175.00000  
##    BVIC_3DEL          BVIC_NODEL       BVIC_DELUNK             BYAM          
##  Min.   :0.000000   Min.   :   0.00   Min.   :  0.00000   Min.   :   0.0000  
##  1st Qu.:0.000000   1st Qu.:   0.00   1st Qu.:  0.00000   1st Qu.:   0.0000  
##  Median :0.000000   Median :   0.00   Median :  0.00000   Median :   0.0000  
##  Mean   :0.000409   Mean   :   2.19   Mean   :  0.06771   Mean   :   0.9828  
##  3rd Qu.:0.000000   3rd Qu.:   0.00   3rd Qu.:  0.00000   3rd Qu.:   0.0000  
##  Max.   :5.000000   Max.   :6596.00   Max.   :129.00000   Max.   :2641.0000  
##  BNOTDETERMINED          INF_B             INF_ALL          INF_NEGATIVE     
##  Min.   :   -1.000   Min.   :    0.00   Min.   :    0.00   Min.   :     0.0  
##  1st Qu.:    0.000   1st Qu.:    0.00   1st Qu.:    0.00   1st Qu.:     0.0  
##  Median :    0.000   Median :    0.00   Median :    1.00   Median :     0.0  
##  Mean   :    9.103   Mean   :   12.28   Mean   :   55.26   Mean   :   193.3  
##  3rd Qu.:    1.000   3rd Qu.:    2.00   3rd Qu.:   10.00   3rd Qu.:     0.0  
##  Max.   :11264.000   Max.   :11264.00   Max.   :49007.00   Max.   :147198.0  
##   ILI_ACTIVITY       ADENO               BOCA            HUMAN_CORONA     
##  Min.   :0.000   Min.   :  0.0000   Min.   :  0.00000   Min.   :  0.0000  
##  1st Qu.:0.000   1st Qu.:  0.0000   1st Qu.:  0.00000   1st Qu.:  0.0000  
##  Median :0.000   Median :  0.0000   Median :  0.00000   Median :  0.0000  
##  Mean   :1.165   Mean   :  0.8912   Mean   :  0.06439   Mean   :  0.4873  
##  3rd Qu.:2.000   3rd Qu.:  0.0000   3rd Qu.:  0.00000   3rd Qu.:  0.0000  
##  Max.   :6.000   Max.   :376.0000   Max.   :226.00000   Max.   :708.0000  
##    METAPNEUMO       PARAINFLUENZA         RHINO               RSV          
##  Min.   :  0.0000   Min.   :  0.000   Min.   :   0.000   Min.   :   0.000  
##  1st Qu.:  0.0000   1st Qu.:  0.000   1st Qu.:   0.000   1st Qu.:   0.000  
##  Median :  0.0000   Median :  0.000   Median :   0.000   Median :   0.000  
##  Mean   :  0.8256   Mean   :  1.253   Mean   :   1.944   Mean   :   8.256  
##  3rd Qu.:  0.0000   3rd Qu.:  0.000   3rd Qu.:   0.000   3rd Qu.:   0.000  
##  Max.   :723.0000   Max.   :609.000   Max.   :1416.000   Max.   :3523.000  
##  OTHERRESPVIRUS    OTHER_RESPVIRUS_DETAILS LAB_RESULT_COMMENT
##  Min.   :  0.000   Length:156291           Length:156291     
##  1st Qu.:  0.000   Class :character        Class :character  
##  Median :  0.000   Mode  :character        Mode  :character  
##  Mean   :  1.036                                             
##  3rd Qu.:  0.000                                             
##  Max.   :732.000                                             
##  WCR_COMMENT            ISO2               ISOYW            MMWRYW      
##  Length:156291      Length:156291      Min.   :199501   Min.   :199501  
##  Class :character   Class :character   1st Qu.:201206   1st Qu.:201206  
##  Mode  :character   Mode  :character   Median :201708   Median :201708  
##                                        Mean   :201563   Mean   :201563  
##                                        3rd Qu.:202108   3rd Qu.:202108  
##                                        Max.   :202420   Max.   :202420
# Extended per-column profile produced by skimr
skimr::skim(my_data)
## Data summary
## Name my_data
## Number of rows 156291
## Number of columns 49
## _______________________
## Column type frequency:
## character 12
## Date 2
## numeric 35
## ________________________
## Group variables None
##
## Variable type: character
##
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## WHOREGION 0 1 3 4 0 7 0
## FLUSEASON 0 1 2 2 0 3 0
## HEMISPHERE 0 1 2 2 0 2 0
## ITZ 0 1 10 15 0 18 0
## COUNTRY_CODE 0 1 3 3 0 186 0
## COUNTRY_AREA_TERRITORY 0 1 4 70 0 186 0
## ORIGIN_SOURCE 0 1 8 11 0 3 0
## AOTHER_SUBTYPE_DETAILS 0 1 0 125 149312 82 0
## OTHER_RESPVIRUS_DETAILS 0 1 0 320 132553 5593 0
## LAB_RESULT_COMMENT 0 1 0 996 151688 1430 0
## WCR_COMMENT 0 1 0 450 138621 11445 0
## ISO2 0 1 0 2 93 186 0
##
## Variable type: Date
##
## skim_variable n_missing complete_rate min max median n_unique
## ISO_WEEKSTARTDATE 0 1 1995-01-02 2024-05-13 2017-02-20 1506
## MMWR_WEEKSTARTDATE 0 1 1995-01-01 2024-05-12 2017-02-19 1506
##
## Variable type: numeric
##
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
## ISO_YEAR 0 1 2015.37 6.50 1995 2012 2017 2021 2024 ▁▂▃▇▇
## ISO_WEEK 0 1 25.78 15.70 1 12 25 40 53 ▇▆▆▆▇
## MMWR_YEAR 0 1 2015.37 6.50 1995 2012 2017 2021 2024 ▁▂▃▇▇
## MMWR_WEEK 0 1 25.78 15.70 1 12 25 40 53 ▇▆▆▆▇
## SPEC_PROCESSED_NB 0 1 463.20 3457.80 0 2 23 91 191785 ▇▁▁▁▁
## SPEC_RECEIVED_NB 0 1 295.94 3277.10 0 0 0 13 191785 ▇▁▁▁▁
## AH1N12009 0 1 11.27 131.42 0 0 0 1 10575 ▇▁▁▁▁
## AH1 0 1 0.55 10.48 0 0 0 0 906 ▇▁▁▁▁
## AH3 0 1 11.00 124.37 0 0 0 1 14991 ▇▁▁▁▁
## AH5 0 1 0.00 0.11 0 0 0 0 14 ▇▁▁▁▁
## AH7N9 0 1 0.00 0.03 0 0 0 0 6 ▇▁▁▁▁
## ANOTSUBTYPED 0 1 20.13 378.66 0 0 0 0 48835 ▇▁▁▁▁
## ANOTSUBTYPABLE 0 1 0.02 0.61 0 0 0 0 58 ▇▁▁▁▁
## AOTHER_SUBTYPE 0 1 0.01 0.38 0 0 0 0 95 ▇▁▁▁▁
## INF_A 0 1 42.99 442.32 0 0 0 6 48835 ▇▁▁▁▁
## BVIC_2DEL 0 1 0.02 1.12 0 0 0 0 175 ▇▁▁▁▁
## BVIC_3DEL 0 1 0.00 0.03 0 0 0 0 5 ▇▁▁▁▁
## BVIC_NODEL 0 1 2.19 59.47 0 0 0 0 6596 ▇▁▁▁▁
## BVIC_DELUNK 0 1 0.07 1.54 0 0 0 0 129 ▇▁▁▁▁
## BYAM 0 1 0.98 22.53 0 0 0 0 2641 ▇▁▁▁▁
## BNOTDETERMINED 0 1 9.10 121.90 -1 0 0 1 11264 ▇▁▁▁▁
## INF_B 0 1 12.28 140.00 0 0 0 2 11264 ▇▁▁▁▁
## INF_ALL 0 1 55.26 519.78 0 0 1 10 49007 ▇▁▁▁▁
## INF_NEGATIVE 0 1 193.26 2840.85 0 0 0 0 147198 ▇▁▁▁▁
## ILI_ACTIVITY 0 1 1.16 1.56 0 0 0 2 6 ▇▁▂▁▁
## ADENO 0 1 0.89 8.09 0 0 0 0 376 ▇▁▁▁▁
## BOCA 0 1 0.06 1.23 0 0 0 0 226 ▇▁▁▁▁
## HUMAN_CORONA 0 1 0.49 9.71 0 0 0 0 708 ▇▁▁▁▁
## METAPNEUMO 0 1 0.83 11.55 0 0 0 0 723 ▇▁▁▁▁
## PARAINFLUENZA 0 1 1.25 13.50 0 0 0 0 609 ▇▁▁▁▁
## RHINO 0 1 1.94 26.23 0 0 0 0 1416 ▇▁▁▁▁
## RSV 0 1 8.26 72.92 0 0 0 0 3523 ▇▁▁▁▁
## OTHERRESPVIRUS 0 1 1.04 17.50 0 0 0 0 732 ▇▁▁▁▁
## ISOYW 0 1 201563.16 649.42 199501 201206 201708 202108 202420 ▁▂▃▆▇
## MMWRYW 0 1 201563.02 649.52 199501 201206 201708 202108 202420 ▁▂▃▆▇
# Missing-value audit: overall NA count plus a per-column tally
total_missing <- my_data %>%
  is.na() %>%
  sum()
col_missing <- my_data %>%
  is.na() %>%
  colSums()

# Report the overall count first, then the per-column breakdown
cat("Total missing values:", total_missing, "\n", sep = " ")
## Total missing values: 0
print(col_missing)
##               WHOREGION               FLUSEASON              HEMISPHERE 
##                       0                       0                       0 
##                     ITZ            COUNTRY_CODE  COUNTRY_AREA_TERRITORY 
##                       0                       0                       0 
##       ISO_WEEKSTARTDATE                ISO_YEAR                ISO_WEEK 
##                       0                       0                       0 
##      MMWR_WEEKSTARTDATE               MMWR_YEAR               MMWR_WEEK 
##                       0                       0                       0 
##           ORIGIN_SOURCE       SPEC_PROCESSED_NB        SPEC_RECEIVED_NB 
##                       0                       0                       0 
##               AH1N12009                     AH1                     AH3 
##                       0                       0                       0 
##                     AH5                   AH7N9            ANOTSUBTYPED 
##                       0                       0                       0 
##          ANOTSUBTYPABLE          AOTHER_SUBTYPE  AOTHER_SUBTYPE_DETAILS 
##                       0                       0                       0 
##                   INF_A               BVIC_2DEL               BVIC_3DEL 
##                       0                       0                       0 
##              BVIC_NODEL             BVIC_DELUNK                    BYAM 
##                       0                       0                       0 
##          BNOTDETERMINED                   INF_B                 INF_ALL 
##                       0                       0                       0 
##            INF_NEGATIVE            ILI_ACTIVITY                   ADENO 
##                       0                       0                       0 
##                    BOCA            HUMAN_CORONA              METAPNEUMO 
##                       0                       0                       0 
##           PARAINFLUENZA                   RHINO                     RSV 
##                       0                       0                       0 
##          OTHERRESPVIRUS OTHER_RESPVIRUS_DETAILS      LAB_RESULT_COMMENT 
##                       0                       0                       0 
##             WCR_COMMENT                    ISO2                   ISOYW 
##                       0                       0                       0 
##                  MMWRYW 
##                       0
# Plot 1: naniar's per-variable missing-value chart, with custom labels
p_miss <- gg_miss_var(my_data) +
  ggtitle("Missing Values in Dataset") +
  xlab("Variables") +
  ylab("Number of Missing Values")

print(p_miss)

# Inspect the structure: type and leading values of every column
my_data %>%
  str()
## tibble [156,291 × 49] (S3: tbl_df/tbl/data.frame)
##  $ WHOREGION              : chr [1:156291] "EUR" "AMR" "EUR" "WPR" ...
##  $ FLUSEASON              : chr [1:156291] "NH" "YR" "NH" "YR" ...
##  $ HEMISPHERE             : chr [1:156291] "NH" "SH" "NH" "NH" ...
##  $ ITZ                    : chr [1:156291] "FLU_WST_ASIA" "FLU_TRP_SAMR" "FLU_SW_EUR" "FLU_SE_ASIA" ...
##  $ COUNTRY_CODE           : chr [1:156291] "AZE" "BRA" "HRV" "PHL" ...
##  $ COUNTRY_AREA_TERRITORY : chr [1:156291] "Azerbaijan" "Brazil" "Croatia" "Philippines" ...
##  $ ISO_WEEKSTARTDATE      : Date[1:156291], format: "2015-11-16" "2009-06-15" ...
##  $ ISO_YEAR               : num [1:156291] 2015 2009 2014 2004 2024 ...
##  $ ISO_WEEK               : num [1:156291] 47 25 7 3 7 33 21 46 2 43 ...
##  $ MMWR_WEEKSTARTDATE     : Date[1:156291], format: "2015-11-15" "2009-06-14" ...
##  $ MMWR_YEAR              : num [1:156291] 2015 2009 2014 2004 2024 ...
##  $ MMWR_WEEK              : num [1:156291] 46 25 7 2 7 33 20 46 2 43 ...
##  $ ORIGIN_SOURCE          : chr [1:156291] "NONSENTINEL" "NOTDEFINED" "SENTINEL" "NOTDEFINED" ...
##  $ SPEC_PROCESSED_NB      : num [1:156291] 5 227 149 58 169 ...
##  $ SPEC_RECEIVED_NB       : num [1:156291] 0 267 0 0 169 ...
##  $ AH1N12009              : num [1:156291] 0 85 0 0 52 0 0 264 0 18 ...
##  $ AH1                    : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AH3                    : num [1:156291] 0 1 0 0 0 0 1 30 0 0 ...
##  $ AH5                    : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AH7N9                  : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ANOTSUBTYPED           : num [1:156291] 0 34 0 0 0 0 1 4 0 0 ...
##  $ ANOTSUBTYPABLE         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AOTHER_SUBTYPE         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AOTHER_SUBTYPE_DETAILS : chr [1:156291] "" "" "" "" ...
##  $ INF_A                  : num [1:156291] 0 120 0 0 52 0 2 298 0 18 ...
##  $ BVIC_2DEL              : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BVIC_3DEL              : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BVIC_NODEL             : num [1:156291] 0 0 0 0 13 0 0 3 0 0 ...
##  $ BVIC_DELUNK            : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BYAM                   : num [1:156291] 0 0 0 0 0 0 4 7 0 0 ...
##  $ BNOTDETERMINED         : num [1:156291] 0 7 0 0 5 0 0 0 0 0 ...
##  $ INF_B                  : num [1:156291] 0 7 0 0 18 0 4 10 0 0 ...
##  $ INF_ALL                : num [1:156291] 0 127 0 0 70 0 6 308 0 18 ...
##  $ INF_NEGATIVE           : num [1:156291] 0 0 0 0 99 ...
##  $ ILI_ACTIVITY           : num [1:156291] 0 6 0 2 0 0 0 3 0 3 ...
##  $ ADENO                  : num [1:156291] 0 0 0 0 0 0 11 0 0 0 ...
##  $ BOCA                   : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ HUMAN_CORONA           : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ METAPNEUMO             : num [1:156291] 0 0 0 0 0 0 1 0 0 0 ...
##  $ PARAINFLUENZA          : num [1:156291] 0 0 0 0 0 0 51 0 0 0 ...
##  $ RHINO                  : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ RSV                    : num [1:156291] 0 0 0 0 1 0 30 0 11 0 ...
##  $ OTHERRESPVIRUS         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ OTHER_RESPVIRUS_DETAILS: chr [1:156291] "" "0" "" "Rhinovirus" ...
##  $ LAB_RESULT_COMMENT     : chr [1:156291] "" "No unsubtyped or antiviral resistence was found." "" "" ...
##  $ WCR_COMMENT            : chr [1:156291] "" "The Flu seasonal activity is low and H1N1 pandemic first detection." "" "" ...
##  $ ISO2                   : chr [1:156291] "AZ" "BR" "HR" "PH" ...
##  $ ISOYW                  : num [1:156291] 201547 200925 201407 200403 202407 ...
##  $ MMWRYW                 : num [1:156291] 201546 200925 201407 200402 202407 ...
# Cast the season and hemisphere codes to factors, and pass both week-start
# columns through as.Date() using the "%Y-%m-%d" layout.
# (The read_csv column report earlier in the script already lists the two
# week-start columns as date.)
my_data <- my_data %>%
  mutate(
    across(c(FLUSEASON, HEMISPHERE), as.factor),
    across(
      c(ISO_WEEKSTARTDATE, MMWR_WEEKSTARTDATE),
      function(d) as.Date(d, format = "%Y-%m-%d")
    )
  )

# Print the structure again to confirm the new column types
my_data %>%
  str()
## tibble [156,291 × 49] (S3: tbl_df/tbl/data.frame)
##  $ WHOREGION              : chr [1:156291] "EUR" "AMR" "EUR" "WPR" ...
##  $ FLUSEASON              : Factor w/ 3 levels "NH","SH","YR": 1 3 1 3 3 1 2 3 1 1 ...
##  $ HEMISPHERE             : Factor w/ 2 levels "NH","SH": 1 2 1 1 1 1 2 1 1 1 ...
##  $ ITZ                    : chr [1:156291] "FLU_WST_ASIA" "FLU_TRP_SAMR" "FLU_SW_EUR" "FLU_SE_ASIA" ...
##  $ COUNTRY_CODE           : chr [1:156291] "AZE" "BRA" "HRV" "PHL" ...
##  $ COUNTRY_AREA_TERRITORY : chr [1:156291] "Azerbaijan" "Brazil" "Croatia" "Philippines" ...
##  $ ISO_WEEKSTARTDATE      : Date[1:156291], format: "2015-11-16" "2009-06-15" ...
##  $ ISO_YEAR               : num [1:156291] 2015 2009 2014 2004 2024 ...
##  $ ISO_WEEK               : num [1:156291] 47 25 7 3 7 33 21 46 2 43 ...
##  $ MMWR_WEEKSTARTDATE     : Date[1:156291], format: "2015-11-15" "2009-06-14" ...
##  $ MMWR_YEAR              : num [1:156291] 2015 2009 2014 2004 2024 ...
##  $ MMWR_WEEK              : num [1:156291] 46 25 7 2 7 33 20 46 2 43 ...
##  $ ORIGIN_SOURCE          : chr [1:156291] "NONSENTINEL" "NOTDEFINED" "SENTINEL" "NOTDEFINED" ...
##  $ SPEC_PROCESSED_NB      : num [1:156291] 5 227 149 58 169 ...
##  $ SPEC_RECEIVED_NB       : num [1:156291] 0 267 0 0 169 ...
##  $ AH1N12009              : num [1:156291] 0 85 0 0 52 0 0 264 0 18 ...
##  $ AH1                    : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AH3                    : num [1:156291] 0 1 0 0 0 0 1 30 0 0 ...
##  $ AH5                    : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AH7N9                  : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ANOTSUBTYPED           : num [1:156291] 0 34 0 0 0 0 1 4 0 0 ...
##  $ ANOTSUBTYPABLE         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AOTHER_SUBTYPE         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AOTHER_SUBTYPE_DETAILS : chr [1:156291] "" "" "" "" ...
##  $ INF_A                  : num [1:156291] 0 120 0 0 52 0 2 298 0 18 ...
##  $ BVIC_2DEL              : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BVIC_3DEL              : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BVIC_NODEL             : num [1:156291] 0 0 0 0 13 0 0 3 0 0 ...
##  $ BVIC_DELUNK            : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ BYAM                   : num [1:156291] 0 0 0 0 0 0 4 7 0 0 ...
##  $ BNOTDETERMINED         : num [1:156291] 0 7 0 0 5 0 0 0 0 0 ...
##  $ INF_B                  : num [1:156291] 0 7 0 0 18 0 4 10 0 0 ...
##  $ INF_ALL                : num [1:156291] 0 127 0 0 70 0 6 308 0 18 ...
##  $ INF_NEGATIVE           : num [1:156291] 0 0 0 0 99 ...
##  $ ILI_ACTIVITY           : num [1:156291] 0 6 0 2 0 0 0 3 0 3 ...
##  $ ADENO                  : num [1:156291] 0 0 0 0 0 0 11 0 0 0 ...
##  $ BOCA                   : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ HUMAN_CORONA           : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ METAPNEUMO             : num [1:156291] 0 0 0 0 0 0 1 0 0 0 ...
##  $ PARAINFLUENZA          : num [1:156291] 0 0 0 0 0 0 51 0 0 0 ...
##  $ RHINO                  : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ RSV                    : num [1:156291] 0 0 0 0 1 0 30 0 11 0 ...
##  $ OTHERRESPVIRUS         : num [1:156291] 0 0 0 0 0 0 0 0 0 0 ...
##  $ OTHER_RESPVIRUS_DETAILS: chr [1:156291] "" "0" "" "Rhinovirus" ...
##  $ LAB_RESULT_COMMENT     : chr [1:156291] "" "No unsubtyped or antiviral resistence was found." "" "" ...
##  $ WCR_COMMENT            : chr [1:156291] "" "The Flu seasonal activity is low and H1N1 pandemic first detection." "" "" ...
##  $ ISO2                   : chr [1:156291] "AZ" "BR" "HR" "PH" ...
##  $ ISOYW                  : num [1:156291] 201547 200925 201407 200403 202407 ...
##  $ MMWRYW                 : num [1:156291] 201546 200925 201407 200402 202407 ...
# Stack every numeric column into a two-column long table for faceted
# plotting: Variable holds the source column name, Value the cell value.
# pivot_longer() replaces the superseded gather(). The resulting rows are
# ordered row-by-row instead of column-by-column, which makes no difference
# to a histogram.
numeric_long <- my_data %>%
  select(all_of(numeric_columns)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Variable",
    values_to = "Value"
  )

# Plot 2: one 30-bin histogram panel per numeric column; scales = "free"
# gives each panel its own x and y axes
p_hist <- ggplot(data = numeric_long, mapping = aes(x = Value)) +
  geom_histogram(bins = 30, fill = "blue", alpha = 0.7) +
  facet_wrap(vars(Variable), scales = "free") +
  theme_minimal() +
  ggtitle("Histograms of Numeric Columns") +
  xlab("Value") +
  ylab("Frequency")

print(p_hist)

# Plot 3: distribution of SPEC_PROCESSED_NB within each FLUSEASON level
p_box <- ggplot(
  data = my_data,
  mapping = aes(x = FLUSEASON, y = SPEC_PROCESSED_NB)
) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("Boxplot of SPEC_PROCESSED_NB by FLUSEASON") +
  xlab("FLUSEASON") +
  ylab("SPEC_PROCESSED_NB")

print(p_box)

# Plot 4: Time Series Plot for SPEC_PROCESSED_NB over ISO_WEEKSTARTDATE
# my_data has 156,291 rows but only 1,506 distinct week-start dates, so
# geom_line() on the raw rows would join unrelated observations that share a
# date into vertical spikes. Collapse to one total per week first so the line
# is a genuine time series.
weekly_processed <- my_data %>%
  group_by(ISO_WEEKSTARTDATE) %>%
  summarise(
    SPEC_PROCESSED_NB = sum(SPEC_PROCESSED_NB, na.rm = TRUE),
    .groups = "drop"
  )

p_time <- ggplot(
  weekly_processed,
  aes(x = ISO_WEEKSTARTDATE, y = SPEC_PROCESSED_NB)
) +
  geom_line() +
  theme_minimal() +
  labs(title = "Time Series of SPEC_PROCESSED_NB",
       x = "ISO Week Start Date",
       y = "SPEC_PROCESSED_NB")

print(p_time)