Library

library(jsonlite)
library(htmlTable)
library(rvest)
library(xml2)
library(arrow)
## 
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
## 
##     timestamp
library(gt)

Create the data frame

# Build the CUNYMart inventory: one row per product variation (20 rows).
# Item-level attributes (category, name, ID, brand, price) are written once per
# item and then repeated for each of that item's variations.
cunymart_data <- local({
  # Number of variations recorded for each of the ten items, in item order
  variation_counts <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)
  per_variation <- function(item_values) {
    rep(item_values, times = variation_counts)
  }

  item_ids <- per_variation(
    c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502)
  )

  data.frame(
    # Every category holds exactly two items
    Category = per_variation(rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      each = 2
    )),
    Item_Name = per_variation(c(
      "Smartphone", "Laptop",
      "Refrigerator", "Washing Machine",
      "T-Shirt", "Jeans",
      "Fiction Novel", "Non-Fiction Guide",
      "Basketball", "Tennis Racket"
    )),
    Item_ID = item_ids,
    # Books carry no brand; "-" is the placeholder used for them
    Brand = per_variation(c(
      "TechBrand", "CompuBrand",
      "HomeCool", "CleanTech",
      "FashionCo", "DenimWorks",
      "-", "-",
      "SportsGear", "RacketPro"
    )),
    Price = per_variation(c(
      699.99, 1099.99,
      899.99, 499.99,
      19.99, 49.99,
      14.99, 24.99,
      29.99, 89.99
    )),
    # "<Item_ID>-<letter>", lettered A, B, C, ... within each item
    Variation_ID = paste0(item_ids, "-", LETTERS[sequence(variation_counts)]),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Color: Light Blue, Size: 34",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver"
    ),
    stringsAsFactors = FALSE
  )
})

# Preview the first six rows
head(cunymart_data)
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Export to JSON

# Serialise the inventory data frame to pretty-printed JSON text
# (toJSON() emits a data frame as an array of one object per row)
json_data <- jsonlite::toJSON(cunymart_data, pretty = TRUE)

# Save the JSON text to disk in the working directory
writeLines(json_data, con = "cunymart_data.json")

# Echo the JSON text to the console
cat(json_data)
## [
##   {
##     "Category": "Electronics",
##     "Item_Name": "Smartphone",
##     "Item_ID": 101,
##     "Brand": "TechBrand",
##     "Price": 699.99,
##     "Variation_ID": "101-A",
##     "Variation_Details": "Color: Black, Storage: 64GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Smartphone",
##     "Item_ID": 101,
##     "Brand": "TechBrand",
##     "Price": 699.99,
##     "Variation_ID": "101-B",
##     "Variation_Details": "Color: White, Storage: 128GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Laptop",
##     "Item_ID": 102,
##     "Brand": "CompuBrand",
##     "Price": 1099.99,
##     "Variation_ID": "102-A",
##     "Variation_Details": "Color: Silver, Storage: 256GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Laptop",
##     "Item_ID": 102,
##     "Brand": "CompuBrand",
##     "Price": 1099.99,
##     "Variation_ID": "102-B",
##     "Variation_Details": "Color: Space Gray, Storage: 512GB"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Refrigerator",
##     "Item_ID": 201,
##     "Brand": "HomeCool",
##     "Price": 899.99,
##     "Variation_ID": "201-A",
##     "Variation_Details": "Color: Stainless Steel, Capacity: 20 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Refrigerator",
##     "Item_ID": 201,
##     "Brand": "HomeCool",
##     "Price": 899.99,
##     "Variation_ID": "201-B",
##     "Variation_Details": "Color: White, Capacity: 18 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Washing Machine",
##     "Item_ID": 202,
##     "Brand": "CleanTech",
##     "Price": 499.99,
##     "Variation_ID": "202-A",
##     "Variation_Details": "Type: Front Load, Capacity: 4.5 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Washing Machine",
##     "Item_ID": 202,
##     "Brand": "CleanTech",
##     "Price": 499.99,
##     "Variation_ID": "202-B",
##     "Variation_Details": "Type: Top Load, Capacity: 5.0 cu ft"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": 301,
##     "Brand": "FashionCo",
##     "Price": 19.99,
##     "Variation_ID": "301-A",
##     "Variation_Details": "Color: Blue, Size: S"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": 301,
##     "Brand": "FashionCo",
##     "Price": 19.99,
##     "Variation_ID": "301-B",
##     "Variation_Details": "Color: Red, Size: M"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": 301,
##     "Brand": "FashionCo",
##     "Price": 19.99,
##     "Variation_ID": "301-C",
##     "Variation_Details": "Color: Green, Size: L"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "Jeans",
##     "Item_ID": 302,
##     "Brand": "DenimWorks",
##     "Price": 49.99,
##     "Variation_ID": "302-A",
##     "Variation_Details": "Color: Dark Blue, Size: 32"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "Jeans",
##     "Item_ID": 302,
##     "Brand": "DenimWorks",
##     "Price": 49.99,
##     "Variation_ID": "302-B",
##     "Variation_Details": "Color: Light Blue, Size: 34"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Fiction Novel",
##     "Item_ID": 401,
##     "Brand": "-",
##     "Price": 14.99,
##     "Variation_ID": "401-A",
##     "Variation_Details": "Format: Hardcover, Language: English"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Fiction Novel",
##     "Item_ID": 401,
##     "Brand": "-",
##     "Price": 14.99,
##     "Variation_ID": "401-B",
##     "Variation_Details": "Format: Paperback, Language: Spanish"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Non-Fiction Guide",
##     "Item_ID": 402,
##     "Brand": "-",
##     "Price": 24.99,
##     "Variation_ID": "402-A",
##     "Variation_Details": "Format: eBook, Language: English"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Non-Fiction Guide",
##     "Item_ID": 402,
##     "Brand": "-",
##     "Price": 24.99,
##     "Variation_ID": "402-B",
##     "Variation_Details": "Format: Paperback, Language: French"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Basketball",
##     "Item_ID": 501,
##     "Brand": "SportsGear",
##     "Price": 29.99,
##     "Variation_ID": "501-A",
##     "Variation_Details": "Size: Size 7, Color: Orange"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Tennis Racket",
##     "Item_ID": 502,
##     "Brand": "RacketPro",
##     "Price": 89.99,
##     "Variation_ID": "502-A",
##     "Variation_Details": "Material: Graphite, Color: Black"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Tennis Racket",
##     "Item_ID": 502,
##     "Brand": "RacketPro",
##     "Price": 89.99,
##     "Variation_ID": "502-B",
##     "Variation_Details": "Material: Aluminum, Color: Silver"
##   }
## ]

Import JSON

# Parse the JSON file written earlier; fromJSON() simplifies the array of
# row objects back into a data frame
imported_json <- jsonlite::fromJSON(txt = "cunymart_data.json")

# Preview the first six imported rows
head(imported_json, n = 6L)
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Export to HTML

# Render the inventory as an HTML table without row names.
# NOTE: this variable shares its name with rvest::html_table(). Later calls to
# html_table() still reach the function, because R skips non-function objects
# when resolving a call.
html_table <- htmlTable(
  cunymart_data,
  caption = "CUNYMart Inventory Data",
  rnames = FALSE
)

# Save the generated markup to disk in the working directory
writeLines(html_table, con = "cunymart_data.html")

# Display the table (shown in the RStudio Viewer / rendered inline when knit)
html_table
CUNYMart Inventory Data
Category Item_Name Item_ID Brand Price Variation_ID Variation_Details
Electronics Smartphone 101 TechBrand 699.99 101-A Color: Black, Storage: 64GB
Electronics Smartphone 101 TechBrand 699.99 101-B Color: White, Storage: 128GB
Electronics Laptop 102 CompuBrand 1099.99 102-A Color: Silver, Storage: 256GB
Electronics Laptop 102 CompuBrand 1099.99 102-B Color: Space Gray, Storage: 512GB
Home Appliances Refrigerator 201 HomeCool 899.99 201-A Color: Stainless Steel, Capacity: 20 cu ft
Home Appliances Refrigerator 201 HomeCool 899.99 201-B Color: White, Capacity: 18 cu ft
Home Appliances Washing Machine 202 CleanTech 499.99 202-A Type: Front Load, Capacity: 4.5 cu ft
Home Appliances Washing Machine 202 CleanTech 499.99 202-B Type: Top Load, Capacity: 5.0 cu ft
Clothing T-Shirt 301 FashionCo 19.99 301-A Color: Blue, Size: S
Clothing T-Shirt 301 FashionCo 19.99 301-B Color: Red, Size: M
Clothing T-Shirt 301 FashionCo 19.99 301-C Color: Green, Size: L
Clothing Jeans 302 DenimWorks 49.99 302-A Color: Dark Blue, Size: 32
Clothing Jeans 302 DenimWorks 49.99 302-B Color: Light Blue, Size: 34
Books Fiction Novel 401 - 14.99 401-A Format: Hardcover, Language: English
Books Fiction Novel 401 - 14.99 401-B Format: Paperback, Language: Spanish
Books Non-Fiction Guide 402 - 24.99 402-A Format: eBook, Language: English
Books Non-Fiction Guide 402 - 24.99 402-B Format: Paperback, Language: French
Sports Equipment Basketball 501 SportsGear 29.99 501-A Size: Size 7, Color: Orange
Sports Equipment Tennis Racket 502 RacketPro 89.99 502-A Material: Graphite, Color: Black
Sports Equipment Tennis Racket 502 RacketPro 89.99 502-B Material: Aluminum, Color: Silver

Import HTML

#' Promote the real header row of a table that was parsed without a header
#'
#' htmlTable() writes the caption as a cell spanning every column, placed
#' above the column labels. html_table() repeats a spanning cell into each
#' column, so the caption arrives as a leading row whose cells are all
#' identical and the true labels end up in the first data row.
#'
#' @param raw_table A data frame (or tibble) of character columns, parsed with
#'   `header = FALSE`.
#' @return `raw_table` without its leading caption row(s), with the first
#'   remaining row used as column names and each column re-typed by
#'   `type.convert(as.is = TRUE)`.
promote_header_row <- function(raw_table) {
  stopifnot(is.data.frame(raw_table))

  # A row is treated as a caption when every cell equals the first cell.
  # Single-column tables are excluded: there every row would match.
  matches_first <- lapply(raw_table, function(column) column == raw_table[[1]])
  is_caption_row <- ncol(raw_table) > 1L & Reduce(`&`, matches_first)
  is_caption_row[is.na(is_caption_row)] <- FALSE

  # The header is the first row that is not a caption row
  header_idx <- match(FALSE, is_caption_row)
  if (is.na(header_idx)) {
    stop("No header row found in the parsed HTML table", call. = FALSE)
  }

  header_labels <- as.character(
    unlist(raw_table[header_idx, ], use.names = FALSE)
  )
  body_rows <- raw_table[-seq_len(header_idx), , drop = FALSE]
  names(body_rows) <- header_labels
  rownames(body_rows) <- NULL

  # Everything was read as text; restore integer/double columns where possible
  body_rows[] <- lapply(body_rows, type.convert, as.is = TRUE)
  body_rows
}

# Read HTML file
html_data <- read_html("cunymart_data.html")

# Extract the table as plain text, then repair the header. Automatic header
# detection and type conversion are switched off because the caption row
# defeats both (columns come back as X1..X7, all character).
imported_html <- html_data %>%
  html_node("table") %>%
  html_table(header = FALSE, convert = FALSE) %>%
  promote_header_row()

# View imported data
head(imported_html)
## # A tibble: 6 × 7
##   X1                      X2                      X3     X4    X5    X6    X7   
##   <chr>                   <chr>                   <chr>  <chr> <chr> <chr> <chr>
## 1 CUNYMart Inventory Data CUNYMart Inventory Data CUNYM… CUNY… CUNY… CUNY… CUNY…
## 2 Category                Item_Name               Item_… Brand Price Vari… Vari…
## 3 Electronics             Smartphone              101    Tech… 699.… 101-A Colo…
## 4 Electronics             Smartphone              101    Tech… 699.… 101-B Colo…
## 5 Electronics             Laptop                  102    Comp… 1099… 102-A Colo…
## 6 Electronics             Laptop                  102    Comp… 1099… 102-B Colo…

Export to XML

# Create an empty XML document with a single root element
xml_doc <- xml_new_document()
root <- xml_add_child(xml_doc, "CUNYMartInventory")

# Add one <Item> element per data-frame row, with one child element per
# column (element name = column name, element text = cell value).
# seq_len() is used instead of 1:nrow() so that an empty data frame yields
# zero iterations rather than iterating over c(1, 0).
for (i in seq_len(nrow(cunymart_data))) {
  record <- xml_add_child(root, "Item")
  # Columns are emitted in data-frame order: Category, Item_Name, Item_ID,
  # Brand, Price, Variation_ID, Variation_Details
  for (field in names(cunymart_data)) {
    xml_add_child(record, field, cunymart_data[[field]][i])
  }
}

# Write to file
write_xml(xml_doc, "cunymart_data.xml")

# View XML
cat(as.character(xml_doc))
## <?xml version="1.0" encoding="UTF-8"?>
## <CUNYMartInventory>
##   <Item>
##     <Category>Electronics</Category>
##     <Item_Name>Smartphone</Item_Name>
##     <Item_ID>101</Item_ID>
##     <Brand>TechBrand</Brand>
##     <Price>699.99</Price>
##     <Variation_ID>101-A</Variation_ID>
##     <Variation_Details>Color: Black, Storage: 64GB</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Electronics</Category>
##     <Item_Name>Smartphone</Item_Name>
##     <Item_ID>101</Item_ID>
##     <Brand>TechBrand</Brand>
##     <Price>699.99</Price>
##     <Variation_ID>101-B</Variation_ID>
##     <Variation_Details>Color: White, Storage: 128GB</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Electronics</Category>
##     <Item_Name>Laptop</Item_Name>
##     <Item_ID>102</Item_ID>
##     <Brand>CompuBrand</Brand>
##     <Price>1099.99</Price>
##     <Variation_ID>102-A</Variation_ID>
##     <Variation_Details>Color: Silver, Storage: 256GB</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Electronics</Category>
##     <Item_Name>Laptop</Item_Name>
##     <Item_ID>102</Item_ID>
##     <Brand>CompuBrand</Brand>
##     <Price>1099.99</Price>
##     <Variation_ID>102-B</Variation_ID>
##     <Variation_Details>Color: Space Gray, Storage: 512GB</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Home Appliances</Category>
##     <Item_Name>Refrigerator</Item_Name>
##     <Item_ID>201</Item_ID>
##     <Brand>HomeCool</Brand>
##     <Price>899.99</Price>
##     <Variation_ID>201-A</Variation_ID>
##     <Variation_Details>Color: Stainless Steel, Capacity: 20 cu ft</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Home Appliances</Category>
##     <Item_Name>Refrigerator</Item_Name>
##     <Item_ID>201</Item_ID>
##     <Brand>HomeCool</Brand>
##     <Price>899.99</Price>
##     <Variation_ID>201-B</Variation_ID>
##     <Variation_Details>Color: White, Capacity: 18 cu ft</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Home Appliances</Category>
##     <Item_Name>Washing Machine</Item_Name>
##     <Item_ID>202</Item_ID>
##     <Brand>CleanTech</Brand>
##     <Price>499.99</Price>
##     <Variation_ID>202-A</Variation_ID>
##     <Variation_Details>Type: Front Load, Capacity: 4.5 cu ft</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Home Appliances</Category>
##     <Item_Name>Washing Machine</Item_Name>
##     <Item_ID>202</Item_ID>
##     <Brand>CleanTech</Brand>
##     <Price>499.99</Price>
##     <Variation_ID>202-B</Variation_ID>
##     <Variation_Details>Type: Top Load, Capacity: 5.0 cu ft</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Clothing</Category>
##     <Item_Name>T-Shirt</Item_Name>
##     <Item_ID>301</Item_ID>
##     <Brand>FashionCo</Brand>
##     <Price>19.99</Price>
##     <Variation_ID>301-A</Variation_ID>
##     <Variation_Details>Color: Blue, Size: S</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Clothing</Category>
##     <Item_Name>T-Shirt</Item_Name>
##     <Item_ID>301</Item_ID>
##     <Brand>FashionCo</Brand>
##     <Price>19.99</Price>
##     <Variation_ID>301-B</Variation_ID>
##     <Variation_Details>Color: Red, Size: M</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Clothing</Category>
##     <Item_Name>T-Shirt</Item_Name>
##     <Item_ID>301</Item_ID>
##     <Brand>FashionCo</Brand>
##     <Price>19.99</Price>
##     <Variation_ID>301-C</Variation_ID>
##     <Variation_Details>Color: Green, Size: L</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Clothing</Category>
##     <Item_Name>Jeans</Item_Name>
##     <Item_ID>302</Item_ID>
##     <Brand>DenimWorks</Brand>
##     <Price>49.99</Price>
##     <Variation_ID>302-A</Variation_ID>
##     <Variation_Details>Color: Dark Blue, Size: 32</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Clothing</Category>
##     <Item_Name>Jeans</Item_Name>
##     <Item_ID>302</Item_ID>
##     <Brand>DenimWorks</Brand>
##     <Price>49.99</Price>
##     <Variation_ID>302-B</Variation_ID>
##     <Variation_Details>Color: Light Blue, Size: 34</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Books</Category>
##     <Item_Name>Fiction Novel</Item_Name>
##     <Item_ID>401</Item_ID>
##     <Brand>-</Brand>
##     <Price>14.99</Price>
##     <Variation_ID>401-A</Variation_ID>
##     <Variation_Details>Format: Hardcover, Language: English</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Books</Category>
##     <Item_Name>Fiction Novel</Item_Name>
##     <Item_ID>401</Item_ID>
##     <Brand>-</Brand>
##     <Price>14.99</Price>
##     <Variation_ID>401-B</Variation_ID>
##     <Variation_Details>Format: Paperback, Language: Spanish</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Books</Category>
##     <Item_Name>Non-Fiction Guide</Item_Name>
##     <Item_ID>402</Item_ID>
##     <Brand>-</Brand>
##     <Price>24.99</Price>
##     <Variation_ID>402-A</Variation_ID>
##     <Variation_Details>Format: eBook, Language: English</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Books</Category>
##     <Item_Name>Non-Fiction Guide</Item_Name>
##     <Item_ID>402</Item_ID>
##     <Brand>-</Brand>
##     <Price>24.99</Price>
##     <Variation_ID>402-B</Variation_ID>
##     <Variation_Details>Format: Paperback, Language: French</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Sports Equipment</Category>
##     <Item_Name>Basketball</Item_Name>
##     <Item_ID>501</Item_ID>
##     <Brand>SportsGear</Brand>
##     <Price>29.99</Price>
##     <Variation_ID>501-A</Variation_ID>
##     <Variation_Details>Size: Size 7, Color: Orange</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Sports Equipment</Category>
##     <Item_Name>Tennis Racket</Item_Name>
##     <Item_ID>502</Item_ID>
##     <Brand>RacketPro</Brand>
##     <Price>89.99</Price>
##     <Variation_ID>502-A</Variation_ID>
##     <Variation_Details>Material: Graphite, Color: Black</Variation_Details>
##   </Item>
##   <Item>
##     <Category>Sports Equipment</Category>
##     <Item_Name>Tennis Racket</Item_Name>
##     <Item_ID>502</Item_ID>
##     <Brand>RacketPro</Brand>
##     <Price>89.99</Price>
##     <Variation_ID>502-B</Variation_ID>
##     <Variation_Details>Material: Aluminum, Color: Silver</Variation_Details>
##   </Item>
## </CUNYMartInventory>

Import XML

# Parse the XML file written earlier
xml_data <- read_xml("cunymart_data.xml")

# Collect every <Item> element; each one becomes a data-frame row
items <- xml_find_all(xml_data, "//Item")

# Build the data frame column by column. Each column is the text of the
# same-named child element of every <Item>. XML stores everything as text, so
# the two numeric columns are converted back as they are read.
imported_xml <- local({
  child_text <- function(tag) {
    xml_text(xml_find_first(items, tag))
  }

  data.frame(
    Category = child_text("Category"),
    Item_Name = child_text("Item_Name"),
    Item_ID = as.integer(child_text("Item_ID")),
    Brand = child_text("Brand"),
    Price = as.numeric(child_text("Price")),
    Variation_ID = child_text("Variation_ID"),
    Variation_Details = child_text("Variation_Details"),
    stringsAsFactors = FALSE
  )
})

# Preview the first six imported rows
head(imported_xml)
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Export to Parquet

# Write the inventory data frame to a Parquet file in the working directory
arrow::write_parquet(x = cunymart_data, sink = "cunymart_data.parquet")

Import Parquet

# Load the Parquet file written earlier (returned as a tibble, per the
# printed output)
imported_parquet <- arrow::read_parquet(file = "cunymart_data.parquet")

# Preview the first six imported rows
head(imported_parquet, n = 6L)
## # A tibble: 6 × 7
##   Category        Item_Name   Item_ID Brand Price Variation_ID Variation_Details
##   <chr>           <chr>         <dbl> <chr> <dbl> <chr>        <chr>            
## 1 Electronics     Smartphone      101 Tech…  700. 101-A        Color: Black, St…
## 2 Electronics     Smartphone      101 Tech…  700. 101-B        Color: White, St…
## 3 Electronics     Laptop          102 Comp… 1100. 102-A        Color: Silver, S…
## 4 Electronics     Laptop          102 Comp… 1100. 102-B        Color: Space Gra…
## 5 Home Appliances Refrigerat…     201 Home…  900. 201-A        Color: Stainless…
## 6 Home Appliances Refrigerat…     201 Home…  900. 201-B        Color: White, Ca…

Summary Table

# Qualitative comparison of the four file formats exercised above:
# one row per format, with comma-separated pros and cons.
pros_cons <- data.frame(
  Format = c("JSON", "HTML", "XML", "Parquet"),
  Pros = c("Simple to debug, Easy to read & write, Used in APIs",
           "Great for displaying in web browsers, Interactive visualizations",
           "Hierarchical data support, self-descriptive format",
           "Highly compressed, Fast for big data and analytics"),
  Cons = c("No Metadata, Harder to work with large files",
           "Not a suitable data exchange format, Parsing is harder",
           "Verbose and large file sizes, Harder to read",
           "Harder to read, Complex to debug"),
  # Keep text columns as character, matching the other data frames in this file
  stringsAsFactors = FALSE
)

# Render the comparison as a gt table with a title
proscons_table <- gt(pros_cons) %>%
  tab_header(
    title = "Pros and Cons of Different Data Formats"
  )

# Display the table
proscons_table
Pros and Cons of Different Data Formats
Format Pros Cons
JSON Simple to debug, Easy to read & write, Used in APIs No Metadata, Harder to work with large files
HTML Great for displaying in web browsers, Interactive visualizations Not a suitable data exchange format, Parsing is harder
XML Hierarchical data support, self-descriptive format Verbose and large file sizes, Harder to read
Parquet Highly compressed, Fast for big data and analytics Harder to read, Complex to debug