Library
library(jsonlite)
library(htmlTable)
library(rvest)
library(xml2)
library(arrow)
##
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
##
## timestamp
library(gt)
Create the data frame
# Assemble the CUNYMart inventory: ten distinct items, one row per variation.
cunymart_data <- local({
  # Number of variations stocked for each item, in catalogue order.
  n_variations <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)
  # Item IDs expanded to one entry per variation row.
  item_ids <- rep(
    c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502),
    times = n_variations
  )
  data.frame(
    Category = rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      times = c(4, 4, 5, 4, 3)
    ),
    Item_Name = rep(
      c("Smartphone", "Laptop", "Refrigerator", "Washing Machine",
        "T-Shirt", "Jeans", "Fiction Novel", "Non-Fiction Guide",
        "Basketball", "Tennis Racket"),
      times = n_variations
    ),
    Item_ID = item_ids,
    Brand = rep(
      c("TechBrand", "CompuBrand", "HomeCool", "CleanTech",
        "FashionCo", "DenimWorks", "-", "-",
        "SportsGear", "RacketPro"),
      times = n_variations
    ),
    Price = rep(
      c(699.99, 1099.99, 899.99, 499.99, 19.99, 49.99, 14.99, 24.99,
        29.99, 89.99),
      times = n_variations
    ),
    # Label the variations of each item A, B, C, ... after the item ID.
    Variation_ID = paste0(item_ids, "-", LETTERS[sequence(n_variations)]),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Color: Light Blue, Size: 34",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver"
    ),
    stringsAsFactors = FALSE
  )
})
# Preview the first six rows.
head(cunymart_data)
## Category Item_Name Item_ID Brand Price Variation_ID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## Variation_Details
## 1 Color: Black, Storage: 64GB
## 2 Color: White, Storage: 128GB
## 3 Color: Silver, Storage: 256GB
## 4 Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6 Color: White, Capacity: 18 cu ft
Export to JSON
# Serialise the inventory as pretty-printed JSON text
json_data <- toJSON(cunymart_data, pretty = TRUE)
# Persist the JSON text (newline-terminated) to disk
writeLines(json_data, "cunymart_data.json")
# Echo the JSON text to the console
cat(json_data)
## [
## {
## "Category": "Electronics",
## "Item_Name": "Smartphone",
## "Item_ID": 101,
## "Brand": "TechBrand",
## "Price": 699.99,
## "Variation_ID": "101-A",
## "Variation_Details": "Color: Black, Storage: 64GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Smartphone",
## "Item_ID": 101,
## "Brand": "TechBrand",
## "Price": 699.99,
## "Variation_ID": "101-B",
## "Variation_Details": "Color: White, Storage: 128GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Laptop",
## "Item_ID": 102,
## "Brand": "CompuBrand",
## "Price": 1099.99,
## "Variation_ID": "102-A",
## "Variation_Details": "Color: Silver, Storage: 256GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Laptop",
## "Item_ID": 102,
## "Brand": "CompuBrand",
## "Price": 1099.99,
## "Variation_ID": "102-B",
## "Variation_Details": "Color: Space Gray, Storage: 512GB"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Refrigerator",
## "Item_ID": 201,
## "Brand": "HomeCool",
## "Price": 899.99,
## "Variation_ID": "201-A",
## "Variation_Details": "Color: Stainless Steel, Capacity: 20 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Refrigerator",
## "Item_ID": 201,
## "Brand": "HomeCool",
## "Price": 899.99,
## "Variation_ID": "201-B",
## "Variation_Details": "Color: White, Capacity: 18 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Washing Machine",
## "Item_ID": 202,
## "Brand": "CleanTech",
## "Price": 499.99,
## "Variation_ID": "202-A",
## "Variation_Details": "Type: Front Load, Capacity: 4.5 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Washing Machine",
## "Item_ID": 202,
## "Brand": "CleanTech",
## "Price": 499.99,
## "Variation_ID": "202-B",
## "Variation_Details": "Type: Top Load, Capacity: 5.0 cu ft"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": 301,
## "Brand": "FashionCo",
## "Price": 19.99,
## "Variation_ID": "301-A",
## "Variation_Details": "Color: Blue, Size: S"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": 301,
## "Brand": "FashionCo",
## "Price": 19.99,
## "Variation_ID": "301-B",
## "Variation_Details": "Color: Red, Size: M"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": 301,
## "Brand": "FashionCo",
## "Price": 19.99,
## "Variation_ID": "301-C",
## "Variation_Details": "Color: Green, Size: L"
## },
## {
## "Category": "Clothing",
## "Item_Name": "Jeans",
## "Item_ID": 302,
## "Brand": "DenimWorks",
## "Price": 49.99,
## "Variation_ID": "302-A",
## "Variation_Details": "Color: Dark Blue, Size: 32"
## },
## {
## "Category": "Clothing",
## "Item_Name": "Jeans",
## "Item_ID": 302,
## "Brand": "DenimWorks",
## "Price": 49.99,
## "Variation_ID": "302-B",
## "Variation_Details": "Color: Light Blue, Size: 34"
## },
## {
## "Category": "Books",
## "Item_Name": "Fiction Novel",
## "Item_ID": 401,
## "Brand": "-",
## "Price": 14.99,
## "Variation_ID": "401-A",
## "Variation_Details": "Format: Hardcover, Language: English"
## },
## {
## "Category": "Books",
## "Item_Name": "Fiction Novel",
## "Item_ID": 401,
## "Brand": "-",
## "Price": 14.99,
## "Variation_ID": "401-B",
## "Variation_Details": "Format: Paperback, Language: Spanish"
## },
## {
## "Category": "Books",
## "Item_Name": "Non-Fiction Guide",
## "Item_ID": 402,
## "Brand": "-",
## "Price": 24.99,
## "Variation_ID": "402-A",
## "Variation_Details": "Format: eBook, Language: English"
## },
## {
## "Category": "Books",
## "Item_Name": "Non-Fiction Guide",
## "Item_ID": 402,
## "Brand": "-",
## "Price": 24.99,
## "Variation_ID": "402-B",
## "Variation_Details": "Format: Paperback, Language: French"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Basketball",
## "Item_ID": 501,
## "Brand": "SportsGear",
## "Price": 29.99,
## "Variation_ID": "501-A",
## "Variation_Details": "Size: Size 7, Color: Orange"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Tennis Racket",
## "Item_ID": 502,
## "Brand": "RacketPro",
## "Price": 89.99,
## "Variation_ID": "502-A",
## "Variation_Details": "Material: Graphite, Color: Black"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Tennis Racket",
## "Item_ID": 502,
## "Brand": "RacketPro",
## "Price": 89.99,
## "Variation_ID": "502-B",
## "Variation_Details": "Material: Aluminum, Color: Silver"
## }
## ]
Import JSON
# Parse the exported JSON file back into a data frame
imported_json <- fromJSON(txt = "cunymart_data.json")
# Preview the first six imported rows
head(imported_json)
## Category Item_Name Item_ID Brand Price Variation_ID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## Variation_Details
## 1 Color: Black, Storage: 64GB
## 2 Color: White, Storage: 128GB
## 3 Color: Silver, Storage: 256GB
## 4 Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6 Color: White, Capacity: 18 cu ft
Export to HTML
# Render the inventory as an HTML table with a caption and no row names
html_table <- htmlTable(
  cunymart_data,
  rnames = FALSE,
  caption = "CUNYMart Inventory Data"
)
# Persist the HTML markup (newline-terminated) to disk
writeLines(html_table, "cunymart_data.html")
# Display the table (shown in the RStudio Viewer when run interactively)
html_table
|
CUNYMart Inventory Data
|
|
Category
|
Item_Name
|
Item_ID
|
Brand
|
Price
|
Variation_ID
|
Variation_Details
|
|
Electronics
|
Smartphone
|
101
|
TechBrand
|
699.99
|
101-A
|
Color: Black, Storage: 64GB
|
|
Electronics
|
Smartphone
|
101
|
TechBrand
|
699.99
|
101-B
|
Color: White, Storage: 128GB
|
|
Electronics
|
Laptop
|
102
|
CompuBrand
|
1099.99
|
102-A
|
Color: Silver, Storage: 256GB
|
|
Electronics
|
Laptop
|
102
|
CompuBrand
|
1099.99
|
102-B
|
Color: Space Gray, Storage: 512GB
|
|
Home Appliances
|
Refrigerator
|
201
|
HomeCool
|
899.99
|
201-A
|
Color: Stainless Steel, Capacity: 20 cu ft
|
|
Home Appliances
|
Refrigerator
|
201
|
HomeCool
|
899.99
|
201-B
|
Color: White, Capacity: 18 cu ft
|
|
Home Appliances
|
Washing Machine
|
202
|
CleanTech
|
499.99
|
202-A
|
Type: Front Load, Capacity: 4.5 cu ft
|
|
Home Appliances
|
Washing Machine
|
202
|
CleanTech
|
499.99
|
202-B
|
Type: Top Load, Capacity: 5.0 cu ft
|
|
Clothing
|
T-Shirt
|
301
|
FashionCo
|
19.99
|
301-A
|
Color: Blue, Size: S
|
|
Clothing
|
T-Shirt
|
301
|
FashionCo
|
19.99
|
301-B
|
Color: Red, Size: M
|
|
Clothing
|
T-Shirt
|
301
|
FashionCo
|
19.99
|
301-C
|
Color: Green, Size: L
|
|
Clothing
|
Jeans
|
302
|
DenimWorks
|
49.99
|
302-A
|
Color: Dark Blue, Size: 32
|
|
Clothing
|
Jeans
|
302
|
DenimWorks
|
49.99
|
302-B
|
Color: Light Blue, Size: 34
|
|
Books
|
Fiction Novel
|
401
|
-
|
14.99
|
401-A
|
Format: Hardcover, Language: English
|
|
Books
|
Fiction Novel
|
401
|
-
|
14.99
|
401-B
|
Format: Paperback, Language: Spanish
|
|
Books
|
Non-Fiction Guide
|
402
|
-
|
24.99
|
402-A
|
Format: eBook, Language: English
|
|
Books
|
Non-Fiction Guide
|
402
|
-
|
24.99
|
402-B
|
Format: Paperback, Language: French
|
|
Sports Equipment
|
Basketball
|
501
|
SportsGear
|
29.99
|
501-A
|
Size: Size 7, Color: Orange
|
|
Sports Equipment
|
Tennis Racket
|
502
|
RacketPro
|
89.99
|
502-A
|
Material: Graphite, Color: Black
|
|
Sports Equipment
|
Tennis Racket
|
502
|
RacketPro
|
89.99
|
502-B
|
Material: Aluminum, Color: Silver
|
Import HTML
# Read HTML file
html_data <- read_html("cunymart_data.html")
# Extract the table with header detection switched off. The exported file
# carries the caption as the table's first row, so the default parse returns
# columns named X1..X7 with the caption and the real header as data rows.
raw_html <- html_data %>%
  html_node("table") %>%
  html_table(header = FALSE)
# A caption row repeats a single value across every column; the real header is
# the first row whose cells are not all identical.
has_distinct_cells <- vapply(
  seq_len(nrow(raw_html)),
  function(i) length(unique(unlist(raw_html[i, ], use.names = FALSE))) > 1L,
  logical(1)
)
header_row <- which(has_distinct_cells)[1]
if (is.na(header_row)) {
  stop("No header row found in cunymart_data.html", call. = FALSE)
}
# Promote the header row to column names and drop it and everything above it.
imported_html <- raw_html[-seq_len(header_row), ]
names(imported_html) <- as.character(
  unlist(raw_html[header_row, ], use.names = FALSE)
)
# Every column was parsed as text because of the leading caption/header rows;
# re-infer types so Item_ID and Price become numeric again.
imported_html <- utils::type.convert(imported_html, as.is = TRUE)
# View imported data
head(imported_html)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 CUNYMart Inventory Data CUNYMart Inventory Data CUNYM… CUNY… CUNY… CUNY… CUNY…
## 2 Category Item_Name Item_… Brand Price Vari… Vari…
## 3 Electronics Smartphone 101 Tech… 699.… 101-A Colo…
## 4 Electronics Smartphone 101 Tech… 699.… 101-B Colo…
## 5 Electronics Laptop 102 Comp… 1099… 102-A Colo…
## 6 Electronics Laptop 102 Comp… 1099… 102-B Colo…
Export to XML
# Create an empty XML document with a single root element
xml_doc <- xml_new_document()
root <- xml_add_child(xml_doc, "CUNYMartInventory")
# Add one <Item> element per inventory row. seq_len() gives an empty loop for a
# zero-row data frame, whereas 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(cunymart_data))) {
  record <- xml_add_child(root, "Item")
  # One child element per column, in data-frame order; values are written as
  # text, so numeric columns are converted explicitly.
  for (field in names(cunymart_data)) {
    xml_add_child(record, field, as.character(cunymart_data[[field]][i]))
  }
}
# Write to file
write_xml(xml_doc, "cunymart_data.xml")
# View XML
cat(as.character(xml_doc))
## <?xml version="1.0" encoding="UTF-8"?>
## <CUNYMartInventory>
## <Item>
## <Category>Electronics</Category>
## <Item_Name>Smartphone</Item_Name>
## <Item_ID>101</Item_ID>
## <Brand>TechBrand</Brand>
## <Price>699.99</Price>
## <Variation_ID>101-A</Variation_ID>
## <Variation_Details>Color: Black, Storage: 64GB</Variation_Details>
## </Item>
## <Item>
## <Category>Electronics</Category>
## <Item_Name>Smartphone</Item_Name>
## <Item_ID>101</Item_ID>
## <Brand>TechBrand</Brand>
## <Price>699.99</Price>
## <Variation_ID>101-B</Variation_ID>
## <Variation_Details>Color: White, Storage: 128GB</Variation_Details>
## </Item>
## <Item>
## <Category>Electronics</Category>
## <Item_Name>Laptop</Item_Name>
## <Item_ID>102</Item_ID>
## <Brand>CompuBrand</Brand>
## <Price>1099.99</Price>
## <Variation_ID>102-A</Variation_ID>
## <Variation_Details>Color: Silver, Storage: 256GB</Variation_Details>
## </Item>
## <Item>
## <Category>Electronics</Category>
## <Item_Name>Laptop</Item_Name>
## <Item_ID>102</Item_ID>
## <Brand>CompuBrand</Brand>
## <Price>1099.99</Price>
## <Variation_ID>102-B</Variation_ID>
## <Variation_Details>Color: Space Gray, Storage: 512GB</Variation_Details>
## </Item>
## <Item>
## <Category>Home Appliances</Category>
## <Item_Name>Refrigerator</Item_Name>
## <Item_ID>201</Item_ID>
## <Brand>HomeCool</Brand>
## <Price>899.99</Price>
## <Variation_ID>201-A</Variation_ID>
## <Variation_Details>Color: Stainless Steel, Capacity: 20 cu ft</Variation_Details>
## </Item>
## <Item>
## <Category>Home Appliances</Category>
## <Item_Name>Refrigerator</Item_Name>
## <Item_ID>201</Item_ID>
## <Brand>HomeCool</Brand>
## <Price>899.99</Price>
## <Variation_ID>201-B</Variation_ID>
## <Variation_Details>Color: White, Capacity: 18 cu ft</Variation_Details>
## </Item>
## <Item>
## <Category>Home Appliances</Category>
## <Item_Name>Washing Machine</Item_Name>
## <Item_ID>202</Item_ID>
## <Brand>CleanTech</Brand>
## <Price>499.99</Price>
## <Variation_ID>202-A</Variation_ID>
## <Variation_Details>Type: Front Load, Capacity: 4.5 cu ft</Variation_Details>
## </Item>
## <Item>
## <Category>Home Appliances</Category>
## <Item_Name>Washing Machine</Item_Name>
## <Item_ID>202</Item_ID>
## <Brand>CleanTech</Brand>
## <Price>499.99</Price>
## <Variation_ID>202-B</Variation_ID>
## <Variation_Details>Type: Top Load, Capacity: 5.0 cu ft</Variation_Details>
## </Item>
## <Item>
## <Category>Clothing</Category>
## <Item_Name>T-Shirt</Item_Name>
## <Item_ID>301</Item_ID>
## <Brand>FashionCo</Brand>
## <Price>19.99</Price>
## <Variation_ID>301-A</Variation_ID>
## <Variation_Details>Color: Blue, Size: S</Variation_Details>
## </Item>
## <Item>
## <Category>Clothing</Category>
## <Item_Name>T-Shirt</Item_Name>
## <Item_ID>301</Item_ID>
## <Brand>FashionCo</Brand>
## <Price>19.99</Price>
## <Variation_ID>301-B</Variation_ID>
## <Variation_Details>Color: Red, Size: M</Variation_Details>
## </Item>
## <Item>
## <Category>Clothing</Category>
## <Item_Name>T-Shirt</Item_Name>
## <Item_ID>301</Item_ID>
## <Brand>FashionCo</Brand>
## <Price>19.99</Price>
## <Variation_ID>301-C</Variation_ID>
## <Variation_Details>Color: Green, Size: L</Variation_Details>
## </Item>
## <Item>
## <Category>Clothing</Category>
## <Item_Name>Jeans</Item_Name>
## <Item_ID>302</Item_ID>
## <Brand>DenimWorks</Brand>
## <Price>49.99</Price>
## <Variation_ID>302-A</Variation_ID>
## <Variation_Details>Color: Dark Blue, Size: 32</Variation_Details>
## </Item>
## <Item>
## <Category>Clothing</Category>
## <Item_Name>Jeans</Item_Name>
## <Item_ID>302</Item_ID>
## <Brand>DenimWorks</Brand>
## <Price>49.99</Price>
## <Variation_ID>302-B</Variation_ID>
## <Variation_Details>Color: Light Blue, Size: 34</Variation_Details>
## </Item>
## <Item>
## <Category>Books</Category>
## <Item_Name>Fiction Novel</Item_Name>
## <Item_ID>401</Item_ID>
## <Brand>-</Brand>
## <Price>14.99</Price>
## <Variation_ID>401-A</Variation_ID>
## <Variation_Details>Format: Hardcover, Language: English</Variation_Details>
## </Item>
## <Item>
## <Category>Books</Category>
## <Item_Name>Fiction Novel</Item_Name>
## <Item_ID>401</Item_ID>
## <Brand>-</Brand>
## <Price>14.99</Price>
## <Variation_ID>401-B</Variation_ID>
## <Variation_Details>Format: Paperback, Language: Spanish</Variation_Details>
## </Item>
## <Item>
## <Category>Books</Category>
## <Item_Name>Non-Fiction Guide</Item_Name>
## <Item_ID>402</Item_ID>
## <Brand>-</Brand>
## <Price>24.99</Price>
## <Variation_ID>402-A</Variation_ID>
## <Variation_Details>Format: eBook, Language: English</Variation_Details>
## </Item>
## <Item>
## <Category>Books</Category>
## <Item_Name>Non-Fiction Guide</Item_Name>
## <Item_ID>402</Item_ID>
## <Brand>-</Brand>
## <Price>24.99</Price>
## <Variation_ID>402-B</Variation_ID>
## <Variation_Details>Format: Paperback, Language: French</Variation_Details>
## </Item>
## <Item>
## <Category>Sports Equipment</Category>
## <Item_Name>Basketball</Item_Name>
## <Item_ID>501</Item_ID>
## <Brand>SportsGear</Brand>
## <Price>29.99</Price>
## <Variation_ID>501-A</Variation_ID>
## <Variation_Details>Size: Size 7, Color: Orange</Variation_Details>
## </Item>
## <Item>
## <Category>Sports Equipment</Category>
## <Item_Name>Tennis Racket</Item_Name>
## <Item_ID>502</Item_ID>
## <Brand>RacketPro</Brand>
## <Price>89.99</Price>
## <Variation_ID>502-A</Variation_ID>
## <Variation_Details>Material: Graphite, Color: Black</Variation_Details>
## </Item>
## <Item>
## <Category>Sports Equipment</Category>
## <Item_Name>Tennis Racket</Item_Name>
## <Item_ID>502</Item_ID>
## <Brand>RacketPro</Brand>
## <Price>89.99</Price>
## <Variation_ID>502-B</Variation_ID>
## <Variation_Details>Material: Aluminum, Color: Silver</Variation_Details>
## </Item>
## </CUNYMartInventory>
Import XML
# Parse the exported XML file
xml_data <- read_xml("cunymart_data.xml")
# Collect every <Item> node in the document
items <- xml_find_all(xml_data, "//Item")
# Child elements to pull out of each <Item>, in output column order
item_fields <- c(
  "Category", "Item_Name", "Item_ID", "Brand", "Price",
  "Variation_ID", "Variation_Details"
)
# Build one text column per field from the first matching child of each item
imported_xml <- data.frame(
  lapply(
    setNames(item_fields, item_fields),
    function(field) xml_text(xml_find_first(items, field))
  ),
  stringsAsFactors = FALSE
)
# XML text is untyped, so restore the numeric columns
imported_xml[["Item_ID"]] <- as.integer(imported_xml[["Item_ID"]])
imported_xml[["Price"]] <- as.numeric(imported_xml[["Price"]])
# Preview the first six imported rows
head(imported_xml)
## Category Item_Name Item_ID Brand Price Variation_ID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## Variation_Details
## 1 Color: Black, Storage: 64GB
## 2 Color: White, Storage: 128GB
## 3 Color: Silver, Storage: 256GB
## 4 Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6 Color: White, Capacity: 18 cu ft
Export to Parquet
# Write the inventory data frame to a Parquet file
write_parquet(x = cunymart_data, sink = "cunymart_data.parquet")
Import Parquet
# Load the Parquet file back into memory
imported_parquet <- read_parquet(file = "cunymart_data.parquet")
# Preview the first six imported rows
head(imported_parquet)
## # A tibble: 6 × 7
## Category Item_Name Item_ID Brand Price Variation_ID Variation_Details
## <chr> <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 Electronics Smartphone 101 Tech… 700. 101-A Color: Black, St…
## 2 Electronics Smartphone 101 Tech… 700. 101-B Color: White, St…
## 3 Electronics Laptop 102 Comp… 1100. 102-A Color: Silver, S…
## 4 Electronics Laptop 102 Comp… 1100. 102-B Color: Space Gra…
## 5 Home Appliances Refrigerat… 201 Home… 900. 201-A Color: Stainless…
## 6 Home Appliances Refrigerat… 201 Home… 900. 201-B Color: White, Ca…
Summary Table
# Strengths and weaknesses of each storage format, one row per format.
# stringsAsFactors = FALSE keeps the text columns as character on every R
# version, matching how cunymart_data is built.
pros_cons <- data.frame(
  Format = c("JSON", "HTML", "XML", "Parquet"),
  Pros = c(
    "Simple to debug, Easy to read & write, Used in APIs",
    "Great for displaying in web browsers, Interactive visualizations",
    "Hierarchical data support, self-descriptive format",
    "Highly compressed, Fast for big data and analytics"
  ),
  Cons = c(
    "No Metadata, Harder to work with large files",
    "Not a suitable data exchange format, Parsing is harder",
    "Verbose and large file sizes, Harder to read",
    "Harder to read, Complex to debug"
  ),
  stringsAsFactors = FALSE
)
# Render the comparison as a gt table with a title
proscons_table <- gt(pros_cons) %>%
  tab_header(
    title = "Pros and Cons of Different Data Formats"
  )
proscons_table
| Pros and Cons of Different Data Formats |
| Format |
Pros |
Cons |
| JSON |
Simple to debug, Easy to read & write, Used in APIs |
No Metadata, Harder to work with large files |
| HTML |
Great for displaying in web browsers, Interative visualizations |
Not a suitable data exchange format, Parsing is harder |
| XML |
Hierarchial data support, self-descriptive format |
Verbose and large file sizes, Harder to read |
| Parquet |
Highly compressed, Fast for big data and analytics |
Harder to read, Complex to debug |