library(selectr)
library(rvest)
## Loading required package: xml2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Download and parse the Hacker News front page into an xml_document.
content <- read_html("https://news.ycombinator.com/")
content
## {xml_document}
## <html op="news">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset= ...
## [2] <body><center><table id="hnmain" border="0" cellpadding="0" cellspac ...
## [3] <script type="text/javascript" src="hn.js?bKMiySB2nhUo09N5Z8Du"></sc ...
# Text of every <a class="storylink"> element in the parsed page.
title <- content %>%
  html_nodes("a.storylink") %>%
  html_text()
title
##  [1] "Google accused of 'trust demolition' over health app"                            
##  [2] "Amazon No-cost, multiplatform, production-ready distribution of OpenJDK"         
##  [3] "Rendered Insecure: GPU Side Channel Attacks Are Practical [pdf]"                 
##  [4] "Show HN: Smallest Node.js Docker images"                                         
##  [5] "Chinese Tokamak reaches over 100M degrees"                                       
##  [6] "Shumway: Flash implementation in JavaScript"                                     
##  [7] "Sourdough Hands: How Bakers and Bread Are a Microbial Match"                     
##  [8] "HyperMinHash: Bringing intersections to HyperLogLog"                             
##  [9] "Show HN: Mobile Patterns – Design inspiration gallery for successful mobile apps"
## [10] "Private by Design: How We Built Firefox Sync"                                    
## [11] "Toronto Cleared Cars Off a Major Transit Corridor – And It Worked"               
## [12] "Compelled Decryption and the Privilege Against Self-Incrimination"               
## [13] "Mark Shuttleworth is not selling Canonical or Ubuntu – yet"                      
## [14] "Non cogito, ergo sum"                                                            
## [15] "Faked Out"                                                                       
## [16] "A book list for OS kernel developers and device driver writers (2006)"           
## [17] "Snap Says DOJ and SEC Are Investigating IPO Disclosures"                         
## [18] "KDevelop 5.3 released"                                                           
## [19] "Build a do-it-yourself home air purifier for about $25"                          
## [20] "Destruction of evidence charges filed for remotely wiping iPhone"                
## [21] "Gods and Robots: Myths, Machines, and Ancient Dreams of Technology"              
## [22] "China's brightest children are being recruited to weaponize AI"                  
## [23] "Blue Apron lays off more workers"                                                
## [24] "Is Twitter causally-consistent?"                                                 
## [25] "There’s Less to Private Equity Than Meets the Eye"                               
## [26] "Pattern Matching Without Regex – Introducing the Rosie Pattern Language"         
## [27] "C library system-call wrappers, or the lack thereof"                             
## [28] "Infinite procedurally-generated city with the Wave Function Collapse algorithm"  
## [29] "Check Out PC Classic, a $99 Adorable Tiny DOS Games Console"                     
## [30] "Best and Worst Cities for Nursing Homes in the US"
# Text of every <span class="sitestr"> element in the parsed page.
source <- content %>%
  html_nodes("span.sitestr") %>%
  html_text()
source
##  [1] "bbc.co.uk"                 "amazon.com"               
##  [3] "ucr.edu"                   "github.com"               
##  [5] "cas.cn"                    "github.com"               
##  [7] "npr.org"                   "github.com"               
##  [9] "mobile-patterns.com"       "hacks.mozilla.org"        
## [11] "streetsblog.org"           "ssrn.com"                 
## [13] "zdnet.com"                 "1843magazine.com"         
## [15] "reallifemag.com"           "jdebp.eu"                 
## [17] "bloomberg.com"             "kdevelop.org"             
## [19] "uofmhealth.org"            "dailygazette.com"         
## [21] "spectator.us"              "scmp.com"                 
## [23] "wsj.com"                   "muratbuffalo.blogspot.com"
## [25] "bloomberg.com"             "atomicobject.com"         
## [27] "lwn.net"                   "itch.io"                  
## [29] "hothardware.com"           "priceonomics.com"
# Text of every element carrying the "age" class in the parsed page.
age <- content %>%
  html_nodes(".age") %>%
  html_text()
age
##  [1] "2 hours ago"    "3 hours ago"    "2 hours ago"    "1 hour ago"    
##  [5] "11 hours ago"   "3 hours ago"    "5 hours ago"    "2 hours ago"   
##  [9] "3 hours ago"    "15 hours ago"   "1 hour ago"     "9 hours ago"   
## [13] "38 minutes ago" "10 hours ago"   "2 hours ago"    "5 hours ago"   
## [17] "12 hours ago"   "3 hours ago"    "17 hours ago"   "14 hours ago"  
## [21] "9 hours ago"    "3 hours ago"    "15 hours ago"   "7 hours ago"   
## [25] "7 hours ago"    "14 minutes ago" "15 hours ago"   "21 hours ago"  
## [29] "28 minutes ago" "14 hours ago"
# Combine the three scraped vectors into one data frame, one row per position.
#
# The vectors come from three independent selector queries, so nothing
# guarantees they have the same length. data.frame() silently recycles a
# shorter vector whenever its length divides the longest one, which would pair
# titles with the wrong source/age. Fail loudly instead.
stopifnot(
  length(title) == length(source),
  length(title) == length(age)
)
# stringsAsFactors = FALSE keeps the scraped text as character columns on
# R < 4.0 (it is already the default from R 4.0 onwards).
processedData <- data.frame(
  title = title,
  source = source,
  age = age,
  stringsAsFactors = FALSE
)
processedData
##                                                                               title
## 1                              Google accused of 'trust demolition' over health app
## 2           Amazon No-cost, multiplatform, production-ready distribution of OpenJDK
## 3                   Rendered Insecure: GPU Side Channel Attacks Are Practical [pdf]
## 4                                           Show HN: Smallest Node.js Docker images
## 5                                         Chinese Tokamak reaches over 100M degrees
## 6                                       Shumway: Flash implementation in JavaScript
## 7                       Sourdough Hands: How Bakers and Bread Are a Microbial Match
## 8                               HyperMinHash: Bringing intersections to HyperLogLog
## 9  Show HN: Mobile Patterns – Design inspiration gallery for successful mobile apps
## 10                                     Private by Design: How We Built Firefox Sync
## 11                Toronto Cleared Cars Off a Major Transit Corridor – And It Worked
## 12                Compelled Decryption and the Privilege Against Self-Incrimination
## 13                       Mark Shuttleworth is not selling Canonical or Ubuntu – yet
## 14                                                             Non cogito, ergo sum
## 15                                                                        Faked Out
## 16            A book list for OS kernel developers and device driver writers (2006)
## 17                          Snap Says DOJ and SEC Are Investigating IPO Disclosures
## 18                                                            KDevelop 5.3 released
## 19                           Build a do-it-yourself home air purifier for about $25
## 20                 Destruction of evidence charges filed for remotely wiping iPhone
## 21               Gods and Robots: Myths, Machines, and Ancient Dreams of Technology
## 22                   China's brightest children are being recruited to weaponize AI
## 23                                                 Blue Apron lays off more workers
## 24                                                  Is Twitter causally-consistent?
## 25                                There’s Less to Private Equity Than Meets the Eye
## 26          Pattern Matching Without Regex – Introducing the Rosie Pattern Language
## 27                              C library system-call wrappers, or the lack thereof
## 28   Infinite procedurally-generated city with the Wave Function Collapse algorithm
## 29                      Check Out PC Classic, a $99 Adorable Tiny DOS Games Console
## 30                                Best and Worst Cities for Nursing Homes in the US
##                       source            age
## 1                  bbc.co.uk    2 hours ago
## 2                 amazon.com    3 hours ago
## 3                    ucr.edu    2 hours ago
## 4                 github.com     1 hour ago
## 5                     cas.cn   11 hours ago
## 6                 github.com    3 hours ago
## 7                    npr.org    5 hours ago
## 8                 github.com    2 hours ago
## 9        mobile-patterns.com    3 hours ago
## 10         hacks.mozilla.org   15 hours ago
## 11           streetsblog.org     1 hour ago
## 12                  ssrn.com    9 hours ago
## 13                 zdnet.com 38 minutes ago
## 14          1843magazine.com   10 hours ago
## 15           reallifemag.com    2 hours ago
## 16                  jdebp.eu    5 hours ago
## 17             bloomberg.com   12 hours ago
## 18              kdevelop.org    3 hours ago
## 19            uofmhealth.org   17 hours ago
## 20          dailygazette.com   14 hours ago
## 21              spectator.us    9 hours ago
## 22                  scmp.com    3 hours ago
## 23                   wsj.com   15 hours ago
## 24 muratbuffalo.blogspot.com    7 hours ago
## 25             bloomberg.com    7 hours ago
## 26          atomicobject.com 14 minutes ago
## 27                   lwn.net   15 hours ago
## 28                   itch.io   21 hours ago
## 29           hothardware.com 28 minutes ago
## 30          priceonomics.com   14 hours ago
# Download and parse the Times of India home page into an xml_document.
contentTOI <- read_html(
  "https://timesofindia.indiatimes.com/"
)
contentTOI
## {xml_document}
## <html xmlns:g="http://base.google.com/ns/1.0" xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
## [1] <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article ...
## [2] <script>function otab(t,n){var e="_blank";try{e="undefined"==typeof  ...
## [3] <script>try{axs("//timesofindia.indiatimes.com/acms/jsAds/getContent ...
## [4] <body bgcolor="#EBEBEB" data-page-name="HP"><div id="container">\n<s ...
# Text of every .gad_name element nested in an <li> inside
# .shopping_gadgets_frame, itself inside .shopping_times_wdgt.
gadgetName <- contentTOI %>%
  html_nodes(".shopping_times_wdgt .shopping_gadgets_frame li .gad_name") %>%
  html_text()
# Number of names matched, for comparison with the number of prices.
length(gadgetName)
## [1] 27
gadgetName
##  [1] "Lenovo K8 Note 32GB (Black, 3GB RAM)"              
##  [2] "Redmi 6A 16GB (Black, 2GB RAM)"                    
##  [3] "Moto E5 Plus 32GB (Black, 3GB RAM)"                
##  [4] "Vivo V11 Pro (Dazzling Gold, 64 GB)  (6 GB RAM)"   
##  [5] "Lenovo K8 Note 64GB (Black, 4GB RAM)"              
##  [6] "OnePlus 6T 64GB (Black, 6GB RAM)"                  
##  [7] "Panasonic Eluga A4 32GB (Champagne Gold, 3GB RAM)" 
##  [8] "Honor 8X 64GB (Blue, 6GB RAM)"                     
##  [9] "Lenovo K8 Plus 32GB (Black, 3GB RAM)"              
## [10] "Redmi 6 Pro 64GB (Black, 4GB RAM)"                 
## [11] "Panasonic Eluga Ray 700 32GB (Mocha Gold, 3GB RAM)"
## [12] "Vivo V9 Pro 64GB (Black, 6GB RAM)"                 
## [13] "Google Pixel 3 64GB (Clearly White, 4GB RAM)"      
## [14] "Samsung Galaxy A7 64GB (Black, 4GB RAM)"           
## [15] "Moto G6 32GB (Black, 3GB RAM)"                     
## [16] "Lenovo K6 Power (Gold, 32GB) (4GB RAM)"            
## [17] "Google Pixel 3 XL 64GB (Just Black, 4GB RAM)"      
## [18] "Mobiistar C1 Lite 8GB (Gold, 1GB RAM)"             
## [19] "Redmi Y2 64GB (Gold, 4GB RAM)"                     
## [20] "Mobiistar E1 Selfie 32GB (Black, 3GB RAM)"         
## [21] "InFocus Snap 4 (Gold, Four Camera Phone)"          
## [22] "Moto E5 16GB (Grey, 2GB RAM)"                      
## [23] "Honor 7C 32GB (Black, 3GB RAM)"                    
## [24] "Panasonic Eluga Ray 500 32GB (Mocha Gold, 3GB RAM)"
## [25] "Motorola One Power 64GB (Black, 4GB RAM)"          
## [26] "Vivo V7 Plus 64GB (Matte Black, 4GB RAM)"          
## [27] "Tecno Camon iClick 64GB (Gold, 4GB RAM)"
# Text of every .id_selrprice element inside .shopping_gadgets_frame,
# itself inside .shopping_times_wdgt.
gadgetPrice <- contentTOI %>%
  html_nodes(".shopping_times_wdgt .shopping_gadgets_frame .id_selrprice") %>%
  html_text()
# Number of prices matched, for comparison with the number of names.
length(gadgetPrice)
## [1] 27
gadgetPrice
##  [1] "<U+20B9>7899" "<U+20B9>6999" "<U+20B9>9499" "<U+20B9>27999" "<U+20B9>9225"
##  [6] "<U+20B9>38990" "<U+20B9>7750" "<U+20B9>16999" "<U+20B9>7640" "<U+20B9>12999"
## [11] "<U+20B9>8499" "<U+20B9>19990" "<U+20B9>67999" "<U+20B9>24990" "<U+20B9>11750"
## [16] "<U+20B9>9999" "<U+20B9>83000" "<U+20B9>4390" "<U+20B9>12999" "<U+20B9>8299"
## [21] "<U+20B9>7999" "<U+20B9>8599" "<U+20B9>9999" "<U+20B9>7240" "<U+20B9>15999"
## [26] "<U+20B9>18790" "<U+20B9>10999"
# Combine gadget names and prices into one data frame, one row per position.
#
# The two vectors come from separate selector queries. data.frame() silently
# recycles the shorter one whenever its length divides the longer one, which
# would attach prices to the wrong gadgets. Fail loudly instead.
stopifnot(length(gadgetName) == length(gadgetPrice))
# stringsAsFactors = FALSE keeps the scraped text as character columns on
# R < 4.0 (it is already the default from R 4.0 onwards).
processedDataTOI <- data.frame(
  gadgetName = gadgetName,
  gadgetPrice = gadgetPrice,
  stringsAsFactors = FALSE
)
processedDataTOI
##                                            gadgetName   gadgetPrice
## 1                Lenovo K8 Note 32GB (Black, 3GB RAM)  <U+20B9>7899
## 2                      Redmi 6A 16GB (Black, 2GB RAM)  <U+20B9>6999
## 3                  Moto E5 Plus 32GB (Black, 3GB RAM)  <U+20B9>9499
## 4     Vivo V11 Pro (Dazzling Gold, 64 GB)  (6 GB RAM) <U+20B9>27999
## 5                Lenovo K8 Note 64GB (Black, 4GB RAM)  <U+20B9>9225
## 6                    OnePlus 6T 64GB (Black, 6GB RAM) <U+20B9>38990
## 7   Panasonic Eluga A4 32GB (Champagne Gold, 3GB RAM)  <U+20B9>7750
## 8                       Honor 8X 64GB (Blue, 6GB RAM) <U+20B9>16999
## 9                Lenovo K8 Plus 32GB (Black, 3GB RAM)  <U+20B9>7640
## 10                  Redmi 6 Pro 64GB (Black, 4GB RAM) <U+20B9>12999
## 11 Panasonic Eluga Ray 700 32GB (Mocha Gold, 3GB RAM)  <U+20B9>8499
## 12                  Vivo V9 Pro 64GB (Black, 6GB RAM) <U+20B9>19990
## 13       Google Pixel 3 64GB (Clearly White, 4GB RAM) <U+20B9>67999
## 14            Samsung Galaxy A7 64GB (Black, 4GB RAM) <U+20B9>24990
## 15                      Moto G6 32GB (Black, 3GB RAM) <U+20B9>11750
## 16             Lenovo K6 Power (Gold, 32GB) (4GB RAM)  <U+20B9>9999
## 17       Google Pixel 3 XL 64GB (Just Black, 4GB RAM) <U+20B9>83000
## 18              Mobiistar C1 Lite 8GB (Gold, 1GB RAM)  <U+20B9>4390
## 19                      Redmi Y2 64GB (Gold, 4GB RAM) <U+20B9>12999
## 20          Mobiistar E1 Selfie 32GB (Black, 3GB RAM)  <U+20B9>8299
## 21           InFocus Snap 4 (Gold, Four Camera Phone)  <U+20B9>7999
## 22                       Moto E5 16GB (Grey, 2GB RAM)  <U+20B9>8599
## 23                     Honor 7C 32GB (Black, 3GB RAM)  <U+20B9>9999
## 24 Panasonic Eluga Ray 500 32GB (Mocha Gold, 3GB RAM)  <U+20B9>7240
## 25           Motorola One Power 64GB (Black, 4GB RAM) <U+20B9>15999
## 26           Vivo V7 Plus 64GB (Matte Black, 4GB RAM) <U+20B9>18790
## 27            Tecno Camon iClick 64GB (Gold, 4GB RAM) <U+20B9>10999