# ---- Environment and package setup ----
# The workspace is deliberately NOT cleared here: rm(list = ls()) deletes the
# caller's objects without resetting attached packages or options. Start a
# fresh R session instead when a clean state is required.

# Point JAVA_HOME at the 64-bit JRE this analysis was written against, but only
# when that directory actually exists, so other machines keep their own value.
java_home <- "C:\\Program Files\\Java\\jre1.8.0_66" # for 64-bit
if (dir.exists(java_home)) {
  Sys.setenv(JAVA_HOME = java_home)
}

cran_repo <- "https://cran.rstudio.com/"

# Install `pkg` from `repos` only if it is not already present in the library,
# rather than downloading it again on every run. Returns `pkg` invisibly.
install_if_missing <- function(pkg, repos = cran_repo) {
  if (!nzchar(system.file(package = pkg))) {
    install.packages(pkg, repos = repos)
  }
  invisible(pkg)
}

required_packages <- c(
  "rJava", "rvest", "NLP", "openNLP", "rworldxtra", "ggmap", "rworldmap"
)
invisible(lapply(required_packages, install_if_missing))

# Attach order is kept as in the original run because it determines masking.
library(rvest)      # also loads xml2
library(NLP)
library(rworldxtra) # also loads sp
library(openNLP)
# ggmap loads ggplot2, whose annotate() masks NLP::annotate(); that is why the
# annotation step further down calls NLP::annotate() explicitly.
library(ggmap)
library(rworldmap)
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')

Read Wiki Page

# Download the article and pull the text of every paragraph (<p>) node.
page <- read_html("https://en.wikipedia.org/wiki/Apple_Inc.")
text <- html_text(html_nodes(page, "p"))

# Drop paragraphs that are completely empty.
text <- text[text != ""]

# Remove numeric reference markers such as [7] or [101]. `[0-9]+` matches any
# number of digits, so markers with four or more digits are stripped as well.
text <- gsub("\\[[0-9]+\\]", "", text)

Make one complete document

# Join all paragraphs into one space-separated document and wrap it as an NLP
# String object.
text <- as.String(paste(text, collapse = " "))

# Timestamp taken immediately before the annotators are created.
t1 <- Sys.time()

# Tokenizers (sentences, then words) followed by the two entity annotators.
sent_annot <- Maxent_Sent_Token_Annotator()
word_annot <- Maxent_Word_Token_Annotator()
loc_annot <- Maxent_Entity_Annotator(kind = "location")
Person_annot <- Maxent_Entity_Annotator(kind = "person")

# Show the description of the person annotator.
Person_annot
## An annotator inheriting from classes
##   Simple_Entity_Annotator Annotator
## with description
##   Computes entity annotations using the Apache OpenNLP Maxent name
##   finder employing the default model for language 'en' and kind
##   'person'.

# Run the annotators in order over the document. The NLP:: prefix is required
# because ggplot2's annotate() masks the NLP one once ggmap is attached.
annotation_pipeline <- list(sent_annot, word_annot, loc_annot, Person_annot)
annot.l1 <- NLP::annotate(text, annotation_pipeline)

Retrieve locations and persons

# Entity kind of every annotation, as a plain character vector. Annotations
# whose feature list has no "kind" entry (presumably the sentence and word
# tokens) become NA. vapply() fixes the result type, whereas sapply() falls
# back to a list here and the later comparison then depends on implicit
# list-to-character coercion.
k <- vapply(
  annot.l1$features,
  function(feature) {
    kind <- feature[["kind"]]
    if (is.null(kind)) NA_character_ else as.character(kind)
  },
  character(1)
)
## retrieve the locations on the page
# %in% never returns NA, so annotations without a kind are simply not selected.
apple_locations <- text[annot.l1[k %in% "location"]]
## Retrieve persons names on the page
Persons <- text[annot.l1[k %in% "person"]]

List the persons

# Print every string tagged as a person; repeated mentions are not removed.
Persons
##  [1] "Mac"                    "Mac App Store"         
##  [3] "Apple Music"            "Steve Jobs"            
##  [5] "Steve Wozniak"          "Ronald Wayne"          
##  [7] "Apple Inc."             "115,000"               
##  [9] "Steve Jobs"             "Steve Wozniak"         
## [11] "Ronald Wayne"           "RAM"                   
## [13] "Wayne"                  "Mike Markkula"         
## [15] "Ridley Scott"           "John Sculley"          
## [17] "John Sculley"           "Michael Spindler"      
## [19] "Gil Amelio"             "Mac OS"                
## [21] "Gershwin"               "Steve Jobs"            
## [23] "Jonathan Ive"           "Mac OS X"              
## [25] "Mac OS"                 "Mac OS"                
## [27] "Intel-based Mac"        "Mac"                   
## [29] "Mac Pro"                "Mac OS X"              
## [31] "Michael Dell"           "EMI"                   
## [33] "Mac OS X Lion"          "Mac OS X."             
## [35] "Apple"                  "Tim Cook"              
## [37] "Cook"                   "Andrea Jung"           
## [39] "Arthur"                 "Steve Jobs"            
## [41] "Phil Schiller"          "Mac OS"                
## [43] "Siri"                   "Mac Mini"              
## [45] "Tim Cook"               "Cook"                  
## [47] "Paul Deneve"            "Yves Saint Laurent"    
## [49] "Tim Cook"               "Angela Ahrendts"       
## [51] "Randall Stephenson"     "Edward Snowden NSA"    
## [53] "Cook"                   "Jimmy Iovine"          
## [55] "Anand Lal Shimpi"       "Paul Hunter"           
## [57] "James Vincent"          "Magic Mouse"           
## [59] "Magic Trackpad"         "Magic Keyboard"        
## [61] "Steve Jobs"             "Mac OS X"              
## [63] "Mac OS"                 "Jonathan Ive"          
## [65] "Logic Pro"              "Johnson Controls"      
## [67] "Steve Jobs"             "Ron Wayne"             
## [69] "Isaac Newton"           "Rob Janoff"            
## [71] "Alan Turing"            "Steve Jobs"            
## [73] "Steve Wozniak"          "Newton"                
## [75] "Guy Kawasaki"           "John Sculley"          
## [77] "Jonathan Ive"           "SixtyEight Research"   
## [79] "Norman Foster."         "Bill Atkinson"         
## [81] "Capps"                  "Rod Holt,Alan Kay"     
## [83] "Guy Kawasaki,Al Alcorn" "Don Norman"            
## [85] "Page"                   "Steve Wozniak"         
## [87] "Ron Johnson<U+0097>Senior"     "Cook"                  
## [89] "Scott Forstall"         "Thomas Ricker"         
## [91] "Tim Kobe"               "Steven Dowling"        
## [93] "Silver"                 "Lisa P. Jackson"       
## [95] "Tim Cook"               "Greenpeace"            
## [97] "115,000"                "George Osborne"        
## [99] "Christian Kern"

List the locations

# Print every string tagged as a location; repeated mentions are not removed.
apple_locations
##   [1] "California"               "United States"           
##   [3] "Ireland"                  "Macintosh"               
##   [5] "Microsoft"                "Key"                     
##   [7] "Virginia"                 "California"              
##   [9] "Tokyo"                    "Paris"                   
##  [11] "France"                   "Cisco"                   
##  [13] "Silicon Valley"           "New York City"           
##  [15] "India"                    "India"                   
##  [17] "India"                    "Silicon Valley-based"    
##  [19] "Tel Aviv."                "Turkey"                  
##  [21] "Ankara"                   "Turkey"                  
##  [23] "Istanbul"                 "Tokyo"                   
##  [25] "Japan"                    "Tokyo"                   
##  [27] "India"                    "San Jose"                
##  [29] "Magic Mouse"              "Magic Trackpad"          
##  [31] "Magic Keyboard"           "3G"                      
##  [33] "Japan"                    "Sydney"                  
##  [35] "Herald"                   "California"              
##  [37] "Nevada"                   "North Carolina"          
##  [39] "North Carolina"           "Newton"                  
##  [41] "London"                   "New York City"           
##  [43] "Fifth Avenue"             "Tokyo"                   
##  [45] "United States"            "Silicon Valley"          
##  [47] "California"               "(79,000"                 
##  [49] "Sunnyvale"                "California"              
##  [51] "Europe"                   "Middle East"             
##  [53] "Africa"                   "Cork"                    
##  [55] "Ireland"                  "United States"           
##  [57] "Apple"                    "United Kingdom"          
##  [59] "Stockley Park"            "London"                  
##  [61] "Herzliya"                 "Israel"                  
##  [63] "Israel"                   "Haifa"                   
##  [65] "Manhattan"                "Fifth Avenue"            
##  [67] "Regent Street"            "London"                  
##  [69] "Europe"                   "London"                  
##  [71] "Covent Garden"            "New York City"           
##  [73] "United States"            "Fifth Avenue"            
##  [75] "New York City"            "Paris Sydney Hong Kong"  
##  [77] "Apple"                    "Virginia"                
##  [79] "California"               "United States"           
##  [81] "America"                  "China"                   
##  [83] "China"                    "China"                   
##  [85] "China"                    "China"                   
##  [87] "Jackson"                  "Maine"                   
##  [89] "North Carolina"           "China"                   
##  [91] "China"                    "China"                   
##  [93] "Singapore"                "California"              
##  [95] "China"                    "China"                   
##  [97] "China"                    "United States"           
##  [99] "Ireland"                  "the Netherlands"         
## [101] "Virgin Islands"           "Irish"                   
## [103] "the Netherlands"          "Caribbean"               
## [105] "United States of America" "United States"           
## [107] "United States"            "United States"           
## [109] "Ireland"                  "Irish"                   
## [111] "Austria"                  "Austria"                 
## [113] "Earth"                    "San Francisco"

## We could do much with this info, e.g., improve lists by editing them with external domain knowledge, etc.

E.g., geocode the locations and create a map of the world of each article.

# Keep each distinct location string once so it is looked up only one time.
all_places <- unique(apple_locations)

## Get the geo codes

# One geocoding request per entry of all_places; the result holds lon/lat
# columns, with NA where a lookup returns no result.
all_places_geocoded <- ggmap::geocode(all_places) #[1:10]
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=California&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20States&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Ireland&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Macintosh&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Microsoft&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Key&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "Key"
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Virginia&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Tokyo&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Paris&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=France&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Cisco&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Silicon%20Valley&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=New%20York%20City&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=India&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Silicon%20Valley-based&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Tel%20Aviv.&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Turkey&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Ankara&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Istanbul&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Japan&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=San%20Jose&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Magic%20Mouse&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "Magic Mouse"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Magic%20Trackpad&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "Magic
## Trackpad"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Magic%20Keyboard&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "Magic
## Keyboard"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=3G&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Sydney&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Herald&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Nevada&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=North%20Carolina&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Newton&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=London&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Fifth%20Avenue&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=(79,000&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "(79,000"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Sunnyvale&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Europe&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Middle%20East&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Africa&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Cork&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Apple&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20Kingdom&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Stockley%20Park&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Herzliya&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Israel&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Haifa&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Manhattan&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Regent%20Street&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Covent%20Garden&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Paris%20Sydney%20Hong%20Kong&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=America&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=China&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Jackson&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Maine&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Singapore&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=the%20Netherlands&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Virgin%20Islands&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Irish&sensor=false
## Warning: geocode failed with status ZERO_RESULTS, location = "Irish"
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Caribbean&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20States%20of%20America&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Austria&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Earth&sensor=false
## .
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=San%20Francisco&sensor=false

## List the geo codes

# Print the lon/lat table; rows follow the order of all_places.
all_places_geocoded
##             lon        lat
## 1  -119.4179324  36.778261
## 2   -95.7128910  37.090240
## 3    -8.2438900  53.412910
## 4  -118.4505268  35.135952
## 5  -122.1304930  47.640282
## 6            NA         NA
## 7   -78.6568942  37.431573
## 8   139.6917064  35.689487
## 9     2.3522219  48.856614
## 10    2.2137490  46.227638
## 11  -98.9792336  32.388186
## 12 -122.0575434  37.387474
## 13  -74.0059413  40.712784
## 14   78.9628800  20.593684
## 15 -122.0575434  37.387474
## 16   34.7817676  32.085300
## 17   35.2433220  38.963745
## 18   32.8597419  39.933363
## 19   28.9783589  41.008238
## 20  138.2529240  36.204824
## 21 -121.8863286  37.338208
## 22           NA         NA
## 23           NA         NA
## 24           NA         NA
## 25  -71.5303386  43.715973
## 26  151.2092955 -33.868820
## 27 -121.2455569  38.294758
## 28 -116.4193890  38.802610
## 29  -79.0192997  35.759573
## 30  -71.2092214  42.337041
## 31   -0.1277583  51.507351
## 32  -73.9969848  40.731412
## 33           NA         NA
## 34 -122.0363496  37.368830
## 35   15.2551187  54.525961
## 36   42.5509603  29.298528
## 37   34.5085230  -8.783195
## 38   -8.4863157  51.896892
## 39 -114.0757739  35.912754
## 40   -3.4359730  55.378051
## 41   -1.7018229  52.829064
## 42   34.8446750  32.162413
## 43   34.8516120  31.046051
## 44   34.9895710  32.794046
## 45  -96.5716694  39.183608
## 46  -89.4362855  43.068016
## 47   -0.1232697  51.511732
## 48  114.1094970  22.396428
## 49  -95.7128910  37.090240
## 50  104.1953970  35.861660
## 51  -90.1848103  32.298757
## 52  -69.4454689  45.253783
## 53  103.8198360   1.352083
## 54    5.2912660  52.132633
## 55  -64.8963350  18.335765
## 56           NA         NA
## 57  -78.6568942  21.469114
## 58  -95.7128910  37.090240
## 59   14.5500720  47.516231
## 60 -102.4107493  34.233137
## 61 -122.4194155  37.774929

Load the map and highlight the locations

## Recommendations: From the above data, the Wiki page talks mostly about the founders of the Apple company and its products. Steve Jobs and Steve Wozniak played a key role in Apple’s establishment. The page talks about products like Magic Mouse, Siri and Mac OS. It also provides information about the number of employees.

Looking at the location data, it seems that Apple’s presence is concentrated in North America and Europe and, to some extent, in Asian countries.

Apple can try to increase their market in African and South American regions.

Increasing their footprint in Asia, South America and South Africa would further increase their revenue.

Further, Apple can look at Asian countries for manufacturing and assembling their products. This would decrease the actual cost involved in making the products.