The “worldfloras.csv” file contains information about all the countries in the world. The file is in comma-separated values (CSV) format.
# Read the whole CSV file into one string so that a regular expression can
# be run across every row in a single pass.
with open('worldfloras.csv', 'r') as f:
    world = f.read()
We will use the Python ‘re’ library for regular expressions.
import re
# Find names that begin with "D" or "E", optionally followed by a second
# word (first alternative), otherwise just the single word (second one).
#
# The leading [^ ,] consumes the one character in front of the capital
# letter and rejects a space or a comma there, so a "D"/"E" that starts a
# later field or a later word of a name is skipped. That character is part
# of the returned match; when it is the newline ending the previous row,
# the result prints with a blank line in front of it. A name at the very
# start of the text has no preceding character and is therefore not matched.
pattern = re.compile(r'[^ ,][DE]\w+\s[A-Za-z]+|[^ ,][DE]\w+')
matches = pattern.findall(world)
for match in matches:
    print(match)
##
## Denmark
##
## Dominican Republic
##
## Ecuador
##
## Egypt
##
## El Salvador
##
## Ethiopia
# Find every occurrence of "New" followed by one whitespace character and a
# word. The pattern is not anchored, so it also matches when "New" is not
# the first word of a name.
pattern = re.compile(r'New\s\w+')
matches = pattern.findall(world)
for match in matches:
    print(match)
## New Caledonia
## New Zealand
## New Guinea
# Find words whose second character is "y": one capital letter, then "y",
# then at least one more word character.
pattern = re.compile(r'[A-Z]y\w+')
matches = pattern.findall(world)
for match in matches:
    print(match)
## Cyprus
## Syria
# Find words whose sixth character is "y": one capital letter, exactly four
# lowercase letters, then "y", then any remaining word characters (\w*
# also allows the word to end right after the "y").
pattern = re.compile(r'[A-Z][a-z]{4}y\w*')
matches = pattern.findall(world)
for match in matches:
    print(match)
## Norway
## Sicily
## Turkey
# Find words whose fourth character is "c": one capital letter, exactly two
# lowercase letters, then "c", then any remaining word characters.
pattern = re.compile(r'[A-Z][a-z]{2}c\w*')
matches = pattern.findall(world)
for match in matches:
    print(match)
## Czechoslovakia
## Liechtenstein
## Seychelles