nlp-web-scraper / utils.py
kuldeep0204's picture
Create utils.py
7645142 verified
raw
history blame contribute delete
466 Bytes
import time, urllib.parse, logging, random
from dateutil import parser as dateparser
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Configures the root logger at INFO level as soon as this module is imported
# (logging.basicConfig does nothing if the root logger already has handlers).
# NOTE(review): configuring logging at import time from a utility module
# affects the importing application — confirm this is intended rather than
# being done once in the program's entry point.
logging.basicConfig(level=logging.INFO)
def canonicalize_url(url, base=None):
    """Resolve *url* against *base* when a base is supplied.

    Args:
        url: The URL (absolute or relative) to resolve.
        base: Optional base URL. When falsy (``None`` or ``""``), *url* is
            returned unchanged.

    Returns:
        ``urllib.parse.urljoin(base, url)`` if *base* is truthy, otherwise
        *url* exactly as given. No other normalization is performed.
    """
    # Without a base there is nothing to resolve against.
    if not base:
        return url
    return urllib.parse.urljoin(base, url)
def parse_date(s):
    """Best-effort date parsing using dateutil's parser.

    Args:
        s: The value to parse; passed straight to ``dateparser.parse``.

    Returns:
        Whatever ``dateparser.parse(s)`` returns, or ``None`` if the call
        raises any ``Exception``.
    """
    parsed = None
    try:
        parsed = dateparser.parse(s)
    except Exception:
        # Deliberately swallow parse failures: callers get None instead of
        # an exception for unparseable input.
        pass
    return parsed
def backoff_sleep(attempt, max_delay=10):
    """Sleep for an exponentially growing, jittered delay.

    The delay is ``2 ** attempt`` seconds plus a random fraction of a second
    in ``[0, 1)``, capped at *max_delay*. Once ``2 ** attempt`` reaches
    *max_delay*, the delay is exactly *max_delay*.

    Args:
        attempt: Retry counter used as the exponent (0 for the first retry).
        max_delay: Upper bound on the sleep time in seconds. Defaults to 10,
            the previously hard-coded cap.

    Returns:
        None. The function blocks the calling thread via ``time.sleep``.
    """
    try:
        # Compute in float: for exponents that fit, the value is identical to
        # the integer power once the float jitter is added.
        exponential = 2.0 ** attempt
    except OverflowError:
        # Very large attempt counts (>= 1024) exceed the float range; the
        # original int-plus-float expression raised OverflowError here. The
        # result is capped anyway, so treat it as infinitely large.
        exponential = float("inf")
    delay = min(exponential + random.random(), max_delay)
    time.sleep(delay)