From 63cbac5490ada4c97808bac84a6c28ef7aab5572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20M=C3=BCller?= Date: Mon, 17 May 2021 17:53:20 +0200 Subject: [PATCH] BETTERZON-58: Fixing API endpoint of the crawler - The list of products in the API request was treated as a string and hence only the first product was crawled --- Crawler/api.py | 4 ++-- Crawler/crawler.py | 9 +++++++-- Crawler/sql.py | 1 - 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Crawler/api.py b/Crawler/api.py index d4d7e6d..7b7e0c2 100644 --- a/Crawler/api.py +++ b/Crawler/api.py @@ -10,8 +10,8 @@ api = Api(app) # To parse request data parser = reqparse.RequestParser() -parser.add_argument('key') -parser.add_argument('products') +parser.add_argument('key', type=str) +parser.add_argument('products', type=int, action='append') class CrawlerApi(Resource): diff --git a/Crawler/crawler.py b/Crawler/crawler.py index 0b20671..79db9a3 100644 --- a/Crawler/crawler.py +++ b/Crawler/crawler.py @@ -73,9 +73,14 @@ def __crawl_amazon__(product_info: dict) -> tuple: try: price = int(soup.find(id='priceblock_ourprice').get_text().replace(".", "").replace(",", "").replace("€", "").strip()) except RuntimeError: - price = '' + price = -1 + except AttributeError: + price = -1 - return (product_info['product_id'], product_info['vendor_id'], price) + if price != -1: + return (product_info['product_id'], product_info['vendor_id'], price) + else: + return None def __crawl_apple__(product_info: dict) -> tuple: diff --git a/Crawler/sql.py b/Crawler/sql.py index 0c5ce82..c1b2669 100644 --- a/Crawler/sql.py +++ b/Crawler/sql.py @@ -54,7 +54,6 @@ def getProductLinksForProduct(product_id: int) -> [dict]: cur = conn.cursor() query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s' - print(query) cur.execute(query, (product_id,)) products = list(map(lambda x: {'product_id': product_id, 'vendor_id': x[0], 'url': x[1]}, cur.fetchall()))