BETTERZON-58: Fixing API endpoint of the crawler

- The list of products in the API request was treated as a single string, so only the first product was crawled
2025-07-01 01:09:19 +00:00 · 2021-05-17 17:53:20 +02:00 · 2021-05-17 17:53:20 +02:00 · 63cbac5490
commit 63cbac5490
parent 73effffc89
3 changed files with 9 additions and 5 deletions
--- a/Crawler/api.py
+++ b/Crawler/api.py
@ -10,8 +10,8 @@ api = Api(app)

 # To parse request data
 parser = reqparse.RequestParser()
-parser.add_argument('key')
-parser.add_argument('products')
+parser.add_argument('key', type=str)
+parser.add_argument('products', type=int, action='append')


 class CrawlerApi(Resource):
--- a/Crawler/crawler.py
+++ b/Crawler/crawler.py
@ -73,9 +73,14 @@ def __crawl_amazon__(product_info: dict) -> tuple:
    try:
        price = int(soup.find(id='priceblock_ourprice').get_text().replace(".", "").replace(",", "").replace("€", "").strip())
    except RuntimeError:
-        price = ''
+        price = -1
+    except AttributeError:
+        price = -1

-    return (product_info['product_id'], product_info['vendor_id'], price)
+    if price != -1:
+        return (product_info['product_id'], product_info['vendor_id'], price)
+    else:
+        return None


 def __crawl_apple__(product_info: dict) -> tuple:
--- a/Crawler/sql.py
+++ b/Crawler/sql.py
@ -54,7 +54,6 @@ def getProductLinksForProduct(product_id: int) -> [dict]:
    cur = conn.cursor()

    query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s'
-    print(query)
    cur.execute(query, (product_id,))

    products = list(map(lambda x: {'product_id': product_id, 'vendor_id': x[0], 'url': x[1]}, cur.fetchall()))