diff --git a/Crawler/crawler.py b/Crawler/crawler.py index 025bcd7..0b20671 100644 --- a/Crawler/crawler.py +++ b/Crawler/crawler.py @@ -34,13 +34,19 @@ def crawl(product_ids: [int]) -> dict: # Call the appropriate vendor crawling function and append the result to the list of crawled data if product_vendor_info['vendor_id'] == 1: # Amazon - crawled_data.append(__crawl_amazon__(product_vendor_info)) + data = __crawl_amazon__(product_vendor_info) + if data: + crawled_data.append(data) elif product_vendor_info['vendor_id'] == 2: # Apple - crawled_data.append(__crawl_apple__(product_vendor_info)) + data = __crawl_apple__(product_vendor_info) + if data: + crawled_data.append(data) elif product_vendor_info['vendor_id'] == 3: # Media Markt - crawled_data.append(__crawl_mediamarkt__(product_vendor_info)) + data = __crawl_mediamarkt__(product_vendor_info) + if data: + crawled_data.append(data) else: products_with_problems.append(product_vendor_info) continue @@ -78,7 +84,8 @@ def __crawl_apple__(product_info: dict) -> tuple: :param product_info: A dict with product info containing product_id, vendor_id, url :return: A tuple with the crawled data, containing (product_id, vendor_id, price_in_cents) """ - return (product_info['product_id'], product_info['vendor_id'], 123) + #return (product_info['product_id'], product_info['vendor_id'], 123) + pass def __crawl_mediamarkt__(product_info: dict) -> tuple: diff --git a/Crawler/sql.py b/Crawler/sql.py index ed81741..0c5ce82 100644 --- a/Crawler/sql.py +++ b/Crawler/sql.py @@ -35,7 +35,7 @@ def getProductsForVendor(vendor_id: int) -> [{}]: conn = __getConnection__() cur = conn.cursor() - query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s' % vendor_id + query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s' cur.execute(query, (vendor_id,)) @@ -53,7 +53,7 @@ def getProductLinksForProduct(product_id: int) -> [dict]: conn = __getConnection__() cur = conn.cursor() - query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s' % product_id + query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s' print(query) cur.execute(query, (product_id,))