Betterzon/Crawler/sql.py
henningxtro 26ba21156a
BETTERZON-58 (#53)
* BETTERZON-58: Basic Functionality with scrapy

* Added independent crawler function, yielding price

* moved logic to amazon.py

* .

* moved scrapy files to unused folder

* Added basic amazon crawler using beautifulsoup4

* Connected Api to Crawler

* Fixed string concatenation for sql statement in getProductLinksForProduct

* BETTERZON-58: Fixing SQL insert

* BETTERZON-58: Adding access key verification

* BETTERZON-58: Fixing API endpoint of the crawler
- The list of products in the API request was treated like a string and henceforth, only the first product has been crawled

* Added another selector for price on amazon (does not work for books)

Co-authored-by: root <root@DESKTOP-ARBPL82.localdomain>
Co-authored-by: Patrick Müller <patrick@mueller-patrick.tech>
Co-authored-by: Patrick <50352812+Mueller-Patrick@users.noreply.github.com>
2021-05-19 00:46:14 +02:00

89 lines
2.6 KiB
Python

import logging
import pymysql
import os
def __getConnection__() -> pymysql.Connection:
"""
Opens a new pymysql connection and returns it
:return: A pymysql Connection object
"""
logger = logging.getLogger()
try:
conn = pymysql.connect(
user=os.environ['BETTERZON_CRAWLER_USER'],
password=os.environ['BETTERZON_CRAWLER_PASSWORD'],
host=os.environ['BETTERZON_CRAWLER_HOST'],
port=3306,
database=os.environ['BETTERZON_CRAWLER_DB']
)
return conn
except pymysql.Error as e:
logger.error('SQL Connection error: %s', e)
return
def getProductsForVendor(vendor_id: int) -> [{}]:
"""
Queries the product links for all products of the given shop
:param vendor_id: The vendor / shop to query products for
:return: A list of product objects, each having the following parameters:
product_id, vendor_id, url
"""
conn = __getConnection__()
cur = conn.cursor()
query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s'
cur.execute(query, (vendor_id,))
products = list(map(lambda x: {'product_id': x[0], 'vendor_id': vendor_id, 'url': x[1]}, cur.fetchall()))
return products
def getProductLinksForProduct(product_id: int) -> [dict]:
"""
Queries all the product links for the given product
:param product_id: The product to query data for
:return: A list of product objects, each having the following parameters:
product_id, vendor_id, url
"""
conn = __getConnection__()
cur = conn.cursor()
query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s'
cur.execute(query, (product_id,))
products = list(map(lambda x: {'product_id': product_id, 'vendor_id': x[0], 'url': x[1]}, cur.fetchall()))
return products
def insertData(data_to_insert: [tuple]) -> bool:
"""
Inserts the given list of tuples into the DB
:param dataToInsert: A list of tuples, where each tuple has to contain product id, vendor id and the price
in exactly this order
:return: If the insert was successful
"""
conn = __getConnection__()
cur = conn.cursor()
query = 'INSERT INTO prices (product_id, vendor_id, price_in_cents, timestamp) VALUES (%s, %s, %s, NOW())'
affectedRows = cur.executemany(query, data_to_insert)
if affectedRows != len(data_to_insert):
# Something went wrong, revert the changes
conn.rollback()
else:
conn.commit()
cur.close()
conn.close()
return affectedRows == len(data_to_insert)