Merge pull request #27 from Mueller-Patrick/BETTERZON-57

BETTERZON-57: Adding utility sql functions
This commit is contained in:
henningxtro 2021-04-13 21:15:51 +02:00 committed by GitHub
commit 04d12955cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 129 additions and 3 deletions

1
.gitignore vendored
View File

@ -27,6 +27,7 @@ speed-measure-plugin*.json
!Backend.iml !Backend.iml
!CucumberTests.iml !CucumberTests.iml
!Crawler.iml !Crawler.iml
!Crawler-Loadbalancer.iml
# Include IntelliJ modules # Include IntelliJ modules
!/.idea/modules.xml !/.idea/modules.xml

View File

@ -5,6 +5,7 @@
<module fileurl="file://$PROJECT_DIR$/Backend/Backend.iml" filepath="$PROJECT_DIR$/Backend/Backend.iml" /> <module fileurl="file://$PROJECT_DIR$/Backend/Backend.iml" filepath="$PROJECT_DIR$/Backend/Backend.iml" />
<module fileurl="file://$PROJECT_DIR$/Betterzon.iml" filepath="$PROJECT_DIR$/Betterzon.iml" /> <module fileurl="file://$PROJECT_DIR$/Betterzon.iml" filepath="$PROJECT_DIR$/Betterzon.iml" />
<module fileurl="file://$PROJECT_DIR$/Crawler/Crawler.iml" filepath="$PROJECT_DIR$/Crawler/Crawler.iml" /> <module fileurl="file://$PROJECT_DIR$/Crawler/Crawler.iml" filepath="$PROJECT_DIR$/Crawler/Crawler.iml" />
<module fileurl="file://$PROJECT_DIR$/Crawler-Loadbalancer/Crawler-Loadbalancer.iml" filepath="$PROJECT_DIR$/Crawler-Loadbalancer/Crawler-Loadbalancer.iml" />
<module fileurl="file://$PROJECT_DIR$/CucumberTests/CucumberTests.iml" filepath="$PROJECT_DIR$/CucumberTests/CucumberTests.iml" /> <module fileurl="file://$PROJECT_DIR$/CucumberTests/CucumberTests.iml" filepath="$PROJECT_DIR$/CucumberTests/CucumberTests.iml" />
<module fileurl="file://$PROJECT_DIR$/Frontend/Frontend.iml" filepath="$PROJECT_DIR$/Frontend/Frontend.iml" /> <module fileurl="file://$PROJECT_DIR$/Frontend/Frontend.iml" filepath="$PROJECT_DIR$/Frontend/Frontend.iml" />
</modules> </modules>

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,2 @@
pymysql
logging

View File

@ -0,0 +1,42 @@
import pymysql
import os
import logging
def __getConnection__() -> pymysql.Connection:
    """
    Creates a fresh pymysql connection configured from the environment.

    User, password, host and database name are read from the
    BETTERZON_CRAWLER_* environment variables; the port is fixed to 3306.
    A missing environment variable raises a KeyError, since only pymysql
    errors are handled here.
    :return: The open pymysql Connection, or None if pymysql reported an
    error while connecting (the error is logged)
    """
    log = logging.getLogger()
    settings = {
        'user': os.environ['BETTERZON_CRAWLER_USER'],
        'password': os.environ['BETTERZON_CRAWLER_PASSWORD'],
        'host': os.environ['BETTERZON_CRAWLER_HOST'],
        'port': 3306,
        'database': os.environ['BETTERZON_CRAWLER_DB'],
    }
    try:
        connection = pymysql.connect(**settings)
    except pymysql.Error as err:
        # Callers receive None instead of an exception when connecting fails
        log.error('SQL Connection error: %s', err)
        return None
    return connection
def getShopsToCrawl() -> [int]:
    """
    Queries the list of vendor IDs and returns them.

    The cursor and the connection are always released before returning,
    also when the query raises.
    :return: The list of vendor IDs; an empty list if no database
    connection could be opened
    """
    conn = __getConnection__()
    if conn is None:
        # __getConnection__ logs the connection error and returns None
        return []

    try:
        with conn.cursor() as cur:
            cur.execute('SELECT vendor_id FROM vendors')
            # Each row is a one-element tuple, extract the ID from it
            return [row[0] for row in cur.fetchall()]
    finally:
        conn.close()

View File

@ -2,13 +2,13 @@
<module type="WEB_MODULE" version="4"> <module type="WEB_MODULE" version="4">
<component name="FacetManager"> <component name="FacetManager">
<facet type="Python" name="Python"> <facet type="Python" name="Python">
<configuration sdkName="Python 3.9 (venv)" /> <configuration sdkName="Python 3.9" />
</facet> </facet>
</component> </component>
<component name="NewModuleRootManager" inherit-compiler-output="true"> <component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output /> <exclude-output />
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Python 3.9 (venv) interpreter library" level="application" /> <orderEntry type="library" name="Python 3.9 interpreter library" level="application" />
</component> </component>
</module> </module>

View File

@ -2,3 +2,4 @@ pymysql
flask flask
flask-sqlalchemy flask-sqlalchemy
flask_restful flask_restful
logging

70
Crawler/sql.py Normal file
View File

@ -0,0 +1,70 @@
import logging
import pymysql
import os
def __getConnection__() -> pymysql.Connection:
    """
    Creates a fresh pymysql connection configured from the environment.

    User, password, host and database name are read from the
    BETTERZON_CRAWLER_* environment variables; the port is fixed to 3306.
    A missing environment variable raises a KeyError, since only pymysql
    errors are handled here.
    :return: The open pymysql Connection, or None if pymysql reported an
    error while connecting (the error is logged)
    """
    log = logging.getLogger()
    settings = {
        'user': os.environ['BETTERZON_CRAWLER_USER'],
        'password': os.environ['BETTERZON_CRAWLER_PASSWORD'],
        'host': os.environ['BETTERZON_CRAWLER_HOST'],
        'port': 3306,
        'database': os.environ['BETTERZON_CRAWLER_DB'],
    }
    try:
        connection = pymysql.connect(**settings)
    except pymysql.Error as err:
        # Callers receive None instead of an exception when connecting fails
        log.error('SQL Connection error: %s', err)
        return None
    return connection
def getProductsForShop(vendor_id: int) -> [{}]:
    """
    Queries the product links for all products of the given shop.

    The cursor and the connection are always released before returning,
    also when the query raises.
    :param vendor_id: The vendor / shop to query products for
    :return: A list of product dicts, each having the following keys:
    product_id, vendor_id, url. An empty list is returned if no database
    connection could be opened
    """
    conn = __getConnection__()
    if conn is None:
        # __getConnection__ logs the connection error and returns None
        return []

    try:
        with conn.cursor() as cur:
            query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s'
            cur.execute(query, (vendor_id,))
            # Rows are (product_id, url) tuples; the vendor ID is the one queried for
            return [
                {'product_id': product_id, 'vendor_id': vendor_id, 'url': url}
                for product_id, url in cur.fetchall()
            ]
    finally:
        conn.close()
def insertShopData(data_to_insert: [tuple]) -> bool:
    """
    Inserts the given list of tuples into the prices table.

    All rows are written in one transaction: it is committed only if the
    number of affected rows equals the number of given tuples, otherwise it
    is rolled back. The cursor and the connection are always closed, also
    when the insert raises; in that case nothing is committed and the
    exception propagates to the caller.
    :param data_to_insert: A list of tuples, where each tuple has to contain
    product id, vendor id and the price in cents in exactly this order
    :return: If the insert was successful. An empty list is trivially
    successful; False is returned if no database connection could be opened
    """
    if not data_to_insert:
        # Nothing to write. executemany() returns None for empty input, which
        # would otherwise be misreported as a failed insert.
        return True

    conn = __getConnection__()
    if conn is None:
        # __getConnection__ logs the connection error and returns None
        return False

    try:
        with conn.cursor() as cur:
            query = 'INSERT INTO prices (product_id, vendor_id, price_in_cents, timestamp) VALUES (%s, %s, %s, NOW())'
            affected_rows = cur.executemany(query, data_to_insert)

        success = affected_rows == len(data_to_insert)
        if success:
            conn.commit()
        else:
            # Something went wrong, revert the changes
            conn.rollback()
        return success
    finally:
        conn.close()