mirror of
https://github.com/Mueller-Patrick/Betterzon.git
synced 2024-12-22 03:35:13 +00:00
BETTERZON-57: Adding utility sql functions
This commit is contained in:
parent
aaf829f090
commit
fafacdd942
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -27,6 +27,7 @@ speed-measure-plugin*.json
|
||||||
!Backend.iml
|
!Backend.iml
|
||||||
!CucumberTests.iml
|
!CucumberTests.iml
|
||||||
!Crawler.iml
|
!Crawler.iml
|
||||||
|
!Crawler-Loadbalancer.iml
|
||||||
|
|
||||||
# Include IntelliJ modules
|
# Include IntelliJ modules
|
||||||
!/.idea/modules.xml
|
!/.idea/modules.xml
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
<module fileurl="file://$PROJECT_DIR$/Backend/Backend.iml" filepath="$PROJECT_DIR$/Backend/Backend.iml" />
|
<module fileurl="file://$PROJECT_DIR$/Backend/Backend.iml" filepath="$PROJECT_DIR$/Backend/Backend.iml" />
|
||||||
<module fileurl="file://$PROJECT_DIR$/Betterzon.iml" filepath="$PROJECT_DIR$/Betterzon.iml" />
|
<module fileurl="file://$PROJECT_DIR$/Betterzon.iml" filepath="$PROJECT_DIR$/Betterzon.iml" />
|
||||||
<module fileurl="file://$PROJECT_DIR$/Crawler/Crawler.iml" filepath="$PROJECT_DIR$/Crawler/Crawler.iml" />
|
<module fileurl="file://$PROJECT_DIR$/Crawler/Crawler.iml" filepath="$PROJECT_DIR$/Crawler/Crawler.iml" />
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/Crawler-Loadbalancer/Crawler-Loadbalancer.iml" filepath="$PROJECT_DIR$/Crawler-Loadbalancer/Crawler-Loadbalancer.iml" />
|
||||||
<module fileurl="file://$PROJECT_DIR$/CucumberTests/CucumberTests.iml" filepath="$PROJECT_DIR$/CucumberTests/CucumberTests.iml" />
|
<module fileurl="file://$PROJECT_DIR$/CucumberTests/CucumberTests.iml" filepath="$PROJECT_DIR$/CucumberTests/CucumberTests.iml" />
|
||||||
<module fileurl="file://$PROJECT_DIR$/Frontend/Frontend.iml" filepath="$PROJECT_DIR$/Frontend/Frontend.iml" />
|
<module fileurl="file://$PROJECT_DIR$/Frontend/Frontend.iml" filepath="$PROJECT_DIR$/Frontend/Frontend.iml" />
|
||||||
</modules>
|
</modules>
|
||||||
|
|
9
Crawler-Loadbalancer/Crawler-Loadbalancer.iml
Normal file
9
Crawler-Loadbalancer/Crawler-Loadbalancer.iml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||||
|
<exclude-output />
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
2
Crawler-Loadbalancer/requirements.txt
Normal file
2
Crawler-Loadbalancer/requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
pymysql
|
||||||
|
logging
|
42
Crawler-Loadbalancer/sql.py
Normal file
42
Crawler-Loadbalancer/sql.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
import pymysql
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
def __getConnection__() -> pymysql.Connection:
    """
    Creates a fresh pymysql connection configured from the BETTERZON_CRAWLER_* environment variables
    (user, password, host and database); the port is fixed to 3306
    :return: The opened pymysql Connection object, or None if pymysql raised an error while connecting
    """
    root_logger = logging.getLogger()
    try:
        # Environment lookups stay inside the try block, in the same order as the connect arguments
        settings = {
            'user': os.environ['BETTERZON_CRAWLER_USER'],
            'password': os.environ['BETTERZON_CRAWLER_PASSWORD'],
            'host': os.environ['BETTERZON_CRAWLER_HOST'],
            'port': 3306,
            'database': os.environ['BETTERZON_CRAWLER_DB'],
        }
        connection = pymysql.connect(**settings)
    except pymysql.Error as err:
        # Only pymysql errors are handled here; the failure is logged and signalled with None
        root_logger.error('SQL Connection error: %s', err)
        return None

    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def getShopsToCrawl() -> [int]:
    """
    Queries the list of vendor IDs and returns them
    :return: The list of IDs, or an empty list if no database connection could be opened
    """
    conn = __getConnection__()
    if conn is None:
        # __getConnection__ returns None (after logging) when connecting failed
        return []

    query = 'SELECT vendor_id FROM vendors'

    try:
        # The cursor is closed when the with-block exits, even if the query raises
        with conn.cursor() as cur:
            cur.execute(query)

            # Extract the IDs from the returned tuples into a list
            return [row[0] for row in cur.fetchall()]
    finally:
        # Always release the connection; the original never closed it
        conn.close()
|
|
@ -2,13 +2,13 @@
|
||||||
<module type="WEB_MODULE" version="4">
|
<module type="WEB_MODULE" version="4">
|
||||||
<component name="FacetManager">
|
<component name="FacetManager">
|
||||||
<facet type="Python" name="Python">
|
<facet type="Python" name="Python">
|
||||||
<configuration sdkName="Python 3.9 (venv)" />
|
<configuration sdkName="Python 3.9" />
|
||||||
</facet>
|
</facet>
|
||||||
</component>
|
</component>
|
||||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||||
<exclude-output />
|
<exclude-output />
|
||||||
<content url="file://$MODULE_DIR$" />
|
<content url="file://$MODULE_DIR$" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
<orderEntry type="library" name="Python 3.9 (venv) interpreter library" level="application" />
|
<orderEntry type="library" name="Python 3.9 interpreter library" level="application" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
|
@ -1,4 +1,5 @@
|
||||||
pymysql
|
pymysql
|
||||||
flask
|
flask
|
||||||
flask-sqlalchemy
|
flask-sqlalchemy
|
||||||
flask_restful
|
flask_restful
|
||||||
|
logging
|
||||||
|
|
70
Crawler/sql.py
Normal file
70
Crawler/sql.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pymysql
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def __getConnection__() -> pymysql.Connection:
    """
    Builds a new pymysql connection using the BETTERZON_CRAWLER_* environment variables
    (user, password, host and database) and the fixed port 3306
    :return: The opened pymysql Connection object, or None if pymysql raised an error while connecting
    """
    root_logger = logging.getLogger()
    try:
        # Environment lookups stay inside the try block, in the same order as the connect arguments
        connect_kwargs = dict(
            user=os.environ['BETTERZON_CRAWLER_USER'],
            password=os.environ['BETTERZON_CRAWLER_PASSWORD'],
            host=os.environ['BETTERZON_CRAWLER_HOST'],
            port=3306,
            database=os.environ['BETTERZON_CRAWLER_DB'],
        )
        connection = pymysql.connect(**connect_kwargs)
    except pymysql.Error as err:
        # Only pymysql errors are handled here; the failure is logged and signalled with None
        root_logger.error('SQL Connection error: %s', err)
        return None

    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def getProductsForShop(vendor_id: int) -> [{}]:
    """
    Queries the product links for all products of the given shop
    :param vendor_id: The vendor / shop to query products for
    :return: A list of product objects, each having the following parameters:
        product_id, vendor_id, url
        An empty list is returned if no database connection could be opened
    """
    conn = __getConnection__()
    if conn is None:
        # __getConnection__ returns None (after logging) when connecting failed
        return []

    query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s'

    try:
        # The cursor is closed when the with-block exits, even if the query raises
        with conn.cursor() as cur:
            cur.execute(query, (vendor_id,))

            # vendor_id is not selected; it is copied from the argument into every result
            return [
                {'product_id': product_id, 'vendor_id': vendor_id, 'url': url}
                for product_id, url in cur.fetchall()
            ]
    finally:
        # Always release the connection; the original never closed it
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insertShopData(data_to_insert: [tuple]) -> bool:
    """
    Inserts the given list of tuples into the DB
    :param data_to_insert: A list of tuples, where each tuple has to contain product id, vendor id and the price
        in exactly this order
    :return: If the insert was successful. An empty list counts as successful (nothing to insert);
        False is returned if no database connection could be opened
    """
    if not data_to_insert:
        # pymysql's executemany returns None for empty args, which the row-count
        # comparison below would misreport as a failed insert
        return True

    conn = __getConnection__()
    if conn is None:
        # __getConnection__ returns None (after logging) when connecting failed
        return False

    query = 'INSERT INTO prices (product_id, vendor_id, price_in_cents, timestamp) VALUES (%s, %s, %s, NOW())'

    try:
        # The cursor is closed when the with-block exits, even if the insert raises
        with conn.cursor() as cur:
            affected_rows = cur.executemany(query, data_to_insert)

        success = affected_rows == len(data_to_insert)
        if success:
            conn.commit()
        else:
            # Something went wrong, revert the changes
            conn.rollback()

        return success
    finally:
        # Release the connection on every path, including when executemany raises
        conn.close()
|
Loading…
Reference in New Issue
Block a user