diff --git a/.gitignore b/.gitignore
index ff35e3c..d099c29 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,7 @@ speed-measure-plugin*.json
!Backend.iml
!CucumberTests.iml
!Crawler.iml
+!Crawler-Loadbalancer.iml
# Include IntelliJ modules
!/.idea/modules.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
index 44a6847..53cf20e 100644
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -5,6 +5,7 @@
+
diff --git a/Backend/src/models/products/products.router.ts b/Backend/src/models/products/products.router.ts
index f2f3353..03649de 100644
--- a/Backend/src/models/products/products.router.ts
+++ b/Backend/src/models/products/products.router.ts
@@ -69,6 +69,25 @@ productsRouter.get('/search/:term', async (req: Request, res: Response) => {
}
});
+// GET products/list/[1,2,3]
+
+productsRouter.get('/list/:ids', async (req: Request, res: Response) => {
+    let ids: number[];
+
+    try {
+        ids = JSON.parse(req.params.ids);
+    } catch (e) {
+        res.status(400).send('Malformed parameters.');
+        return;
+    }
+
+    if (!ids || !Array.isArray(ids)) {
+        res.status(400).send('Missing parameters.');
+        return;
+    }
+
+    try {
+        const products: Products = await ProductService.findList(ids);
+
+        res.status(200).send(products);
+    } catch (e) {
+        res.status(404).send(e.message);
+    }
+});
+
// GET products/bestDeals
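
For reference, the new /list route expects the :ids path segment to be a JSON array, as the comment above hints. A minimal client sketch (the base URL and port are assumptions, not part of the patch), written in Python to match the crawler code:

    import requests

    # Hypothetical backend address; the product IDs travel as a JSON array inside the path
    resp = requests.get('http://localhost:3000/products/list/[1,2,3]')
    print(resp.status_code, resp.text)
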
diff --git a/Backend/src/models/products/products.service.ts b/Backend/src/models/products/products.service.ts
index 8cca5b0..2c612e2 100644
--- a/Backend/src/models/products/products.service.ts
+++ b/Backend/src/models/products/products.service.ts
@@ -122,6 +122,29 @@ export const findBySearchTerm = async (term: string): Promise => {
return prodRows;
};
+export const findList = async (ids: number[]): Promise<Products> => {
+    let conn;
+    let prodRows = [];
+    try {
+        conn = await pool.getConnection();
+        const rows = await conn.query('SELECT product_id, name, asin, is_active, short_description, long_description, image_guid, date_added, last_modified, manufacturer_id, selling_rank, category_id FROM products WHERE product_id IN (?)', [ids]);
+        for (let row in rows) {
+            if (row !== 'meta') {
+                prodRows.push(rows[row]);
+            }
+        }
+
+    } catch (err) {
+        throw err;
+    } finally {
+        if (conn) {
+            conn.end();
+        }
+    }
+
+    return prodRows;
+};
+
// export const create = async (newItem: Product): Promise => {
// let conn;
// try {
diff --git a/Crawler-Loadbalancer/Crawler-Loadbalancer.iml b/Crawler-Loadbalancer/Crawler-Loadbalancer.iml
new file mode 100644
index 0000000..ad3c0a3
--- /dev/null
+++ b/Crawler-Loadbalancer/Crawler-Loadbalancer.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Crawler-Loadbalancer/dailycrawl.py b/Crawler-Loadbalancer/dailycrawl.py
new file mode 100644
index 0000000..a3da39c
--- /dev/null
+++ b/Crawler-Loadbalancer/dailycrawl.py
@@ -0,0 +1,59 @@
+import json
+import requests
+import os
+
+import sql
+
+
+def call_crawlers() -> bool:
+    """
+    Fetches the list of products to crawl, distributes it evenly across all registered crawler
+    instances and calls each of them to start crawling
+    :return: Whether all calls were successful
+    """
+    product_ids = sql.getProductsToCrawl()
+
+    # crawler_urls = ['crawl.p4ddy.com', 'crawl.betterzon.xyz']
+    crawler_urls = ['http://localhost:22026']
+
+    balanced_lists = []
+
+    products_per_crawler = len(product_ids) // len(crawler_urls)
+    rest = len(product_ids) % len(crawler_urls)
+
+    # Distribute the available products over the available crawler instances
+    for crawler_id in range(len(crawler_urls)):
+        amount_of_prods = products_per_crawler
+
+        # E.g. with 7 products and 2 crawlers, the first instance crawls 4 products and the second one 3
+        if crawler_id < rest:
+            amount_of_prods += 1
+
+        # Assign the required amount of product ids to the current crawler and remove them from the
+        # list of all product ids
+        balanced_lists.append(product_ids[:amount_of_prods])
+        product_ids = product_ids[amount_of_prods:]
+
+    # Make the callouts to the instances
+    successful = 0
+    for crawler_id in range(len(crawler_urls)):
+        prods = balanced_lists[crawler_id]
+        url = crawler_urls[crawler_id]
+
+        # Send request
+        data = {
+            'key': os.environ['CRAWLER_ACCESS_KEY'],
+            'products': prods
+        }
+        headers = {'content-type': 'application/json', 'accept': 'application/json'}
+
+        resp = requests.post(url=url, data=json.dumps(data), headers=headers)
+
+        if resp.status_code == 200:
+            successful += 1
+
+    return successful == len(crawler_urls)
+
+
+if __name__ == '__main__':
+    call_crawlers()
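
The distribution loop above splits the product IDs into per-crawler chunks whose sizes differ by at most one. A standalone sketch of that logic (hypothetical helper name, not part of the patch) using the 7-products / 2-crawlers case from the comment:

    def split_evenly(product_ids, num_crawlers):
        per_crawler = len(product_ids) // num_crawlers
        rest = len(product_ids) % num_crawlers
        chunks = []
        for i in range(num_crawlers):
            size = per_crawler + (1 if i < rest else 0)
            chunks.append(product_ids[:size])
            product_ids = product_ids[size:]
        return chunks

    # 7 products over 2 crawlers -> 4 for the first instance, 3 for the second
    assert split_evenly(list(range(7)), 2) == [[0, 1, 2, 3], [4, 5, 6]]
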
diff --git a/Crawler-Loadbalancer/requirements.txt b/Crawler-Loadbalancer/requirements.txt
new file mode 100644
index 0000000..d4a7eda
--- /dev/null
+++ b/Crawler-Loadbalancer/requirements.txt
@@ -0,0 +1,2 @@
+pymysql
+requests
diff --git a/Crawler-Loadbalancer/sql.py b/Crawler-Loadbalancer/sql.py
new file mode 100644
index 0000000..13fa354
--- /dev/null
+++ b/Crawler-Loadbalancer/sql.py
@@ -0,0 +1,59 @@
+import pymysql
+import os
+import logging
+
+
+def __getConnection__() -> pymysql.Connection:
+    """
+    Opens a new pymysql connection and returns it
+    :return: A pymysql Connection object
+    """
+    logger = logging.getLogger()
+    try:
+        conn = pymysql.connect(
+            user=os.environ['BETTERZON_CRAWLER_USER'],
+            password=os.environ['BETTERZON_CRAWLER_PASSWORD'],
+            host=os.environ['BETTERZON_CRAWLER_HOST'],
+            port=3306,
+            database=os.environ['BETTERZON_CRAWLER_DB']
+        )
+
+        return conn
+    except pymysql.Error as e:
+        logger.error('SQL Connection error: %s', e)
+        return
+
+
+def getShopsToCrawl() -> [int]:
+    """
+    Queries the list of vendor IDs and returns them
+    :return: The list of IDs
+    """
+    conn = __getConnection__()
+    cur = conn.cursor()
+
+    query = 'SELECT vendor_id FROM vendors'
+
+    cur.execute(query)
+
+    # Extract the IDs from the returned tuples into a list
+    vendor_ids = list(map(lambda x: x[0], cur.fetchall()))
+
+    return vendor_ids
+
+
+def getProductsToCrawl() -> [int]:
+    """
+    Queries the list of product IDs and returns them
+    :return: The list of IDs
+    """
+    conn = __getConnection__()
+    cur = conn.cursor()
+
+    query = 'SELECT product_id FROM products'
+
+    cur.execute(query)
+
+    # Extract the IDs from the returned tuples into a list
+    product_ids = list(map(lambda x: x[0], cur.fetchall()))
+
+    return product_ids
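
dailycrawl.py and sql.py pull all of their configuration from environment variables. A small preflight sketch (not part of the patch) listing everything the load balancer needs before a run:

    import os
    import sys

    REQUIRED_ENV = [
        'BETTERZON_CRAWLER_USER', 'BETTERZON_CRAWLER_PASSWORD',
        'BETTERZON_CRAWLER_HOST', 'BETTERZON_CRAWLER_DB',
        'CRAWLER_ACCESS_KEY',
    ]

    missing = [name for name in REQUIRED_ENV if name not in os.environ]
    if missing:
        sys.exit('Missing environment variables: ' + ', '.join(missing))
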
diff --git a/Crawler/Crawler.iml b/Crawler/Crawler.iml
index db1dd1c..8568e2d 100644
--- a/Crawler/Crawler.iml
+++ b/Crawler/Crawler.iml
@@ -2,13 +2,13 @@
-
+
-
+
\ No newline at end of file
diff --git a/Crawler/api.py b/Crawler/api.py
index b28ba1b..92617c4 100644
--- a/Crawler/api.py
+++ b/Crawler/api.py
@@ -1,14 +1,24 @@
from flask import Flask
-from flask_restful import Resource, Api
+from flask_restful import Resource, Api, reqparse
app = Flask(__name__)
api = Api(app)
+# To parse request data
+parser = reqparse.RequestParser()
+parser.add_argument('key')
+parser.add_argument('products')
+
class CrawlerApi(Resource):
    def get(self):
        return {'Hallo': 'Betterzon'}
+    def post(self):
+        # Accept crawler request here
+        args = parser.parse_args()
+        return args
+
api.add_resource(CrawlerApi, '/')
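
The new POST handler receives the payload that dailycrawl.py sends. A minimal sketch of such a call (the access key value is a placeholder; the URL matches the localhost instance configured in dailycrawl.py):

    import json
    import requests

    payload = {'key': 'changeme', 'products': [1, 2, 3]}
    headers = {'content-type': 'application/json', 'accept': 'application/json'}

    resp = requests.post('http://localhost:22026/', data=json.dumps(payload), headers=headers)
    print(resp.status_code, resp.json())
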
diff --git a/Crawler/crawler.py b/Crawler/crawler.py
new file mode 100644
index 0000000..99ff867
--- /dev/null
+++ b/Crawler/crawler.py
@@ -0,0 +1,78 @@
+import sql
+
+
+def crawl(product_ids: [int]) -> dict:
+    """
+    Crawls the given list of products and saves the results to sql
+    :param product_ids: The list of product IDs to fetch
+    :return: A dict with the following fields:
+        total_crawls: number of total crawl tries (products * vendors per product)
+        successful_crawls: number of successfully crawled product/vendor combinations
+        products_with_problems: list of products that have not been crawled successfully
+    """
+    total_crawls = 0
+    successful_crawls = 0
+    products_with_problems = []
+
+    # Iterate over every product that has to be crawled
+    for product_id in product_ids:
+        # Get all links for this product
+        product_links = sql.getProductLinksForProduct(product_id)
+
+        crawled_data = []
+
+        # Iterate over every link / vendor
+        for product_vendor_info in product_links:
+            total_crawls += 1
+
+            # Call the appropriate vendor crawling function and append the result to the list of crawled data
+            if product_vendor_info['vendor_id'] == 1:
+                # Amazon
+                crawled_data.append(__crawl_amazon__(product_vendor_info))
+            elif product_vendor_info['vendor_id'] == 2:
+                # Apple
+                crawled_data.append(__crawl_apple__(product_vendor_info))
+            elif product_vendor_info['vendor_id'] == 3:
+                # Media Markt
+                crawled_data.append(__crawl_mediamarkt__(product_vendor_info))
+            else:
+                products_with_problems.append(product_vendor_info)
+                continue
+
+            successful_crawls += 1
+
+        # Insert data to SQL
+        sql.insertData(crawled_data)
+
+    return {
+        'total_crawls': total_crawls,
+        'successful_crawls': successful_crawls,
+        'products_with_problems': products_with_problems
+    }
+
+
+def __crawl_amazon__(product_info: dict) -> tuple:
+    """
+    Crawls the price for the given product from amazon
+    :param product_info: A dict with product info containing product_id, vendor_id, url
+    :return: A tuple with the crawled data, containing (product_id, vendor_id, price_in_cents)
+    """
+    return (product_info['product_id'], product_info['vendor_id'], 123)
+
+
+def __crawl_apple__(product_info: dict) -> tuple:
+    """
+    Crawls the price for the given product from apple
+    :param product_info: A dict with product info containing product_id, vendor_id, url
+    :return: A tuple with the crawled data, containing (product_id, vendor_id, price_in_cents)
+    """
+    return (product_info['product_id'], product_info['vendor_id'], 123)
+
+
+def __crawl_mediamarkt__(product_info: dict) -> tuple:
+    """
+    Crawls the price for the given product from media markt
+    :param product_info: A dict with product info containing product_id, vendor_id, url
+    :return: A tuple with the crawled data, containing (product_id, vendor_id, price_in_cents)
+    """
+    # Not implemented yet; return the same placeholder price as the other vendors so that
+    # crawl() does not hand None to sql.insertData
+    return (product_info['product_id'], product_info['vendor_id'], 123)
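
All three vendor functions currently return a fixed placeholder price of 123 cents. A rough sketch of what a real implementation could look like (the regex-based price extraction and the helper name are assumptions, not part of the patch):

    import re
    import requests

    def __crawl_generic__(product_info: dict) -> tuple:
        """Illustrative only: fetch the product page and grab the first euro price on it."""
        resp = requests.get(product_info['url'], timeout=10)
        resp.raise_for_status()
        # Very naive extraction, e.g. '1.234,56 EUR' -> 123456 cents; a real crawler
        # would use vendor-specific selectors instead
        match = re.search(r'([\d.]+),(\d{2})\s*(?:€|EUR)', resp.text)
        if match is None:
            return (product_info['product_id'], product_info['vendor_id'], None)
        cents = int(match.group(1).replace('.', '')) * 100 + int(match.group(2))
        return (product_info['product_id'], product_info['vendor_id'], cents)
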
diff --git a/Crawler/requirements.txt b/Crawler/requirements.txt
index 04ca272..0b9c558 100644
--- a/Crawler/requirements.txt
+++ b/Crawler/requirements.txt
@@ -1,4 +1,4 @@
pymysql
flask
flask-sqlalchemy
-flask_restful
\ No newline at end of file
+flask_restful
diff --git a/Crawler/sql.py b/Crawler/sql.py
new file mode 100644
index 0000000..1cf3a58
--- /dev/null
+++ b/Crawler/sql.py
@@ -0,0 +1,89 @@
+import logging
+
+import pymysql
+import os
+
+
+def __getConnection__() -> pymysql.Connection:
+    """
+    Opens a new pymysql connection and returns it
+    :return: A pymysql Connection object
+    """
+    logger = logging.getLogger()
+    try:
+        conn = pymysql.connect(
+            user=os.environ['BETTERZON_CRAWLER_USER'],
+            password=os.environ['BETTERZON_CRAWLER_PASSWORD'],
+            host=os.environ['BETTERZON_CRAWLER_HOST'],
+            port=3306,
+            database=os.environ['BETTERZON_CRAWLER_DB']
+        )
+
+        return conn
+    except pymysql.Error as e:
+        logger.error('SQL Connection error: %s', e)
+        return
+
+
+def getProductsForVendor(vendor_id: int) -> [{}]:
+    """
+    Queries the product links for all products of the given shop
+    :param vendor_id: The vendor / shop to query products for
+    :return: A list of product objects, each having the following parameters:
+        product_id, vendor_id, url
+    """
+    conn = __getConnection__()
+    cur = conn.cursor()
+
+    query = 'SELECT product_id, url FROM product_links WHERE vendor_id = %s'
+
+    cur.execute(query, (vendor_id,))
+
+    products = list(map(lambda x: {'product_id': x[0], 'vendor_id': vendor_id, 'url': x[1]}, cur.fetchall()))
+
+    return products
+
+
+def getProductLinksForProduct(product_id: int) -> [dict]:
+    """
+    Queries all the product links for the given product
+    :param product_id: The product to query data for
+    :return: A list of product objects, each having the following parameters:
+        product_id, vendor_id, url
+    """
+    conn = __getConnection__()
+    cur = conn.cursor()
+
+    query = 'SELECT vendor_id, url FROM product_links WHERE product_id = %s'
+
+    cur.execute(query, (product_id,))
+
+    products = list(map(lambda x: {'product_id': product_id, 'vendor_id': x[0], 'url': x[1]}, cur.fetchall()))
+
+    return products
+
+
+def insertData(data_to_insert: [tuple]) -> bool:
+    """
+    Inserts the given list of tuples into the DB
+    :param data_to_insert: A list of tuples, where each tuple has to contain product id, vendor id and the price
+        in exactly this order
+    :return: If the insert was successful
+    """
+    conn = __getConnection__()
+    cur = conn.cursor()
+
+    query = 'INSERT INTO prices (product_id, vendor_id, price_in_cents, timestamp) VALUES (%s, %s, %s, NOW())'
+
+    affectedRows = cur.executemany(query, data_to_insert)
+
+    if affectedRows != len(data_to_insert):
+        # Something went wrong, revert the changes
+        conn.rollback()
+    else:
+        conn.commit()
+
+    cur.close()
+    conn.close()
+
+    return affectedRows == len(data_to_insert)
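
For reference, a minimal sketch of the shape insertData expects, i.e. what crawl() builds per product (IDs and prices are made-up values):

    import sql  # Crawler/sql.py

    # Each tuple is (product_id, vendor_id, price_in_cents), matching the INSERT statement above
    crawled_data = [
        (42, 1, 34999),  # Amazon offer for product 42
        (42, 3, 35999),  # Media Markt offer for product 42
    ]
    ok = sql.insertData(crawled_data)
    print('insert successful:', ok)
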
diff --git a/doku/AC_AddProducts.drawio b/doku/AC_AddProducts.drawio
new file mode 100644
index 0000000..aa2fff0
--- /dev/null
+++ b/doku/AC_AddProducts.drawio
@@ -0,0 +1,137 @@
diff --git a/doku/AC_AddProducts.png b/doku/AC_AddProducts.png
new file mode 100644
index 0000000..1386a5f
Binary files /dev/null and b/doku/AC_AddProducts.png differ
diff --git a/doku/AC_Administration.drawio b/doku/AC_Administration.drawio
new file mode 100644
index 0000000..b2e81c2
--- /dev/null
+++ b/doku/AC_Administration.drawio
@@ -0,0 +1,72 @@
diff --git a/doku/AC_Administration.png b/doku/AC_Administration.png
new file mode 100644
index 0000000..eeabc3c
Binary files /dev/null and b/doku/AC_Administration.png differ
diff --git a/doku/AC_Crawler.drawio b/doku/AC_Crawler.drawio
index 84cd5f1..5cbcc88 100644
--- a/doku/AC_Crawler.drawio
+++ b/doku/AC_Crawler.drawio
@@ -1,142 +1,190 @@
diff --git a/doku/AC_Crawler.png b/doku/AC_Crawler.png
index 498c96f..185fa41 100644
Binary files a/doku/AC_Crawler.png and b/doku/AC_Crawler.png differ