mirror of
https://github.com/Mueller-Patrick/Betterzon.git
synced 2024-12-04 03:25:12 +00:00
26ba21156a
* BETTERZON-58: Basic Functionality with scrapy * Added independent crawler function, yielding price * moved logic to amazon.py * . * moved scrapy files to unused folder * Added basic amazon crawler using beautifulsoup4 * Connected Api to Crawler * Fixed string concatenation for sql statement in getProductLinksForProduct * BETTERZON-58: Fixing SQL insert * BETTERZON-58: Adding access key verification * BETTERZON-58: Fixing API endpoint of the crawler - The list of products in the API request was treated like a string and henceforth, only the first product has been crawled * Added another selector for price on amazon (does not work for books) Co-authored-by: root <root@DESKTOP-ARBPL82.localdomain> Co-authored-by: Patrick Müller <patrick@mueller-patrick.tech> Co-authored-by: Patrick <50352812+Mueller-Patrick@users.noreply.github.com>
36 lines
822 B
Python
36 lines
822 B
Python
import os
|
|
|
|
from flask import Flask
|
|
from flask_restful import Resource, Api, reqparse
|
|
|
|
import crawler
|
|
|
|
# Parser for the fields expected in a crawler request: the caller's access
# key and the product ids to crawl (repeated values are collected in a list).
parser = reqparse.RequestParser()
parser.add_argument('key', type=str)
parser.add_argument('products', action='append', type=int)

# Flask application and the Flask-RESTful wrapper that resources register on.
app = Flask(__name__)
api = Api(app)
|
|
|
|
|
|
class CrawlerApi(Resource):
    """REST resource through which the crawler is triggered."""

    def get(self):
        """Return a static greeting payload."""
        return {'Hallo': 'Betterzon'}

    def post(self):
        """Start a crawl for the requested products if the access key matches.

        The request is parsed with the module-level ``parser``; its ``key``
        field is compared against the ``CRAWLER_ACCESS_KEY`` environment
        variable.

        Returns:
            ``{'message': 'success'}`` once ``crawler.crawl`` has been called
            with the parsed ``products``, otherwise
            ``{'message': 'Wrong access key'}``.
        """
        # Imported locally so this block does not depend on a file-level import.
        import hmac

        args = parser.parse_args()
        supplied_key = args['key']
        access_key = os.getenv('CRAWLER_ACCESS_KEY')

        # An unset variable yields None, and so does a request without a key.
        # A plain equality check would treat that pair as a match and let
        # unauthenticated callers in, so both cases are rejected up front.
        # An empty configured key is rejected for the same reason.
        if not access_key or supplied_key is None:
            return {'message': 'Wrong access key'}

        # Constant-time comparison on bytes: avoids leaking how many leading
        # characters matched, and works for non-ASCII keys as well.
        keys_match = hmac.compare_digest(
            supplied_key.encode('utf-8'),
            access_key.encode('utf-8'),
        )
        if not keys_match:
            return {'message': 'Wrong access key'}

        # NOTE(review): 'products' is presumably None when the request carries
        # no products - confirm crawler.crawl copes with that.
        crawler.crawl(args['products'])
        return {'message': 'success'}
|
|
|
|
|
|
# Serve the crawler resource at the API root.
api.add_resource(CrawlerApi, '/')


if __name__ == '__main__':
    # Started directly as a script: listen on all interfaces on port 22026.
    app.run(port=22026, host='0.0.0.0')
|