import re

import scrapy


class AmazonSpider(scrapy.Spider):
    """Scrape the price (in euro cents) of a single Amazon.de product page.

    The page layout varies, so several known price locations are tried in
    order; the first non-empty hit wins.
    """

    name = 'amazon'
    allowed_domains = ['amazon.de']
    start_urls = ['https://amazon.de/dp/B083DRCPJG']

    def parse(self, response):
        """Yield ``{'price': <cents-as-string>}`` for the product page.

        Logs a warning and yields nothing when no price can be located or
        the located text does not look like a ``EE,CC`` euro amount.
        """
        # Try the known price locations in priority order; `or` skips
        # both None (selector missing) and empty-string extractions.
        price = (
            response.xpath('//*[@id="priceblock_ourprice"]/text()').extract_first()
            or response.xpath('//*[@data-asin-price]/@data-asin-price').extract_first()
            or response.xpath('//*[@id="price_inside_buybox"]/text()').extract_first()
        )
        if price is None:
            # Previously this fell through to re.match(pattern, None)
            # and crashed with TypeError; skip the item instead.
            self.logger.warning('no price found on %s', response.url)
            return

        # One pattern with two groups replaces the original pair of
        # near-identical re.match calls. re.search (not match) tolerates
        # a currency prefix such as "EUR 12,34".
        match = re.search(r'(\d*),(\d\d)', price)
        if match is None:
            # Unrecognized price format: log and skip rather than raising
            # AttributeError on match.group(...).
            self.logger.warning('unparsable price %r on %s', price, response.url)
            return

        euros, cents = match.group(1), match.group(2)
        priceincents = euros + cents

        yield {'price': priceincents}