ka-news-yaa-crawler/main.py

41 lines
988 B
Python
Raw Normal View History

import sql
2022-07-02 18:15:13 +00:00
from crawler import Crawler
def __check_and_insert_in_database__(conn, article) -> bool:
    """
    Check whether the article is already stored and insert it if it is not.

    The article is looked up by its URL in the ``yaa_articles`` table. When no
    row matches, a new row is inserted and the transaction is committed.

    :param conn: SQL connection; must provide ``cursor()`` and ``commit()``
    :param article: The article to check / insert; must expose ``title``,
        ``summary``, ``url`` and ``image_url``
    :return: ``True`` if the article was already known, ``False`` if it was
        newly inserted
    """
    cur = conn.cursor()
    try:
        # Query parameters are passed as a sequence: a bare string is not a
        # valid parameter container for every database driver, so the single
        # URL is wrapped in a 1-tuple.
        cur.execute('SELECT article_id FROM yaa_articles WHERE url = %s', (article.url,))
        if cur.fetchall():
            return True

        cur.execute('INSERT INTO yaa_articles (title, summary, url, image_url) VALUES (%s, %s, %s, %s)',
                    (article.title, article.summary, article.url, article.image_url))
        conn.commit()
        return False
    finally:
        # Release the cursor on every path, including when a query raises.
        cur.close()
2022-07-02 18:15:13 +00:00
# Script entry point: crawl ka-news.de and record every article that is not
# yet stored in the database, printing 'New!' or 'Old!' per article.
if __name__ == '__main__':
    crawl = Crawler('https://www.ka-news.de')
    conn = sql.get_connection()
    try:
        articles = crawl.check_for_new_yaa_articles()
        for article in articles:
            # The helper returns True when the article was already known.
            if not __check_and_insert_in_database__(conn, article):
                print('New!')
            else:
                print('Old!')
    finally:
        # Close the connection even if crawling or a database call fails.
        conn.close()