From a25985e93619e443f7b1d5ffe391022055ec961a Mon Sep 17 00:00:00 2001 From: Sam Hadow Date: Thu, 28 Dec 2023 13:38:21 +0100 Subject: [PATCH] postgresql connection --- .gitignore | 1 + aliexpress.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ config.py | 3 +++ db.py | 17 +++++++++++++++++ main.py | 50 ++++++-------------------------------------------- 5 files changed, 77 insertions(+), 44 deletions(-) create mode 100644 aliexpress.py create mode 100644 config.py create mode 100644 db.py diff --git a/.gitignore b/.gitignore index d4acada..8f9adc0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ cookies.json test*.txt +settings.yaml diff --git a/aliexpress.py b/aliexpress.py new file mode 100644 index 0000000..014d5f9 --- /dev/null +++ b/aliexpress.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +import requests, re, json, os, yaml + +def load_cookies_from_file(file_path): + with open(file_path, 'r') as file: + cookies_data = json.load(file) + + cookies_dict = {} + + for cookie_data in cookies_data: + name_raw = cookie_data.get("Name raw", "") + content_raw = cookie_data.get("Content raw", "") + cookie_value = f"{content_raw}" + + if len(cookie_value) > 0: + cookies_dict[name_raw] = cookie_value + + return cookies_dict + +def check_items(settings_id): + + item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}') + choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"') + #shipping_cost_regex = re.compile(r'') + + session = requests.Session() + cookies_file_path = './cookies.json' + cookies = load_cookies_from_file(cookies_file_path) + session.cookies.update(cookies) + + extract = dict() + + for item in settings_id: + url = 'https://aliexpress.com/item/'+item+'.html' + target_page_response = session.get(url) + + if target_page_response.status_code == 200: + content = re.findall(item_regex, target_page_response.text) + is_choice = bool(re.search(choice_regex, target_page_response.text)) + for elem in content: + key = item+':'+elem[0] + discount = 0 if len(elem[5]) == 0 else int(elem[5]) + price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11]) + + # skuId, quantity, discount_percentage, price, currency, choice_delivery + extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice) + return extract + else: + print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') + return None diff --git a/config.py b/config.py new file mode 100644 index 0000000..27845be --- /dev/null +++ b/config.py @@ -0,0 +1,3 @@ +#!/usr/bin/python +import os, yaml + diff --git a/db.py b/db.py new file mode 100644 index 0000000..ec5dd17 --- /dev/null +++ b/db.py @@ -0,0 +1,17 @@ +#!/usr/bin/python +import psycopg2 + +def connect_db(db_settings): + conn = None + print("Connecting to the PostgreSQL database...") + try: + conn = psycopg2.connect(**db_settings) + print("Connection success") + except (Exception, psycopg2.DatabaseError) as error: + print(error) + return conn + +def close_db(conn): + if conn is not None: + conn.close() + print("Connection closed") diff --git a/main.py b/main.py index e00022d..0e91809 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,7 @@ +#!/usr/bin/python import requests, re, json, os, yaml +from db import * +from aliexpress import * def get_conf(): '''return settings in settings.yaml file''' @@ -6,54 +9,13 @@ def get_conf(): settings = yaml.safe_load(conf_file) return settings -def load_cookies_from_file(file_path): - with open(file_path, 'r') as file: - cookies_data = json.load(file) - - cookies_dict = {} - - for cookie_data in cookies_data: - name_raw = cookie_data.get("Name raw", "") - content_raw = cookie_data.get("Content raw", "") - cookie_value = f"{content_raw}" - - if len(cookie_value) > 0: - cookies_dict[name_raw] = cookie_value - - return cookies_dict - if __name__ == '__main__': - item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}') - choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"') - #shipping_cost_regex = re.compile(r'') - settings = get_conf() - session = requests.Session() - cookies_file_path = './cookies.json' - cookies = load_cookies_from_file(cookies_file_path) - session.cookies.update(cookies) + print(check_items(settings["id"])) - extract = dict() - - for item in settings['id']: - url = 'https://aliexpress.com/item/'+item+'.html' - target_page_response = session.get(url) - - if target_page_response.status_code == 200: - content = re.findall(item_regex, target_page_response.text) - is_choice = bool(re.search(choice_regex, target_page_response.text)) - for elem in content: - key = item+':'+elem[0] - discount = 0 if len(elem[5]) == 0 else int(elem[5]) - price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11]) - - # skuId, quantity, discount_percentage, price, currency, choice_delivery - extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice) - #print(elem,"\n\n") - print(extract) - else: - print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') + connection = connect_db(settings["db"]) + close_db(connection)