fill database with items in settings.yaml

This commit is contained in:
Sam Hadow 2023-12-28 17:39:36 +01:00
parent a25985e936
commit f298da7ac5
5 changed files with 103 additions and 20 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
cookies.json cookies.json
test*.txt test*.txt
settings.yaml settings.yaml
__pycache__/*

View File

@ -1,5 +1,6 @@
#!/usr/bin/python #!/usr/bin/python
import requests, re, json, os, yaml import requests, re, json, os, yaml
from db import *
def load_cookies_from_file(file_path): def load_cookies_from_file(file_path):
with open(file_path, 'r') as file: with open(file_path, 'r') as file:
@ -17,7 +18,7 @@ def load_cookies_from_file(file_path):
return cookies_dict return cookies_dict
def check_items(settings_id): def check_items(settings_items):
item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}') item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"') choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
@ -30,21 +31,37 @@ def check_items(settings_id):
extract = dict() extract = dict()
for item in settings_id: for (item,filter_attributes) in settings_items:
url = 'https://aliexpress.com/item/'+item+'.html' url = 'https://aliexpress.com/item/'+item+'.html'
target_page_response = session.get(url) target_page_response = session.get(url)
if target_page_response.status_code == 200: if target_page_response.status_code == 200:
content = re.findall(item_regex, target_page_response.text) content = re.findall(item_regex, target_page_response.text)
is_choice = bool(re.search(choice_regex, target_page_response.text)) is_choice = bool(re.search(choice_regex, target_page_response.text))
for elem in content: for elem in content:
key = item+':'+elem[0] if set(get_attributes(elem[0])) == set(filter_attributes):
discount = 0 if len(elem[5]) == 0 else int(elem[5]) key = (item,tuple(filter_attributes))
price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11]) discount = 0 if len(elem[5]) == 0 else int(elem[5])
price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11])
# skuId, quantity, discount_percentage, price, currency, choice_delivery # skuId, quantity, discount_percentage, price, currency, choice_delivery
extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice) extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice}
return extract
else: else:
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
return None return extract
def get_attributes(attributes_raw):
    """Return the attribute labels found in a raw sku attribute string.

    A label is the (possibly empty) run of ASCII letters, digits and spaces
    that directly follows each ``#`` in *attributes_raw*. Labels are returned
    as a list, in order of appearance; the list is empty when there is no
    ``#`` in the input.
    """
    return re.findall(r'#([0-9a-zA-Z ]*)', attributes_raw)
def fill_db(db_settings, items_dict):
    """Store one history entry per scraped item variant.

    *items_dict* maps ``(item id, attributes)`` keys to dicts holding the
    fields "skuid", "choice_delivery", "price", "currency", "quantity" and
    "discount_percentage". Each pair is forwarded to ``add_history_entry``
    together with *db_settings*.
    """
    for item_key in items_dict:
        details = items_dict[item_key]
        item_id = item_key[0]
        sku_id = details["skuid"]
        is_choice = details["choice_delivery"]
        # The key stores the attributes in an indexable container (a tuple in
        # the visible caller); the database layer is handed a list.
        attribute_list = list(item_key[1])
        price = details["price"]
        currency = details["currency"]
        quantity = details["quantity"]
        discount = details["discount_percentage"]
        add_history_entry(db_settings, item_id, sku_id, is_choice, attribute_list, price, currency, quantity, discount)

77
db.py
View File

@ -11,7 +11,76 @@ def connect_db(db_settings):
print(error) print(error)
return conn return conn
def close_db(conn):
    """Close *conn* if it is not None and report it on stdout."""
    if conn is not None:
        conn.close()
        print("Connection closed")


def add_item(db_settings, itemid, skuid, choice, attributes):
    """Insert one row into the ``item`` table.

    Opens its own connection through ``connect_db(db_settings)``, inserts
    ``(itemid, skuid, choice, attributes)``, commits, and closes the
    connection. The connection is closed even when the insert or the commit
    raises, so a failed insert does not leak it.
    """
    connection = connect_db(db_settings)
    cursor = connection.cursor()
    try:
        cursor.execute("""
            INSERT INTO item (itemid, skuid, choice, attributes)
            VALUES (%s, %s, %s, %s)
        """, (itemid, skuid, choice, attributes))
        connection.commit()
    finally:
        # Release the connection on both the success and the error path.
        connection.close()
def add_history_entry(db_settings, itemid, skuid, choice, attributes, price, currency, quantity, discount_percentage):
    """Record one price/stock observation for an item variant.

    If no ``item`` row exists for ``(itemid, skuid)``, one is created first
    through ``add_item`` (using *choice* and *attributes*). A ``history`` row
    is then inserted with *price*, *currency*, *quantity*,
    *discount_percentage* and the database's ``LOCALTIMESTAMP``.

    The connection obtained from ``connect_db(db_settings)`` is always closed,
    including when a statement raises.
    """
    connection = connect_db(db_settings)
    cursor = connection.cursor()
    try:
        cursor.execute("""
            SELECT 1
            FROM item
            WHERE itemid = %s
            AND skuid = %s
        """, (itemid, skuid))
        # Fetch a row instead of reading cursor.rowcount: DB-API allows a
        # driver to report -1 when the count of a SELECT cannot be determined.
        if cursor.fetchone() is None:
            # add_item commits on its own connection, so the new item row is
            # visible to the foreign-key check of the insert below.
            add_item(db_settings, itemid, skuid, choice, attributes)
        cursor.execute("""
            INSERT INTO history (itemid, skuid, price, currency, quantity, discount_percentage, h_timestamp)
            VALUES (%s, %s, %s, %s, %s, %s, (SELECT LOCALTIMESTAMP))
        """, (itemid, skuid, price, currency, quantity, discount_percentage))
        connection.commit()
    finally:
        # Release the connection on both the success and the error path.
        connection.close()
def initialize(db_settings):
    """Drop and recreate the ``item`` and ``history`` tables.

    WARNING: destructive. Any existing ``history`` and ``item`` tables are
    dropped before the empty tables are created again.

    The connection obtained from ``connect_db(db_settings)`` is always closed,
    including when a statement raises; "Database initialized" is printed only
    after a successful commit.
    """
    connection = connect_db(db_settings)
    cursor = connection.cursor()
    try:
        # history references item, so it has to be dropped first.
        cursor.execute("""
            DROP TABLE IF EXISTS history
        """)
        cursor.execute("""
            DROP TABLE IF EXISTS item
        """)
        cursor.execute("""
            CREATE TABLE item
            (
            itemid bigint,
            skuid bigint,
            choice boolean,
            attributes text[],
            primary key (itemid,skuid)
            )
        """)
        cursor.execute("""
            CREATE TABLE history
            (
            itemid bigint,
            skuid bigint,
            quantity integer,
            discount_percentage numeric(2),
            price money,
            currency varchar(4),
            h_timestamp timestamp,
            foreign key (itemid,skuid) references item(itemid,skuid),
            primary key (itemid,skuid,h_timestamp)
            )
        """)
        connection.commit()
    finally:
        # Release the connection on both the success and the error path.
        connection.close()
    print("Database initialized")

View File

@ -14,8 +14,7 @@ if __name__ == '__main__':
settings = get_conf() settings = get_conf()
print(check_items(settings["id"])) #initialize(settings["db"])
fill_db(settings["db"], check_items(settings["item"]))
connection = connect_db(settings["db"])
close_db(connection)

View File

@ -1,3 +0,0 @@
id:
- "1005005769229528"
- "4001259224639"