#!/usr/bin/python
'''Scrape AliExpress item pages and record the extracted offers in the database.'''
import json
import os
import re

import requests
import yaml

from db import *

# Seconds to wait for AliExpress before giving up on a request. requests has no
# default timeout, so without one a stalled connection blocks the run forever.
_REQUEST_TIMEOUT_SECONDS = 30

# One SKU entry of the escaped JSON embedded in the item page. The capture
# groups consumed by check_items (0-based indices into the findall tuples) are:
#   0: raw attribute string     2: skuId                3: availQuantity
#   5: discount (optional)     11: activity price value (optional)
#  12: regular price currency  13: regular price value
_ITEM_REGEX = re.compile(
    r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",'
    r'\\\"skuId\\\":([0-9]*),'
    r'\\\"skuIdStr\\\":\\\"[0-9]*\\\",'
    r'\\\"skuPropIds\\\":\\\"[0-9,]*\\\",'
    r'\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),'
    r'(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?'
    r'\\\"hideOriPrice\\\":(false|true),'
    r'\\\"inventory\\\":([0-9]*),'
    r'\\\"isActivity\\\":(true|false),'
    r'\\\"optionalWarrantyPrice\\\":\[\],'
    r'(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",'
    r'\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",'
    r'\\\"value\\\":([0-9]*\.[0-9]*)},'
    r'\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?'
    r'\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",'
    r'\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",'
    r'\\\"value\\\":([0-9]*\.[0-9]*)}'
)

# Marker present in the page when the item is sold under the "CHOICE" model.
_CHOICE_REGEX = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')

# NOTE(review): this pattern is empty, so the fallback image link is always
# the empty string. It looks like the original pattern was lost -- TODO confirm
# and restore it.
_MAGNIFIER_IMAGE_REGEX = re.compile(r'')

# Size suffix of a preview image URL, e.g. "jpg_220x220".
_IMAGE_SIZE_REGEX = re.compile(r'jpg_[0-9]+x[0-9]+')

# An attribute value inside a raw skuAttr string: the text following a '#'.
_ATTRIBUTE_REGEX = re.compile(r'#([0-9a-zA-Z ]*)')


def load_cookies_from_file(file_path):
    '''load cookies from a JSON file and return a dict usable in a request session

    The file must contain a JSON list of objects. For each object the
    "Name raw" entry is used as cookie name and the "Content raw" entry as
    cookie value; objects whose value is empty or missing are skipped.
    '''
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        cookies_data = json.load(file)

    cookies_dict = {}
    for cookie_data in cookies_data:
        cookie_value = str(cookie_data.get("Content raw", ""))
        if cookie_value:
            cookies_dict[cookie_data.get("Name raw", "")] = cookie_value
    return cookies_dict


def _find_item_image(page_text, filter_attributes):
    '''return the image link for the selected attributes, or the page fallback'''
    for attr in filter_attributes:
        # The attribute comes from user settings: escape it so that regex
        # metacharacters in it are matched literally.
        image_regex = re.compile(
            r'\"propertyValueDefinitionName\":\"'
            + re.escape(str(attr))
            + r'\",\"propertyValueIdLong\":[0-9]*,\"skuPropertyImageSummPath\":\"(https:\/\/[0-9a-zA-Z.\/\-\_]*)\"'
        )
        found = image_regex.search(page_text)
        if found:
            # get bigger image instead of preview
            return _IMAGE_SIZE_REGEX.sub("jpg_800x800", found.group(1))

    # No attribute carries an image: fall back to the page-level image.
    # search() stops at the first match instead of listing every match.
    fallback = _MAGNIFIER_IMAGE_REGEX.search(page_text)
    if fallback is None:
        return ''
    return fallback.group(1) if _MAGNIFIER_IMAGE_REGEX.groups else fallback.group(0)


def check_items(settings_items):
    '''
    return a dict with items data extracted from aliexpress.
    a file containing aliexpress login token cookies has to be provided in
    ./cookies.json (next to this file), obtained with cookie-quick-manager
    https://github.com/ysard/cookie-quick-manager/,
    for accurate prices (no "welcome discount")

    parameter settings_items is an iterable of pairs (itemid, attributes):
    itemid is the string found in the aliexpress link to the item page.
    attributes is a list of strings. Each string is a choice value
    (for example which length, or which colour); if several choices are on
    the same page, give only one value per category, order doesn't matter.

    returns a dict keyed by (itemid, tuple(attributes)) whose values are dicts
    with the keys: skuid, quantity, discount_percentage, price, currency,
    choice_delivery, image. Items whose page does not answer with status 200
    are reported on stdout and skipped; items without a matching SKU are
    absent from the result. If several SKUs match, the last one wins.

    Exceptions raised by requests (connection errors, timeout after
    _REQUEST_TIMEOUT_SECONDS) are not caught here.
    '''
    session = requests.Session()
    cookies_file_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'cookies.json')
    session.cookies.update(load_cookies_from_file(cookies_file_path))

    extract = {}
    for item, filter_attributes in settings_items:
        url = 'https://aliexpress.com/item/' + item + '.html'
        target_page_response = session.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        if target_page_response.status_code != 200:
            print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
            continue

        page_text = target_page_response.text
        is_choice = bool(_CHOICE_REGEX.search(page_text))
        wanted_attributes = set(filter_attributes)
        key = (item, tuple(filter_attributes))
        image_link = None  # looked up lazily, only when a SKU matches

        for sku in _ITEM_REGEX.findall(page_text):
            if set(get_attributes(sku[0])) != wanted_attributes:
                continue

            if image_link is None:
                image_link = _find_item_image(page_text, filter_attributes)

            discount = int(sku[5]) if sku[5] else 0
            # Prefer the activity (promotional) price when the page has one.
            price = float(sku[11]) if sku[11] else float(sku[13])

            extract[key] = {
                "skuid": sku[2],
                "quantity": sku[3],
                "discount_percentage": discount,
                "price": price,
                "currency": sku[12],
                "choice_delivery": is_choice,
                "image": image_link,
            }
    return extract


def get_attributes(attributes_raw):
    '''return a list of attributes from attributes raw string

    Every run of letters, digits and spaces that follows a '#' in
    attributes_raw is one attribute, e.g. "14:193#Red;5:100#XL" gives
    ["Red", "XL"].
    '''
    return _ATTRIBUTE_REGEX.findall(attributes_raw)


def fill_db(db_settings, items_dict):
    '''add new history entries in database with data extracted in items_dict

    items_dict has the shape returned by check_items. One call to
    add_history_entry is made per entry (add_history_entry is not defined in
    this file; presumably it is provided by the db module).
    '''
    for (item_id, attributes), value in items_dict.items():
        add_history_entry(
            db_settings,
            item_id,
            value["skuid"],
            value["choice_delivery"],
            list(attributes),
            value["image"],
            value["price"],
            value["currency"],
            value["quantity"],
            value["discount_percentage"],
        )