"""Scrape per-SKU stock and price data from AliExpress item pages.

The item IDs to fetch are read from ``settings.yaml`` (key ``id``) located
next to this script, and the browser cookies sent with every request are read
from ``./cookies.json`` (resolved against the current working directory).
"""
import json
import os
import re

import requests
import yaml

# Matches one SKU record inside the backslash-escaped JSON embedded in an item
# page.  The pattern is split over adjacent raw-string literals purely for
# readability; concatenated they form a single regular expression.
ITEM_REGEX = re.compile(
    r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",'
    r'\\\"skuId\\\":([0-9]*),'
    r'\\\"skuIdStr\\\":\\\"[0-9]*\\\",'
    r'\\\"skuPropIds\\\":\\\"[0-9,]*\\\",'
    r'\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),'
    r'(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?'
    r'\\\"hideOriPrice\\\":(false|true),'
    r'\\\"inventory\\\":([0-9]*),'
    r'\\\"isActivity\\\":(true|false),'
    r'\\\"optionalWarrantyPrice\\\":\[\],'
    r'(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",'
    r'\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",'
    r'\\\"value\\\":([0-9]*\.[0-9]*)},'
    r'\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?'
    r'\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",'
    r'\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",'
    r'\\\"value\\\":([0-9]*\.[0-9]*)}'
)

# Present in the page when the item is sold under the "CHOICE" business model.
CHOICE_REGEX = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')

# TODO: add a pattern for the shipping cost (was an empty placeholder regex).

# Positions of the fields of interest inside each tuple returned by
# ITEM_REGEX.findall() (index = capture-group number - 1).  Groups that did
# not take part in the match are returned as empty strings.
_SKU_ATTR = 0          # e.g. "14:193#Black;5:361386#M"
_SKU_ID = 2
_AVAIL_QUANTITY = 3
_DISCOUNT = 5          # only set when the optional "discount" part is present
_ACTIVITY_VALUE = 11   # only set when the optional "skuActivityAmount" is present
_AMOUNT_CURRENCY = 12  # currency of "skuAmount"
_AMOUNT_VALUE = 13     # value of "skuAmount"

COOKIES_FILE_PATH = './cookies.json'

# Seconds to wait for the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 30


def get_conf():
    """Return the settings parsed from the ``settings.yaml`` beside this script.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    conf_path = os.path.join(script_dir, "settings.yaml")
    with open(conf_path, 'r', encoding='utf-8') as conf_file:
        return yaml.safe_load(conf_file)


def load_cookies_from_file(file_path):
    """Build a ``{name: value}`` cookie mapping from a JSON cookie export.

    The file must contain a JSON list of objects.  For each object the cookie
    name is taken from its ``"Name raw"`` entry and the value from its
    ``"Content raw"`` entry (both default to an empty string when missing).
    Cookies whose value, once converted to text, is empty are skipped.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        dict mapping cookie names to their (string) values.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        cookies_data = json.load(file)

    cookies_dict = {}
    for cookie_data in cookies_data:
        cookie_value = str(cookie_data.get("Content raw", ""))
        if cookie_value:
            cookies_dict[cookie_data.get("Name raw", "")] = cookie_value
    return cookies_dict


def _extract_skus(item_id, page_text):
    """Return the SKU records found in the text of one item page.

    Args:
        item_id: Item identifier (already a string) used to prefix the keys.
        page_text: Text of the fetched item page.

    Returns:
        dict mapping ``"<item_id>:<skuAttr>"`` to a tuple
        ``(skuId, quantity, discount_percentage, price, currency,
        choice_delivery)``.  ``skuId`` and ``quantity`` are kept as the
        strings captured from the page.
    """
    is_choice = bool(CHOICE_REGEX.search(page_text))
    skus = {}
    for groups in ITEM_REGEX.findall(page_text):
        discount = int(groups[_DISCOUNT]) if groups[_DISCOUNT] else 0
        # Prefer the activity (promotional) amount when the page has one,
        # otherwise fall back to the regular SKU amount.
        price = float(groups[_ACTIVITY_VALUE] or groups[_AMOUNT_VALUE])
        skus[f'{item_id}:{groups[_SKU_ATTR]}'] = (
            groups[_SKU_ID],
            groups[_AVAIL_QUANTITY],
            discount,
            price,
            groups[_AMOUNT_CURRENCY],
            is_choice,
        )
    return skus


def main():
    """Fetch every configured item page and print the accumulated SKU data."""
    settings = get_conf()
    cookies = load_cookies_from_file(COOKIES_FILE_PATH)
    extract = {}

    with requests.Session() as session:
        session.cookies.update(cookies)
        for item in settings['id']:
            # YAML parses unquoted numeric IDs as int; normalise to str so the
            # URL and the result keys can be built for both spellings.
            item_id = str(item)
            url = 'https://aliexpress.com/item/' + item_id + '.html'
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as err:
                # Report and move on, like for a non-200 answer below.
                print(f'Failed to fetch target page. Error: {err}')
                continue

            if response.status_code == 200:
                extract.update(_extract_skus(item_id, response.text))
                # Printed after each successfully fetched page, so the output
                # shows everything gathered so far.
                print(extract)
            else:
                print(f'Failed to fetch target page. Status code: {response.status_code}')


if __name__ == '__main__':
    main()