diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d4acada --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cookies.json +test*.txt diff --git a/main.py b/main.py index c139a70..e00022d 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,10 @@ -import requests, re, json +import requests, re, json, os, yaml + +def get_conf(): + '''return settings in settings.yaml file''' + with open(os.path.dirname(os.path.realpath(__file__))+"/settings.yaml", 'r') as conf_file: + settings = yaml.safe_load(conf_file) + return settings def load_cookies_from_file(file_path): with open(file_path, 'r') as file: @@ -19,25 +25,35 @@ def load_cookies_from_file(file_path): if __name__ == '__main__': - item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}') - choice_regex = re.compile(r'businessModel\\\":\\\"(UN_CHOICE|CHOICE)\\\"') + choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"') #shipping_cost_regex = re.compile(r'') - target_url = 'https://fr.aliexpress.com/item/1005005769229528.html' + settings = get_conf() session = requests.Session() cookies_file_path = './cookies.json' cookies = load_cookies_from_file(cookies_file_path) session.cookies.update(cookies) - url = 'https://fr.aliexpress.com/item/1005005769229528.html' - target_page_response = session.get(url) - if target_page_response.status_code == 200: - #regex = re.compile(r'\"skuActivityAmountLocal\":\"[0-9]*,[0-9]{2}') - content = re.search(item_regex, target_page_response.text).group(12) - print(content) - else: - print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') + extract = dict() + for item in settings['id']: + url = 'https://aliexpress.com/item/'+item+'.html' + target_page_response = session.get(url) + + if target_page_response.status_code == 200: + content = re.findall(item_regex, target_page_response.text) + is_choice = bool(re.search(choice_regex, target_page_response.text)) + for elem in content: + key = item+':'+elem[0] + discount = 0 if len(elem[5]) == 0 else int(elem[5]) + price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11]) + + # skuId, quantity, discount_percentage, price, currency, choice_delivery + extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice) + #print(elem,"\n\n") + print(extract) + else: + print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') diff --git a/settings.yaml b/settings.yaml new file mode 100644 index 0000000..2754e5a --- /dev/null +++ b/settings.yaml @@ -0,0 +1,3 @@ +id: + - "1005005769229528" + - "4001259224639"