regex item properties

This commit is contained in:
Sam Hadow 2023-12-27 21:52:57 +01:00
parent 7d4d52144f
commit de5988764b

13
main.py
View File

@@ -18,9 +18,14 @@ def load_cookies_from_file(file_path):
if __name__ == '__main__': if __name__ == '__main__':
target_url = 'https://fr.aliexpress.com/item/1005005769229528.html'
item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
choice_regex = re.compile(r'businessModel\\\":\\\"(UN_CHOICE|CHOICE)\\\"')
#shipping_cost_regex = re.compile(r'')
target_url = 'https://fr.aliexpress.com/item/1005005769229528.html'
session = requests.Session() session = requests.Session()
cookies_file_path = './cookies.json' cookies_file_path = './cookies.json'
cookies = load_cookies_from_file(cookies_file_path) cookies = load_cookies_from_file(cookies_file_path)
@@ -28,11 +33,9 @@ if __name__ == '__main__':
url = 'https://fr.aliexpress.com/item/1005005769229528.html' url = 'https://fr.aliexpress.com/item/1005005769229528.html'
target_page_response = session.get(url) target_page_response = session.get(url)
if target_page_response.status_code == 200: if target_page_response.status_code == 200:
regex = re.compile(r'\"skuActivityAmountLocal\":\"[0-9]*,[0-9]{2}') #regex = re.compile(r'\"skuActivityAmountLocal\":\"[0-9]*,[0-9]{2}')
content = re.findall(regex, target_page_response.text) content = re.search(item_regex, target_page_response.text).group(12)
print(content) print(content)
else: else:
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}') print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')