diff --git a/.gitignore b/.gitignore index 0f6c712..97d1ebc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ settings.yaml __pycache__/* src/__pycache__/* web/test.html +*.csv diff --git a/src/aliexpress.py b/src/aliexpress.py index b395e60..7274b54 100644 --- a/src/aliexpress.py +++ b/src/aliexpress.py @@ -31,6 +31,7 @@ def check_item(settings_item): item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9;:]*#?[a-zA-Z0-9 \.\-]*;?)*)?\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"bulkOrder\\\":([0-9]*),)?(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}') choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"') magnifier_image_regex = re.compile(r'') + punish_regex = re.compile(r'(pid: \'punish-page\')|(Deny from x5)') session = requests.Session() cookies_file_path = os.path.dirname(os.path.realpath(__file__))+'/cookies.json' @@ -46,6 +47,11 @@ def check_item(settings_item): url = 'https://aliexpress.com/item/'+str(item)+'.html' target_page_response = session.get(url) if target_page_response.status_code == 200: + + punish = bool(re.search(punish_regex, target_page_response.text)) + if punish: + raise ValueError("punish") + content = re.findall(item_regex, target_page_response.text) is_choice = bool(re.search(choice_regex, target_page_response.text)) for elem in content: @@ -96,10 +102,27 @@ def fill_db(items_dict): def update_items(): '''add new history entries for 
items in database''' in_db = get_item_keys() + retry = [] for item in in_db: time.sleep(2) - new_entry = check_item(item) - fill_db(new_entry) + try: + new_entry = check_item(item) + fill_db(new_entry) + except ValueError: + retry.append(item) + return retry + +def retry_update(retry_list): + '''update entries from the retry list only''' + retry = [] + for item in retry_list: + time.sleep(2) + try: + new_entry = check_item(item) + fill_db(new_entry) + except ValueError: + retry.append(item) + return retry diff --git a/src/app.py b/src/app.py index 024f83c..6171827 100644 --- a/src/app.py +++ b/src/app.py @@ -1,6 +1,6 @@ from flask import Flask, request, jsonify, render_template from flask_cors import CORS -import requests, re, json, os, yaml +import requests, re, json, os, yaml, time from db import * from aliexpress import * @@ -21,7 +21,12 @@ def init_db(): @app.route('/update', methods = ['GET']) def update_hist(): print("update") - update_items() + retry = update_items() + retry_count = 0 + while len(retry)>0 and retry_count < 12: + time.sleep(300) # wait 5 minutes between each retry + retry = retry_update(retry) + retry_count += 1 return 'items updated' @app.route('/app/add', methods=['POST']) @@ -34,20 +39,23 @@ def add_item(): attributes = data.get('attributes', '').split(',') if data.get('attributes') else [] new_item = [itemid, attributes] - extr = check_item(new_item) + try: + extr = check_item(new_item) + except ValueError: + return jsonify({'status': 4, "info": "aliexpress punish page"}), 400 if len(extr) > 0: skuid = list(extr.values())[0]["skuid"] if check_exist(itemid, skuid): # item already exists - return jsonify({'status': 3}), 400 + return jsonify({'status': 3, "info": "item already exists"}), 400 else: # item is valid fill_db(extr) - return jsonify({'status': 0}), 200 + return jsonify({'status': 0, "info": "item added to database"}), 200 else: # item not valid or can't be parsed - return jsonify({'status': 
1, "info": "item not valid or can't be parsed"}), 400 @app.route('/app/delete', methods=['POST']) def del_item(): diff --git a/src/main.py b/src/main.py index 75795e0..7acd444 100644 --- a/src/main.py +++ b/src/main.py @@ -34,9 +34,9 @@ if __name__ == '__main__': c_l = ["1005005824413309", ["00350"]] print(check_item(c_l)) - # TODO : fix regex for this item - c_l = ["1005005777900699", ["Black"]] - print(check_item(c_l)) + # + # c_l = ["1005005777900699", ["Black"]] + # print(check_item(c_l)) # print(get_item_keys()) # initialize(settings["db"])