Fix catastrophic regex backtracking; add a background job that updates the database
This commit is contained in:
parent
3b6e66c886
commit
c7a4bc711d
20
CronDockerfile
Normal file
20
CronDockerfile
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
FROM python:bookworm
|
||||||
|
|
||||||
|
COPY /Cronrequirements.txt /
|
||||||
|
|
||||||
|
RUN pip3 install --upgrade pip
|
||||||
|
|
||||||
|
RUN pip3 install -r /Cronrequirements.txt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
COPY ./src/background.py /app/background.py
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ENV WAIT_TIME="1d"
|
||||||
|
|
||||||
|
CMD ["python", "background.py"]
|
1
Cronrequirements.txt
Normal file
1
Cronrequirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
requests
|
@ -1,8 +1,10 @@
|
|||||||
podman build --tag alipricetrack:1.0.0 -f ./Dockerfile
|
podman build --tag alipricetrack:1.0.0 -f ./Dockerfile
|
||||||
|
podman build --tag alipricetrack_cron:1.0.0 -f ./CronDockerfile
|
||||||
|
|
||||||
podman pod create --name aliexpress -p 8086:8080
|
podman pod create --name aliexpress -p 8086:8080
|
||||||
|
|
||||||
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" -v /home/data/podman/aliexpress/db:/var/lib/postgresql/data:Z --name ali-db docker.io/postgres
|
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" -v /home/data/podman/aliexpress/db:/var/lib/postgresql/data:Z --name ali-db docker.io/postgres
|
||||||
|
|
||||||
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" --name ali-app alipricetrack:1.0.0
|
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" --name ali-app alipricetrack:1.0.0
|
||||||
|
|
||||||
|
podman run -d --pod=aliexpress --name ali-cron alipricetrack_cron:1.0.0
|
||||||
|
@ -19,7 +19,7 @@ def load_cookies_from_file(file_path):
|
|||||||
|
|
||||||
return cookies_dict
|
return cookies_dict
|
||||||
|
|
||||||
def check_items(settings_items):
|
def check_item(settings_item):
|
||||||
'''
|
'''
|
||||||
return a dict with items data extracted from aliexpress.
|
return a dict with items data extracted from aliexpress.
|
||||||
a file containing aliexpress login token cookies has to be provided in ./cookies.json (obtained with cookie-quick-manager https://github.com/ysard/cookie-quick-manager/) for accurate prices (no "welcome discount")
|
a file containing aliexpress login token cookies has to be provided in ./cookies.json (obtained with cookie-quick-manager https://github.com/ysard/cookie-quick-manager/) for accurate prices (no "welcome discount")
|
||||||
@ -29,7 +29,7 @@ def check_items(settings_items):
|
|||||||
itemid is in aliexpress link to item page. attributes is a list of string. Each string is a choice value (for example which length, or which colour) if multiple items are on the same page, only one by category, order doesn't matter.
|
itemid is in aliexpress link to item page. attributes is a list of string. Each string is a choice value (for example which length, or which colour) if multiple items are on the same page, only one by category, order doesn't matter.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
item_regex = re.compile(r'skuAttr\\\":\\\"([0-9:;]*#([a-zA-Z0-9 ]*;?)*)?\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"bulkOrder\\\":([0-9]*),)?(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
|
item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9;:]*#[a-zA-Z0-9 \.]*;?)*)?\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"bulkOrder\\\":([0-9]*),)?(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
|
||||||
choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
|
choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
|
||||||
magnifier_image_regex = re.compile(r'<meta property=\"og:image\" content=\"(https:[0-9a-zA-Z\/\-\_.]*)\"\/>')
|
magnifier_image_regex = re.compile(r'<meta property=\"og:image\" content=\"(https:[0-9a-zA-Z\/\-\_.]*)\"\/>')
|
||||||
|
|
||||||
@ -40,8 +40,11 @@ def check_items(settings_items):
|
|||||||
|
|
||||||
extract = dict()
|
extract = dict()
|
||||||
|
|
||||||
for (item,filter_attributes) in settings_items:
|
print(settings_item)
|
||||||
url = 'https://aliexpress.com/item/'+item+'.html'
|
item = settings_item[0]
|
||||||
|
filter_attributes = settings_item[1]
|
||||||
|
|
||||||
|
url = 'https://aliexpress.com/item/'+str(item)+'.html'
|
||||||
target_page_response = session.get(url)
|
target_page_response = session.get(url)
|
||||||
if target_page_response.status_code == 200:
|
if target_page_response.status_code == 200:
|
||||||
content = re.findall(item_regex, target_page_response.text)
|
content = re.findall(item_regex, target_page_response.text)
|
||||||
@ -66,12 +69,13 @@ def check_items(settings_items):
|
|||||||
extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice, "image": image_link}
|
extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice, "image": image_link}
|
||||||
else:
|
else:
|
||||||
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
|
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
|
||||||
|
|
||||||
return extract
|
return extract
|
||||||
|
|
||||||
def get_attributes(attributes_raw):
|
def get_attributes(attributes_raw):
|
||||||
'''return a list of attributes from attributes raw string'''
|
'''return a list of attributes from attributes raw string'''
|
||||||
# id_regex = re.compile(r'([0-9]*)=')
|
# id_regex = re.compile(r'([0-9]*)=')
|
||||||
attr_regex = re.compile(r'#([0-9a-zA-Z ]*)')
|
attr_regex = re.compile(r'#([0-9a-zA-Z \.]*)')
|
||||||
|
|
||||||
# item_id = re.search(id_regex, attributes_raw).group(1)
|
# item_id = re.search(id_regex, attributes_raw).group(1)
|
||||||
attributes = re.findall(attr_regex, attributes_raw)
|
attributes = re.findall(attr_regex, attributes_raw)
|
||||||
@ -86,8 +90,9 @@ def fill_db(items_dict):
|
|||||||
def update_items():
|
def update_items():
|
||||||
'''add new history entries for items in database'''
|
'''add new history entries for items in database'''
|
||||||
in_db = get_item_keys()
|
in_db = get_item_keys()
|
||||||
new_entries = check_items(in_db)
|
for item in in_db:
|
||||||
fill_db(new_entries)
|
new_entry = check_item(item)
|
||||||
|
fill_db(new_entry)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ def init_db():
|
|||||||
@app.route('/update')
|
@app.route('/update')
|
||||||
def update_hist():
|
def update_hist():
|
||||||
print("update")
|
print("update")
|
||||||
fill_db(check_items())
|
update_items()
|
||||||
return 'Hello, World!'
|
return 'items updated'
|
||||||
|
|
||||||
@app.route('/add', methods=['POST'])
|
@app.route('/add', methods=['POST'])
|
||||||
def add_item():
|
def add_item():
|
||||||
|
@ -1,5 +1,28 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
from aliexpress import *
|
import requests, re, time, os
|
||||||
|
|
||||||
|
def update():
|
||||||
|
url = "http://127.0.0.1:8080/update"
|
||||||
|
response = requests.get(url)
|
||||||
|
print(response)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("ok")
|
regex_time = re.compile(r'([1-9][0-9]*)([smhd])')
|
||||||
|
formatted_time = os.environ.get('WAIT_TIME')
|
||||||
|
units = {
|
||||||
|
's':1,
|
||||||
|
'm':60,
|
||||||
|
'h':3600,
|
||||||
|
'd':86400
|
||||||
|
}
|
||||||
|
match = re.search(regex_time, formatted_time)
|
||||||
|
if bool(match):
|
||||||
|
raw_time = float(match.group(1))*units[match.group(2)]
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
update()
|
||||||
|
except:
|
||||||
|
print("update failed")
|
||||||
|
time.sleep(raw_time)
|
||||||
|
else:
|
||||||
|
print("WAIT_TIME incorrect")
|
||||||
|
@ -45,6 +45,7 @@ def add_history_entry(itemid, skuid, choice, attributes, image, price, currency,
|
|||||||
|
|
||||||
if not check_exist(itemid, skuid):
|
if not check_exist(itemid, skuid):
|
||||||
add_item(itemid, skuid, choice, attributes, image)
|
add_item(itemid, skuid, choice, attributes, image)
|
||||||
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
SELECT uuid
|
SELECT uuid
|
||||||
FROM item
|
FROM item
|
||||||
|
25
src/main.py
25
src/main.py
@ -3,27 +3,24 @@ import requests, re, json, os, yaml
|
|||||||
from db import *
|
from db import *
|
||||||
from aliexpress import *
|
from aliexpress import *
|
||||||
|
|
||||||
def get_conf():
|
|
||||||
'''return settings in settings.yaml file'''
|
|
||||||
with open(os.path.dirname(os.path.realpath(__file__))+"/settings.yaml", 'r') as conf_file:
|
|
||||||
settings = yaml.safe_load(conf_file)
|
|
||||||
return settings
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
settings = get_conf()
|
c_l = ["33058732737", ["1.2 M Probe"]]
|
||||||
|
print(check_item(c_l))
|
||||||
c_l = [("33058732737",["3 M Probe"])]
|
c_l = ["1005005769229528", ["2 E27 Clip EU"]]
|
||||||
print(check_items(c_l))
|
print(check_item(c_l))
|
||||||
c_l = [("1005005769229528", ["2 E27 Clip EU"])]
|
c_l = ["1005004130931033", []]
|
||||||
print(check_items(c_l))
|
print(check_item(c_l))
|
||||||
c_l = [("1005004130931033", [])]
|
print("########")
|
||||||
print(check_items(c_l))
|
c_l = ["1005006030884318", ["Natural White", "7W", "E27"]]
|
||||||
|
print(check_item(c_l))
|
||||||
|
|
||||||
|
# print(get_item_keys())
|
||||||
|
|
||||||
# initialize(settings["db"])
|
# initialize(settings["db"])
|
||||||
# fill_db(settings["db"], check_items(settings["item"]))
|
# fill_db(settings["db"], check_item(settings["item"]))
|
||||||
|
|
||||||
# export_csv(settings["db"])
|
# export_csv(settings["db"])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user