regex catastrophic backtracking fix + background job update db
This commit is contained in:
parent
3b6e66c886
commit
c7a4bc711d
20
CronDockerfile
Normal file
20
CronDockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
# Image for the periodic updater: runs background.py, which calls the
# application's /update endpoint, then waits WAIT_TIME before the next call.
FROM python:bookworm

# Python block-buffers stdout when it is not attached to a TTY. Without this
# the script's print() output would stay in the buffer while it sleeps and
# would not show up in the container logs.
ENV PYTHONUNBUFFERED=1

COPY /Cronrequirements.txt /

RUN pip3 install --upgrade pip

RUN pip3 install -r /Cronrequirements.txt



COPY ./src/background.py /app/background.py

WORKDIR /app




# Delay between two updates: a positive integer followed by s, m, h or d.
ENV WAIT_TIME="1d"

CMD ["python", "background.py"]
|
1
Cronrequirements.txt
Normal file
1
Cronrequirements.txt
Normal file
@ -0,0 +1 @@
|
||||
requests
|
@ -1,8 +1,10 @@
|
||||
podman build --tag alipricetrack:1.0.0 -f ./Dockerfile
|
||||
|
||||
podman build --tag alipricetrack_cron:1.0.0 -f ./CronDockerfile
|
||||
|
||||
podman pod create --name aliexpress -p 8086:8080
|
||||
|
||||
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" -v /home/data/podman/aliexpress/db:/var/lib/postgresql/data:Z --name ali-db docker.io/postgres
|
||||
|
||||
podman run -d --pod=aliexpress -e POSTGRES_DB="aliexpress" -e POSTGRES_USER="postgres" -e POSTGRES_PASSWORD="postgres" --name ali-app alipricetrack:1.0.0
|
||||
|
||||
podman run -d --pod=aliexpress --name ali-cron alipricetrack_cron:1.0.0
|
||||
|
@ -19,7 +19,7 @@ def load_cookies_from_file(file_path):
|
||||
|
||||
return cookies_dict
|
||||
|
||||
def check_items(settings_items):
|
||||
def check_item(settings_item):
|
||||
'''
|
||||
return a dict with items data extracted from aliexpress.
|
||||
a file containing aliexpress login token cookies has to be provided in ./cookies.json (obtained with cookie-quick-manager https://github.com/ysard/cookie-quick-manager/) for accurate prices (no "welcome discount")
|
||||
@ -29,7 +29,7 @@ def check_items(settings_items):
|
||||
itemid is in aliexpress link to item page. attributes is a list of string. Each string is a choice value (for example which length, or which colour) if multiple items are on the same page, only one by category, order doesn't matter.
|
||||
'''
|
||||
|
||||
item_regex = re.compile(r'skuAttr\\\":\\\"([0-9:;]*#([a-zA-Z0-9 ]*;?)*)?\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"bulkOrder\\\":([0-9]*),)?(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
|
||||
item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9;:]*#[a-zA-Z0-9 \.]*;?)*)?\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"bulkOrder\\\":([0-9]*),)?(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
|
||||
choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
|
||||
magnifier_image_regex = re.compile(r'<meta property=\"og:image\" content=\"(https:[0-9a-zA-Z\/\-\_.]*)\"\/>')
|
||||
|
||||
@ -40,38 +40,42 @@ def check_items(settings_items):
|
||||
|
||||
extract = dict()
|
||||
|
||||
for (item,filter_attributes) in settings_items:
|
||||
url = 'https://aliexpress.com/item/'+item+'.html'
|
||||
target_page_response = session.get(url)
|
||||
if target_page_response.status_code == 200:
|
||||
content = re.findall(item_regex, target_page_response.text)
|
||||
is_choice = bool(re.search(choice_regex, target_page_response.text))
|
||||
for elem in content:
|
||||
if set(get_attributes(elem[0])) == set(filter_attributes):
|
||||
key = (item,tuple(filter_attributes))
|
||||
discount = 0 if len(elem[7]) == 0 else int(elem[7])
|
||||
price = float(elem[15]) if len(elem[13]) == 0 else float(elem[13])
|
||||
print(settings_item)
|
||||
item = settings_item[0]
|
||||
filter_attributes = settings_item[1]
|
||||
|
||||
# get item image
|
||||
image_link = re.findall(magnifier_image_regex, target_page_response.text)[0]
|
||||
for attr in filter_attributes:
|
||||
image_regex = re.compile(fr'\"propertyValueDefinitionName\":\"{attr}\",\"propertyValueIdLong\":[0-9]*,\"skuPropertyImageSummPath\":\"(https:\/\/[0-9a-zA-Z.\/\-\_]*)\"')
|
||||
image = re.findall(image_regex, target_page_response.text)
|
||||
if len(image)>0:
|
||||
image_link = image[0]
|
||||
image_link = re.sub(r'jpg_[0-9]+x[0-9]+', "jpg_800x800", image_link) # get bigger image instead of preview
|
||||
break
|
||||
url = 'https://aliexpress.com/item/'+str(item)+'.html'
|
||||
target_page_response = session.get(url)
|
||||
if target_page_response.status_code == 200:
|
||||
content = re.findall(item_regex, target_page_response.text)
|
||||
is_choice = bool(re.search(choice_regex, target_page_response.text))
|
||||
for elem in content:
|
||||
if set(get_attributes(elem[0])) == set(filter_attributes):
|
||||
key = (item,tuple(filter_attributes))
|
||||
discount = 0 if len(elem[7]) == 0 else int(elem[7])
|
||||
price = float(elem[15]) if len(elem[13]) == 0 else float(elem[13])
|
||||
|
||||
# get item image
|
||||
image_link = re.findall(magnifier_image_regex, target_page_response.text)[0]
|
||||
for attr in filter_attributes:
|
||||
image_regex = re.compile(fr'\"propertyValueDefinitionName\":\"{attr}\",\"propertyValueIdLong\":[0-9]*,\"skuPropertyImageSummPath\":\"(https:\/\/[0-9a-zA-Z.\/\-\_]*)\"')
|
||||
image = re.findall(image_regex, target_page_response.text)
|
||||
if len(image)>0:
|
||||
image_link = image[0]
|
||||
image_link = re.sub(r'jpg_[0-9]+x[0-9]+', "jpg_800x800", image_link) # get bigger image instead of preview
|
||||
break
|
||||
|
||||
# skuId, quantity, discount_percentage, price, currency, choice_delivery, image
|
||||
extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice, "image": image_link}
|
||||
else:
|
||||
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
|
||||
|
||||
# skuId, quantity, discount_percentage, price, currency, choice_delivery, image
|
||||
extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice, "image": image_link}
|
||||
else:
|
||||
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
|
||||
return extract
|
||||
|
||||
def get_attributes(attributes_raw):
|
||||
'''return a list of attributes from attributes raw string'''
|
||||
# id_regex = re.compile(r'([0-9]*)=')
|
||||
attr_regex = re.compile(r'#([0-9a-zA-Z ]*)')
|
||||
attr_regex = re.compile(r'#([0-9a-zA-Z \.]*)')
|
||||
|
||||
# item_id = re.search(id_regex, attributes_raw).group(1)
|
||||
attributes = re.findall(attr_regex, attributes_raw)
|
||||
@ -86,8 +90,9 @@ def fill_db(items_dict):
|
||||
def update_items():
|
||||
'''add new history entries for items in database'''
|
||||
in_db = get_item_keys()
|
||||
new_entries = check_items(in_db)
|
||||
fill_db(new_entries)
|
||||
for item in in_db:
|
||||
new_entry = check_item(item)
|
||||
fill_db(new_entry)
|
||||
|
||||
|
||||
|
||||
|
@ -16,8 +16,8 @@ def init_db():
|
||||
@app.route('/update')
|
||||
def update_hist():
|
||||
print("update")
|
||||
fill_db(check_items())
|
||||
return 'Hello, World!'
|
||||
update_items()
|
||||
return 'items updated'
|
||||
|
||||
@app.route('/add', methods=['POST'])
|
||||
def add_item():
|
||||
|
@ -1,5 +1,28 @@
|
||||
#!/usr/bin/python
|
||||
from aliexpress import *
|
||||
import requests, re, time, os
|
||||
|
||||
def update():
    '''
    ask the application to refresh its items by calling its /update endpoint.
    the response object is printed. exceptions raised by requests (connection error, timeout) are not caught here.
    '''
    url = "http://127.0.0.1:8080/update"
    # requests waits forever by default: bound the connection and the read so
    # that a stuck server cannot block the caller indefinitely.
    # NOTE(review): the 600s read timeout is a guess at how long a full update may take - adjust if needed
    response = requests.get(url, timeout=(5, 600))
    print(response, flush=True)
|
||||
|
||||
if __name__ == '__main__':
    print("ok", flush=True)
    # WAIT_TIME is a positive integer followed by a unit: s, m, h or d (for example "30m" or "1d")
    regex_time = re.compile(r'([1-9][0-9]*)([smhd])')
    # default to an empty string: the regex cannot be applied to None when the variable is unset
    formatted_time = os.environ.get('WAIT_TIME', '').strip()
    units = {
        's': 1,
        'm': 60,
        'h': 3600,
        'd': 86400,
    }
    # the whole value has to match: a plain search would read "100ms" as 100 minutes and "1.5h" as 5 hours
    match = regex_time.fullmatch(formatted_time)
    if match:
        # number of seconds to wait between two updates
        raw_time = float(match.group(1))*units[match.group(2)]
        while True:
            try:
                update()
            except Exception as error:
                # a failed update must not stop the loop, but the reason is reported.
                # unlike a bare except, KeyboardInterrupt and SystemExit still stop the process
                print(f"update failed: {error}", flush=True)
            time.sleep(raw_time)
    else:
        print("WAIT_TIME incorrect", flush=True)
|
||||
|
13
src/db.py
13
src/db.py
@ -45,12 +45,13 @@ def add_history_entry(itemid, skuid, choice, attributes, image, price, currency,
|
||||
|
||||
if not check_exist(itemid, skuid):
|
||||
add_item(itemid, skuid, choice, attributes, image)
|
||||
cursor.execute("""
|
||||
SELECT uuid
|
||||
FROM item
|
||||
WHERE itemid = %s
|
||||
AND skuid = %s
|
||||
""", (itemid, skuid))
|
||||
|
||||
cursor.execute("""
|
||||
SELECT uuid
|
||||
FROM item
|
||||
WHERE itemid = %s
|
||||
AND skuid = %s
|
||||
""", (itemid, skuid))
|
||||
|
||||
uuid = cursor.fetchall()[0]
|
||||
|
||||
|
25
src/main.py
25
src/main.py
@ -3,27 +3,24 @@ import requests, re, json, os, yaml
|
||||
from db import *
|
||||
from aliexpress import *
|
||||
|
||||
def get_conf():
    '''load settings.yaml, located in the same directory as this file, and return its parsed content'''
    script_dir = os.path.dirname(os.path.realpath(__file__))
    settings_path = script_dir+"/settings.yaml"
    with open(settings_path, 'r') as settings_file:
        return yaml.safe_load(settings_file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
settings = get_conf()
|
||||
|
||||
c_l = [("33058732737",["3 M Probe"])]
|
||||
print(check_items(c_l))
|
||||
c_l = [("1005005769229528", ["2 E27 Clip EU"])]
|
||||
print(check_items(c_l))
|
||||
c_l = [("1005004130931033", [])]
|
||||
print(check_items(c_l))
|
||||
c_l = ["33058732737", ["1.2 M Probe"]]
|
||||
print(check_item(c_l))
|
||||
c_l = ["1005005769229528", ["2 E27 Clip EU"]]
|
||||
print(check_item(c_l))
|
||||
c_l = ["1005004130931033", []]
|
||||
print(check_item(c_l))
|
||||
print("########")
|
||||
c_l = ["1005006030884318", ["Natural White", "7W", "E27"]]
|
||||
print(check_item(c_l))
|
||||
|
||||
# print(get_item_keys())
|
||||
|
||||
# initialize(settings["db"])
|
||||
# fill_db(settings["db"], check_items(settings["item"]))
|
||||
# fill_db(settings["db"], check_item(settings["item"]))
|
||||
|
||||
# export_csv(settings["db"])
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user