specific dir for python app
This commit is contained in:
88
src/aliexpress.py
Normal file
88
src/aliexpress.py
Normal file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/python
|
||||
import requests, re, json, os, yaml
|
||||
from db import *
|
||||
|
||||
def load_cookies_from_file(file_path):
    '''Load cookies from a JSON file and return a dict usable in a request session.

    The file must contain a JSON list of objects. For each object the
    "Name raw" entry is used as cookie name and the "Content raw" entry as
    cookie value (converted to a string).

    Entries are skipped when the name is missing or empty, or when the
    content is missing, null or empty, so that no nameless or placeholder
    cookie ends up in the session.

    file_path: path of the JSON cookie export.
    Returns a dict mapping cookie name to cookie value.
    Raises OSError if the file cannot be opened and json.JSONDecodeError
    if it is not valid JSON.
    '''
    # JSON exports are UTF-8; do not depend on the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        cookies_data = json.load(file)

    cookies_dict = {}

    for cookie_data in cookies_data:
        name_raw = cookie_data.get("Name raw")
        content_raw = cookie_data.get("Content raw")

        # A cookie without a name cannot be sent; a null value would
        # otherwise be stringified to the literal text "None".
        if not name_raw or content_raw is None:
            continue

        cookie_value = str(content_raw)
        if cookie_value:
            cookies_dict[name_raw] = cookie_value

    return cookies_dict
|
||||
|
||||
def check_items(settings_items):
    '''
    Return a dict with item data extracted from aliexpress item pages.

    A file containing aliexpress login token cookies has to be provided in
    ./cookies.json (obtained with cookie-quick-manager
    https://github.com/ysard/cookie-quick-manager/) for accurate prices
    (no "welcome discount").

    settings_items is an iterable of (itemid, attributes) pairs:
    - itemid is the identifier found in the aliexpress link to the item page.
    - attributes is a list of strings. Each string is a choice value (for
      example which length, or which colour) when several variants are sold
      on the same page; only one value per category, order doesn't matter.

    The returned dict is keyed by (itemid, tuple(attributes)) and each value
    is a dict with the keys:
    skuid, quantity, discount_percentage, price, currency, choice_delivery, image

    Pages answering with a status code other than 200 are reported on
    stdout and skipped. "image" is None when no image URL can be found.
    '''
    # findall() tuple indexes used below (0-based, one slot per regex group):
    # [0] raw attribute string, [2] skuId, [3] availQuantity,
    # [5] discount percentage (empty if absent),
    # [11] skuActivityAmount value (empty if absent),
    # [12] skuAmount currency, [13] skuAmount value.
    item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
    choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
    magnifier_image_regex = re.compile(r'<meta property=\"og:image\" content=\"(https:[0-9a-zA-Z\/\-\_.]*)\"\/>')

    session = requests.Session()
    cookies_file_path = './cookies.json'
    cookies = load_cookies_from_file(cookies_file_path)
    session.cookies.update(cookies)

    extract = dict()

    for (item, filter_attributes) in settings_items:
        # f-string so that an id loaded as an integer still builds a URL.
        url = f'https://aliexpress.com/item/{item}.html'
        # NOTE(review): no timeout is set, so a stalled connection blocks forever.
        target_page_response = session.get(url)
        if target_page_response.status_code != 200:
            print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
            continue

        page_text = target_page_response.text
        is_choice = bool(choice_regex.search(page_text))
        wanted_attributes = set(filter_attributes)

        for elem in item_regex.findall(page_text):
            # Keep only the variant whose attribute values are exactly the requested ones.
            if set(get_attributes(elem[0])) != wanted_attributes:
                continue

            key = (item, tuple(filter_attributes))
            discount = int(elem[5]) if elem[5] else 0
            # The activity (promotional) amount wins over the regular amount when present.
            price = float(elem[11]) if elem[11] else float(elem[13])

            # get item image: page-level og:image by default (None if the
            # meta tag is missing), replaced by the first variant-specific
            # image found for one of the requested attributes.
            page_images = magnifier_image_regex.findall(page_text)
            image_link = page_images[0] if page_images else None
            for attr in filter_attributes:
                # Escape the attribute so it is matched literally, not as a pattern.
                escaped_attr = re.escape(str(attr))
                image_regex = re.compile(fr'\"propertyValueDefinitionName\":\"{escaped_attr}\",\"propertyValueIdLong\":[0-9]*,\"skuPropertyImageSummPath\":\"(https:\/\/[0-9a-zA-Z.\/\-\_]*)\"')
                image = image_regex.findall(page_text)
                if image:
                    image_link = image[0]
                    image_link = re.sub(r'jpg_[0-9]+x[0-9]+', "jpg_800x800", image_link)  # get bigger image instead of preview
                    break

            # skuId, quantity, discount_percentage, price, currency, choice_delivery, image
            extract[key] = {"skuid": elem[2], "quantity": elem[3], "discount_percentage": discount, "price": price, "currency": elem[12], "choice_delivery": is_choice, "image": image_link}

    return extract
|
||||
|
||||
def get_attributes(attributes_raw):
    '''Return the list of attribute values contained in a raw attribute string.

    A value is the (possibly empty) run of ASCII letters, digits and spaces
    that directly follows a '#' character; values are returned in order of
    appearance.
    '''
    return re.findall(r'#([0-9a-zA-Z ]*)', attributes_raw)
|
||||
|
||||
def fill_db(db_settings, items_dict):
    '''Record one history entry in the database per entry of items_dict.

    Keys of items_dict are indexed as (itemid, attributes); values are dicts
    providing skuid, choice_delivery, image, price, currency, quantity and
    discount_percentage.
    '''
    for item_key, item_data in items_dict.items():
        itemid = item_key[0]
        attributes = list(item_key[1])
        add_history_entry(
            db_settings,
            itemid,
            item_data["skuid"],
            item_data["choice_delivery"],
            attributes,
            item_data["image"],
            item_data["price"],
            item_data["currency"],
            item_data["quantity"],
            item_data["discount_percentage"],
        )
|
||||
|
||||
|
||||
|
||||
|
33
src/app.py
Normal file
33
src/app.py
Normal file
@ -0,0 +1,33 @@
|
||||
from flask import Flask, request, jsonify
|
||||
import requests, re, json, os, yaml
|
||||
from db import *
|
||||
from aliexpress import *
|
||||
|
||||
# Flask application object; the @app.route decorators below register their views on it.
app = Flask(__name__)
|
||||
|
||||
def get_conf():
    '''Parse and return the content of the settings.yaml file stored next to this module.'''
    module_dir = os.path.dirname(os.path.realpath(__file__))
    settings_path = module_dir+"/settings.yaml"
    with open(settings_path, 'r') as conf_file:
        return yaml.safe_load(conf_file)
|
||||
|
||||
@app.route('/')
def hello_world():
    '''Answer requests on the root URL with a fixed greeting.'''
    greeting = 'Hello, World!'
    return greeting
|
||||
|
||||
@app.route('/datahistory',methods = ['POST', 'GET'])
def data_history_request():
    '''Serve the history table as JSON.

    GET returns the rows given by get_history() for the database configured
    in settings.yaml. The route also accepts POST, but no POST behaviour is
    implemented: instead of falling through and returning None (which Flask
    rejects with a server error), an explicit 405 response is returned.
    '''
    if request.method != 'GET':
        return jsonify({"error": "method not allowed"}), 405, {"Allow": "GET"}

    print("fetching data history")
    settings = get_conf()
    return jsonify(get_history(settings["db"]))
|
||||
|
||||
@app.route('/dataitem',methods = ['POST', 'GET'])
def data_item_request():
    '''Serve the item table as JSON.

    GET returns the rows given by get_item() for the database configured in
    settings.yaml. The route also accepts POST, but no POST behaviour is
    implemented: instead of falling through and returning None (which Flask
    rejects with a server error), an explicit 405 response is returned.
    '''
    if request.method != 'GET':
        return jsonify({"error": "method not allowed"}), 405, {"Allow": "GET"}

    print("fetching data item")
    settings = get_conf()
    return jsonify(get_item(settings["db"]))
|
||||
|
||||
if __name__ == '__main__':
    # Direct execution only: start Flask's built-in server with debug mode enabled.
    app.run(debug=True)
|
154
src/db.py
Normal file
154
src/db.py
Normal file
@ -0,0 +1,154 @@
|
||||
#!/usr/bin/python
|
||||
import psycopg2
|
||||
import csv
|
||||
import os
|
||||
|
||||
def connect_db(db_settings):
    '''Open a connection to the PostgreSQL database and return it.

    db_settings is unpacked as keyword arguments of psycopg2.connect.
    When connecting fails the error is printed and None is returned
    instead of raising.
    '''
    connection = None
    print("Connecting to the PostgreSQL database...")
    try:
        connection = psycopg2.connect(**db_settings)
        print("Connection success")
    except Exception as failure:  # psycopg2.DatabaseError is an Exception subclass too
        print(failure)
    return connection
|
||||
|
||||
def add_item(db_settings, itemid, skuid, choice, attributes, image):
    '''Insert a new item in the database.

    The row's uuid is taken from the uuid_sequence sequence; the other
    columns receive the given itemid, skuid, choice, attributes and image.
    A dedicated connection is opened for the insert and is always closed,
    even when the statement or the commit fails (nothing is committed in
    that case).
    '''
    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute("""
                INSERT INTO item (uuid, itemid, skuid, choice, attributes, image)
                VALUES (nextval('uuid_sequence'), %s, %s, %s, %s, %s)
            """, (itemid, skuid, choice, attributes, image))
        connection.commit()
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()
|
||||
|
||||
def add_history_entry(db_settings, itemid, skuid, choice, attributes, image, price, currency, quantity, discount_percentage):
    '''Add a new history entry for an item in the database. If item isn't in database yet, add it.

    The item is looked up by (itemid, skuid). When it does not exist it is
    created through add_item() and looked up again to obtain its uuid. A
    history row stamped with LOCALTIMESTAMP is then inserted for that uuid.
    The connection is always closed, even when a statement fails (nothing
    is committed on this connection in that case).

    Raises LookupError if the item still cannot be found after add_item().
    '''
    select_item_uuid = """
        SELECT uuid
        FROM item
        WHERE itemid = %s
        AND skuid = %s
    """

    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute(select_item_uuid, (itemid, skuid))
            row = cursor.fetchone()

            if row is None:
                # add_item commits on its own connection; query again to read the new row.
                add_item(db_settings, itemid, skuid, choice, attributes, image)
                cursor.execute(select_item_uuid, (itemid, skuid))
                row = cursor.fetchone()
                if row is None:
                    raise LookupError(f"item {itemid}/{skuid} not found after insertion")

            # Pass the scalar uuid, not the whole one-column result row.
            item_uuid = row[0]

            cursor.execute("""
                INSERT INTO history (uuid, price, currency, quantity, discount_percentage, h_timestamp)
                VALUES (%s, %s, %s, %s, %s, (SELECT LOCALTIMESTAMP))
            """, (item_uuid, price, currency, quantity, discount_percentage))
        connection.commit()
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()
|
||||
|
||||
def get_history(db_settings):
    '''Return history data from database.

    The result is the list of rows fetched from the history table, with the
    columns uuid, quantity, discount_percentage, price, currency and
    h_timestamp in that order. The cursor and the connection are released
    even when the query fails.
    '''
    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT uuid, quantity, discount_percentage, price, currency, h_timestamp
                FROM history
            """)
            results = cursor.fetchall()
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()
    return results
|
||||
|
||||
def get_item(db_settings):
    '''Return items data from database.

    The result is the list of rows fetched from the item table, with the
    columns uuid, itemid, skuid, choice, attributes and image in that order.
    The cursor and the connection are released even when the query fails.
    '''
    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT uuid, itemid, skuid, choice, attributes, image
                FROM item
            """)
            results = cursor.fetchall()
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()
    return results
|
||||
|
||||
def export_csv(db_settings):
    '''Join item and history data from database and export it to output.csv.

    The file is written in the directory of this module (not the current
    working directory). Its first row holds the column names reported by
    the cursor, followed by one row per joined item/history record. The
    cursor and the connection are released even when the query fails.
    '''
    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT i.uuid, i.itemid, i.skuid, i.choice, i.attributes, i.image, h.quantity, h.discount_percentage, h.price, h.currency, h.h_timestamp
                FROM item i, history h
                WHERE i.uuid = h.uuid
            """)
            results = cursor.fetchall()
            # Read the column names while the cursor is still open.
            column_names = [col[0] for col in cursor.description]
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires (avoids blank lines between rows on Windows).
    with open(os.path.dirname(os.path.realpath(__file__))+"/output.csv", 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        # write the column names
        writer.writerow(column_names)
        # write the query results
        writer.writerows(results)
|
||||
|
||||
|
||||
def initialize(db_settings):
    '''Create tables and sequence in database. Drop them first if they already exist.

    All statements run in order on one connection and are committed
    together; the connection is closed even when a statement fails (nothing
    is committed in that case). "Database initialized" is printed on success.
    '''
    # Order matters: history references item, so it is dropped first and created last.
    statements = (
        """
        DROP TABLE IF EXISTS history
        """,
        """
        DROP TABLE IF EXISTS item
        """,
        """
        DROP SEQUENCE IF EXISTS uuid_sequence
        """,
        """
        CREATE SEQUENCE uuid_sequence
        INCREMENT BY 1
        START WITH 1
        """,
        """
        CREATE TABLE item
        (
        uuid int,
        itemid bigint,
        skuid bigint,
        choice boolean,
        attributes text[],
        image text,
        primary key (uuid)
        )
        """,
        """
        CREATE TABLE history
        (
        uuid int,
        quantity integer,
        discount_percentage numeric(2),
        price money,
        currency varchar(4),
        h_timestamp timestamp,
        foreign key (uuid) references item(uuid),
        primary key (uuid, h_timestamp)
        )
        """,
    )

    connection = connect_db(db_settings)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            for statement in statements:
                cursor.execute(statement)
        connection.commit()
    finally:
        # connect_db returns None on failure; only close a real connection.
        if connection is not None:
            connection.close()
    print("Database initialized")
|
17
src/gunicorn_config.py
Normal file
17
src/gunicorn_config.py
Normal file
@ -0,0 +1,17 @@
|
||||
import os
|
||||
|
||||
|
||||
|
||||
# Number of workers, taken from GUNICORN_PROCESSES (defaults to 2).
workers = int(os.getenv('GUNICORN_PROCESSES', '2'))

# Number of threads, taken from GUNICORN_THREADS (defaults to 4).
threads = int(os.getenv('GUNICORN_THREADS', '4'))

# timeout = int(os.environ.get('GUNICORN_TIMEOUT', '120'))

# Listening address, taken from GUNICORN_BIND (defaults to all interfaces, port 8080).
bind = os.getenv('GUNICORN_BIND', '0.0.0.0:8080')

# NOTE(review): '*' accepts forwarded headers from any address - confirm that
# the service is only reachable through a trusted proxy.
forwarded_allow_ips = '*'

secure_scheme_headers = {'X-Forwarded-Proto': 'https'}
|
21
src/main.py
Normal file
21
src/main.py
Normal file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/python
|
||||
import requests, re, json, os, yaml
|
||||
from db import *
|
||||
from aliexpress import *
|
||||
|
||||
def get_conf():
    '''Parse and return the content of the settings.yaml file stored next to this script.'''
    script_dir = os.path.dirname(os.path.realpath(__file__))
    settings_path = script_dir+"/settings.yaml"
    with open(settings_path, 'r') as conf_file:
        return yaml.safe_load(conf_file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    settings = get_conf()
    db_settings = settings["db"]

    # initialize(db_settings)

    # Scrape the configured items, store one history entry per item, then export everything.
    items_data = check_items(settings["item"])
    fill_db(db_settings, items_data)

    export_csv(db_settings)
|
||||
|
Reference in New Issue
Block a user