postgresql connection
This commit is contained in:
parent
5338204851
commit
a25985e936
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
cookies.json
|
cookies.json
|
||||||
test*.txt
|
test*.txt
|
||||||
|
settings.yaml
|
||||||
|
50
aliexpress.py
Normal file
50
aliexpress.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import requests, re, json, os, yaml
|
||||||
|
|
||||||
|
def load_cookies_from_file(file_path):
    """Load cookies from a JSON export into a ``{name: value}`` dict.

    The file is expected to contain a JSON array of objects; each object is
    read through its "Name raw" and "Content raw" keys.

    Args:
        file_path: path of the JSON cookie export.

    Returns:
        dict mapping each cookie name to its value (as a string). Entries
        without a name, or with an empty/null content, are left out.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Explicit encoding: JSON text is UTF-8 regardless of the platform locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        cookies_data = json.load(file)

    cookies_dict = {}

    for cookie_data in cookies_data:
        name_raw = cookie_data.get("Name raw", "")
        content_raw = cookie_data.get("Content raw", "")
        # A JSON null must not be turned into the literal cookie value "None".
        cookie_value = "" if content_raw is None else str(content_raw)

        # Keep only usable cookies: a nameless entry would otherwise be
        # stored under the "" key and overwrite any other nameless entry.
        if name_raw and cookie_value:
            cookies_dict[name_raw] = cookie_value

    return cookies_dict
|
||||||
|
|
||||||
|
def check_items(settings_id):
    """Fetch AliExpress item pages and extract the per-SKU offer data.

    Every id in ``settings_id`` is requested with a session that carries the
    cookies stored in ``./cookies.json``. A page that does not answer with
    HTTP 200 is reported on stdout and skipped; the remaining ids are still
    processed.

    Args:
        settings_id: iterable of item ids (strings); each one is inserted
            into ``https://aliexpress.com/item/<id>.html``.

    Returns:
        dict mapping ``"<item id>:<skuAttr>"`` to the tuple
        ``(skuId, quantity, discount_percentage, price, currency,
        choice_delivery)`` for every SKU found on the fetched pages, or
        ``None`` when no page could be fetched at all (including an empty
        ``settings_id``).
    """
    # Capture groups as returned by findall (0-based tuple index):
    #   0 skuAttr, 2 skuId, 3 availQuantity, 5 discount (optional),
    #   11 skuActivityAmount value (optional), 12 skuAmount currency,
    #   13 skuAmount value.
    item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
    choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
    #shipping_cost_regex = re.compile(r'')

    session = requests.Session()
    cookies_file_path = './cookies.json'
    cookies = load_cookies_from_file(cookies_file_path)
    session.cookies.update(cookies)

    extract = dict()
    fetched_any = False

    for item in settings_id:
        url = 'https://aliexpress.com/item/'+item+'.html'
        target_page_response = session.get(url)

        if target_page_response.status_code != 200:
            print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
            # Keep going: one unavailable item must not hide the others.
            continue

        fetched_any = True
        page_text = target_page_response.text
        is_choice = bool(choice_regex.search(page_text))

        for elem in item_regex.findall(page_text):
            key = item+':'+elem[0]
            # The discount block is optional in the page data.
            discount = 0 if len(elem[5]) == 0 else int(elem[5])
            # Prefer the activity price when the page carries one.
            price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11])

            # skuId, quantity, discount_percentage, price, currency, choice_delivery
            extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice)

    # Return only after every id was processed; None is kept for the
    # "nothing could be fetched" case so existing None checks still work.
    return extract if fetched_any else None
|
17
db.py
Normal file
17
db.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
def connect_db(db_settings):
    """Open a PostgreSQL connection using the given settings.

    Args:
        db_settings: mapping expanded as keyword arguments of
            ``psycopg2.connect``.

    Returns:
        The connection returned by ``psycopg2.connect``, or ``None`` when
        connecting raised; the error is printed instead of propagated.
    """
    print("Connecting to the PostgreSQL database...")
    connection = None
    try:
        connection = psycopg2.connect(**db_settings)
        print("Connection success")
    except Exception as failure:
        # psycopg2.DatabaseError is an Exception subclass, so this single
        # clause catches the same errors as listing both.
        print(failure)
    return connection
|
||||||
|
|
||||||
|
def close_db(conn):
    """Close ``conn`` and report it; do nothing when ``conn`` is ``None``.

    Args:
        conn: object exposing ``close()``, or ``None``.
    """
    if conn is None:
        return
    conn.close()
    print("Connection closed")
|
50
main.py
50
main.py
@ -1,4 +1,7 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
import requests, re, json, os, yaml
|
import requests, re, json, os, yaml
|
||||||
|
from db import *
|
||||||
|
from aliexpress import *
|
||||||
|
|
||||||
def get_conf():
|
def get_conf():
|
||||||
'''return settings in settings.yaml file'''
|
'''return settings in settings.yaml file'''
|
||||||
@ -6,54 +9,13 @@ def get_conf():
|
|||||||
settings = yaml.safe_load(conf_file)
|
settings = yaml.safe_load(conf_file)
|
||||||
return settings
|
return settings
|
||||||
|
|
||||||
def load_cookies_from_file(file_path):
|
|
||||||
with open(file_path, 'r') as file:
|
|
||||||
cookies_data = json.load(file)
|
|
||||||
|
|
||||||
cookies_dict = {}
|
|
||||||
|
|
||||||
for cookie_data in cookies_data:
|
|
||||||
name_raw = cookie_data.get("Name raw", "")
|
|
||||||
content_raw = cookie_data.get("Content raw", "")
|
|
||||||
cookie_value = f"{content_raw}"
|
|
||||||
|
|
||||||
if len(cookie_value) > 0:
|
|
||||||
cookies_dict[name_raw] = cookie_value
|
|
||||||
|
|
||||||
return cookies_dict
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
item_regex = re.compile(r'skuAttr\\\":\\\"(([0-9]*:[0-9]*#[a-zA-Z0-9 ]*;?)*)\\\",\\\"skuId\\\":([0-9]*),\\\"skuIdStr\\\":\\\"[0-9]*\\\",\\\"skuPropIds\\\":\\\"[0-9,]*\\\",\\\"skuVal\\\":{\\\"availQuantity\\\":([0-9]*),(\\\"discount\\\":\\\"([0-9]*)\\\",\\\"discountTips\\\":\\\"-[0-9]*%\\\",)?\\\"hideOriPrice\\\":(false|true),\\\"inventory\\\":([0-9]*),\\\"isActivity\\\":(true|false),\\\"optionalWarrantyPrice\\\":\[\],(\\\"skuActivityAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)},\\\"skuActivityAmountLocal\\\":\\\"[0-9]*,[0-9]*.\|[0-9]*\|[0-9]*\\\",)?\\\"skuAmount\\\":{\\\"currency\\\":\\\"([A-Z]*)\\\",\\\"formatedAmount\\\":\\\"[0-9]*,[0-9]*.\\\",\\\"value\\\":([0-9]*\.[0-9]*)}')
|
|
||||||
choice_regex = re.compile(r'businessModel\\\":\\\"CHOICE\\\"')
|
|
||||||
#shipping_cost_regex = re.compile(r'')
|
|
||||||
|
|
||||||
settings = get_conf()
|
settings = get_conf()
|
||||||
|
|
||||||
session = requests.Session()
|
print(check_items(settings["id"]))
|
||||||
cookies_file_path = './cookies.json'
|
|
||||||
cookies = load_cookies_from_file(cookies_file_path)
|
|
||||||
session.cookies.update(cookies)
|
|
||||||
|
|
||||||
extract = dict()
|
connection = connect_db(settings["db"])
|
||||||
|
close_db(connection)
|
||||||
for item in settings['id']:
|
|
||||||
url = 'https://aliexpress.com/item/'+item+'.html'
|
|
||||||
target_page_response = session.get(url)
|
|
||||||
|
|
||||||
if target_page_response.status_code == 200:
|
|
||||||
content = re.findall(item_regex, target_page_response.text)
|
|
||||||
is_choice = bool(re.search(choice_regex, target_page_response.text))
|
|
||||||
for elem in content:
|
|
||||||
key = item+':'+elem[0]
|
|
||||||
discount = 0 if len(elem[5]) == 0 else int(elem[5])
|
|
||||||
price = float(elem[13]) if len(elem[11]) == 0 else float(elem[11])
|
|
||||||
|
|
||||||
# skuId, quantity, discount_percentage, price, currency, choice_delivery
|
|
||||||
extract[key] = (elem[2], elem[3], discount, price, elem[12], is_choice)
|
|
||||||
#print(elem,"\n\n")
|
|
||||||
print(extract)
|
|
||||||
else:
|
|
||||||
print(f'Failed to fetch target page. Status code: {target_page_response.status_code}')
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user