forked from andrusch/python-web-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
30 lines (26 loc) · 1.12 KB
/
Copy pathmain.py
File metadata and controls
30 lines (26 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from bs4 import BeautifulSoup
import re
from lib import *
# Executed at import time, before any function in this module is defined.
# NOTE(review): load_envs comes from the star import of `lib`; presumably it
# loads environment configuration used by the other lib helpers -- confirm.
load_envs()
def parse_jcrew_product_page(html: str):
    """Extract product/price pairs from the markup of a product-listing page.

    Every ``<ul>`` whose ``data-qaid`` attribute equals
    ``arrProductListItem0ArrayWrapper`` is searched, and each ``<li>`` found
    inside it is inspected. An item is kept when it contains both an ``<h2>``
    element and an element whose ``data-qaid`` attribute matches the pattern
    ``PriceWasFormattedPrice``.

    Kept items have their two text values printed; items missing either
    element are dumped to stdout between dashed separator lines so they can
    be inspected by hand.

    Args:
        html: Markup of the page to parse.

    Returns:
        A list of dicts of the form ``{"price": <text>, "product": <text>}``,
        one per kept item, in the order the items were encountered.
    """
    products = []
    soup = BeautifulSoup(html, 'html.parser')

    # The pattern does not depend on the item being inspected, so build it
    # once instead of on every loop iteration.
    price_pattern = re.compile('PriceWasFormattedPrice')
    separator = '-----------------------------------------'

    section = soup.find_all(
        name="ul", attrs={"data-qaid": "arrProductListItem0ArrayWrapper"}
    )
    for tag in section:
        # find_all is the supported spelling; findAll is a legacy alias.
        for list_item in tag.find_all('li'):
            product = list_item.find(name="h2")
            price = list_item.find(attrs={"data-qaid": price_pattern})
            if product and price:
                print(product.text)
                print(price.text)
                products.append({"price": price.text, "product": product.text})
            else:
                # Show the raw item so unparsed entries can be diagnosed.
                print(separator)
                print(list_item)
                print(separator)
    return products
if __name__ == "__main__":
    # Script entry point: fetch one listing page, parse it, and hand every
    # parsed product/price pair to save_to_supabase together with the URL.
    listing_url = "https://www.jcrew.com/all/womens?intcmp=newHP_oneup_1_null_allwomens&om_i=newHP_p1"
    page_markup = get_website(listing_url)
    parsed_items = parse_jcrew_product_page(page_markup)
    for entry in parsed_items:
        save_to_supabase(listing_url, entry["product"], entry["price"])