-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebDataToExcel.py
More file actions
30 lines (22 loc) · 785 Bytes
/
Copy pathWebDataToExcel.py
File metadata and controls
30 lines (22 loc) · 785 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape every <h2> heading on the page together with the paragraph that
# belongs to it, then save the pairs to an Excel workbook.
url = "https://www.geeksforgeeks.org/fundamentals-of-algorithms/"

# requests applies no timeout by default, so an unresponsive server would
# block this script forever. With a timeout, a stalled connection raises
# requests.exceptions.Timeout instead of hanging.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Parallel lists: headings[i] is paired with content[i], so both must
    # receive exactly one entry per <h2>.
    headings = []
    content = []
    for heading in soup.find_all('h2'):
        headings.append(heading.text.strip())

        # Find whichever comes first after this heading: a paragraph or the
        # next <h2>. Searching for 'p' alone would scan the whole rest of
        # the document and could attach a later section's paragraph to a
        # heading that has no paragraph of its own.
        following = heading.find_next(['p', 'h2'])
        if following is not None and following.name == 'p':
            content.append(following.text.strip())
        else:
            # No paragraph before the next heading (or the end of the page).
            content.append("")

    # One row per heading; index=False keeps the pandas row index out of
    # the spreadsheet.
    df = pd.DataFrame({'Heading': headings, 'Content': content})
    df.to_excel("scramp.xlsx", sheet_name="Headings and Content", index=False)
    print("Data writed")
else:
    # Any non-200 response is reported and nothing is written.
    print("Error")