-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwgetpro.py
More file actions
47 lines (37 loc) · 1.44 KB
/
wgetpro.py
File metadata and controls
47 lines (37 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import requests
import bs4
from urllib.parse import unquote
"""
Download the files linked from a URL's directory listing with this Python script.
Written as a lightweight alternative to wget.
"""
# Root URL of the remote directory listing to download; it is passed to
# downloader() at the bottom of the script. The path is percent-encoded and
# ends with "/" so that hrefs taken from the listing resolve relative to it.
directory = "http://151.80.40.155/tlmc/%5BC.H.S%5D/2017.05.07%20%5BCHS-0029%5D%20t%26T.%20%28tpz%20And%20TOUHOU.%29%20from%202005%20%5B%E4%BE%8B%E5%A4%A7%E7%A5%AD14%5D/"
def downloader(directory: str, dest: str = "down", timeout: float = 30.0) -> None:
    """Mirror an HTTP directory listing into a local folder.

    Fetches the index page at ``directory``, then walks every ``<a href>``
    that points strictly below it. Files are streamed into ``dest``;
    sub-directories (hrefs ending in ``/``) are mirrored recursively into a
    matching sub-folder of ``dest``.

    Args:
        directory: URL of the directory listing. A trailing ``/`` is added
            if missing so that relative hrefs resolve inside it.
        dest: Local directory to write into; created if it does not exist.
        timeout: Seconds to wait for the server on each request, so a stalled
            connection cannot hang the script forever.

    Raises:
        requests.RequestException: On connection failures or timeouts.
        OSError: If a local directory or file cannot be created or written.

    Non-200 responses are reported on stdout and skipped rather than raised.
    """
    from urllib.parse import urljoin, urlsplit

    if not directory.endswith("/"):
        directory += "/"

    # The index page is small and read in full, so it is not streamed.
    page = requests.get(directory, timeout=timeout)
    if page.status_code != 200:
        print("Error: " + str(page.status_code))
        return
    soup = bs4.BeautifulSoup(page.text, "html.parser")

    # Create the destination once, up front, instead of once per file.
    os.makedirs(dest, exist_ok=True)

    # href=True skips anchors without an href, which would otherwise be None.
    for link in soup.find_all("a", href=True):
        hr = link["href"]
        target = urljoin(directory, hr)

        # Follow only links strictly below this directory. This drops the
        # parent link ("../"), absolute and external links, and self links.
        if target == directory or not target.startswith(directory):
            continue
        # Column-sort links of auto-generated indexes ("?C=N;O=D") and
        # in-page anchors are not files.
        parts = urlsplit(target)
        if parts.query or parts.fragment:
            continue

        # hr is URL-encoded, so decode it fully (not just %20) to get the
        # real file name.
        relative = unquote(target[len(directory):])
        name = relative.rstrip("/")
        # The name comes from the remote page: accept only a single plain
        # path component so an encoded "../" cannot escape ``dest``.
        if name in ("", ".", "..") or os.path.basename(name) != name:
            continue
        local_path = os.path.join(dest, name)

        if relative.endswith("/"):
            # Sub-directory: mirror it into a matching local folder.
            downloader(target, local_path, timeout)
            continue

        # Download the file; the context manager releases the connection
        # even when the response is skipped or writing fails.
        with requests.get(target, stream=True, timeout=timeout) as np:
            if np.status_code != 200:
                print("Error: " + str(np.status_code))
                continue
            print("Downloading: " + hr)
            with open(local_path, "wb") as f:
                for chunk in np.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        f.write(chunk)
        print("Done: " + name)
# Script entry point: start the download as soon as the module is executed.
# NOTE(review): this also runs when the module is imported, since there is no
# `if __name__ == "__main__":` guard.
downloader(directory)