-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml_to_csv.py
More file actions
97 lines (83 loc) · 4.05 KB
/
Copy pathhtml_to_csv.py
File metadata and controls
97 lines (83 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import csv
import urllib.parse
from datetime import datetime, timezone

from bs4 import BeautifulSoup
# First post ID; every bookmark gets the next consecutive ID, which is used
# for the guid URL and for the descending sort order.
Init_ID = 187

# Base URL embedded in each row's guid.
SITE_BASE_URL = 'http://192.168.0.180:8088'

# Timestamp layout written to all date columns.
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

# Header row: wp_posts columns plus the extra _sites_* columns.
CSV_HEADER = [
    'ID', 'post_author', 'post_date', 'post_date_gmt', 'post_content',
    'post_title', '_sites_link', '_sites_sescribe', '_sites_order',
    'post_excerpt', 'post_status', 'comment_status',
    'ping_status', 'post_password', 'post_name', 'to_ping', 'pinged',
    'post_modified', 'post_modified_gmt', 'post_content_filtered',
    'post_parent', 'guid', 'menu_order', 'post_type', 'post_mime_type',
    'comment_count',
]


def format_post_dates(add_date, fallback):
    """Return ``(local_str, gmt_str)`` for a bookmark's ``add_date`` value.

    Args:
        add_date: The raw ``add_date`` attribute (Unix timestamp in seconds
            as a string), or ``None``/empty when the attribute is absent.
        fallback: ``(local_str, gmt_str)`` pair returned when ``add_date`` is
            missing, non-positive, or cannot be converted.

    Returns:
        A pair of strings in ``DATE_FORMAT``: the timestamp in the system's
        local time zone and the same instant in UTC.
    """
    if not add_date:
        return fallback
    try:
        timestamp = int(add_date)
        if timestamp <= 0:
            # A zero/negative stamp means "unknown"; avoid emitting 1970 dates.
            return fallback
        moment_utc = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        # astimezone() with no argument converts to the system local zone.
        local_str = moment_utc.astimezone().strftime(DATE_FORMAT)
    except (TypeError, ValueError, OverflowError, OSError):
        # Unparsable or out-of-range timestamp: keep the best-effort fallback.
        return fallback
    return local_str, moment_utc.strftime(DATE_FORMAT)


def sites_order(total_links, post_id):
    """Return the descending sort value for ``post_id``.

    The first link (``post_id == Init_ID``) gets the largest value and each
    following link gets a value one smaller.
    """
    return total_links + 2 * Init_ID - post_id - 1


def build_row(title, link, post_id, order, post_date_str, post_date_gmt_str):
    """Build one CSV row whose cells line up with ``CSV_HEADER``.

    Args:
        title: Bookmark text, used as ``post_title`` and (URL-encoded) as
            ``post_name``.
        link: Bookmark URL, written to ``_sites_link``.
        post_id: Sequential ID used in the ``guid`` URL.
        order: Value for ``_sites_order``.
        post_date_str: Local timestamp for ``post_date``/``post_modified``.
        post_date_gmt_str: UTC timestamp for the ``*_gmt`` columns.

    Returns:
        A list with one cell per ``CSV_HEADER`` column.
    """
    return [
        # NOTE(review): ID is written blank while the guid uses post_id;
        # presumably the importing side assigns IDs -- confirm.
        "",                                             # ID
        1,                                              # post_author
        post_date_str,                                  # post_date
        post_date_gmt_str,                              # post_date_gmt
        '',                                             # post_content
        title,                                          # post_title
        link,                                           # _sites_link
        '',                                             # _sites_sescribe
        order,                                          # _sites_order
        '',                                             # post_excerpt
        'publish',                                      # post_status
        'open',                                         # comment_status
        '',                                             # ping_status
        '',                                             # post_password
        urllib.parse.quote(title),                      # post_name
        '',                                             # to_ping
        '',                                             # pinged
        post_date_str,                                  # post_modified
        post_date_gmt_str,                              # post_modified_gmt
        '',                                             # post_content_filtered
        0,                                              # post_parent
        f'{SITE_BASE_URL}/sites/{post_id}.html',        # guid
        0,                                              # menu_order
        'sites',                                        # post_type
        '',                                             # post_mime_type
        0,                                              # comment_count
    ]


def main():
    """Convert ``bookmarks.html`` into ``bookmarks.csv``.

    Reads the exported bookmarks HTML file, writes one CSV row per ``<a>``
    element (preceded by ``CSV_HEADER``), and prints a completion message.
    """
    # Read the exported HTML bookmarks file.
    with open('bookmarks.html', 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # Take a single "now" so the local and GMT fallbacks are the same instant.
    now_utc = datetime.now(timezone.utc)
    current_time_str = now_utc.astimezone().strftime(DATE_FORMAT)
    current_time_gmt_str = now_utc.strftime(DATE_FORMAT)
    fallback = (current_time_str, current_time_gmt_str)

    # Collect the links once; the count is needed for the sort order.
    links = soup.find_all('a')
    total_links = len(links)

    with open('bookmarks.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        for post_id, a in enumerate(links, start=Init_ID):
            post_date_str, post_date_gmt_str = format_post_dates(
                a.get('add_date'), fallback
            )
            writer.writerow(build_row(
                a.text.strip(),
                a.get('href', ''),
                post_id,
                sites_order(total_links, post_id),
                post_date_str,
                post_date_gmt_str,
            ))

    print("转换完成,已生成 bookmarks.csv")
    print(f"系统时间: {current_time_str}")


if __name__ == "__main__":
    main()