-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsite_survey.py
More file actions
executable file
·106 lines (78 loc) · 4.06 KB
/
site_survey.py
File metadata and controls
executable file
·106 lines (78 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""Produces a declaration of the contents of my website, as described at
http://patrickbrianmooney.nfshost.com/~patrick/feeds/geographical-surveys/
This script is copyright 2017-20 by Patrick Mooney. It is licensed under the GNU
GPL, either version 3 or (at your option) any later version. See the file
LICENSE.md for details.
"""
import bz2
import datetime
import html
import requests
import subprocess
import time
import uuid
import file_utils as fu # https://github.com/patrick-brian-mooney/personal-library/blob/master/
# Root of the local copy of the website. In the script's main block this prefix
# is replaced with remote_website_root to turn each local path into a URL.
local_website_root = '/website-root'
# Text file whose contents are HTML-escaped and embedded as the <content> of
# the feed entry built by produce_feed().
description_file = '/home/patrick/Documents/programming/python-library/site_survey_description.txt'
# Directory prefix that produce_feed() uses when building the path of the
# compressed feed file it writes.
survey_directory = '/~patrick/feeds/geographical-surveys/'
# Scheme and host of the public site (no trailing slash).
remote_website_root = 'http://patrickbrianmooney.nfshost.com'
# Prepended to a page's URL by IA_archive() to form the URL it requests.
IA_save_prefix = 'http://web.archive.org/save/'
# Passed to fu.get_files_list() together with the root directory; presumably
# paths containing any of these substrings are left out of the listing --
# TODO confirm against file_utils.
skip_strings_list = ['.git', '.thumbnails', 'IF/']
def tz_offset():
    """Return the absolute value of the local timezone's UTC offset, in hours.

    The result is a non-negative float (8.0 for UTC-8 and for UTC+8 alike;
    5.5 for UTC+5:30). The sign of the offset is discarded, so a caller that
    needs the direction must determine it separately.
    """
    # Ask a single timezone-aware timestamp for its offset instead of
    # subtracting two separate clock reads. This also avoids
    # datetime.utcnow(), which is deprecated as of Python 3.12.
    local_offset = datetime.datetime.now().astimezone().utcoffset()
    return abs(local_offset.total_seconds()) / 3600
def IA_archive(files_list, delay=3, timeout=120):
    """Get the Internet Archive to save all of the files in FILES_LIST.

    FILES_LIST is an iterable of URLs (strings). For each one, the URL formed
    by prefixing it with IA_save_prefix is requested and the response body is
    read to the end. DELAY is the number of seconds to pause after each
    request; TIMEOUT is the number of seconds passed to requests as the
    request timeout.

    A failure on one page is reported on standard output and does not stop
    the remaining pages from being attempted. Returns None.
    """
    for which_page in files_list:
        print('INFO: archiving %s' % which_page)
        try:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection, which would hang the entire run on one bad page.
            with requests.get(IA_save_prefix + which_page, stream=True, timeout=timeout) as response:
                # Read the whole body, chunk by chunk, so the request runs to
                # completion without holding the full page in memory.
                for _ in response.iter_content(chunk_size=100000):
                    pass
                if not response.ok:
                    print('WARNING: got HTTP status %s while archiving %s' % (response.status_code, which_page))
        except requests.RequestException as err:
            print('WARNING: unable to archive %s: %s' % (which_page, err))
        time.sleep(delay)       # pause between consecutive requests
def GPG_sign_file(which_file):
    """Run `gpg --detach-sign` on WHICH_FILE.

    WHICH_FILE is the path of the file to sign. Raises
    subprocess.CalledProcessError if gpg exits with a non-zero status, and
    OSError if the gpg executable cannot be started. Returns None.
    """
    # Pass the arguments as a list and do not go through a shell, so that a
    # path containing spaces or shell metacharacters is handed to gpg intact
    # instead of being split or interpreted.
    subprocess.check_output(['gpg', '--detach-sign', which_file])
def produce_feed(files_list):
    """Produce the Atom XML feed, write it bzip2-compressed, and GPG-sign it.

    FILES_LIST is an iterable of URLs (strings); each one becomes a <link>
    element inside the feed's single <entry>. The text of DESCRIPTION_FILE is
    HTML-escaped and used as the entry's <content>. The finished feed is
    UTF-8 encoded, bzip2-compressed, written to a file named YYYYMMDD.xml.bz2
    (today's date) under SURVEY_DIRECTORY, and then passed to GPG_sign_file().
    Returns None.
    """
    # Read the clock once so that every date in the feed agrees, even if the
    # script happens to be running across midnight.
    now = datetime.datetime.now().astimezone()
    short_date = now.strftime('%d %B %Y')
    # isoformat() on a timezone-aware datetime emits the actual local UTC
    # offset (sign, two-digit hours, minutes) instead of a hard-coded "-0N:00".
    ISO8601_date = now.replace(microsecond=0).isoformat()
    two_digit_year = now.strftime('%y')
    eight_digit_date = now.strftime('%Y%m%d')

    feed_header = """<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Geographical Site Survey, %s: Patrick Brian Mooney's site</title>
<link href="http://patrickbrianmooney.nfshost.com/~patrick/" />
<updated>%s</updated>
<author>
<name>Patrick Brian Mooney</name>
<uri>http://patrickbrianmooney.nfshost.com/~patrick/</uri>
</author>
<link rel="self" href="http://patrickbrianmooney.nfshost.com/~patrick/feeds/geographical-surveys/%s.xml.bz2" />
<generator uri="https://github.com/patrick-brian-mooney/personal-library/blob/master/site_survey.py" version="0.1">Patrick's geographical site survey script</generator>
<icon>http://patrickbrianmooney.nfshost.com/~patrick/icons/gear.png</icon>
<rights>© 2015–%s Patrick Brian Mooney</rights>
<id>urn:uuid:%s</id>
<subtitle>A listing of all files on Patrick Brian Mooney's personal web site as of %s; also, a summary of site contents.</subtitle>
<entry>
<title>Site Survey</title>
<id>urn:uuid:%s</id>
<updated>%s</updated>
""" % (short_date, ISO8601_date, eight_digit_date, two_digit_year, uuid.uuid4(), short_date, uuid.uuid4(), ISO8601_date)

    # Keep ':' and '/' unescaped: with quote()'s default safe set the scheme
    # separator would be rewritten ("http%3A//..."), yielding an unusable href.
    link_lines = [' <link rel="related self" href="%s" />\n' % requests.utils.quote(the_file, safe=':/~')
                  for the_file in files_list]

    # The feed is declared and encoded as UTF-8, so read the description the
    # same way; the with-block ensures the file handle is closed.
    with open(description_file, encoding='utf-8') as description:
        escaped_description = html.escape(description.read())

    # Assemble all the pieces in one join rather than growing the string one
    # link at a time.
    the_feed = ''.join([
        feed_header,
        ''.join(link_lines),
        ' <content type="html">\n',
        escaped_description,
        '\n</content>\n</entry>\n</feed>\n',
    ])

    bzipped_feed = bz2.compress(the_feed.encode(), compresslevel=9)
    # NOTE(review): survey_directory looks like a site-relative URL path rather
    # than a path under local_website_root -- confirm that this resolves to the
    # intended directory on the local filesystem.
    feed_location = '%s/%s.xml.bz2' % (survey_directory, eight_digit_date)
    with open(feed_location, 'wb') as the_atom_file:
        the_atom_file.write(bzipped_feed)
    GPG_sign_file(feed_location)
if __name__ == "__main__":
    # Build the list of public URLs: take every path that the file_utils
    # helper reports for the local site root and swap the local root for the
    # remote one.
    site_urls = []
    for local_path in fu.get_files_list(local_website_root, skip_strings_list):
        site_urls.append(local_path.replace(local_website_root, remote_website_root))

    # Write and sign the feed first, then ask the Internet Archive to save
    # each URL.
    produce_feed(site_urls)
    IA_archive(site_urls)
    print("\n\n\nWE'RE DONE! Don't forget to update the site survey web page and survey list feed.")