-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaggregate-data
More file actions
executable file
·65 lines (49 loc) · 1.29 KB
/
aggregate-data
File metadata and controls
executable file
·65 lines (49 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env ruby
require 'nokogiri'
require 'csv'
require 'net/http'
require 'micro-optparse'
# Declare the script's command-line options (`refresh_cache`, and `year`
# with a default of 2017) and parse them into the `opts` hash.
cli = Parser.new do |parser|
  parser.banner = "Terrible MissHK parser"
  parser.version = "2019.09.08"
  parser.option(:refresh_cache, "force refresh of cache")
  parser.option(:year, "year", default: 2017)
end
opts = cli.process!

# The year is interpolated into both the remote URLs and the cache file names.
year = opts[:year]
# Script-wide accumulators, filled in by the steps below.
files = []   # names of the cached contestant pages
csv = []     # CSV-encoded lines, one per contestant
header = []  # column names for the CSV header line
# When the refresh_cache option is set, download contestant pages 1..15 for
# the selected year and store each one in the current directory as
# "<year>-<name>.php", where <name> is taken from the page title.
if opts[:refresh_cache]
  (1..15).each do |num|
    source = Net::HTTP.get(URI("http://misshk.tvb.com/#{year}/contestant.php?ID=#{num}"))

    # The name is the text between the first and second "-" of the <title>.
    title = Nokogiri::HTML(source).xpath("//title").children.to_s.split("-")[1]
    if title.nil?
      # A title without "-" would otherwise abort the whole refresh with a
      # NoMethodError; skip that page and carry on with the next ID.
      warn "Skipping contestant #{num}: no name found in page title"
      next
    end

    # \s only matches ASCII whitespace, while the titles can contain a wide
    # (non-ASCII) space. [[:space:]] is Unicode-aware and spells that case out
    # explicitly instead of relying on an invisible literal character.
    file_name = "#{year}-#{title.strip.gsub(/\s+|[[:space:]]/, "-")}.php"
    File.write(file_name, source)
  end
end
# Parse every cached contestant page for the selected year.
# Afterwards `header` holds the column names and `csv` has gained one
# CSV-encoded line per page, with the fields in `header` order.
# Sorting keeps the row order stable regardless of how the platform lists files.
files = Dir.glob("#{year}-*.php").sort

rows = files.map do |file|
  # Block form closes the file handle as soon as the document is parsed.
  page = File.open(file) { |io| Nokogiri::HTML(io) }
  row = {}

  # The name column is only present when the page has a div.cname element.
  name_nodes = page.css("div.cname")
  row["name"] = name_nodes.text unless name_nodes.empty?

  # After the first <span>, spans are consumed as label/value pairs. The
  # column name is the label text before the '︰' separator. A "Prev" label
  # and a trailing label with no value are ignored.
  page.xpath("//span").drop(1).each_slice(2) do |label, value|
    next if value.nil? || label.text == "Prev"

    row[label.text.split('︰')[0]] = value.text
  end

  row
end

# Use the union of all keys (first-seen order) as the header, and emit every
# row in that order, so a page with missing or extra fields cannot shift its
# values under the wrong column.
header = rows.flat_map(&:keys).uniq
rows.each do |row|
  csv << header.map { |key| row[key] }.to_csv
end
# Emit the table, header line first, to stdout and to contestant-data.csv.
csv.insert(0, header.to_csv)
puts csv

# File.write opens, writes and closes in one call, so the handle is released
# even when the write raises.
File.write("contestant-data.csv", csv.join(""))