-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathRakefile
More file actions
71 lines (56 loc) · 2.15 KB
/
Rakefile
File metadata and controls
71 lines (56 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Default path of the bzip2-compressed XML dump consumed by the first stage.
RAW_DUMP_PATH = "data/pages-articles.xml.bz2"

# Stage 1: decompress the dump and pipe it into gen/dumplinks.rb, which is
# handed data/links-raw.txt and data/redirects-raw.txt as its arguments.
#
# Task argument :dump_path overrides RAW_DUMP_PATH, e.g.
#   rake "data/links-raw.txt[/path/to/dump.xml.bz2]"
# Raises RuntimeError when the dump file does not exist.
file "data/links-raw.txt", :dump_path do |_task, args|
  args.with_defaults(:dump_path => RAW_DUMP_PATH)
  source_dump = args[:dump_path]

  # Fail early with a readable message instead of a broken shell pipeline.
  unless File.exist?(source_dump)
    raise "#{source_dump} must exist"
  end

  sh "bzip2 -dc \"#{source_dump}\" | ruby gen/dumplinks.rb data/links-raw.txt data/redirects-raw.txt"
end
# Stage 2: drop every raw row that starts with a File:, Template:, Help: or
# Draft: prefix and write the remaining rows to data/links-filt.txt.
file "data/links-filt.txt" => "data/links-raw.txt" do
  filter_cmd = "grep -Ev \"^(File|Template|Help|Draft):\" data/links-raw.txt > data/links-filt.txt"
  sh filter_cmd
end
# Stage 3: keep only the first '|'-separated field of each filtered row
# (presumably the page title -- confirm against gen/dumplinks.rb output).
file "data/titles.txt" => %w[data/links-filt.txt] do
  extract_cmd = "cut -d'|' -f1 data/links-filt.txt > data/titles.txt"
  sh extract_cmd
end
# Stage 4: run gen/filtredirs.rb over the title list and the raw redirects.
# data/links-raw.txt is listed as a prerequisite because the stage that builds
# it is also the one that is given data/redirects-raw.txt to write.
file "data/redirects.txt" => %w[data/links-raw.txt data/titles.txt] do
  script_args = %w[gen/filtredirs.rb data/titles.txt data/redirects-raw.txt data/redirects.txt]
  ruby script_args.join(" ")
end
# Stage 5: run gen/proclinks.rb with the titles, the filtered redirects and the
# filtered links as inputs; the last argument is the output data/links.txt.
file "data/links.txt" => %w[data/links-filt.txt data/redirects.txt data/titles.txt] do
  script_args = %w[gen/proclinks.rb data/titles.txt data/redirects.txt data/links-filt.txt data/links.txt]
  ruby script_args.join(" ")
end
# Stage 6: run gen/sqlindex.rb on data/links.txt to produce data/xindex.db.
file "data/xindex.db" => "data/links.txt" do
  index_cmd = "gen/sqlindex.rb data/links.txt data/xindex.db"
  ruby index_cmd
end
# Stage 7: run gen/binindex.rb with the processed links and the SQLite index
# as inputs; the last argument is the output data/index.bin.
file "data/index.bin" => %w[data/links.txt data/xindex.db] do
  build_cmd = "gen/binindex.rb data/links.txt data/xindex.db data/index.bin"
  ruby build_cmd
end
# Stage 8: build data/indexbi.bin from data/index.bin.
# When a `nim` executable is found on PATH, gen/doublelink.nim is compiled in
# release mode and the resulting binary is run (it is invoked without
# arguments, so it presumably has its input/output paths built in -- confirm).
# Otherwise the Ruby script gen/doublelink.rb is used instead.
file "data/indexbi.bin" => "data/index.bin" do
  nim_available = system("which", "nim")

  unless nim_available
    ruby "gen/doublelink.rb data/index.bin data/indexbi.bin"
    next
  end

  sh "nim c -d:release gen/doublelink.nim"
  sh "./gen/doublelink"
end
# Output directory for compiled analysis tools; created on demand.
directory "bin"

# Compile analyze/strong_conn.rs into bin/strong_conn with optimizations.
# The Rust source is listed as a prerequisite so the binary is rebuilt whenever
# the source is newer than it; without that, an existing binary would never be
# refreshed after the source was edited. The "bin" directory prerequisite only
# ensures the directory exists and does not by itself trigger rebuilds.
file "bin/strong_conn" => ["bin", "analyze/strong_conn.rs"] do
  sh "rustc -O -o bin/strong_conn analyze/strong_conn.rs"
end
# Run analyze/verify.rb against the binary index and the SQLite index.
# Only data/index.bin is declared as a prerequisite; data/xindex.db is itself a
# prerequisite of data/index.bin, so it is built transitively.
task :verify => ["data/index.bin"] do
  verify_cmd = "analyze/verify.rb data/index.bin data/xindex.db"
  ruby verify_cmd
end
# Run analyze/inspect.rb for one page, using data/indexbi.bin and
# data/xindex.db. Usage: rake "inspect[Page title]".
#
# The title is passed to the script as its own argv entry via the
# multi-argument form of `ruby`, which does not go through a shell. Titles
# containing double quotes, `$`, backticks or backslashes therefore reach the
# script unchanged instead of being reinterpreted (or executed) by the shell.
task :inspect, :page do |_task, args|
  # to_s keeps the earlier behaviour of passing an empty argument when no page
  # was supplied.
  ruby "analyze/inspect.rb", "data/indexbi.bin", "data/xindex.db", args[:page].to_s
end
# Run analyze/link_stats.rb over data/links.txt and data/titles.txt.
# No prerequisites are declared, so both files must already exist.
task :link_stats do
  stats_cmd = "analyze/link_stats.rb data/links.txt data/titles.txt"
  ruby stats_cmd
end
# Run analyze/invalid_links.rb and redirect its stdout to
# data/invalid-links.txt. Goes through `sh` (not the `ruby` helper) because the
# command needs shell output redirection.
# NOTE(review): the meaning of the trailing 1000 is defined by the script --
# presumably a limit on reported entries; confirm.
task :invalid_links do
  report_cmd = "ruby analyze/invalid_links.rb data/links.txt data/titles.txt 1000 > data/invalid-links.txt"
  sh report_cmd
end
# Run the compiled bin/strong_conn tool on data/index.bin, building the binary
# first if needed. data/index.bin is not a declared prerequisite and must
# already exist.
task :strong_conn => "bin/strong_conn" do
  analysis_cmd = "./bin/strong_conn data/index.bin"
  sh analysis_cmd
end
# Copy data/xindex.db to data/xindex-nocase.db and apply gen/nocase.sql to the
# copy with the sqlite3 command-line tool, leaving the original untouched.
# NOTE(review): judging by the names this builds a case-insensitive variant of
# the index -- confirm against gen/nocase.sql.
task :nocase => "data/xindex.db" do
  original_db = "data/xindex.db"
  derived_db = "data/xindex-nocase.db"

  cp original_db, derived_db
  sh "sqlite3 data/xindex-nocase.db < gen/nocase.sql"
end