|
| 1 | +require 'net/http' |
| 2 | +require 'uri' |
| 3 | + |
# Expands shortened URLs by following HTTP redirects until a 200 response.
#
# @example
#   url = 'http://t.co/z4t0E1vArh'
#   ExpandUrl.expand_url(url)
#   # => "http://www.haaretz.com/news/national/israel-s-ag-impels-ministers-to-crack-down-on-exclusion-of-women.premium-1.519917"
module ExpandUrl
  # Base class for every error this module raises on purpose.
  class ExpansionError < StandardError; end

  module ExpansionErrors
    # The URL (or a redirect target) cannot be parsed, is not HTTP(S),
    # cannot be resolved, or leads to a non-200 response without a Location.
    class BadUrl < ExpansionError; end

    # The HTTP exchange failed, or the redirect chain is too long.
    class BadResponse < ExpansionError; end
  end

  # Low-level failures that get_response converts into BadResponse.
  HTTP_ERRORS = [Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
                 Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
                 Net::ProtocolError].freeze

  # Default number of redirects expand_url follows before giving up.
  MAX_REDIRECTS = 10

  # Minimal stand-in for a Net::HTTPResponse; only #code is consulted.
  BasicResponse = Struct.new(:url, :code)

  extend self

  # Follows redirects starting at +url+ and returns the first URL that
  # answers with HTTP 200.
  #
  # @param url [String] absolute HTTP(S) URL to expand
  # @param redirects_left [Integer] how many more redirects may be followed
  # @return [String] the final URL
  # @raise [ExpandUrl::ExpansionError] BadUrl or BadResponse, see above
  def expand_url(url, redirects_left = MAX_REDIRECTS)
    response = get_response(url)
    log "url: #{url}\tresponse: #{response.inspect}"
    return url if response.code == '200'

    # Any other status is treated as a redirect, provided it names a target.
    location = response['location']
    if location.nil? || location.empty?
      raise ExpansionErrors::BadUrl,
            "no redirect location for #{url.inspect} (HTTP #{response.code})"
    end

    # Bound the recursion so a redirect loop cannot exhaust the stack.
    unless redirects_left > 0
      raise ExpansionErrors::BadResponse,
            "too many redirects while expanding #{url.inspect}"
    end

    expand_url(resolve_location(url, location), redirects_left - 1)
  end

  # Performs a GET request for +url+.
  #
  # @param url [String] absolute HTTP(S) URL
  # @return [Net::HTTPResponse, BasicResponse] the response; a BasicResponse
  #   with code '200' is returned when the request ends in EOFError
  # @raise [ExpansionErrors::BadUrl] for unparsable, non-HTTP(S) or
  #   unresolvable URLs
  # @raise [ExpansionErrors::BadResponse] for any of HTTP_ERRORS
  def get_response(url)
    uri = url_to_uri(url)
    # Net::HTTP needs a scheme it understands and a host to connect to.
    unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
      raise ExpansionErrors::BadUrl,
            "not an absolute HTTP(S) URL: #{url.inspect}"
    end

    Net::HTTP.get_response(uri)
  rescue EOFError
    # Best effort kept from the original: an EOF is reported as a 200 for
    # +url+, so expand_url returns it unchanged.
    BasicResponse.new(url, '200')
  rescue SocketError => e
    # Host lookup failures are raised by the request, not by URI.parse.
    log "#{url.inspect}#{e.inspect}"
    raise ExpansionErrors::BadUrl, e.message
  rescue *HTTP_ERRORS => e
    log "#{url.inspect}#{e.inspect}"
    raise ExpansionErrors::BadResponse, e.message
  end

  # Parses +url+ into a URI object.
  #
  # @param url [String]
  # @return [URI::Generic]
  # @raise [ExpansionErrors::BadUrl] when the string is not a valid URI
  def url_to_uri(url)
    URI.parse(url)
  rescue URI::InvalidURIError => e
    raise ExpansionErrors::BadUrl, e.message
  end

  # Writes +msg+ and a frame from the call stack to STDOUT, but only when
  # the environment variable +debug+ is set to 'true'.
  #
  # @param msg [String]
  def log(msg)
    STDOUT.puts "#{msg}\t#{caller[1]}" if ENV['debug'] == 'true'
  end

  private

  # Turns a Location header value into an absolute URL string.
  # Absolute values are returned untouched; relative ones are resolved
  # against +base_url+, the URL that produced the redirect.
  def resolve_location(base_url, location)
    return location if URI.parse(location).absolute?

    URI.join(base_url, location).to_s
  rescue URI::Error => e
    raise ExpansionErrors::BadUrl, e.message
  end
end
0 commit comments