#!/usr/local/bin/ruby
# Google PageRank
# K.Kodama 2007-12-21 bug fix "get" method(check digit)
# K.Kodama 2007-07-10 changed "get" method, because the format of the query/reply has changed.
# K.Kodama 2004-09-01 ruby version
# Original: http://blog.outer-court.com/archive/2004_06_27_index.html#108834386239051706
require "net/http"
# Computes the Google Toolbar "ch" checksum for a URL and queries the toolbar
# server for the URL's PageRank.
#
# Every method is exposed through module_function, e.g.
#   GooglePageRank.checkSum("http://www.example.com/")
#   GooglePageRank.get("http://www.example.com/")
module GooglePageRank
  M = 0x100000000 # modulo for unsigned int 32bit(4byte)

  # One mixing step: ((a - b - c) mod 2**32) XOR (d mod 2**32).
  # The subtraction is written as a + (M-b) + (M-c) so the intermediate value
  # never goes negative when b and c are already reduced modulo M.
  def m1(a, b, c, d)
    (((a + (M - b) + (M - c)) % M) ^ (d % M)) % M # mix/power mod
  end

  # Reads four bytes of +s+ starting at byte offset +k+ and combines them into
  # one integer, little endian (s[k] is the least significant byte).
  # Bytes beyond the end of the string count as 0.
  #
  # String#getbyte is used instead of s[k].to_i: on Ruby >= 1.9 s[k] is a
  # one-character String, and String#to_i parses it as a decimal number
  # (so "h".to_i == 0) rather than returning the character code.
  def c2i(s = "", k = 0)
    3.downto(0).inject(0) do |acc, offset|
      # getbyte returns nil past the end of the string; nil.to_i == 0.
      (acc * 0x100) + s.getbyte(k + offset).to_i
    end
  end

  # Mixes three 32-bit values and returns the new [a, b, c].
  # Inputs are reduced modulo M first; the left shifts may exceed 32 bits,
  # which is fine because m1 reduces its fourth argument modulo M.
  def mix(a, b, c)
    a %= M
    b %= M
    c %= M
    a = m1(a, b, c, c >> 13); b = m1(b, c, a, a << 8);  c = m1(c, a, b, b >> 13)
    a = m1(a, b, c, c >> 12); b = m1(b, c, a, a << 16); c = m1(c, a, b, b >> 5)
    a = m1(a, b, c, c >> 3);  b = m1(b, c, a, a << 10); c = m1(c, a, b, b >> 15)
    [a, b, c]
  end

  # Returns the checksum (an integer in 0...M) of "info:" + url.
  # The string is consumed in 12-byte blocks (three little-endian 32-bit
  # words each); the remaining tail is mixed in together with the length.
  def checkSum(url = "http://sample/index.html")
    a = 0x9E3779B9
    b = 0x9E3779B9
    c = 0xE6359A60 # Google Magic Number
    iurl = "info:" + url
    # Length in bytes, to agree with the byte-wise reads done by c2i.
    len = iurl.bytesize
    k = 0
    while len >= k + 12
      a += c2i(iurl, k)
      b += c2i(iurl, k + 4)
      c += c2i(iurl, k + 8)
      a, b, c = mix(a, b, c)
      k += 12
    end
    # Tail block: fewer than 12 bytes remain (missing bytes read as 0).
    # The third word is shifted up by one byte and the length is added to it.
    a += c2i(iurl, k)
    b += c2i(iurl, k + 4)
    c += (c2i(iurl, k + 8) << 8) + len
    a, b, c = mix(a, b, c)
    c
  end

  # Extracts the rank from a piece of reply text (reply ver.3).
  # Format: Rank_1:<n>:<rank>, where <n> is the number of digits of <rank>,
  # e.g. "Rank_1:1:5" or "Rank_1:2:10".
  # Returns the rank as an Integer, or nil when +text+ has no "Rank_1:" marker.
  def parse_rank(text)
    text = text.to_s
    pos = text.index("Rank_1:")
    return nil unless pos
    digits = text[pos + 7, 1].to_i # digit count # K.Kodama 2007-12-21 bug fix
    text[pos + 9, digits].to_i     # pagerank of n digits (nil.to_i == 0 if truncated)
  end

  ##### http query/reply
  # Gets the Google PageRank of +url+.
  #   url        - page to look up
  #   port       - port of the query server
  #   proxy      - proxy host, or nil for a direct connection
  #   proxy_port - proxy port, or nil
  # Returns the rank, or -1 if no rank was found in the reply.
  # Network errors raised by Net::HTTP are not rescued here.
  def get(url = "http://sample/index.html", port = 80, proxy = nil, proxy_port = nil)
    ch = sprintf("6%u", checkSum(url)) ###### ch ver.2
    # ch = sprintf("7%u",checkSum(url)) ###### ch ver.3
    ###### Query ver.2/ch ver.2
    # g_path=sprintf("/search?client=navclient-auto&failedip=216.239.51.102;821&ch=%s&q=info:%s", ch, url);
    ###### Query ver.3/ch ver.2
    # NOTE(review): url is inserted into the query string unescaped - confirm
    # whether callers are expected to pass an already-escaped URL.
    g_path = sprintf("/search?client=navclient-auto&features=Rank&failedip=216.239.51.102;821&q=info:%s&ch=%s", url, ch)
    # e.g.
    ## q=info:http://www.example.com/&ch=6540747202
    ## q=info:http://www.hyperposition.com/&ch=6768349016
    # http://www.google.co.jp/search?client=navclient-auto&ch=6...&q=info:http://...
    # http://www.google.co.jp/search?client=navclient-auto&ch=6...&features=Rank&q=info:http://...
    # http://www.google.co.jp/search?client=navclient-auto&features=Rank&q=info:http://...&ch=6...
    # http://toolbarqueries.google.co.jp/search?client=navclient-auto&googleip=F;66.102.7.147;1602&features=Rank&q=info:...&ch=7...
    rank = -1 # pagerank
    g_server = "toolbarqueries.google.com" # toolbarqueries.google.co.jp www.google.co.jp
    Net::HTTP.new(g_server, port, proxy, proxy_port).get(g_path) do |chunk|
      # Scan each piece of the reply body that Net::HTTP yields and stop at
      # the first one carrying a rank.
      found = parse_rank(chunk)
      if found
        rank = found
        break
      end
    end
    rank
  end

  module_function :m1, :c2i, :mix
  module_function :checkSum, :parse_rank, :get
end;
# Command-line entry point: look up the PageRank of the URL given with -u.
#   -h  print usage and exit
#   -s  silent: print only the numeric rank (-1 when none was found)
#   -u  URL to look up (required)
if $0 == __FILE__ then
  require "getoptlong"

  opt_url = ""
  opt_silent = false
  port = 80 # 80/HTTP
  proxy = nil; proxy_port = nil # without proxy
  # NOTE(review): site-specific proxy; comment the next line out for a direct connection.
  proxy = "10.1.4.1"; proxy_port = 8080 # with proxy port 8080/HTTP-Alternate

  # Prints the command-line help.
  def usage
    puts "usage: #{File.basename($0)} [-h] [-s] -u http://... "
    puts "Options:\n"
    puts "-h help\n"
    puts "-s silent\n"
  end

  arg_parser = GetoptLong.new(
    ["-h", "--help",   GetoptLong::NO_ARGUMENT],
    ["-s", "--silent", GetoptLong::NO_ARGUMENT],
    ["-u", "--url",    GetoptLong::REQUIRED_ARGUMENT]
  )

  begin
    arg_parser.each do |opt, arg|
      case opt
      when "-h" then usage; exit
      when "-s" then opt_silent = true
      when "-u" then opt_url = arg
      end
    end
  rescue GetoptLong::Error
    # Option errors are raised by the iterator itself, so the rescue has to
    # wrap the whole loop; GetoptLong reports the specific problem on stderr.
    usage
    exit(1)
  end

  # Without a URL there is nothing meaningful to query.
  if opt_url.empty?
    usage
    exit(1)
  end

  # Both output modes use the same port/proxy settings.
  rank = GooglePageRank.get(opt_url, port, proxy, proxy_port)
  if opt_silent
    printf("%d\n", rank)
  elsif rank >= 0
    printf("PageRank: %d : %s\n", rank, opt_url)
  else
    printf("PageRank: NO_INDEX : %s\n", opt_url)
  end
  exit(0)
end