#! /usr/bin/python

"""
 Who:    Keshi Dai
 What:   PageRank.py
 When:   06/20/09
 Usage: PageRank.py [-t] [-i iteration_num] inlink_file > output
"""

import sys

if len(sys.argv)==1:
    print >> sys.stderr, "Usage: PageRank.py [-t] [-i iteration_num] inlink_file > output\n"
    sys.exit()

if sys.argv[1] == "-t":
    teleport = True
    if sys.argv[2] == "-i":
        iternum = int(sys.argv[3])
        inlink_file_name = sys.argv[4]
    else:
        iternum = 10
        inlink_file_name = sys.argv[2]
else:
    teleport = False
    inlink_file_name = sys.argv[1]

#damping factor    
d=0.85

inlink_file = open(inlink_file_name, 'r')
outlink_count = {}
inlinks = {}
oldpagerank = {}
newpagerank = {}
docids = {}
dangling_docs = {}

print >> sys.stderr, "Processing inlink file", inlink_file_name, "....."

inlink_docnum = 0
outlink_docnum = 0
docnum = 0

for line in inlink_file:
    line = line.strip();
    nodes = line.split(" ");
    inlink_docnum += 1
    inlinks[nodes[0]] = [];
    inlinks[nodes[0]] = tuple(nodes[1:]);
    if not docids.has_key(nodes[0]):
        docids[nodes[0]] = 1
        docnum += 1
    for node in nodes[1:]:
        if outlink_count.has_key(node):
            outlink_count[node] += 1
        else:
            outlink_count[node] = 1
            outlink_docnum += 1
        if not docids.has_key(node):
            docids[node] = 1;
            docnum += 1
    
print >> sys.stderr, "Number of Documents:", docnum
print >> sys.stderr, "Number of Documents with in-links:", inlink_docnum
print >> sys.stderr, "Number of Documents with out-links:", outlink_docnum

for key in docids.keys():
    if not inlinks.has_key(key):
        inlinks[key] = ()
        
    oldpagerank[key] = 1.0/docnum
    if not outlink_count.has_key(key):
        dangling_docs[key] = 1
print >> sys.stderr, "Number of Dangling Documents:", len(dangling_docs)

while iternum > 0:
    if teleport:
        dp = 0
        for key in dangling_docs.keys():
            dp += d*oldpagerank[key]/docnum
    
    for key in oldpagerank.keys():
        if teleport:
            newpagerank[key] = (1-d)/docnum + dp
        else:
            newpagerank[key] = (1-d)/docnum
        for inlink in inlinks[key]:
            if outlink_count.has_key(inlink):
                newpagerank[key] += d*oldpagerank[inlink]/outlink_count[inlink]

    for key in newpagerank.keys():
        oldpagerank[key] = newpagerank[key]
    iternum -= 1
    
    print >> sys.stderr, "PageRank iteration remaining", iternum
for key in newpagerank.keys():
    print key, newpagerank[key]





            
            


            



        
 
