#!/usr/bin/python

# Vote history converter, from the Filmweb format to the Criticker format.
# Usage: ./filmweb2criticker.py <filmweb-vote-history-url>
#
# Tip: if you have the vote history HTML page stored locally, the file:// type URL will work too.
#
# (C)2009 Lukasz Bolikowski

import datetime
import re
import sys
import urllib

def parse(filmwebVoteHistory):
	"""Convert a Filmweb vote-history HTML page into Criticker ranking XML.

	The page is scanned line by line (each line stripped of surrounding
	whitespace) for groups of three lines that must appear in this order:

	1. the film anchor, which supplies the film link and the "(year)" text,
	2. the title span,
	3. the "N/10" vote.

	Lines that do not match the pattern expected next are ignored. Each
	completed group produces one <film> element.

	Args:
		filmwebVoteHistory: the HTML page as a single string.

	Returns:
		A string containing a <recentrankings> element with one <film>
		child per vote found. Captured values are inserted exactly as they
		appear in the HTML; no XML escaping is applied.
	"""
	# Raw strings keep the regex escapes (\( and \d) out of Python's own
	# string-escape processing. The patterns are compiled once, not per line.
	# The second group (image src) is captured but not used in the output;
	# it is kept so the year stays in group 3.
	anchorPattern = re.compile(r'<a title="[^"]+" href="([^"]+)" class="n"><img [^>]+ src="([^"]+)" [^>]+><br style="[^"]+">.* (\(\d+\))</a>')
	titlePattern = re.compile(r'<br><span style="[^"]+">(.*)</span>')
	votePattern = re.compile(r'<b style="[^"]+">(\d+)</b>/10')

	# Every entry is stamped with the conversion time, not the time of the vote.
	now = datetime.datetime.now().strftime("%b %d %Y, %H:%M")
	pieces = ['<recentrankings>\n']

	# stage: 0 = waiting for anchor line, 1 = waiting for title, 2 = waiting for vote
	stage = 0
	for line in filmwebVoteHistory.split('\n'):
		line = line.strip()

		if stage == 0:
			found = anchorPattern.match(line)
			if found is not None:
				link = found.group(1).strip()
				year = found.group(3).strip()
				stage = 1
		elif stage == 1:
			found = titlePattern.match(line)
			if found is not None:
				title = found.group(1).strip()
				stage = 2
		else:
			found = votePattern.match(line)
			if found is not None:
				vote = found.group(1).strip()
				pieces.extend([
					'  <film>\n',
					'    <filmid>' + '-1' + '</filmid>\n',
					'    <filmname>' + title + " " + year + '</filmname>\n',
					'    <filmlink>' + link + '</filmlink>\n',
					'    <img/>\n',
					'    <score>' + vote + '</score>\n',
					'    <quote/>\n',
					'    <reviewdate>' + now + '</reviewdate>\n',
					# The tier is written with the same value as the score.
					'    <tier>' + vote + '</tier>\n',
					'  </film>\n',
				])
				stage = 0

	pieces.append('</recentrankings>\n')
	return ''.join(pieces)

# Script entry point: fetch the vote history named on the command line and
# write the converted XML to stdout. Guarded so that importing this module
# (for example to reuse parse()) does not trigger argument handling or a fetch.
if __name__ == "__main__":
	if len(sys.argv) == 1:
		# Diagnostics go to stderr so they never mix with redirected XML output.
		sys.stderr.write("Usage %s <filmweb-vote-history-url>\n" % sys.argv[0])
		sys.exit(1)
	filmwebUrl = sys.argv[1]
	try:
		# Only the fetch is guarded: an IOError raised while writing the
		# result must not be reported as a download failure.
		response = urllib.urlopen(filmwebUrl)
		try:
			filmwebVoteHistory = response.read()
		finally:
			response.close()
	except IOError:
		sys.stderr.write("Could not get %s\n" % filmwebUrl)
		sys.exit(2)
	critickerVoteHistory = parse(filmwebVoteHistory)
	# parse() output already ends with a newline, so write it verbatim.
	sys.stdout.write(critickerVoteHistory)


