#!/usr/bin/env ruby
require 'net/http'
require 'rubygems'
require 'nokogiri'

# A single quote consisting of a numeric id, a date string, a network name
# and the quote text.
class Quote
	# Read/write access to the quote's attributes.
	attr_accessor :id, :text, :date, :network

	# Builds a quote either from explicit values or from a parsed node.
	#
	# Ruby has no method overloading: defining `initialize` twice makes the
	# later definition replace the earlier one. Both construction styles are
	# therefore dispatched from this single entry point.
	#
	#   Quote.new(nodes)                    # extract everything from a node (set)
	#   Quote.new(id, date, network, text)  # use the given values as-is
	#
	# Raises ArgumentError for any other number of arguments.
	def initialize(*args)
		case args.length
		when 1
			load_from_nodes(args.first)
		when 4
			@id, @date, @network, @text = args
		else
			raise ArgumentError,
				"wrong number of arguments (given #{args.length}, expected 1 or 4)"
		end
	end

	# Formats the quote as "#<id>", date, network and text, one per line.
	def to_s
		"##{@id}\n#{@date}\n#{@network}\n#{@text}"
	end

	private

	# Fills the attributes from an object answering `xpath` (a Nokogiri node
	# or node set wrapping one quote element).
	def load_from_nodes(nodes)
		# The id link text carries one leading character (presumably '#') in
		# front of the number; drop it. Parse explicitly as base 10 so that a
		# leading zero is never interpreted as an octal prefix.
		id_label = nodes.xpath('.//span[@class="id"]/a').text
		@id = Integer(id_label[1..-1], 10)

		# Date and network are kept as plain strings.
		@date = nodes.xpath('.//span[@class="date"]').text
		@network = nodes.xpath('.//span[@class="network"]/a').text

		# One stripped line per "quote_zeile" span, joined by newlines, with
		# trailing whitespace removed from the result.
		lines = nodes.xpath('.//div[@class="zitat"]/span[@class="quote_zeile"]')
		@text = lines.map { |line| line.text.strip }.join("\n").rstrip
	end
end

# Walk through the paginated browse listing of the site, collect every quote
# found on each page, then write them all to the file "german-bash", each
# quote followed by a line containing only "%".
quotes = []

site = "german-bash.org"
path = "/action/browse/page/"

npage = 1
# The real page count is unknown until a page has been parsed; start with 1
# so that the loop body runs at least once.
maxpage = 1

while npage <= maxpage
	# Request this page
	puts "Looking up: #{path}#{npage}"
	page = Net::HTTP.get(site, "#{path}#{npage}")

	# Parse this page
	parsed = Nokogiri::HTML(page)

	# The last <option> of the page selector is read as the highest page number.
	maxpage = Integer(parsed.xpath('//select[@name="page"]/option')[-1].text)

	# Parse each quote
	parsed.xpath('//div[@class="quote"]').each do |node|
		# A "quote" div without an id link is advertising, not a quote.
		next if node.xpath('.//span[@class="id"]/a').empty?

		quotes << Quote.new(node)
	end

	npage += 1
end

# The block form of File.open closes (and thereby flushes) the file even if
# writing raises; the handle was previously left open until process exit.
File.open("german-bash", "w") do |out|
	quotes.each { |q| out.puts "#{q}\n%" }
end
