/*************************************************************************
 *  Compilation:  javac Scrape.java
 *  Execution:    java Scrape
 *  Dependencies: In.java Out.java
 *
 *************************************************************************/

import java.util.regex.Pattern;
import java.util.regex.Matcher;


/**
 * Scrapes county-level 2004 presidential results, one output file per state.
 *
 * For every two-letter state code found in codes.txt, the program downloads
 * the matching results page, walks its table rows by plain string search,
 * and writes STATE.txt containing one "county,bush,kerry,nader" line per row
 * followed by a "STATE,bushTotal,kerryTotal" summary line.
 *
 * In and Out are helper classes declared outside this file; this class only
 * relies on In(String).readAll() and on Out(String).println(String)/close().
 */
public class Scrape {

    // Results page address; the two-letter state code is appended to it.
    private static final String RESULTS_URL = "http://www.usatoday.com/news/politicselections/vote2004/PresidentialByCounty.aspx?oi=P&rti=G&tf=l&sp=";

    // Attribute text that marks the county-name cell opening each row.
    private static final String COUNTY_MARKER = "width=\"153\"";

    // Attribute text shared by the three per-candidate vote cells.
    private static final String VOTE_MARKER = "width=\"60\"";

    // A two-letter upper-case code with a tab on each side.
    private static final Pattern STATE_CODE = Pattern.compile("\\t([A-Z][A-Z])\\t");

    /**
     * Reads the state codes from codes.txt and scrapes each state in turn.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        In codes = new In("codes.txt");
        String data = codes.readAll();

        // Every code matched in the file is processed, in file order.
        Matcher matcher = STATE_CODE.matcher(data);
        while (matcher.find()) {
            String state = matcher.group(1);
            System.out.println("'" + state + "'");
            scrapeState(state);
        }
    }

    // Downloads one state's page and writes its county rows to STATE.txt.
    private static void scrapeState(String state) {
        In in = new In(RESULTS_URL + state);
        String input = in.readAll();
        Out file = new Out(state + ".txt");
        try {
            writeCounties(state, input, file);
        } finally {
            // Close the output even when a row fails to parse.
            file.close();
        }
    }

    /**
     * Writes one line per county row found in the page, then the state totals.
     *
     * @throws NumberFormatException if a Bush or Kerry cell is not an integer
     *         once its commas are removed
     */
    private static void writeCounties(String state, String input, Out file) {
        int bushTotal = 0, kerryTotal = 0;

        // Start one character past the first county marker so that the first
        // marked cell is skipped (presumably the table header - not
        // verifiable from this file). When the marker is absent indexOf gives
        // -1, the start becomes 0 and the loop below ends immediately.
        Cursor cursor = new Cursor(input, 1 + input.indexOf(COUNTY_MARKER));

        while (true) {
            String county = cursor.next(COUNTY_MARKER, "<b>", "</b>");
            if (county == null) break;   // no further rows

            // The two precinct cells are not written out, but the cursor
            // still has to step over them to reach the vote cells.
            cursor.next("width=\"65\"", ">", "</td>");   // total precincts
            cursor.next("width=\"70\"", ">", "</td>");   // precincts reporting

            String bush  = cursor.next(VOTE_MARKER, ">", "</td>");
            String kerry = cursor.next(VOTE_MARKER, ">", "</td>");
            String nader = cursor.next(VOTE_MARKER, ">", "</td>");

            // A failed lookup exhausts the cursor, so a null here means that
            // some cell of this row was missing.
            if (nader == null) {
                System.err.println("incomplete row for '" + county + "' in " + state + "; stopping");
                break;
            }

            // Counts use commas as thousands separators.
            bush  = bush.replace(",", "");
            kerry = kerry.replace(",", "");
            nader = nader.replace(",", "");

            bushTotal  += Integer.parseInt(bush);
            kerryTotal += Integer.parseInt(kerry);
            file.println(county + "," + bush + "," + kerry + "," + nader);
        }
        file.println(state + "," + bushTotal + "," + kerryTotal);
    }

    // Forward-only reader that pulls delimited snippets out of a page.
    private static final class Cursor {
        private final String text;
        private int pos;

        Cursor(String text, int start) {
            this.text = text;
            this.pos  = start;
        }

        /**
         * Finds the next marker at or after the current position, then the
         * first open delimiter after the marker, then the first close
         * delimiter after that, and returns the text between the delimiters.
         * The position advances to one character past the start of the close
         * delimiter.
         *
         * @return the enclosed text, or null if marker, open or close is not
         *         found; after a null every later call also returns null
         */
        String next(String marker, String open, String close) {
            int at   = text.indexOf(marker, pos);
            int from = (at   == -1) ? -1 : text.indexOf(open, at);
            int to   = (from == -1) ? -1 : text.indexOf(close, from + open.length());
            if (to == -1) {
                pos = text.length();   // exhaust: nothing is found from the end
                return null;
            }
            pos = to + 1;
            return text.substring(from + open.length(), to);
        }
    }

}

