/******************************************************************************
* Compilation: javac ElectionScraper.java
* Execution: java ElectionScraper year < codes.csv
* Dependencies: In.java Out.java StdIn.java StdOut.java
* https://introcs.cs.princeton.edu/java/data/codes.csv
*
* Screen scrape election result data from http://uselectionatlas.org
* URLs of the form
*
* http://uselectionatlas.org/RESULTS/datagraph.php?year=2004&fips=34
*
* where 2004 is the election year and 34 is the FIPS code of the
* desired state (34 = New Jersey). The file codes.csv is used to
* convert from state abbreviation to FIPS code.
*
* The resulting .php file has the following general format,
* which we rely on to screen scrape the data:
*
* width:100px ...
* County name ...
* Republican vote count | ...
* Democrat vote count | ...
* Third party vote count | ...
*
* County name ...
* Republican vote count | ...
* Democrat vote count | ...
* Third party vote count | ...
*
* ...
*
*
* This program creates 51 files of the form NJ2004.txt. The first
* line of the file contains the name of the election and the candidates.
* Each succeeding line contains a list of counties and the number of
* corresponding Republican, Democrat, and Third party votes.
* This program also creates a file of the form USA2004.txt with
* the cumulative totals for each state in the given election.
*
* % java ElectionScraper 2004 < codes.csv
*
* % more NJ2004.txt
* 2004 US Presidential Election,Bush,Kerry,Other,
* Atlantic,49487,55746,864,
* Bergen,189833,207666,2745,
* Burlington,95936,110411,1609,
* Camden,81427,137765,1741,
* Cape May,28832,21475,455,
* Cumberland,24362,27875,948,
* Essex,83374,203681,2293,
* Gloucester,60033,66835,1096,
* Hudson,60646,127447,1353,
* Hunterdon,39888,26050,742,
* Mercer,56604,91580,1326,
* Middlesex,126492,166628,2685,
* Monmouth,163650,133773,2516,
* Morris,135241,98066,1847,
* Ocean,154204,99839,2263,
* Passaic,75200,94962,1149,
* Salem,15721,13749,311,
* Somerset,72508,66476,1295,
* Sussex,44506,23990,900,
* Union,82517,119372,1498,
* Warren,29542,18044,622,
*
*
* Remarks
* -------
* - In some elections there are 4 candidates (e.g., Clinton, Dole,
* Perot, Other) but this program only uses first three.
*
******************************************************************************/
/**
 * Screen scrapes presidential election results, one state at a time, and
 * writes them to a per-state text file named from the USPS code and the year.
 *
 * NOTE(review): several search strings passed to indexOf() in this class are
 * empty or split across lines, and one statement near the end of the county
 * loop is truncated. It looks like markup text inside the literals was lost;
 * the class will not compile until those literals are restored.
 */
public class ElectionScraper {
/**
 * Reads comma-separated lines from standard input, taking the first three
 * fields of each line as state name, USPS code, and FIPS code. For each line
 * it downloads the results page for the given year and FIPS code, then writes
 * a header line followed by one line per county to the output file.
 *
 * @param args args[0] is the election year, parsed as an integer
 */
public static void main(String[] args) {
// year of presidential election
int year = Integer.parseInt(args[0]);
// scrape the data: one iteration per input line (i.e., per state)
while (!StdIn.isEmpty()) {
String line = StdIn.readLine();
String[] fields = line.split(",");
String state = fields[0];
String usps = fields[1];
String fips = fields[2];
// progress message goes to standard error
System.err.printf("State = %s, usps = %s, fips = %s\n", state, usps, fips);
// screen scrape from uselectionatlas.org
String url = "http://uselectionatlas.org/RESULTS/datagraph.php";
In in = new In(url + "?year=" + year + "&fips=" + fips);
String input = in.readAll();
// when data is not available: the page text contains the word Warning,
// so report the state and move on without creating an output file
if (input.contains("Warning")) {
System.err.println("Error reading: " + state + " " + usps + " " + fips);
continue;
}
// save results to this file
Out file = new Out(usps + year + ".txt");
// indices into string: p is the current search position, and
// [from, to) brackets the text being extracted
int p = 0;
int from, to;
// extract names of 3 candidates, starting at the first width:100px label
p = input.indexOf("width:100px", p);
// name of Democratic candidate
// NOTE(review): the search string below is empty and `from` is read on the
// following line without ever having been assigned; it looks like the
// original literal and a `from = ...` statement were lost -- restore them.
p = input.indexOf("", p);
to = input.indexOf(" | ", from);
String candidate1 = input.substring(from + 1, to);
// name of Republican candidate
// NOTE(review): empty search string; the + 8 offset below presumably skips
// a fixed-length prefix of the original pattern -- confirm.
p = to + 1;
from = input.indexOf("", p);
to = input.indexOf(" | ", from);
String candidate2 = input.substring(from + 8, to);
// name of third party candidate
// NOTE(review): the string literal in the next statement is split across two
// lines, which Java does not allow; the original pattern needs restoring.
p = to + 1;
from = input.indexOf("
", p);
to = input.indexOf(" | ", from);
String candidate3 = input.substring(from + 8, to);
// check if there is a 3rd-party candidate: a third name identical to the
// second is treated as meaning there is none
if (candidate3.equals(candidate2)) {
StdOut.println("No 3rd party candidate");
candidate3 = "";
}
// header line: election name, then Republican, Democrat, third party
file.println(year + " US Presidential Election,"
+ candidate2 + "," + candidate1 + "," + candidate3 + ",");
// read in vote tallies for each county in the given state
p = 0;
while (true) {
// county name occurs after width:100px label
p = input.indexOf("width:100px", p);
// no further label means no further counties
if (p == -1) break;
// extract county name
// NOTE(review): both search strings below are empty -- restore the patterns.
from = input.indexOf("", p);
to = input.indexOf("", from);
if (from == -1 || to == -1) {
System.err.println("Check results");
break;
}
String county = input.substring(from + 3, to);
// extract number of votes for democratic candidate
// NOTE(review): `from` still holds the county-name offset here; presumably a
// `from = input.indexOf(...)` step is missing after p is advanced -- confirm.
p = to + 1;
p = input.indexOf("", p);
to = input.indexOf(" | ", from);
if (p == -1 || from == -1 || to == -1) {
System.err.println("Check results");
break;
}
String democrat = input.substring(from + 1, to); // number of votes
democrat = democrat.replaceAll(",", ""); // remove any commas
// extract number of votes for republican candidate
// NOTE(review): same stale `from` concern as for the democratic count above.
p = to + 1;
p = input.indexOf("", p);
to = input.indexOf(" | ", from);
if (p == -1 || from == -1 || to == -1) {
System.err.println("Check results");
// NOTE(review): unlike the other output lines, no comma separates the county
// name from the first count here -- likely a missing separator. The break
// that follows also stops processing of any remaining counties.
file.println(county + "0," + democrat + ",0,");
break;
}
String republican = input.substring(from + 1, to);
republican = republican.replaceAll(",", "");
// extract number of votes for third party candidate, if exists
p = to + 1;
// NOTE(review): the next statement is truncated -- its search literal is
// unterminated and p2 is never declared. Going by the branch bodies, the
// test appears to decide whether the county has a third-party count;
// recover the original condition before relying on this code.
int p1 = input.indexOf("= 0 && p2 <= p1)) {
System.err.println("No third party candidate in " + county + ", " + state);
file.println(county + "," + republican + "," + democrat + ",0,");
}
// third party candidate
else {
p = p1;
from = input.indexOf(">", p);
to = input.indexOf(" | ", from);
if (p == -1 || from == -1 || to == -1) {
System.err.println("Check results");
break;
}
String independent = input.substring(from + 1, to);
independent = independent.replaceAll(",", "");
file.println(county + "," + republican + "," + democrat + "," + independent + ",");
}
}
file.close();
}
}
}