/****************************************************************************** * Compilation: javac ElectionScraper.java * Execution: java ElectionScraper year < codes.csv * Dependencies: StdIn.java Out.java * https://introcs.cs.princeton.edu/java/data/codes.csv * * Screen scrape election result data from http://uselectionatlas.org * URLs of the form * * http://uselectionatlas.org/RESULTS/datagraph.php?year=2004&fips=34 * * where 2004 is the election year and 34 is the FIPS code of the * desired state (34 = New Jersey). The file codes.csv is used to * convert from state abbreviation to FIPS code. * * The resulting .php file has the following general format, * which we rely on to screen scrape the data: * * width:100px ... * County name ... * Republican vote count ... * Democrat vote count ... * Third party vote count ... * * County name ... * Republican vote count ... * Democrat vote count ... * Third party vote count ... * * ... * * * This program creates 51 files of the form NJ2004.txt. The first * line of the file contains the name of the election and the candidates. * Each succeeding line contains a list of counties and the number of * corresponding Republican, Democrat, and Third party votes. * This program also creates a file of the form USA2004.txt with * the cumulative totals for each state in the given election. * * % java ElectionScraper 2004 < csv.txt * * % more NJ2004.txt * 2004 Presidential Election,Bush,Kerry,Other, * Atlantic,49487,55746,864, * Bergen,189833,207666,2745, * Burlington,95936,110411,1609, * Camden,81427,137765,1741, * Cape May,28832,21475,455, * Cumberland,24362,27875,948, * Essex,83374,203681,2293, * Gloucester,60033,66835,1096, * Hudson,60646,127447,1353, * Hunterdon,39888,26050,742, * Mercer,56604,91580,1326, * Middlesex,126492,166628,2685, * Monmouth,163650,133773,2516, * Morris,135241,98066,1847, * Ocean,154204,99839,2263, * Passaic,75200,94962,1149, * Salem,15721,13749,311, * Somerset,72508,66476,1295, * Sussex,44506,23990,900, * Union,82517,119372,1498, * Warren,29542,18044,622, * * * Remarks * ------- * - In some elections there are 4 candidates (e.g., Clinton, Dole, * Perot, Other) but this program only uses first three. * ******************************************************************************/ public class ElectionScraper { public static void main(String[] args) { // year of presidential election int year = Integer.parseInt(args[0]); // scrape the data while (!StdIn.isEmpty()) { String line = StdIn.readLine(); String[] fields = line.split(","); String state = fields[0]; String usps = fields[1]; String fips = fields[2]; System.err.printf("State = %s, usps = %s, fips = %s\n", state, usps, fips); // screen scrape from uselectionatlas.org String url = "http://uselectionatlas.org/RESULTS/datagraph.php"; In in = new In(url + "?year=" + year + "&fips=" + fips); String input = in.readAll(); // when data is not available if (input.contains("Warning")) { System.err.println("Error reading: " + state + " " + usps + " " + fips); continue; } // save results to this file Out file = new Out(usps + year + ".txt"); // indices into string int p = 0; int from, to; // extract names of 3 candidates p = input.indexOf("width:100px", p); // name of Democratic candidate p = input.indexOf("", p); to = input.indexOf("", from); String candidate1 = input.substring(from + 1, to); // name of Republican candidate p = to + 1; from = input.indexOf("", p); to = input.indexOf("", from); String candidate2 = input.substring(from + 8, to); // name of third party candidate p = to + 1; from = input.indexOf("", p); to = input.indexOf("", from); String candidate3 = input.substring(from + 8, to); // check if there is a 3rd-party candidate if (candidate3.equals(candidate2)) { StdOut.println("No 3rd party candidate"); candidate3 = ""; } file.println(year + " US Presidential Election," + candidate2 + "," + candidate1 + "," + candidate3 + ","); // read in vote tallies for each county in the given state p = 0; while (true) { // county name occurs after width:100px label p = input.indexOf("width:100px", p); if (p == -1) break; // extract county name from = input.indexOf("", p); to = input.indexOf("", from); if (from == -1 || to == -1) { System.err.println("Check results"); break; } String county = input.substring(from + 3, to); // extract number of votes for democratic candidate p = to + 1; p = input.indexOf("", p); to = input.indexOf("", from); if (p == -1 || from == -1 || to == -1) { System.err.println("Check results"); break; } String democrat = input.substring(from + 1, to); // number of votes democrat = democrat.replaceAll(",", ""); // remove any commas // extract number of votes for republican candidate p = to + 1; p = input.indexOf("", p); to = input.indexOf("", from); if (p == -1 || from == -1 || to == -1) { System.err.println("Check results"); file.println(county + "0," + democrat + ",0,"); break; } String republican = input.substring(from + 1, to); republican = republican.replaceAll(",", ""); // extract number of votes for third party candidate, if exists p = to + 1; int p1 = input.indexOf("= 0 && p2 <= p1)) { System.err.println("No third party candidate in " + county + ", " + state); file.println(county + "," + republican + "," + democrat + ",0,"); } // third party candidate else { p = p1; from = input.indexOf(">", p); to = input.indexOf("", from); if (p == -1 || from == -1 || to == -1) { System.err.println("Check results"); break; } String independent = input.substring(from + 1, to); independent = independent.replaceAll(",", ""); file.println(county + "," + republican + "," + democrat + "," + independent + ","); } } file.close(); } } }