Wildcard.java


Below is the syntax highlighted version of Wildcard.java from §5.1 Formal Languages.


/******************************************************************************
 *  Compilation:  javac Wildcard.java
 *  Execution:    java Wildcard pattern < wordlist.txt
 *  Dependencies: In.java
 *  
 *  Find all lines in wordlist.txt that match the given pattern by
 *  simulating a nondeterminstic finite state automaton using an
 *  Boolean array states[] which records all states that the NFSA
 *  could be in after reading in a certain number of characters.
 *
 *     Patterns supported
 *     ------------------------------
 *     *  any zero or more characters
 *     .  any one character
 *     c  character c
 *  
 *
 *  Sample execution:
 *
 *     % java Wildcard *wa*t**c*d* < wordlist.txt
 *     unwatched
 *     waistcoated
 *     watchdog
 *     watched
 *     watchword
 *
 *     % java Wildcard .a.e*i*o*u*y < wordlist.txt
 *     facetiously
 *
 *     % java Wildcard ........................ < wordlist.txt
 *     formaldehydesulphoxylate
 *     pathologicopsychological
 *     scientificophilosophical
 *     tetraiodophenolphthalein
 *     thyroparathyroidectomize
 *
 *  Note: not the most efficient algorithm.
 *
 ******************************************************************************/

public class Wildcard { 

  /***********************************************************************
   *  Check if pattern string matches text string.
   *
   *  At the beginning of iteration i of main loop
   *
   *     old[j]    = true if pattern[0..j] matches text[0..i-1] 
   *
   *  By comparing pattern[j] with text[i], the main loop computes
   *
   *     states[j] = true if pattern[0..j] matches text[0..i] 
   *
   ***********************************************************************/
    public static boolean matches(String pattern, String text) {
        // add sentinel so don't need to worry about *'s at end of pattern
        text    += '\0';
        pattern += '\0';

        int n = pattern.length();

        boolean[] states = new boolean[n+1];
        boolean[] old = new boolean[n+1];
        old[0] = true;

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            states = new boolean[n+1];       // initialized to false
            for (int j = 0; j < n; j++) {
                char p = pattern.charAt(j);

                // hack to handle *'s that match 0 characters
                if (old[j] && (p == '*')) old[j+1] = true;

                if (old[j] && (p == c))   states[j+1] = true;
                if (old[j] && (p == '.')) states[j+1] = true;
                if (old[j] && (p == '*')) states[j]   = true;
                if (old[j] && (p == '*')) states[j+1] = true;
            }
            old = states;
        }
        return states[n];
    }


    public static void main(String[] args) { 
        String regexp = args[0];
        while (!StdIn.isEmpty()) {
            String text = StdIn.readString();
            if (matches(regexp, text))
            StdOut.println(text);
        }    
    }

}


Copyright © 2000–2017, Robert Sedgewick and Kevin Wayne.
Last updated: Fri Oct 20 14:12:12 EDT 2017.