GeneFind.java


Below is the syntax highlighted version of GeneFind.java from §3.1 Using Data Types.


/******************************************************************************
 *  Compilation:  javac GeneFind.java
 *  Execution:    java GeneFind start stop < input.txt
 *
 *  To find a gene in a genome, we scan for the start codon,
 *  remember its index, then scan from the next stop codon.
 *  If the length of the intervening sequence is a multiple of 3,
 *  we have found a gene.
 *
 *  % more genomeTiny.txt
 *  ATAGATGCATAGCGCATAGCTAGATGTGCTAGCAT
 *
 *  % java GeneFind ATG TAG < genomeTiny.txt
 *  CATAGCGCA
 *  TGC
 *
 *  % java GeneFind ATG TAG < genomeVirus.txt
 *  CGCCTGCGTCTGTAC
 *  TCGAGCGGATCGCTCACAACCAGTCGG
 *  AGATTATCAAAAAGGATCTTCACC
 *
 ******************************************************************************/

public class GeneFind {
    public static void main(String[] args)  {

        // read in data
        String start = args[0];
        String stop  = args[1];
        String genome = StdIn.readAll();
        StdOut.println("genome = '" + genome + "'");
        StdOut.println("start  = '" + start  + "'");
        StdOut.println("stop   = '" + stop   + "'");

        // find genes
        int beg = -1;
        for (int i = 0; i < genome.length() - 2; i++) {
            String codon = genome.substring(i, i+3);

            // start codon
            if (codon.equals(start)) beg = i;

            // stop codon
            if ((codon.equals(stop)) && (beg != -1) && (beg + 3 < i)) {
                // check putative gene alignment
                String gene = genome.substring(beg+3, i);
                if (gene.length() % 3 == 0) {
                    StdOut.println(gene);
                    beg = -1;
                }
            }
        }
    }
}


Copyright © 2000–2017, Robert Sedgewick and Kevin Wayne.
Last updated: Fri Oct 20 14:12:12 EDT 2017.