Mercurial > repos > pfrommolt > ngsrich
view NGSrich_0.5.5/src/filters/TargetFilter.java @ 0:89ad0a9cca52 default tip
Uploaded
author | pfrommolt |
---|---|
date | Mon, 21 Nov 2011 08:12:19 -0500 |
parents | |
children |
line wrap: on
line source
package filters; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.util.Scanner; import middlewares.Misc; import datastructures.Frame; import datastructures.TargetLine; import exceptions.ChromosomeFormatException; import exceptions.ChromosomeNotFoundException; import exceptions.NullOrNegativeRangeException; import exceptions.RangeFormatException; import exceptions.RangeLimitNotFoundException; public class TargetFilter extends Filter { // variables used for test purposes. public int target_size, target_regions; public TargetFilter(String input, String output) { super(input, output); target_size = 0; target_regions = 0; } public void filter() { try { Scanner s = new Scanner(new File(getInputPath())); FileWriter fw; System.out.println("TARGET REGIONS FILE:"); /* * 1. STEP: * a. Remove all but the first track. * b. Remove browser and track header lines of the first track * c. Save the output in the output directory and return the path of the output file. */ String finput = filterTracks(s); setInputPath(finput); /* * 2. STEP: * a. Sort the target region file lexicographically by chromomose-name. * b. If chr-names are identical then numerically by the start position. * c. If start positions are identical then numerically by the end position. */ sort(); /* * 3. STEP: * Unify all overlapping target regions. */ s = new Scanner(new File(getInputPath())); fw = new FileWriter(getOutputPath()); // for all target regions do the following: if (s.hasNextLine()) { try { // parse the current target region. TargetLine tl = new TargetLine(s.nextLine()); // create a new frame representing the current target region. Frame union = new Frame(tl.start(), tl.end() - tl.start() + 1); String chrom = tl.chrom(); while (s.hasNextLine()) { // parse the next target region. tl = new TargetLine(s.nextLine()); // create the corresponding frame. Frame nextTarget = new Frame(tl.start(), tl.end() - tl.start() + 1); String nextChrom = tl.chrom(); // if current and next overlap each other. if (union.overlaps(nextTarget) && chrom.equals(nextChrom)) { // unify regions. union = union.unify(nextTarget); // go to the next iteration. continue; } // else // write the computed overlap-free target region. target_size += union.end()-union.start()+1; target_regions++; fw.write(chrom +"\t"+ union.start() +"\t"+ union.end() +"\n"); // refresh the union and chrom variables for the next computation. union = nextTarget; chrom = nextChrom; } try { target_size += union.end()-union.start()+1; target_regions++; fw.write(chrom+"\t"+union.start()+"\t"+union.end()+"\n"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } fw.close(); } catch (RangeFormatException e) { e.printStackTrace(); } catch (ChromosomeFormatException e) { e.printStackTrace(); } catch (ChromosomeNotFoundException e) { e.printStackTrace(); } catch (RangeLimitNotFoundException e) { e.printStackTrace(); } catch (NullOrNegativeRangeException e) { e.printStackTrace(); } } } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } System.out.println(getInputPath()+" reduced to "+getOutputPath()); } /** * 1. Removes all but the first track. * 2. Removes browser and track header lines of the first track * 3. Saves the output in the output directory and return the path of the output file. * * @param s the scanner reading the raw file. * @return the path of the output file. * @throws IOException if writing fails. */ private String filterTracks(Scanner s) throws IOException { String finput = Misc.path(getOutputPath())+ Misc.prefix(getInputPath()) + ".bed"; FileWriter fw = new FileWriter(finput); // Counters for browser header lines and track header lines. int browserRead = 0; int trackRead = 0; while (s.hasNextLine()) { String line = s.nextLine(); // Count browser lines. if (line.startsWith("browser")) { browserRead++; // Count header lines. } else if (line.startsWith("track")) { trackRead++; } else { // Write lines as long as they corresponds to the first track. if (trackRead <= 1 && browserRead <= 1) fw.write(line + "\n"); else // Otherwise cancel the computation. break; } } fw.close(); return finput; } /** * 1. Sort the target region file lexicographically by chromomose-name. * 2. If chr-names are identical then numerically by the start position. * 3. If start positions are identical then numerically by the end position. */ public void sort() { Runtime rt = Runtime.getRuntime(); try { String unsorted = getInputPath(); String tmpD = new File(getOutputPath()).getParentFile().getAbsolutePath(); String sorted = tmpD+Misc.slash(tmpD)+Misc.prefix(unsorted)+"Sorted"; setInputPath(sorted); if(!new File(getInputPath()).exists()) new File(getInputPath()).createNewFile(); String command = "sort -k1,1 -k2n,2 -k3n,3 -T "+tmpD+" "+unsorted; Process p = rt.exec(command); Scanner ps = new Scanner(p.getInputStream()); FileWriter fw = new FileWriter(getInputPath()); while(ps.hasNextLine()){ String nextLine = ps.nextLine(); fw.write(nextLine+"\n"); } fw.close(); new File(sorted).renameTo(new File(unsorted)); setInputPath(unsorted); System.out.println("Target file "+new File(unsorted).getAbsolutePath()+" sorted"); } catch (IOException e1) { e1.printStackTrace(); } } public static void main(String[] args){ new TargetFilter("/home/abdallah/Desktop/input/Agilent_SureSelect_50Mb.bed", "/home/abdallah/Desktop/output/Agilent_SureSelect_50Mb_Output.bed") .filter(); } }