Galaxy |

Changeset 6:769e306b7933 (2013-01-18)

Previous changeset 5:ea3082881bf8 (2013-01-18) Next changeset 7:86c781421239 (2013-01-18)

Commit message:
Change the repository level.

diff -r ea3082881bf8 -r 769e306b7933 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,58 @@
+----------
+| NAME |
+----------
+S-MART
+
+
+Description
+-----------
+Several tools are now available for mapping high-throughput sequencing data from a genome, but few can extract biological knowledge from the mapped reads. We have developed a toolbox, S-MART, which handles mapped RNA-Seq and ChIP-Seq data.
+
+S-MART is an intuitive and lightweight tool, performing several tasks that are usually required during the analysis of mapped RNA-Seq and ChIP-Seq reads, including data selection and data visualization.
+
+S-MART does not require a computer science background and thus can be used by all biologists through a graphical interface. S-MART can run on any personal computer, yielding results within an hour for most queries.
+
+
+Instructions
+------------
+Environment Installation :
+1). Put the downloaded S-MART directories into your Galaxy tools directory.
+2). Open .bashrc (if it doesn't exist, create it).
+Write or add the S-MART directory path to PYTHONPATH :
+export PYTHONPATH=S-MART_PATH:/usr/lib/python2.7/
+(ex. export PYTHONPATH=/home/user/galaxy-dist/tools/s_mart/)
+3). Reload .bashrc by running the command ". .bashrc" from the directory that contains it.
+
+Supplementary Software :
+ * R, under the GNU General Public License
+ * Python, under the Python License, compatible with the GNU General Public License
+Installation instructions and the user guide are available in the file
+"documentation.pdf".
+
+
+Copyright
+---------
+Copyright INRA-URGI 2009-2013
+
+
+Authors
+-------
+Matthias Zytnicki
+Yufei Luo
+
+Contact
+-------
+urgi-support@versailles.inra.fr
+
+
+License
+-------
+This library is distributed under the terms of the CeCILL license
+(http://www.cecill.info/index.en.html).
+See the LICENSE.txt file.
+
+
+Acknowledgements
+----------------
+Yufei Luo was supported by the Plant Breeding and Genetics research division of
+the INRA, and by the Groupement d'intérêt scientifique IBISA.

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/File.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/File.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,55 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+/**
+ * Plain record describing one file: its name, its format type and its format.
+ */
+public class File {
+  // Package-visible: the Files class reads these fields directly.
+  String name;
+  String formatType;
+  String format;
+
+
+  /**
+   * Stores the three descriptors as given; no validation is performed.
+   *
+   * @param name   the file name
+   * @param type   the format type (kept in the formatType field)
+   * @param format the format
+   */
+  public File(String name, String type, String format) {
+    this.format     = format;
+    this.formatType = type;
+    this.name       = name;
+  }
+
+  /** @return the name given at construction */
+  public String getName() {
+    return name;
+  }
+
+  /** @return the format type given at construction */
+  public String getFormatType() {
+    return formatType;
+  }
+
+  /** @return the format given at construction */
+  public String getFormat() {
+    return format;
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Files.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Files.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,75 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+/**
+ * Registry of File descriptions, indexed by file name.
+ */
+public class Files {
+  // File name -> description. Adding a file whose name is already known
+  // replaces the previous entry (HashMap.put semantics).
+  HashMap <String, File> files;
+
+  /** Creates an empty registry. */
+  public Files () {
+    files = new HashMap < String, File> ();
+  }
+
+  /**
+   * Builds a File from the three descriptors and registers it.
+   *
+   * @param fileName the file name, used as the key
+   * @param type     the format type of the file
+   * @param format   the format of the file
+   */
+  public void addFile(String fileName, String type, String format) {
+    this.addFile(new File(fileName, type, format));
+  }
+
+  /**
+   * Registers a file under its own name.
+   *
+   * @param file the description to store; must not be null
+   */
+  public void addFile(File file) {
+    files.put(file.name, file);
+  }
+
+  /** Removes every registered file. */
+  public void clear() {
+    files.clear();
+  }
+
+  /**
+   * Finds a registered file, reporting problems on standard output.
+   *
+   * @param fileName the name to look for
+   * @param what     the label used in the "not found" message
+   * @return the description, or null if the name is null or unknown
+   */
+  private File lookup(String fileName, String what) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+      // Stop here: without this return a second, misleading
+      // "... file null is not found!" message was printed as well.
+      return null;
+    }
+    File file = files.get(fileName);
+    if (file == null) {
+      System.out.println("Oops! " + what + " of file " + fileName + " is not found!");
+    }
+    return file;
+  }
+
+  /**
+   * @param fileName the name of a registered file
+   * @return its format type, or null (after printing a message) if the
+   *         name is null or not registered
+   */
+  public String getType(String fileName) {
+    File file = lookup(fileName, "Format type");
+    return (file == null)? null: file.formatType;
+  }
+
+  /**
+   * @param fileName the name of a registered file
+   * @return its format, or null (after printing a message) if the name is
+   *         null or not registered
+   */
+  public String getFormat(String fileName) {
+    File file = lookup(fileName, "Format");
+    return (file == null)? null: file.format;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/FormatType.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatType.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,64 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+/**
+ * A named format type together with the list of formats that belong to it.
+ */
+public class FormatType {
+  String type;
+  Vector < String > formats;
+
+  /**
+   * Creates a format type with no format yet.
+   *
+   * @param type the name of the format type
+   */
+  public FormatType (String type) {
+    this.type    = type;
+    this.formats = new Vector < String > ();
+  }
+
+  /** @return the name of this format type */
+  public String getType () {
+    return this.type;
+  }
+
+  /**
+   * Appends a format to this type. Duplicates are not filtered.
+   *
+   * @param format the format to add
+   */
+  public void addFormat (String format) {
+    formats.add(format);
+  }
+
+  /**
+   * Tells whether a format belongs to this type, ignoring case.
+   *
+   * @param format the format to look for; null is never contained
+   * @return true if one of the stored formats equals it, case-insensitively
+   */
+  public boolean containsFormat (String format) {
+    for (String known: formats) {
+      // equalsIgnoreCase returns false for a null argument, whereas the
+      // former compareToIgnoreCase call threw a NullPointerException.
+      if (known != null && known.equalsIgnoreCase(format)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** @return the live list of formats (not a copy) */
+  public Vector < String > getFormats () {
+    return formats;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/FormatsContainer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatsContainer.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,90 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+/**
+ * Collection of FormatType objects, indexed by the name of the format type.
+ */
+public class FormatsContainer {
+
+  // Format type name -> format type.
+  HashMap < String, FormatType > formatTypes;
+
+
+  /** Creates an empty container. */
+  public FormatsContainer() {
+    this.formatTypes = new HashMap < String, FormatType > ();
+  }
+
+
+  /**
+   * Adds a format to a format type, creating the type on first use.
+   *
+   * @param type   the name of the format type
+   * @param format the format to add to this type
+   */
+  public void addFormat(String type, String format) {
+    FormatType formatType = this.formatTypes.get(type);
+    if (formatType == null) {
+      formatType = new FormatType(type);
+      this.formatTypes.put(type, formatType);
+    }
+    formatType.addFormat(format);
+  }
+
+
+  /** @return a new vector holding the names of all known format types */
+  public Vector < String > getFormatTypes () {
+    return new Vector < String > (this.formatTypes.keySet());
+  }
+
+
+  /**
+   * Gets a format type by name. When the name is unknown, the available
+   * names are printed on standard output.
+   *
+   * @param type the name of the format type
+   * @return the format type, or null if the name is unknown
+   */
+  public FormatType getFormats (String type) {
+    if (! formatTypes.containsKey(type)) {
+      System.out.print("Format type " + type + " is unavailable. Got: ");
+      for (String availableType: formatTypes.keySet()) {
+        System.out.print(availableType + " ");
+      }
+      System.out.println();
+    }
+    return formatTypes.get(type);
+  }
+
+
+  /**
+   * Finds the format type which contains a given format.
+   *
+   * @param format the format to look for
+   * @return the name of the first type found containing this format (in
+   *         the iteration order of the map), or null if there is none
+   */
+  public String getFormatType (String format) {
+    // Typed iteration over the entries replaces the former raw Iterator
+    // and its unchecked casts.
+    for (Map.Entry < String, FormatType > entry: formatTypes.entrySet()) {
+      if (entry.getValue().containsFormat(format)) {
+        return entry.getKey();
+      }
+    }
+    return null;
+  }
+}
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/FormatsReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/FormatsReader.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,83 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+/**
+ * Reads a formats file and registers its content into Global.formats.
+ *
+ * Each non-blank line is expected to look like
+ *   typeName: format1, format2, ...
+ */
+public class FormatsReader {
+
+  String fileName;
+  Vector < FormatType > formatTypes;
+  Vector < String > typeNames;
+
+
+  /**
+   * @param fileName the path of the formats file to read
+   */
+  public FormatsReader(String fileName) {
+    this.fileName    = fileName;
+    this.formatTypes = new Vector < FormatType > ();
+    // Was left null before; initialised so that it can be used safely.
+    this.typeNames   = new Vector < String > ();
+  }
+
+
+  /**
+   * Parses the file and adds every (type, format) pair to Global.formats.
+   * Blank lines are ignored. Lines without a ':' separator, or with nothing
+   * after it, are reported on standard output and skipped instead of
+   * raising an ArrayIndexOutOfBoundsException.
+   *
+   * @return true if the file could be read, false if it is missing or an
+   *         I/O error occurred while reading it
+   */
+  public boolean read() {
+    File file = new File(this.fileName);
+    BufferedReader reader = null;
+
+    try {
+      reader = new BufferedReader(new FileReader(file));
+      String line;
+
+      while ((line = reader.readLine()) != null) {
+        if (line.trim().length() == 0) {
+          continue;
+        }
+        String[] lineElements = line.split(":");
+        if (lineElements.length < 2) {
+          System.out.println("Warning! Skipping malformed line '" + line + "' in file " + this.fileName + ".");
+          continue;
+        }
+        String   typeName = lineElements[0].trim();
+        String[] formats  = lineElements[1].split(",");
+        for (int i = 0; i < formats.length; i++) {
+          Global.formats.addFormat(typeName, formats[i].trim());
+        }
+      }
+    }
+    catch (FileNotFoundException e) {
+      return false;
+    }
+    catch (IOException e) {
+      return false;
+    }
+    finally {
+      // Always release the file handle, even when reading failed.
+      if (reader != null) {
+        try {
+          reader.close();
+        }
+        catch (IOException e) {
+          // Nothing useful can be done if closing a reader fails.
+        }
+      }
+    }
+
+    return true;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Global.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Global.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,70 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.Vector;
+import java.util.HashMap;
+import javax.swing.DefaultListModel;
+import javax.swing.JButton;
+import javax.swing.JTextField;
+
+/**
+ * Holder of the application-wide settings and shared state. Every member is
+ * a public, mutable static field.
+ */
+public class Global {
+
+  // Size of the log area (the unit is not visible from this class).
+  public static int logAreaSize = 100;
+
+  // Names of the S-MART configuration, programs and formats files.
+  public static String smartConfFileName     = "smart.conf";
+  public static String smartProgramsFileName = "programs.txt";
+  public static String smartFormatsFileName  = "formats.txt";
+
+  // Python path; empty until it is set elsewhere.
+  public static String pythonPath = new String();
+
+  // Default command names for the external tools.
+  public static String pythonCommand = "python";
+  public static String mysqlCommand  = "mysql";
+  public static String rCommand      = "R";
+
+  // Known files, and their names.
+  public static Files files = new Files();
+  public static Vector < String > fileNames = new Vector < String >();
+
+  // Known formats, grouped by format type.
+  public static FormatsContainer formats = new FormatsContainer();
+
+  // Flag set while a program is running (NOTE(review): maintained by code outside this class -- confirm).
+  public static boolean programRunning = false;
+
+  // Button -> text field maps; presumably each chooser button with the
+  // text field it fills in -- TODO confirm against the GUI code.
+  public static HashMap < JButton, JTextField > otherFilesChooser             = new HashMap < JButton, JTextField >();
+  public static HashMap < JButton, JTextField > otherDirectoriesChooser       = new HashMap < JButton, JTextField >();
+  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
+
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/Old/PasswordAsker.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/PasswordAsker.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,87 @@
+import java.awt.*;
+import java.awt.event.*;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * Small Swing window which asks for the MySQL root password and lets the
+ * calling thread wait until the user has pressed OK.
+ *
+ * The state is held in static fields, so it is shared by every instance.
+ */
+public class PasswordAsker {
+
+  static String password;
+  static JFrame frame;
+  static CountDownLatch latch;
+
+
+  /**
+   * Resets the stored password and schedules the creation of the window on
+   * the event dispatch thread.
+   */
+  public PasswordAsker() {
+    password = null;
+    // Create the latch BEFORE scheduling the GUI: the OK listener calls
+    // latch.countDown(), so the latch must exist by the time the window
+    // can be shown. It used to be created after invokeLater().
+    latch = new CountDownLatch(1);
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+  }
+
+
+  /** Builds the frame, fills it with the main pane and displays it. */
+  private static void createAndShowGUI() {
+    //Create and set up the window.
+    frame = new JFrame("Password");
+    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+    frame.setContentPane(setMainPane());
+
+    //Display the window.
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+
+  /**
+   * Builds the content of the window: an information label, the password
+   * field and the OK button.
+   *
+   * @return the panel to use as content pane
+   */
+  private static JPanel setMainPane() {
+    JPanel rootPanel = new JPanel(false);
+    rootPanel.setLayout(new GridLayout(0, 1));
+
+    JPanel infoPanel = new JPanel(false);
+    JLabel infoLabel = new JLabel("Please write here the password that you entered for the mySQL root account.\r\nNo information is stored nor sent. I promise.");
+    infoPanel.add(infoLabel);
+
+    JPanel passPanel = new JPanel(false);
+    passPanel.setLayout(new GridLayout(1, 0));
+    JLabel passLabel = new JLabel("password");
+    // A JPasswordField masks the typed characters, unlike the plain
+    // JTextField used before.
+    final JPasswordField passText = new JPasswordField(20);
+    passLabel.setLabelFor(passText);
+    passPanel.add(passLabel);
+    passPanel.add(passText);
+
+    JPanel  okPanel  = new JPanel(false);
+    JButton okButton = new JButton("OK");
+    okPanel.add(okButton);
+
+    okButton.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        // Store the password, close the window, then release the waiter.
+        password = new String(passText.getPassword());
+        frame.setVisible(false);
+        frame.dispose();
+        latch.countDown();
+      }
+    });
+
+    rootPanel.add(infoPanel);
+    rootPanel.add(passPanel);
+    rootPanel.add(okPanel);
+
+    return rootPanel;
+  }
+
+
+  /**
+   * Blocks until the user has pressed OK.
+   *
+   * @return true once the password has been entered, false if the waiting
+   *         thread was interrupted
+   */
+  public boolean waitForPassword() {
+    try {
+      latch.await();
+    }
+    catch (InterruptedException e) {
+      return false;
+    }
+    return true;
+  }
+
+
+  /** @return the password entered, or null if OK was not pressed yet */
+  public String getPassword() {
+    return password;
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/Old/SmartInstaller.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/SmartInstaller.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,167 @@
+import java.util.*;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.*;
+import javax.swing.*;
+import javax.swing.filechooser.*;
+import javax.swing.border.*;
+import javax.swing.SwingUtilities;
+import java.net.*;
+
+/**
+ * Main panel of the S-MART installer: lets the user choose the
+ * configuration (32 or 64 bits), the programs to install and the
+ * installation directory, then starts a SmartInstallerTask and shows its
+ * log.
+ */
+public class SmartInstaller extends JPanel implements ActionListener {
+  int       BUFFER = 1024;
+
+  // Frame which contains this panel; set by createAndShowGUI().
+  JFrame    mainFrame;
+  JTextArea logArea;
+
+  // configuration chooser buttons
+  String       configurations[] = {"32 bits", "64 bits"};
+  JRadioButton configurationButtons[];
+
+  // program chooser buttons
+  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"};
+  JCheckBox programChooserButtons[];
+
+  JButton   goButton;
+
+  // install directory
+  JButton    installDirectoryChooserButton;
+  JTextField installDirectoryChooserTextField;
+
+
+  /**
+   * Builds the whole panel: header, configuration chooser, program
+   * chooser, installation directory chooser, GO button and log area,
+   * stacked vertically.
+   */
+  public SmartInstaller() {
+    super();
+
+    Box box = Box.createVerticalBox();
+
+    // Header
+    JPanel       headerPanel = new JPanel(false);
+    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.\r\nPlease remember the root password if you install MySQL!");
+    TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!");
+    headerArea.setEditable(false);
+    headerArea.setBackground(headerPanel.getBackground());
+    headerPanel.add(headerArea);
+    headerPanel.setBorder(headerBorder);
+
+
+    // Configuration: one radio button per entry, the first one selected
+    JPanel configurationPanel = new JPanel(false);
+    configurationPanel.setLayout(new GridLayout(1, 0));
+    configurationButtons = new JRadioButton[configurations.length];
+    ButtonGroup configurationGroup = new ButtonGroup();
+    for (int i = 0; i < configurations.length; i++) {
+      JRadioButton button = new JRadioButton(configurations[i]);
+      configurationPanel.add(button);
+      configurationButtons[i] = button;
+      configurationGroup.add(button);
+    }
+    configurationButtons[0].setSelected(true);
+    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
+    configurationPanel.setBorder(configurationBorder);
+
+
+    // Program chooser panel: one check box per program, all selected
+    JPanel programPanel = new JPanel(false);
+    programPanel.setLayout(new GridLayout(0, 1));
+
+    JLabel programLabel = new JLabel("Choose which programs to install:");
+    programPanel.add(programLabel);
+    programChooserButtons = new JCheckBox[programChoosers.length];
+    for (int i = 0; i < programChoosers.length; i++) {
+      JCheckBox button = new JCheckBox(programChoosers[i]);
+      button.setSelected(true);
+      programPanel.add(button);
+      programChooserButtons[i] = button;
+    }
+    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
+    programPanel.setBorder(programBorder);
+
+    // Install directory chooser
+    JPanel installDirectoryChooserPanel = new JPanel(false);
+    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
+    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
+    installDirectoryChooserTextField = new JTextField();
+    installDirectoryChooserButton = new JButton("Open...");
+    installDirectoryChooserButton.addActionListener(this);
+
+    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
+    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
+    installDirectoryChooserPanel.add(installDirectoryChooserButton);
+    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
+    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
+
+    // GO!
+    JPanel goPanel = new JPanel(false);
+    goButton = new JButton("GO!");
+    goButton.addActionListener(this);
+    goButton.setSelected(true);
+    goPanel.add(goButton);
+    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
+    goPanel.setBorder(goBorder);
+
+    // Log
+    logArea = new JTextArea(10, 120);
+    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
+    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
+    logScroll.setBorder(logBorder);
+
+    box.add(headerPanel);
+    box.add(configurationPanel);
+    box.add(programPanel);
+    box.add(installDirectoryChooserPanel);
+    box.add(goPanel);
+    box.add(logScroll);
+
+    add(box);
+  }
+
+
+  /**
+   * Reacts to the GO button (starts the installation task) and to the
+   * "Open..." button (lets the user pick the installation directory).
+   *
+   * @param e the event; its source tells which button was pressed
+   */
+  public void actionPerformed(ActionEvent e) {
+
+    // Start the installation
+    if (e.getSource() == goButton) {
+      boolean[] selectedPrograms = new boolean[programChoosers.length];
+      for (int i = 0; i < programChoosers.length; i++) {
+        selectedPrograms[i] = programChooserButtons[i].isSelected();
+      }
+      // Last argument: index of the selected configuration (0 or 1)
+      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1);
+      task.execute();
+    }
+    // Install directories chooser
+    else if (e.getSource() == installDirectoryChooserButton) {
+      JFileChooser chooser = new JFileChooser();
+      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {
+        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
+      }
+    }
+  }
+
+  /** Creates the installer window, puts a new panel in it and shows it. */
+  private static void createAndShowGUI() {
+    // Create and set up the window.
+    JFrame frame = new JFrame("S-Mart Installer");
+    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+    //Create and set up the content pane.
+    SmartInstaller installer = new SmartInstaller();
+    // Give the panel its frame. A local variable used to shadow the
+    // mainFrame field, which therefore stayed null and the directory
+    // chooser dialog was opened without a parent.
+    installer.mainFrame = frame;
+    installer.setOpaque(true);
+    frame.setContentPane(installer);
+
+    // Display the window.
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+
+  /**
+   * Entry point: builds the GUI on the event dispatch thread.
+   *
+   * @param args unused
+   */
+  public static void main(String[] args) {
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/Old/SmartInstallerTask.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/Old/SmartInstallerTask.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,455 @@\n+import java.util.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+import java.net.*;\n+import java.util.Stack;\n+import java.util.zip.ZipEntry;\n+import java.util.zip.ZipInputStream;\n+\n+public class SmartInstallerTask extends SwingWorker<Boolean, String> {\n+\n+ int BUFFER = 1024;\n+\n+ int architecture = 0;\n+ String installDirectoryName = null;\n+ JTextArea logArea = null;\n+ boolean[] selectedPrograms = null;\n+\n+ // program chooser buttons\n+ String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"};\n+\n+ // Web addresses for the tools\n+ String packageAddresses[][] = {\n+ {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"},\n+ {"", ""},\n+ {"", ""},\n+ {"http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-win32.msi", "http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-winx64.msi"},\n+ {"", ""},\n+ {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"},\n+ {"http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe", "http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe"},\n+ {"http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip", "http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip"}\n+ };\n+\n+ // Packages to install\n+ String rPackages[] = {"RColorBrewer", "Hmisc"};\n+\n+ // Script lines\n+ String scriptLines[][] = {\n+ {"\\"<INSTALLDIR>\\\\R-2.11.0-win32.exe\\"", "\\"<INSTALLDIR>\\\\R-2.11.0-win64.exe\\""},\n+ {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\"", "\\"<RFILE>\\" CMD BATCH 
\\"<INSTALLDIR>\\\\installRColorBrewer.R\\""},\n+ {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\""},\n+ {"msiexec /i \\"<INSTALLDIR>\\\\mysql-essential-5.1.47-win32.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\mysql-essential-5.1.47-winx64.msi\\""},\n+ {"", ""},\n+ {"msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.amd64.msi\\""},\n+ {"<INSTALLDIR>\\\\MySQL-python-1.2.2.win32-py2.6.exe", "<INSTALLDIR>\\\\MySQL-python-1.2.2.win32-py2.6.exe"},\n+ {"", ""}\n+ };\n+\n+ // Files to uncompress\n+ String compressedFiles[][] = {\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"<INSTALLDIR>\\\\s-mart.zip", "<INSTALLDIR>\\\\s-mart.zip"}\n+ };\n+\n+\n+ public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) {\n+ logArea = ta;\n+ selectedPrograms = b;\n+ installDirectoryName = s;\n+ architecture = a;\n+ }\n+\n+\n+ @Override\n+ public Boolean doInBackground() {\n+ boolean installOk;\n+ publish("Starting install\\n");\n+ writeFiles();\n+ for (int i = 0; i < selectedPrograms.length; i++) {\n+ if (selectedPrograms[i]) {\n+ if (! 
install(i)) {\n+ return Boolean.FALSE;\n+ }\n+ }\n+ }\n+ removeFiles();\n+ setEnvironmentVariables();\n+ publish("Ending install\\n");\n+ return Boolean.TRUE;\n+ }\n+\n+\n+ @Override\n+ protected void process(List<String> chunks) {\n+ for (String chunk: chunks) {\n+ logArea.append(chunk);\n+ }\n+ }\n+\n+\n+ private boolean launch(String command) {\n+ return realLaunch(new ProcessBuilder(command), command);\n+ }\n+\n+ private boolean launch(String[] command) {\n+ return realLaunch(new ProcessBuilder(command), Arrays.toString(command));\n+ }\n+\n+ private boolean realLaunch(ProcessBuilder pb, String command) {\n+ BufferedReader outputReader;\n+ pb = pb.redirectErrorStream(true);\n+ Process process '..b' boolean uncompressPackage(int element) {\n+ String file = compressedFiles[element][architecture];\n+ if (! "".equals(file)) {\n+ file = replaceSubstring(file);\n+ publish(" Starting uncompressing file \'" + file + "\'\\n");\n+ try {\n+ FileInputStream fis = new FileInputStream(file);\n+ BufferedInputStream bis = new BufferedInputStream(fis);\n+ ZipInputStream zis = new ZipInputStream(bis);\n+ ZipEntry entry;\n+ while ((entry = zis.getNextEntry()) != null) {\n+ if (! entry.isDirectory()) {\n+ File newFile = new File(installDirectoryName + File.separator + entry.getName());\n+ // create parent directories\n+ File upDirectory = newFile.getParentFile();\n+ while (upDirectory != null){\n+ if (! 
upDirectory.exists()) {\n+ upDirectory.mkdir();\n+ publish(" Creating directory \'" + upDirectory.getAbsolutePath() + "\'\\n");\n+ }\n+ upDirectory = upDirectory.getParentFile();\n+ }\n+ // write the files to the disk\n+ publish(" Extracting \'" + entry.getName() + "\' to \'" + newFile.getAbsolutePath() + "\'\\n");\n+ int count;\n+ byte data[] = new byte[BUFFER];\n+ FileOutputStream fos = new FileOutputStream(newFile);\n+ BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER);\n+ while ((count = zis.read(data, 0, BUFFER)) != -1){\n+ bos.write(data, 0, count);\n+ }\n+ bos.flush();\n+ bos.close();\n+ fos.close();\n+ }\n+ }\n+ zis.close();\n+ bis.close();\n+ fis.close();\n+ }\n+ catch(FileNotFoundException e) {\n+ publish(" !Cannot find file \'" + file + "\'!\\n");\n+ return false;\n+ }\n+ catch(Exception e){\n+ publish(" !Cannot uncompress file \'" + file + "\'!\\n");\n+ return false;\n+ }\n+ publish(" Ending uncompressing file \'" + file + "\'\\n");\n+ }\n+ return true;\n+ }\n+\n+\n+ private boolean removePackage(int element) {\n+ String packageName = packageAddresses[element][architecture];\n+ if ("".equals(packageName)) {\n+ return true;\n+ }\n+ String fileName = getLocalName(packageAddresses[element][architecture]);\n+ return removeFile(fileName);\n+ }\n+\n+\n+ private boolean postProcess(int element) {\n+ switch (element) {\n+ case 4:\n+ // Create mySQL user\n+ PasswordAsker pa = new PasswordAsker();\n+ if (! pa.waitForPassword()) {\n+ publish("Problem in the password asker!\\n");\n+ return false;\n+ }\n+ String command = "\\"<MYSQLFILE>\\" --user=root --password=" + pa.getPassword() + " -e \\"source <INSTALLDIR>\\\\createUser.sql\\"";\n+ command = replaceSubstring(command);\n+ if (! 
launch(command)) {\n+ publish(" !Cannot create SQL accounts!\\n");\n+ return false;\n+ }\n+ return true;\n+ case 7:\n+ // Move S-MART files to parent directory\n+ File installDirectory = new File(installDirectoryName + File.separator + "S-Mart");\n+ for (File file: installDirectory.listFiles()) {\n+ File destinationFile = new File(file.getParentFile().getParentFile(), file.getName());\n+ if (! file.renameTo(destinationFile)) {\n+ publish(" !Cannot move \'" + file.getAbsolutePath() + "\' to \'" + destinationFile.getAbsolutePath() + "\'!\\n");\n+ }\n+ }\n+ if (! installDirectory.delete()) {\n+ publish(" !Cannot remove installation S-MART directory \'" + installDirectory.getAbsolutePath() + "\'!\\n");\n+ }\n+ }\n+ return true;\n+ }\n+\n+\n+ private boolean setEnvironmentVariables() {\n+ String[] command = {"REG", "ADD", "HKCU\\\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\\"" + installDirectoryName + "\\\\Python\\"", "/f"};\n+ return launch(command);\n+ }\n+}\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/PasswordAsker.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/PasswordAsker.java Fri Jan 18 04:54:14 2013 -0500

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/SmartInstaller.jar

Binary file SMART/Java/Installer/SmartInstaller.jar has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/SmartInstaller.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/SmartInstaller.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,167 @@
+import java.util.*;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.*;
+import javax.swing.*;
+import javax.swing.filechooser.*;
+import javax.swing.border.*;
+import javax.swing.SwingUtilities;
+import java.net.*;
+
+/**
+ * Main panel of the S-MART installer.
+ *
+ * The panel lets the user pick the architecture (32/64 bits), the programs to
+ * install and the installation directory, then starts a SmartInstallerTask
+ * whose messages are appended to the log area.
+ */
+public class SmartInstaller extends JPanel implements ActionListener {
+  int       BUFFER = 1024;
+
+  // Window hosting this panel; set by createAndShowGUI() and used as the
+  // parent of the directory chooser dialog.
+  JFrame    mainFrame;
+  JTextArea logArea;
+
+  // configuration chooser buttons
+  String       configurations[] = {"32 bits", "64 bits"};
+  JRadioButton configurationButtons[];
+
+  // program chooser buttons
+  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};
+  JCheckBox programChooserButtons[];
+
+  JButton   goButton;
+
+  // install directory
+  JButton    installDirectoryChooserButton;
+  JTextField installDirectoryChooserTextField;
+
+
+  /**
+   * Builds all the widgets and stacks them vertically: header, configuration,
+   * program chooser, installation directory, GO button and log.
+   */
+  public SmartInstaller() {
+    super();
+
+    Box box = Box.createVerticalBox();
+
+    // Header
+    JPanel       headerPanel = new JPanel(false);
+    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.");
+    TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!");
+    headerArea.setEditable(false);
+    headerArea.setBackground(headerPanel.getBackground());
+    headerPanel.add(headerArea);
+    headerPanel.setBorder(headerBorder);
+
+
+    // Configuration (one radio button per architecture, first one selected)
+    JPanel configurationPanel = new JPanel(false);
+    configurationPanel.setLayout(new GridLayout(1, 0));
+    configurationButtons = new JRadioButton[configurations.length];
+    ButtonGroup configurationGroup = new ButtonGroup();
+    for (int i = 0; i < configurations.length; i++) {
+      JRadioButton button = new JRadioButton(configurations[i]);
+      configurationPanel.add(button);
+      configurationButtons[i] = button;
+      configurationGroup.add(button);
+    }
+    configurationButtons[0].setSelected(true);
+    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
+    configurationPanel.setBorder(configurationBorder);
+
+
+    // Program chooser panel (one check box per program, all selected)
+    JPanel programPanel = new JPanel(false);
+    programPanel.setLayout(new GridLayout(0, 1));
+
+    JLabel programLabel = new JLabel("Choose which programs to install:");
+    programPanel.add(programLabel);
+    programChooserButtons = new JCheckBox[programChoosers.length];
+    for (int i = 0; i < programChoosers.length; i++) {
+      JCheckBox button = new JCheckBox(programChoosers[i]);
+      button.setSelected(true);
+      programPanel.add(button);
+      programChooserButtons[i] = button;
+    }
+    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
+    programPanel.setBorder(programBorder);
+
+    // Install directory chooser
+    JPanel installDirectoryChooserPanel = new JPanel(false);
+    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
+    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
+    installDirectoryChooserTextField = new JTextField();
+    installDirectoryChooserButton = new JButton("Open...");
+    installDirectoryChooserButton.addActionListener(this);
+
+    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
+    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
+    installDirectoryChooserPanel.add(installDirectoryChooserButton);
+    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
+    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
+
+    // GO!
+    JPanel goPanel = new JPanel(false);
+    goButton = new JButton("GO!");
+    goButton.addActionListener(this);
+    goButton.setSelected(true);
+    goPanel.add(goButton);
+    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
+    goPanel.setBorder(goBorder);
+
+    // Log
+    logArea = new JTextArea(10, 120);
+    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
+    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
+    logScroll.setBorder(logBorder);
+
+    box.add(headerPanel);
+    box.add(configurationPanel);
+    box.add(programPanel);
+    box.add(installDirectoryChooserPanel);
+    box.add(goPanel);
+    box.add(logScroll);
+
+    add(box);
+  }
+
+
+  /**
+   * Reacts to the two buttons of the panel: "GO!" starts the installation,
+   * "Open..." lets the user browse for the installation directory.
+   *
+   * @param e the event; only its source is inspected
+   */
+  public void actionPerformed(ActionEvent e) {
+
+    // GO button: collect the choices and start the installation task
+    if (e.getSource() == goButton) {
+      String installDirectoryName = installDirectoryChooserTextField.getText();
+      // Without a directory, the task would build its paths from an empty
+      // prefix; refuse to start and tell the user instead.
+      if (installDirectoryName.trim().length() == 0) {
+        logArea.append("Please choose an installation directory before starting the install.\n");
+        return;
+      }
+      boolean[] selectedPrograms = new boolean[programChoosers.length];
+      for (int i = 0; i < programChoosers.length; i++) {
+        selectedPrograms[i] = programChooserButtons[i].isSelected();
+      }
+      // architecture index: 0 for the first configuration ("32 bits"), 1 otherwise
+      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryName, (configurationButtons[0].isSelected())? 0: 1);
+      task.execute();
+    }
+    // Install directories chooser
+    else if (e.getSource() == installDirectoryChooserButton) {
+      JFileChooser chooser = new JFileChooser();
+      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      // Fall back on this panel when no window has been registered
+      Component parent = (mainFrame != null)? mainFrame: this;
+      if (chooser.showOpenDialog(parent) == JFileChooser.APPROVE_OPTION) {
+        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
+      }
+    }
+  }
+
+  /**
+   * Creates the installer window, puts a SmartInstaller panel in it and shows
+   * it. Called through invokeLater() from main().
+   */
+  private static void createAndShowGUI() {
+    // Create and set up the window.
+    JFrame frame = new JFrame("S-Mart Installer");
+    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+    // Create and set up the content pane, and register its window so that
+    // the dialogs opened by the panel have a proper parent.
+    SmartInstaller newContentPane = new SmartInstaller();
+    newContentPane.mainFrame = frame;
+    newContentPane.setOpaque(true);
+    frame.setContentPane(newContentPane);
+
+    // Display the window.
+    frame.pack();
+    frame.setVisible(true);
+  }
+
+
+  /**
+   * Entry point: schedules the creation of the GUI with invokeLater().
+   *
+   * @param args unused
+   */
+  public static void main(String[] args) {
+    javax.swing.SwingUtilities.invokeLater(new Runnable() {
+      public void run() {
+        createAndShowGUI();
+      }
+    });
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/SmartInstallerTask.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/SmartInstallerTask.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,419 @@\n+import java.util.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+import java.net.*;\n+import java.util.Stack;\n+import java.util.zip.ZipEntry;\n+import java.util.zip.ZipInputStream;\n+\n+public class SmartInstallerTask extends SwingWorker<Boolean, String> {\n+\n+ int BUFFER = 1024;\n+\n+ int architecture = 0;\n+ String installDirectoryName = null;\n+ JTextArea logArea = null;\n+ boolean[] selectedPrograms = null;\n+\n+ // program chooser buttons\n+ String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};\n+\n+ // Web addresses for the tools\n+ String packageAddresses[][] = {\n+ {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"},\n+ {"", ""},\n+ {"", ""},\n+ {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"},\n+ {"http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip", "http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip"}\n+ };\n+\n+ // Packages to install\n+ String rPackages[] = {"RColorBrewer", "Hmisc"};\n+\n+ // Script lines\n+ String scriptLines[][] = {\n+ {"\\"<INSTALLDIR>\\\\R-2.11.0-win32.exe\\"", "\\"<INSTALLDIR>\\\\R-2.11.0-win64.exe\\""},\n+ {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\""},\n+ {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\""},\n+ {"msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.amd64.msi\\""},\n+ {"", ""}\n+ };\n+\n+ // Files to uncompress\n+ String compressedFiles[][] = {\n+ 
{"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"", ""},\n+ {"<INSTALLDIR>\\\\s-mart-1.0.15.zip", "<INSTALLDIR>\\\\s-mart-1.0.15.zip"}\n+ };\n+\n+\n+ public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) {\n+ logArea = ta;\n+ selectedPrograms = b;\n+ installDirectoryName = s;\n+ architecture = a;\n+ }\n+\n+\n+ @Override\n+ public Boolean doInBackground() {\n+ boolean installOk;\n+ publish("Starting install\\n");\n+ writeFiles();\n+ for (int i = 0; i < selectedPrograms.length; i++) {\n+ if (selectedPrograms[i]) {\n+ if (! install(i)) {\n+ return Boolean.FALSE;\n+ }\n+ }\n+ }\n+ removeFiles();\n+ setEnvironmentVariables();\n+ publish("Ending install\\n");\n+ return Boolean.TRUE;\n+ }\n+\n+\n+ @Override\n+ protected void process(List<String> chunks) {\n+ for (String chunk: chunks) {\n+ logArea.append(chunk);\n+ }\n+ }\n+\n+\n+ private boolean launch(String command) {\n+ return realLaunch(new ProcessBuilder(command), command);\n+ }\n+\n+ private boolean launch(String[] command) {\n+ return realLaunch(new ProcessBuilder(command), Arrays.toString(command));\n+ }\n+\n+ private boolean realLaunch(ProcessBuilder pb, String command) {\n+ BufferedReader outputReader;\n+ pb = pb.redirectErrorStream(true);\n+ Process process = null;\n+ publish(" Starting command \'" + command + "\'\\n");\n+ try {\n+ process = pb.start();\n+ BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());\n+ InputStream is = process.getInputStream();\n+ InputStreamReader isr = new InputStreamReader(is);\n+ outputReader = new BufferedReader(isr);\n+ }\n+ catch (Exception exception) {\n+ publish(" !Process cannot be started (command is \'" + command + "\')!\\n");\n+ exception.printStackTrace();\n+ return false;\n+ }\n+ if (outputReader == null) {\n+ publish(" !Problem in the outp'..b'turn false;\n+ }\n+ try {\n+ process.waitFor();\n+ }\n+ catch (InterruptedException e) {\n+ publish(" !Cannot wait for the end of the command \'" + command + "\'!\\n");\n+ return false;\n+ 
}\n+ int exitValue = process.exitValue();\n+ if (exitValue != 0) {\n+ publish(" !Problem during the execution of the command \'" + command + "\'!\\n");\n+ return false;\n+ }\n+ publish(" Ending command \'" + command + "\'\\n");\n+ }\n+ }\n+ return true;\n+ }\n+\n+\n+ private boolean uncompressPackage(int element) {\n+ String file = compressedFiles[element][architecture];\n+ if (! "".equals(file)) {\n+ file = replaceSubstring(file);\n+ publish(" Starting uncompressing file \'" + file + "\'\\n");\n+ try {\n+ FileInputStream fis = new FileInputStream(file);\n+ BufferedInputStream bis = new BufferedInputStream(fis);\n+ ZipInputStream zis = new ZipInputStream(bis);\n+ ZipEntry entry;\n+ while ((entry = zis.getNextEntry()) != null) {\n+ if (! entry.isDirectory()) {\n+ File newFile = new File(installDirectoryName + File.separator + entry.getName());\n+ // create parent directories\n+ File upDirectory = newFile.getParentFile();\n+ while (upDirectory != null){\n+ if (! upDirectory.exists()) {\n+ upDirectory.mkdir();\n+ publish(" Creating directory \'" + upDirectory.getAbsolutePath() + "\'\\n");\n+ }\n+ upDirectory = upDirectory.getParentFile();\n+ }\n+ // write the files to the disk\n+ publish(" Extracting \'" + entry.getName() + "\' to \'" + newFile.getAbsolutePath() + "\'\\n");\n+ int count;\n+ byte data[] = new byte[BUFFER];\n+ FileOutputStream fos = new FileOutputStream(newFile);\n+ BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER);\n+ while ((count = zis.read(data, 0, BUFFER)) != -1){\n+ bos.write(data, 0, count);\n+ }\n+ bos.flush();\n+ bos.close();\n+ fos.close();\n+ }\n+ }\n+ zis.close();\n+ bis.close();\n+ fis.close();\n+ }\n+ catch(FileNotFoundException e) {\n+ publish(" !Cannot find file \'" + file + "\'!\\n");\n+ return false;\n+ }\n+ catch(Exception e){\n+ publish(" !Cannot uncompress file \'" + file + "\'!\\n");\n+ return false;\n+ }\n+ publish(" Ending uncompressing file \'" + file + "\'\\n");\n+ }\n+ return true;\n+ }\n+\n+\n+ private boolean 
removePackage(int element) {\n+ String packageName = packageAddresses[element][architecture];\n+ if ("".equals(packageName)) {\n+ return true;\n+ }\n+ String fileName = getLocalName(packageAddresses[element][architecture]);\n+ return removeFile(fileName);\n+ }\n+\n+\n+ private boolean postProcess(int element) {\n+ switch (element) {\n+ case 4:\n+ // Move S-MART files to parent directory\n+ File installDirectory = new File(installDirectoryName + File.separator + "S-Mart");\n+ for (File file: installDirectory.listFiles()) {\n+ File destinationFile = new File(file.getParentFile().getParentFile(), file.getName());\n+ if (! file.renameTo(destinationFile)) {\n+ publish(" !Cannot move \'" + file.getAbsolutePath() + "\' to \'" + destinationFile.getAbsolutePath() + "\'!\\n");\n+ }\n+ }\n+ if (! installDirectory.delete()) {\n+ publish(" !Cannot remove installation S-MART directory \'" + installDirectory.getAbsolutePath() + "\'!\\n");\n+ }\n+ }\n+ return true;\n+ }\n+\n+\n+ private boolean setEnvironmentVariables() {\n+ String[] command = {"REG", "ADD", "HKCU\\\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\\"" + installDirectoryName + "\\\\Python\\"", "/f"};\n+ return launch(command);\n+ }\n+}\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/build.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/build.sh Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,5 @@
+#! /bin/sh
+
+# Rebuild SmartInstaller.jar from the Java sources of this directory.
+# Stop at the first failing command: without this, a compilation error would
+# still run "jar" and could package stale .class files.
+set -e
+
+rm -rf SmartInstaller.jar
+javac *.java
+jar cvfm SmartInstaller.jar manifest.txt *.class

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/manifest.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Installer/manifest.txt Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Created-By: Matthias Zytnicki
+Main-Class: SmartInstaller

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Installer/s-mart.zip

Binary file SMART/Java/Installer/s-mart.zip has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/LICENSE.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/LICENSE.txt Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,506 @@\n+\n+CeCILL FREE SOFTWARE LICENSE AGREEMENT\n+\n+\n+ Notice\n+\n+This Agreement is a Free Software license agreement that is the result\n+of discussions between its authors in order to ensure compliance with\n+the two main principles guiding its drafting:\n+\n+ * firstly, compliance with the principles governing the distribution\n+ of Free Software: access to source code, broad rights granted to\n+ users,\n+ * secondly, the election of a governing law, French law, with which\n+ it is conformant, both as regards the law of torts and\n+ intellectual property law, and the protection that it offers to\n+ both authors and holders of the economic rights over software.\n+\n+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])\n+license are:\n+\n+Commissariat \xe0 l\'Energie Atomique - CEA, a public scientific, technical\n+and industrial research establishment, having its principal place of\n+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.\n+\n+Centre National de la Recherche Scientifique - CNRS, a public scientific\n+and technological establishment, having its principal place of business\n+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.\n+\n+Institut National de Recherche en Informatique et en Automatique -\n+INRIA, a public scientific and technological establishment, having its\n+principal place of business at Domaine de Voluceau, Rocquencourt, BP\n+105, 78153 Le Chesnay cedex, France.\n+\n+\n+ Preamble\n+\n+The purpose of this Free Software license agreement is to grant users\n+the right to modify and redistribute the software governed by this\n+license within the framework of an open source distribution model.\n+\n+The exercising of these rights is conditional upon certain obligations\n+for users so as to preserve this status for all subsequent redistributions.\n+\n+In consideration of access to the source code and the rights to copy,\n+modify and redistribute granted by the license, users are provided 
only\n+with a limited warranty and the software\'s author, the holder of the\n+economic rights, and the successive licensors only have limited liability.\n+\n+In this respect, the risks associated with loading, using, modifying\n+and/or developing or reproducing the software by the user are brought to\n+the user\'s attention, given its Free Software status, which may make it\n+complicated to use, with the result that its use is reserved for\n+developers and experienced professionals having in-depth computer\n+knowledge. Users are therefore encouraged to load and test the\n+suitability of the software as regards their requirements in conditions\n+enabling the security of their systems and/or data to be ensured and,\n+more generally, to use and operate it in the same conditions of\n+security. This Agreement may be freely reproduced and published,\n+provided it is not altered, and that no provisions are either added or\n+removed herefrom.\n+\n+This Agreement may apply to any or all software for which the holder of\n+the economic rights decides to submit the use thereof to its provisions.\n+\n+\n+ Article 1 - DEFINITIONS\n+\n+For the purpose of this Agreement, when the following expressions\n+commence with a capital letter, they shall have the following meaning:\n+\n+Agreement: means this license agreement, and its possible subsequent\n+versions and annexes.\n+\n+Software: means the software in its Object Code and/or Source Code form\n+and, where applicable, its documentation, "as is" when the Licensee\n+accepts the Agreement.\n+\n+Initial Software: means the Software in its Source Code and possibly its\n+Object Code form and, where applicable, its documentation, "as is" when\n+it is first distributed under the terms and conditions of the Agreement.\n+\n+Modified Software: means the Software modified by at least one\n+Contribution.\n+\n+Source Code: means all the Software\'s instructions and program lines to\n+which access is required so as to modify the 
Software.\n+\n+Object Code: means the binary files originating from the co'..b"a case-by-case basis between the relevant Licensor and the\n+Licensee pursuant to a memorandum of understanding. The Licensor\n+disclaims any and all liability as regards the Licensee's use of the\n+name of the Software. No warranty is given as regards the existence of\n+prior rights over the name of the Software or as regards the existence\n+of a trademark.\n+\n+\n+ Article 10 - TERMINATION\n+\n+10.1 In the event of a breach by the Licensee of its obligations\n+hereunder, the Licensor may automatically terminate this Agreement\n+thirty (30) days after notice has been sent to the Licensee and has\n+remained ineffective.\n+\n+10.2 A Licensee whose Agreement is terminated shall no longer be\n+authorized to use, modify or distribute the Software. However, any\n+licenses that it may have granted prior to termination of the Agreement\n+shall remain valid subject to their having been granted in compliance\n+with the terms and conditions hereof.\n+\n+\n+ Article 11 - MISCELLANEOUS\n+\n+\n+ 11.1 EXCUSABLE EVENTS\n+\n+Neither Party shall be liable for any or all delay, or failure to\n+perform the Agreement, that may be attributable to an event of force\n+majeure, an act of God or an outside cause, such as defective\n+functioning or interruptions of the electricity or telecommunications\n+networks, network paralysis following a virus attack, intervention by\n+government authorities, natural disasters, water damage, earthquakes,\n+fire, explosions, strikes and labor unrest, war, etc.\n+\n+11.2 Any failure by either Party, on one or more occasions, to invoke\n+one or more of the provisions hereof, shall under no circumstances be\n+interpreted as being a waiver by the interested Party of its right to\n+invoke said provision(s) subsequently.\n+\n+11.3 The Agreement cancels and replaces any or all previous agreements,\n+whether written or oral, between the Parties and having the same\n+purpose, and 
constitutes the entirety of the agreement between said\n+Parties concerning said purpose. No supplement or modification to the\n+terms and conditions hereof shall be effective as between the Parties\n+unless it is made in writing and signed by their duly authorized\n+representatives.\n+\n+11.4 In the event that one or more of the provisions hereof were to\n+conflict with a current or future applicable act or legislative text,\n+said act or legislative text shall prevail, and the Parties shall make\n+the necessary amendments so as to comply with said act or legislative\n+text. All other provisions shall remain effective. Similarly, invalidity\n+of a provision of the Agreement, for any reason whatsoever, shall not\n+cause the Agreement as a whole to be invalid.\n+\n+\n+ 11.5 LANGUAGE\n+\n+The Agreement is drafted in both French and English and both versions\n+are deemed authentic.\n+\n+\n+ Article 12 - NEW VERSIONS OF THE AGREEMENT\n+\n+12.1 Any person is authorized to duplicate and distribute copies of this\n+Agreement.\n+\n+12.2 So as to ensure coherence, the wording of this Agreement is\n+protected and may only be modified by the authors of the License, who\n+reserve the right to periodically publish updates or new versions of the\n+Agreement, each with a separate number. These subsequent versions may\n+address new issues encountered by Free Software.\n+\n+12.3 Any Software distributed under a given version of the Agreement may\n+only be subsequently distributed under the same version of the Agreement\n+or a subsequent version, subject to the provisions of Article 5.3.4.\n+\n+\n+ Article 13 - GOVERNING LAW AND JURISDICTION\n+\n+13.1 The Agreement is governed by French law. 
The Parties agree to\n+endeavor to seek an amicable solution to any disagreements or disputes\n+that may arise during the performance of the Agreement.\n+\n+13.2 Failing an amicable solution within two (2) months as from their\n+occurrence, and unless emergency proceedings are necessary, the\n+disagreements or disputes shall be referred to the Paris Courts having\n+jurisdiction, by the more diligent Party.\n+\n+\n+Version 2.0 dated 2006-09-05.\n"

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Program.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Program.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,175 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.awt.*;
+import javax.swing.*;
+
+
+/**
+ * Description of one Python program: its names, section, description and
+ * options, together with the Swing panel used to fill in the options.
+ */
+public class Program {
+  String                 shortName;
+  String                 name;
+  String                 section;
+  String                 description;
+  Vector <ProgramOption> options;
+  JPanel                 panel;   // built on demand by getPanel()
+  JButton                button;  // "GO!" button, built together with the panel
+
+
+  /**
+   * Creates a program with no name and an empty list of options.
+   */
+  public Program() {
+    this.shortName = null;
+    this.name      = null;
+    this.options   = new Vector <ProgramOption> ();
+  }
+
+
+  public void setShortName(String shortName) {
+    this.shortName = shortName;
+  }
+
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+
+  public void setSection(String section) {
+    this.section = section;
+  }
+
+  public void setDescription(String description) {
+    this.description = description;
+  }
+
+
+  /**
+   * Appends an option; options are kept in insertion order.
+   */
+  public void addOption(ProgramOption option) {
+    options.add(option);
+  }
+
+
+  public String getShortName() {
+    return this.shortName;
+  }
+
+
+  public String getName() {
+    return this.name;
+  }
+
+
+  public String getSection() {
+    return this.section;
+  }
+
+  public String getDescription() {
+    return this.description;
+  }
+
+
+  /**
+   * Checks the options in order.
+   *
+   * @return the first non-null comment returned by an option's checkValue(),
+   *         or null when every option returns null
+   */
+  public String checkValues() {
+    for (ProgramOption option: options) {
+      String comment = option.checkValue();
+      if (comment != null) {
+        return comment;
+      }
+    }
+    return null;
+  }
+
+
+  /**
+   * Builds the command line of the program.
+   *
+   * @return Global.pythonCommand, then the script path
+   *         ("Python" + file separator + short name), then the command
+   *         elements of every option, in order
+   */
+  public LinkedList<String> getCommand() {
+    LinkedList<String> parameterList = new LinkedList<String>();
+    parameterList.add(Global.pythonCommand);
+    parameterList.add("Python" + java.io.File.separator + this.shortName);
+    for (ProgramOption option: options) {
+      parameterList.addAll(option.getCommand());
+    }
+    return parameterList;
+  }
+
+
+  /**
+   * Returns the panel of the program, building it on the first call: the
+   * description, then one panel per option, then the "GO!" button.
+   *
+   * @return the panel, or null if an option cannot provide its own panel; in
+   *         that case nothing is cached and the next call tries again
+   */
+  public JPanel getPanel() {
+    if (this.panel != null) {
+      return this.panel;
+    }
+
+    // Build into local variables and only store the result once everything
+    // succeeded; otherwise a failed build would leave an empty panel cached
+    // and getButton() could never create the button.
+    JPanel newPanel = new JPanel(false);
+    newPanel.setLayout(new FlowLayout());
+    Box box = Box.createVerticalBox();
+
+    JPanel descriptionPanel = new JPanel(false);
+    JLabel descriptionLabel = new JLabel(this.description);
+    descriptionPanel.add(descriptionLabel);
+    box.add(descriptionPanel);
+
+    for (ProgramOption option: options) {
+      if (option.getPanel() == null) {
+        System.out.println("Problem with Python program '" + this.shortName + "'.");
+        return null;
+      }
+      // NOTE(review): getPanel() is called a second time, as before;
+      // ProgramOption is not visible here, so the call pattern is kept.
+      box.add(option.getPanel());
+    }
+
+    JPanel  buttonPanel = new JPanel(false);
+    JButton newButton   = new JButton("GO!");
+
+    buttonPanel.add(newButton);
+
+    box.add(buttonPanel);
+
+    newPanel.add(box);
+
+    this.button = newButton;
+    this.panel  = newPanel;
+
+    return this.panel;
+  }
+
+
+  /**
+   * Returns the "GO!" button, building the panel first if needed.
+   *
+   * @return the button, or null if the panel cannot be built
+   */
+  public JButton getButton() {
+    if (this.button == null) {
+      this.getPanel();
+    }
+    return this.button;
+  }
+
+
+  /**
+   * Collects the output file of every option that is not an input.
+   *
+   * @return the output files, in option order
+   */
+  public Vector < File > getOutputFiles() {
+    Vector < File > files = new Vector < File > ();
+    for (ProgramOption option: options) {
+      if (! option.isInput()) {
+        files.add(option.getOutputFile());
+      }
+    }
+    return files;
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/ProgramFileReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramFileReader.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,174 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.File;
+import java.io.*;
+
+
+public class ProgramFileReader {
+  String fileName;
+  Vector <Program> programs;
+
+
+  public ProgramFileReader(String fileName) {
+    this.fileName = fileName;
+    this.programs = new Vector <Program> ();
+  }
+
+
+  public boolean read() {
+//  File    file    = new File(this.fileName);
+//  Program program = null;
+//  int     step    = 0;
+//  TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> ();
+
+//  try {
+//    BufferedReader reader = new BufferedReader(new FileReader(file));
+//    String line    = null;
+//    String section = null;
+
+//    while ((line = reader.readLine()) != null) {
+
+//      line = line.trim();
+
+//      if (line.length() == 0) {
+//        if (program != null) {
+//          programs.add(program);
+//        }
+//        program = null;
+//        step = 0;
+//        continue;
+//      }
+
+//      if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) {
+//        section = line.substring(1, line.length() - 1).trim();
+//        continue;
+//      }
+//      switch (step) {
+//        case 0:
+//        program = new Program();
+//          program.setName(line);
+//          if (section == null) {
+//            System.out.println("Error! Section of program '" + line + "' is not set!");
+//          }
+//          program.setSection(section);
+//          step = 1;
+//          break;
+//        case 1:
+//          program.setShortName(line);
+//          step = 2;
+//          break;
+//        case 2:
+//          ProgramOption option = new ProgramOption();
+
+//          String[] elements    = line.split(":");
+//          boolean  input       = elements[0].trim().equalsIgnoreCase("input")? true: false;
+//          String[] subElements = elements[1].split(";");
+//          String   identifier = subElements[0].trim();
+
+//          option.setInput(input);
+
+//          if (input) {
+
+//            if (subElements.length < 4) {
+//              System.out.println("Line '" + line + "' is weird...");
+//            }
+
+//            String   type       = subElements[1].trim();
+//            String   comment    = subElements[2].trim();
+//            boolean  compulsory = subElements[3].trim().equalsIgnoreCase("0")? false: true;
+
+//            option.setIdentifier(identifier);
+//            option.setType(type);
+//            option.setComment(comment);
+//            option.setCompulsory(compulsory);
+
+//            if ("file".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String formatIdentifier = subElements[4].trim();
+//              option.setFormatIdentifier(formatIdentifier);
+//            }
+//            else if ("choice".compareToIgnoreCase(type) == 0) {
+//              if (subElements.length < 5) {
+//                System.out.println("Line '" + line + "' is weird...");
+//              }
+
+//              String[] choices = subElements[4].trim().split(",");
+//              for (int i = 0; i < choices.length; i++) {
+//                choices[i] = choices[i].trim();
+//              }
+//              option.setChoices(choices);
+//            }
+//            options.put(identifier, option);
+//          }
+//          else {
+//            String format = subElements[1].trim();
+
+//            option.setFormat(format);
+//            option.setAssociatedOption(options.get(identifier));
+//          }
+
+//          program.addOption(option);
+
+//          break;
+//        default:
+//          return false;
+//      }
+//    }
+
+//    reader.close();
+//  }
+//  catch (FileNotFoundException e) {
+//    return false;
+//  }
+//  catch (IOException e) {
+//    return false;
+//  }
+
+//  if (program != null) {
+//    programs.add(program);
+//  }
+
+    return true;
+  }
+
+  public int getNbPrograms() {
+    return programs.size();
+  }
+
+  public Program getProgram(int i) {
+    return programs.get(i);
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/ProgramLauncher.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramLauncher.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,209 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.*;
+import javax.swing.SwingUtilities;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * Background worker which runs an external command and relays its output to the GUI.
+ *
+ * The command is started in {@link #doInBackground()}; every line it prints (standard
+ * output and standard error are merged) is published and later handled by
+ * {@link #process(List)}, which updates the message label, the progress bar, the ETA
+ * label and the log area.  {@link #getExitValue()} blocks until the command is over.
+ */
+public class ProgramLauncher extends SwingWorker<Boolean, String>  {
+
+  String[]     command;
+  JTextArea    logArea;
+  JLabel       messageField;
+  JProgressBar progressBar;
+  JLabel       etaField;
+  int          exitValue;
+  // Released exactly once, when doInBackground() is over (whatever the outcome).
+  CountDownLatch latch;
+
+
+
+  /**
+   * @param c  the command and its arguments, one element per token
+   * @param la the text area which receives the log of the command
+   * @param mf the label which receives the title of the current progress bar
+   * @param pb the progress bar to update
+   * @param ef the label which receives the estimated time of arrival
+   */
+  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = new String[c.size()];
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    c.toArray(command);
+    latch = new CountDownLatch(1);
+  }
+
+
+  /**
+   * Same as the other constructor, the command being given as an array
+   * (the array is used as is, it is not copied).
+   */
+  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = c;
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    latch = new CountDownLatch(1);
+  }
+
+
+  /**
+   * Runs the command and waits for its end.
+   *
+   * @return <code>Boolean.TRUE</code> if the command could be run and exited with
+   *         value 0, <code>Boolean.FALSE</code> otherwise
+   */
+  @Override
+  public Boolean doInBackground() {
+    try {
+      return Boolean.valueOf(runCommand());
+    }
+    finally {
+      // Always release the latch, even on an unexpected runtime exception,
+      // otherwise getExitValue() would wait forever.
+      latch.countDown();
+    }
+  }
+
+
+  /**
+   * Starts the command, publishes each line of its output and stores its exit value.
+   *
+   * @return true if the command exited with value 0
+   */
+  private boolean runCommand() {
+    ProcessBuilder pb       = new ProcessBuilder(command);
+    pb.redirectErrorStream(true);
+    Map<String, String> env = pb.environment();
+    // java.io.File is fully qualified on purpose: the project has its own File class.
+    env.put("PYTHONPATH", System.getProperty("user.dir"));
+    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "SMART" + java.io.File.separator + "Java" + java.io.File.separator + "Python");
+    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+    env.put("SMARTRPATH", Global.rCommand);
+    String commandJoined = Arrays.toString(command);
+
+    Process process;
+    try {
+      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
+      process = pb.start();
+    }
+    catch (Exception exception) {
+      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
+      exception.printStackTrace();
+      return false;
+    }
+
+    BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+    try {
+      String line;
+      while ((line = outputReader.readLine()) != null) {
+        publish(line + "\n");
+      }
+    }
+    catch (IOException e) {
+      e.printStackTrace();
+      publish("!Cannot get the output of the command!\n");
+      return false;
+    }
+    finally {
+      try {
+        outputReader.close();
+      }
+      catch (IOException e) {
+        // Nothing more can be read anyway: a failure while closing is harmless.
+      }
+    }
+
+    try {
+      // waitFor() directly gives the exit value of the (now finished) process.
+      exitValue = process.waitFor();
+    }
+    catch (InterruptedException e) {
+      e.printStackTrace();
+      publish("!Cannot wait for the end of the command!\n");
+      // Keep the interruption visible to the code which runs this worker.
+      Thread.currentThread().interrupt();
+      return false;
+    }
+    if (exitValue != 0) {
+      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
+      return false;
+    }
+    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
+    return true;
+  }
+
+
+  /**
+   * Handles the published lines: progress lines update the message label, the
+   * progress bar and the ETA label; all the other lines go to the log area, of
+   * which only the last <code>Global.logAreaSize</code> lines are kept.
+   *
+   * @param chunks the lines published since the last call
+   */
+  @Override
+  protected void process(List<String> chunks) {
+    StringBuilder text = new StringBuilder(logArea.getText());
+    for (String chunk: chunks) {
+      text.append(chunk);
+    }
+    StringBuilder message = new StringBuilder();
+    for (String lineSeparatedByCarriageReturn: text.toString().split("\n")) {
+      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
+        boolean progressLine = false;
+        if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) {
+          progressLine = showProgress(line, line.split("\\]")[1], "");
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) {
+          // Limit -1 keeps trailing empty strings, so that a line ending with "ETA:" is safe.
+          String eta = line.split("ETA:", -1)[1].trim();
+          progressLine = showProgress(line, line.split("\\]")[1].split("E", -1)[0], "ETA: " + eta);
+        }
+        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) {
+          String title      = line.split("\\[")[0].trim();
+          String nbElements = line.split("\\]")[1].split("completed", -1)[0].trim();
+          String timeSpent  = line.split("completed in", -1)[1].trim();
+          message.append(title).append(": ").append(nbElements).append(" elements completed in ").append(timeSpent).append("\n");
+          messageField.setText(title);
+          progressLine = true;
+        }
+        if (! progressLine) {
+          message.append(line).append("\n");
+        }
+      }
+    }
+    String lines[]            = message.toString().split("\n");
+    StringBuilder toBeWritten = new StringBuilder();
+    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
+      toBeWritten.append(lines[i]).append("\n");
+    }
+    logArea.setText(toBeWritten.toString());
+  }
+
+
+  /**
+   * Displays the state described by a progress line.
+   *
+   * @param line  the whole progress line (the text before the first '[' is the title)
+   * @param ratio the "current/aim" part of the line
+   * @param eta   the text to write in the ETA label
+   * @return true if the line was understood and displayed; false if the ratio cannot
+   *         be read, in which case nothing is displayed and the caller logs the line
+   */
+  private boolean showProgress(String line, String ratio, String eta) {
+    String[] ratioElements = ratio.trim().split("/");
+    if (ratioElements.length != 2) {
+      return false;
+    }
+    int current;
+    int aim;
+    try {
+      current = Integer.parseInt(ratioElements[0].trim());
+      aim     = Integer.parseInt(ratioElements[1].trim());
+    }
+    catch (NumberFormatException e) {
+      return false;
+    }
+    messageField.setText(line.split("\\[")[0].trim());
+    // An aim of 0 gives no meaningful percentage: leave the bar untouched.
+    if (aim > 0) {
+      // Computed with long values so that large counts do not overflow.
+      progressBar.setValue((int) Math.min(current * 100L / aim, (long) Integer.MAX_VALUE));
+    }
+    etaField.setText(eta);
+    return true;
+  }
+
+
+  /**
+   * Waits for the end of the command and gives its exit value.
+   *
+   * @return the exit value of the command, or -1 if the command could not be run
+   *         to its end or if the wait was interrupted
+   */
+  public int getExitValue() {
+    try {
+      latch.await();
+    }
+    catch (InterruptedException e) {
+      logArea.append("Cannot wait for the end of the process!\n");
+      e.printStackTrace();
+      // Keep the interruption visible to the caller.
+      Thread.currentThread().interrupt();
+      return -1;
+    }
+    return exitValue;
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/ProgramOption.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/ProgramOption.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,358 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.awt.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+\n+\n+public class ProgramOption {\n+ boolean input;\n+ String identifier;\n+ String type;\n+ String comment;\n+ boolean compulsory;\n+ String[] format;\n+ String formatIdentifier;\n+ ProgramOption associatedOption;\n+ String defaultValue;\n+ String[] choices;\n+ JComponent component;\n+ JPanel panel;\n+\n+\n+ public ProgramOption() {\n+ this.input = true;\n+ this.identifier = null;\n+ this.type = null;\n+ this.comment = null;\n+ this.compulsory = false;\n+ this.format = null;\n+ this.formatIdentifier = null;\n+ this.associatedOption = null;\n+ this.defaultValue = "";\n+ this.choices = null;\n+ this.component = null;\n+ this.panel = null;\n+ }\n+\n+\n+ public void setInput(boolean input) {\n+ this.input = input;\n+ }\n+\n+\n+ public void setIdentifier(String identifier) {\n+ this.identifier = identifier;\n+ }\n+\n+\n+ public void setType(String type) {\n+ this.type = type;\n+ }\n+\n+\n+ public void setComment(String comment) {\n+ this.comment = comment;\n+ }\n+\n+\n+ public void setCompulsory(boolean compulsory) {\n+ this.compulsory = compulsory;\n+ }\n+\n+\n+ public void setFormat(String[] format) {\n+ this.format = format;\n+ }\n+\n+\n+ public void setFormat(String format) {\n+ this.format = new String[1];\n+ this.format[0] = format;\n+ }\n+\n+\n+ public 
void setFormatIdentifier(String formatIdentifier) {\n+ this.formatIdentifier = formatIdentifier;\n+ }\n+\n+\n+ public void setAssociatedOption(ProgramOption option) {\n+ this.associatedOption = option;\n+ }\n+\n+\n+ public void setChoices(String[] choices) {\n+ this.choices = new String[choices.length+1];\n+ this.choices[0] = "---";\n+ for (int i = 0; i < choices.length; i++) {\n+ this.choices[i+1] = choices[i];\n+ }\n+ }\n+\n+\n+ public void setDefault(String defaultValue) {\n+ this.defaultValue = defaultValue;\n+ }\n+\n+\n+ public boolean isInput() {\n+ return this.input;\n+ }\n+\n+\n+ public boolean checkSettings() {\n+ if (this.identifier == null) {\n+ return false;\n+ }\n+ if (this.type == nul'..b'{\n+ this.component = new JComboBox(this.choices);\n+ label.setLabelFor(this.component);\n+ this.panel.add(label);\n+ this.panel.add(this.component);\n+ }\n+ else {\n+ System.out.println("Do not know how to read type " + this.type);\n+ }\n+\n+ return this.panel;\n+ }\n+\n+\n+ public JComponent getComponent() {\n+ if (component == null) {\n+ this.getPanel();\n+ }\n+ return this.component;\n+ }\n+\n+\n+ private String getValue() {\n+ if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type))) {\n+ String s = ((JTextField) this.component).getText();\n+ if ("None".equals(s)) {\n+ return "";\n+ }\n+ return s;\n+ }\n+ if ("file".equals(this.type)) {\n+ return (String) ((JComboBox) this.component).getSelectedItem();\n+ }\n+ if ("boolean".equals(this.type)) {\n+ return ((JCheckBox) this.component).isSelected()? 
"true": "false";\n+ }\n+ if ("format".equals(this.type)) {\n+ return (String) ((JComboBox) this.component).getSelectedItem();\n+ }\n+ if ("choice".equals(this.type)) {\n+ String s = (String) ((JComboBox) this.component).getSelectedItem();\n+ if ("---".equals(s)) {\n+ return "";\n+ }\n+ return s;\n+ }\n+ System.out.println("Do not know how to get value of \'" + this.type + "\' (" + this.identifier + ").");\n+ return null;\n+ }\n+\n+\n+ public String checkValue() {\n+ String value = this.getValue();\n+ if ((this.compulsory) && ((value == null) || ("".equals(value)))) {\n+ return "Option \'" + this.comment + "\' has no value... Please specify it.\\n";\n+ }\n+ if ("int".equals(this.type)) {\n+ if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) {\n+ try {\n+ int i = Integer.parseInt(value);\n+ }\n+ catch (NumberFormatException e) {\n+ return "Option \'" + this.comment + "\' should be an integer... Please correct it.\\n";\n+ }\n+ }\n+ }\n+ else if ("float".equals(this.type)) {\n+ if ((value != null) && (! "".equals(value))) {\n+ try {\n+ float i = Float.parseFloat(value);\n+ }\n+ catch (NumberFormatException e) {\n+ return "Option \'" + this.comment + "\' should be a float... Please correct it.\\n";\n+ }\n+ }\n+ }\n+ return null;\n+ }\n+\n+\n+ public LinkedList <String> getCommand() {\n+ LinkedList <String> list = new LinkedList <String> ();\n+\n+ if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type)) || ("choice".equals(this.type))) {\n+ String value = this.getValue();\n+ if (value.length() == 0) {\n+ return list;\n+ }\n+ list.add(this.identifier);\n+ list.add(value);\n+ return list;\n+ }\n+ if ("file".equals(this.type)) {\n+ String fileName = (String) ((JComboBox) this.component).getSelectedItem();\n+ if (fileName == null) {\n+ return list;\n+ }\n+ list.add(this.identifier);\n+ list.add(this.getValue());\n+ return list;\n+ }\n+ if (("boolean".equals(this.type)) || ("bool".equals(this.type))) {\n+ if ("true".equals(this.getValue())) {\n+ list.add(this.identifier);\n+ }\n+ return list;\n+ }\n+ System.out.println("Cannot get type of option " + this.type + " (" + this.identifier + "): " + this.getValue());\n+ return null;\n+ }\n+\n+\n+ public File getOutputFile() {\n+ if (this.input) return null;\n+ String format = "";\n+ if (this.format != null) {\n+ format = this.format[0];\n+ }\n+ if (this.associatedOption != null) {\n+ format = this.associatedOption.getValue();\n+ }\n+ return new File(this.getValue(), Global.formats.getFormatType(format), format);\n+ }\n+}\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/.gitignore Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,1 @@
+/CleanTranscriptFile.py

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CleanTranscriptFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CleanTranscriptFile.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,74 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from SMART.Java.Python.cleaning.CleanerChooser import CleanerChooser
+
+
+class CleanTranscriptFile(object):
+
+ def __init__(self, verbosity):
+ self.verbosity = verbosity
+ self.chooser   = CleanerChooser(self.verbosity)
+
+ def setInputFile(self, fileName, format):
+ self.chooser.findFormat(format)
+ self.cleaner = self.chooser.getCleaner()
+ self.cleaner.setInputFileName(fileName)
+
+ def setOutputFile(self, fileName):
+ self.cleaner.setOutputFileName(fileName)
+
+ def setAcceptedTypes(self, types):
+ if types != None:
+ self.cleaner.setAcceptedTypes(types)
+
+ def run(self):
+ self.cleaner.clean()
+
+
+if __name__ == "__main__":
+
+ description = "Clean Transcript File v1.0.1: Clean a transcript file so that it is useable for S-MART. [Category: Other]"
+
+ parser = OptionParser(description = description)
+ parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
+ parser.add_option("-f", "--format",      dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
+ parser.add_option("-t", "--types",       dest="acceptedTypes",  action="store",      default=None,  type="string", help="name of the types you want to keep in GFF/GTF (list separated by commas) [format: string] [default: None]")
+ parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")
+ parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+ (options, args) = parser.parse_args()
+
+ ctf = CleanTranscriptFile(options.verbosity)
+ ctf.setInputFile(options.inputFileName, options.format)
+ ctf.setOutputFile(options.outputFileName)
+ ctf.setAcceptedTypes(None if options.acceptedTypes == None else options.acceptedTypes.split(","))
+ ctf.run()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ClusterizeByTags.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ClusterizeByTags.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,157 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import random\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n+\n+\n+OPERATIONS = ("diff", "div")\n+BOOLTOSTRANDS = {True: [0], False: [-1, 1]}\n+\n+class ClusterizeByTags(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.connection = MySqlConnection(self.verbosity-1)\n+ self.defautValue = None\n+ self.maxDistance = None\n+ self.oneStrand = False\n+\n+ def setInputFile(self, fileName, format):\n+ chooser = ParserChooser(self.verbosity)\n+ chooser.findFormat(format)\n+ parser = chooser.getParser(fileName)\n+ writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)\n+ writer.addTranscriptList(parser)\n+ writer.write()\n+ self.transcriptTables = writer.getTables()\n+\n+ def setOutputFile(self, fileName):\n+ self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+ def setTag(self, tagName, defaultValue):\n+ self.tagName = tagName\n+ self.defaultValue = defaultValue\n+\n+ def setThreshold(self, threshold):\n+ self.threshold = threshold\n+\n+ def setOperation(self, operation):\n+ self.operation = operation\n+ if self.operation not in 
OPERATIONS:\n+ raise Exception("Operation \'%s\' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))\n+\n+ def setMaxDistance(self, distance):\n+ self.maxDistance = distance\n+\n+ def setOneStrand(self, oneStrand):\n+ self.oneStrand = oneStrand\n+\n+ def run(self):\n+ for chromosome in sorted(self.transcriptTables.keys()):\n+ progress = Progress(self.transcriptTables[chromosome].getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)\n+ for strand in BOOLTOSTRANDS[self.oneStrand]:\n+ previousValue = None\n+ previousTrend = None\n+ previousTranscript = None\n+ sumValue = 0\n+ command = "SELECT * FROM %s" % (self.tran'..b' trend = value / previousValue\n+ if previousTranscript == None:\n+ sumValue = value\n+ elif (previousTrend == None or abs(trend - previousTrend) <= self.threshold) and (self.maxDistance == None or previousTranscript.getDistance(transcript) <= self.maxDistance) and (previousTranscript.getDirection() == transcript.getDirection() or not self.oneStrand):\n+ if previousTranscript.getDirection() != transcript.getDirection():\n+ transcript.reverse()\n+ previousTranscript.merge(transcript)\n+ transcript = previousTranscript\n+ sumValue += value\n+ previousTrend = trend\n+ else:\n+ previousTranscript.setTagValue(self.tagName, sumValue)\n+ self.writer.addTranscript(previousTranscript)\n+ sumValue = value\n+ previousTrend = None\n+ previousValue = value\n+ previousTranscript = transcript\n+ progress.inc()\n+ if previousTranscript != None:\n+ previousTranscript.setTagValue(self.tagName, sumValue)\n+ self.writer.addTranscript(previousTranscript)\n+ progress.done()\n+ self.writer.close()\n+\n+\n+if __name__ == "__main__":\n+ \n+ description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. 
[Category: Merge]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+ parser.add_option("-t", "--tag", dest="tagName", action="store", type="string", help="name of the tag [format: string] [compulsory]")\n+ parser.add_option("-e", "--default", dest="defaultValue", action="store", default=None, type="int", help="default value for the tag [format: string]")\n+ parser.add_option("-r", "--threshold", dest="threshold", action="store", type="int", help="threshold between two consecutive tags [format: int] [compulsory]")\n+ parser.add_option("-p", "--operation", dest="operation", action="store", type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")\n+ parser.add_option("-d", "--distance", dest="maxDistance", action="store", default=None, type="int", help="maximum distance for 2 clusters to be merged [format: int] [default: None]")\n+ parser.add_option("-1", "--oneStrand", dest="oneStrand", action="store_true", default=False, help="also cluster the elements which are on different strands [format: bool] [default: False]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ cbt = ClusterizeByTags(options.verbosity)\n+ cbt.setInputFile(options.inputFileName, options.format)\n+ cbt.setOutputFile(options.outputFileName)\n+ cbt.setTag(option.tagName, option.defaultValue)\n+ 
cbt.setThreshold(option.threshold)\n+ cbt.setOperation(option.operation)\n+ cbt.setMaxDistance(operation.maxDistance)\n+ cbt.setOneStrand(operation.oneStrand)\n+ cbt.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CollapseReads.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CollapseReads.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,174 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os\n+from optparse import OptionParser, OptionGroup\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+\n+class CollapseReads(object):\n+ """\n+ Merge two reads if they have exactly the same genomic coordinates\n+ """\n+\n+ def __init__(self, verbosity = 0):\n+ self.verbosity = verbosity\n+ self.inputReader = None\n+ self.outputWriter = None\n+ self.strands = True\n+ self.nbRead = 0\n+ self.nbWritten = 0\n+ self.nbMerges = 0\n+ self.splittedFileNames = {}\n+\n+ def __del__(self):\n+ for fileName in self.splittedFileNames.values():\n+ os.remove(fileName)\n+ \n+ def close(self):\n+ self.outputWriter.close()\n+ \n+ def setInputFile(self, fileName, format):\n+ parserChooser = ParserChooser(self.verbosity)\n+ parserChooser.findFormat(format, "transcript")\n+ self.parser = parserChooser.getParser(fileName)\n+ self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])\n+\n+ def setOutputFile(self, fileName):\n+ self.outputWriter = Gff3Writer(fileName, self.verbosity)\n+\n+ def getNbElements(self):\n+ return self.parser.getNbTranscripts()\n+\n+ def _sortFile(self):\n+ fs = FileSorter(self.parser, self.verbosity-4)\n+ fs.perChromosome(True)\n+ fs.setOutputFileName(self.sortedFileName)\n+ fs.sort()\n+ 
self.splittedFileNames = fs.getOutputFileNames()\n+ self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()\n+ self.nbRead = fs.getNbElements()\n+ \n+ def _iterate(self, chromosome):\n+ progress = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)\n+ transcripts = []\n+ parser = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)\n+ for newTranscript in parser.getIterator():\n+ newTranscripts = []\n+ for oldTranscript in transcripts:\n+ if self._checkOverlap(newTranscript, oldTranscr'..b'pt2):\n+ self.nbMerges += 1\n+ transcript2.setDirection(transcript1.getDirection())\n+ transcript1.merge(transcript2)\n+\n+ def _write(self, transcript):\n+ self.nbWritten += 1\n+ self.outputWriter.addTranscript(transcript)\n+\n+ def _checkOverlap(self, transcript1, transcript2):\n+ if transcript1.getStart() != transcript2.getStart() or transcript1.getEnd() != transcript2.getEnd():\n+ return False\n+ return (not self.strands or transcript1.getDirection() == transcript2.getDirection())\n+\n+ def _checkPassed(self, transcript1, transcript2):\n+ return (transcript2.getStart() < transcript1.getStart())\n+\n+ def collapseChromosome(self, chromosome):\n+ progress = Progress(table.getNbElements(), "Analysing chromosome %s" % (chromosome), self.verbosity)\n+ command = "SELECT * FROM %s ORDER BY start ASC, end DESC" % (table.name)\n+ transcriptStart = None\n+ transcriptEnd = None\n+ transcriptDirection = None\n+ currentTranscript = None\n+ if self.strands:\n+ command += ", direction"\n+ for index, transcript in table.selectTranscripts(command, True):\n+ self.nbRead += 1\n+ if not self.strands:\n+ transcript.setDirection("+")\n+ if transcriptStart != transcript.getStart() or transcriptEnd != transcript.getEnd() or transcriptDirection != transcript.getDirection():\n+ self.writeTranscript(currentTranscript)\n+ transcriptStart = transcript.getStart()\n+ transcriptEnd = transcript.getEnd()\n+ transcriptDirection = 
transcript.getDirection()\n+ currentTranscript = transcript\n+ else:\n+ currentTranscript.setTagValue("nbElements", (currentTranscript.getTagValue("nbElements") + 1) if "nbElements" in currentTranscript.getTagNames() else 1)\n+ progress.inc()\n+ self.writeTranscript(currentTranscript)\n+ progress.done()\n+\n+ def collapse(self):\n+ self._sortFile()\n+ for chromosome in sorted(self.nbElementsPerChromosome.keys()):\n+ self._iterate(chromosome)\n+ self.outputWriter.close()\n+ if self.verbosity > 1:\n+ print "# reads read: %d" % (self.nbRead)\n+ print "# reads written: %d (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbRead * 100)\n+ print "# reads merges: %d" % (self.nbMerges)\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Collapse Reads v1.0.3: Merge two reads if they have exactly the same genomic coordinates. [Category: Merge]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in mapping format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the file [compulsory] [format: mapping file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-s", "--strands", dest="strands", action="store_true", default=False, help="merge elements on 2 different strands [format: bool] [default: false]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ collapser = CollapseReads(options.verbosity)\n+ collapser.setInputFile(options.inputFileName, options.format)\n+ collapser.setOutputFile(options.outputFileName)\n+ collapser.strands = not options.strands\n+ 
collapser.collapse()\n+ collapser.close()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CombineTags.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CombineTags.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import random
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+OPERATIONS = ("plus", "minus", "times", "div")
+
+class CombineTags(object):
+
+    def __init__(self, verbosity = 0):
+        self.verbosity       = verbosity
+
+    def setInputFile(self, fileName, format):
+        self.inputFileName = fileName
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format, "transcript")
+        self.parser = parserChooser.getParser(fileName)
+
+    def setOutputFile(self, fileName):
+        self.outputWriter = Gff3Writer(fileName, self.verbosity)
+
+    def setTags(self, tag1, tag2, outputTag, defaultValue = None):
+        self.tag1         = tag1
+        self.tag2         = tag2
+        self.outputTag    = outputTag
+        self.defaultValue = defaultValue
+
+    def setOperation(self, operation):
+        self.operation = operation
+        if self.operation not in OPERATIONS:
+            raise Exception("Do no handle operation %s, only: %s" % (self.operation, ", ".join(OPERATIONS)))
+
+    def run(self):
+        progress = Progress(self.parser.getNbTranscripts(), "Printing transcripts %s" % (self.inputFileName), self.verbosity)
+        for transcript in self.parser.getIterator():
+            tag1 = transcript.getTagValue(self.tag1)
+            tag2 = transcript.getTagValue(self.tag2)
+            if tag1 == None or tag2 == None:
+                if self.defaultValue == None:
+                    raise Exception("Transcript %s misses one of the tags %s and %s, and has no default value !" % (transcript, self.tag1, self.tag2))
+                newTag = self.defaultValue
+            else:
+                tag1, tag2 = float(tag1), float(tag2)
+                if self.operation == "plus":
+                    newTag = tag1 + tag2
+                elif self.operation == "minus":
+                    newTag = tag1 - tag2
+                elif self.operation == "times":
+                    newTag = tag1 * tag2
+                elif self.operation == "div":
+                    newTag = tag1 / tag2
+            transcript.setTagValue(self.outputTag, newTag)
+            self.outputWriter.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+        self.parser.close()
+        self.outputWriter.close()
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Change Tag Name v1.0.1: Change the name of tag of a list of transcripts. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",               type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-t", "--tag1",        dest="tag1",           action="store",               type="string", help="name of the first tag [compulsory] [format: string]")
+    parser.add_option("-T", "--tag2",        dest="tag2",           action="store",               type="string", help="name of the second tag [compulsory] [format: string]")
+    parser.add_option("-d", "--default",     dest="defaultValue",   action="store", default=None, type="string", help="default value when one of the tag is absent [compulsory] [format: float]")
+    parser.add_option("-n", "--new",         dest="newTag",         action="store",               type="string", help="name of the new tag [compulsory] [format: string]")
+    parser.add_option("-p", "--operation",   dest="operation",      action="store",               type="string", help="operation combining the tags [compulsory] [format: choice (plus, minus, times, div)]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int] [default: 1]")
+    (options, args) = parser.parse_args()
+
+    combiner = CombineTags(options.verbosity)
+    combiner.setInputFile(options.inputFileName, options.inputFormat)
+    combiner.setOutputFile("%s.gff3" % (options.outputFileName))
+    combiner.setTags(options.tag1, options.tag2, options.newTag, options.defaultValue)
+    combiner.setOperation(options.operation)
+    combiner.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CompareOverlapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlapping.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,491 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, struct, time, random\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.NCListHandler import NCListHandler\n+from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc import Utils\n+try:\n+\timport cPickle as pickle\n+except:\n+\timport pickle\n+\n+REFERENCE = 0\n+QUERY = 1\n+TYPES = (REFERENCE, QUERY)\n+TYPETOSTRING = {0: "reference", 1: "query"}\n+\n+class CompareOverlapping(object):\n+\n+\tdef __init__(self, verbosity = 1):\n+\t\tself._outputFileName\t\t = "outputOverlaps.gff3"\n+\t\tself._iWriter\t\t\t\t = None\n+\t\tself._nbOverlappingQueries\t = 0\n+\t\tself._nbOverlaps\t\t\t = 0\n+\t\tself._nbLines\t\t\t\t = {REFERENCE: 0, QUERY: 0}\n+\t\tself._verbosity\t\t\t\t = verbosity\n+\t\tself._ncLists\t\t\t\t = {}\n+\t\tself._cursors\t\t\t\t = {}\n+\t\tself._splittedFileNames\t\t = {}\n+\t\tself._nbElements\t\t\t = {}\n+\t\tself._nbElementsPerChromosome = {}\n+\t\tself._inputFileNames\t\t = 
{REFERENCE: None, QUERY: None}\n+\t\tself._inputFileFormats\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._starts\t\t\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._ends\t\t\t\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._fivePrimes\t\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._threePrimes\t\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._ncListHandlers\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._convertedFileNames\t = {REFERENCE: False, QUERY: False}\n+\t\tself._sorted = False\n+\t\tself._index = False\n+\t\tself._introns\t\t\t\t = False\n+\t\tself._antisense\t\t\t\t = False\n+\t\tself._colinear\t\t\t\t = False\n+\t\tself._invert\t\t\t\t = False\n+\t\tself._distance\t\t\t\t = 0\n+\t\tself._minOverlap\t\t\t = 1\n+\t\tself._pcOverlap\t\t\t\t = None\n+\t\tself._included\t\t\t\t = False\n+\t\tself._including\t\t\t\t = False\n+\t\tself._outputNotOverlapping\t = False\n+\t\tself._tmpRefFileName\t\t = None\n+\t\tself._currentQueryTranscript = None\n+\t\tself._currentOrQueryTranscript = None\n+\t'..b'in file 1 (do not use it with -S) [format: int]")\n+\tparser.add_option("-u", "--end2",\t\t\t dest="end2",\t\t action="store",\t default=None, type="int",\thelp="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")\n+\tparser.add_option("-t", "--intron",\t\t dest="introns",\t\t action="store_true", default=False,\t\t\t\thelp="also report introns [format: bool] [default: false]")\n+\tparser.add_option("-E", "--5primeExtension1", dest="fivePrime1",\t action="store",\t default=None, type="int",\thelp="extension towards 5\' in file 1 [format: int]")\n+\tparser.add_option("-e", "--5primeExtension2", dest="fivePrime2",\t action="store",\t default=None, type="int",\thelp="extension towards 5\' in file 2 [format: int]")\n+\tparser.add_option("-N", "--3primeExtension1", dest="threePrime1",\t action="store",\t default=None, type="int",\thelp="extension towards 3\' in file 1 [format: 
int]")\n+\tparser.add_option("-n", "--3primeExtension2", dest="threePrime2",\t action="store",\t default=None, type="int",\thelp="extension towards 3\' in file 2 [format: int]")\n+\tparser.add_option("-c", "--colinear",\t\t dest="colinear",\t\t action="store_true", default=False,\t\t\t\thelp="colinear only [format: bool] [default: false]")\n+\tparser.add_option("-a", "--antisense",\t\t dest="antisense",\t\t action="store_true", default=False,\t\t\t\thelp="antisense only [format: bool] [default: false]")\n+\tparser.add_option("-d", "--distance",\t\t dest="distance",\t action="store",\t default=0,\t type="int",\thelp="accept some distance between query and reference [format: int]")\n+\tparser.add_option("-k", "--included",\t\t dest="included",\t action="store_true", default=False,\t\t\t\thelp="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")\n+\tparser.add_option("-K", "--including",\t\t dest="including",\t action="store_true", default=False,\t\t\t\thelp="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")\n+\tparser.add_option("-m", "--minOverlap",\t\t dest="minOverlap",\t action="store",\t default=1,\t type="int",\thelp="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")\n+\tparser.add_option("-p", "--pcOverlap",\t\t dest="pcOverlap",\t action="store",\t default=None, type="int",\thelp="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")\n+\tparser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,\t\t\t\thelp="also output not overlapping data [format: bool] [default: false]")\n+\tparser.add_option("-x", "--exclude",\t\t dest="exclude",\t\t action="store_true", default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity",\t\t dest="verbosity",\t\t action="store",\t default=1,\t 
type="int",\thelp="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\tco = CompareOverlapping(options.verbosity)\n+\tco.setInput(options.inputFileName1, options.format1, QUERY)\n+\tco.setInput(options.inputFileName2, options.format2, REFERENCE)\n+\tco.setOutput(options.output)\n+\tco.setSorted(options.sorted)\n+\tco.setIndex(options.index)\n+\tco.restrictToStart(options.start1, QUERY)\n+\tco.restrictToStart(options.start2, REFERENCE)\n+\tco.restrictToEnd(options.end1, QUERY)\n+\tco.restrictToEnd(options.end2, REFERENCE)\n+\tco.extendFivePrime(options.fivePrime1, QUERY)\n+\tco.extendFivePrime(options.fivePrime2, REFERENCE)\n+\tco.extendThreePrime(options.threePrime1, QUERY)\n+\tco.extendThreePrime(options.threePrime2, REFERENCE)\n+\tco.acceptIntrons(options.introns)\n+\tco.getAntisenseOnly(options.antisense)\n+\tco.getColinearOnly(options.colinear)\n+\tco.getInvert(options.exclude)\n+\tco.setMaxDistance(options.distance)\n+\tco.setMinOverlap(options.minOverlap)\n+\tco.setPcOverlap(options.pcOverlap)\n+\tco.setIncludedOnly(options.included)\n+\tco.setIncludingOnly(options.including)\n+\tco.includeNotOverlapping(options.notOverlapping)\n+\tco.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CompareOverlappingSmallQuery.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlappingSmallQuery.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,226 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Mapping import Mapping\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+\n+MINBIN = 3\n+MAXBIN = 7\n+REFERENCE = 0\n+QUERY = 1\n+\n+def getBin(start, end):\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tif int(start / binLevel) == int(end / binLevel):\n+\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n+\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\n+def getOverlappingBins(start, end):\n+\tarray\t= []\n+\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n+\tarray.append((bigBin, bigBin))\n+\treturn array\n+\n+\n+class CompareOverlappingSmallQuery(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity = verbosity\n+\t\tself.tableNames = {}\n+\t\tself.nbQueries = 0\n+\t\tself.nbRefs\t = 0\n+\t\tself.nbWritten = 0\n+\t\tself.nbOverlaps = 0\n+\t\tself.distance = None\n+\t\tself.invert = False\n+\t\tself.antisense = False\n+\t\tself.collinear = False\n+\t\tself.bins\t = {}\n+\t\tself.overlaps = 
{}\n+\t\tself.notOverlapping = False\n+\n+\tdef setReferenceFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.refParser = chooser.getParser(fileName)\n+\n+\tdef setQueryFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.queryParser = chooser.getParser(fileName)\n+\n+\tdef setOutputFile(self, fileName):\n+\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+\tdef setDistance(self, distance):\n+\t\tself.distance = distance\n+\n+\tdef setInvert(self, boolean):\n+\t\tself.invert = boolean\n+\n+\tdef setCollinear(self, boolean):\n+\t\tself.collinear = boolean\n+\n+\tdef setAntisense(self, boolean):\n+\t\tself.antisense = boolean\n+\n+\tdef includeNotOverlapping(self, boolean):\n+\t\tself.notOverlapping = boolean\n+\n+\tdef loadQuery(self):\n+\t\tprogress = UnlimitedProgress(10000, "Reading queries", self.verbosity)\n+\t\tfor tr'..b'nts\n+\n+\tdef _updateTranscript(self, queryTranscript):\n+\t\toverlaps = self.overlaps[queryTranscript]\n+\t\tqueryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))\n+\t\tif overlaps:\n+\t\t\tqueryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])\n+\t\treturn queryTranscript\n+\n+\tdef compare(self):\n+\t\tprogress = UnlimitedProgress(10000, "Comparing references", self.verbosity)\n+\t\tfor refTranscript in self.refParser.getIterator():\n+\t\t\tif refTranscript.__class__.__name__ == "Mapping":\n+\t\t\t\trefTranscript = refTranscript.getTranscript()\n+\t\t\trefTranscript = self._alterTranscript(refTranscript, REFERENCE)\n+\t\t\tself._compareTranscript(refTranscript)\n+\t\t\tself.nbRefs += 1\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\n+\tdef printResults(self):\n+\t\tfor transcript in self.overlaps:\n+\t\t\tif not self.invert or not self.overlaps[transcript]:\n+\t\t\t\tif not self.invert:\n+\t\t\t\t\ttranscript = 
self._updateTranscript(transcript)\n+\t\t\t\tself.writer.addTranscript(transcript)\n+\t\t\t\tself.nbWritten += 1\n+\t\tself.writer.close()\n+\n+\tdef displayResults(self):\n+\t\tprint "# queries: %d" % (self.nbQueries)\n+\t\tprint "# refs: %d" % (self.nbRefs)\n+\t\tprint "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n+\n+\tdef run(self):\n+\t\tself.loadQuery()\n+\t\tself.compare()\n+\t\tself.printResults()\n+\t\tself.displayResults()\n+\n+if __name__ == "__main__":\n+\t\n+\tdescription = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]"\n+\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input1",\t dest="inputFileName1", action="store",\t\t\t type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--format1", dest="format1",\t\t action="store",\t\t\t type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-j", "--input2",\t dest="inputFileName2", action="store",\t\t\t type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+\tparser.add_option("-g", "--format2", dest="format2",\t\t action="store",\t\t\t type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-o", "--output",\t dest="outputFileName", action="store",\t\t\t type="string", help="output file [format: output file in GFF3 format]")\n+\tparser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,\t\t\t\t help="also output not overlapping data [format: bool] [default: false]")\n+\tparser.add_option("-d", "--distance",\t\tdest="distance",\t action="store",\t default=0,\t type="int",\t help="accept some distance between query and reference [format: 
int]")\n+\tparser.add_option("-c", "--collinear",\t\tdest="collinear",\t action="store_true", default=False,\t\t\t \t help="provide collinear features [format: bool] [default: false]")\n+\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t action="store_true", default=False,\t\t\t \t help="provide antisense features [format: bool] [default: false]")\n+\tparser.add_option("-x", "--exclude",\t\tdest="exclude",\t\t action="store_true", default=False,\t\t\t \t help="invert the match [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity",\t action="store", default=1, type="int",\t help="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\tcosq = CompareOverlappingSmallQuery(options.verbosity)\n+\tcosq.setQueryFile(options.inputFileName1, options.format1)\n+\tcosq.setReferenceFile(options.inputFileName2, options.format2)\n+\tcosq.setOutputFile(options.outputFileName)\n+\tcosq.includeNotOverlapping(options.notOverlapping)\n+\tcosq.setDistance(options.distance)\n+\tcosq.setCollinear(options.collinear)\n+\tcosq.setAntisense(options.antisense)\n+\tcosq.setInvert(options.exclude)\n+\tcosq.run()\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CompareOverlappingSmallRef.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlappingSmallRef.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,217 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Mapping import Mapping\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+\n+MINBIN = 3\n+MAXBIN = 7\n+REFERENCE = 0\n+QUERY = 1\n+\n+def getBin(start, end):\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tif int(start / binLevel) == int(end / binLevel):\n+\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n+\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\n+def getOverlappingBins(start, end):\n+\tarray\t= []\n+\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n+\tarray.append((bigBin, bigBin))\n+\treturn array\n+\n+\n+class CompareOverlappingSmallRef(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity = verbosity\n+\t\tself.tableNames = {}\n+\t\tself.nbQueries = 0\n+\t\tself.nbRefs\t = 0\n+\t\tself.nbWritten = 0\n+\t\tself.nbOverlaps = 0\n+\t\tself.invert = False\n+\t\tself.antisense = False\n+\t\tself.collinear = False\n+\t\tself.distance = None\n+\t\tself.bins\t = {}\n+\t\tself.notOverlapping = 
False\n+\n+\tdef setReferenceFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.refParser = chooser.getParser(fileName)\n+\n+\tdef setQueryFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.queryParser = chooser.getParser(fileName)\n+\n+\tdef setOutputFile(self, fileName):\n+\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+\tdef setDistance(self, distance):\n+\t\tself.distance = distance\n+\n+\tdef setCollinear(self, boolean):\n+\t\tself.collinear = boolean\n+\n+\tdef setAntisense(self, boolean):\n+\t\tself.antisense = boolean\n+\n+\tdef setInvert(self, boolean):\n+\t\tself.invert = boolean\n+\n+\tdef includeNotOverlapping(self, boolean):\n+\t\tself.notOverlapping = boolean\n+\n+\tdef loadRef(self):\n+\t\tprogress = UnlimitedProgress(10000, "Reading references", self.verbosity)\n+\t\tfor transcript in self.refParser.ge'..b') if "nbElements" in refTranscript.getTagNames() else 1\n+\t\t\t\t\t\tself.nbOverlaps += nbElements\n+\t\treturn overlaps\n+\n+\tdef _updateTranscript(self, queryTranscript, overlaps):\n+\t\tqueryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))\n+\t\tif overlaps:\n+\t\t\tqueryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])\n+\t\treturn queryTranscript\n+\n+\tdef compare(self):\n+\t\tprogress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)\n+\t\tfor queryTranscript in self.queryParser.getIterator():\n+\t\t\tif queryTranscript.__class__.__name__ == "Mapping":\n+\t\t\t\tqueryTranscript = queryTranscript.getTranscript()\n+\t\t\tprogress.inc()\n+\t\t\tself.nbQueries += 1\n+\t\t\toverlaps = self._compareTranscript(queryTranscript)\n+\t\t\tif self.notOverlapping or (overlaps and not self.invert) or (not overlaps and self.invert):\n+\t\t\t\tif not self.invert:\n+\t\t\t\t\tqueryTranscript = self._updateTranscript(queryTranscript, 
overlaps)\n+\t\t\t\tself.writer.addTranscript(queryTranscript)\n+\t\t\t\tself.nbWritten += 1\n+\t\tprogress.done()\n+\t\tself.writer.close()\n+\n+\tdef displayResults(self):\n+\t\tprint "# queries: %d" % (self.nbQueries)\n+\t\tprint "# refs: %d" % (self.nbRefs)\n+\t\tprint "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n+\n+\tdef run(self):\n+\t\tself.loadRef()\n+\t\tself.compare()\n+\t\tself.displayResults()\n+\n+if __name__ == "__main__":\n+\t\n+\tdescription = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]"\n+\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input1",\t dest="inputFileName1", action="store",\t\t\t type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--format1", dest="format1",\t\t action="store",\t\t\t type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-j", "--input2",\t dest="inputFileName2", action="store",\t\t\t type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+\tparser.add_option("-g", "--format2", dest="format2",\t\t action="store",\t\t\t type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-o", "--output",\t dest="outputFileName", action="store",\t\t\t type="string", help="output file [format: output file in GFF3 format]")\n+\tparser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,\t\t\t\thelp="also output not overlapping data [format: bool] [default: false]")\n+\tparser.add_option("-d", "--distance",\t\tdest="distance",\t action="store",\t default=0,\t type="int",\t help="accept some distance between query and reference [format: int]")\n+\tparser.add_option("-c", 
"--collinear",\t\tdest="collinear",\t action="store_true", default=False,\t\t\t \t help="provide collinear features [format: bool] [default: false]")\n+\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t action="store_true", default=False,\t\t\t \t help="provide antisense features [format: bool] [default: false]")\n+\tparser.add_option("-x", "--exclude",\t\tdest="exclude",\t\t action="store_true", default=False,\t\t\t \t help="invert the match [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity",\t action="store", default=1, type="int",\thelp="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\tcosr = CompareOverlappingSmallRef(options.verbosity)\n+\tcosr.setQueryFile(options.inputFileName1, options.format1)\n+\tcosr.setReferenceFile(options.inputFileName2, options.format2)\n+\tcosr.setOutputFile(options.outputFileName)\n+\tcosr.includeNotOverlapping(options.notOverlapping)\n+\tcosr.setDistance(options.distance)\n+\tcosr.setAntisense(options.antisense)\n+\tcosr.setInvert(options.exclude)\n+\tcosr.setInvert(options.exclude)\n+\tcosr.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ComputeCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ComputeCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,142 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, random
+from optparse import OptionParser, OptionGroup
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
+class CoverageComputer(object):
+    """Compute how much of a query set is covered by a reference set.
+
+    The reference transcripts are turned into a per-chromosome dictionary of
+    covered positions. The query transcripts are then walked nucleotide by
+    nucleotide against that dictionary. When an output writer has been set,
+    every query transcript is also written with a "coverage" tag holding the
+    percentage of its size that is found in the reference.
+    """
+
+    def __init__(self, verbosity = 0):
+        """Store the trace level and reset readers, writer and counters."""
+        self.verbosity       = verbosity
+        self.queryReader     = None
+        self.referenceReader = None
+        self.outputWriter    = None
+        self.introns         = False
+        self.nbNucleotides   = 0
+        self.nbCovered       = 0
+        # chromosome -> {position: 1}; (re)built by readReference()
+        self.coveredRegions  = {}
+
+    def setInputQueryFile(self, fileName, format):
+        """Open the query file, given its name and its transcript format."""
+        self.queryReader = TranscriptContainer(fileName, format, self.verbosity-1)
+
+    def setInputReferenceFile(self, fileName, format):
+        """Open the reference file, given its name and its transcript format."""
+        self.referenceReader = TranscriptContainer(fileName, format, self.verbosity-1)
+
+    def includeIntrons(self, boolean):
+        """Choose whether removeExons() is applied to each transcript before counting."""
+        self.introns = boolean
+
+    def setOutputFileName(self, fileName, title="S-MART", feature="transcript", featurePart="exon"):
+        """Create the GFF3 writer and set its title, feature and feature part names."""
+        self.outputWriter = Gff3Writer(fileName, self.verbosity-1)
+        self.outputWriter.setTitle(title)
+        self.outputWriter.setFeature(feature)
+        self.outputWriter.setFeaturePart(featurePart)
+
+    def readReference(self):
+        """Record every position covered by an exon of the reference."""
+        self.coveredRegions = {}
+        progress = Progress(self.referenceReader.getNbTranscripts(), "Reading reference file", self.verbosity-1)
+        for transcript in self.referenceReader.getIterator():
+            chromosome = transcript.getChromosome()
+            if chromosome not in self.coveredRegions:
+                self.coveredRegions[chromosome] = {}
+            if self.introns:
+                # NOTE(review): presumably collapses the transcript so that
+                # introns are counted too -- confirm in Transcript.removeExons
+                transcript.removeExons()
+            for exon in transcript.getExons():
+                for position in range(exon.getStart(), exon.getEnd()+1):
+                    self.coveredRegions[chromosome][position] = 1
+            progress.inc()
+        progress.done()
+
+    def readQuery(self):
+        """Count the query nucleotides and how many of them are covered.
+
+        Transcripts located on a chromosome that is absent from the reference
+        are skipped entirely: they add neither to nbNucleotides nor to
+        nbCovered.
+        """
+        progress = Progress(self.queryReader.getNbTranscripts(), "Reading query file", self.verbosity-1)
+        for transcript in self.queryReader.getIterator():
+            progress.inc()
+            chromosome = transcript.getChromosome()
+            if chromosome not in self.coveredRegions:
+                continue
+            if self.introns:
+                transcript.removeExons()
+            coveredPositions = self.coveredRegions[chromosome]
+            for exon in transcript.getExons():
+                for position in range(exon.getStart(), exon.getEnd()+1):
+                    self.nbNucleotides += 1
+                    self.nbCovered     += coveredPositions.get(position, 0)
+        progress.done()
+
+    def write(self):
+        """Write every query transcript with its "coverage" tag (in percent)."""
+        progress = Progress(self.queryReader.getNbTranscripts(), "Writing output file", self.verbosity-1)
+        for transcript in self.queryReader.getIterator():
+            chromosome = transcript.getChromosome()
+            if self.introns:
+                transcript.removeExons()
+            size     = transcript.getSize()
+            coverage = 0
+            # A query chromosome may be missing from the reference: such a
+            # transcript is reported with a coverage of 0 instead of raising
+            # a KeyError.
+            coveredPositions = self.coveredRegions.get(chromosome, {})
+            for exon in transcript.getExons():
+                for position in range(exon.getStart(), exon.getEnd()+1):
+                    coverage += coveredPositions.get(position, 0)
+            transcript.setTagValue("coverage", 0 if size == 0 else float(coverage) / size * 100)
+            self.outputWriter.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+
+    def sumUp(self):
+        """Print the number of query nucleotides and the covered fraction."""
+        print "%d nucleotides in query, %d (%.f%%) covered" % (self.nbNucleotides, self.nbCovered, 0 if self.nbNucleotides == 0 else float(self.nbCovered) / self.nbNucleotides * 100)
+
+    def run(self):
+        """Read both inputs, write the output if a writer was set, then print the summary."""
+        self.readReference()
+        self.readQuery()
+        if self.outputWriter is not None:
+            self.write()
+        self.sumUp()
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Compute Coverage v1.0.1: Compute the coverage of a set with respect to another set. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input1",    dest="inputFileName1", action="store",                     type="string", help="input query file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format1",   dest="format1",        action="store",                     type="string", help="format of the first file [compulsory] [format: transcript file format]")
+    # the format of the reference file is given by -g (the help used to point at -f)
+    parser.add_option("-j", "--input2",    dest="inputFileName2", action="store",                     type="string", help="input reference file [compulsory] [format: file in transcript format given by -g]")
+    parser.add_option("-g", "--format2",   dest="format2",        action="store",                     type="string", help="format of the second file [compulsory] [format: transcript file format]")
+    parser.add_option("-t", "--introns",   dest="introns",        action="store_true", default=False,                help="also include introns [format: boolean] [default: false]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",      default=None,  type="string", help="output file [format: output file in GFF3 format]")
+    # default=1 matches the help text; without it optparse yields None and
+    # "self.verbosity-1" fails in CoverageComputer
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    (options, args) = parser.parse_args()
+
+    computer = CoverageComputer(options.verbosity)
+    computer.setInputQueryFile(options.inputFileName1, options.format1)
+    computer.setInputReferenceFile(options.inputFileName2, options.format2)
+    computer.includeIntrons(options.introns)
+    # the output file is optional: only build a writer when -o is given, so
+    # that run() skips the writing step otherwise
+    if options.outputFileName is not None:
+        computer.setOutputFileName(options.outputFileName)
+    computer.run()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/CountReadGCPercent.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CountReadGCPercent.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow
+
+
+class CountReadGCPercent(object):
+    """Tag the transcripts of a GFF3 annotation with GC and N percentages.
+
+    For each transcript, the sequence of its exons is extracted from a
+    reference FASTA file and passed to CountGCPercentBySlidingWindow; the
+    resulting values are stored in the "GCpercent" and "NPercent" tags and
+    the transcript is written to a GFF3 file.
+    """
+
+    def __init__(self):
+        """Reset the inputs, the writer and the verbosity (0)."""
+        self.referenceReader = None
+        self.gffReader = None
+        self.outputWriter = None
+        # read by the setters below when they build the reader and the writer
+        self.verbose = 0
+
+    def setInputReferenceFile(self, fileName):
+        """Store the name of the reference FASTA file (it is parsed in write())."""
+        self.referenceReader = fileName
+
+    def setInputGffFile(self, fileName):
+        """Open the annotation file, which is read as 'gff3'."""
+        self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)
+
+    def setOutputFileName(self, fileName):
+        """Create the GFF3 writer for the output file."""
+        self.outputWriter = Gff3Writer(fileName, self.verbose)
+
+    def readGffAnnotation(self):
+        """Record every position covered by an exon of the annotation.
+
+        The result is stored in self.coveredRegions as
+        chromosome -> {position: 1}.
+        """
+        self.coveredRegions = {}
+        progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
+        for transcript in self.gffReader.getIterator():
+            chromosome = transcript.getChromosome()
+            if chromosome not in self.coveredRegions:
+                self.coveredRegions[chromosome] = {}
+            for exon in transcript.getExons():
+                for position in range(exon.getStart(), exon.getEnd()+1):
+                    self.coveredRegions[chromosome][position] = 1
+            progress.inc()
+        progress.done()
+
+    def write(self):
+        """Compute the GC and N percentages of each transcript and write it.
+
+        The values of a transcript are those of the last exon whose
+        chromosome is found in the FASTA file; both stay at 0 when the
+        chromosome is unknown.
+        """
+        iParser = FastaParser(self.referenceReader)
+        iParser.setTags()
+        iGetGCPercentBySW = CountGCPercentBySlidingWindow()
+        progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
+        for transcript in self.gffReader.getIterator():
+            chromosome = transcript.getChromosome()
+            GCpercent = 0
+            nPercent = 0
+            for exon in transcript.getExons():
+                # direct membership test instead of scanning every sequence
+                # name of the FASTA file for each exon
+                if chromosome not in iParser.getTags():
+                    continue
+                # NOTE(review): the meaning of the last argument (1) is not
+                # visible here -- confirm in FastaParser.getSubSequence
+                subSequence = iParser.getSubSequence(chromosome, exon.getStart() , exon.getEnd(), 1)
+                GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
+                # NOTE(review): printed for every exon whatever the verbosity
+                print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent)
+            transcript.setTagValue("GCpercent", GCpercent)
+            transcript.setTagValue("NPercent", nPercent)
+            self.outputWriter.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+
+    def run(self):
+        """Read the annotation, then write the output if a writer was set."""
+        self.readGffAnnotation()
+        if self.outputWriter is not None:
+            self.write()
+
+if __name__ == "__main__":
+        # parse command line
+        description = "Count GC percent for each read against a genome."
+        usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> [-v <verbose>] [-h]"
+        examples = "\nExample: \n"
+        examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
+        examples += "\n\n"
+        parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
+        parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None )
+        parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None)
+        parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None )
+        parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 )
+        (options, args) = parser.parse_args()
+
+        readGCPercent = CountReadGCPercent()
+        # The reader and the writer receive self.verbose when they are built,
+        # so the requested verbosity has to be set before the files.
+        readGCPercent.verbose = options.verbose
+        readGCPercent.setInputReferenceFile(options.fastaFile)
+        readGCPercent.setInputGffFile(options.gffFile)
+        readGCPercent.setOutputFileName(options.outputFile)
+        readGCPercent.run()
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/FindOverlapsOptim.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/FindOverlapsOptim.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,343 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2012\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+\n+import os, struct, time, shutil\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList\n+from SMART.Java.Python.ncList.NCListParser import NCListParser\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.NCListHandler import NCListHandler\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+try:\n+ import cPickle as pickle\n+except:\n+ import pickle\n+\n+REFERENCE = 0\n+QUERY = 1\n+TYPES = (REFERENCE, QUERY)\n+TYPETOSTRING = {0: "reference", 1: "query"}\n+\n+class FindOverlapsOptim(object):\n+\t\n+\tdef __init__(self, verbosity = 1):\n+\t\tself._parsers\t\t\t\t = {}\n+\t\tself._sortedFileNames\t\t = {}\n+\t\tself._outputFileName\t\t = "outputOverlaps.gff3"\n+\t\tself._iWriter\t\t\t\t = None\n+\t\tself._inputFileNames\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._convertedFileNames = {REFERENCE: False, QUERY: False}\n+\t\tself._inputFileFormats\t\t = {REFERENCE: None, QUERY: None}\n+\t\tself._converted\t\t\t = {REFERENCE: False, QUERY: False}\n+\t\tself._ncListHandlers = 
{REFERENCE: None, QUERY: None}\n+\t\tself._splittedFileNames\t = {REFERENCE: {},\tQUERY: {}}\n+\t\tself._nbOverlappingQueries\t = 0\n+\t\tself._nbOverlaps\t\t\t = 0\n+\t\tself._nbLines\t\t\t\t = {REFERENCE: 0, QUERY: 0}\n+\t\tself._sorted = False\n+\t\tself._index = False\n+\t\tself._verbosity\t\t\t = verbosity\n+\t\tself._ncLists\t\t\t\t = {}\n+\t\tself._cursors\t\t\t\t = {}\n+\t\tself._nbElementsPerChromosome = {}\n+\t\tself._tmpDirectories\t\t = {REFERENCE: False, QUERY: False}\n+\t\t\n+\tdef close(self):\n+\t\tself._iWriter.close()\n+\t\tfor fileName in (self._sortedFileNames.values()):\n+\t\t\tif os.path.exists(fileName):\n+\t\t\t\tos.remove(fileName)\n+\t\tfor fileName in self._convertedFileNames.values():\n+\t\t\tif fileName:\n+\t\t\t\tos.remove(fileName)\n+\t\t\n+\tdef setRefFileName(self, fileName, format):\n+\t\tself.setFileName(fileName, format, REFERENCE)\n+\t\t\n+\tdef setQueryFileName(self, fileName, format):\n+\t\tself.setFileName(fileName, format, QUERY)\n+\n+\tdef se'..b'def isOverlapping(self, queryTranscript, refTranscript):\n+\t\tif (queryTranscript.getStart() <= refTranscript.getEnd() and queryTranscript.getEnd() >= refTranscript.getStart()):\n+\t\t\treturn 0 \n+\t\tif queryTranscript.getEnd() < refTranscript.getStart():\n+\t\t\treturn 1\n+\t\treturn -1\n+\n+\tdef checkIndex(self, transcript, cursor):\n+\t\tif not self._index:\n+\t\t\treturn None\n+\t\tchromosome = transcript.getChromosome()\n+\t\tnextLIndex = self._indices[REFERENCE][chromosome].getIndex(transcript)\n+\t\tif nextLIndex == None:\n+\t\t\treturn None\n+\t\tncList\t\t = self._ncLists[REFERENCE][chromosome]\n+\t\tnextGffAddress = ncList.getRefGffAddr(nextLIndex)\n+\t\tthisGffAddress = cursor.getGffAddress()\n+\t\tif nextGffAddress > thisGffAddress:\n+\t\t\treturn nextLIndex\n+\t\treturn None\n+\t\t\n+\tdef _writeIntervalInNewGFF3(self, transcript, names):\n+\t\tnbOverlaps = 0\n+\t\tfor cpt in names.values():\n+\t\t\tnbOverlaps += cpt\n+\t\tif not 
names:\n+\t\t\treturn\n+\t\ttranscript.setTagValue("overlapsWith", "--".join(sorted(names.keys())))\n+\t\ttranscript.setTagValue("nbOverlaps", nbOverlaps)\n+\t\tself._iWriter.addTranscript(transcript)\n+\t\tself._iWriter.write()\n+\t\tself._nbOverlappingQueries += 1\n+\t\tself._nbOverlaps\t\t += nbOverlaps\n+\t\t\n+\tdef _extractID(self, transcript):\n+\t\tnbElements = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1\n+\t\tid\t\t = transcript.getTagValue("ID")\t\t\t\t if "ID"\t\t in transcript.getTagNames() else transcript.getUniqueName()\n+\t\treturn {id: nbElements}\n+\t\t\n+\tdef run(self):\n+\t\tself.createNCLists()\n+\t\tself.compare()\n+\t\tself.close()\n+\t\tif self._verbosity > 0:\n+\t\t\tprint "# queries: %d" % (self._nbLines[QUERY])\n+\t\t\tprint "# refs: %d" % (self._nbLines[REFERENCE])\n+\t\t\tprint "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)\n+\t\t\tprint "time: %.2gs" % (self._timeSpent)\n+\n+\n+if __name__ == "__main__":\n+\tdescription = "Find Overlaps Optim v1.0.0: Finds overlaps with several query intervals. 
[Category: Data Comparison]"\n+\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--query",\t dest="inputQueryFileName", action="store",\t\t\t type="string", help="query input file [compulsory] [format: file in transcript or other format given by -f]")\n+\tparser.add_option("-f", "--queryFormat", dest="queryFormat",\t\taction="store",\t\t\t type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n+\tparser.add_option("-j", "--ref",\t\t dest="inputRefFileName", action="store",\t\t\t type="string", help="reference input file [compulsory] [format: file in transcript or other format given by -g]")\n+\tparser.add_option("-g", "--refFormat", dest="refFormat",\t\t action="store",\t\t\t type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n+\tparser.add_option("-o", "--output",\t dest="outputFileName",\t action="store",\t\t\t type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+\tparser.add_option("-d", "--index",\t dest="index",\t action="store_true", default=False,\t help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")\n+\tparser.add_option("-s", "--sorted",\t dest="sorted",\t action="store_true", default=False,\t help="input files are already sorted [format: boolean] [default: False]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity",\t\t action="store", default=1, type="int",\t help="Trace level [format: int] [default: 1]")\n+\t(options, args) = parser.parse_args()\n+\t\n+\tiFOO = FindOverlapsOptim(options.verbosity)\n+\tiFOO.setRefFileName(options.inputRefFileName, options.refFormat)\n+\tiFOO.setQueryFileName(options.inputQueryFileName, options.queryFormat)\n+\tiFOO.setOutputFileName(options.outputFileName)\n+\tiFOO.setIndex(options.index)\n+\tiFOO.setSorted(options.sorted)\n+\tiFOO.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetDifferentialExpression.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetDifferentialExpression.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,441 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Get the differential expression between 2 conditions (2 files), on regions defined by a third file"""\n+\n+import os, re\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n+from SMART.Java.Python.structure.Transcript import Transcript\n+\n+class GetDifferentialExpression(object):\n+ \n+ def __init__(self, verbosity = 1):\n+ self.verbosity = verbosity\n+ self.mySqlConnection = MySqlConnection(verbosity)\n+ self.inputs = (0, 1)\n+ self.transcriptContainers = [None, None]\n+ self.transcriptContainerRef = None\n+ self.outputFileName = None\n+ self.writer = None\n+ self.tables = [None, None]\n+ self.nbElements = [0, 0]\n+\n+ self.regionsToValues = {}\n+ self.regionsToNames = {}\n+ self.valuesToPvalues = {}\n+\n+ self.oriented = True\n+ self.simpleNormalization = False\n+ self.simpleNormalizationParameters = None\n+ self.adjustedNormalization = False\n+ self.fixedSizeFactor = None\n+ self.normalizationSize = None\n+ self.normalizationFactors = [1, 1]\n+ self.fdr = None \n+ self.fdrPvalue = None \n+\n+ self.plot = False\n+ self.plotter = None\n+ self.plotterName = None\n+ self.points = {}\n+\n+\n+ def setInputFile(self, i, fileName, fileFormat):\n+ self.transcriptContainers[i] = 
TranscriptContainer(fileName, fileFormat, self.verbosity)\n+ self.transcriptContainers[i].mySqlConnection = self.mySqlConnection\n+\n+\n+ def setReferenceFile(self, fileName, fileFormat):\n+ self.transcriptContainerRef = TranscriptContainer(fileName, fileFormat, self.verbosity)\n+ self.transcriptContainerRef.mySqlConnection = self.mySqlConnection\n+\n+\n+ def setOutputFile(self, fileName):\n+ self.outputFileName = fileName\n+ self.writer = Gff3Writer(fileName, self.verbosity)\n+\n+ \n+ def setOriented(self'..b' file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n+ parser.add_option("-k", "--reference", dest="referenceFileName", action="store", type="string", help="reference file [compulsory] [format: file in transcript format given by -l]")\n+ parser.add_option("-l", "--referenceFormat", dest="referenceFormat", action="store", type="string", help="format of reference file [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in gff3 format]")\n+ parser.add_option("-n", "--notOriented", dest="notOriented", action="store_true", default=False, help="if the reads are not oriented [default: False] [format: bool]")\n+ parser.add_option("-s", "--simple", dest="simple", action="store_true", default=False, help="normalize using the number of reads in each condition [format: bool]")\n+ parser.add_option("-S", "--simpleParameters", dest="simpleParameters", action="store", 
default=None, type="string", help="provide the number of reads [format: bool]")\n+ parser.add_option("-a", "--adjusted", dest="adjusted", action="store_true", default=False, help="normalize using the number of reads of \'mean\' regions [format: bool]")\n+ parser.add_option("-x", "--fixedSizeFactor", dest="fixedSizeFactor", action="store", default=None, type="int", help="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization) [format: int]")\n+ parser.add_option("-d", "--fdr", dest="fdr", action="store", default=None, type="float", help="use FDR [format: float]")\n+ parser.add_option("-p", "--plot", dest="plotName", action="store", default=None, type="string", help="plot cloud plot [format: output file in PNG format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+\n+ \n+ differentialExpression = GetDifferentialExpression(options.verbosity)\n+ differentialExpression.setInputFile(0, options.inputFileName1, options.format1)\n+ differentialExpression.setInputFile(1, options.inputFileName2, options.format2)\n+ differentialExpression.setReferenceFile(options.referenceFileName, options.referenceFormat)\n+ differentialExpression.setOutputFile(options.outputFileName)\n+ if options.plotName != None :\n+ differentialExpression.setPlotterName(options.plotName)\n+ differentialExpression.setPlotter()\n+ differentialExpression.setOriented(not options.notOriented)\n+ differentialExpression.setSimpleNormalization(options.simple)\n+ differentialExpression.setSimpleNormalizationParameters(options.simpleParameters)\n+ differentialExpression.setAdjustedNormalization(options.adjusted)\n+ differentialExpression.setFixedSizeNormalization(options.fixedSizeFactor)\n+ differentialExpression.setFdr(options.fdr)\n+ differentialExpression.getDifferentialExpression()\n+ 
differentialExpression.mySqlConnection.deleteDatabase()\n+ \n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,362 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2012\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.parsing.FastaParser import FastaParser\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.MultipleRPlotter import MultipleRPlotter\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+TWOSTRANDS = {True: [1, -1], False: [0]}\n+STRANDTOSTR = {1: "(+)", -1: "(-)", 0: ""}\n+\n+class GetDistribution(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity = verbosity\n+\t\tself.sizes = None\n+\t\tself.twoStrands = False\n+\t\tself.start = 1\n+\t\tself.names = ["nbElements"]\n+\t\tself.average = False\n+\t\tself.nbValues = {}\n+\t\tself.height = 300\n+\t\tself.width = 600\n+\t\tself.colors = None\n+\t\tself.gffFileName = None\n+\t\tself.csvFileName = None\n+\t\tself.yMin = None\n+\t\tself.yMax = None\n+\t\tself.chromosome = None\n+\t\tself.merge = False\n+\t\tself.nbTranscripts = None\n+\n+\tdef setInputFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.parser = chooser.getParser(fileName)\n+\n+\tdef setReferenceFile(self, fileName):\n+\t\tif fileName == None:\n+\t\t\treturn\n+\t\tfastaParser = FastaParser(fileName, self.verbosity)\n+\t\tself.chromosomes = fastaParser.getRegions()\n+\t\tself.sizes = 
dict([region, fastaParser.getSizeOfRegion(region)] for region in self.chromosomes)\n+\t\tself.maxSize = max(self.sizes.values())\n+\n+\tdef setRegion(self, chromosome, start, end):\n+\t\tif chromosome == None:\n+\t\t\treturn\n+\t\tself.maxSize = options.end\n+\t\tself.sizes = {chromosome: end}\n+\t\tself.chromosomes = [chromosome]\n+\t\tself.chromosome = chromosome\n+\t\tself.start = start\n+\t\tself.end = end\n+\n+\tdef setOutputFile(self, fileName):\n+\t\tself.outputFileName = fileName\n+\n+\tdef setNbBins(self, nbBins):\n+\t\tself.nbBins = nbBins\n+\n+\tdef set2Strands(self, twoStrands):\n+\t\tself.twoStrands = twoStrands\n+\n+\tdef setNames(self, names):\n+\t\tself.names = names\n+\n+\tdef setAverage(self, average):\n+\t\tself.average = average\n+\n+\tdef setNormalization(self, normalization):\n+\t\tself.normalization = normalization\n+\t\n+\tdef setImageSize(self, height, width):\n+\t\tself.height = height\n+\t\tself.width '..b' action="store", default=1000, type="int", help="number of bins [default: 1000] [format: int]")\n+\tparser.add_option("-2", "--bothStrands", dest="bothStrands", action="store_true", default=False, help="plot one curve per strand [format: bool] [default: false]")\n+\tparser.add_option("-c", "--chromosome", dest="chromosome", action="store", default=None, type="string", help="plot only a chromosome [format: string]")\n+\tparser.add_option("-s", "--start", dest="start", action="store", default=None, type="int", help="start from a given region [format: int]")\n+\tparser.add_option("-e", "--end", dest="end", action="store", default=None, type="int", help="end from a given region [format: int]")\n+\tparser.add_option("-y", "--yMin", dest="yMin", action="store", default=None, type="int", help="minimum value on the y-axis to plot [format: int]")\n+\tparser.add_option("-Y", "--yMax", dest="yMax", action="store", default=None, type="int", help="maximum value on the y-axis to plot [format: int]")\n+\tparser.add_option("-x", "--csv", dest="csv", 
action="store", default=None, help="write a .csv file [format: output file in CSV format] [default: None]")\n+\tparser.add_option("-g", "--gff", dest="gff", action="store", default=None, help="also write GFF3 file [format: output file in GFF format] [default: None]")\n+\tparser.add_option("-H", "--height", dest="height", action="store", default=300, type="int", help="height of the graphics [format: int] [default: 300]")\n+\tparser.add_option("-W", "--width", dest="width", action="store", default=600, type="int", help="width of the graphics [format: int] [default: 1000]")\n+\tparser.add_option("-a", "--average", dest="average", action="store_true", default=False, help="plot average (instead of sum) [default: false] [format: boolean]")\n+\tparser.add_option("-n", "--names", dest="names", action="store", default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")\n+\tparser.add_option("-l", "--color", dest="colors", action="store", default=None, type="string", help="color of the lines (separated by commas and no space) [format: string]")\n+\tparser.add_option("-z", "--normalize", dest="normalize", action="store_true", default=False, help="normalize data (when panels are different) [format: bool] [default: false]")\n+\tparser.add_option("-m", "--merge", dest="mergePlots", action="store_true", default=False, help="merge all plots in one figure [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\tgt = GetDistribution(options.verbosity)\n+\tgt.setInputFile(options.inputFileName, options.format)\n+\tgt.setOutputFile(options.outputFileName)\n+\tgt.setReferenceFile(options.referenceFileName)\n+\tgt.setNbBins(int(options.nbBins))\n+\tgt.set2Strands(options.bothStrands)\n+\tgt.setRegion(options.chromosome, options.start, 
options.end)\n+\tgt.setNormalization(options.normalize)\n+\tgt.setAverage(options.average)\n+\tgt.setYLimits(options.yMin, options.yMax)\n+\tgt.writeCsv(options.csv)\n+\tgt.writeGff(options.gff)\n+\tgt.setImageSize(options.height, options.width)\n+\tgt.setNames(options.names.split(","))\n+\tgt.setColors(None if options.colors == None else options.colors.split(","))\n+\tgt.setNormalization(options.normalize)\n+\tgt.mergePlots(options.mergePlots)\n+\tgt.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetFlanking.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetFlanking.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,231 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+QUERY = 0\n+REFERENCE = 1\n+INPUTS = (QUERY, REFERENCE)\n+STRANDS = (-1, 1)\n+TAG_DISTANCE = "distance_"\n+TAG_SENSE = "_sense"\n+TAG_REGION = "_region"\n+TAGS_REGION = {-1: "_upstream", 0: "", 1: "_downstream"}\n+TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}\n+TAGS_SENSE = {-1: "antisense", 0: "", 1: "colinear"}\n+STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}\n+\n+\n+def getOrderKey(transcript, direction):\n+ if direction == 1:\n+ return transcript.getEnd()\n+ return - transcript.getStart()\n+\n+def isInGoodRegion(transcriptRef, transcriptQuery, direction):\n+ if direction == 1:\n+ return transcriptQuery.getEnd() > transcriptRef.getEnd()\n+ return transcriptQuery.getStart() < transcriptRef.getStart()\n+\n+\n+class GetFlanking(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.transcripts = dict([id, {}] for id in INPUTS)\n+ self.directions = []\n+ self.noOverlap = False\n+ self.colinear = False\n+ self.antisense = False\n+ self.distance = None\n+ self.minDistance = None\n+ self.maxDistance = None\n+ self.tagName = "flanking"\n+\n+ def setInputFile(self, fileName, format, id):\n+ chooser = ParserChooser(self.verbosity)\n+ 
chooser.findFormat(format)\n+ parser = chooser.getParser(fileName)\n+ for transcript in parser.getIterator():\n+ chromosome = transcript.getChromosome()\n+ if chromosome not in self.transcripts[id]:\n+ self.transcripts[id][chromosome] = []\n+ self.transcripts[id][chromosome].append(transcript)\n+\n+ def setOutputFile(self, fileName):\n+ self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+ def addUpstreamDirection(self, upstream):\n+ if upstream:\n+ self.directions.append(-1)\n+\n+ def addDownstreamDirection(self, downstream):\n+ if downstream:\n+ self.directions.append(1)\n+\n+ def setColinear(self, colinear):\n+ self.colinear = colinear\n+\n+ def setAntisense(self,'..b' progress.inc()\n+ for transcript in sorted(list(outputs), key = lambda flanking: (flanking.getChromosome(), flanking.getStart(), flanking.getEnd())):\n+ self.writer.addTranscript(transcript)\n+ self.writer.close()\n+ progress.done()\n+\n+ def run(self):\n+ self.flankings = {}\n+ for direction in STRANDS:\n+ self.getFlanking(direction)\n+ self.write()\n+\n+if __name__ == "__main__":\n+ \n+ description = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. 
[Category: Data Selection]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+ parser.add_option("-5", "--upstream", dest="upstream", action="store_true", default=False, help="output upstream elements [format: boolean] [default: False]")\n+ parser.add_option("-3", "--downstream", dest="downstream", action="store_true", default=False, help="output downstream elements [format: boolean] [default: False]")\n+ parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="find first colinear element [format: boolean] [default: False]")\n+ parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="find first anti-sense element [format: boolean] [default: False]")\n+ parser.add_option("-e", "--noOverlap", dest="noOverlap", action="store_true", default=False, help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")\n+ parser.add_option("-d", "--minDistance", dest="minDistance", action="store", default=None, type="int", help="minimum distance between 2 elements [format: int]")\n+ parser.add_option("-D", "--maxDistance", dest="maxDistance", action="store", default=None, type="int", help="maximum distance between 2 elements [format: int]")\n+ parser.add_option("-t", 
"--tag", dest="tagName", action="store", default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ gf = GetFlanking(options.verbosity)\n+ gf.setInputFile(options.inputFileName1, options.format1, QUERY)\n+ gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n+ gf.setOutputFile(options.outputFileName)\n+ gf.addUpstreamDirection(options.upstream)\n+ gf.addDownstreamDirection(options.downstream)\n+ gf.setColinear(options.colinear)\n+ gf.setAntisense(options.antisense)\n+ gf.setNoOverlap(options.noOverlap)\n+ gf.setMinDistance(options.minDistance)\n+ gf.setMaxDistance(options.maxDistance)\n+ gf.setNewTagName(options.tagName)\n+ gf.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetRandomSubset.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetRandomSubset.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
+class GetRandomSubset(object):
+
+    def __init__(self, verbosity):
+        self.verbosity = verbosity
+
+    def setInputFile(self, fileName, format):
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.parser = chooser.getParser(fileName)
+
+    def setNumber(self, number, percent):
+        if number != None:
+            self.number = number
+        elif percent != None:
+            self.number = int(float(percent) / 100 * self.parser.getNbTranscripts())
+        else:
+            raise Exception("Error! Number of elements to output is not given!")
+
+    def setOutputFile(self, fileName):
+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+    def chooseElements(self):
+        self.randomIndices = random.sample(range(self.parser.getNbTranscripts()), self.number)
+
+    def run(self):
+        self.chooseElements()
+        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
+        nbWritten = 0
+        for cpt1, transcript in enumerate(self.parser.getIterator()):
+            if cpt1 in self.randomIndices:
+                self.writer.addTranscript(transcript)
+                nbWritten += 1
+            progress.inc()
+        self.writer.write()
+        self.writer.close()
+        progress.done()
+        if self.verbosity > 1:
+            print "%d transcripts read" % (self.parser.getNbTranscripts())
+            print "%d transcripts written" % (nbWritten)
+
+
+if __name__ == "__main__":
+
+    description = "Get Random Subset v1.0.1: Get a random sub-set of a list of genomic coordinates. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
+    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="string", help="number of elements to output [format: int]")
+    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="string", help="percentage of elements to output (between 0 and 100) [format: int]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int", help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    grs = GetRandomSubset(options.verbosity)
+    grs.setInputFile(options.inputFileName, options.format)
+    grs.setNumber(options.number, options.percent)
+    grs.setOutputFile(options.outputFileName)
+    grs.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetReadDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetReadDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,283 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import random, os, glob, subprocess\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.parsing.GffParser import GffParser\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc import Utils\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+\n+LOG_DEPTH = "smart"\n+DEFAULT_REGION = "_all_"\n+MULTIPLE_STR = {1: "", 1000: " (in kpb)", 1000000: " (in Gbp)"}\n+\n+class GetReadDistribution(object):\n+\n+\tdef __init__(self, verbosity = 0):\n+\t\tself.xLab = ""\n+\t\tself.yLab = "# reads"\n+\t\tself.verbosity = verbosity\n+\t\tself.number = random.randint(0, 100000)\n+\t\tself.log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)\n+\t\tself.parsers = {}\n+\t\tself.distribution = {}\n+\t\tself.factors = {}\n+\t\tself.regions = None\n+\t\tself.tmpDatName = None\n+\t\tself.tmpRName = None\n+\t\tself.quorum = 1\n+\t\tself.width = 800\n+\t\tself.height = 300\n+\n+\tdef setNames(self, names):\n+\t\tself.names = names\n+\n+\tdef setInputFiles(self, fileNames, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tfor cpt, fileName in enumerate(fileNames):\n+\t\t\tself.parsers[self.names[cpt]] = chooser.getParser(fileName)\n+\n+\tdef setOutputFileName(self, fileName):\n+\t\tself.outputFileName = fileName\n+\n+\tdef setLabs(self, xLab, yLab):\n+\t\tself.xLab = 
xLab\n+\t\tself.yLab = yLab\n+\n+\tdef setBinSize(self, binSize):\n+\t\tself.binSize = binSize\n+\n+\tdef setColors(self, colors):\n+\t\tself.colors = colors\n+\n+\tdef setFactors(self, factors):\n+\t\tself.factors = dict(zip(self.names, factors))\n+\n+\tdef setMultiple(self, boolean):\n+\t\tself.multiple = boolean\n+\t\n+\tdef setImageSize(self, width, height):\n+\t\tif width != None:\n+\t\t\tself.width = width\n+\t\tif height != None:\n+\t\t\tself.height = height\n+\n+\tdef setQuorum(self, quorum):\n+\t\tself.quorum = quorum\n+\n+\tdef setRegionsFile(self, fileName):\n+\t\tif fileName != None:\n+\t\t\tself._loadRegions(fileName)\n+\n+\tdef _checkOptions(self):\n+\t\tif not self.parsers:\n+\t\t\tself.logAndRaise("ERROR: Missing input file names")\n+\n+\tdef _logAndRaise(self, errorMsg):\n+\t\tself.log.error(errorMsg)\n+\t\traise Exception(errorMsg)\n+\n+\tdef _loadRegions(self, fileName):\n+\t\tself.regions = {}\n+\t\tparser = GffParser(fileName, self.verbosity)\n+\t\tfor tran'..b'\t\tself._plot()\n+\t\tself._cleanFiles()\n+\t\tself.log.info("END Get Read Distribution")\n+\n+\n+if __name__ == "__main__":\n+\tdescription = "Usage: GetReadDistribution.py [options]\\n\\nGet Read Distribution v1.0.1: Get the distribution of a set of reads. 
[Category: Personal]\\n"\n+\tepilog = ""\n+\tparser = RepetOptionParser(description = description, epilog = epilog)\n+\tparser.add_option("-i", "--input", dest="inputFileNames", action="store", default=None, type="string", help="input files, separated by commas [compulsory] [format: string]")\n+\tparser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n+\tparser.add_option("-n", "--names", dest="names", action="store", default=None, type="string", help="name of the input data, separated by commas [compulsory] [format: string]")\n+\tparser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in PNG format]")\n+\tparser.add_option("-s", "--binSize", dest="binSize", action="store", default=10000, type="int", help="bin size [format: int] [default: 10000]")\n+\tparser.add_option("-l", "--xLabel", dest="xLab", action="store", default="", type="string", help="x-axis label name [format: string]")\n+\tparser.add_option("-L", "--yLabel", dest="yLab", action="store", default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")\n+\tparser.add_option("-c", "--colors", dest="colors", action="store", default=None, type="string", help="colors of the bars, separated by commas [format: string]")\n+\tparser.add_option("-a", "--factors", dest="factors", action="store", default=None, type="string", help="normalization factors, separated by commas [format: string]")\n+\tparser.add_option("-r", "--regions", dest="regionsFileName", action="store", default=None, type="string", help="regions to plot [format: transcript file in GFF format]")\n+\tparser.add_option("-m", "--multiple", dest="multiple", action="store_true", default=False, help="print position using multiples (k, G) [format: boolean] [default: False]")\n+\tparser.add_option("-q", "--quorum", 
dest="quorum", action="store", default=1, type="int", help="minimum number of intervals to plot a region [format: int] [default: 1]")\n+\tparser.add_option("-z", "--width", dest="width", action="store", default=800, type="int", help="width of the image [format: int] [default: 800]")\n+\tparser.add_option("-Z", "--height", dest="height", action="store", default=300, type="int", help="height of the image [format: int] [default: 300]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+\toptions = parser.parse_args()[0]\n+\tiGetReadDistribution = GetReadDistribution(options.verbosity)\n+\tiGetReadDistribution.setNames(options.names.split(","))\n+\tiGetReadDistribution.setInputFiles(options.inputFileNames.split(","), options.format)\n+\tiGetReadDistribution.setOutputFileName(options.outputFileName)\n+\tiGetReadDistribution.setLabs(options.xLab, options.yLab)\n+\tiGetReadDistribution.setBinSize(options.binSize)\n+\tiGetReadDistribution.setColors(None if options.colors == None else options.colors.split(","))\n+\tiGetReadDistribution.setFactors(None if options.factors == None else map(float, options.factors.split(",")))\n+\tiGetReadDistribution.setRegionsFile(options.regionsFileName)\n+\tiGetReadDistribution.setMultiple(options.multiple)\n+\tiGetReadDistribution.setQuorum(options.quorum)\n+\tiGetReadDistribution.setImageSize(options.width, options.height)\n+\tiGetReadDistribution.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetReadSizes.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetReadSizes.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,255 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import random, os, glob, subprocess\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.parsing.GffParser import GffParser\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc import Utils\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+\n+LOG_DEPTH = "smart"\n+DEFAULT_REGION = "_all_"\n+\n+class GetReadSizes(object):\n+\n+\tdef __init__(self, verbosity = 0):\n+\t\tself.xLab = "Size"\n+\t\tself.yLab = "# reads"\n+\t\tself.verbosity = verbosity\n+\t\tself.number = random.randint(0, 100000)\n+\t\tself.log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)\n+\t\tself.parsers = {}\n+\t\tself.sizes = {}\n+\t\tself.factors = {}\n+\t\tself.regions = None\n+\t\tself.tmpDatName = None\n+\t\tself.tmpRName = None\n+\t\tself.width = 800\n+\t\tself.height = 300\n+\n+\tdef setNames(self, names):\n+\t\tself.names = names\n+\n+\tdef setInputFiles(self, fileNames, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tfor cpt, fileName in enumerate(fileNames):\n+\t\t\tself.parsers[self.names[cpt]] = chooser.getParser(fileName)\n+\n+\tdef setOutputFileName(self, fileName):\n+\t\tself.outputFileName = fileName\n+\n+\tdef setLabs(self, xLab, yLab):\n+\t\tself.xLab = xLab\n+\t\tself.yLab = yLab\n+\n+\tdef setSizes(self, minSize, maxSize):\n+\t\tself.minSize = 
minSize\n+\t\tself.maxSize = maxSize\n+\n+\tdef setColors(self, colors):\n+\t\tself.colors = colors\n+\n+\tdef setFactors(self, factors):\n+\t\tself.factors = dict(zip(self.names, factors))\n+\n+\tdef setRegionsFile(self, fileName):\n+\t\tif fileName != None:\n+\t\t\tself._loadRegions(fileName)\n+\n+\tdef setImageSize(self, width, height):\n+\t\tif width != None:\n+\t\t\tself.width = width\n+\t\tif height != None:\n+\t\t\tself.height = height\n+\n+\tdef _checkOptions(self):\n+\t\tif not self.parsers:\n+\t\t\tself.logAndRaise("ERROR: Missing input file names")\n+\n+\tdef _logAndRaise(self, errorMsg):\n+\t\tself.log.error(errorMsg)\n+\t\traise Exception(errorMsg)\n+\n+\tdef _loadRegions(self, fileName):\n+\t\tself.regions = {}\n+\t\tparser = GffParser(fileName, self.verbosity)\n+\t\tfor transcript in parser.getIterator():\n+\t\t\tchromosome = transcript.getChromosome()\n+\t\t\tstart = transcript.getStart()\n+\t\t\tend = transcript.getEnd()\n+\t\t\tname = transcript.getName()\n+\t\t\tif chromosome'..b'ogress.done()\n+\n+\tdef _cleanFiles(self):\n+\t\tfor fileName in (self.tmpDatName, self.tmpRName):\n+\t\t\tif fileName != None and os.path.exists(fileName):\n+\t\t\t\tos.remove(fileName)\n+\t\t\t\tfor otherFileName in glob.glob("%s*" % (fileName)):\n+\t\t\t\t\tos.remove(otherFileName)\n+\n+\tdef run(self):\n+\t\tLoggerFactory.setLevel(self.log, self.verbosity)\n+\t\tself._checkOptions()\n+\t\tself.log.info("START Get Read Sizes")\n+\t\tfor name in self.names:\n+\t\t\tself._parse(name)\n+\t\tself._plot()\n+\t\tself._cleanFiles()\n+\t\tself.log.info("END Get Read Sizes")\n+\n+\n+if __name__ == "__main__":\n+\tdescription = "Usage: GetReadSizes.py [options]\\n\\nGet Read Sizes v1.0.1: Get the sizes of a set of reads. 
[Category: Personal]\\n"\n+\tepilog = ""\n+\tparser = RepetOptionParser(description = description, epilog = epilog)\n+\tparser.add_option("-i", "--input", dest="inputFileNames", action="store", default=None, type="string", help="input files, separated by commas [compulsory] [format: string]")\n+\tparser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n+\tparser.add_option("-n", "--names", dest="names", action="store", default=None, type="string", help="name of the input data, separated by commas [compulsory] [format: string]")\n+\tparser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in PNG format]")\n+\tparser.add_option("-s", "--minSize", dest="minSize", action="store", default=None, type="int", help="minimum size [format: int]")\n+\tparser.add_option("-S", "--maxSize", dest="maxSize", action="store", default=None, type="int", help="maximum size [format: int]")\n+\tparser.add_option("-l", "--xLabel", dest="xLab", action="store", default="Size", type="string", help="x-axis label name [format: string] [default: Size]")\n+\tparser.add_option("-L", "--yLabel", dest="yLab", action="store", default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")\n+\tparser.add_option("-c", "--colors", dest="colors", action="store", default=None, type="string", help="colors of the bars, separated by commas [format: string]")\n+\tparser.add_option("-a", "--factors", dest="factors", action="store", default=None, type="string", help="normalization factors, separated by commas [format: string]")\n+\tparser.add_option("-r", "--regions", dest="regionsFileName", action="store", default=None, type="string", help="regions to plot [format: transcript file in GFF format]")\n+\tparser.add_option("-z", "--width", dest="width", action="store", 
default=800, type="int", help="width of the image [format: int] [default: 800]")\n+\tparser.add_option("-Z", "--height", dest="height", action="store", default=300, type="int", help="height of the image [format: int] [default: 300]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+\toptions = parser.parse_args()[0]\n+\tiGetReadSizes = GetReadSizes(options.verbosity)\n+\tiGetReadSizes.setNames(options.names.split(","))\n+\tiGetReadSizes.setInputFiles(options.inputFileNames.split(","), options.format)\n+\tiGetReadSizes.setOutputFileName(options.outputFileName)\n+\tiGetReadSizes.setLabs(options.xLab, options.yLab)\n+\tiGetReadSizes.setSizes(options.minSize, options.maxSize)\n+\tiGetReadSizes.setColors(None if options.colors == None else options.colors.split(","))\n+\tiGetReadSizes.setFactors(None if options.factors == None else map(float, options.factors.split(",")))\n+\tiGetReadSizes.setRegionsFile(options.regionsFileName)\n+\tiGetReadSizes.setImageSize(options.width, options.height)\n+\tiGetReadSizes.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/GetUpDownStream.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetUpDownStream.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,152 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+from optparse import OptionParser, OptionGroup
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+
+
+class GetUpDownStream(object):
+
+    def __init__(self, verbosity = 0):
+        self.verbosity         = verbosity
+        self.inputReader       = None
+        self.outputWriter      = None
+        self.nbRead            = 0
+        self.nbWritten         = 0
+        self.nbMerges          = 0
+        self.splittedFileNames = {}
+
+    def __del__(self):
+        for fileName in self.splittedFileNames.values():
+            os.remove(fileName)
+
+    def setInputFile(self, fileName, format):
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format, "transcript")
+        self.parser = parserChooser.getParser(fileName)
+        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
+
+    def setOutputFile(self, fileName):
+        self.outputWriter = Gff3Writer(fileName, self.verbosity)
+
+    def setDistances(self, up, down):
+        self.upDistance   = up
+        self.downDistance = down
+
+    def _sortFile(self):
+        fs = FileSorter(self.parser, self.verbosity-4)
+        fs.perChromosome(True)
+        fs.setOutputFileName(self.sortedFileName)
+        fs.sort()
+        self.splittedFileNames       = fs.getOutputFileNames()
+        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
+        self.nbRead                  = fs.getNbElements()
+
+    def _write(self, start, end, reference, after):
+        if start > end:
+            return
+        transcript = Transcript()
+        transcript.setChromosome(reference.getChromosome())
+        transcript.setStart(start)
+        transcript.setEnd(end)
+        transcript.setDirection("+")
+        transcript.setName("%s_%s" % ("up" if Utils.xor(reference.getDirection() == 1, after) else "down", reference.getName()))
+        self.outputWriter.addTranscript(transcript)
+
+    def _getFlanking(self, chromosome):
+        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Analyzing chromosome %s" % (chromosome), self.verbosity)
+        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
+        previous    = None
+        for transcript in parser.getIterator():
+            progress.inc()
+            transcript.removeExons()
+            if previous == None:
+                distance = self.upDistance if transcript.getDirection() == 1 else self.downDistance
+                start    = max(1, transcript.getStart() - distance)
+                self._write(start, transcript.getStart()-1, transcript, False)
+                previous = transcript
+                continue
+            if previous.include(transcript):
+                continue
+            if transcript.overlapWith(previous):
+                previous = transcript
+                continue
+            distancePrevious = self.downDistance if previous.getDirection()   == 1 else self.upDistance
+            distanceCurrent  = self.upDistance   if transcript.getDirection() == 1 else self.downDistance
+            distance = transcript.getDistance(previous)
+            if distancePrevious + distanceCurrent == 0:
+                previous = transcript
+                continue
+            if distance >= distancePrevious + distanceCurrent:
+                endPrevious  = previous.getEnd() + distancePrevious
+                startCurrent = transcript.getStart() - distanceCurrent
+            else:
+                middle       = previous.getEnd() + int((distance-1) * float(distancePrevious) / (distancePrevious + distanceCurrent))
+                endPrevious  = middle
+                startCurrent = middle+1
+            self._write(previous.getEnd() + 1, endPrevious, previous, True)
+            self._write(startCurrent, transcript.getStart() - 1, transcript, False)
+            previous = transcript
+        distance = self.downDistance if previous.getDirection() == 1 else self.upDistance
+        self._write(previous.getEnd() + 1, previous.getEnd() + distance, previous, True)
+        progress.done()
+
+    def run(self):
+        self._sortFile()
+        for chromosome in sorted(self.nbElementsPerChromosome.keys()):
+            self._getFlanking(chromosome)
+        self.outputWriter.close()
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Up and Down Stream v1.0.0: Get the flanking regions of an annotation. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-u", "--up",        dest="up",             action="store",      default=0,     type="int",    help="the upstream distance  [format: int]")
+    parser.add_option("-d", "--down",      dest="down",           action="store",      default=0,     type="int",    help="the downstream distance  [format: int]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    (options, args) = parser.parse_args()
+
+    guds = GetUpDownStream(options.verbosity)
+    guds.setInputFile(options.inputFileName, options.format)
+    guds.setOutputFile(options.outputFileName)
+    guds.setDistances(options.up, options.down)
+    guds.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/RestrictFromCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/RestrictFromCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,224 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2012\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, struct, time, random\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.misc import Utils\n+try:\n+ import cPickle as pickle\n+except:\n+ import pickle\n+\n+REFERENCE = 0\n+QUERY = 1\n+TYPES = (REFERENCE, QUERY)\n+TYPETOSTRING = {0: "reference", 1: "query"}\n+\n+class RestrictFromCoverage(object):\n+\n+ def __init__(self, verbosity = 1):\n+ self._verbosity = verbosity\n+ self._randomNumber = random.randint(0, 100000)\n+ self._nbWritten = 0\n+ self._nbLines = dict([type, 0] for type in TYPES)\n+ self._splittedFileNames = dict([type, {}] for type in TYPES)\n+ self._nbElementsPerChromosome = dict([type, {}] for type in TYPES)\n+ self._nbElements = dict([type, 0] for type in TYPES)\n+ \n+ def __del__(self):\n+ pass\n+\n+ def _close(self):\n+ self._writer.close()\n+ \n+ def setInputFileName(self, fileName, format, type):\n+ chooser = ParserChooser(self._verbosity)\n+ 
chooser.findFormat(format)\n+ parser = chooser.getParser(fileName)\n+ sortedFileName = "%s_%d_%d_sorted.pkl" % (os.path.splitext(fileName)[0], self._randomNumber, type)\n+ if self._verbosity > 2:\n+ print "Preparing %s file..." % (TYPETOSTRING[type])\n+ startTime = time.time()\n+ fs = FileSorter(parser, self._verbosity-1)\n+ fs.perChromosome(True)\n+ fs.setOutputFileName(sortedFileName)\n+ fs.sort()\n+ self._nbLines[type] = fs.getNbElements()\n+ self._splittedFileNames[type] = fs.getOutputFileNames()\n+ self._nbElementsPerChromosome[type] = fs.getNbElementsPerChromosome()\n+ self._nbElements[type] = fs.getNbElements()\n+ endTime = time.time'..b'ranscript):\n+ self._writer.addTranscript(transcript)\n+ self._nbWritten += 1\n+\n+ def run(self):\n+ for chromosome in sorted(self._splittedFileNames[QUERY].keys()):\n+ self._compareChromosome(chromosome)\n+ self._close()\n+ if self._verbosity > 0:\n+ print "# queries: %d" % (self._nbElements[QUERY])\n+ print "# refs: %d" % (self._nbElements[REFERENCE])\n+ print "# written: %d (%d%%)" % (self._nbWritten, 0 if self._nbElements[QUERY] == 0 else round(float(self._nbWritten) / self._nbElements[QUERY] * 100))\n+ \n+\n+if __name__ == "__main__":\n+ description = "Restrict From Coverage v1.0.0: Select the elements from the first set which have a given coverage. 
[Category: Data Comparison]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-n", "--minNucleotides", dest="minNucleotides", action="store", default=None, type="int", help="minimum number of nucleotides overlapping to declare an overlap [format: int]")\n+ parser.add_option("-N", "--maxNucleotides", dest="maxNucleotides", action="store", default=None, type="int", help="maximum number of nucleotides overlapping to declare an overlap [format: int]")\n+ parser.add_option("-p", "--minPercent", dest="minPercent", action="store", default=None, type="int", help="minimum percentage of nucleotides overlapping to declare an overlap [format: int]")\n+ parser.add_option("-P", "--maxPercent", dest="maxPercent", action="store", default=None, type="int", help="maximum percentage of nucleotides overlapping to declare an overlap [format: int]")\n+ parser.add_option("-e", "--minOverlap", dest="minOverlap", action="store", default=None, type="int", help="minimum number of elements from 2nd file to declare an overlap [format: int]")\n+ parser.add_option("-E", "--maxOverlap", dest="maxOverlap", action="store", default=None, type="int", 
help="maximum number of elements from 2nd file to declare an overlap [format: int]")\n+ parser.add_option("-s", "--strands", dest="strands", action="store_true", default=False, help="consider the two strands separately [format: bool] [default: false]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ rfc = RestrictFromCoverage(options.verbosity)\n+ rfc.setInputFileName(options.inputFileName1, options.format1, QUERY)\n+ rfc.setInputFileName(options.inputFileName2, options.format2, REFERENCE)\n+ rfc.setOutputFileName(options.output)\n+ rfc.setNbNucleotides(options.minNucleotides, options.maxNucleotides)\n+ rfc.setPercent(options.minPercent, options.maxPercent)\n+ rfc.setOverlap(options.minOverlap, options.maxOverlap)\n+ rfc.setStrands(options.strands)\n+ rfc.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/SelectByTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/SelectByTag.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,148 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Select the transcript such that a tag value is not less than a given threshold"""
+import os
+import sys
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer import MySqlTranscriptWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+
+class SelectByTag(object):
+
+    def __init__(self, verbosity = 1):
+        self.input     = None
+        self.format    = None
+        self.tag       = None
+        self.value     = None
+        self.min       = None
+        self.max       = None
+        self.default   = None
+        self.output    = None
+        self.mysql     = None
+        self.verbosity = verbosity
+
+        self.parser      = None
+        self.writer      = None
+        self.mysqlWriter = None
+        self.nbElements  = None
+        self.nbWritten   = 0
+
+
+    def setParser(self):
+        self.parser     = TranscriptContainer(self.input, self.format, self.verbosity)
+        self.nbElements = self.parser.getNbTranscripts()
+
+
+    def setWriter(self):
+        self.writer = Gff3Writer(self.output, self.verbosity)
+        if self.mysql:
+            self.mysqlWriter = MySqlTranscriptWriter(self.output, self.verbosity)
+
+
+    def isAccepted(self, transcript):
+        value = transcript.getTagValue(self.tag)
+        if value == None:
+            if self.default != None:
+                value = self.default
+            else:
+                raise Exception("Error! Transcript %s no tag called '%s'" % (transcript, self.tag))
+        if self.value != None:
+            if self.value == str(value):
+                return True
+            return self.value.isdigit() and value == float(self.value)
+        value = float(value)
+        return (self.min == None or self.min <= value) and (self.max == None or self.max >= value)
+
+
+    def readInputFile(self):
+        progress = Progress(self.parser.getNbTranscripts(), "Writing transcripts", self.verbosity)
+        for transcript in self.parser.getIterator():
+            if self.isAccepted(transcript):
+                self.writer.addTranscript(transcript)
+                if self.mysql:
+                    self.mysqlWriter.addTranscript(transcript)
+                self.nbWritten += 1
+            progress.inc()
+        progress.done()
+
+
+    def writeFile(self):
+        self.writer.write()
+        if self.mysql:
+            self.mysqlWriter.write()
+
+
+    def run(self):
+        self.setParser()
+        self.setWriter()
+        self.readInputFile()
+        self.writeFile()
+        if self.verbosity > 0:
+            print "%d input" % (self.nbElements)
+            if self.nbElements != 0:
+                print "%d output (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbElements * 100)
+
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Select by Tag v1.0.2: Keep the genomic coordinates such that a the value of a given tag is between two limits. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")
+    parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="the tag [compulsory] [format: string]")
+    parser.add_option("-a", "--value", dest="value", action="store", default=None, type="string", help="the value to be found [format: string]")
+    parser.add_option("-m", "--min", dest="min", action="store", default=None, type="float", help="the minimum threshold [format: float]")
+    parser.add_option("-M", "--max", dest="max", action="store", default=None, type="float", help="the maximum threshold [format: float]")
+    parser.add_option("-d", "--default", dest="default", action="store", default=None, type="float", help="value if tag is not present [format: float]")
+    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-y", "--mysql", dest="mysql", action="store_true", default=False, help="write output into MySQL tables [format: boolean] [default: False]")
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    selectByTag         = SelectByTag(options.verbosity)
+    selectByTag.input   = options.inputFileName
+    selectByTag.format  = options.format
+    selectByTag.tag     = options.tag
+    selectByTag.value   = options.value
+    selectByTag.min     = options.min
+    selectByTag.max     = options.max
+    selectByTag.default = options.default
+    selectByTag.output  = options.outputFileName
+    selectByTag.mysql   = options.mysql
+    selectByTag.run()
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/WrappGetDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappGetDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
+def toTar(tarFileName, directory):
+    fileName = os.path.splitext(tarFileName)[0]
+    fileNameBaseName = os.path.basename(fileName)
+    tfile = tarfile.open(fileName + ".tmp.tar", "w")
+    list = os.listdir(directory)
+    for file in list:
+        if re.search(str(fileNameBaseName), file):
+            tfile.add(file)
+    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
+    tfile.close()
+
+
+if __name__ == "__main__":
+
+    magnifyingFactor = 1000
+
+    # parse command line
+    description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",      dest="format",            action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outTarFileName",    action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,  type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")
+    parser.add_option("-n", "--nbBins",      dest="nbBins",            action="store",      default=1000,  type="int",    help="number of bins [default: 1000] [format: int]")
+    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                help="plot one curve per strand [format: bool] [default: false]")
+    parser.add_option("-w", "--raw",         dest="raw",               action="store_true", default=False,                help="plot raw number of occurrences instead of density [format: bool] [default: false]")
+    parser.add_option("-x", "--csv",         dest="csv",               action="store_true", default=False,                help="write a .csv file [format: bool]")
+    parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,  type="string", help="plot only a chromosome [format: string]")
+    parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,  type="int",    help="start from a given region [format: int]")
+    parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,  type="int",    help="end from a given region [format: int]")
+    parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,  type="int",    help="minimum value on the y-axis to plot [format: int]")
+    parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,  type="int",    help="maximum value on the y-axis to plot [format: int]")
+    parser.add_option("-g", "--gff",         dest="gff",               action="store_true", default=False,                help="also write GFF3 file [format: bool] [default: false]")
+    parser.add_option("-H", "--height",      dest="height",            action="store",      default=None,  type="int",    help="height of the graphics [format: int] [default: 300]")
+    parser.add_option("-W", "--width",       dest="width",             action="store",      default=None,  type="int",    help="width of the graphics [format: int] [default: 1000]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    parser.add_option("-l", "--log",         dest="log",               action="store_true", default=False,                help="write a log file [format: bool]")
+    (options, args) = parser.parse_args()
+
+
+    absPath = os.getcwd()
+    print "the current path is :", absPath
+    directory = "/tmp/wrappGetDistribution"
+    print "the dir path is :", directory
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    os.chdir(directory)
+    if options.inputFileName != None and options.format != None and options.outTarFileName != None:
+        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
+        cmd = "python %s/Java/Python/getDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
+    if options.referenceFileName != None :
+        cmd += " -r %s" % options.referenceFileName
+    if options.nbBins != None :
+        cmd += " -n %s" % options.nbBins
+    if options.chromosome :
+        cmd += " -c %s" % options.chromosome
+    if options.start != None :
+        cmd += " -s %s" % options.start
+    if options.end != None :
+        cmd += " -e %s" % options.end
+    if options.yMin != None :
+        cmd += " -y %s" % options.yMin
+    if options.yMax != None :
+        cmd += " -Y %s" % options.yMax
+    if options.height != None :
+        cmd += " -H %s" % options.height
+    if options.width != None :
+        cmd += " -W %s" % options.width
+    if options.bothStrands :
+        cmd += " -2"
+    if options.raw :
+        cmd += " -w"
+    if options.csv :
+        cmd += " -x"
+    if options.gff :
+        cmd += " -g"
+    if options.log :
+        cmd += " -l"
+    print "cmd is: ", cmd
+    status = subprocess.call(cmd, shell=True)
+    if status != 0:
+            raise Exception("Problem with the execution of command!")
+    toTar(options.outTarFileName, directory)
+    shutil.rmtree(directory)
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/WrappGetReadDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappGetReadDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
+def toTar(tarFileName, directory):
+    fileName = os.path.splitext(tarFileName)[0]
+    fileNameBaseName = os.path.basename(fileName)
+    tfile = tarfile.open(fileName + ".tmp.tar", "w")
+    list = os.listdir(directory)
+    for file in list:
+        if re.search(str(fileNameBaseName), file):
+            tfile.add(file)
+    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
+    tfile.close()
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file sequence [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of the file [compulsory] [format: sequence file format]")
+    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="keep the best n    [format: int]")
+    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="float",  help="keep the best n\% [format: float]")
+    parser.add_option("-o", "--output",    dest="outTarFileName", action="store",               type="string", help="output file [compulsory] [format: zip]")
+
+    (options, args) = parser.parse_args()
+
+
+    absPath = os.getcwd()
+    print "the current path is :", absPath
+    directory = "/tmp/wrappGetReadDistribution"
+    print "the dir path is :", directory
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    os.chdir(directory)
+    if options.inputFileName != None and options.format != None and options.outTarFileName != None:
+        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
+        cmd = "python %s/Java/Python/getReadDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
+    if options.number != None :
+        cmd += " -n %s" % options.number
+    if options.percent != None :
+        cmd += " -p %s" % options.percent
+    print "cmd is: ", cmd
+    status = subprocess.call(cmd, shell=True)
+    if status != 0:
+            raise Exception("Problem with the execution of command!")
+    toTar(options.outTarFileName, directory)
+    shutil.rmtree(directory)
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/WrappPlotCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappPlotCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
+def toTar(tarFileName, directory):
+    """
+    Gather the result files of a run into a single tar archive.
+
+    Every entry of `directory` whose name contains the base name of
+    `tarFileName` (extension removed) is stored at the root of the archive.
+    The archive is first written next to the destination, under a temporary
+    name ending with ".tmp.tar", then moved to `tarFileName`.
+    @param tarFileName: path of the archive to create
+    @type  tarFileName: string
+    @param directory:   directory holding the files to archive
+    @type  directory:   string
+    """
+    fileName         = os.path.splitext(tarFileName)[0]
+    fileNameBaseName = os.path.basename(fileName)
+    tmpTarFileName   = fileName + ".tmp.tar"
+    tfile            = tarfile.open(tmpTarFileName, "w")
+    try:
+        for entry in sorted(os.listdir(directory)):
+            # plain substring test: the base name is a file name, not a regular expression
+            if fileNameBaseName in entry:
+                # explicit path and arcname: no dependency on the current working directory
+                tfile.add(os.path.join(directory, entry), arcname=entry)
+    finally:
+        # the archive must be complete on disk before it is moved
+        tfile.close()
+    # rely on the parameter, not on the global "options" of the calling script
+    shutil.move(tmpTarFileName, tarFileName)
+
+
+
+if __name__ == "__main__":
+    # Galaxy wrapper: runs plotCoverage.py in a scratch directory, then packs
+    # the produced files into the archive given by -o.
+
+    # parse command line
+    description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input1",       dest="inputFileName1", action="store",                       type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat1", dest="inputFormat1",   action="store",                       type="string", help="format of input file 1 [compulsory] [format: transcript file format]")
+    parser.add_option("-j", "--input2",       dest="inputFileName2", action="store",                       type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
+    parser.add_option("-g", "--inputFormat2", dest="inputFormat2",   action="store",                       type="string", help="format of input file 2 [compulsory] [format: transcript file format]")
+    parser.add_option("-q", "--sequence",     dest="inputSequence",  action="store",      default=None,    type="string", help="input sequence file [format: file in FASTA format] [default: None]")
+    parser.add_option("-o", "--output",       dest="outTarFileName", action="store",                       type="string", help="output file [compulsory] [format: output file in zip format]")
+    parser.add_option("-w", "--width",        dest="width",          action="store",      default=1500,    type="int",    help="width of the plots (in px) [format: int] [default: 1500]")
+    parser.add_option("-e", "--height",       dest="height",         action="store",      default=1000,    type="int",    help="height of the plots (in px) [format: int] [default: 1000]")
+    parser.add_option("-t", "--title",        dest="title",          action="store",      default="",      type="string", help="title of the plots [format: string]")
+    parser.add_option("-x", "--xlab",         dest="xLabel",         action="store",      default="",      type="string", help="label on the x-axis [format: string]")
+    parser.add_option("-y", "--ylab",         dest="yLabel",         action="store",      default="",      type="string", help="label on the y-axis [format: string]")
+    parser.add_option("-p", "--plusColor",    dest="plusColor",      action="store",      default="red",   type="string", help="color for the elements on the plus strand [format: string] [default: red]")
+    parser.add_option("-m", "--minusColor",   dest="minusColor",     action="store",      default="blue",  type="string", help="color for the elements on the minus strand [format: string] [default: blue]")
+    parser.add_option("-s", "--sumColor",     dest="sumColor",       action="store",      default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")
+    parser.add_option("-l", "--lineColor",    dest="lineColor",      action="store",      default="black", type="string", help="color for the lines [format: string] [default: black]")
+    parser.add_option("-1", "--merge",        dest="merge",          action="store_true", default=False,                  help="merge the 2 plots in 1 [format: boolean] [default: false]")
+    parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # stop with a usage message when a compulsory option is missing, instead
+    # of failing later on an undefined command
+    compulsory = (options.inputFileName1, options.inputFormat1, options.inputFileName2, options.inputFormat2, options.outTarFileName)
+    if None in compulsory:
+        parser.error("options -i, -f, -j, -g and -o are compulsory")
+
+    directory = "/tmp/wrappPlotCov"
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    os.chdir(directory)
+    try:
+        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
+        print("outputfile is : %s" % outputFileName)
+        # The command is a list of arguments run without a shell: empty values
+        # and values holding spaces or shell meta-characters reach the child
+        # script as they are.
+        cmd = ["python", "%s/Java/Python/plotCoverage.py" % SMART_PATH,
+               "-i", options.inputFileName1, "-f", options.inputFormat1,
+               "-j", options.inputFileName2, "-g", options.inputFormat2,
+               "-o", outputFileName, "-D", directory]
+        optionalArguments = (
+            ("-q", options.inputSequence),
+            ("-w", options.width),
+            ("-e", options.height),
+            ("-t", options.title),
+            ("-x", options.xLabel),
+            ("-y", options.yLabel),
+            ("-p", options.plusColor),
+            ("-m", options.minusColor),
+            ("-s", options.sumColor),
+            ("-l", options.lineColor),
+        )
+        for flag, value in optionalArguments:
+            if value is not None:
+                cmd.extend([flag, str(value)])
+        if options.merge:
+            cmd.append("-1")
+        status = subprocess.call(cmd)
+        if status != 0:
+            raise Exception("Problem with the execution of command!")
+        toTar(options.outTarFileName, directory)
+    finally:
+        # the scratch directory has a fixed name: remove it even after a
+        # failure so that stale files never end up in the archive of a later run
+        shutil.rmtree(directory)
+
+
+
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/WrappPlotRepartition.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappPlotRepartition.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,71 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+# Root of the S-MART tree. os.path.join gives the same path whether or not
+# REPET_PATH ends with a separator (the former "%sSMART" needed a trailing one).
+SMART_PATH = os.path.join(os.environ["REPET_PATH"], "SMART")
+
+def toTar(tarFileName, directory):
+    """
+    Gather the result files of a run into a single tar archive.
+
+    Every entry of `directory` whose name contains the base name of
+    `tarFileName` (extension removed) is stored at the root of the archive.
+    The archive is first written next to the destination, under a temporary
+    name ending with ".tmp.tar", then moved to `tarFileName`.
+    @param tarFileName: path of the archive to create
+    @type  tarFileName: string
+    @param directory:   directory holding the files to archive
+    @type  directory:   string
+    """
+    fileName         = os.path.splitext(tarFileName)[0]
+    fileNameBaseName = os.path.basename(fileName)
+    tmpTarFileName   = fileName + ".tmp.tar"
+    tfile            = tarfile.open(tmpTarFileName, "w")
+    try:
+        for entry in sorted(os.listdir(directory)):
+            # plain substring test: the base name is a file name, not a regular expression
+            if fileNameBaseName in entry:
+                # explicit path and arcname: no dependency on the current working directory
+                tfile.add(os.path.join(directory, entry), arcname=entry)
+    finally:
+        # the archive must be complete on disk before it is moved
+        tfile.close()
+    # rely on the parameter, not on the global "options" of the calling script
+    shutil.move(tmpTarFileName, tarFileName)
+
+
+if __name__ == "__main__":
+    # Galaxy wrapper: runs plotRepartition.py in a scratch directory, then
+    # packs the produced files into the archive given by -o.
+
+    # parse command line
+    description = "Plot the repartition of different data on a whole genome. (This tool uses 1 input file only, the different values being stored in the tags.    See documentation to know more about it.) [Category: Visualization]"
+
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",dest="inputFileName",action="store",type="string",help="input file name [compulsory] [format: file in GFF3 format]")
+    parser.add_option("-n", "--names",dest="names", action="store", type="string", help="name for the tags (separated by commas and no space) [compulsory] [format: string]")
+    parser.add_option("-o", "--output",dest="outTarFileName",action="store",type="string", help="output file [compulsory] [format: output file tar format]")
+    parser.add_option("-c", "--color",dest="colors",action="store",default=None,type="string", help="color of the lines (separated by commas and no space) [format: string]")
+    parser.add_option("-f", "--format",dest="format",action="store",default="png",type="string", help="format of the output file [format: string] [default: png]")
+    parser.add_option("-r", "--normalize",dest="normalize",action="store_true", default=False,help="normalize data (when panels are different) [format: bool] [default: false]")
+    parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
+    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # stop with a usage message when a compulsory option is missing, instead
+    # of failing later on an undefined or incomplete command
+    if options.inputFileName is None or options.outTarFileName is None:
+        parser.error("options -i and -o are compulsory")
+    if options.names is None:
+        parser.error("You must choose tag names !")
+
+    absPath = os.getcwd()
+    print("the current path is : %s" % absPath)
+    directory = "/tmp/wrappPlotRepartition"
+    print("the dir path is : %s" % directory)
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    os.chdir(directory)
+    try:
+        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
+        # The command is a list of arguments run without a shell: values
+        # holding spaces or shell meta-characters reach the child script as
+        # they are.
+        cmd = ["python", "%s/Java/Python/plotRepartition.py" % SMART_PATH,
+               "-i", options.inputFileName, "-o", outputFileName, "-D", directory,
+               "-n", options.names]
+        if options.colors is not None:
+            cmd.extend(["-c", options.colors])
+        if options.format is not None:
+            cmd.extend(["-f", options.format])
+        if options.normalize:
+            cmd.append("-r")
+        if options.log != "":
+            cmd.extend(["-l", options.log])
+
+        print("cmd is:  %s" % " ".join(cmd))
+        status = subprocess.call(cmd)
+        if status != 0:
+            raise Exception("Problem with the execution of command!")
+        toTar(options.outTarFileName, directory)
+    finally:
+        # the scratch directory has a fixed name: remove it even after a
+        # failure so that stale files never end up in the archive of a later run
+        shutil.rmtree(directory)
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/__init__.pyc

Binary file SMART/Java/Python/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/adaptorStripper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/adaptorStripper.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Remove adaptors"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.Sequence import Sequence
+from SMART.Java.Python.structure.SequenceList import SequenceList
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.writer.FastaWriter import FastaWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+def distance (string1, string2):
+    """
+    Count the positions at which two strings of the same size differ
+    @param string1: first string
+    @param string2: second string
+    @return: the number of mismatches, or None if the sizes differ
+    """
+    if len(string1) == len(string2):
+        return sum(1 for char1, char2 in zip(string1, string2) if char1 != char2)
+    return None
+
+
+
+if __name__ == "__main__":
+    # Keeps the reads that start and end with the expected adaptors (up to a
+    # given number of mismatches) and writes them without the adaptors.
+    nbRemaining = 0
+
+    # parse command line
+    description = "Adaptor Stripper v1.0.1: Remove the adaptor of a list of reads. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",         dest="inputFileName",      action="store",                     type="string", help="input file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-o", "--output",        dest="outputFileName",     action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")
+    parser.add_option("-5", "--5primeAdaptor", dest="fivePrimeAdaptor",   action="store",                     type="string", help="five prime adaptor [format: string]")
+    parser.add_option("-3", "--3primeAdaptor", dest="threePrimeAdaptor",  action="store",                     type="string", help="three prime adaptor [format: string]")
+    parser.add_option("-d", "--5primeDist",    dest="fivePrimeDistance",  action="store",      default=3,     type="int",    help="five prime distance [format: int] [default: 3]")
+    parser.add_option("-e", "--3primeDist",    dest="threePrimeDistance", action="store",      default=3,     type="int",    help="three prime distance [format: int] [default: 3]")
+    parser.add_option("-m", "--3primeSize",    dest="threePrimeSize",     action="store",      default=10,    type="int",    help="three prime size [format: int] [default: 10]")
+    parser.add_option("-v", "--verbosity",     dest="verbosity",          action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",           dest="log",                action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    # -5 and -3 are not flagged as compulsory: a missing adaptor is handled as
+    # an empty one (nothing to check nor to strip on that side)
+    fivePrimeReference  = options.fivePrimeAdaptor  or ""
+    threePrimeReference = options.threePrimeAdaptor or ""
+
+    logHandle = None
+    if options.log:
+        logHandle = open(options.outputFileName + ".log", "w")
+
+    try:
+        writer         = FastaWriter(options.outputFileName + ".fas", options.verbosity)
+        sequenceParser = FastaParser(options.inputFileName, options.verbosity)
+        nbSequences    = sequenceParser.getNbSequences()
+
+        # treat sequences
+        progress = Progress(sequenceParser.getNbSequences(), "Analyzing " + options.inputFileName, options.verbosity)
+        for sequence in sequenceParser.getIterator():
+            fivePrimeAdaptor  = sequence.getSequence()[0:len(fivePrimeReference)]
+            threePrimeAdaptor = sequence.getSequence()[len(sequence.sequence)-len(threePrimeReference):]
+
+            # check 5' adaptor
+            fivePrimeDistance = distance(fivePrimeAdaptor, fivePrimeReference)
+            # check 3' adaptor: best match between a suffix of the read and a
+            # prefix of the adaptor, for every size from threePrimeSize up
+            # NOTE(review): distance() gives None when the compared strings differ in size (read shorter than the adaptor) -- confirm the intended handling
+            threePrimeDistance = len(threePrimeAdaptor)
+            for i in range(options.threePrimeSize, len(threePrimeAdaptor)+1):
+                threePrimeDistance = min(threePrimeDistance, distance(threePrimeAdaptor[-i:], threePrimeReference[:i]))
+
+            # sort candidates
+            if fivePrimeDistance > options.fivePrimeDistance:
+                if options.log:
+                    logHandle.write("Sequence %s does not start with the right adaptor (%s != %s)\n" % (sequence.getSequence(), fivePrimeAdaptor, fivePrimeReference))
+            elif threePrimeDistance > options.threePrimeDistance:
+                if options.log:
+                    logHandle.write("Sequence %s does not end with the right adaptor (%s != %s)\n" % (sequence.getSequence(), threePrimeAdaptor, threePrimeReference))
+            else:
+                nbRemaining += 1
+                sequence.setSequence(sequence.getSequence()[len(fivePrimeReference):len(sequence.getSequence())-len(threePrimeReference)])
+                writer.addSequence(sequence)
+
+            progress.inc()
+
+        progress.done()
+    finally:
+        # the log file is closed even when a read cannot be processed
+        if logHandle is not None:
+            logHandle.close()
+
+    writer.write()
+
+    # an empty input file must not end in a division by zero
+    if nbSequences:
+        percentage = float(nbRemaining) / nbSequences * 100
+    else:
+        percentage = 0.0
+    print("kept %i over %i (%.f%%)" % (nbRemaining, nbSequences, percentage))
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/changeGffFeatures.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/changeGffFeatures.sh Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,2 @@
+#!/bin/bash
+# Usage: changeGffFeatures.sh <file> <old feature> <new feature>
+# Prints <file> on the standard output, every occurrence of <old feature>
+# found between two tabulations being replaced by <new feature>.
+# <old feature> is read by sed as a regular expression, <new feature> as a
+# replacement text; neither may contain a "/".
+# The file name is quoted so that paths with spaces are handled.
+sed "s/\t$2\t/\t$3\t/g" "$1"

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/changeTagName.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/changeTagName.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,90 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Change the name of a tag
+"""
+
+import os
+import random
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
+if __name__ == "__main__":
+    # Rewrites a transcript file in GFF3 (and optionally to MySQL), one given
+    # tag being renamed in every transcript that carries it.
+
+    # local import: unlike os.rename, shutil.move also works when the
+    # temporary file and the output file are on different file systems
+    import shutil
+
+    # parse command line
+    description = "Change Tag Name v1.0.1: Change the name of tag of a list of transcripts. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                      type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                      type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-t", "--tag",         dest="tag",            action="store",                      type="string", help="name of the tag to change [compulsory] [format: string]")
+    parser.add_option("-n", "--name",        dest="name",           action="store",                      type="string", help="new name for the tag [compulsory] [format: string]")
+    parser.add_option("-y", "--mysql",       dest="mysql",          action="store_true", default=False,                 help="mySQL output [format: bool] [default: false]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,      type="int",    help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                 help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    # the log file is created as before (nothing is written to it here) and
+    # is closed at the end of the run
+    logHandle = None
+    if options.log:
+        logHandle = open("%s.log" % options.outputFileName, "w")
+
+    try:
+        # create parser and writer(s)
+        transcriptContainer = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
+        # the result goes to a temporary file first, then replaces the output file
+        tmpFileName = "tmpTranscriptFile%d.gff3" % (random.randint(0, 100000))
+        writer      = Gff3Writer(tmpFileName, options.verbosity)
+        if options.mysql:
+            mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
+
+        # process transcripts
+        progress = Progress(transcriptContainer.getNbTranscripts(), "Printing transcripts %s" % (options.inputFileName), options.verbosity)
+        for transcript in transcriptContainer.getIterator():
+            if options.tag in transcript.tags:
+                # move the value from the old tag name to the new one
+                value = transcript.tags[options.tag]
+                del transcript.tags[options.tag]
+                transcript.tags[options.name] = value
+            writer.addTranscript(transcript)
+            if options.mysql:
+                mysqlWriter.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+        transcriptContainer.transcriptListParser.close()
+
+        writer.write()
+
+        if options.mysql:
+            mysqlWriter.write()
+
+        shutil.move(tmpFileName, options.outputFileName)
+    finally:
+        if logHandle is not None:
+            logHandle.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleanGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleanGff.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,195 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
+"""
+
+import os
+import re
+from optparse import OptionParser
+from commons.core.parsing.GffParser import *
+from SMART.Java.Python.misc.RPlotter import *
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+# Number of id-less children seen so far, indexed by parent id then by feature type
+count = {}
+
+class ParsedLine(object):
+    """
+    One feature line of a GFF file: its 9 fields, its attributes stored in
+    a dictionary, its id and the list of its parents.
+    """
+
+    def __init__(self, line, cpt):
+        """
+        @param line: a feature line
+        @type  line: string
+        @param cpt:  number used to build the id ("smart<cpt>") of a line that has neither id nor parent
+        @type  cpt:  int
+        """
+        self.line = line
+        self.cpt  = cpt
+        self.parse()
+
+    def parse(self):
+        """
+        Split the line into 9 fields (on any blank), then extract the
+        attributes, the id and the parents
+        """
+        self.line         = self.line.strip()
+        self.splittedLine = self.line.split(None, 8)
+        nbFields          = len(self.splittedLine)
+        if nbFields < 9:
+            raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+        self.type = self.splittedLine[2]
+        self.parseOptions()
+        self.getId()
+        self.getParents()
+
+    def parseOptions(self):
+        """
+        Read the ";"-separated attributes of the 9th field.  An attribute is
+        either "key=value" or "key value"; a lone word is taken as the ID.
+        """
+        attributes = {}
+        for rawOption in self.splittedLine[8].split(";"):
+            item = rawOption.strip()
+            if not item:
+                continue
+            spaceIndex = item.find(" ")
+            equalIndex = item.find("=")
+            hasSpace   = spaceIndex != -1
+            # "=" is the separator when it comes before the first space
+            equalFirst = equalIndex != -1 and (not hasSpace or equalIndex < spaceIndex)
+            if equalFirst:
+                name, content = item.split("=", 1)
+            elif hasSpace:
+                name, content = item.split(None, 1)
+            else:
+                name, content = "ID", item
+            # spaces and double quotes around the value are dropped
+            attributes[name.strip()] = content.strip(" \"")
+        self.parsedOptions = attributes
+
+    def getId(self):
+        """
+        Set the id: the "id" attribute (any case) if present, else
+        "<parent>-<type>-<rank>" when there is a "Parent" attribute, else
+        "smart<cpt>".  A generated id is stored as the "ID" attribute.
+        """
+        for name in self.parsedOptions:
+            if name.lower() == "id":
+                self.id = self.parsedOptions[name]
+                return
+        if "Parent" not in self.parsedOptions:
+            self.id = "smart%d" % (self.cpt)
+        else:
+            firstParent = self.parsedOptions["Parent"].split(",")[0]
+            perType     = count.setdefault(firstParent, {})
+            perType[self.type] = perType.get(self.type, 0) + 1
+            self.id = "%s-%s-%d" % (firstParent, self.type, perType[self.type])
+        self.parsedOptions["ID"] = self.id
+
+    def getParents(self):
+        """
+        Set the list of parents from the first "parent" or "derives_from"
+        attribute (any case) found, None if there is no such attribute
+        """
+        self.parents = None
+        for name in self.parsedOptions:
+            if name.lower() in ("parent", "derives_from"):
+                self.parents = self.parsedOptions[name].split(",")
+                break
+
+    def removeParent(self):
+        """
+        Delete every "parent" and "derives_from" attribute (any case)
+        """
+        parentKeys = [name for name in self.parsedOptions if name.lower() in ("parent", "derives_from")]
+        for name in parentKeys:
+            del self.parsedOptions[name]
+
+    def export(self):
+        """
+        @return: the line, tab-separated, with "key=value" attributes joined by ";"
+        """
+        attributes = ";".join("%s=%s" % pair for pair in self.parsedOptions.iteritems())
+        self.splittedLine[8] = attributes
+        return "%s\n" % ("\t".join(self.splittedLine))
+
+
+class CleanGff(object):
+    """
+    Read a GFF file, keep the lines of the accepted types and write them
+    back, each line without known parent being followed by its children.
+    """
+
+    def __init__(self, verbosity = 1):
+        """
+        @param verbosity: trace level
+        @type  verbosity: int
+        """
+        self.verbosity = verbosity
+        self.lines         = {}   # kept lines, indexed by id
+        self.acceptedTypes = []   # feature types to keep
+        self.parents       = []   # kept lines having no kept parent
+        self.children      = {}   # parent id -> list of child lines
+
+    def setInputFileName(self, name):
+        """Open the input file `name` for reading."""
+        self.inputFile = open(name)
+
+    def setOutputFileName(self, name):
+        """Open the output file `name` for writing."""
+        self.outputFile = open(name, "w")
+
+    def setAcceptedTypes(self, types):
+        """
+        @param types: the feature types to keep
+        @type  types: list of strings
+        """
+        self.acceptedTypes = types
+
+    def parse(self):
+        """
+        Read the input file and store the lines of an accepted type.
+        Blank lines and lines starting with "#" are skipped; reading stops
+        at the first line starting with ">".  The input file is closed at
+        the end.
+        """
+        progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+        try:
+            for cpt, line in enumerate(self.inputFile):
+                # a line read from a file still has its end of line: test the stripped form to detect a blank line
+                if not line.strip() or line[0] == "#": continue
+                if line[0] == ">": break
+                parsedLine = ParsedLine(line, cpt)
+                if parsedLine.type in self.acceptedTypes:
+                    self.lines[parsedLine.id] = parsedLine
+                progress.inc()
+        finally:
+            self.inputFile.close()
+        progress.done()
+
+    def sort(self):
+        """
+        Attach each stored line to each of its parents that was stored too;
+        a line with no stored parent loses its parent attributes and becomes
+        a top-level line.
+        """
+        progress = Progress(len(self.lines), "Sorting file", self.verbosity)
+        for line in self.lines.values():
+            parentFound = False
+            if line.parents:
+                for parent in line.parents:
+                    if parent in self.lines:
+                        parentFound = True
+                        self.children.setdefault(parent, []).append(line)
+            if not parentFound:
+                line.removeParent()
+                self.parents.append(line)
+            progress.inc()
+        progress.done()
+
+    def write(self):
+        """
+        Write every top-level line followed by its descendants, then close
+        the output file (also when the writing fails).
+        """
+        progress = Progress(len(self.parents), "Writing output file", self.verbosity)
+        try:
+            for line in self.parents:
+                self.writeLine(line)
+                progress.inc()
+        finally:
+            self.outputFile.close()
+        progress.done()
+
+    def writeLine(self, line):
+        """
+        Write a line, then (recursively) its children.
+        @param line: the line to write
+        @type  line: ParsedLine
+        """
+        self.outputFile.write(line.export())
+        if line.id in self.children:
+            for child in self.children[line.id]:
+                self.writeLine(child)
+
+    def run(self):
+        """Read, sort and write."""
+        self.parse()
+        self.sort()
+        self.write()
+
+
+if __name__ == "__main__":
+
+    # command line definition
+    optionParser = OptionParser(description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]")
+    optionParser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")
+    optionParser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    optionParser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")
+    optionParser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")
+    options = optionParser.parse_args()[0]
+
+    # set up the cleaner with the files and the types to keep, then run it
+    cleaner = CleanGff(options.verbosity)
+    cleaner.setInputFileName(options.inputFileName)
+    cleaner.setOutputFileName(options.outputFileName)
+    keptTypes = options.types.split(",")
+    cleaner.setAcceptedTypes(keptTypes)
+    cleaner.run()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleaning/CleanerChooser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/CleanerChooser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,80 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.cleaning.GffCleaner import GffCleaner
+from SMART.Java.Python.cleaning.GtfCleaner import GtfCleaner
+from SMART.Java.Python.cleaning.DefaultCleaner import DefaultCleaner
+
+#Attention!! Do not delete the imports!! They are used to know the type of file format!!!
+
+class CleanerChooser(object):
+    """
+    A class that finds the correct cleaner
+    @ivar cleanerClass: the class of the cleaner (DefaultCleaner until a format is found)
+    @type cleanerClass: class
+    @ivar verbosity:    verbosity
+    @type verbosity:    int
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        @param verbosity: verbosity
+        @type  verbosity: int
+        """
+        self.verbosity    = verbosity
+        # set from the start, so that getCleaner() also works before findFormat()
+        self.cleanerClass = DefaultCleaner
+
+
+    def findFormat(self, format):
+        """
+        Find the correct cleaner: the first direct sub-class of
+        TranscriptListCleaner that declares the format, DefaultCleaner
+        if there is none
+        @param format: the format
+        @type  format: string
+        """
+        self.cleanerClass = DefaultCleaner
+        for cleanerClass in TranscriptListCleaner.__subclasses__():
+            fileFormats = cleanerClass.getFileFormats()
+            if fileFormats is not None and format in fileFormats:
+                self.cleanerClass = cleanerClass
+                return
+
+
+    def getCleaner(self):
+        """
+        Get the cleaner previously found
+        @return: a new instance of the cleaner class, built with the verbosity
+        """
+        return self.cleanerClass(self.verbosity)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleaning/DefaultCleaner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/DefaultCleaner.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,45 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Default cleaner. Does nothing but copying.
+"""
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+
+class DefaultCleaner(TranscriptListCleaner):
+
+ def __init__(self, verbosity = 1):
+ super(DefaultCleaner, self).__init__(verbosity)
+
+ def _clean(self):
+ self.outputHandle.write(self.inputHandle.read())

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleaning/GffCleaner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/GffCleaner.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,168 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
+"""
+
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+count = {}
+
+class ParsedLine(object):
+ def __init__(self, line, cpt):
+ self.line = line
+ self.cpt  = cpt
+ self.parse()
+
+ def parse(self):
+ self.line = self.line.strip()
+ self.splittedLine = self.line.split(None, 8)
+ if len(self.splittedLine) < 9:
+ raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+ self.type = self.splittedLine[2]
+ self.parseOptions()
+ self.getId()
+ self.getParents()
+
+ def parseOptions(self):
+ self.parsedOptions = {}
+ for option in self.splittedLine[8].split(";"):
+ option = option.strip()
+ if option == "": continue
+ posSpace = option.find(" ")
+ posEqual = option.find("=")
+ if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
+ key, value = option.split("=", 1)
+ elif posSpace != -1:
+ key, value = option.split(None, 1)
+ else:
+ key   = "ID"
+ value = option
+ self.parsedOptions[key.strip()] = value.strip(" \"")
+
+ def getId(self):
+ for key in self.parsedOptions:
+ if key.lower() == "id":
+ self.id = self.parsedOptions[key]
+ return
+ if "Parent" in self.parsedOptions:
+ parent = self.parsedOptions["Parent"].split(",")[0]
+ if parent not in count:
+ count[parent] = {}
+ if self.type not in count[parent]:
+ count[parent][self.type] = 0
+ count[parent][self.type] += 1
+ self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])
+ else:
+ self.id = "smart%d" % (self.cpt)
+ self.parsedOptions["ID"] = self.id
+
+ def getParents(self):
+ for key in self.parsedOptions:
+ if key.lower() in ("parent", "derives_from"):
+ self.parents = self.parsedOptions[key].split(",")
+ return
+ self.parents = None
+
+ def removeParent(self):
+ for key in self.parsedOptions.keys():
+ if key.lower() in ("parent", "derives_from"):
+ del self.parsedOptions[key]
+
+ def export(self):
+ self.splittedLine[8] = ";".join(["%s=%s" % (key, value) for key, value in self.parsedOptions.iteritems()])
+ return "%s\n" % ("\t".join(self.splittedLine))
+
+
+class GffCleaner(TranscriptListCleaner):
+
+ def __init__(self, verbosity = 1):
+ super(GffCleaner, self).__init__(verbosity)
+ self.lines = {}
+ self.acceptedTypes = ["mRNA", "transcript", "exon"]
+ self.parents    = []
+ self.children   = {}
+
+ def getFileFormats():
+ return ["gff", "gff2", "gff3"]
+ getFileFormats = staticmethod(getFileFormats)
+
+ def setAcceptedTypes(self, types):
+ self.acceptedTypes = types
+
+ def parse(self):
+ progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+ for cpt, line in enumerate(self.inputHandle):
+ if not line or line[0] == "#": continue
+ if line[0] == ">": break
+ parsedLine = ParsedLine(line, cpt)
+ if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
+ self.lines[parsedLine.id] = parsedLine
+ progress.inc()
+ progress.done()
+
+ def sort(self):
+ progress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)
+ for line in self.lines.values():
+ parentFound = False
+ if line.parents:
+ for parent in line.parents:
+ if parent in self.lines:
+ parentFound = True
+ if parent in self.children:
+ self.children[parent].append(line)
+ else:
+ self.children[parent] = [line]
+ if not parentFound:
+ line.removeParent()
+ self.parents.append(line)
+ progress.inc()
+ progress.done()
+
+ def write(self):
+ progress = Progress(len(self.parents), "Writing output file", self.verbosity)
+ for line in self.parents:
+ self.writeLine(line)
+ progress.inc()
+ progress.done()
+
+ def writeLine(self, line):
+ self.outputHandle.write(line.export())
+ if line.id in self.children:
+ for child in self.children[line.id]:
+ self.writeLine(child)
+
+ def _clean(self):
+ self.parse()
+ self.sort()
+ self.write()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleaning/GtfCleaner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/GtfCleaner.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,121 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GTF file
+"""
+
+import shlex
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+count = {}
+
+class ParsedLine(object):
+ def __init__(self, line, cpt):
+ self.line = line
+ self.cpt  = cpt
+ self.parse()
+
+ def parse(self):
+ self.line = self.line.strip()
+ self.splittedLine = self.line.split(None, 8)
+ if len(self.splittedLine) < 9:
+ raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+ self.type = self.splittedLine[2]
+ self.parseOptions()
+
+ def parseOptions(self):
+ self.parsedOptions = {}
+ key   = None
+ value = ""
+ for option in shlex.split(self.splittedLine[8]):
+ option = option.strip()
+ if option == "": continue
+ if key == None:
+ key = option
+ else:
+ endValue = False
+ if option[-1] == ";":
+ endValue = True
+ option.rstrip(";")
+ value = "%s \"%s\"" % (value, option)
+ if endValue:
+ self.parsedOptions[key] = value
+ if key == "transcript_id":
+ self.transcriptId = value
+ key   = None
+ value = ""
+
+ def export(self):
+ return "%s\n" % (self.line)
+
+
+class GtfCleaner(TranscriptListCleaner):
+
+ def __init__(self, verbosity = 1):
+ super(GtfCleaner, self).__init__(verbosity)
+ self.acceptedTypes = ["exon"]
+ self.parents    = {}
+
+ def getFileFormats():
+ return ["gtf"]
+ getFileFormats = staticmethod(getFileFormats)
+
+ def setAcceptedTypes(self, types):
+ self.acceptedTypes = types
+
+ def parse(self):
+ progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+ for cpt, line in enumerate(self.inputHandle):
+ if not line or line[0] == "#": continue
+ parsedLine = ParsedLine(line, cpt)
+ if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
+ transcriptId = parsedLine.transcriptId
+ if transcriptId not in self.parents:
+ self.parents[parsedLine.transcriptId] = [parsedLine]
+ else:
+ self.parents[parsedLine.transcriptId].append(parsedLine)
+ progress.inc()
+ progress.done()
+
+ def write(self):
+ progress = Progress(len(self.parents.keys()), "Writing output file", self.verbosity)
+ for parent in sorted(self.parents.keys()):
+ for line in self.parents[parent]:
+ self.outputHandle.write(line.export())
+ progress.inc()
+ progress.done()
+
+ def _clean(self):
+ self.parse()
+ self.write()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/cleaning/TranscriptListCleaner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/TranscriptListCleaner.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,63 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.structure.TranscriptList import TranscriptList
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+class TranscriptListCleaner(object):
+ """A (quite generic) class that cleans a file containing transcripts"""
+
+ def __init__(self, verbosity = 0):
+ self.verbosity = verbosity
+
+ def setInputFileName(self, fileName):
+ try:
+ self.inputHandle = open(fileName)
+ except IOError:
+ raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
+
+ def setOutputFileName(self, fileName):
+ try:
+ self.outputHandle = open(fileName, "w")
+ except IOError:
+ raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
+
+ def getFileFormats():
+ pass
+ getFileFormats = staticmethod(getFileFormats)
+
+ def close(self):
+ self.inputHandle.close()
+ self.outputHandle.close()
+
+ def clean(self):
+ self._clean()
+ self.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/clusterize.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/clusterize.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,165 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from commons.core.writer.WriterChooser import WriterChooser\n+"""Clusterize a set of transcripts"""\n+\n+import os\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+class Clusterize(object):\n+ \n+ def __init__(self, verbosity):\n+ self.normalize = False\n+ self.presorted = False\n+ self.distance = 1\n+ self.colinear = False\n+ self.nbWritten = 0\n+ self.nbMerges = 0\n+ self.verbosity = verbosity\n+ self.splittedFileNames = {}\n+\n+ def __del__(self):\n+ for fileName in self.splittedFileNames.values():\n+ os.remove(fileName)\n+\n+ def setInputFile(self, fileName, format):\n+ parserChooser = ParserChooser(self.verbosity)\n+ parserChooser.findFormat(format)\n+ self.parser = parserChooser.getParser(fileName)\n+ self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])\n+\n+ def setOutputFileName(self, fileName, format="gff3", title="S-MART", feature="transcript", featurePart="exon"):\n+ writerChooser = WriterChooser()\n+ writerChooser.findFormat(format)\n+ self.writer = writerChooser.getWriter(fileName)\n+ self.writer.setTitle(title)\n+ self.writer.setFeature(feature)\n+ self.writer.setFeaturePart(featurePart)\n+\n+ def setDistance(self, distance):\n+ self.distance 
= distance\n+\n+ def setColinear(self, colinear):\n+ self.colinear = colinear\n+\n+ def setNormalize(self, normalize):\n+ self.normalize = normalize\n+ \n+ def setPresorted(self, presorted):\n+ self.presorted = presorted\n+\n+ def _sortFile(self):\n+ fs = FileSorter(self.parser, self.verbosity-4)\n+ fs.perChromosome(True)\n+ fs.setPresorted(self.presorted)\n+ fs.setOutputFileName(self.sortedFileName)\n+ fs.sort()\n+ self.splittedFileNames = fs.getOutputFileNames()\n+ self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()\n+ self.nbElements = fs.getNbElements()\n+ \n+ def _iterate(self,'..b'omosome], self.verbosity)\n+ for newTranscript in parser.getIterator():\n+ newTranscripts = []\n+ for oldTranscript in transcripts:\n+ if self._checkOverlap(newTranscript, oldTranscript):\n+ self._merge(newTranscript, oldTranscript)\n+ elif self._checkPassed(newTranscript, oldTranscript):\n+ self._write(oldTranscript)\n+ else:\n+ newTranscripts.append(oldTranscript)\n+ newTranscripts.append(newTranscript)\n+ transcripts = newTranscripts\n+ progress.inc()\n+ for transcript in transcripts:\n+ self._write(transcript)\n+ progress.done()\n+\n+ def _merge(self, transcript1, transcript2):\n+ self.nbMerges += 1\n+ transcript2.setDirection(transcript1.getDirection())\n+ transcript1.merge(transcript2)\n+\n+ def _write(self, transcript):\n+ self.nbWritten += 1\n+ self.writer.addTranscript(transcript)\n+\n+ def _checkOverlap(self, transcript1, transcript2):\n+ if self.colinear and transcript1.getDirection() != transcript2.getDirection():\n+ return False\n+ if transcript1.getDistance(transcript2) > self.distance:\n+ return False\n+ return True\n+\n+ def _checkPassed(self, transcript1, transcript2):\n+ return (transcript1.getDistance(transcript2) > self.distance)\n+\n+ def run(self):\n+ self._sortFile()\n+ for chromosome in sorted(self.splittedFileNames.keys()):\n+ self._iterate(chromosome)\n+ self.writer.close()\n+ if self.verbosity > 0:\n+ print "# input: %d" % (self.nbElements)\n+ 
print "# written: %d (%d%% overlaps)" % (self.nbWritten, 0 if (self.nbElements == 0) else ((float(self.nbWritten) / self.nbElements) * 100))\n+ print "# merges: %d" % (self.nbMerges)\n+ \n+\n+if __name__ == "__main__":\n+ description = "Clusterize v1.0.3: clusterize the data which overlap. [Category: Merge]"\n+ \n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")\n+ parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store", default="gff", type="string", help="output file format [format: transcript file format]")\n+ parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="merge colinear transcripts only [format: bool] [default: false]")\n+ parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="max. 
distance between two transcripts to be merged [format: int] [default: 0]")\n+ parser.add_option("-n", "--normalize", dest="normalize", action="store_true", default=False, help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")\n+ (options, args) = parser.parse_args()\n+ \n+ c = Clusterize(options.verbosity)\n+ c.setInputFile(options.inputFileName, options.format)\n+ c.setOutputFileName(options.outputFileName, options.outputFormat)\n+ c.setColinear(options.colinear)\n+ c.setDistance(options.distance)\n+ c.setNormalize(options.normalize)\n+ c.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/clusterizeBySlidingWindows.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/clusterizeBySlidingWindows.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,344 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re\n+from commons.core.writer.WriterChooser import WriterChooser\n+"""\n+Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.\n+"""\n+\n+import os, os.path\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+\n+class ClusterizeBySlidingWindows(object):\n+\n+ def __init__(self, verbosity = 0):\n+ self.verbosity = verbosity\n+ self.strands = (0, )\n+ self.normalize = False\n+ self.plot = None\n+ self.excel = None\n+ self.outputFileName = \'\'\n+ self.defaultValue = None\n+\n+ def __del__(self):\n+ pass\n+\n+ def setInputFile(self, fileName, format):\n+ self.parser = TranscriptContainer(fileName, format, self.verbosity)\n+\n+ def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):\n+ writerChooser = WriterChooser(self.verbosity)\n+ writerChooser.findFormat(format)\n+ self.writer = writerChooser.getWriter(fileName)\n+ self.writer.setTitle(title)\n+ self.writer.setFeature(feature)\n+ self.writer.setFeaturePart(featurePart)\n+# self.outputFileName = fileName\n+# self.outputFormat = format\n+\n+ def setWindowSize(self, size):\n+ self.size = size\n+\n+ def setWindowOverlap(self, overlap):\n+ self.overlap = overlap\n+\n+ def setTag(self, 
tag):\n+ self.tag = tag\n+\n+ def setOperation(self, operation):\n+ self.operation = operation\n+\n+ def setBothStrands(self, bothStrands):\n+ if bothStrands:\n+ self.strands = (-1, 1)\n+\n+ def setNormalize(self, normalize):\n+ self.normalize = normalize\n+\n+ def setPlot(self, plot):\n+ self.plot = plot\n+\n+ def setExcel(self, excel):\n+ self.excel = excel\n+\n+ def setOutputTag(self, tag):\n+ self.outputTagName = tag\n+ \n+ def setDefaultValue(self, defaultValue):\n+ self.defaultValue = defaultValue\n+\n+ def checkOptions(self):\n+# if self.operation != None:\n+# raise Exception("Trying to combine the values without specifying tag! Aborting...")\n+ if self.operation != '..b'lf.excel:\n+ self.writeExcel()\n+ if self.plot:\n+ self.plotData()\n+ self.printRegions()\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Clusterize by Sliding Windows v1.0.1: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--inputFormat", dest="inputFormat", action="store", type="string", help="format of the input file [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")\n+ parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store", default="gff", type="string", help="format of the output file [format: transcript file format]")\n+ parser.add_option("-s", "--size", dest="size", action="store", type="int", help="size of the regions [compulsory] [format: int]")\n+ parser.add_option("-e", "--overlap", dest="overlap", action="store", type="int", help="overlap 
between two consecutive regions [compulsory] [format: int]")\n+ parser.add_option("-m", "--normalize", dest="normalize", action="store_true", default=False, help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n+ parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="use a given tag as input (instead of summing number of features) [format: string]") \n+ parser.add_option("-r", "--operation", dest="operation", action="store", default=None, type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")\n+ parser.add_option("-d", "--defaultValue",dest="defaultValue", action="store", type="float", help="default value for input tag [format: float]")\n+ parser.add_option("-w", "--write", dest="writeTag", action="store", default=None, type="string", help="print the result in the given tag (default usually is \'nbElements\') [format: string]") \n+ parser.add_option("-2", "--strands", dest="strands", action="store_true", default=False, help="consider the two strands separately [format: bool] [default: false]")\n+ parser.add_option("-p", "--plot", dest="plot", action="store", default=None, type="string", help="plot regions to the given file [format: output file in PNG format]")\n+ parser.add_option("-x", "--excel", dest="excel", action="store", default=None, type="string", help="write an Excel file to the given file [format: output file in Excel format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")\n+ (options, args) = parser.parse_args()\n+\n+ cbsw = ClusterizeBySlidingWindows(options.verbosity)\n+ cbsw.setInputFile(options.inputFileName, options.inputFormat)\n+ cbsw.setOutputFileName(options.outputFileName, options.outputFormat)\n+ cbsw.setWindowSize(options.size)\n+ cbsw.setWindowOverlap(options.overlap)\n+ 
cbsw.setTag(options.tag)\n+ cbsw.setDefaultValue(options.defaultValue)\n+ cbsw.setOperation(options.operation)\n+ cbsw.setOutputTag(options.writeTag)\n+ cbsw.setBothStrands(options.strands)\n+ cbsw.setPlot(options.plot)\n+ cbsw.setExcel(options.excel)\n+ cbsw.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/compareOverlapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/compareOverlapping.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,126 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Compare overlap of two transcript lists"""\n+import sys\n+import os\n+from optparse import OptionParser\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+\n+class CompareOverlapping(object):\n+\n+ def __init__(self):\n+ self._options = None\n+\n+\n+ def setAttributesFromCmdLine(self):\n+ description = "Compare Overlapping v1.0.3: Get the data which overlap with a reference set. 
[Category: Data Comparison]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-S", "--start1", dest="start1", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")\n+ parser.add_option("-s", "--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")\n+ parser.add_option("-U", "--end1", dest="end1", action="store", default=None, type="int'..b'pping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")\n+ parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ parser.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")\n+ 
(self._options, args) = parser.parse_args()\n+\n+\n+ def run(self): \n+ logHandle = None\n+ if self._options.log:\n+ logHandle = open(self._options.output, "w")\n+\n+ transcriptContainer1 = TranscriptContainer(self._options.inputFileName1, self._options.format1, self._options.verbosity)\n+ transcriptContainer2 = TranscriptContainer(self._options.inputFileName2, self._options.format2, self._options.verbosity)\n+ writer = TranscriptWriter(self._options.output, "gff3", self._options.verbosity)\n+\n+ transcriptListComparator = TranscriptListsComparator(logHandle, self._options.verbosity)\n+ transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, self._options.start1)\n+ transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, self._options.start2)\n+ transcriptListComparator.restrictToEnd(transcriptListComparator.QUERY, self._options.end1)\n+ transcriptListComparator.restrictToEnd(transcriptListComparator.REFERENCE, self._options.end2)\n+ transcriptListComparator.extendFivePrime(transcriptListComparator.QUERY, self._options.fivePrime1)\n+ transcriptListComparator.extendFivePrime(transcriptListComparator.REFERENCE, self._options.fivePrime2)\n+ transcriptListComparator.extendThreePrime(transcriptListComparator.QUERY, self._options.threePrime1)\n+ transcriptListComparator.extendThreePrime(transcriptListComparator.REFERENCE, self._options.threePrime2)\n+ transcriptListComparator.acceptIntrons(transcriptListComparator.QUERY, self._options.introns)\n+ transcriptListComparator.acceptIntrons(transcriptListComparator.REFERENCE, self._options.introns)\n+ transcriptListComparator.getAntisenseOnly(self._options.antisense)\n+ transcriptListComparator.getColinearOnly(self._options.colinear)\n+ transcriptListComparator.getInvert(self._options.exclude)\n+ transcriptListComparator.setMaxDistance(self._options.distance)\n+ transcriptListComparator.setMinOverlap(self._options.minOverlap)\n+ 
transcriptListComparator.setPcOverlap(self._options.pcOverlap)\n+ transcriptListComparator.setIncludedOnly(self._options.included)\n+ transcriptListComparator.setIncludingOnly(self._options.including)\n+ transcriptListComparator.includeNotOverlapping(self._options.notOverlapping)\n+ transcriptListComparator.computeOdds(True)\n+ transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)\n+ transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)\n+ transcriptListComparator.setOutputWriter(writer)\n+ transcriptListComparator.compareTranscriptList()\n+\n+ if self._options.log:\n+ logHandle.close()\n+\n+ if not self._options.exclude:\n+ odds = transcriptListComparator.getOdds()\n+ if self._options.verbosity > 0 and odds:\n+ print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(odds)\n+ \n+if __name__ == "__main__":\n+ icompareOverlapping = CompareOverlapping()\n+ icompareOverlapping.setAttributesFromCmdLine()\n+ icompareOverlapping.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/convertTranscriptFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/convertTranscriptFile.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Read a transcript file and convert it to another format
+"""
+
+import os, re
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class ConvertTranscriptFile(object):
+    """Convert a transcript file from one format to another."""
+    def __init__(self, inputFileName="", inputFormat="", outputFileName="", outputFormat="", name="", sequenceFileName=None, strands=False, galaxy=False, feature=None, featurePart=None, verbosity=1):
+        """Store the conversion settings; no file is touched before run()."""
+        self.inputFileName = inputFileName
+        self.inputFormat = inputFormat
+        self.outputFileName = outputFileName
+        self.outputFormat = outputFormat
+        self.name = name
+        self.sequenceFileName = sequenceFileName
+        self.strands = strands
+        self.galaxy = galaxy
+        # feature and featurePart have no command-line option: they can only be set here
+        self.feature = feature
+        self.featurePart = featurePart
+        self.verbosity = verbosity
+
+    def setAttributesFromCmdLine(self):
+        """Parse the command line with optparse and copy the options onto this instance."""
+        description = "Convert Transcript File v1.0.3: Convert a file from a format to another. [Category: Conversion]"
+        parser = OptionParser(description = description)
+        parser.add_option("-i", "--input",        dest="inputFileName",    action="store",                       type="string", help="input file [compulsory] [format: file in format given by -f]")
+        parser.add_option("-f", "--inputFormat",  dest="inputFormat",      action="store",                       type="string", help="format of the input file [compulsory] [format: transcript or mapping file format]")
+        parser.add_option("-o", "--output",       dest="outputFileName",   action="store",                       type="string", help="output file [compulsory] [format: output file in format given by -g]")
+        parser.add_option("-g", "--outputFormat", dest="outputFormat",     action="store",                       type="string", help="format of the output file [compulsory] [format: transcript file format]")
+        parser.add_option("-n", "--name",         dest="name",             action="store",      default="SMART", type="string", help="name for the transcripts [format: string] [default: SMART]")
+        parser.add_option("-s", "--sequences",    dest="sequenceFileName", action="store",      default=None,    type="string", help="give the corresponding Multi-Fasta file (useful for EMBL format) [format: string]")
+        parser.add_option("-t", "--strands",      dest="strands",          action="store_true", default=False,                  help="consider the 2 strands as different (only useful for writing WIG files) [format: bool] [default: False]")
+        parser.add_option("-v", "--verbosity",    dest="verbosity",        action="store",      default=1,       type="int",    help="trace level [format: int] [default: 1]")
+        parser.add_option("-G", "--galaxy",       dest="galaxy",           action="store_true", default=False,                  help="used for galaxy [format: bool] [default: False]")
+        (options, args) = parser.parse_args()
+        self._setAttributesFromOptions(options)
+
+    def _setAttributesFromOptions(self, options):
+        """Copy the parsed option values onto the matching instance attributes."""
+        self.inputFileName = options.inputFileName
+        self.inputFormat = options.inputFormat
+        self.outputFileName = options.outputFileName
+        self.outputFormat = options.outputFormat
+        self.name = options.name
+        self.sequenceFileName = options.sequenceFileName
+        self.strands = options.strands
+        self.galaxy =  options.galaxy
+        self.verbosity = options.verbosity
+
+    def run(self):
+        """Connect a TranscriptContainer (input) to a TranscriptWriter (output), then write and close."""
+        parser = TranscriptContainer(self.inputFileName, self.inputFormat, self.verbosity)
+        writer = TranscriptWriter(self.outputFileName, self.outputFormat, self.verbosity)
+        # connect parser and writer
+        writer.setContainer(parser)
+
+        # optional settings are only forwarded to the writer when they were provided
+        if self.name is not None:
+            writer.setTitle(self.name)
+        if self.feature is not None:
+            writer.setFeature(self.feature)
+        if self.featurePart is not None:
+            writer.setFeaturePart(self.featurePart)
+        if self.sequenceFileName is not None:
+            writer.addSequenceFile(self.sequenceFileName)
+
+        if self.verbosity > 0:
+            print "%i items found" % (parser.getNbItems())
+        if self.strands:
+            writer.setStrands(True)
+        # convert
+        writer.write()
+        writer.close()
+
+if __name__ == "__main__":
+    iConvertTranscriptFile = ConvertTranscriptFile()
+    iConvertTranscriptFile.setAttributesFromCmdLine()
+    iConvertTranscriptFile.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/coordinatesToSequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/coordinatesToSequence.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,64 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Convert a list of coordinates to sequences"""
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.FastaWriter import FastaWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # Command-line interface: coordinates file and its format, sequence file,
+    # output file and trace level.
+    description = "Coordinates to Sequences v1.0.2: Extract the sequences from a list of coordinates. [Category: Conversion]"
+    optionParser = OptionParser(description = description)
+    optionParser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    optionParser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
+    optionParser.add_option("-s", "--sequences", dest="sequences", action="store", type="string", help="file that contains the sequences [compulsory] [format: file in FASTA format]")
+    optionParser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file (FASTA format) [format: output file in FASTA format]")
+    optionParser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    (options, args) = optionParser.parse_args()
+
+    # Reader for the coordinates, reader for the sequences, writer for the result
+    transcriptParser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    sequenceParser = FastaParser(options.sequences, options.verbosity)
+    fastaWriter = FastaWriter(options.outputFileName, options.verbosity)
+
+    # Extract the sequence of every transcript and add it to the output
+    progress = Progress(transcriptParser.getNbTranscripts(), "Reading %s" % (options.inputFileName), options.verbosity)
+    for transcript in transcriptParser.getIterator():
+        fastaWriter.addSequence(transcript.extractSequence(sequenceParser))
+        progress.inc()
+    progress.done()
+    # NOTE(review): the writer is never explicitly closed here -- confirm that FastaWriter flushes by itself

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/fastqToFasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/fastqToFasta.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Convert a FASTQ file to a FASTA file"""
+
+import os
+import sys
+from optparse import OptionParser
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+from math import *
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "FastQ to FastA v1.0.1: Convert a FastQ file into a FastA file. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in FASTQ format]")
+    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in FASTA format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")
+    (options, args) = parser.parse_args()
+
+    inputFile = open(options.inputFileName)
+    outputFastaFile = open(options.outputFileName, "w")
+
+    # A FASTQ record is expected to span exactly 4 lines:
+    #   section 0: "@" followed by the sequence name
+    #   section 1: the sequence itself
+    #   section 2: "+", optionally followed by the same sequence name
+    #   section 3: the quality string
+    # Sequences or qualities wrapped over several lines are not supported.
+    SEQUENCE_NAME = 0
+    SEQUENCE      = 1
+    QUALITY_NAME  = 2
+    QUALITY       = 3
+    NB_SECTIONS   = 4
+
+    sequenceName = None
+
+    # sys.exit() raises SystemExit: the finally clause makes sure that both
+    # files are closed (and the output flushed) even when a malformed line
+    # aborts the conversion.
+    try:
+        for lineNumber, line in enumerate(inputFile, 1):
+            line = line.strip()
+            section = (lineNumber - 1) % NB_SECTIONS
+
+            if section == SEQUENCE_NAME:
+                # startswith() also handles an empty line, where line[0]
+                # would raise an IndexError instead of the message below
+                if not line.startswith("@"):
+                    sys.exit("Error! Sequence name '%s' does not start with '@' (line is %d)" % (line, lineNumber))
+                sequenceName = line[1:]
+                outputFastaFile.write(">%s\n" % (sequenceName))
+            elif section == SEQUENCE:
+                outputFastaFile.write("%s\n" % (line))
+            elif section == QUALITY_NAME:
+                if not line.startswith("+"):
+                    sys.exit("Error! Quality name '%s' does not start with '+' (line is %d)" % (line, lineNumber))
+                # the name after "+" is optional, but must match when given
+                if len(line) > 1 and sequenceName != line[1:]:
+                    sys.exit("Names in sequence and qual are different (%s, %s) (line is %d)" % (sequenceName, line[1:], lineNumber))
+            elif section == QUALITY:
+                # quality values have no FASTA counterpart
+                pass
+    finally:
+        inputFile.close()
+        outputFastaFile.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/findTss.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/findTss.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,77 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Find TSS from short reads"""
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Find TSS v1.0.1: Find the transcription start site of a list of transcripts. [Category: Merge]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName", action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",        action="store",                     type="string", help="format of file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",    dest="output",        action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-n", "--normalize", dest="normalize",     action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
+    parser.add_option("-d", "--distance",  dest="distance",      action="store",      default=10,    type="int",    help="distance between two reads to mark the same TSS [format: int] [default: 10]")
+    parser.add_option("-e", "--colinear",  dest="colinear",      action="store_true", default=False,                help="group by strand [format: bool] [default: false]")
+    parser.add_option("-c", "--csv",       dest="csv",           action="store",      default=None,  type="string", help="output a CSV file in the given path [format: output file in Excel format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",     action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    transcriptListComparator = TranscriptListsComparator(None, options.verbosity)
+    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, 1)
+    transcriptListComparator.setMaxDistance(options.distance)
+    transcriptListComparator.aggregate(True)
+    transcriptListComparator.computeOdds(True)
+    transcriptListComparator.getColinearOnly(options.colinear)
+    transcriptListComparator.setNormalization(options.normalize)
+    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer)
+    transcriptListComparator.setOutputWriter(Gff3Writer(options.output, options.verbosity))
+    transcriptListComparator.compareTranscriptListSelfMerge()
+
+    # optional CSV report: one row per distinct number, followed by the names having that number
+    if options.csv is not None:
+        csvResults = transcriptListComparator.getOddsPerTranscript()
+        namesPerNumber = {}
+        for name in csvResults:  # group in one pass instead of rescanning every name for each number
+            namesPerNumber.setdefault(csvResults[name], []).append(name)
+        csvFile = open(options.csv, "w")
+        csvFile.write("Number,Transcript\n")
+        for number in sorted(namesPerNumber):
+            csvFile.write("%d,%s\n" % (number, "".join(["%s " % (name) for name in namesPerNumber[number]])))
+        csvFile.close()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/fold.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/fold.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,95 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Fold a list of transcripts and give the energy
+The results are written to a GFF3 file
+"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.toolLauncher.RnaFoldLauncher import RnaFoldLauncher
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
+class Fold(object):
+    """Fold a series of transcripts (through an RnaFoldLauncher) and write the results with a Gff3Writer."""
+
+    def __init__(self, verbosity = 0):
+        """Create the folding launcher; the writer is created later by setOutputFileName()."""
+        self.verbosity       = verbosity
+        self.rnaFoldLauncher = RnaFoldLauncher(verbosity)
+        self.gff3Writer      = None
+
+    def setInputFileName(self, fileName, format):
+        """Wrap fileName (in the given format) in a TranscriptContainer and hand it to the launcher."""
+        # self.verbosity, not the script-only global "options" (NameError when Fold is imported)
+        transcriptContainer = TranscriptContainer(fileName, format, self.verbosity)
+        self.rnaFoldLauncher.setTranscriptList(transcriptContainer)
+
+    def setOutputFileName(self, fileName):
+        """Create the GFF3 writer; ".gff3" is appended to fileName."""
+        self.gff3Writer = Gff3Writer("%s.gff3" % (fileName), self.verbosity)
+
+    def setGenomeFileName(self, fileName):
+        """Give the genome file name to the launcher."""
+        self.rnaFoldLauncher.setGenomeFile(fileName)
+
+    def setExtensions(self, fivePrime, threePrime):
+        """Give the 5' and 3' extension values to the launcher."""
+        self.rnaFoldLauncher.setExtensions(fivePrime, threePrime)
+
+    def start(self):
+        """Add the launcher results to the GFF3 writer; setOutputFileName() must have been called first."""
+        self.gff3Writer.addTranscriptList(self.rnaFoldLauncher.getResults())
+
+
+
+if __name__ == "__main__":
+
+    # Command-line interface of the folding tool
+    description = "Fold v1.0.1: Fold a list of transcript and give the energy. [Category: Personal]"
+    optionParser = OptionParser(description = description)
+    optionParser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    optionParser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
+    optionParser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
+    optionParser.add_option("-g", "--genome", dest="genomeFileName", action="store", type="string", help="genome file name [format: file in FASTA format]")
+    optionParser.add_option("-5", "--fivePrime", dest="fivePrime", action="store", type="int", help="extend towards the 5' end [format: int]")
+    optionParser.add_option("-3", "--threePrime", dest="threePrime", action="store", type="int", help="extend towards the 3' end [format: int]")
+    optionParser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    (options, args) = optionParser.parse_args()
+
+    # Configure the folding job from the parsed options, then launch it
+    foldRunner = Fold(options.verbosity)
+    foldRunner.setInputFileName(options.inputFileName, options.format)
+    foldRunner.setOutputFileName(options.outputFileName)
+    foldRunner.setExtensions(options.fivePrime, options.threePrime)
+    foldRunner.setGenomeFileName(options.genomeFileName)
+    foldRunner.start()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getDifference.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getDifference.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,155 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Restrict a transcript list with some parameters (regions)"""\n+\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from commons.core.parsing.FastaParser import FastaParser\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+class DifferenceGetter(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.annotationParser = None\n+ self.referenceParser = None\n+ self.sequenceParser = None\n+ self.transcriptCount = 1\n+ self.split = False\n+\n+ def createTranscript(self, chromosome, start, end):\n+ transcript = Transcript()\n+ transcript.setChromosome(chromosome)\n+ transcript.setDirection("+")\n+ transcript.setStart(start)\n+ transcript.setEnd(end)\n+ transcript.setName("region_%d" % self.transcriptCount)\n+ transcript.setTagValue("ID", "region_%d" % self.transcriptCount)\n+ self.transcriptCount += 1\n+ return transcript\n+\n+ def setSplit(self, split):\n+ self.split = split\n+\n+ def setAnnotationFile(self, fileName, format):\n+ if fileName != None:\n+ self.annotationParser = TranscriptContainer(fileName, format, self.verbosity)\n+\n+ def setReferenceFile(self, fileName, format):\n+ if fileName != None:\n+ self.referenceParser = TranscriptContainer(fileName, format, self.verbosity)\n+\n+ def setSequenceFile(self, 
fileName):\n+ if fileName != None:\n+ self.sequenceParser = FastaParser(fileName, self.verbosity)\n+\n+ def setOutputFile(self, fileName):\n+ self.writer = Gff3Writer(fileName, self.verbosity)\n+\n+ def initialize(self):\n+ self.presence = {}\n+ for chromosome in self.sequenceParser.getRegions():\n+ self.presence[chromosome] = [[1, self.sequenceParser.getSizeOfRegion(chromosome)]]\n+\n+ def readTranscripts(self):\n+ nbTranscripts = self.annotationParser.getNbTranscripts()\n+ progress = Progress(nbTranscripts, "Parsing annotation file" , self.verbosity)\n+ for transcript in self.annotationParser.getIterator():\n+ chromosome = transcript.getChromosome()\n+ '..b'me]):\n+ start, end = element\n+ if start <= transcript.getEnd() and transcript.getStart() <= end:\n+ toBeDeleted.append(i)\n+ if start < transcript.getStart():\n+ toBeAppended.append([start, transcript.getStart() - 1])\n+ if end > transcript.getEnd():\n+ toBeAppended.append([transcript.getEnd() + 1, end])\n+ for i in reversed(toBeDeleted):\n+ del self.presence[chromosome][i]\n+ self.presence[chromosome].extend(toBeAppended)\n+ progress.inc()\n+ progress.done()\n+\n+ def writeOutput(self):\n+ for chromosome in self.presence:\n+ for element in self.presence[chromosome]:\n+ start, end = element\n+ self.writer.addTranscript(self.createTranscript(chromosome, start, end))\n+ self.writer.write()\n+\n+ def compareToSequence(self):\n+ self.initialize()\n+ self.readTranscripts()\n+ self.writeOutput()\n+\n+ def compareToAnnotation(self):\n+ transcriptListComparator = TranscriptListsComparator(None, self.verbosity)\n+ transcriptListComparator.setSplitDifference(self.split)\n+ transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, self.annotationParser)\n+ transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, self.referenceParser)\n+ transcriptListComparator.setOutputWriter(self.writer)\n+ transcriptListComparator.getDifferenceTranscriptList()\n+\n+ def 
run(self):\n+ if self.referenceParser != None:\n+ self.compareToAnnotation()\n+ else:\n+ self.compareToSequence()\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Get Difference v1.0.1: Get all the regions of the genome, except the one given or get all the elements from the first set which does not ovelap with the second set (at the nucleotide level). [Category: Data Comparison]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", default=None, type="string", help="reference file [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", default=None, type="string", help="format of the reference file [format: transcript file format]")\n+ parser.add_option("-s", "--sequence", dest="sequenceFileName", action="store", default=None, type="string", help="sequence file [format: file in FASTA format]")\n+ parser.add_option("-p", "--split", dest="split", action="store_true", default=False, help="when comparing to a set of genomic coordinates, do not join [format: boolean] [default: False")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ getter = DifferenceGetter(options.verbosity)\n+ getter.setSplit(options.split)\n+ getter.setAnnotationFile(options.inputFileName1, options.format1)\n+ 
getter.setSequenceFile(options.sequenceFileName)\n+ getter.setReferenceFile(options.inputFileName2, options.format2)\n+ getter.setOutputFile(options.outputFileName)\n+ getter.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getDistance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getDistance.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,241 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Get the distance between the transcripts of two lists"""\n+\n+import os\n+import sys\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+\n+class GetDistance(object):\n+\n+ def __init__(self, verbosity = 0):\n+ self.verbosity = verbosity\n+ self.writer = None\n+ self.spearman = False\n+ self.tlc = TranscriptListsComparator(None, self.verbosity)\n+ self.strands = (0, )\n+ self.buckets = None\n+ self.title = ""\n+ self.xMin = None\n+ self.xMax = None\n+ self.proportion = False\n+ self.outputFileName = None\n+ self.keep = False\n+\n+ def __del__(self):\n+ pass\n+\n+ def setQueryFile(self, fileName, format):\n+ self.transcriptContainer1 = TranscriptContainer(fileName, format, self.verbosity)\n+ \n+ def setReferenceFile(self, fileName, format):\n+ self.transcriptContainer2 = TranscriptContainer(fileName, format, self.verbosity)\n+\n+ def setOutputFile(self, fileName):\n+ self.outputFileName = fileName\n+ \n+ def setOutputTranscriptFile(self, fileName):\n+ if fileName != None:\n+ self.writer = Gff3Writer(fileName, self.verbosity)\n+ \n+ def restrictQueryToStart(self, number):\n+ self.tlc.restrictToStart(self.tlc.QUERY, number)\n+\n+ def restrictReferenceToStart(self, number):\n+ self.tlc.restrictToStart(self.tlc.REFERENCE, number)\n+\n+ def restrictQueryToEnd(self, 
number):\n+ self.tlc.restrictToEnd(self.tlc.QUERY, number)\n+\n+ def restrictReferenceToEnd(self, number):\n+ self.tlc.restrictToEnd(self.tlc.REFERENCE, number)\n+\n+ def setAbsolute(self, boolean):\n+ self.tlc.setAbsolute(boolean)\n+\n+ def setProportion(self, boolean):\n+ self.proportion = boolean\n+\n+ def setColinear(self, boolean):\n+ self.tlc.getColinearOnly(boolean)\n+\n+ def setAntisense(self, boolean):\n+ self.tlc.getAntisenseOnly(boolean)\n+\n+ def setDistances(self, minDistance, maxDistance):\n+ self.tlc.setMinDistance(minDistance)\n+ self.tlc.setMaxDistance(maxDistance)\n+\n+ def setStrands(s'..b'"--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first 5\' nucleotides for list 2 [format: int]")\n+ parser.add_option("-e", "--end1", dest="end1", action="store", default=None, type="int", help="only consider the n last 3\' nucleotides for list 1 [format: int]")\n+ parser.add_option("-E", "--end2", dest="end2", action="store", default=None, type="int", help="only consider the n last 3\' nucleotides for list 2 [format: int]")\n+ parser.add_option("-m", "--minDistance", dest="minDistance", action="store", default=None, type="int", help="minimum distance considered between two transcripts [format: int] [default: None]")\n+ parser.add_option("-M", "--maxDistance", dest="maxDistance", action="store", default=1000, type="int", help="maximum distance considered between two transcripts [format: int] [default: 1000]")\n+ parser.add_option("-5", "--fivePrime", dest="fivePrime", action="store_true", default=False, help="consider the elements from list 1 which are upstream of elements of list 2 [format: bool] [default: False]")\n+ parser.add_option("-3", "--threePrime", dest="threePrime", action="store_true", default=False, help="consider the elements from list 1 which are downstream of elements of list 2 [format: bool] [default: False]")\n+ parser.add_option("-u", "--buckets", dest="buckets", action="store", default=None, 
type="int", help="plot histogram instead of line plot with given interval size [format: int] [default: None]")\n+ parser.add_option("-2", "--2strands", dest="twoStrands", action="store_true", default=False, help="plot the distributions of each strand separately [format: bool] [default: False]")\n+ parser.add_option("-r", "--spearman", dest="spearman", action="store_true", default=False, help="compute Spearman rho [format: bool] [default: False]")\n+ parser.add_option("-x", "--xMin", dest="xMin", action="store", default=None, type="int", help="minimum value on the x-axis to plot [format: int] [default: None]")\n+ parser.add_option("-X", "--xMax", dest="xMax", action="store", default=None, type="int", help="maximum value on the x-axis to plot [format: int] [default: None]")\n+ parser.add_option("-t", "--title", dest="title", action="store", default=None, type="string", help="title for the graph [format: int] [default: None]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ parser.add_option("-k", "--keep", dest="keep", action="store_true", default=False, help="keep temporary files [format: bool]")\n+ (options, args) = parser.parse_args()\n+\n+ gd = GetDistance(options.verbosity)\n+ gd.setQueryFile(options.inputFileName1, options.format1)\n+ gd.setReferenceFile(options.inputFileName2, options.format2)\n+ gd.setOutputFile(options.outputFileName)\n+ gd.setOutputTranscriptFile(options.outputDistances)\n+ gd.setColinear(options.colinear)\n+ gd.setAntisense(options.antisense)\n+ gd.setAbsolute(options.absolute)\n+ gd.setProportion(options.proportion)\n+ gd.restrictQueryToStart(options.start1)\n+ gd.restrictReferenceToStart(options.start2)\n+ gd.restrictQueryToEnd(options.end1)\n+ gd.restrictReferenceToEnd(options.end2)\n+ gd.setDistances(options.minDistance, options.maxDistance)\n+ gd.setUpstream(options.fivePrime)\n+ gd.setDownstream(options.threePrime)\n+ 
gd.setStrands(options.twoStrands)\n+ gd.setBuckets(options.buckets)\n+ gd.setTitle(options.title)\n+ gd.setXValues(options.xMin, options.xMax)\n+ gd.keepTmpValues(options.keep)\n+ gd.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,291 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Get the repartition of some elements in a chromosomes"""\n+\n+import os\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+from math import *\n+\n+def divideKeyDict(dictionary, ratio):\n+ return dict([(key / ratio, dictionary[key]) for key in dictionary])\n+\n+\n+def setTranscript(chromosome, direction, start, end, name, value):\n+ transcript = Transcript()\n+ transcript.setChromosome(chromosome)\n+ transcript.setDirection(direction)\n+ transcript.setStart(start)\n+ transcript.setEnd(end)\n+ transcript.setName(name)\n+ transcript.setTagValue("nbElements", value)\n+ return transcript\n+\n+\n+\n+if __name__ == "__main__":\n+ \n+ magnifyingFactor = 1000\n+ \n+ # parse command line\n+ description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. 
[Category: Visualization]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input file [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-r", "--reference", dest="referenceFileName", action="store", default=None, type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")\n+ parser.add_option("-n", "--nbBins", dest="nbBins", action="store", default=1000, type="int", help="number of bins [default: 1000] [format: int]")\n+ parser.add_option("-2", "--bothStrands", dest="bothStrands", action="store_true", default=False, help="plot one curve per strand [format: bool] [default: false]")\n+ parser.add_option("-w", "--raw", dest="raw", '..b' plotter.addLine(divideKeyDict(densityPlus[chromosome], ratio))\n+ if options.raw:\n+ plotter.addLine(divideKeyDict(binsMinus[chromosome], ratio))\n+ else:\n+ plotter.addLine(divideKeyDict(densityMinus[chromosome], ratio))\n+ else:\n+ if options.raw:\n+ plotter.addLine(divideKeyDict(bins[chromosome], ratio))\n+ else:\n+ plotter.addLine(divideKeyDict(density[chromosome], ratio))\n+ plotter.plot()\n+ \n+ if options.csv:\n+ outputFileName = "%s" % (options.outputFileName)\n+ if options.chromosome != None:\n+ outputFileName += "_%s" % (options.chromosome)\n+ if options.start != None and options.end != None:\n+ outputFileName += ":%d-%d" % (options.start, options.end)\n+ outputFileName += ".csv"\n+ csvHandle = open(outputFileName, "w")\n+ for slice in range(start / sliceSize, maxSlice + 1):\n+ csvHandle.write(";%d-%d" % (slice * sliceSize + 1, 
(slice+1) * sliceSize))\n+ csvHandle.write("\\n")\n+ if options.bothStrands:\n+ for chromosome in densityPlus:\n+ if len(densityPlus[chromosome]) > 0:\n+ csvHandle.write("%s [+]" % (chromosome))\n+ for slice in sorted(densityPlus[chromosome].keys()):\n+ csvHandle.write(";%.2f" % (densityPlus[chromosome][slice]))\n+ csvHandle.write("\\n") \n+ if len(densityMinus[chromosome]) > 0:\n+ csvHandle.write("%s [-]" % (chromosome))\n+ for slice in sorted(densityPlus[chromosome].keys()):\n+ csvHandle.write(";%.2f" % (-densityMinus[chromosome][slice]))\n+ csvHandle.write("\\n") \n+ else:\n+ for chromosome in density:\n+ if len(density[chromosome]) > 0:\n+ csvHandle.write(chromosome)\n+ for slice in sorted(density[chromosome].keys()):\n+ csvHandle.write(";%.2f" % (density[chromosome][slice]))\n+ csvHandle.write("\\n")\n+ csvHandle.close()\n+ \n+ if options.gff:\n+ chromosome = "" if options.chromosome == None else options.chromosome.capitalize()\n+ start = "" if options.start == None else "%d" % (options.start)\n+ end = "" if options.end == None else "%d" % (options.end)\n+ link1 = "" if options.start == None and options.end == None else ":"\n+ link2 = "" if options.start == None and options.end == None else "-"\n+ writer = Gff3Writer("%s%s%s%s%s.gff3" % (options.outputFileName, link1, start, link2, end), options.verbosity)\n+ cpt = 1\n+ if options.raw:\n+ valuesPlus = binsPlus\n+ valuesMinus = binsMinus\n+ values = bins\n+ else:\n+ valuesPlus = densityPlus\n+ valuesMinus = densityMinus\n+ values = density\n+ if options.bothStrands:\n+ for chromosome in values:\n+ for slice in valuesPlus[chromosome]:\n+ writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), valuesPlus[chromosome][slice]))\n+ cpt += 1\n+ for slice in valuesMinus[chromosome]:\n+ writer.addTranscript(setTranscript(chromosome, -1, slice, slice + sliceSize, "region%d" % (cpt), - valuesMinus[chromosome][slice]))\n+ cpt += 1\n+ else:\n+ for chromosome in values:\n+ for slice in 
values[chromosome]:\n+ writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), values[chromosome][slice]))\n+ cpt += 1\n+ writer.write()\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getElement.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getElement.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,106 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Get the first element (exon / intron) from a list of transcripts"""
+
+import os
+from optparse import OptionParser
+from commons.core.writer.Gff3Writer import *
+from SMART.Java.Python.structure.TranscriptContainer import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # Command-line driver.  For every transcript of the input file it writes to
+    # a GFF3 file either the exon found first after sortExons() (only when the
+    # transcript has more than one exon) or each of its introns.  With --mysql
+    # the source transcript is also handed to a MySQL writer.
+
+    # parse command line
+    description = "Get Element v1.0.1: Get the first element (exon / intron) from a list of transcripts. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-y", "--mysql", dest="mysql", action="store_true", default=False, help="mySQL output [format: bool] [default: false]")
+    parser.add_option("-t", "--type", dest="type", action="store", type="string", help="type of the element    [format: choice (exon, intron)]")
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    # NOTE(review): --log is parsed but never read below.
+    parser.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    container = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    writer = Gff3Writer(options.outputFileName, options.verbosity)
+    # NOTE(review): the MySQL writer is built even without --mysql; it is only
+    # fed and flushed when the flag is set.
+    sqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
+
+    nbLines = container.getNbTranscripts()
+    # Single-argument print(...) prints the same text under Python 2 and 3.
+    print("%i lines found" % (nbLines))
+
+    # treat transcripts
+    nbWritten = 0    # number of elements sent to the GFF3 writer
+    nbUsed = 0       # number of input transcripts that produced at least one element
+    progress = Progress(nbLines, "Analyzing transcripts of " + options.inputFileName, options.verbosity)
+    for transcript in container.getIterator():
+
+        # Template carrying the identity of the source transcript; start and
+        # end are filled in per element.
+        outTranscript = Transcript()
+        outTranscript.setName(transcript.getName())
+        outTranscript.setDirection(transcript.getDirection())
+        outTranscript.setChromosome(transcript.getChromosome())
+
+        if options.type == "exon":
+            # Transcripts with a single exon are skipped.
+            if len(transcript.getExons()) > 1:
+                transcript.sortExons()
+                firstExon = transcript.getExons()[0]
+                outTranscript.setStart(firstExon.getStart())
+                outTranscript.setEnd(firstExon.getEnd())
+                writer.addTranscript(outTranscript)
+                if options.mysql:
+                    sqlWriter.addTranscript(transcript)
+                nbWritten += 1
+                nbUsed += 1
+        elif options.type == "intron":
+            used = False
+            for intron in transcript.getIntrons():
+                used = True
+                thisTranscript = Transcript()
+                thisTranscript.copy(outTranscript)
+                thisTranscript.setStart(intron.getStart())
+                thisTranscript.setEnd(intron.getEnd())
+                writer.addTranscript(thisTranscript)
+                if options.mysql:
+                    # NOTE(review): the whole source transcript is added once
+                    # per intron -- confirm this is intended.
+                    sqlWriter.addTranscript(transcript)
+                nbWritten += 1
+            if used:
+                nbUsed += 1
+        else:
+            # SystemExit is what sys.exit() raises; raising it directly avoids
+            # depending on a 'sys' name that this file never imports.
+            raise SystemExit("Cannot understand type %s" % (options.type))
+        progress.inc()
+    progress.done()
+
+    # NOTE(review): writer.write() is never called for the GFF3 writer;
+    # presumably Gff3Writer.addTranscript writes immediately -- confirm.
+    if options.mysql:
+        sqlWriter.write()
+
+    print("nb sequences used: %d" % (nbUsed))
+    print("nb elements used: %d" % (nbWritten))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getExons.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getExons.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,128 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
+def zeroBaseToOneBaseConvertor(x):
+    """Return x - 1 for a strictly positive x; return zero and negative values unchanged.
+
+    NOTE: the name reads as "zero-based to one-based", but the arithmetic
+    shifts positive values down by one.  GetExons.getSelectionExonIndices
+    applies it to the numbers of the user selection before using them as
+    sequence indices, so negative values keep their count-from-the-end meaning.
+    """
+    return x - 1 if x > 0 else x
+
+class GetExons(object):
+    """Write the exons of the input transcripts as stand-alone GFF3 records.
+
+    An optional selection (see setSelection) restricts the output to some
+    exon ranks of each transcript.
+    """
+
+    def __init__(self, verbosity):
+        """Store the trace level; no exon selection is active initially."""
+        self.verbosity = verbosity
+        self.selection = False
+
+    def setInputFile(self, fileName, format):
+        """Open fileName with the parser that ParserChooser provides for format."""
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.parser = chooser.getParser(fileName)
+
+    def setSelection(self, selection):
+        """Parse a selection string such as '1,2,5..-3,-1'.
+
+        Each comma-separated part is either one integer (appended to
+        selectionItems) or two integers joined by '..' (appended as a pair to
+        selectionIntervals).  Passing None leaves the selection disabled.
+
+        Raises Exception when a part is not made of integers or holds more
+        than two numbers.
+        """
+        if selection is None:
+            return
+        self.selection = True
+        self.selectionItems = []
+        self.selectionIntervals = []
+        for part in selection.split(","):
+            try:
+                # A real list (not map()) so len() and indexing work on any Python version.
+                numbers = [int(number) for number in part.split("..")]
+            except ValueError:
+                # Quote the raw text: no parsed value exists when int() fails.
+                raise Exception("Elements '" + part + "' of selection '" + selection + "' do not seem to be integers!")
+            if len(numbers) == 1:
+                self.selectionItems.append(numbers[0])
+            elif len(numbers) == 2:
+                self.selectionIntervals.append((numbers[0], numbers[1]))
+            else:
+                raise Exception("Cannot parse elements '" + part + "' of selection '" + selection + "'!")
+
+    def getSelectionExonIndices(self, nbExons):
+        """Return the indices (0 .. nbExons-1) selected for a transcript with nbExons exons.
+
+        Without a selection, every index is returned.  Selected numbers are
+        first passed through zeroBaseToOneBaseConvertor.
+        """
+        if not self.selection:
+            return range(nbExons)
+        indices = []
+        for item in self.selectionItems:
+            index = zeroBaseToOneBaseConvertor(item)
+            # A rank this transcript does not have is skipped, the same way
+            # intervals are clipped below, instead of raising IndexError.
+            if -nbExons <= index < nbExons:
+                indices.append(range(nbExons)[index])
+        for start, end in self.selectionIntervals:
+            start, end = map(zeroBaseToOneBaseConvertor, (start, end))
+            # Make a positive end inclusive.
+            # NOTE(review): a negative end stays an exclusive slice bound, so
+            # '1..-1' leaves out the last exon -- confirm this is intended.
+            if end > 0:
+                end += 1
+            indices.extend(range(nbExons)[start:end])
+        return indices
+
+    def setOutputFile(self, fileName):
+        """Create the GFF3 transcript writer for fileName."""
+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+    def run(self):
+        """Write each selected exon of each input transcript as its own transcript record."""
+        progress = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
+        nbExons = 0
+        for cpt1, transcript in enumerate(self.parser.getIterator()):
+            # A set gives constant-time membership tests in the exon loop.
+            selectedExons = set(self.getSelectionExonIndices(transcript.getNbExons()))
+            transcript.sortExons()
+            for cpt2, exon in enumerate(transcript.getExons()):
+                if cpt2 not in selectedExons:
+                    continue
+                exonTranscript = Transcript()
+                exonTranscript.copy(exon)
+                # The exon is written as a transcript: drop the Parent tag
+                # and set the feature tag accordingly.
+                if "Parent" in exonTranscript.tags:
+                    del exonTranscript.tags["Parent"]
+                exonTranscript.tags["feature"] = "transcript"
+                # Replace a missing or placeholder ID / name with one built
+                # from the 1-based transcript and exon counters.
+                if "ID" not in exonTranscript.tags or exonTranscript.tags["ID"] == "unnamed transcript":
+                    exonTranscript.tags["ID"] = "exon_%d-%d" % (cpt1+1, cpt2+1)
+                if exonTranscript.getName() == "unnamed transcript":
+                    exonTranscript.setName("exon_%d-%d" % (cpt1+1, cpt2+1))
+                self.writer.addTranscript(exonTranscript)
+                nbExons += 1
+            progress.inc()
+        self.writer.write()
+        self.writer.close()
+        progress.done()
+        if self.verbosity > 1:
+            # Single-argument print(...) prints the same text under Python 2 and 3.
+            print("%d transcripts read" % (self.parser.getNbTranscripts()))
+            print("%d exons written" % (nbExons))
+
+if __name__ == "__main__":
+
+    # Command-line entry point: declare the options, parse them, then drive a
+    # GetExons instance with the parsed values.
+    description = "Get Exons v1.0.1: Get the exons of a set of transcripts. [Category: Data Modification]"
+
+    # One row per option: short flag, long flag, keyword settings for add_option.
+    optionTable = (
+        ("-i", "--input", {"dest": "inputFileName", "action": "store", "type": "string", "help": "input file [compulsory] [format: file in transcript format given by -f]"}),
+        ("-f", "--format", {"dest": "format", "action": "store", "type": "string", "help": "format of file [compulsory] [format: transcript file format]"}),
+        ("-s", "--select", {"dest": "select", "action": "store", "default": None, "type": "string", "help": "select some of the exons (like '1,2,5..-3,-1') [format: string]"}),
+        ("-o", "--output", {"dest": "outputFileName", "action": "store", "type": "string", "help": "output file [format: output file in GFF3 format]"}),
+        ("-v", "--verbosity", {"dest": "verbosity", "action": "store", "default": 1, "type": "int", "help": "trace level [format: int]"}),
+    )
+    optionParser = OptionParser(description = description)
+    for shortFlag, longFlag, settings in optionTable:
+        optionParser.add_option(shortFlag, longFlag, **settings)
+    options, _ = optionParser.parse_args()
+
+    exonGetter = GetExons(options.verbosity)
+    exonGetter.setInputFile(options.inputFileName, options.format)
+    exonGetter.setSelection(options.select)
+    exonGetter.setOutputFile(options.outputFileName)
+    exonGetter.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getInfoPerCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getInfoPerCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,167 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Compare overlap of a transcript list and list of read, and get some info depending on the coverage"""\n+\n+import os\n+from optparse import OptionParser\n+from commons.core.parsing.SequenceListParser import *\n+from commons.core.writer.Gff3Writer import *\n+from SMART.Java.Python.mySql.MySqlConnection import *\n+from SMART.Java.Python.structure.TranscriptListsComparator import *\n+from SMART.Java.Python.misc.RPlotter import *\n+from SMART.Java.Python.misc.Progress import *\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Get Info per Coverage v1.0.1: Get a list of information clustered by the density of the coverage on a genome. 
[Category: Personnal]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in TXT format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ parser.add_option("-l", "--log", dest="log", action="store", default=None, type="string", help="write a log file [format: bool] [default: false]")\n+ (options, args) = parser.parse_args()\n+\n+ logHandle = None\n+ if options.log != None:\n+ logHandle = open(options.log, "w")\n+ '..b'script.name]\n+ else:\n+ averageSizesWithoutIntrons[transcript.getSize()] += coverages[transcript.name]\n+ if transcript.getSize() not in sumSizesWithoutIntrons:\n+ sumSizesWithoutIntrons[transcript.getSize()] = 1\n+ else:\n+ sumSizesWithoutIntrons[transcript.getSize()] += 1\n+ if transcript.getNbExons() not in averageNbExons:\n+ averageNbExons[transcript.getNbExons()] = coverages[transcript.name]\n+ else:\n+ averageNbExons[transcript.getNbExons()] += coverages[transcript.name]\n+ if transcript.getNbExons() not in sumSizesNbExons:\n+ sumSizesNbExons[transcript.getNbExons()] = 1\n+ else:\n+ sumSizesNbExons[transcript.getNbExons()] += 1\n+ 
sizesWithIntrons[transcript.name] = (transcript.getSizeWithIntrons(), coverages[transcript.name])\n+ sizesWithoutIntrons[transcript.name] = (transcript.getSize(), coverages[transcript.name])\n+ nbExons[transcript.name] = (transcript.getNbExons(), coverages[transcript.name])\n+ progress.inc()\n+ progress.done()\n+ \n+ plotterSizeWithIntrons = RPlotter("%sWithIntrons.png" % (options.output), options.verbosity)\n+ plotterSizeWithIntrons.setPoints(True)\n+ plotterSizeWithIntrons.setMaximumX(10000)\n+ plotterSizeWithIntrons.setMaximumY(1000) \n+ plotterSizeWithIntrons.setLog("y")\n+ plotterSizeWithIntrons.addLine(sizesWithIntrons)\n+ plotterSizeWithIntrons.plot()\n+ \n+ plotterSizeWithoutIntrons = RPlotter("%sWithoutIntrons.png" % (options.output), options.verbosity)\n+ plotterSizeWithoutIntrons.setPoints(True)\n+ plotterSizeWithoutIntrons.setMaximumX(10000) \n+ plotterSizeWithoutIntrons.setMaximumY(1000)\n+ plotterSizeWithoutIntrons.setLog("y")\n+ plotterSizeWithoutIntrons.addLine(sizesWithoutIntrons)\n+ plotterSizeWithoutIntrons.plot()\n+ \n+ plotterNbExons = RPlotter("%sNbExons.png" % (options.output), options.verbosity)\n+ plotterNbExons.setPoints(True)\n+ plotterNbExons.addLine(nbExons)\n+ plotterNbExons.plot()\n+ \n+ for element in averageSizesWithIntrons:\n+ averageSizesWithIntrons[element] = int(float(averageSizesWithIntrons[element]) / sumSizesWithIntrons[element])\n+ plotterAverageSizeWithIntrons = RPlotter("%sAverageWithIntrons.png" % (options.output), options.verbosity)\n+ plotterAverageSizeWithIntrons.setMaximumX(10000)\n+ plotterAverageSizeWithIntrons.setMaximumY(1000) \n+ plotterAverageSizeWithIntrons.setLog("y")\n+ plotterAverageSizeWithIntrons.addLine(averageSizesWithIntrons)\n+ plotterAverageSizeWithIntrons.plot()\n+ print "min/avg/med/max sizes with introns: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageSizesWithIntrons)\n+\n+ for element in averageSizesWithoutIntrons:\n+ averageSizesWithoutIntrons[element] = 
int(float(averageSizesWithoutIntrons[element]) / sumSizesWithoutIntrons[element])\n+ plotterAverageSizeWithoutIntrons = RPlotter("%sAverageWithoutIntrons.png" % (options.output), options.verbosity)\n+ plotterAverageSizeWithoutIntrons.setMaximumX(10000)\n+ plotterAverageSizeWithoutIntrons.setMaximumY(1000) \n+ plotterAverageSizeWithoutIntrons.setLog("y")\n+ plotterAverageSizeWithoutIntrons.addLine(averageSizesWithoutIntrons)\n+ plotterAverageSizeWithoutIntrons.plot()\n+ print "min/avg/med/max sizes without introns: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageSizesWithoutIntrons)\n+\n+ for element in averageNbExons:\n+ averageNbExons[element] = int(float(averageNbExons[element]) / sumSizesNbExons[element])\n+ plotterAverageNbExons = RPlotter("%sAverageNbExons.png" % (options.output), options.verbosity)\n+ plotterAverageNbExons.addLine(averageNbExons)\n+ plotterAverageNbExons.plot()\n+ print "min/avg/med/max # exons: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageNbExons)\n+\n+ if options.log:\n+ logHandle.close()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getIntrons.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getIntrons.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
+class GetIntrons(object):
+
+    def __init__(self, verbosity):
+        self.verbosity = verbosity
+
+    def setInputFile(self, fileName, format):
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.parser = chooser.getParser(fileName)
+
+    def setOutputFile(self, fileName):
+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+    def run(self):
+        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
+        nbIntrons = 0
+        for cpt1, transcript in enumerate(self.parser.getIterator()):
+            for cpt2, intron in enumerate(transcript.getIntrons()):
+                intronTranscript = Transcript()
+                intronTranscript.copy(intron)
+                if "Parent" in intronTranscript.tags:
+                    del intronTranscript.tags["Parent"]
+                intronTranscript.tags["feature"] = "transcript"
+                if "ID" not in intronTranscript.tags or intronTranscript.tags["ID"] == "unnamed transcript":
+                    intronTranscript.tags["ID"] = "intron_%d-%d" % (cpt1+1, cpt2+1)
+                if intronTranscript.getName() == "unnamed transcript":
+                    intronTranscript.setName("intron_%d-%d" % (cpt1+1, cpt2+1))
+                self.writer.addTranscript(intronTranscript)
+                nbIntrons += 1
+            progress.inc()
+        self.writer.write()
+        self.writer.close()
+        progress.done()
+        if self.verbosity > 1:
+            print "%d transcripts read" % (self.parser.getNbTranscripts())
+            print "%d introns written" % (nbIntrons)
+
+
+if __name__ == "__main__":
+
+    description = "Get Introns v1.0.1: Get the introns of a set of transcripts. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",             type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",             type="string", help="format of file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",             type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",  default=1, type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    gi = GetIntrons(options.verbosity)
+    gi.setInputFile(options.inputFileName, options.format)
+    gi.setOutputFile(options.outputFileName)
+    gi.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getLetterDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,153 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Get the size distribution of a Fasta / BED file"""
+
+import os
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc.RPlotter import *
+from commons.core.parsing.ParserChooser import ParserChooser
+
+
+def writeCVSfile(outHandler):
+    for pos in range(len(letters)):
+        posTrue = pos +1
+        outHandler.write( "%s;" % (posTrue))
+        for letter in lettersRate:
+            if positionRate[letter].has_key(pos):
+                outHandler.write("%s=%.2f%s;" %(letter, positionRate[letter][pos], "%"))
+            else:
+                outHandler.write("%s=0%s;" % (letter, "%"))
+        outHandler.write("\n")
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Letter Distribution v1.0.1: Compute the distribution of nucleotides of a set of genomic coordinates. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file to be analyzed [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of file [format: sequence file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in PNG format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    parser.add_option("-c", "--csv",       dest="csv",            action="store_true", default=False,                help="write a .csv file [format: bool] [default: false]")
+    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    chooser = ParserChooser()
+    chooser.findFormat(options.format)
+    parser      = chooser.getParser(options.inputFileName)
+    nbSequences = parser.getNbSequences()
+    print "%i sequences read" % (nbSequences)
+
+    # treat items
+    progress       = Progress(nbSequences, "Analyzing sequences of " + options.inputFileName, options.verbosity)
+    nbLettersTotal = 0
+    nbLetters      = {}
+    lettersRate    = {}
+    nbPositions    = {}
+    positionCount  = {}
+    positionRate   = {}
+    nbPositionRate = {}
+    for sequence in parser.getIterator():
+        letters            = sequence.getSequence()
+        thisNbLettersTotal = sequence.getSize()
+        nbLettersTotal    += thisNbLettersTotal
+        thisNbLetters      = {}
+
+        for pos in range(len(letters)):
+            letter = letters[pos]
+            if letter not in thisNbLetters:
+                thisNbLetters[letter] = 1
+            else:
+                thisNbLetters[letter] += 1
+            if pos+1 not in nbPositions:
+                nbPositions[pos+1] = 1
+            else:
+                nbPositions[pos+1] += 1
+            if letter not in positionCount:
+                positionCount[letter] = {}
+            if pos+1 not in positionCount[letter]:
+                positionCount[letter][pos+1] = 1
+            else:
+                positionCount[letter][pos+1] += 1
+
+        for letter in thisNbLetters:
+            if letter not in nbLetters:
+                nbLetters[letter] = thisNbLetters[letter]
+            else:
+                nbLetters[letter] += thisNbLetters[letter]
+            if letter not in lettersRate:
+                lettersRate[letter] = {}
+            rate = int(float(thisNbLetters[letter]) / thisNbLettersTotal * 100)
+            if rate not in lettersRate[letter]:
+                lettersRate[letter][rate] = 1
+            else:
+                lettersRate[letter][rate] += 1
+        progress.inc()
+    progress.done()
+
+    for letter in positionCount:
+        positionRate[letter] = {}
+        for pos in positionCount[letter]:
+            positionRate[letter][pos] = positionCount[letter][pos] / float(nbPositions[pos]) * 100
+    for pos in nbPositions:
+        nbPositionRate[pos] = nbPositions[pos] / float(nbPositions[1]) * 100
+
+    # plot content distributions
+    plotter = RPlotter("%s.png" % (options.outputFileName), options.verbosity, True)
+    plotter.setFill(0)
+    plotter.setLegend(True)
+    for letter in lettersRate:
+        plotter.addLine(lettersRate[letter], letter)
+    plotter.plot()
+
+    # plot distribution per position
+    plotter = RPlotter("%sPerNt.png" % (options.outputFileName), options.verbosity, True)
+    plotter.setFill(0)
+    plotter.setLegend(True)
+    plotter.setXLabel("Position on the read")
+    plotter.setYLabel("Percentage")
+    for letter in positionRate:
+        plotter.addLine(positionRate[letter], letter)
+    plotter.addLine(nbPositionRate, "#")
+    plotter.plot()
+
+    if options.csv:
+        outHandler = open("%s.csv" % (options.outputFileName), "w")
+        writeCVSfile(outHandler)
+        outHandler.close()
+
+    print "%d sequences" % (nbSequences)
+    print "%d letters" % (nbLettersTotal)
+    for letter in nbLetters:
+        print "%s: %d (%.2f%%)" % (letter, nbLetters[letter], float(nbLetters[letter]) / nbLettersTotal * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getNb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getNb.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,99 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Get the repartition of some elements (# exons per transcripts, # of repetitions of a mapping or # of transcripts in a cluster)"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+from math import *
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Nb v1.0.1: Get the distribution of exons per transcripts, or mapping per read, or transcript per cluster. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in png format]")
+    parser.add_option("-q", "--query",     dest="query",          action="store",                     type="string", help="query  [compulsory] (# exons, # transcripts) [format: choice (exon, transcript, cluster)]")
+    parser.add_option("-b", "--barplot",   dest="barplot",        action="store_true", default=False,                help="use barplot representation [format: bool] [default: false]")
+    parser.add_option("-x", "--xMax",      dest="xMax",           action="store",      default=None,  type="int",    help="maximum value on the x-axis to plot [format: int]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    if options.query != "exon" and options.query != "transcript" and options.query != "cluster":
+        raise Exception("Do not understand query %s" % (options.query))
+
+    exonDistribution       = {}
+    transcriptDistribution = {}
+    clusterDistribution    = {}
+
+    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+
+    progress = Progress(transcriptContainer.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
+    # count the number of reads
+    for element in transcriptContainer.getIterator():
+        if options.query == "exon":
+            nbExons = element.getNbExons()
+            exonDistribution[nbExons] = exonDistribution.get(nbExons, 0) + 1
+        elif options.query == "transcript":
+            name = element.getName()
+            transcriptDistribution[name] = transcriptDistribution.get(name, 0) + 1
+        elif options.query == "cluster":
+            nbElements = 1 if "nbElements" not in element.getTagNames() else element.getTagValue("nbElements")
+            clusterDistribution[nbElements] = clusterDistribution.get(nbElements, 0) + 1
+        progress.inc()
+    progress.done()
+
+    if options.query == "exon":
+        distribution = exonDistribution
+    elif options.query == "transcript":
+        distribution = {}
+        for name in transcriptDistribution:
+            distribution[transcriptDistribution[name]] = distribution.get(transcriptDistribution[name], 0) + 1
+    elif options.query == "cluster":
+        distribution = clusterDistribution
+
+    outputFileName = options.outputFileName
+    plotter = RPlotter(outputFileName, options.verbosity)
+    plotter.setImageSize(1000, 300)
+    plotter.setFill(0)
+    plotter.setMaximumX(options.xMax)
+    plotter.setBarplot(options.barplot)
+    plotter.addLine(distribution)
+    plotter.plot()
+
+    print "min/avg/med/max: %d/%.2f/%.1f/%d" % (Utils.getMinAvgMedMax(distribution))
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getRandomRegions.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getRandomRegions.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,267 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Find random regions in a genome"""\n+\n+import random, math\n+from optparse import OptionParser\n+from commons.core.parsing.FastaParser import *\n+from commons.core.writer.Gff3Writer import *\n+from commons.core.writer.MySqlTranscriptWriter import *\n+from SMART.Java.Python.misc.Progress import *\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+\n+repetitions = 100\n+\n+\n+class RandomRegionsGenerator(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.strands = False\n+ self.distribution = "uniform"\n+ self.transcripts = None\n+ self.sequenceParser = None\n+ random.seed()\n+\n+\n+ def setInput(self, fileName):\n+ self.sequenceParser = FastaParser(fileName, self.verbosity)\n+\n+\n+ def setGenomeSize(self, size):\n+ self.genomeSize = size\n+\n+\n+ def setChromosomeName(self, name):\n+ self.chromosomeName = name\n+\n+\n+ def setAnnotation(self, fileName, format):\n+ parser = TranscriptContainer(fileName, format, self.verbosity)\n+ self.transcripts = []\n+ for transcript in parser.getIterator():\n+ self.transcripts.append(transcript)\n+ self.setNumber(len(self.transcripts))\n+ self.setSize(0)\n+\n+\n+ def setOutputFile(self, fileName):\n+ self.outputFileName = fileName\n+\n+\n+ def setSize(self, size):\n+ self.minSize = size\n+ self.maxSize = size\n+\n+\n+ def setMinSize(self, size):\n+ self.minSize = size\n+\n+\n+ def setMaxSize(self, size):\n+ self.maxSize = size\n+\n+\n+ def setNumber(self, number):\n+ 
self.number = number\n+\n+\n+ def setStrands(self, strands):\n+ self.strands = strands\n+\n+\n+ def setMaxDistribution(self, maxElements):\n+ if maxElements == None:\n+ return\n+ self.maxElements = maxElements\n+ self.distribution = "gaussian"\n+\n+\n+ def setDeviationDistribution(self, deviation):\n+ if deviation == None:\n+ return\n+ self.deviation = deviation\n+ self.distribution = "gaussian"\n+\n+\n+ def getSizes(self):\n+ if self.sequenceParser == None:\n+ self.chromosomes = [self.chromosomeName]\n+ self.sizes = {self.chromosomeName: self.genomeSize}\n+ self.cumulatedSize = self.genomeSize\n+ self.cumulatedSizes = {'..b'n(self):\n+ self.getSizes()\n+ self.writeRegions()\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Get Random Regions v1.0.2: Get some random coordinates on a genome. May use uniform or gaussian distribution (in gaussion distribution, # of element per cluster follows a power law). [Category: Other]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-r", "--reference", dest="reference", action="store", default=None, type="string", help="file that contains the sequences [format: file in FASTA format]")\n+ parser.add_option("-S", "--referenceSize", dest="referenceSize", action="store", default=None, type="int", help="size of the chromosome (when no reference is given) [format: int]")\n+ parser.add_option("-c", "--chromosome", dest="chromosome", action="store", default=None, type="string", help="name of the chromosome (when no reference is given) [format: string]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in FASTA format]")\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", default=None, type="string", help="optional file containing regions to shuffle [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", 
action="store", default=None, type="string", help="format of the previous file [format: transcript file format]")\n+ parser.add_option("-s", "--size", dest="size", action="store", default=None, type="int", help="size of the regions (if no region set is provided) [format: int]")\n+ parser.add_option("-z", "--minSize", dest="minSize", action="store", default=None, type="int", help="minimum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n+ parser.add_option("-Z", "--maxSize", dest="maxSize", action="store", default=None, type="int", help="maximum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n+ parser.add_option("-n", "--number", dest="number", action="store", default=None, type="int", help="number of regions (if no region set is provided) [format: int]")\n+ parser.add_option("-t", "--strands", dest="strands", action="store_true", default=False, help="use both strands (if no region set is provided) [format: boolean]")\n+ parser.add_option("-m", "--max", dest="max", action="store", default=None, type="int", help="max. # reads in a cluster (for Gaussian dist.) [format: int]")\n+ parser.add_option("-d", "--deviation", dest="deviation", action="store", default=None, type="int", help="deviation around the center of the cluster (for Gaussian dist.) 
[format: int]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ rrg = RandomRegionsGenerator(options.verbosity)\n+ if options.reference == None:\n+ rrg.setGenomeSize(options.referenceSize)\n+ rrg.setChromosomeName(options.chromosome)\n+ else:\n+ rrg.setInput(options.reference)\n+ rrg.setOutputFile(options.outputFileName)\n+ if options.inputFileName == None:\n+ if options.size != None:\n+ rrg.setSize(options.size)\n+ else:\n+ rrg.setMinSize(options.minSize)\n+ rrg.setMaxSize(options.maxSize)\n+ rrg.setNumber(options.number)\n+ rrg.setStrands(options.strands)\n+ else:\n+ rrg.setAnnotation(options.inputFileName, options.format)\n+ rrg.setMaxDistribution(options.max)\n+ rrg.setDeviationDistribution(options.deviation)\n+ rrg.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getReadDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getReadDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,129 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
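+Count the identical reads of a FASTA/FASTQ file, list the most represented ones, and plot the distribution of the number of occurrences.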
+"""
+import os
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.parsing.FastqParser import FastqParser
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented. [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file sequence [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of the file [compulsory] [format: sequence file format]")
+    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="keep the best n    [format: int]")
+    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="float",  help="keep the best n\% [format: float]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output files in PNG format and txt format]")
+    parser.add_option("-x", "--xMax",      dest="xMax",           action="store", default=None, type="int",    help="maximum value on the x-axis to plot [format: int]")
+    parser.add_option("-D", "--directory", dest="working_Dir",    action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    if options.working_Dir[-1] != '/':
+        options.outputFileName = options.working_Dir + '/' + options.outputFileName
+
+    if options.format == "fasta":
+        parser = FastaParser(options.inputFileName, options.verbosity)
+    elif options.format == "fastq":
+        parser = FastqParser(options.inputFileName, options.verbosity)
+    else:
+        raise Exception("Do not understand '%s' file format." % (options.format))
+
+    progress  = Progress(parser.getNbSequences(), "Reading %s" % (options.inputFileName), options.verbosity)
+    sequences = {}
+    for sequence in parser.getIterator():
+        sequence = sequence.sequence
+        if sequence not in sequences:
+            sequences[sequence] = 1
+        else:
+            sequences[sequence] += 1
+        progress.inc()
+    progress.done()
+
+    values = sequences.values()
+    values.sort()
+    if options.percent != None:
+        threshold = values[int(float(options.percent) / 100 * len(values))]
+    elif options.number != None:
+        threshold = values[-options.number]
+    else:
+        threshold = 0
+
+    # sort by value
+    progress     = Progress(parser.getNbSequences(), "Sorting values", options.verbosity)
+    sortedValues = dict([(value, []) for value in sequences.values()])
+    for sequence, value in sequences.iteritems():
+        sortedValues[value].append(sequence)
+        progress.inc()
+    progress.done()
+
+    outputFileName = "%s.txt" % (options.outputFileName)
+    handle         = open(outputFileName, "w")
+    progress       = Progress(parser.getNbSequences(), "Writing into %s" % (outputFileName), options.verbosity)
+    for value in reversed(sorted(sortedValues.keys())):
+        if value >= threshold:
+            for sequence in sortedValues[value]:
+                handle.write("%s\t%d\n" % (sequence, value))
+        progress.inc()
+    progress.done()
+    handle.close()
+
+    line     = {}
+    progress = Progress(len(values), "Preparing plot", options.verbosity)
+    for value in values:
+        if value not in line:
+            line[value] = 1
+        else:
+            line[value] += 1
+        progress.inc()
+    progress.done()
+
+    plot = RPlotter("%s.png" % (options.outputFileName), options.verbosity)
+    plot.setFill(0)
+    plot.setMaximumX(options.xMax)
+    plot.setXLabel("# occurrences")
+    plot.setYLabel("# reads")
+    plot.addLine(line)
+    plot.plot()
+
+    if options.verbosity > 0:
+        print "%d/%.2f/%.1f/%d occurrences" % (Utils.getMinAvgMedMax(line))
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getSequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getSequence.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,60 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Get a given sequence in a multi-Fasta file"""
+import sys
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.FastaWriter import FastaWriter
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get Sequence v1.0.1: Get a single sequence in a FASTA file. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName",action="store",type="string", help="multi-FASTA file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-n", "--name",dest="name",action="store",type="string", help="name of the sequence [compulsory] [format: string]")
+    parser.add_option("-o", "--output",dest="outputFileName",action="store",type="string", help="output sequence file (FASTA) [compulsory] [format: file in FASTA format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # read Fasta file
+    sequenceListParser = FastaParser(options.inputFileName, options.verbosity)
+    for sequence in sequenceListParser.getIterator():
+        name = sequence.name.split(" ")[0]
+        if name == options.name:
+            writer = FastaWriter(options.outputFileName, options.verbosity)
+            writer.addSequence(sequence)
+            print sequence.printFasta(),
+            sys.exit(0)
+    writer.close()
+    print "No sequence found"

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getSizes.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getSizes.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,238 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, sys\n+from optparse import OptionParser\n+from commons.core.parsing.FastaParser import FastaParser\n+from commons.core.parsing.FastqParser import FastqParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from commons.core.parsing.GffParser import GffParser\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc import Utils\n+\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.utils.RepetOptionParser import RepetOptionParser\n+\n+LOG_DEPTH = "smart"\n+\n+class GetSizes(object):\n+ \n+ def __init__(self, inFileName = None, inFormat=None, outFileName = None, query=None,xMax=None, xMin=None, csv=False, verbosity = 0):\n+ self.inFileName = inFileName\n+ self.inFormat= inFormat\n+ self.outFileName = outFileName\n+ self.query = query\n+ self.xMax = xMax\n+ self.xMin = xMin\n+ self.xLab = "Size"\n+ self.yLab = "# reads"\n+ self.barplot = False\n+ self.csv = csv\n+ self._verbosity = verbosity\n+ self.parser = None\n+ self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n+ \n+ def setAttributesFromCmdLine(self):\n+ description = "Usage: getSizes.py [options]\\n\\nGet Sizes v1.0.2: Get the sizes of a set of genomic coordinates. 
[Category: Visualization]\\n"\n+ epilog = ""\n+ parser = RepetOptionParser(description = description, epilog = epilog)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", default=None, type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n+ parser.add_option("-q", "--query", dest="query", action="store", default=None, type="string", help="type to mesure [default: size] [format: choice (size, intron size, exon size, 1st exon size)]") \n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in PNG format]")\n+ parser.add_option("-x", "--xMax", dest="x'..b'nFileName, self._verbosity)\n+ elif self.inFormat == "fastq":\n+ self.parser = FastqParser(self.inFileName, self._verbosity)\n+ else:\n+ self.parser = TranscriptContainer(self.inFileName, self.inFormat, self._verbosity)\n+ \n+ def _logAndRaise(self, errorMsg):\n+ self._log.error(errorMsg)\n+ raise Exception(errorMsg)\n+ \n+ def run(self):\n+ LoggerFactory.setLevel(self._log, self._verbosity)\n+ self._checkOptions()\n+ self._log.info("START getsizes")\n+ self._log.debug("Input file name: %s" % self.inFileName)\n+\n+ nbItems = self.parser.getNbItems()\n+ self._log.info( "%i items found" % (nbItems))\n+ \n+ # treat items\n+ progress = Progress(nbItems, "Analyzing sequences of %s" % (self.inFileName), self._verbosity)\n+ sizes = {}\n+ names = {}\n+ minimum = 1000000000000\n+ maximum = 0\n+ sum = 0\n+ number = 0\n+ nbSubItems = 0\n+ for item in self.parser.getIterator():\n+ items = []\n+ if self.query == "exon":\n+ items = item.getExons()\n+ elif self.query == "exon1":\n+ if len(item.getExons()) > 1:\n+ item.sortExons()\n+ items = [item.getExons()[0]]\n+ elif self.query == 
"intron":\n+ items = item.getIntrons()\n+ else:\n+ items = [item, ]\n+ \n+ for thisItem in items:\n+ try:\n+ nbElements = int(float(thisItem.getTagValue("nbElements")))\n+ if nbElements == None:\n+ nbElements = 1\n+ except:\n+ nbElements = 1\n+ size = thisItem.getSize()\n+ minimum = min(minimum, size)\n+ maximum = max(maximum, size)\n+ name = thisItem.name.split()[0]\n+ \n+ if size not in sizes:\n+ sizes[size] = nbElements\n+ if self.csv:\n+ names[size] = [name, ]\n+ else:\n+ sizes[size] += nbElements\n+ if self.csv:\n+ names[size].append(name)\n+ sum += size\n+ nbSubItems += nbElements\n+ number += 1\n+ progress.inc()\n+ progress.done()\n+\n+ if self.outFileName != None:\n+ plotter = RPlotter(self.outFileName, self._verbosity)\n+ plotter.setFill(0)\n+ plotter.setMinimumX(self.xMin)\n+ plotter.setMaximumX(self.xMax)\n+ plotter.setXLabel(self.xLab)\n+ plotter.setYLabel(self.yLab)\n+ plotter.setBarplot(self.barplot)\n+ plotter.addLine(sizes)\n+ plotter.plot()\n+ \n+ if nbSubItems == 0:\n+ self._logAndRaise("No item found")\n+ \n+ if self.csv:\n+ csvHandle = open(self.csv, "w")\n+ for size in range(min(sizes.keys()), max(sizes.keys())+1):\n+ if size not in sizes:\n+ csvHandle.write("%d,0,\\n" % (size))\n+ else:\n+ csvHandle.write("%d,%d,%s\\n" % (size, sizes[size], ";".join(names[size])))\n+ csvHandle.close()\n+ \n+ self.items = number \n+ self.subItems = nbSubItems\n+ self.nucleotides = sum\n+ self.minAvgMedMax = Utils.getMinAvgMedMax(sizes)\n+ \n+ print "%d items" % (number)\n+ print "%d sub-items" % (nbSubItems)\n+ print "%d nucleotides" % (sum)\n+ print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(sizes)\n+\n+ self._log.info("END getsizes")\n+\n+\n+if __name__ == "__main__":\n+ iGetSizes = GetSizes()\n+ iGetSizes.setAttributesFromCmdLine()\n+ iGetSizes.run()\n+ \n+#TODO: add two more options!!!!!!\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getWigData.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getWigData.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,67 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.parsing.WigParser import WigParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get WIG Data v1.0.1: Compute the average data for some genomic coordinates using WIG files (thus covering a large proportion of the genome) and update a tag. [Category: WIG Tools]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-w", "--wig",         dest="wig",            action="store",                     type="string", help="wig file name [compulsory] [format: file in WIG format]")
+    parser.add_option("-t", "--tag",         dest="tag",            action="store",                     type="string", help="choose a tag name to write the wig information to output file [compulsory] [format: file in WIG format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-s", "--strands",     dest="strands",        action="store_true", default=False,                help="consider both strands separately [format: boolean] [default: False]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # create parsers and writers
+    transcriptParser = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
+    wigParser        = WigParser(options.wig)
+    writer           = Gff3Writer(options.outputFileName, options.verbosity)
+    wigParser.setStrands(options.strands)
+
+    progress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
+    for transcript in transcriptParser.getIterator():
+        values = transcript.extractWigData(wigParser)
+        if options.strands:
+            values = values[transcript.getDirection()]
+        transcript.setTagValue(options.tag, str(float(sum(values)) / len(values)))
+        writer.addTranscript(transcript)
+        progress.inc()
+    progress.done()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getWigDistance.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getWigDistance.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,105 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Plot the average WIG signal at each position within a given distance around a set of genomic coordinates.
+"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.parsing.WigParser import WigParser
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Get WIG Data v1.0.2: Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome). [Category: WIG Tools]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-w", "--wig",         dest="wig",            action="store",                     type="string", help="wig file name [compulsory] [format: file in WIG format]")
+    parser.add_option("-d", "--distance",    dest="distance",       action="store",      default=1000,  type="int",    help="distance around position [compulsory] [format: int] [default: 1000]")
+    parser.add_option("-s", "--strands",     dest="strands",        action="store_true", default=False,                help="consider both strands separately [format: boolean] [default: False]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in PNG format]")
+    parser.add_option("-a", "--default",     dest="defaultValue",   action="store",      default=0.0,   type="float",  help="default value (when value is NA) [default: 0.0] [format: float]")
+    parser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                help="use log scale for y-axis [format: boolean] [default: False]")
+    parser.add_option("-k", "--keep",        dest="keep",           action="store_true", default=False,                help="keep temporary files [format: boolean] [default: False]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # create parsers and writers
+    transcriptParser = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
+    wigParser        = WigParser(options.wig)
+    wigParser.setStrands(options.strands)
+    wigParser.setDefaultValue(options.defaultValue)
+
+    # allocate data
+    strands = (1, -1) if options.strands else (1, )
+    values    = {}
+    for strand in strands:
+        values[strand] = dict([(i, 0.0) for i in range(-options.distance, options.distance+1)])
+
+    # read transcripts
+    progress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
+    for transcript in transcriptParser.getIterator():
+        transcript.removeExons()
+        transcript.restrictStart(2)
+        transcript.extendStart(options.distance)
+        transcript.extendEnd(options.distance-1)
+        theseValues = transcript.extractWigData(wigParser)
+        if len(strands) == 1:
+            theseValues = {1: theseValues}
+        for strand in strands:
+            if len(theseValues[strand]) < 2 * options.distance + 1:
+                theseValues[strand] = [options.defaultValue] * (2 * options.distance + 1 - len(theseValues[strand])) + theseValues[strand]
+            if len(theseValues[strand]) != 2 * options.distance + 1:
+ raise Exception("Got something wrong with the size of the WIG data concerning %s: %d found instead of %d" % (transcript, len(theseValues[strand]), 2 * options.distance + 1))
+            for i in range(-options.distance, options.distance+1):
+                values[strand][i] += theseValues[strand][i + options.distance]
+        progress.inc()
+    progress.done()
+
+    for strand in strands:
+        for i in range(-options.distance, options.distance+1):
+            values[strand][i] /= transcriptParser.getNbTranscripts() * strand
+
+    # draw plot
+    plotter = RPlotter(options.outputFileName, options.verbosity, options.keep)
+    plotter.setXLabel("Distance")
+    plotter.setYLabel("WigValue")
+    for strand in strands:
+        plotter.addLine(values[strand])
+    if options.log:
+        plotter.setLog("y")
+    plotter.plot()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/getWigProfile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getWigProfile.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,160 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""\n+Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.\n+"""\n+\n+import math\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from commons.core.parsing.WigParser import WigParser\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+\n+class GetWigProfile(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity\t= verbosity\n+\t\tself.values\t\t = {}\n+\t\tself.defaultValue = 0.0\n+\n+\tdef _iToJ(self, i, size):\n+\t\treturn min(self.nbPoints+1, int(math.floor(float(i - self.distance) / (size) * (self.nbPoints))))\n+\n+\tdef readTranscripts(self):\n+\t\tself.strandNames = (1, -1) if self.strands else (1, )\n+\t\tself.values\t\t= dict([(strand, dict([(i, 0.0) for i in range(self.nbPoints + 2 * self.distance)])) for strand in self.strandNames])\n+\t\ttranscriptParser = TranscriptContainer(self.inputFileName, self.inputFormat, self.verbosity)\n+\t\twigParser\t\t= WigParser(self.wig)\n+\t\tnbValues\t\t = dict([(strand, dict([(i, 0.0) for i in range(self.nbPoints + 2 * self.distance)])) for strand in self.strandNames])\n+\t\twigParser.setStrands(self.strands)\n+\t\twigParser.setDefaultValue(self.defaultValue)\n+\n+\t\tprogress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (self.inputFileName), self.verbosity)\n+\t\tfor transcript in transcriptParser.getIterator():\n+\t\t\ttranscriptSize = 
transcript.getSize()\n+\t\t\texpectedSize = transcriptSize + 2 * self.distance\n+\t\t\ttranscript.extendStart(self.distance)\n+\t\t\ttranscript.extendEnd(self.distance)\n+\t\t\ttheseValues = transcript.extractWigData(wigParser)\n+\n+\t\t\tif len(self.strandNames) == 1:\n+\t\t\t\ttheseValues = {1: theseValues}\n+\t\t\tfor strand in self.strandNames:\n+\t\t\t\tif len(theseValues[strand]) < expectedSize:\n+\t\t\t\t\ttheseValues[strand] = [self.defaultValue] * (expectedSize - len(theseValues[strand])) + theseValues[strand]\n+\t\t\t\tif len(theseValues[strand]) != expectedSize:\n+\t\t\t\t\traise Exception("Got something wrong with the size of the WIG data concerning %s [%s]: %d found instead of %d" % (transcript, ",".join(["%d-%d" % (exon.getStart(), exon.getEnd()) for exon in transcript.getExons()]), len(theseValues[strand]), expectedSize))\n+\t\t\t\tfivePValues = theseValues[strand][: self.distance]\n+\t\t\t\tnbValues = [0.0] * (self.nbPoints)\n+\t\t\t\ttranscriptValues = [0.0] * (self.nbPoints)\n+\t\t\t\tfor i in range(self.distance, len(theseValues[stra'..b'+\t\t\t\tstrand = 1\n+\t\t\tfor i in range(self.nbPoints + 2 * self.distance):\n+\t\t\t\tself.values[strand][i] /= transcriptParser.getNbTranscripts() * strand\n+\n+\n+\tdef smoothen(self):\n+\t\tif self.smoothenForce == None:\n+\t\t\treturn\n+\t\tfor strand in self.strandNames:\n+\t\t\taverageValues = {}\n+\t\t\tfor center in range(self.distance, self.distance + self.nbPoints):\n+\t\t\t\tsum\t\t= 0.0\n+\t\t\t\tnbValues = 0.0\n+\t\t\t\tfor i in range(center - self.smoothenForce + 1, center + self.smoothenForce):\n+\t\t\t\t\tif i > self.distance and i < self.distance + self.nbPoints:\n+\t\t\t\t\t\tnbValues += 1\n+\t\t\t\t\t\tsum\t\t+= self.values[strand][i]\n+\t\t\t\taverageValues[center] = sum / nbValues\n+\t\t\tfor position in range(self.distance, self.distance + self.nbPoints):\n+\t\t\t\tself.values[strand][position] = averageValues[position]\n+\t\t\n+\n+\tdef plot(self):\n+\t\tplotter = 
RPlotter(self.outputFileName, self.verbosity)\n+\t\tfor strand in self.strandNames:\n+\t\t\tplotter.addLine(self.values[strand])\n+\t\tif self.log:\n+\t\t\tplotter.setLog("y")\n+\t\tplotter.setAxisLabel("x", {0: -self.distance, self.distance: "start", self.distance+self.nbPoints-1: "end", 2*self.distance+self.nbPoints-1: self.distance})\n+\t\tplotter.plot()\n+\n+\n+\n+if __name__ == "__main__":\n+\t\n+\t# parse command line\n+\tdescription = "Get WIG Profile v1.0.1: Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome). [Category: WIG Tools]"\n+\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input",\t\t\t dest="inputFileName",\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--inputFormat", dest="inputFormat",\t\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="format of the input file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-w", "--wig",\t\t\t\t dest="wig",\t\t\t\t\t\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="wig file name [compulsory] [format: file in WIG format]")\t\n+\tparser.add_option("-p", "--nbPoints",\t\t dest="nbPoints",\t\t\t\taction="store",\t\t\t default=1000,\ttype="int",\t\t help="number of points on the x-axis [compulsory] [format: int] [default: 1000]")\t\n+\tparser.add_option("-d", "--distance",\t\t dest="distance",\t\t\t\taction="store",\t\t\t default=0,\t\t\ttype="int",\t\t help="distance around genomic coordinates [compulsory] [format: int] [default: 0]")\t\n+\tparser.add_option("-s", "--strands",\t\t dest="strands",\t\t\t\taction="store_true", default=False,\t\t\t\t\t\t\t\t help="consider both strands separately [format: boolean] [default: False]")\t\n+\tparser.add_option("-m", "--smoothen",\t\t dest="smoothen",\t\t\t\taction="store",\t\t\t default=None,\ttype="int",\t\t 
help="smoothen the curve [format: int] [default: None]")\t\n+\tparser.add_option("-a", "--default",\t\t dest="defaultValue",\t action="store",\t\t\t default=0.0,\t type="float",\thelp="default value (when value is NA) [default: 0.0] [format: float]")\n+\tparser.add_option("-o", "--output",\t\t\t dest="outputFileName", action="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="output file [compulsory] [format: output file in PNG format]")\n+\tparser.add_option("-l", "--log",\t\t\t\t dest="log",\t\t\t\t\t\taction="store_true", default=False,\t\t\t\t\t\t\t\t help="use log scale for y-axis\t[format: boolean] [default: False]")\n+\tparser.add_option("-v", "--verbosity",\t dest="verbosity",\t\t\taction="store",\t\t\t default=1,\t\t\ttype="int",\t\t help="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\twigProfile\t\t\t\t\t\t\t\t= GetWigProfile(options.verbosity)\n+\twigProfile.strands\t\t\t \t= options.strands\n+\twigProfile.inputFileName\t= options.inputFileName\n+\twigProfile.inputFormat\t\t= options.inputFormat\n+\twigProfile.wig\t\t\t\t\t\t= options.wig\n+\twigProfile.nbPoints\t\t\t\t= options.nbPoints\n+\twigProfile.distance\t\t\t\t= options.distance\n+\twigProfile.smoothenForce\t= options.smoothen\n+\twigProfile.defaultValue\t = options.defaultValue\n+\twigProfile.outputFileName = options.outputFileName\n+\twigProfile.log\t\t\t\t\t\t= options.log\n+\n+\twigProfile.readTranscripts()\n+\twigProfile.smoothen()\n+\twigProfile.plot()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mapperAnalyzer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mapperAnalyzer.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,486 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""\n+Read a mapping file (many formats supported) and select some of them\n+Mappings should be sorted by read names\n+"""\n+import os, random, shelve\n+from optparse import OptionParser, OptionGroup\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.parsing.FastaParser import FastaParser\n+from commons.core.parsing.FastqParser import FastqParser\n+from commons.core.parsing.GffParser import GffParser\n+from commons.core.writer.BedWriter import BedWriter\n+from commons.core.writer.UcscWriter import UcscWriter\n+from commons.core.writer.GbWriter import GbWriter\n+from commons.core.writer.Gff2Writer import Gff2Writer\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from commons.core.writer.FastaWriter import FastaWriter\n+from commons.core.writer.FastqWriter import FastqWriter\n+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n+from SMART.Java.Python.mySql.MySqlTable import MySqlTable\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+\n+\n+distanceExons = 20\n+exonSize = 20\n+\n+\n+class MapperAnalyzer(object):\n+ """\n+ Analyse the output of a parser\n+ """\n+\n+ def __init__(self, verbosity = 0):\n+ self.verbosity = verbosity\n+ self.mySqlConnection = MySqlConnection(verbosity)\n+ self.tooShort = 0\n+ self.tooManyMismatches = 0\n+ self.tooManyGaps = 0\n+ 
self.tooShortExons = 0\n+ self.tooManyMappings = 0\n+ self.nbMappings = 0\n+ self.nbSequences = 0\n+ self.nbAlreadyMapped = 0\n+ self.nbAlreadyMappedSequences = 0\n+ self.nbWrittenMappings = 0\n+ self.nbWrittenSequences = 0\n+ self.parser = None\n+ self.logHandle = None\n+ self.randomNumber = random.randint(0, 100000)\n+ self.gff3Writer = None\n+ self.alreadyMappedReader = None\n+ self.unmatchedWriter = None\n+ self.sequenceListParser = None\n+ self.sequences = None\n+ self.alreadyMapped = None\n+ self.mappedNamesTable = None\n+ s'..b'up.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")\n+ otheGroup.add_option("-l", "--log", dest="log", action="store_true", default=False, help="write a log file [format: bool] [default: false]")\n+ parser.add_option_group(compGroup)\n+ parser.add_option_group(filtGroup)\n+ parser.add_option_group(tranGroup)\n+ parser.add_option_group(outpGroup)\n+ parser.add_option_group(otheGroup)\n+ (options, args) = parser.parse_args()\n+\n+ \n+ analyzer = MapperAnalyzer(options.verbosity)\n+ analyzer.setMappingFile(options.inputFileName, options.format)\n+ analyzer.setSequenceFile(options.sequencesFileName, options.sequenceFormat)\n+ analyzer.setOutputFile(options.outputFileName, options.title)\n+ if options.appendFileName != None:\n+ analyzer.setAlreadyMatched(options.appendFileName)\n+ if options.remaining:\n+ analyzer.setRemainingFile(options.outputFileName, options.sequenceFormat)\n+ if options.number != None:\n+ analyzer.setMaxMappings(options.number)\n+ if options.size != None:\n+ analyzer.setMinSize(options.size)\n+ if options.identity != None:\n+ analyzer.setMinId(options.identity)\n+ if options.mismatch != None:\n+ analyzer.setMaxMismatches(options.mismatch)\n+ if options.gap != None:\n+ analyzer.setMaxGaps(options.gap)\n+ if options.mergeExons:\n+ analyzer.mergeExons(True)\n+ if options.removeExons:\n+ analyzer.acceptShortExons(False)\n+ if 
options.log:\n+ analyzer.setLog("%s.log" % (options.outputFileName))\n+ analyzer.analyze()\n+ \n+ if options.verbosity > 0:\n+ print "kept %i sequences over %s (%f%%)" % (analyzer.nbWrittenSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)\n+ if options.appendFileName != None:\n+ print "kept %i sequences over %s (%f%%) including already mapped sequences" % (analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)\n+ print "kept %i mappings over %i (%f%%)" % (analyzer.nbWrittenMappings, analyzer.nbMappings, float(analyzer.nbWrittenMappings) / analyzer.nbMappings * 100)\n+ if options.appendFileName != None:\n+ print "kept %i mappings over %i (%f%%) including already mapped" % (analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, analyzer.nbMappings, float(analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped) / analyzer.nbMappings * 100)\n+ print "removed %i too short mappings (%f%%)" % (analyzer.tooShort, float(analyzer.tooShort) / analyzer.nbMappings * 100)\n+ print "removed %i mappings with too many mismatches (%f%%)" % (analyzer.tooManyMismatches, float(analyzer.tooManyMismatches) / analyzer.nbMappings * 100)\n+ print "removed %i mappings with too many gaps (%f%%)" % (analyzer.tooManyGaps, float(analyzer.tooManyGaps) / analyzer.nbMappings * 100)\n+ print "removed %i mappings with too short exons (%f%%)" % (analyzer.tooShortExons, float(analyzer.tooShortExons) / analyzer.nbMappings * 100)\n+ print "removed %i sequences with too many hits (%f%%)" % (analyzer.tooManyMappings, float(analyzer.tooManyMappings) / analyzer.nbSequences * 100)\n+ print "%i sequences have no mapping (%f%%)" % (analyzer.nbSequences - analyzer.nbWrittenSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)\n+ if options.appendFileName != None:\n+ print "%i sequences have 
no mapping (%f%%) excluding already mapped sequences" % (analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mappingToCoordinates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mappingToCoordinates.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,91 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+
+"""Convert files with some mapping format to coordinates format"""
+
+import os
+from optparse import OptionParser
+from commons.core.parsing.PslParser import PslParser
+from commons.core.parsing.AxtParser import AxtParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class MappingToCoordinates(object):
+    """
+    Convert a file of mappings into a GFF3 file of transcripts.
+
+    The settings are given either to the constructor or on the command line
+    (see setAttributesFromCmdLine); run() then performs the conversion.
+    """
+
+    def __init__(self, verbosity=1, inputFileName=None, format=None, output=None, galaxy=False, title="S-MART"):
+        """
+        Store the conversion settings.
+        @param verbosity:     trace level (messages are printed when > 0)
+        @param inputFileName: name of the mapping file to read
+        @param format:        format name handed to TranscriptContainer
+        @param output:        output name handed to Gff3Writer
+        @param galaxy:        if true, run() renames "<output>.gff3" to <output>
+        @param title:         title handed to Gff3Writer
+        """
+        self.title         = title
+        self.galaxy        = galaxy
+        self.output        = output
+        self.format        = format
+        self.inputFileName = inputFileName
+        self.verbosity     = verbosity
+
+    def setAttributesFromCmdLine(self):
+        """
+        Parse the command line and overwrite the settings with its values.
+        The title has no command-line option and is therefore left untouched.
+        """
+        parser = OptionParser(description="Mapping To Coordinates v1.0.1: Convert a set of mappings (given by a mapping tool) to a set of transcripts. [Category: Conversion]")
+        parser.add_option("-i", "--input", dest="inputFileName", type="string", action="store",
+                          help="input file [compulsory] [format: file in mapping format given by -f]")
+        parser.add_option("-f", "--format", dest="format", type="string", action="store",
+                          help="format of file [compulsory] [format: mapping file format]")
+        parser.add_option("-o", "--output", dest="output", type="string", action="store", default=None,
+                          help="output file [compulsory] [format: output file in GFF3 format]")
+        parser.add_option("-v", "--verbosity", dest="verbosity", type="int", action="store", default=1,
+                          help="trace level [format: int]")
+        parser.add_option("-G", "--galaxy", dest="galaxy", action="store_true", default=False,
+                          help="used for galaxy [format: bool] [default: False]")
+        options = parser.parse_args()[0]
+
+        # Every parsed option is stored under an attribute of the same name.
+        for attributeName in ("verbosity", "inputFileName", "format", "output", "galaxy"):
+            setattr(self, attributeName, getattr(options, attributeName))
+
+    def run(self):
+        """
+        Read every transcript of the input file and add it to a GFF3 writer.
+        """
+        talkative = self.verbosity > 0
+        if talkative:
+            print("Reading input file...")
+        container = TranscriptContainer(self.inputFileName, self.format, self.verbosity)
+        if talkative:
+            print("... done")
+        gff3Writer = Gff3Writer(self.output, self.verbosity, self.title)
+
+        tracker = Progress(container.getNbTranscripts(), "Reading %s" % (self.inputFileName), self.verbosity)
+        for transcript in container.getIterator():
+            gff3Writer.addTranscript(transcript)
+            tracker.inc()
+        tracker.done()
+
+        if not self.galaxy:
+            return
+        # Galaxy expects the result under the exact name it supplied.
+        # NOTE(review): presumably Gff3Writer appends ".gff3" to the output name -- confirm.
+        # NOTE(review): the writer is not explicitly closed before the rename -- confirm
+        # that Gff3Writer has flushed its data at this point.
+        os.rename("%s.gff3" % (self.output), self.output)
+
+if __name__ == '__main__':
+    # Script entry point: take the settings from the command line, then convert.
+    converter = MappingToCoordinates()
+    converter.setAttributesFromCmdLine()
+    converter.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mergeSlidingWindowsClusters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mergeSlidingWindowsClusters.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,144 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Merge sliding windows of two different clusterings
+"""
+
+import sys
+import re
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.structure.Transcript import Transcript
+
+class MergeSlidingWindowsClusters(object):
+    """
+    Merge the output of several sets of sliding windows
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Create a merger with no input and no output set.
+        @param verbosity: trace level handed to the readers, writer and progress bars
+        """
+        self.verbosity     = verbosity
+        self.writer        = None
+        self.inputs        = []
+        self.chromosomes   = []
+        # regions read so far: chromosome -> direction -> start -> end -> tags
+        self.outputData    = {}
+        self.nbData        = 0
+        self.nbWrittenData = 0
+
+    def __del__(self):
+        """
+        Close the writer, if one was set.
+        """
+        if self.writer is None:
+            return
+        self.writer.close()
+
+    def addInput(self, fileName, fileFormat):
+        """
+        Register an input file and add its chromosomes to the known ones.
+        @param fileName:   name of the input file
+        @param fileFormat: format name handed to TranscriptContainer
+        """
+        container = TranscriptContainer(fileName, fileFormat, self.verbosity)
+        self.inputs.append(container)
+        self.chromosomes = list(set(self.chromosomes) | set(container.getChromosomes()))
+
+    def setOutput(self, fileName):
+        """
+        Create the GFF3 writer which receives the merged regions.
+        @param fileName: name handed to Gff3Writer
+        """
+        self.writer = Gff3Writer(fileName, self.verbosity)
+
+    def readInput(self, i, chromosome):
+        """
+        Store the regions of one input which lie on the given chromosome.
+        Regions with identical direction, start and end have their tags merged.
+        @param i:          index of the input in self.inputs
+        @param chromosome: name of the chromosome to keep
+        """
+        container = self.inputs[i]
+        tracker   = Progress(container.getNbTranscripts(), "Reading file #%d -- chromosome %s" % (i+1, chromosome), self.verbosity)
+        for transcript in container.getIterator():
+            tracker.inc()
+            if chromosome != transcript.getChromosome():
+                continue
+            start     = transcript.getStart()
+            end       = transcript.getEnd()
+            direction = transcript.getDirection()
+            tags      = transcript.tags
+            # fetch (creating the missing levels) the "end -> tags" map of this start
+            knownEnds = self.outputData.setdefault(chromosome, {}).setdefault(direction, {}).setdefault(start, {})
+            if end not in knownEnds:
+                knownEnds[end] = tags
+                self.nbData += 1
+                continue
+            # NOTE(review): this check is only reached when "end" is already stored, so it
+            # can only fail if several ends share this start -- confirm the intent.
+            firstEnd = knownEnds.keys()[0]
+            if firstEnd != end:
+                sys.exit("Error! Two regions starting at %d end are not consistent (%d and %d) in %s on strand %d" % (start, end, firstEnd, chromosome, direction))
+            knownEnds[end].update(tags)
+        tracker.done()
+
+
+    def writeOutput(self, chromosome):
+        """
+        Write the regions stored for the given chromosome, then forget all stored regions.
+        Tags whose name starts with "Name_" or "ID_" are removed before writing.
+        @param chromosome: name of the chromosome to write
+        """
+        tracker = Progress(self.nbData - self.nbWrittenData, "Writing output for chromosome %s" % (chromosome), self.verbosity)
+        for direction, perStart in self.outputData[chromosome].items():
+            for start, perEnd in perStart.items():
+                for end, regionTags in perEnd.items():
+                    region = Transcript()
+                    region.setChromosome(chromosome)
+                    region.setStart(start)
+                    region.setEnd(end)
+                    region.setDirection(direction)
+                    region.tags = regionTags
+                    region.setName("region_%d" % (self.nbWrittenData + 1))
+                    for tagName in region.getTagNames():
+                        if tagName.startswith(("Name_", "ID_")):
+                            del region.tags[tagName]
+                    self.nbWrittenData += 1
+                    self.writer.addTranscript(region)
+                    tracker.inc()
+        self.writer.write()
+        tracker.done()
+        self.outputData = {}
+
+    def merge(self):
+        """
+        Read every input and write the merged regions, one chromosome at a time,
+        then close the writer.
+        """
+        for chromosome in self.chromosomes:
+            for index in range(len(self.inputs)):
+                self.readInput(index, chromosome)
+            self.writeOutput(chromosome)
+        self.writer.close()
+
+
+if __name__ == "__main__":
+
+    # Command-line entry point: merge the two given sliding-window files into one GFF3 file.
+    parser = OptionParser(description = "Merge Sliding Windows Clusters v1.0.2: Merge two files containing the results of a sliding windows clustering. [Category: Sliding Windows]")
+    # All file-related options are plain string options; declare them from a table.
+    for shortFlag, longFlag, destination, helpText in (
+            ("-i", "--input1",       "inputFileName1", "input file 1 [compulsory] [format: file in transcript format given by -f]"),
+            ("-f", "--inputFormat1", "inputFormat1",   "format of the input file 1 [compulsory] [format: transcript file format]"),
+            ("-j", "--input2",       "inputFileName2", "input file 2 [compulsory] [format: file in transcript format given by -g]"),
+            ("-g", "--inputFormat2", "inputFormat2",   "format of the input file 2 [compulsory] [format: transcript file format]"),
+            ("-o", "--output",       "outputFileName", "output file [compulsory] [format: output file in GFF3 format]")):
+        parser.add_option(shortFlag, longFlag, dest=destination, action="store", type="string", help=helpText)
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
+    options = parser.parse_args()[0]
+
+    merger = MergeSlidingWindowsClusters(options.verbosity)
+    for fileName, fileFormat in ((options.inputFileName1, options.inputFormat1),
+                                 (options.inputFileName2, options.inputFormat2)):
+        merger.addInput(fileName, fileFormat)
+    merger.setOutput(options.outputFileName)
+    merger.merge()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mergeTranscriptLists.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mergeTranscriptLists.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,174 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Merge elements of two transcript lists with some condition"""\n+\n+import os, random, shutil, glob\n+from optparse import OptionParser\n+from commons.core.parsing.SequenceListParser import SequenceListParser\n+from commons.core.parsing.BedParser import BedParser\n+from commons.core.parsing.GffParser import GffParser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+\n+\n+class MergeLists(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.seed = random.randint(0, 100000)\n+ self.aggregation = False\n+ self.normalization = False\n+ self.distance = False\n+ self.antisense = False\n+ self.colinear = False\n+ self.fileNames = {}\n+ self.formats = {}\n+ self.tmpFileNames = []\n+ self.logHandle = None\n+\n+# def __del__(self):\n+# for fileNameRoot in self.tmpFileNames:\n+# for fileName in glob.glob("%s*" % (fileNameRoot)):\n+# os.remove(fileName)\n+# if self.logHandle != None:\n+# self.logHandle.close()\n+# self.logHandle = None\n+\n+ def setLogFileName(self, fileName):\n+ self.logHandle = open(fileName, "w")\n+\n+ def setInputFileName(self, fileName, format, id):\n+ self.fileNames[id] = fileName\n+ self.formats[id] = format\n+\n+ def setOutputFileName(self, fileName):\n+ self.outputFileName = 
fileName\n+\n+ def setAggregate(self, aggregation):\n+ self.aggregation = aggregation\n+\n+ def setNormalization(self, normalization):\n+ self.normalization = normalization\n+\n+ def setDistance(self, distance):\n+ self.distance = distance\n+\n+ def setAntisense(self, antisense):\n+ self.antisense = antisense\n+\n+ def setColinear(self, colinear):\n+ self.colinear = colinear\n+\n+ def createTmpFileName(self, root):\n+ fileName = "tmp_%s_%d.gff3" % (root, self.seed)\n+ self.tmpFileNames.append(fileName)\n+ return fileName\n+\n+ def selfMerge(self, fileName, format, outputFileName):\n+ transcriptListComparator = TranscriptListsComparator(self.logHandle,'..b'nscriptListComparator.compareTranscriptList()\n+\n+ def mergeFiles(self, fileName1, fileName2, outputFileName):\n+ outputFile = open(outputFileName, "w")\n+ shutil.copyfileobj(open(fileName1, "r"), outputFile)\n+ shutil.copyfileobj(open(fileName2, "r"), outputFile)\n+ outputFile.close()\n+\n+ def run(self):\n+ selectedFileQuery = self.createTmpFileName("query")\n+ self.keepOverlapping({0: self.fileNames[0], 1: self.fileNames[0]}, {0: "gff3", 1: "gff3"}, selectedFileQuery)\n+ mergeFileTarget = self.createTmpFileName("target")\n+ self.selfMerge(self.fileNames[1], self.formats[1], mergeFileTarget)\n+ if not self.aggregation:\n+ overlapFile = self.createTmpFileName("overlap")\n+ self.keepOverlapping({0: mergeFileTarget, 1: selectedFileQuery}, {0: "gff3", 1: "gff3"}, overlapFile)\n+ mergeFileTarget = overlapFile\n+ mergeFileMerged = self.createTmpFileName("merged")\n+ self.mergeFiles(mergeFileTarget, selectedFileQuery, mergeFileMerged)\n+ self.selfMerge(mergeFileMerged, "gff3", self.outputFileName)\n+\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Merge Lists v1.0.3: Merge the elements of two lists of genomic coordinates. 
[Category: Merge]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", default=None, type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--format2", dest="format2", action="store", default=None, type="string", help="format of file 2 [compulsory] [format: file in transcript format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-k", "--all", dest="all", action="store_true", default=False, help="print all the transcripts, not only those overlapping [format: bool] [default: false]")\n+ parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="max. 
distance between two transcripts [format: int] [default: 0]")\n+ parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="antisense only [format: bool] [default: false]")\n+ parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="colinear only [format: bool] [default: false]")\n+ parser.add_option("-n", "--normalize", dest="normalize", action="store_true", default=False, help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+# ml = MergeLists(logHandle, options.verbosity)\n+ \n+ ml = MergeLists(0)\n+ ml.setInputFileName(options.inputFileName1, options.format1, 0)\n+ ml.setInputFileName(options.inputFileName2, options.format2, 1)\n+ ml.setOutputFileName(options.outputFileName)\n+ ml.setAntisense(options.antisense)\n+ ml.setColinear(options.colinear)\n+ ml.setAggregate(options.all)\n+ ml.setNormalization(options.normalize)\n+ ml.setDistance(options.distance)\n+ ml.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/MultipleRPlotter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/MultipleRPlotter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,160 @@
+#
+# Copyright INRA-URGI 2009-2012
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import os
+import subprocess
+import random
+import math
+from SMART.Java.Python.misc.RPlotter import RPlotter
+
+# Number of colors requested from the RColorBrewer "Set1" palette.
+NBCOLORS = 9
+
+"""
+Plot multiple curves with RPlotter
+"""
+
+class MultipleRPlotter(object):
+    """
+    Stack several plots in a single image, one plot per row.
+
+    A temporary R script is written that opens one graphics device, splits
+    it into len(plots) rows, appends the script of every registered plot and
+    is then executed with "R CMD BATCH".
+
+    @ivar fileName:       name of the image file to produce
+    @type fileName:       string
+    @ivar verbosity:      verbosity
+    @type verbosity:      int
+    @ivar keep:           keep the temporary script files
+    @type keep:           boolean
+    @ivar format:         format of the image ("png" by default)
+    @type format:         string
+    @ivar width:          width of the image
+    @type width:          int
+    @ivar height:         height of the image
+    @type height:         int
+    @ivar plots:          plots to be included, top to bottom
+    @type plots:          list of objects providing getScript()
+    @ivar scriptFileName: name of the temporary R script
+    @type scriptFileName: string
+    """
+
+    def __init__(self, fileName, verbosity = 0, keep = False):
+        """
+        Constructor
+        @param fileName:  name of the file to produce
+        @type  fileName:  string
+        @param verbosity: verbosity
+        @type  verbosity: int
+        @param keep:      keep temporary files
+        @type  keep:      boolean
+        """
+        self.fileName = fileName
+        self.verbosity = verbosity
+        self.keep = keep
+        self.format = "png"
+        self.width = 1000
+        self.height = 500
+        self.plots = []
+        # The script name only depends on the process id.
+        self.scriptFileName = "tmpScript-%d.R" % (os.getpid())
+
+    def __del__(self):
+        """
+        Destructor
+        Remove the R script and the R output file, unless 'keep' is set
+        """
+        if self.keep:
+            return
+        # The output file name is the script name with "out" appended
+        # ("tmpScript-<pid>.Rout").
+        for tmpFileName in (self.scriptFileName, "%sout" % (self.scriptFileName)):
+            if os.path.exists(tmpFileName):
+                os.remove(tmpFileName)
+
+    def setFormat(self, format):
+        """
+        Set the format of the picture
+        @param format: the format (png, pdf, jpeg, bmp or tiff)
+        @type  format: string
+        @raise Exception: if the format is not one of the supported ones
+        """
+        if format not in ("png", "pdf", "jpeg", "bmp", "tiff"):
+            raise Exception("Format '%s' is not supported by RPlotter" % (format))
+        self.format = format
+
+
+    def setWidth(self, width):
+        """
+        Set the width of the image produced
+        @param width: width of the image
+        @type  width: int
+        """
+        self.width = width
+
+
+    def setHeight(self, height):
+        """
+        Set the height of the image produced
+        @param height: height of the image
+        @type  height: int
+        """
+        self.height = height
+
+
+    def setImageSize(self, width, height):
+        """
+        Set the dimensions of the image produced
+        @param width:  width of the image
+        @type  width:  int
+        @param height: height of the image
+        @type  height: int
+        """
+        self.width = width
+        self.height = height
+
+    def addPlot(self, plot):
+        """
+        Add a plot; plots are drawn in the order they are added
+        @param plot: plot to be included (its getScript() method is used)
+        @type  plot: L{RPlotter}
+        """
+        self.plots.append(plot)
+
+    def plot(self):
+        """
+        Write the R script and run it to produce the image
+        @raise Exception: if R exits with a non-zero status; in that case
+                          'keep' is switched on so that the script is left
+                          on disk for inspection
+        """
+        # The graphics device functions take "filename", except pdf() which
+        # takes "file".
+        fileParameter = "filename" if self.format != "pdf" else "file"
+        scriptHandle = open(self.scriptFileName, "w")
+        try:
+            scriptHandle.write("library(RColorBrewer)\n")
+            scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\"Set1\")\n" % (NBCOLORS))
+            scriptHandle.write("%s(%s = \"%s\", width = %d, height = %d, bg = \"white\")\n" % (self.format, fileParameter, self.fileName, self.width, self.height))
+            # One row per plot, a single column.
+            scriptHandle.write("par(mfrow=c(%d, 1))\n" % (len(self.plots)))
+            for plot in self.plots:
+                scriptHandle.write(plot.getScript())
+            scriptHandle.write("dev.off()\n")
+        finally:
+            # Close the handle even if a plot fails to produce its script.
+            scriptHandle.close()
+        # The R executable can be overridden with the SMARTRPATH variable.
+        rCommand = os.environ.get("SMARTRPATH", "R")
+        command = "\"%s\" CMD BATCH %s" % (rCommand, self.scriptFileName)
+        status = subprocess.call(command, shell=True)
+        if status != 0:
+            self.keep = True
+            raise Exception("Problem with the execution of script file %s, status is: %s" % (self.scriptFileName, status))
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/Progress.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/Progress.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,93 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+import time
+
+class Progress(object):
+    """
+    Show the progress of a process whose number of steps is known.
+
+    A text progress bar is written on the standard output and redrawn in
+    place (the line ends with a carriage return). Nothing is printed when
+    the verbosity is not positive.
+    """
+
+    def __init__(self, aim, message = "Progress", verbosity = 0):
+        """
+        Constructor; the empty bar is displayed immediately
+        @param aim:       number of steps to reach
+        @type  aim:       int
+        @param message:   text displayed before the bar
+        @type  message:   string
+        @param verbosity: nothing is displayed unless this is positive
+        @type  verbosity: int
+        """
+        self.aim            = aim
+        self.progress       = 0
+        self.message        = message
+        self.length         = -1      # length of the bar last drawn
+        self.verbosity      = verbosity
+        self.maxMessageSize = 50
+        self.barSize        = 80
+        self.startTime      = time.time()
+        self.elapsed        = 0       # elapsed time at the last redraw
+        # Long messages are truncated and end with an ellipsis.
+        if len(self.message) > self.maxMessageSize:
+            self.message = self.message[0:self.maxMessageSize-3] + "..."
+        self.show()
+
+
+    def inc(self):
+        """
+        Record that one more step is done and refresh the display
+        """
+        self.progress += 1
+        self.show()
+
+
+    def getPrintableElapsedTime(self, time):
+        """
+        Format a duration with its two most significant units
+        @param time: a duration, in seconds (this name hides the 'time'
+                     module inside the method)
+        @type  time: int or float
+        @return:     "<h>h <m>m", "<m>m <s>s" or "<s>s"
+        """
+        seconds = int(time)
+        # Floor division keeps the values integral whatever the
+        # interpreter's '/' semantics.
+        timeHou = seconds // 3600
+        timeMin = seconds // 60 - 60 * timeHou
+        timeSec = seconds % 60
+        if timeHou > 0:
+            return "%3dh %2dm" % (timeHou, timeMin)
+        if timeMin > 0:
+            return "%2dm %2ds" % (timeMin, timeSec)
+        return "%2ds   " % (timeSec)
+
+
+    def show(self):
+        """
+        Redraw the bar if it grew or if more than 10 seconds have passed
+        since the last redraw
+        """
+        if self.verbosity <= 0:
+            return
+        if self.aim == 0:
+            return
+        messageSize = len(self.message)
+        length = int(self.progress / float(self.aim) * self.barSize)
+        elapsed = int(time.time() - self.startTime)
+        if (length > self.length) or (elapsed > self.elapsed + 10):
+            self.length = length
+            self.elapsed = elapsed
+            string = "%s%s[%s%s] %d/%d" % (self.message, " " * max(0, self.maxMessageSize - messageSize), "=" * self.length, " " * (self.barSize - self.length), self.progress, self.aim)
+            # The remaining time is extrapolated from the completed
+            # fraction, so it cannot be estimated before the first step.
+            if elapsed > 5 and self.progress > 0:
+                done = float(self.progress) / self.aim
+                total = elapsed / done
+                remaining = total - elapsed
+                string += " ETA: %s " % (self.getPrintableElapsedTime(remaining))
+            string += "\r"
+            sys.stdout.write(string)
+            sys.stdout.flush()
+
+
+    def done(self):
+        """
+        Print the full bar and the total elapsed time, followed by a new line
+        """
+        if self.verbosity > 0:
+            messageSize = len(self.message)
+            elapsed = time.time() - self.startTime
+            sys.stdout.write("%s%s[%s] %d completed in %s \n" % (self.message, " " * max(0, self.maxMessageSize - messageSize), "=" * self.barSize, self.aim, self.getPrintableElapsedTime(elapsed)))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/Progress.pyc

Binary file SMART/Java/Python/misc/Progress.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/RPlotter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/RPlotter.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,820 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+\n+import os\n+import subprocess\n+import random\n+import math\n+\n+minPositiveValue = 10e-6\n+\n+"""\n+Plot simple curves in R\n+"""\n+\n+class RPlotter(object):\n+ """\n+ Plot some curves\n+ @ivar nbColors: number of different colors\n+ @type nbColors: int\n+ @ivar fileName: name of the file\n+ @type fileName: string\n+ @ivar lines: lines to be plotted\n+ @type lines: array of dict\n+ @ivar names: name of the lines\n+ @type names: array of strings\n+ @ivar colors: color of the lines\n+ @type colors: array of strings\n+ @ivar types: type of the lines (plain or dashed)\n+ @type types: array of strings\n+ @ivar format: format of the picture\n+ @type format: string\n+ @ivar lineWidth: width of the line in a xy-plot\n+ @type lineWidth: int\n+ @ivar xMin: minimum value taken on the x-axis\n+ @type xMin: int\n+ @ivar xMax: maximum value taken on the x-axis\n+ @type xMax: int\n+ @ivar yMin: minimum value taken on the y-axis\n+ @type yMin: int\n+ @ivar yMax: maximum value taken on the y-axis\n+ @type yMax: int\n+ @ivar minimumX: minimum value allowed on the x-axis\n+ @type minimumX: int\n+ @ivar maximumX: maximum value allowed on the x-axis\n+ @type maximumX: int\n+ @ivar minimumY: minimum value allowed on the y-axis\n+ @type minimumY: int\n+ @ivar maximumY: maximum value allowed on the y-axis\n+ @type maximumY: int\n+ @ivar leftMargin: add some margin in the left part of the plot\n+ @type leftMargin: float\n+ @ivar rightMargin: add some margin in the right part of the plot\n+ @type rightMargin: float\n+ @ivar downMargin: add some margin at 
the top of the plot\n+ @type downMargin: float\n+ @ivar upMargin: add some margin at the bottom of the plot\n+ @type upMargin: float\n+ @ivar logX: use log scale on the x-axis\n+ @type logX: boolean\n+ @ivar logY: use log scale on the y-axis\n+ @type logY: boolean\n+ @ivar logZ: use log scale on the z-axis (the color)\n+ @type logZ: boolean\n+ @ival fill: if a value is not given, fill it with given value\n+ @type fill: int\n+ @ival bucket: cluster the data into buckets of given size\n+ @type bucket: int\n+ @ival seed: a random number\n+ @type seed: int\n+ @ival regression: plot a linear regression\n+ @type regression: boolean\n+ @ival legend: set the legend\n+ @type legend: boolean\n+ @ival legendBySide: set the legend outside of the plot\n+ @type legendBySde: boolean\n+ @ival xLabel: l'..b' lwd = %d, cex = 1.5, ncol = 1, bg = \\"white\\")\\n" % (self.lineWidth)\n+\n+ return script\n+ \n+\n+\n+ def plot(self):\n+ """\n+ Plot the lines\n+ """\n+ scriptFileName = "tmpScript-%d.R" % (self.seed)\n+ scriptHandle = open(scriptFileName, "w")\n+ scriptHandle.write("library(RColorBrewer)\\n")\n+ scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\\"Set1\\")\\n" % (self.nbColors))\n+ scriptHandle.write("%s(%s = \\"%s\\", width = %d, height = %d, bg = \\"white\\")\\n" % (self.format, "filename" if self.format != "pdf" else "file", self.fileName, self.width, self.height))\n+ scriptHandle.write(self.getScript())\n+ scriptHandle.write("dev.off()\\n")\n+ scriptHandle.close()\n+ rCommand = "R"\n+ if "SMARTRPATH" in os.environ:\n+ rCommand = os.environ["SMARTRPATH"]\n+ command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n+ status = subprocess.call(command, shell=True)\n+\n+ if status != 0:\n+ self.keep = True\n+ raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n+ \n+\n+ def getCorrelationData(self):\n+ if not self.regression:\n+ return ""\n+ scriptFileName = "tmpScript-%d.R" % (self.seed)\n+ rScript = 
open(scriptFileName, "w")\n+ rScript.write("data = scan(\\"tmpData-%d-0.dat\\", list(x = -0.000000, y = -0.000000))\\n" % (self.seed))\n+ x = "log10(data$x)" if self.logX else "data$x"\n+ y = "log10(data$y)" if self.logY else "data$y"\n+ rScript.write("summary(lm(%s ~ %s))\\n" % (y, x))\n+ rScript.close()\n+ rCommand = "R"\n+ if "SMARTRPATH" in os.environ:\n+ rCommand = os.environ["SMARTRPATH"]\n+ command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n+ status = subprocess.call(command, shell=True)\n+ if status != 0:\n+ self.keep = True\n+ raise Exception("Problem with the execution of script file %s computing the correlation, status is: %s" % (scriptFileName, status))\n+ outputRFile = open("%sout" % (scriptFileName))\n+ output = ""\n+ start = False\n+ end = False\n+ for line in outputRFile:\n+ if start and "> " in line:\n+ end = True\n+ if start and not end:\n+ output += line\n+ if "summary" in line:\n+ start = True\n+ return output\n+\n+\n+ def getSpearmanRho(self):\n+ """\n+ Get the Spearman rho correlation using R\n+ """\n+ return None\n+ if not self.points and not self.barplot and not self.heatPoints:\n+ raise Exception("Cannot compute Spearman rho correlation whereas not in \'points\' or \'bar\' mode.")\n+ \n+ scriptFileName = "tmpScript-%d.R" % (self.seed)\n+ rScript = open(scriptFileName, "w")\n+ rScript.write("library(Hmisc)\\n")\n+ rScript.write("data = scan(\\"tmpData-%d-0.dat\\", list(x = -0.000000, y = -0.000000))\\n" % (self.seed))\n+ rScript.write("spearman(data$x, data$y)\\n")\n+ rScript.close()\n+\n+ rCommand = "R"\n+ if "SMARTRPATH" in os.environ:\n+ rCommand = os.environ["SMARTRPATH"]\n+ command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n+ status = subprocess.call(command, shell=True)\n+\n+ if status != 0:\n+ self.keep = True\n+ raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n+\n+ outputRFile = open("%sout" % (scriptFileName))\n+ nextLine = False\n+ for line in 
outputRFile:\n+ line = line.strip()\n+ if nextLine:\n+ if line == "NA":\n+ return None\n+ return float(line)\n+ nextLine = False\n+ if line == "rho":\n+ nextLine = True\n+\n+ return None\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/UnlimitedProgress.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/UnlimitedProgress.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,81 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+import time
+
+class UnlimitedProgress(object):
+    """
+    Show the progress of a process when no upper bound is known.
+
+    A counter and the time spent are written on the standard output and
+    redrawn in place (the line ends with a carriage return). Nothing is
+    printed when the verbosity is not positive.
+    """
+
+    def __init__(self, step = 1000, message = "Progress", verbosity = 0):
+        """
+        Constructor; the counter is displayed immediately
+        @param step:      the display is refreshed every 'step' increments
+        @type  step:      int
+        @param message:   text displayed before the counter
+        @type  message:   string
+        @param verbosity: nothing is displayed unless this is positive
+        @type  verbosity: int
+        """
+        self.step           = step
+        self.progress       = 0
+        self.message        = message
+        self.verbosity      = verbosity
+        self.maxMessageSize = 50
+        self.startTime      = time.time()
+        self.elapsed        = 0       # elapsed time at the last redraw
+        # Long messages are truncated and end with an ellipsis.
+        if len(self.message) > self.maxMessageSize:
+            self.message = self.message[0:self.maxMessageSize-3] + "..."
+        self.show()
+
+
+    def inc(self):
+        """
+        Record that one more element is done and refresh the display
+        """
+        self.progress += 1
+        self.show()
+
+
+    def getPrintableElapsedTime(self, time):
+        """
+        Format a duration with its two most significant units
+        @param time: a duration, in seconds (this name hides the 'time'
+                     module inside the method)
+        @type  time: int or float
+        @return:     "<h>h <m>m", "<m>m <s>s" or "<s>s"
+        """
+        seconds = int(time)
+        # Floor division keeps the values integral whatever the
+        # interpreter's '/' semantics.
+        timeHou = seconds // 3600
+        timeMin = seconds // 60 - 60 * timeHou
+        timeSec = seconds % 60
+        if timeHou > 0:
+            return "%3dh %2dm" % (timeHou, timeMin)
+        if timeMin > 0:
+            return "%2dm %2ds" % (timeMin, timeSec)
+        return "%2ds" % (timeSec)
+
+
+    def show(self):
+        """
+        Redraw the counter every 'step' increments, or if more than
+        10 seconds have passed since the last redraw
+        """
+        if self.verbosity <= 0:
+            return
+        elapsed = int(time.time() - self.startTime)
+        if (self.progress % self.step == 0) or (elapsed > self.elapsed + 10):
+            self.elapsed = elapsed
+            string = "%s %d -- time spent: %s\r" % (self.message, self.progress, self.getPrintableElapsedTime(elapsed))
+            sys.stdout.write(string)
+            sys.stdout.flush()
+
+
+    def done(self):
+        """
+        Print the final counter and the total time spent, followed by a
+        new line
+        """
+        if self.verbosity > 0:
+            elapsed = time.time() - self.startTime
+            string = "%s %d -- time spent: %s\r" % (self.message, self.progress, self.getPrintableElapsedTime(elapsed))
+            sys.stdout.write(string + "\n")
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/UnlimitedProgress.pyc

Binary file SMART/Java/Python/misc/UnlimitedProgress.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/Utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/Utils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,271 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""Some useful functions"""\n+\n+import sys, os\n+import random\n+import subprocess\n+\n+\n+def writeFile(fileName, content):\n+ """\n+ Write the content of a file\n+ """\n+ handle = open(fileName, "w")\n+ handle.write(content)\n+ handle.close()\n+\n+def sumOfLists(list1, list2):\n+ """\n+ Element by element sum\n+ """\n+ if len(list1) != len(list2):\n+ sys.exit("Cannot sum list whose sizes are different!")\n+ return [list1[i] + list2[i] for i in range(len(list1))]\n+\n+\n+def protectBackslashes(string):\n+ """\n+ Protect the backslashes in a path by adding another backslash\n+ """\n+ return string.replace("\\\\", "\\\\\\\\")\n+ \n+\n+def getHammingDistance(string1, string2):\n+ """\n+ Compute Hamming distance between two strings\n+ """\n+ if len(string1) != len(string2):\n+ raise Exception("Error, size of %s and %s differ" % (string1, string2))\n+ return sum(ch1 != ch2 for ch1, ch2 in zip(string1, string2))\n+\n+\n+def getLevenshteinDistance(string1, string2):\n+ """\n+ Compute Levenshtein distance between two strings\n+ """\n+ if len(string1) < len(string2):\n+ return getLevenshteinDistance(string2, string1)\n+ if not string1:\n+ return len(string2)\n+ previousRow = xrange(len(string2) + 1)\n+ for i, c1 in enumerate(string1):\n+ currentRow = [i + 1]\n+ for j, c2 in enumerate(string2):\n+ insertions = previousRow[j + 1] + 1\n+ deletions = currentRow[j] + 1\n+ substitutions = previousRow[j] + (c1 != c2)\n+ currentRow.append(min(insertions, deletions, substitutions))\n+ previousRow = currentRow\n+ return previousRow[-1]\n+\n+\n+def 
getMinAvgMedMax(values):\n+ """\n+ Get some stats about a dict\n+ @param values: a distribution (the value being the number of occurrences of the key)\n+ @type values: dict int to int\n+ @return: a tuple\n+ """\n+ minValues = min(values.keys())\n+ maxValues = max(values.keys())\n+ sumValues = sum([value * values[value] for value in values])\n+ nbValues = sum(values.values())\n+ allValues = []\n+ for key in values:\n+ for i in range(values[key]):\n+ allValues.append(key)\n+ sortedValues = sorted(allValues)\n+ sorted(values.values())\n+ if (nbValues % 2 == 0):\n+ medValues = (sortedValues[nbValues / 2 - 1] + sortedValues[nbValues / 2]) / 2.0\n+ else:\n+ medValues = sortedValues[(nbValues + 1) / 2 - 1]\n+ return (minValues, float('..b'les differ (%d != %d)" % (len(lines1), len(lines2))\n+ return False\n+ for i in xrange(len(lines1)):\n+ if lines1[i] != lines2[i]:\n+ print "Line %d differ (\'%s\' != \'%s\')" % (i, lines1[i].strip(), lines2[i].strip())\n+ return False\n+ return True\n+\n+\n+def binomialCoefficient(a, b):\n+ """\n+ Compute cumulated product from a to b\n+ @param a: a value\n+ @type a: int\n+ @param b: a value\n+ @type b: int\n+ """\n+ if a > b / 2:\n+ a = b-a\n+ p = 1.0\n+ for i in range(b-a+1, b+1):\n+ p *= i\n+ q = 1.0\n+ for i in range(1, a+1):\n+ q *= i\n+ return p / q\n+\n+\n+memory = {}\n+\n+# def fisherExactPValue(a, b, c, d):\n+# """\n+# P-value of Fisher exact test for 2x2 contingency table\n+# """\n+# if (a, b, c, d) in memory:\n+# return memory[(a, b, c, d)]\n+\n+# n = a + b + c + d\n+# i1 = binomialCoefficient(a, a+b)\n+# i2 = binomialCoefficient(c, a+c)\n+# i3 = binomialCoefficient(c+d, n)\n+# pValue = i1 * i2 / i3\n+\n+# memory[(a, b, c, d)] = pValue\n+\n+# return pValue\n+ \n+\n+def fisherExactPValue(a, b, c, d):\n+ if (a, b, c, d) in memory:\n+ return memory[(a, b, c, d)]\n+\n+ scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))\n+ rScript = open(scriptFileName, "w")\n+ rScript.write("data = matrix(c(%d, %d, %d, %d), nr=2)\\n" 
% (a, b, c, d))\n+ rScript.write("fisher.test(data)\\n")\n+ #rScript.write("chisq.test(data)\\n")\n+ rScript.close()\n+\n+ rCommand = "R"\n+ if "SMARTRPATH" in os.environ:\n+ rCommand = os.environ["SMARTRPATH"]\n+ command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n+ status = subprocess.call(command, shell=True)\n+\n+ if status != 0:\n+ sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n+\n+ outputRFileName = "%sout" % (scriptFileName)\n+ outputRFile = open(outputRFileName)\n+ pValue = None\n+ pValueTag = "p-value "\n+ for line in outputRFile:\n+ line = line.strip()\n+ if line == "": continue\n+ for splittedLine in line.split(","):\n+ splittedLine = splittedLine.strip()\n+ if splittedLine.startswith(pValueTag):\n+ pValue = float(splittedLine.split()[-1])\n+ break\n+\n+ if pValue == None:\n+ sys.exit("Problem with the cannot find p-value! File %s, values are: %d, %d, %d, %d" % (scriptFileName, a, b, c, d))\n+\n+ os.remove(scriptFileName)\n+ os.remove(outputRFileName)\n+\n+ memory[(a, b, c, d)] = pValue\n+\n+ return pValue\n+\n+\n+def fisherExactPValueBulk(list):\n+\n+ scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))\n+ rScript = open(scriptFileName, "w")\n+ for element in list:\n+ rScript.write("fisher.test(matrix(c(%d, %d, %d, %d), nr=2))$p.value\\n" % (int(element[0]), int(element[1]), int(element[2]), int(element[3])))\n+ rScript.close()\n+\n+ rCommand = "R"\n+ if "SMARTRPATH" in os.environ:\n+ rCommand = os.environ["SMARTRPATH"]\n+ command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n+ status = subprocess.call(command, shell=True)\n+\n+ if status != 0:\n+ sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n+\n+ outputRFileName = "%sout" % (scriptFileName)\n+ outputRFile = open(outputRFileName)\n+ pValue = None\n+ pValueTag = "[1] "\n+ results = {}\n+ cpt = 0\n+ for line in outputRFile:\n+ line = line.strip()\n+ if line == "": 
continue\n+ if line.startswith(pValueTag):\n+ pValue = float(line.split()[-1])\n+ results[list[cpt][0:2]] = pValue\n+ cpt += 1\n+\n+ if pValue == None:\n+ sys.exit("Problem with the cannot find p-value!")\n+ if cpt != len(list):\n+ sys.exit("Error in the number of p-values computed by R in file \'%s\'!" % (scriptFileName))\n+\n+ os.remove(scriptFileName)\n+ os.remove(outputRFileName)\n+\n+ return results\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/Utils.pyc

Binary file SMART/Java/Python/misc/Utils.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/__init__.pyc

Binary file SMART/Java/Python/misc/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/misc/test/Test_Utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/misc/test/Test_Utils.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,13 @@
+import unittest
+from SMART.Java.Python.misc import Utils
+
+
+class Test_Utils(unittest.TestCase):
+    """Unit tests of the functions of the Utils module"""
+
+    def testFisherExactPValue(self):
+        """Check the p-value of the table (3, 1, 1, 3), up to 3 decimal places"""
+        observedPValue = Utils.fisherExactPValue(3, 1, 1, 3)
+        expectedPValue = 0.4857142857142842
+        self.assertAlmostEqual(observedPValue, expectedPValue, 3)
+
+
+# Run the tests when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/modifyFasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/modifyFasta.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,62 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Modify the content of a FASTA file"""
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.writer.FastaWriter import FastaWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Modify Sequence List v1.0.1: Extend or shrink a list of sequences. [Category: Data Modification]"
+
+    optionParser = OptionParser(description = description)
+    optionParser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in FASTA format]")
+    optionParser.add_option("-o", "--output",    dest="outputFileName", action="store", default=None, type="string", help="output file [compulsory] [format: output file in FASTA format]")
+    optionParser.add_option("-s", "--start",     dest="start",          action="store", default=None, type="int",    help="keep first nucleotides [format: int]")
+    optionParser.add_option("-e", "--end",       dest="end",            action="store", default=None, type="int",    help="keep last nucleotides [format: int]")
+    optionParser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
+    (options, args) = optionParser.parse_args()
+
+    # Copy every sequence of the input file to the output file, shrinking it
+    # on the way when -s and/or -e are given. The command line parser and the
+    # FASTA parser are distinct objects and have distinct names.
+    fastaParser = FastaParser(options.inputFileName, options.verbosity)
+    writer      = FastaWriter(options.outputFileName, options.verbosity)
+    progress    = Progress(fastaParser.getNbSequences(), "Reading file %s" % (options.inputFileName), options.verbosity)
+    for sequence in fastaParser.getIterator():
+        # An option left unset is None; 0 is a value given by the user.
+        if options.start is not None:
+            sequence.shrinkToFirstNucleotides(options.start)
+        if options.end is not None:
+            sequence.shrinkToLastNucleotides(options.end)
+        writer.addSequence(sequence)
+        progress.inc()
+    progress.done()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/modifyGenomicCoordinates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/modifyGenomicCoordinates.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,80 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Modify the genomic coordinates of a file"""
+
+from optparse import OptionParser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    # (-i, -f and -o are announced as compulsory; -s, -e, -5 and -3 are only applied when given)
+    description = "Modify Genomic Coordinates v1.0.1: Extend or shrink a list of genomic coordinates. [Category: Data Modification]"
+
+    optionParser = OptionParser(description = description)
+    optionParser.add_option("-i", "--input",      dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    optionParser.add_option("-f", "--format",     dest="format",         action="store",               type="string", help="format of the input [compulsory] [format: transcript file format]")
+    optionParser.add_option("-o", "--output",     dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    optionParser.add_option("-s", "--start",      dest="start",          action="store", default=None, type="int",    help="restrict to the start of the transcript [format: int]")
+    optionParser.add_option("-e", "--end",        dest="end",            action="store", default=None, type="int",    help="restrict to the end of the transcript [format: int]")
+    optionParser.add_option("-5", "--fivePrime",  dest="fivePrime",      action="store", default=None, type="int",    help="extend to the 5' direction [format: int]")
+    optionParser.add_option("-3", "--threePrime", dest="threePrime",     action="store", default=None, type="int",    help="extend to the 3' direction [format: int]")
+    optionParser.add_option("-v", "--verbosity",  dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
+
+    (options, args) = optionParser.parse_args()
+
+    # the container reads the input transcripts, the writer outputs them in GFF3
+    # (distinct names: the option parser is no longer overwritten by the container)
+    container = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    writer = TranscriptWriter(options.outputFileName, "gff3", options.verbosity)
+
+    nbItems = container.getNbItems()
+    # parenthesised single-argument form: prints the same text with the Python 2 print statement
+    print("%i items found" % (nbItems))
+
+    progress = Progress(nbItems, "Analyzing sequences of " + options.inputFileName, options.verbosity)
+    for transcript in container.getIterator():
+        # the modifications are applied in a fixed order: restrictions first, then extensions
+        # ("is not None" keeps an explicit value of 0 meaningful)
+        if options.start is not None:
+            transcript.restrictStart(options.start)
+        if options.end is not None:
+            transcript.restrictEnd(options.end)
+        if options.fivePrime is not None:
+            transcript.extendStart(options.fivePrime)
+        if options.threePrime is not None:
+            transcript.extendEnd(options.threePrime)
+
+        writer.addTranscript(transcript)
+
+        progress.inc()
+    progress.done()
+
+    writer.write()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/modifySequenceList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/modifySequenceList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,72 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Modify the content of a FASTA file"""
+import sys
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.parsing.FastqParser import FastqParser
+from commons.core.writer.FastaWriter import FastaWriter
+from commons.core.writer.FastqWriter import FastqWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    # (-i, -o and -f are announced as compulsory; -s and -e are only applied when given)
+    description = "Modify Sequence List v1.0.1: Extend or shrink a list of sequences. [Category: Data Modification]"
+
+    optionParser = OptionParser(description = description)
+    optionParser.add_option("-i", "--input", dest="inputFileName",action="store", type="string", help="input file [compulsory] [format: file in format given by -f]")
+    optionParser.add_option("-o", "--output", dest="outputFileName", action="store",default=None,    type="string", help="output file [compulsory] [format: output file in format given by -f]")
+    optionParser.add_option("-f", "--format", dest="format",action="store",type="string", help="format of the file [compulsory] [format: sequence file format]")
+    optionParser.add_option("-s", "--start", dest="start", action="store", default=None,type="int",help="keep first nucleotides [format: int]")
+    optionParser.add_option("-e", "--end",  dest="end", action="store",default=None,type="int",help="keep last nucleotides [format: int]")
+    optionParser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
+    (options, args) = optionParser.parse_args()
+
+    # reader and writer classes of each supported format; any other format (or no format) stops the script
+    handlers = {"fasta": (FastaParser, FastaWriter), "fastq": (FastqParser, FastqWriter)}
+    if options.format not in handlers:
+        sys.exit("Do not understand '%s' file format." % (options.format))
+    parserClass, writerClass = handlers[options.format]
+    # (distinct names: the option parser is no longer overwritten by the sequence parser)
+    sequenceParser = parserClass(options.inputFileName, options.verbosity)
+    writer = writerClass(options.outputFileName, options.verbosity)
+
+    progress = Progress(sequenceParser.getNbSequences(), "Reading file %s" % (options.inputFileName), options.verbosity)
+    for sequence in sequenceParser.getIterator():
+        # when both options are given, the sequence is first cut to its first nucleotides, then to its last ones
+        if options.start is not None:
+            sequence.shrinkToFirstNucleotides(options.start)
+        if options.end is not None:
+            sequence.shrinkToLastNucleotides(options.end)
+        writer.addSequence(sequence)
+        progress.inc()
+    progress.done()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlConnection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/MySqlConnection.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,109 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+#! /usr/bin/env python
+import os
+import random
+import sqlite3
+from SMART.Java.Python.mySql.MySqlQuery import MySqlQuery
+
+
+class MySqlConnection(object):
+    """
+    Connection to a database
+    Despite its name, the class opens a SQLite database file through the sqlite3 module
+    @ivar verbosity: verbosity, given to every query
+    @type verbosity: int
+    @ivar databaseName: path of the database file
+    @type databaseName: string
+    @ivar connection: the sqlite3 connection to the file
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        Open the file "smartdb<random number>" in the directory given by the SMARTTMPPATH environment variable (current directory if unset), then set the PRAGMAs of the connection
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.verbosity = verbosity
+        self.databaseName = "%s%ssmartdb%d" % (os.environ.get("SMARTTMPPATH", "."), os.sep, random.randint(0, 100000))
+        self.connection = sqlite3.connect(self.databaseName)
+        for pragma in ("PRAGMA journal_mode = OFF", "PRAGMA synchronous = 0", "PRAGMA locking_mode = EXCLUSIVE", "PRAGMA count_change = OFF", "PRAGMA temp_store = 2"):
+            self.executeQuery(pragma)
+
+    def __del__(self):
+        # the attribute is missing when sqlite3.connect failed in the constructor: nothing to close then
+        connection = getattr(self, "connection", None)
+        if connection is not None:
+            connection.close()
+
+
+    def createDatabase(self):
+        """
+        Do nothing (the database file is opened by the constructor)
+        """
+        pass
+
+
+    def deleteDatabase(self):
+        """
+        Remove the database file, if it exists
+        """
+        if os.path.exists(self.databaseName):
+            os.remove(self.databaseName)
+
+
+    def _commitWithRetry(self, action):
+        """
+        Run an action then commit; if any of the two fails, run the action and commit a second time
+        The second failure, if any, is not caught
+        Only Exception is caught, so that KeyboardInterrupt and SystemExit are no longer answered by a second attempt
+        @param action: the work to do before the commit
+        @type action: function without parameter
+        @return: what the action returned
+        """
+        try:
+            result = action()
+            self.connection.commit()
+        except Exception:
+            result = action()
+            self.connection.commit()
+        return result
+
+
+    def executeQuery(self, command, insertion = False):
+        """
+        Execute a command and commit
+        @param command: the SQL command
+        @type command: string
+        @param insertion: whether the command is an insertion
+        @type insertion: boolean
+        @return: what the query returned if insertion is set, else the query itself (class L{MySqlQuery<MySqlQuery>})
+        """
+        cursor = self.connection.cursor()
+        query = MySqlQuery(cursor, self.verbosity)
+        result = self._commitWithRetry(lambda: query.execute(command, insertion))
+        if insertion:
+            return result
+        return query
+
+
+    def executeManyQueries(self, commands):
+        """
+        Execute several commands with the same cursor, and commit once at the end
+        @param commands: the SQL commands (read a second time from the start if the first attempt fails)
+        @type commands: list of string
+        """
+        cursor = self.connection.cursor()
+        query = MySqlQuery(cursor, self.verbosity)
+        def runAll():
+            for command in commands:
+                query.execute(command)
+        self._commitWithRetry(runAll)
+
+
+    def executeManyQueriesIterator(self, table):
+        """
+        Execute every command given by the iterator of an object, and commit once at the end
+        @param table: any object whose getIterator method yields SQL commands
+        """
+        cursor = self.connection.cursor()
+        query = MySqlQuery(cursor, self.verbosity)
+        def runAll():
+            for command in table.getIterator():
+                query.execute(command)
+        self._commitWithRetry(runAll)
+
+
+    def executeFormattedQuery(self, command, *parameters):
+        """
+        Execute a command with place-holders and commit (no second attempt here)
+        @param command: the SQL command, with its place-holders
+        @type command: string
+        @param parameters: the values of the place-holders
+        @return: the query (class L{MySqlQuery<MySqlQuery>})
+        """
+        cursor = self.connection.cursor()
+        query = MySqlQuery(cursor, self.verbosity)
+        query.executeFormat(command, parameters)
+        self.connection.commit()
+        return query

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlExonTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/MySqlExonTable.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,97 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.mySql.MySqlTable import MySqlTable
+
+
+class MySqlExonTable(MySqlTable):
+    """
+    A table of exon in a mySQL database
+    The name of the table is "<name>[_<chromosome>]_exons"
+    Each row is an interval, followed by the id of the transcript it belongs to
+    """
+
+    def __init__(self, connection, name = None, chromosome = None, verbosity = 0):
+        """
+        Constructor
+        @param connection: connection to a database
+        @param name: prefix of the name of the table (a random "TmpTable_<number>" if not given)
+        @type name: string
+        @param chromosome: chromosome added to the name of the table, if given
+        @type chromosome: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        suffix = "" if chromosome is None else "_%s" % chromosome
+        if name is None:
+            name = "TmpTable_%d" % (random.randint(0, 100000))
+        super(MySqlExonTable, self).__init__(connection, "%s%s_exons" % (name, suffix), verbosity)
+
+
+    def createExonTable(self):
+        """
+        Create the table, with the columns of an interval and a "transcriptId" column
+        """
+        variables = Interval.getSqlVariables()
+        variables.append("transcriptId")
+        types = Interval.getSqlTypes()
+        types["transcriptId"] = "int"
+        sizes = Interval.getSqlSizes()
+        sizes["transcriptId"] = 11
+        self.create(variables, types, sizes)
+
+
+    def rename(self, name):
+        """
+        Rename the table ("_exons" is added to the given name)
+        @param name: new prefix of the name of the table
+        @type name: string
+        """
+        super(MySqlExonTable, self).rename("%s_exons" % name)
+
+
+    def addExon(self, exon, transcriptId):
+        """
+        Store an exon, and set the id of the exon to what addLine returns
+        @param exon: the exon
+        @type exon: class L{Interval<Interval>}
+        @param transcriptId: id of the transcript of the exon
+        @type transcriptId: int
+        """
+        values = exon.getSqlValues()
+        values["transcriptId"] = transcriptId
+        exon.id = self.addLine(values)
+
+
+    def retrieveExonsFromTranscriptId(self, transcriptId):
+        """
+        Retrieve the exons of a transcript
+        @param transcriptId: id of the transcript
+        @type transcriptId: int
+        @return: the exons (empty list if the table is not created)
+        """
+        if not self.created:
+            return []
+        # the name of the table is quoted, as in the other queries on tables: it contains the user-given name and chromosome
+        query = self.mySqlConnection.executeQuery("SELECT * FROM '%s' WHERE transcriptId = %d" % (self.name, transcriptId))
+        exons = []
+        for exonLine in query.getIterator():
+            exon = Interval()
+            exon.setSqlValues(exonLine)
+            exons.append(exon)
+        return exons
+
+
+    def retrieveExonsFromBulkTranscriptIds(self, transcriptIds):
+        """
+        Retrieve the exons of several transcripts with one query
+        @param transcriptIds: ids of the transcripts
+        @type transcriptIds: list of int
+        @return: a dict, from each given id to the list of its exons (empty dict if no id is given or the table is not created)
+        """
+        if not transcriptIds or not self.created:
+            return {}
+        exons = dict((transcriptId, []) for transcriptId in transcriptIds)
+        idList = ", ".join(["%s" % (transcriptId) for transcriptId in transcriptIds])
+        query = self.mySqlConnection.executeQuery("SELECT * FROM '%s' WHERE transcriptId IN (%s)" % (self.name, idList))
+        for exonLine in query.getIterator():
+            exon = Interval()
+            exon.setSqlValues(exonLine)
+            # the last value of the row is used as the transcript id
+            # (presumably because createExonTable adds "transcriptId" as the last column -- to be confirmed in MySqlTable.create)
+            exons[exonLine[-1]].append(exon)
+        return exons
+
+
+    def removeFromTranscriptId(self, transcriptId):
+        """
+        Remove the exons of a transcript
+        @param transcriptId: id of the transcript
+        @type transcriptId: int
+        """
+        self.mySqlConnection.executeQuery("DELETE FROM '%s' WHERE transcriptId = %d" % (self.name, transcriptId))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlExonTable.pyc

Binary file SMART/Java/Python/mySql/MySqlExonTable.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlQuery.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/MySqlQuery.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,94 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+class MySqlQuery(object):
+    """
+    Query to a database
+    Wrap a cursor: execute commands and read the rows they produce
+    @ivar verbosity: verbosity (the commands are printed when it is above 99)
+    @type verbosity: int
+    @ivar cursor: the cursor used for the commands
+    @ivar insertedId: id of the row of the last insertion done by L{execute}, None before any insertion
+    """
+
+    def __init__(self, cursor, verbosity = 0):
+        """
+        Constructor
+        @param cursor: the cursor used for the commands (closed when the query is deleted)
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.verbosity = verbosity
+        self.cursor = cursor
+        self.insertedId = None
+
+
+    def __del__(self):
+        self.cursor.close()
+
+
+    def execute(self, query, insertion = False):
+        """
+        Execute a command
+        @param query: the SQL command
+        @type query: string
+        @param insertion: whether the id of the inserted row should be returned
+        @type insertion: boolean
+        @return: the id of the inserted row if insertion is set, else what the cursor returned
+        @raise Exception: if the cursor cannot execute the command
+        """
+        if self.verbosity > 99:
+            # parenthesised single-argument form: prints the same text with the Python 2 print statement
+            print("Querying %s" % (query))
+        try:
+            results = self.cursor.execute(query)
+        except Exception:
+            raise Exception("Error! Command \"%s\" failed!" % (query))
+        if insertion:
+            # the id is also stored: getInsertedId used to answer None whatever happened
+            self.insertedId = self.cursor.lastrowid
+            return self.insertedId
+        return results
+
+
+    def executeFormat(self, query, parameters):
+        """
+        Execute a command with place-holders
+        @param query: the SQL command, with its place-holders
+        @type query: string
+        @param parameters: the values of the place-holders
+        @return: what the cursor returned
+        """
+        if self.verbosity > 99:
+            # one line: the command, a pipe, then the parameters separated by spaces
+            print(" ".join(["Querying %s |" % (query)] + ["%s" % (parameter,) for parameter in parameters]))
+        return self.cursor.execute(query, parameters)
+
+
+    def getLine(self):
+        """
+        @return: the next row of the result, None if there is no more row
+        """
+        return self.cursor.fetchone()
+
+
+    def getLines(self, lines = None):
+        """
+        @param lines: maximum number of rows to read (all the remaining rows if not given)
+        @type lines: int
+        @return: the next rows of the result
+        """
+        if lines is None:
+            return self.cursor.fetchall()
+        return self.cursor.fetchmany(lines)
+
+
+    def isEmpty(self):
+        """
+        Check whether the last command produced or modified nothing
+        The remaining rows of the result are read, and thus lost
+        @return: False if rows remained or if the cursor reports modified rows, True otherwise
+        """
+        if self.getLines():
+            return False
+        # the row count alone cannot be trusted: sqlite3 gives -1 for every SELECT, whatever the number of rows
+        # a positive count (rows modified by the command) still means "not empty", as before
+        rowCount = self.cursor.rowcount
+        return rowCount is None or rowCount <= 0
+
+
+    def getInsertedId(self):
+        """
+        @return: the id of the row of the last insertion done by execute, None if there was none
+        """
+        return self.insertedId
+
+
+    def getIterator(self):
+        """
+        Iterate on the remaining rows of the result
+        @return: iterator to the rows
+        """
+        line = self.getLine()
+        while line is not None:
+            yield line
+            line = self.getLine()
+
+
+    def show(self):
+        """
+        Print the remaining rows of the result, one per line
+        """
+        for line in self.getIterator():
+            print(line)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/MySqlTable.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,334 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re\n+import sys\n+\n+class MySqlTable(object):\n+ """\n+ Store a table of a mySQL database, used for transcripts or exons\n+ Record a a name and a type (int, float, double) for each column\n+ @ivar name: name of the table\n+ @type name: string\n+ @ivar variables: name of the columns\n+ @type variables: list of string\n+ @ivar types: type of the columns\n+ @type types: dict of string\n+ @ivar mySqlConnection: connection to a database\n+ @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n+ @ivar nbLines: number of rows\n+ @type nbLines: int\n+ @ivar verbosity: verbosity\n+ @type verbosity: int\n+ """\n+\n+ def __init__(self, connection, name, verbosity = 0):\n+ """\n+ Constructor\n+ Possibly retrieve column names and types if table exists\n+ @param mySqlConnection: connection to a databas\n+ @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n+ @param name: name of the table\n+ @type name: string\n+ @param verbosity: verbosity\n+ @type verbosity: int\n+ """\n+ self.name = name\n+ self.variables = []\n+ self.types = {}\n+ self.sizes = {}\n+ self.nbLines = None\n+ self.verbosity = verbosity\n+ self.mySqlConnection = connection\n+ queryTables = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE \'table\' AND name LIKE \'%s\'" % (self.name))\n+ self.created = not queryTables.isEmpty()\n+ if self.created:\n+ queryFields = self.mySqlConnection.executeQuery("PRAGMA table_info(\'%s\')" % (name))\n+ for field in queryFields.getIterator():\n+ if field[1] != "id":\n+ 
self.variables.append(field[1])\n+ self.types[field[1]] = field[2]\n+ self.sizes[field[1]] = field[3]\n+ \n+ \n+ def getName(self):\n+ return self.name\n+\n+\n+ def create(self, variables, types, sizes):\n+ """\n+ Create a table using give column names and types\n+ @param variables: names of the columns\n+ @type variables: list of string\n+ @param types: types of the columns\n+ @type types: dict of string\n+ @param sizes: sizes of the types\n+ @type size'..b'id):\n+ """\n+ Retrieve a row from its id\n+ @param id: the id of the row\n+ @type id: int\n+ @return: the row\n+ """\n+ query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' WHERE id = %d" % (self.name, id))\n+ result = query.getLine()\n+ if result == None:\n+ raise Exception("Error! Id %d is not in the table %s!" % (id, self.name))\n+ return result\n+\n+\n+ def retrieveBulkFromId(self, ids):\n+ """\n+ Retrieve a row from its id\n+ @param id: the ids of the row\n+ @type id: list of int\n+ @return: the row\n+ """\n+ if not ids:\n+ return []\n+ MAXSIZE = 1000\n+ results = []\n+ for batch in range(len(ids) / MAXSIZE + 1):\n+ theseIds = ids[batch * MAXSIZE : (batch+1) * MAXSIZE]\n+ if theseIds:\n+ query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' WHERE id IN (%s)" % (self.name, ", ".join(["%d" % (id) for id in theseIds])))\n+ lines = query.getLines()\n+ if len(lines) != len(theseIds):\n+ raise Exception("Error! Some Ids of (%s) is are missing in the table \'%s\' (got %d instead of %d)!" 
% (", ".join(["%d" % (id) for id in theseIds]), self.name, len(lines)), len(theseIds))\n+ results.extend(lines)\n+ return results\n+\n+\n+ def removeFromId(self, id):\n+ """\n+ Remove a row from its id\n+ @param id: the id of the row\n+ @type id: int\n+ """\n+ self.mySqlConnection.executeQuery("DELETE FROM \'%s\' WHERE id = %d" % (self.name, id))\n+ \n+ \n+ def getIterator(self):\n+ """\n+ Iterate on the content of table\n+ @return: iterator to the rows of the table\n+ """\n+ if not self.created:\n+ return\n+ MAXSIZE = 1000\n+ query = self.mySqlConnection.executeQuery("SELECT count(id) FROM \'%s\'" % (self.name))\n+ nbRows = int(query.getLine()[0])\n+ for chunk in range((nbRows / MAXSIZE) + 1):\n+ query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' LIMIT %d, %d" % (self.name, chunk * MAXSIZE, MAXSIZE))\n+ for line in query.getIterator():\n+ yield line\n+\n+\n+ def createIndex(self, indexName, values, unique = False, fullText = False):\n+ """\n+ Add an index on the table\n+ @param indexName: name of the index\n+ @type indexName: string\n+ @param values: values to be indexed\n+ @type values: string\n+ @param unique: if the index is unique\n+ @type unique: boolean\n+ @param fullText: whether full text should be indexed\n+ @type fullText: boolean\n+ """\n+ self.mySqlConnection.executeQuery("CREATE %s%sINDEX \'%s\' ON \'%s\' (%s)" % ("UNIQUE " if unique else "", "FULLTEXT " if fullText else "", indexName, self.name, ", ".join(values)))\n+\n+\n+ def setDefaultTagValue(self, field, name, value):\n+ """\n+ Add a tag value\n+ @param name: name of the tag\n+ @type name: string\n+ @param value: value of the tag\n+ @type value: string or int\n+ """\n+ newData = {}\n+ for line in MySqlTable.getIterator(self):\n+ id = line[0]\n+ tags = line[field]\n+ if tags == \'\':\n+ newTag = "%s=%s" % (name, value)\n+ else:\n+ newTag = "%s;%s=%s" % (tags, name, value)\n+ if name not in [tag.split("=")[0] for tag in tags.split(";")]:\n+ newData[id] = newTag\n+ for id, tag in 
newData.iteritems():\n+ query = self.mySqlConnection.executeQuery("UPDATE \'%s\' SET tags = \'%s\' WHERE id = %i" % (self.name, tag, id))\n+\n+\n+\n+ def show(self):\n+ """\n+ Drop the content of the current table\n+ """\n+ query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\'" % (self.name))\n+ print query.getLines()\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlTable.pyc

Binary file SMART/Java/Python/mySql/MySqlTable.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlTranscriptTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/MySqlTranscriptTable.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,149 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random
+import sys
+from SMART.Java.Python.structure.TranscriptList import TranscriptList
+from SMART.Java.Python.mySql.MySqlExonTable import MySqlExonTable
+from SMART.Java.Python.mySql.MySqlTable import MySqlTable
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
+class MySqlTranscriptTable(MySqlTable):
+    """
+    A table of transcripts in a mySQL database
+    The name of the table is "<name>[_<chromosome>]_transcripts"
+    """
+
+    def __init__(self, connection, name = None, chromosome = None, verbosity = 0):
+        """
+        Constructor
+        @param connection: connection to a database
+        @param name: prefix of the name of the table (a random "TmpTable_<number>" if not given)
+        @type name: string
+        @param chromosome: chromosome added to the name of the table, if given
+        @type chromosome: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        suffix = "" if chromosome is None else "_%s" % chromosome
+        if name is None:
+            name = "TmpTable_%d" % (random.randint(0, 100000))
+        super(MySqlTranscriptTable, self).__init__(connection, "%s%s_transcripts" % (name, suffix), verbosity)
+
+
+    def createTranscriptTable(self):
+        """
+        Create the table, with the columns of a transcript
+        """
+        self.create(Transcript.getSqlVariables(), Transcript.getSqlTypes(), Transcript.getSqlSizes())
+
+
+    def rename(self, name):
+        """
+        Rename the table ("_transcripts" is added to the given name)
+        @param name: new prefix of the name of the table
+        @type name: string
+        """
+        super(MySqlTranscriptTable, self).rename("%s_transcripts" % name)
+
+
+    def remove(self):
+        # plain delegation to the parent class
+        super(MySqlTranscriptTable, self).remove()
+
+
+    def clear(self):
+        # plain delegation to the parent class
+        super(MySqlTranscriptTable, self).clear()
+
+
+    def copy(self, transcriptTable):
+        """
+        Remove this table, then copy another table using the parent class
+        @param transcriptTable: the table to copy
+        """
+        self.remove()
+        super(MySqlTranscriptTable, self).copy(transcriptTable)
+
+
+    def add(self, transcriptTable):
+        # plain delegation to the parent class
+        super(MySqlTranscriptTable, self).add(transcriptTable)
+
+
+    def addTranscript(self, transcript):
+        """
+        Store a transcript, and set the id of the transcript to what addLine returns
+        @param transcript: the transcript
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        transcript.id = self.addLine(transcript.getSqlValues())
+
+
+    def addTranscriptList(self, transcriptList):
+        """
+        Store every transcript of a list, one by one
+        @param transcriptList: the transcripts
+        @type transcriptList: class L{TranscriptList<TranscriptList>}
+        """
+        progress = Progress(transcriptList.getNbTranscript(), "Storing list to %s" % (self.name), self.verbosity)
+        for transcript in transcriptList.getIterator():
+            self.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+
+
+    def removeTranscript(self, transcript):
+        """
+        Remove the row whose id is the id of the transcript
+        @param transcript: the transcript
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        self.removeFromId(transcript.id)
+
+
+    def retrieveTranscriptFromId(self, id):
+        """
+        Retrieve a transcript from its id
+        @param id: the id of the row
+        @type id: int
+        @return: the transcript
+        """
+        transcript = Transcript()
+        transcript.setSqlValues(self.retrieveFromId(id))
+        return transcript
+
+
+    def retrieveBulkTranscriptFromId(self, ids):
+        """
+        Retrieve several transcripts from their ids
+        @param ids: the ids of the rows
+        @type ids: list of int
+        @return: the transcripts, one per distinct id, in no particular order (empty list if no id is given)
+        """
+        if not ids:
+            return []
+        idsToTranscripts = {}
+        for values in self.retrieveBulkFromId(ids):
+            transcript = Transcript()
+            transcript.setSqlValues(values)
+            # the first value of the row is its id
+            idsToTranscripts[values[0]] = transcript
+        return idsToTranscripts.values()
+
+
+    def selectTranscripts(self, command, simple = False):
+        """
+        Iterate on the transcripts selected by a command
+        @param command: the SQL command, which should select rows of this table
+        @type command: string
+        @param simple: if set, execute the command once, as it is; else, execute it by pages of 100000 rows ("LIMIT ... OFFSET ..." is added to the command) until a page is empty
+        @type simple: boolean
+        @return: iterator to (id, transcript) couples
+        """
+        MAXSIZE = 100000
+        page = 0
+        while True:
+            if simple:
+                thisCommand = command
+            else:
+                thisCommand = "%s LIMIT %d OFFSET %d" % (command, MAXSIZE, MAXSIZE * page)
+            query = self.mySqlConnection.executeQuery(thisCommand)
+            found = False
+            for line in query.getIterator():
+                found = True
+                transcript = Transcript()
+                transcript.setSqlValues(line)
+                yield (int(line[0]), transcript)
+            # a simple command is executed only once; a paged command stops at the first empty page
+            if simple or not found:
+                return
+            page += 1
+
+
+    def getIterator(self):
+        """
+        Iterate on the transcripts of the table
+        @return: iterator to the transcripts
+        """
+        for couple in self.selectTranscripts("SELECT * FROM '%s'" % (self.name)):
+            yield couple[1]
+
+
+    def retrieveTranscriptList(self):
+        """
+        Read the whole table
+        @return: the transcripts
+        @rtype: class L{TranscriptList<TranscriptList>}
+        """
+        transcriptList = TranscriptList()
+        # NOTE(review): getLines is not defined in this class; presumably provided by MySqlTable -- to be confirmed
+        for transcriptLine in self.getLines():
+            transcript = Transcript()
+            transcript.setSqlValues(transcriptLine)
+            transcriptList.addTranscript(transcript)
+        return transcriptList
+
+
+    def setDefaultTagValue(self, name, value):
+        """
+        Add a tag value
+        @param name: name of the tag
+        @type name: string
+        @param value: value of the tag
+        @type value: string or int
+        """
+        # +1: the parent method reads the rows by position, and the id comes first, before the variables of the transcript
+        tagsField = Transcript.getSqlVariables().index("tags") + 1
+        super(MySqlTranscriptTable, self).setDefaultTagValue(tagsField, name, value)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/MySqlTranscriptTable.pyc

Binary file SMART/Java/Python/mySql/MySqlTranscriptTable.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/__init__.pyc

Binary file SMART/Java/Python/mySql/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/mySql/test/Test_MySqlTranscriptTable.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/mySql/test/Test_MySqlTranscriptTable.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,158 @@
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
+import unittest
+
+
+class Test_MySqlTranscriptTable(unittest.TestCase):
+
+    def test_getRange(self):
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setSize(2000)
+        transcript.setDirection("+")
+
+        exon1 = Interval()
+        exon1.setName("test1.1")
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setSize(1000)
+
+        exon2 = Interval()
+        exon2.setName("test1.1")
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setSize(1000)
+
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+
+        connection = MySqlConnection()
+        writer = MySqlTranscriptWriter(connection, "testMySqlTranscriptTableGetRange")
+        writer.addTranscript(transcript)
+        writer.write()
+
+        transcriptContainer = TranscriptContainer("testMySqlTranscriptTableGetRange", "sql")
+        transcriptContainer.mySqlConnection = connection
+        self.assertEqual(transcriptContainer.getNbTranscripts(), 1)
+        for transcript in transcriptContainer.getIterator():
+            self.assertEqual(transcript.getName(), "test1.1")
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getStart(), 1000)
+            self.assertEqual(transcript.getEnd(), 4000)
+            self.assertEqual(transcript.getSize(), 2002)
+            self.assertEqual(transcript.getNbExons(), 2)
+            exons = transcript.getExons()
+            self.assertEqual(exons[0].getStart(), 1000)
+            self.assertEqual(exons[0].getEnd(), 2000)
+            self.assertEqual(exons[1].getStart(), 3000)
+            self.assertEqual(exons[1].getEnd(), 4000)
+
+
+    def test_setDefaultTagValue(self):
+        transcript1 = Transcript()
+        transcript1.setName("test1.1")
+        transcript1.setChromosome("arm_X")
+        transcript1.setStart(1000)
+        transcript1.setEnd(2000)
+        transcript1.setDirection("+")
+
+        exon1 = Interval()
+        exon1.setName("test1.1")
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+
+        transcript1.addExon(exon1)
+
+        transcript2 = Transcript()
+        transcript2.setName("test2.1")
+        transcript2.setChromosome("arm_X")
+        transcript2.setStart(1000)
+        transcript2.setEnd(2000)
+        transcript2.setDirection("+")
+        transcript2.setTagValue("nbOccurrences", "2")
+
+        exon2 = Interval()
+        exon2.setName("test2.1")
+        exon2.setChromosome("arm_X")
+        exon2.setStart(1000)
+        exon2.setEnd(2000)
+
+        transcript2.addExon(exon2)
+
+        transcript3 = Transcript()
+        transcript3.setName("test3.1")
+        transcript3.setChromosome("arm_X")
+        transcript3.setStart(1000)
+        transcript3.setEnd(2000)
+        transcript3.setDirection("+")
+        transcript3.setTagValue("occurrences", "2")
+
+        exon3 = Interval()
+        exon3.setName("test3.1")
+        exon3.setChromosome("arm_X")
+        exon3.setStart(1000)
+        exon3.setEnd(2000)
+
+        transcript3.addExon(exon3)
+
+        connection = MySqlConnection()
+        table      = MySqlTranscriptTable(connection, "testMySqlTranscriptTableSetDefaultTagValue")
+        table.createTranscriptTable()
+        table.addTranscript(transcript1)
+        table.addTranscript(transcript2)
+        table.addTranscript(transcript3)
+        table.setDefaultTagValue("occurrence", "1")
+
+        cpt = 0
+        for transcript in table.getIterator():
+            cpt += 1
+            self.assert_(cpt != 4)
+            if cpt == 1:
+                self.assertEqual(transcript.name, "test1.1")
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getSize(), 1001)
+                self.assertEqual(transcript.getNbExons(), 1)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(transcript.getTagValue("occurrence"), 1)
+            elif cpt == 2:
+                self.assertEqual(transcript.name, "test2.1")
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getSize(), 1001)
+                self.assertEqual(transcript.getNbExons(), 1)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(transcript.getTagValue("nbOccurrences"), 2)
+                self.assertEqual(transcript.getTagValue("occurrence"), 1)
+            elif cpt == 2:
+                self.assertEqual(transcript.name, "test3.1")
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getSize(), 1001)
+                self.assertEqual(transcript.getNbExons(), 1)
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(transcript.getTagValue("occurrence"), 2)
+
+        table.remove()
+
+if __name__ == '__main__':
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/ConvertToNCList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/ConvertToNCList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,172 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import random, os, time, shutil
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.ncList.NCListMerger import NCListMerger
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+try:
+   import cPickle as pickle
+except:
+   import pickle
+
+class ConvertToNCList(object):
+
+ def __init__(self, verbosity = 1):
+ self._parsers   = {}
+ self._sortedFileNames   = {}
+ self._inputFileName       = None
+ self._outputFileName   = None
+ self._index       = False
+ self._ncLists   = {}
+ self._splittedFileNames       = {}
+ self._nbElements   = 0
+ self._nbElementsPerChromosome = {}
+ self._randomNumber   = random.randint(0, 10000)
+ self._sorted                  = False
+ self._verbosity       = verbosity
+
+ def setInputFileName(self, fileName, format):
+ self._inputFileName = fileName
+ chooser = ParserChooser(self._verbosity)
+ chooser.findFormat(format)
+ self._parser = chooser.getParser(fileName)
+
+ def setOutputFileName(self, fileName):
+ self._outputFileName = fileName
+ fileNameNoExtension  = os.path.splitext(fileName)[0]
+ baseName = "%s_%d" % (fileNameNoExtension, self._randomNumber)
+ self._directory      = "%s_files" % (baseName)
+ if not os.path.exists(self._directory):
+ os.makedirs(self._directory)
+ self._sortedFileNames = os.path.join(self._directory, baseName)
+
+ def setIndex(self, boolean):
+ self._index = boolean
+
+ def setSorted(self, boolean):
+ self._sorted = boolean
+
+ def sortFile(self):
+ if self._verbosity > 2:
+ print "%s file %s..." % ("Rewriting" if self._sorted else "Sorting", self._inputFileName)
+ startTime = time.time()
+ fs = FileSorter(self._parser, self._verbosity-4)
+ fs.setPresorted(self._sorted)
+ fs.perChromosome(True)
+ fs.setOutputFileName(self._sortedFileNames)
+ fs.sort()
+ self._splittedFileNames       = fs.getOutputFileNames()
+ self._nbElementsPerChromosome = fs.getNbElementsPerChromosome()
+ self._nbElements   = fs.getNbElements()
+ endTime = time.time()
+ if self._verbosity > 2:
+ print " ...done (%ds)" % (endTime - startTime)
+
+ def createNCLists(self):
+ self._ncLists = {}
+ if self._verbosity > 2:
+ print "Creating NC-list for %s..." % (self._inputFileName)
+ startTime = time.time()
+ for chromosome, fileName in self._splittedFileNames.iteritems():
+ if self._verbosity > 3:
+ print "  chromosome %s" % (chromosome)
+ ncList = NCList(self._verbosity)
+ if self._index:
+ ncList.createIndex(True)
+ ncList.setChromosome(chromosome)
+ ncList.setFileName(fileName)
+ ncList.setNbElements(self._nbElementsPerChromosome[chromosome])
+ ncList.buildLists()
+ self._ncLists[chromosome] = ncList
+ endTime = time.time()
+ if self._verbosity > 2:
+ print " ...done (%ds)" % (endTime - startTime)
+
+ def writeOutputFile(self):
+ merger = NCListMerger(self._verbosity)
+ merger.setFileName(self._outputFileName)
+ merger.addIndex(self._index)
+ merger.setNCLists(self._ncLists)
+ merger.merge()
+
+ def cleanFiles(self):
+ shutil.rmtree(self._directory)
+
+ def run(self):
+ self.sortFile()
+ self.createNCLists()
+ self.writeOutputFile()
+ self.cleanFiles()
+
+ def getSortedFileNames(self):
+ return self._splittedFileNames
+
+ def getNbElements(self):
+ return self._nbElements
+
+ def getNbElementsPerChromosome(self):
+ return self._nbElementsPerChromosome
+
+ def getNCLists(self):
+ return self._ncLists
+
+ def getTmpDirectory(self):
+ return self._directory
+
+
+if __name__ == "__main__":
+ description = "Convert To NC-List v1.0.0: Convert a mapping or transcript file into a NC-List. [Category: NC-List]"
+
+ parser = OptionParser(description = description)
+ parser.add_option("-i", "--input",    dest="inputFileName",  action="store",   type="string",  help="Query input file [compulsory] [format: file in transcript format given by -f]")
+ parser.add_option("-f", "--format",    dest="format",   action="store",   type="string",  help="format of previous file [compulsory] [format: transcript file format]")
+ parser.add_option("-d", "--index",    dest="index",   action="store_true", default=False,   help="create an index [default: false] [format: boolean]")
+ parser.add_option("-o", "--output",    dest="outputFileName", action="store",   type="string",  help="Output file [compulsory] [format: output file in NCList format]")
+ parser.add_option("-s", "--sorted",    dest="sorted",       action="store_true", default=False,               help="input file is already sorted [format: boolean] [default: False]")
+ parser.add_option("-v", "--verbosity", dest="verbosity",   action="store",    default=1,   type="int",   help="Trace level [format: int] [default: 1]")
+ (options, args) = parser.parse_args()
+
+ ctncl = ConvertToNCList(options.verbosity)
+ ctncl.setInputFileName(options.inputFileName, options.format)
+ ctncl.setOutputFileName(options.outputFileName)
+ ctncl.setIndex(options.index)
+ ctncl.setSorted(options.sorted)
+ ctncl.run()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FileSorter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FileSorter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,210 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+try:
+ import cPickle as pickle
+except:
+ import pickle
+import random, os
+from heapq import heapify, heappop, heappush
+from itertools import islice, cycle
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+BUFFER_SIZE = 100 * 1024
+
+class FileSorter(object):
+
+ def __init__(self, parser, verbosity = 1):
+ self._parser   = parser
+ self._verbosity       = verbosity
+ self._chunks   = {}
+ self._nbElements   = 0
+ self._nbElementsPerChromosome = {}
+ self._perChromosome       = False
+ self._isPreSorted             = False
+ self._outputFileNames   = {}
+ self._prefix   = "tmpFile_%d" % (random.randint(0, 100000))
+ self._chromosome   = None
+ if "SMARTTMPPATH" in os.environ:
+ self._prefix = os.path.join(os.environ["SMARTTMPPATH"], self._prefix)
+
+ def selectChromosome(self, chromosome):
+ self._chromosome = chromosome
+
+ def perChromosome(self, boolean):
+ self._perChromosome = boolean
+
+ def setOutputFileName(self, fileName):
+ self._outputFileName = fileName
+ if self._perChromosome:
+ self._outputFileName = os.path.splitext(self._outputFileName)[0]
+
+ def setPresorted(self, presorted):
+ self._isPreSorted = presorted
+
+ def sort(self):
+ if not self._isPreSorted:
+ self._batchSort()
+ else:
+ self._presorted()
+
+ def _presorted(self):
+ progress = UnlimitedProgress(1000, "Writing files %s" % (self._parser.fileName), self._verbosity)
+ curChromosome = None
+ outputHandle  = None
+
+ if not self._perChromosome:
+ outputHandle = open(self._outputFileName, "wb")
+ for transcript in self._parser.getIterator():
+ progress.inc()
+ if transcript.__class__.__name__ == "Mapping":
+ transcript = transcript.getTranscript()
+ chromosome = transcript.getChromosome()
+ if self._chromosome != None and chromosome != self._chromosome:
+ continue
+ self._nbElements += 1
+ self._nbElementsPerChromosome[chromosome] = self._nbElementsPerChromosome.get(chromosome, 0) + 1
+ if self._perChromosome:
+ if chromosome != curChromosome:
+ if outputHandle != None:
+ outputHandle.close()
+ self._outputFileNames[chromosome] = "%s_%s.pkl" % (self._outputFileName, chromosome)
+ outputHandle  = open(self._outputFileNames[chromosome], "wb")
+ curChromosome = chromosome
+ outputHandle.writelines("%s" % pickle.dumps(transcript))
+ if outputHandle != None:
+ outputHandle.close()
+ progress.done()
+
+ def getNbElements(self):
+ return self._nbElements
+
+ def getNbElementsPerChromosome(self):
+ return self._nbElementsPerChromosome
+
+ def _printSorted(self, chromosome, chunk):
+ chunk.sort(key = lambda transcript: (transcript.getStart(), -transcript.getEnd()))
+ outputChunk = open("%s_%s_%06i.tmp" % (self._prefix, chromosome, len(self._chunks[chromosome])), "wb", 32000)
+ self._chunks[chromosome].append(outputChunk)
+ for transcript in chunk:
+ outputChunk.write(pickle.dumps(transcript, -1))
+ outputChunk.close()
+
+ def _merge(self, chunks):
+ values = []
+ for chunk in chunks:
+ chunk = open(chunk.name, "rb")
+ try:
+ transcript = pickle.load(chunk)
+ start    = transcript.getStart()
+ end    = -transcript.getEnd()
+ except EOFError:
+ try:
+ chunk.close()
+ chunks.remove(chunk)
+ os.remove(chunk.name)
+ except:
+ pass
+ else:
+ heappush(values, (start, end, transcript, chunk))
+ while values:
+ start, end, transcript, chunk = heappop(values)
+ yield transcript
+ try:
+ transcript = pickle.load(chunk)
+ start    = transcript.getStart()
+ end    = -transcript.getEnd()
+ except EOFError:
+ try:
+ chunk.close()
+ chunks.remove(chunk)
+ os.remove(chunk.name)
+ except:
+ pass
+ else:
+ heappush(values, (start, end, transcript, chunk))
+
+ def _batchSort(self):
+ currentChunks = {}
+ counts   = {}
+ try:
+ progress = UnlimitedProgress(1000, "Sorting file %s" % (self._parser.fileName), self._verbosity)
+ for transcript in self._parser.getIterator():
+ progress.inc()
+ if transcript.__class__.__name__ == "Mapping":
+ transcript = transcript.getTranscript()
+ chromosome = transcript.getChromosome()
+ if self._chromosome != None and chromosome != self._chromosome:
+ continue
+ if chromosome not in self._chunks:
+ self._chunks[chromosome]  = []
+ currentChunks[chromosome] = []
+ counts[chromosome] = 0
+ currentChunks[chromosome].append(transcript)
+ counts[chromosome] += 1
+ if counts[chromosome] == BUFFER_SIZE:
+ self._printSorted(chromosome, currentChunks[chromosome])
+ currentChunks[chromosome] = []
+ counts[chromosome]   = 0
+ self._nbElements += 1
+ self._nbElementsPerChromosome[chromosome] = self._nbElementsPerChromosome.get(chromosome, 0) + 1
+ for chromosome in self._chunks:
+ if counts[chromosome] > 0:
+ self._printSorted(chromosome, currentChunks[chromosome])
+ progress.done()
+ if not self._perChromosome:
+ outputHandle = open(self._outputFileName, "wb")
+ progress = Progress(len(self._chunks), "Writing sorted file %s" % (self._parser.fileName), self._verbosity)
+ for chromosome in self._chunks:
+ if self._perChromosome:
+ self._outputFileNames[chromosome] = "%s_%s.pkl" % (self._outputFileName, chromosome)
+ outputHandle = open(self._outputFileNames[chromosome], "wb")
+ for sequence in self._merge(self._chunks[chromosome]):
+ pickle.dump(sequence, outputHandle, -1)
+ if self._perChromosome:
+ outputHandle.close()
+ progress.inc()
+ if not self._perChromosome:
+ outputHandle.close()
+ progress.done()
+ finally:
+ for chunks in self._chunks.values():
+ for chunk in chunks:
+ try:
+ chunk.close()
+ os.remove(chunk.name)
+ except Exception:
+ pass
+
+ def getOutputFileNames(self):
+ return self._outputFileNames

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,197 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import struct
+import math
+import os
+from optparse import OptionParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from commons.core.parsing.ParserChooser import ParserChooser
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.structure.Transcript import Transcript
+
+LONGSIZE = struct.calcsize('l')
+
+class FindOverlapsWithOneInterval(object):
+
+ def __init__(self, verbosity):
+ self._sortedFileName   = None
+ self._verbosity = verbosity
+ self._overlappingNames = []
+ self._nbOverlaps    = 0
+ self._nbWritten = 0
+
+ def __del__(self):
+ if self._sortedFileName and os.path.exists(self._sortedFileName):
+ os.remove(self._sortedFileName)
+
+ def close(self):
+ self._iWriter.close()
+
+ def setOutputFileName(self, fileName):
+ self._iWriter = Gff3Writer(fileName)
+
+ def setFileName(self, fileName, format):
+ chooser = ParserChooser(self._verbosity)
+ chooser.findFormat(format)
+ self._parser = chooser.getParser(fileName)
+ self._sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
+
+ def setInterval(self, chromosome, start, end):
+ self._chromosome = chromosome
+ self._start   = start
+ self._end = end
+ self._transcript = Transcript()
+ self._transcript.setChromosome(chromosome)
+ self._transcript.setStart(start)
+ self._transcript.setEnd(end)
+ self._transcript.setDirection("+")
+
+ def setTranscript(self, transcript):
+ if transcript.__class__.__name__ == "Mapping":
+ transcript = transcript.getTranscript()
+ self._chromosome = transcript.getChromosome()
+ self._start   = transcript.getStart()
+ self._end = transcript.getEnd()
+ self._transcript = transcript
+
+ def prepareIntermediateFiles(self):
+ fs = FileSorter(self._parser, self._verbosity-4)
+ fs.selectChromosome(self._chromosome)
+ fs.perChromosome(False)
+ fs.setOutputFileName(self._sortedFileName)
+ fs.sort()
+ self._nbTotalLines = fs.getNbElements()
+ self._nbLines   = fs.getNbElementsPerChromosome()[self._chromosome]
+
+ def createNCList(self):
+ if self._verbosity > 2:
+ print "Creating NC-list..."
+ ncList = NCList(self._verbosity)
+ ncList.createIndex(True)
+ ncList.setChromosome(self._chromosome)
+ ncList.setFileName(self._sortedFileName)
+ ncList.setNbElements(self._nbTotalLines)
+ ncList.buildLists()
+ self.setNCList(ncList, ncList.getIndex())
+ if self._verbosity > 2:
+ print " ...done (%ds)" % (endTime - startTime)
+
+ def setNCList(self, ncList, index):
+ self._ncList = ncList
+ self._indix  = index
+
+ def binarySearch(self, cursor, startL, endL):
+ if startL > endL:
+ return None
+ middleL = (startL + endL) / 2
+ cursor.moveSibling(middleL)
+ overlap = self.isOverlapping(cursor)
+ if overlap == 0:
+ if middleL == startL:
+ return cursor
+ else:
+ return self.binarySearch(cursor, startL, middleL)
+ if overlap == -1:
+ return self.binarySearch(cursor, middleL + 1, endL)
+ return self.binarySearch(cursor, startL, middleL - 1)
+
+ def compare(self, cursor = None):
+ self._ncList.openFiles()
+ if cursor == None:
+ dump   = True
+ cursor = NCListCursor(None, self._ncList, 0, self._verbosity)
+ cursor._getSiblingData()
+ cursor = self.binarySearch(cursor, cursor._firstSiblingLIndex, cursor._lastSiblingLIndex)
+ if cursor == None:
+ return
+ while not cursor.isOut() and self.isOverlapping(cursor) == 0:
+ self.write(cursor)
+ newCursor = NCListCursor(cursor)
+ if newCursor.hasChildren():
+ newCursor.moveDown()
+ self.compare(newCursor)
+ if cursor.isLast():
+ return
+ cursor.moveRight()
+
+ def isOverlapping(self, cursor):
+ if self._end < cursor.getStart():
+ return 1
+ if self._start > cursor.getEnd():
+ return -1
+ return 0
+
+ def write(self, cursor):
+ self._nbOverlaps += 1
+ refTranscript = cursor.getTranscript()
+ self._overlappingNames.append(refTranscript.getName())
+
+ def dumpWriter(self):
+ if (not self._overlappingNames) or self._transcript == None:
+ return
+ self._transcript.setTagValue("nbOverlaps", len(self._overlappingNames))
+ self._transcript.setTagValue("overlapsWith", "--".join(self._overlappingNames))
+ self._iWriter.addTranscript(self._transcript)
+ self._nbWritten    += 1
+ self._overlappingNames = []
+
+ def run(self):
+ self.prepareIntermediateFiles()
+ self.createNCList()
+ self.compare()
+ self.dumpWriter()
+ self.close()
+ if self._verbosity > 0:
+ print "# refs: %d" % (self._nbLines)
+ print "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)
+
+
+if __name__ == "__main__":
+ description = "FindOverlapsWithOneInterval: Finds overlaps with one query interval."
+
+ parser = OptionParser(description = description)
+ parser.add_option("-i", "--input",    dest="inputFileName",   action="store", type="string",  help="Input file [compulsory] [format: file in transcript format given by -f]")
+ parser.add_option("-f", "--format",   dest="format", action="store", type="string",  help="Format of previous file [compulsory] [format: transcript file format]")
+ parser.add_option("-s", "--start",    dest="start",   action="store", type="int", help="The start of the query interval [compulsory] [format: int]")
+ parser.add_option("-e", "--end", dest="end", action="store", type="int", help="The end of the query interval [compulsory] [format: int]")
+ parser.add_option("-c", "--chromosome",  dest="chromosome", action="store", type="string",  help="Chromosome of the query interval [compulsory] [format: string]")
+ parser.add_option("-o", "--output",   dest="outputFileName", action="store", type="string",  help="Output file [compulsory] [format: output file in GFF3 format]")
+ parser.add_option("-v", "--verbosity",   dest="verbosity",   action="store", default=1, type="int", help="Trace level [format: int] [default: 1]")
+ (options, args) = parser.parse_args()
+
+ iFOWOI = FindOverlapsWithOneInterval(options.verbosity)
+ iFOWOI.setFileName(options.inputFileName, options.format)
+ iFOWOI.setInterval(options.chromosome, options.start, options.end)
+ iFOWOI.setOutputFileName(options.outputFileName)
+ iFOWOI.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,182 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+\n+import os, struct, time\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n+from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval\n+\n+REFERENCE = 0\n+QUERY = 1\n+TYPETOSTRING = {0: "reference", 1: "query"}\n+\n+class FindOverlapsWithSeveralIntervals(object):\n+ \n+ def __init__(self, verbosity = 1):\n+ self._parsers = {}\n+ self._outputFileName = "outputOverlaps.gff3"\n+ self._iWriter = None\n+ self._nbLines = {REFERENCE: 0, QUERY: 0}\n+ self._verbosity = verbosity\n+ self._ncLists = {}\n+ self._sortedRefFileNames = None\n+ self._transQueryFileName = None\n+ self._cursors = {}\n+ self._iFowoi = FindOverlapsWithOneInterval(self._verbosity)\n+ \n+ def __del__(self):\n+ self.close()\n+ for fileName in (self._sortedRefFileNames, self._transQueryFileName):\n+ if os.path.exists(fileName):\n+ os.remove(fileName)\n+ \n+ def close(self):\n+ self._iFowoi.close()\n+ \n+ def setRefFileName(self, fileName, format):\n+ 
self.setFileName(fileName, format, REFERENCE)\n+ self._sortedRefFileNames = "%s_ref_sorted.pkl" % (os.path.splitext(fileName)[0])\n+ \n+ def setQueryFileName(self, fileName, format):\n+ self.setFileName(fileName, format, QUERY)\n+ self._transQueryFileName = "%s_query_trans.pkl" % (os.path.splitext(fileName)[0])\n+\n+ def setFileName(self, fileName, format, type):\n+ chooser = ParserChooser(self._verbosity)\n+ chooser.findFormat(format)\n+ self._parsers[type] = chooser.getParser(fileName)\n+ \n+ def setOutputFileName(self, outputFileName):\n+ self._iFowoi.setOutputFileName(outputFileName)\n+\n+ def _sortRefFile(self):\n+ fs = FileSorter(self._p'..b'\n+ self._sortRefFile()\n+ self._translateQueryFile()\n+\n+ def createNCLists(self):\n+ self._ncLists = {}\n+ self._indices = {}\n+ self._cursors = {}\n+ for chromosome, fileName in self._splittedFileNames.iteritems():\n+ if self._verbosity > 3:\n+ print " chromosome %s" % (chromosome)\n+ ncList = NCList(self._verbosity)\n+ ncList.createIndex(True)\n+ ncList.setChromosome(chromosome)\n+ ncList.setFileName(fileName)\n+ ncList.setNbElements(self._nbRefLinesPerChromosome[chromosome])\n+ ncList.buildLists()\n+ self._ncLists[chromosome] = ncList\n+ cursor = NCListCursor(None, ncList, 0, self._verbosity)\n+ self._cursors[chromosome] = cursor\n+ self._indices[chromosome] = ncList.getIndex()\n+ endTime = time.time()\n+\n+ def compare(self):\n+ progress = Progress(self._nbLines[QUERY], "Comparing data", self._verbosity-3)\n+ startTime = time.time()\n+ for cpt, queryTranscript in enumerate(self._parsers[QUERY].getIterator()):\n+ chromosome = queryTranscript.getChromosome()\n+ if chromosome not in self._ncLists:\n+ continue\n+ self._iFowoi.setNCList(self._ncLists[chromosome], self._indices[chromosome])\n+ self._iFowoi.setTranscript(queryTranscript)\n+ self._iFowoi.compare()\n+ self._iFowoi.dumpWriter()\n+ progress.inc()\n+ progress.done()\n+ endTime = time.time()\n+ self._timeSpent = endTime - startTime\n+\n+ def run(self):\n+ 
startTime = time.time()\n+ if self._verbosity > 2:\n+ print "Creating NC-list..."\n+ self.prepareIntermediateFiles()\n+ self.createNCLists()\n+ endTime = time.time()\n+ if self._verbosity > 2:\n+ print " ...done (%.2gs)" % (endTime - startTime)\n+ self.compare()\n+ self.close()\n+ if self._verbosity > 0:\n+ print "# queries: %d" % (self._nbLines[QUERY])\n+ print "# refs: %d" % (self._nbLines[REFERENCE])\n+ print "# written: %d (%d overlaps)" % (self._iFowoi._nbWritten, self._iFowoi._nbOverlaps)\n+ print "time: %.2gs" % (self._timeSpent)\n+\n+\n+if __name__ == "__main__":\n+ description = "FindOverlaps With Several Intervals v1.0.0: Finds overlaps with several query intervals. [Category: Data comparison]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--query", dest="inputQueryFileName", action="store", type="string", help="Query input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--queryFormat", dest="queryFormat", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--ref", dest="inputRefFileName", action="store", type="string", help="Reference input file [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--refFormat", dest="refFormat", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="Output file [compulsory] [format: output file in GFF3 format]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="Trace level [format: int] [default: 1]")\n+ (options, args) = parser.parse_args()\n+ \n+ iFWSI = FindOverlapsWithSeveralIntervals(options.verbosity)\n+ iFWSI.setRefFileName(options.inputRefFileName, options.refFormat)\n+ 
iFWSI.setQueryFileName(options.inputQueryFileName, options.queryFormat)\n+ iFWSI.setOutputFileName(options.outputFileName)\n+ iFWSI.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,204 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2011\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import random, os, os.path, time, sqlite3\n+from optparse import OptionParser\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from commons.core.writer.TranscriptWriter import TranscriptWriter\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Mapping import Mapping\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n+try:\n+ import cPickle as pickle\n+except:\n+ import pickle\n+\n+MINBIN = 3\n+MAXBIN = 7\n+\n+\n+def getBin(start, end):\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tif int(start / binLevel) == int(end / binLevel):\n+\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n+\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\n+def getOverlappingBins(start, end):\n+\tarray\t= []\n+\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n+\tfor i in range(MINBIN, MAXBIN + 1):\n+\t\tbinLevel = 10 ** i\n+\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n+\tarray.append((bigBin, bigBin))\n+\treturn array\n+\n+\n+class FindOverlapsWithSeveralIntervalsBin(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity\t= verbosity\n+\t\tself.randomNumber = random.randint(0, 10000)\n+\t\tself.dbName\t = "smartdb%d" % (self.randomNumber)\n+\t\tif "SMARTTMPPATH" in os.environ:\n+\t\t\tself.dbName = 
os.join(os.environ["SMARTTMPPATH"], self.dbName)\n+\t\tself.connection = sqlite3.connect(self.dbName)\n+\t\tself.tableNames = {}\n+\t\tself.nbQueries = 0\n+\t\tself.nbRefs\t = 0\n+\t\tself.nbWritten = 0\n+\t\tself.nbOverlaps = 0\n+\t\tcursor = self.connection.cursor()\n+\t\tcursor.execute("PRAGMA journal_mode = OFF")\n+\t\tcursor.execute("PRAGMA synchronous = 0")\n+\t\tcursor.execute("PRAGMA locking_mode = EXCLUSIVE")\n+\t\tcursor.execute("PRAGMA count_change = OFF")\n+\t\tcursor.execute("PRAGMA temp_store = 2")\n+\n+\tdef __del__(self):\n+\t\tcursor = self.connection.cursor()\n+\t\tfor tableName in self.tableNames.values():\n+\t\t\tcursor.execute("DROP TABLE IF EXISTS %s" % (tableName))\n+\t\tif os.path.exists(self.dbName):\n+\t\t\tos.remove(self.dbName)\n+\t\t\n+\tdef createTable(self, chromosome):\n+\t\tcursor = self.connection.cursor()\n+\t\ttableName = "tmpTable_%s_%d" % (chromosome.replace("-", "_"), self.randomNumber)\n+\t\tcursor.execute("CREATE TABLE %s (start INT, end INT, transcript BLOB, bin INT)" % (tableName))\n+\t\tcursor.execute("CRE'..b'ursor\t = self.connection.cursor()\n+\t\t\tcursor.execute("INSERT INTO %s (start, end, transcript, bin) VALUES (?, ?, ?, ?)" % (self.tableNames[chromosome]), (start, end, sqlite3.Binary(transcriptString), bin))\n+\t\t\tself.nbRefs += 1\n+\t\tself.connection.commit()\n+\t\tendTime = time.time()\n+\t\tif self.verbosity > 2:\n+\t\t\tprint "\t...done (%.2gs)" % (endTime - startTime)\n+\n+\tdef setQueryFile(self, fileName, format):\n+\t\tchooser = ParserChooser(self.verbosity)\n+\t\tchooser.findFormat(format)\n+\t\tself.queryParser = chooser.getParser(fileName)\n+\t\tself.nbQueries = self.queryParser.getNbItems()\n+\n+\tdef setOutputFile(self, fileName):\n+\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+\tdef compare(self):\n+\t\tprogress = Progress(self.nbQueries, "Reading queries", self.verbosity)\n+\t\tstartTime = time.time()\n+\t\tfor queryTranscript in 
self.queryParser.getIterator():\n+\t\t\tif queryTranscript.__class__.__name__ == "Mapping":\n+\t\t\t\tqueryTranscript = queryTranscript.getTranscript()\n+\t\t\tprogress.inc()\n+\t\t\tqueryChromosome = queryTranscript.getChromosome()\n+\t\t\tif queryChromosome not in self.tableNames:\n+\t\t\t\tcontinue\n+\t\t\tqueryStart = queryTranscript.getStart()\n+\t\t\tqueryEnd = queryTranscript.getEnd()\n+\t\t\tbins\t = getOverlappingBins(queryStart, queryEnd)\n+\t\t\tcommands = []\n+\t\t\tfor bin in bins:\n+\t\t\t\tcommand = "SELECT * FROM %s WHERE bin " % (self.tableNames[queryChromosome])\n+\t\t\t\tif bin[0] == bin[1]:\n+\t\t\t\t\tcommand += "= %d" % (bin[0])\n+\t\t\t\telse:\n+\t\t\t\t\tcommand += "BETWEEN %d AND %d" % (bin[0], bin[1])\n+\t\t\t\tcommands.append(command)\n+\t\t\tcommand = " UNION ".join(commands)\n+\t\t\tcursor = self.connection.cursor()\n+\t\t\tcursor.execute(command)\n+\t\t\toverlap = False\n+\t\t\tline\t= cursor.fetchone()\n+\t\t\twhile line:\n+\t\t\t\trefStart, refEnd, refTranscriptString, refBin = line\n+\t\t\t\tif refStart <= queryEnd and refEnd >= queryStart:\n+\t\t\t\t\trefTranscript = pickle.loads(str(refTranscriptString))\n+\t\t\t\t\tif refTranscript.overlapWith(queryTranscript):\n+\t\t\t\t\t\toverlap = True\n+\t\t\t\t\t\tself.nbOverlaps += 1\n+\t\t\t\tline = cursor.fetchone()\n+\t\t\tif overlap:\n+\t\t\t\tself.writer.addTranscript(queryTranscript)\n+\t\t\t\tself.nbWritten += 1\n+\t\tprogress.done()\n+\t\tendTime = time.time()\n+\t\tself.timeSpent = endTime - startTime\n+\n+\tdef displayResults(self):\n+\t\tprint "# queries: %d" % (self.nbQueries)\n+\t\tprint "# refs:\t %d" % (self.nbRefs)\n+\t\tprint "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n+\t\tprint "time:\t %.2gs" % (self.timeSpent)\n+\n+\tdef run(self):\n+\t\tself.compare()\n+\t\tself.displayResults()\n+\n+if __name__ == "__main__":\n+\t\n+\tdescription = "Find Overlaps With Several Intervals Using Bin v1.0.1: Use MySQL binning to compare intervals. 
[Category: Personal]"\n+\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input1",\t dest="inputFileName1", action="store",\t\t\ttype="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--format1",\t dest="format1",\t\taction="store",\t\t\ttype="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-j", "--input2",\t dest="inputFileName2", action="store",\t\t\ttype="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+\tparser.add_option("-g", "--format2",\t dest="format2",\t\taction="store",\t\t\ttype="string", help="format of previous file [compulsory] [format: transcript file format]")\n+\tparser.add_option("-o", "--output",\t dest="outputFileName", action="store",\t\t\ttype="string", help="output file [format: output file in GFF3 format]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity",\t action="store", default=1, type="int",\thelp="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n+\n+\tfowsib = FindOverlapsWithSeveralIntervalsBin(options.verbosity)\n+\tfowsib.setQueryFile(options.inputFileName1, options.format1)\n+\tfowsib.setReferenceFile(options.inputFileName2, options.format2)\n+\tfowsib.setOutputFile(options.outputFileName)\n+\tfowsib.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,137 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random, os, time, MySQLdb
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+
+class FindOverlapsWithSeveralIntervalsIndex(object):
+    """Find the query transcripts that overlap at least one reference interval.
+
+    The (start, end) pair of every reference transcript is stored in an
+    indexed MySQL table; each query is then tested against that table with a
+    single range comparison.  Only start and end coordinates are stored and
+    compared.
+    """
+
+    def __init__(self, verbosity):
+        """Open the MySQL connection and choose a random working-table name.
+
+        verbosity -- trace level passed on to the parsers, the writer and the
+                     progress reporters created by this object
+        """
+        self.verbosity = verbosity
+        randomNumber   = random.randint(0, 10000)
+        self.dbName    = "smartdb"
+        if "SMARTTMPPATH" in os.environ:
+            # os.join() does not exist (it raised AttributeError as soon as
+            # SMARTTMPPATH was set); os.path.join() is the function meant.
+            # NOTE(review): the result is used as a MySQL database name, not
+            # as a file path -- confirm that this prefix is really wanted.
+            self.dbName = os.path.join(os.environ["SMARTTMPPATH"], self.dbName)
+        self.db         = MySQLdb.connect(db = self.dbName)
+        self.tableName  = "table_%s" % (randomNumber)
+        self.nbQueries  = 0
+        self.nbRefs     = 0
+        self.nbOverlaps = 0
+
+    def __del__(self):
+        """Drop the working table, if a connection was ever opened."""
+        db = getattr(self, "db", None)
+        if db is None:
+            # __init__ failed before connecting: there is nothing to clean up
+            return
+        cursor = db.cursor()
+        cursor.execute("DROP TABLE IF EXISTS %s" % (self.tableName))
+
+
+    def setReferenceFile(self, fileName, format):
+        """Create the working table and load every reference interval into it.
+
+        fileName -- name of the reference file
+        format   -- format name handed to ParserChooser.findFormat()
+        """
+        cursor = self.db.cursor()
+        cursor.execute("CREATE TABLE %s (start INT, end INT)" % (self.tableName))
+        cursor.execute("CREATE INDEX index_%s ON %s (start, end)" % (self.tableName, self.tableName))
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        parser = chooser.getParser(fileName)
+        progress = UnlimitedProgress(1000, "Reading references", self.verbosity)
+        for transcript in parser.getIterator():
+            start = transcript.getStart()
+            end   = transcript.getEnd()
+            # the cursor opened above is reused: no need for one per row
+            cursor.execute("INSERT INTO %s (start, end) VALUES (%d, %d)" % (self.tableName, start, end))
+            self.nbRefs += 1
+            progress.inc()
+        self.db.commit()
+        progress.done()
+
+    def setQueryFile(self, fileName, format):
+        """Open the query file and record how many transcripts it holds.
+
+        fileName -- name of the query file
+        format   -- format name handed to ParserChooser.findFormat()
+        """
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.queryParser = chooser.getParser(fileName)
+        self.nbQueries = self.queryParser.getNbTranscripts()
+
+    def setOutputFile(self, fileName):
+        """Create the GFF3 writer that receives the overlapping queries."""
+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+    def compare(self):
+        """Write every query whose [start, end] span meets a reference span.
+
+        self.nbOverlaps counts the queries written (one per query, whatever
+        the number of references it overlaps); self.timeSpent receives the
+        elapsed wall-clock time in seconds.
+        """
+        progress = Progress(self.nbQueries, "Reading queries", self.verbosity)
+        startTime = time.time()
+        cursor = self.db.cursor()
+        for queryTranscript in self.queryParser.getIterator():
+            queryStart = queryTranscript.getStart()
+            queryEnd   = queryTranscript.getEnd()
+            # Only the existence of a match matters, so ask for a single row
+            # instead of fetching every overlapping reference.
+            command    = "SELECT 1 FROM %s WHERE start <= %d and end >= %d LIMIT 1" % (self.tableName, queryEnd, queryStart)
+            cursor.execute(command)
+            if cursor.fetchone() is not None:
+                self.writer.addTranscript(queryTranscript)
+                self.nbOverlaps += 1
+            progress.inc()
+        progress.done()
+        endTime = time.time()
+        self.timeSpent = endTime - startTime
+
+    def displayResults(self):
+        """Print the counters and the time spent in compare()."""
+        # Single-argument print(...) gives the same output on Python 2 and 3.
+        print("# queries:  %d" % (self.nbQueries))
+        print("# refs:     %d" % (self.nbRefs))
+        print("# overlaps: %d" % (self.nbOverlaps))
+        print("time:       %.2gs" % (self.timeSpent))
+
+    def run(self):
+        """Compare the queries with the references, then print the summary."""
+        self.compare()
+        self.displayResults()
+
+if __name__ == "__main__":
+
+    # Command-line entry point: declare the options, then load the query
+    # file, the reference file and the output file (in that order) and run.
+    description = "Find Overlaps With Several Intervals Using Indices v1.0.1: Use MySQL to compare intervals. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option(
+        "-i", "--input1", dest="inputFileName1", action="store", type="string",
+        help="query input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option(
+        "-f", "--format1", dest="format1", action="store", type="string",
+        help="format of previous file [compulsory] [format: transcript file format]")
+    parser.add_option(
+        "-j", "--input2", dest="inputFileName2", action="store", type="string",
+        help="reference input file [compulsory] [format: file in transcript format given by -g]")
+    parser.add_option(
+        "-g", "--format2", dest="format2", action="store", type="string",
+        help="format of previous file [compulsory] [format: transcript file format]")
+    parser.add_option(
+        "-o", "--output", dest="outputFileName", action="store", type="string",
+        help="output file [format: output file in GFF3 format]")
+    parser.add_option(
+        "-v", "--verbosity", dest="verbosity", action="store", default=1, type="int",
+        help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    finder = FindOverlapsWithSeveralIntervalsIndex(options.verbosity)
+    finder.setQueryFile(options.inputFileName1, options.format1)
+    finder.setReferenceFile(options.inputFileName2, options.format2)
+    finder.setOutputFile(options.outputFileName)
+    finder.run()
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/FindOverlaps_naif.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/FindOverlaps_naif.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,85 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import os
+import struct
+from optparse import OptionParser
+from commons.core.parsing.GffParser import GffParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+# Size in bytes of a native C "long", as computed by struct.calcsize().
+# Not referenced by any other code in this module.
+LONGSIZE = struct.calcsize('l')
+
+class FindOverlaps_naif(object):
+    """Naive overlap finder: tests every query against every reference.
+
+    For each transcript of the query GFF3 file, the whole reference GFF3 file
+    is read again and each reference is tested with overlapWith().  Queries
+    with at least one hit are written out with two extra tags: "nbOverlaps"
+    and "overlapsWith".
+    """
+
+    def __init__(self, inputRefGff3FileName, inputQueryGff3FileName):
+        """Store the two input file names; no file is opened here.
+
+        inputRefGff3FileName   -- reference file, in GFF3 format
+        inputQueryGff3FileName -- query file, in GFF3 format
+        """
+        self._inputRefGff3FileName   = inputRefGff3FileName
+        self._inputQueryGff3FileName = inputQueryGff3FileName
+        # Fallback used when setOutputGff3FileName() is given an empty name.
+        # It was previously undefined, so that call raised AttributeError.
+        # Same default as FindOverlapsWithSeveralIntervals.
+        self._outputGff3FileName     = "outputOverlaps.gff3"
+        self._iGff3Writer            = None
+
+    def close(self):
+        """Close the output writer; does nothing if none has been created."""
+        if self._iGff3Writer is not None:
+            self._iGff3Writer.close()
+
+    def setGff3FileName(self, fileName):
+        """Replace the reference file name."""
+        self._inputRefGff3FileName = fileName
+
+    def setQueryGff3FileName(self, fileName):
+        """Replace the query file name."""
+        self._inputQueryGff3FileName = fileName
+
+    def setOutputGff3FileName(self, outputGff3FileName):
+        """Create the GFF3 writer.
+
+        outputGff3FileName -- output file name; an empty name (or None) keeps
+                              the name currently stored in the object
+        """
+        if outputGff3FileName:
+            self._outputGff3FileName = outputGff3FileName
+        self._iGff3Writer = Gff3Writer(self._outputGff3FileName)
+
+    def run(self):
+        """Write every query transcript that overlaps a reference transcript.
+
+        setOutputGff3FileName() must have been called first.  The reference
+        file is parsed once per query transcript.
+        """
+        queryParser = GffParser(self._inputQueryGff3FileName, 0)
+        for queryTranscript in queryParser.getIterator():
+            refParser = GffParser(self._inputRefGff3FileName, 0)
+            ids = [refTranscript.getTagValue('ID')
+                   for refTranscript in refParser.getIterator()
+                   if queryTranscript.overlapWith(refTranscript)]
+            if ids:
+                queryTranscript.setTagValue("nbOverlaps", len(ids))
+                queryTranscript.setTagValue("overlapsWith", "--".join(ids))
+                self._iGff3Writer.addTranscript(queryTranscript)
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the three file names, run the naive
+    # comparison and close the output writer.
+    description = "FindOverlapsWithSeveralInterval: Finds overlaps with several query intervals."
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
+    (options, args) = parser.parse_args()
+
+    iFON = FindOverlaps_naif(options.inputRefGff3FileName, options.inputQueryGff3FileName)
+    iFON.setOutputGff3FileName(options.outputGff3FileName)
+    try:
+        iFON.run()
+    finally:
+        # close the writer even when run() fails part-way through
+        iFON.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCIndex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCIndex.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,55 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+from SMART.Java.Python.structure.Transcript import Transcript
+
+class NCIndex(object):
+    """Coarse position index: one stored value per fixed-width coordinate bin.
+
+    A coordinate falls into bin number int(coordinate / step); the step is
+    10000 unless changed with setStep().
+    """
+
+    def __init__(self, verbosity):
+        """Create an empty index.
+
+        verbosity -- trace level (stored only)
+        """
+        self._indices   = []
+        self._step      = 10000
+        self._verbosity = verbosity
+
+    def setStep(self, step):
+        """Set the bin width used by addTranscript() and getIndex()."""
+        self._step = step
+
+    def addTranscript(self, end, index):
+        """Store *index* for every bin not yet filled, up to the bin of *end*.
+
+        Bins that already hold a value are left untouched.
+        """
+        lastNeeded = int(end / self._step)
+        nbMissing  = lastNeeded + 1 - len(self._indices)
+        if nbMissing > 0:
+            self._indices.extend([index] * nbMissing)
+
+    def getIndex(self, transcript):
+        """Return the value stored for the bin holding the transcript start.
+
+        When that bin lies beyond the last filled one, the last stored value
+        is returned instead.
+        """
+        position = int(transcript.getStart() / self._step)
+        if position < len(self._indices):
+            return self._indices[position]
+        return self._indices[-1]
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCIndex.pyc

Binary file SMART/Java/Python/ncList/NCIndex.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCList.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,337 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, os.path\n+import struct\n+import shelve\n+import sys\n+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n+from SMART.Java.Python.ncList.NCIndex import NCIndex\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+LONG_SIZE = struct.calcsize(\'l\')\n+\n+H = 0\n+L = 1\n+T = 2\n+G = 3\n+\n+H_CELL_SIZE = 2\n+L_CELL_SIZE = 5\n+T_CELL_SIZE = 6\n+\n+START = 0\n+END\t = 1\n+ADDRESS = 2\n+LIST\t= 3\n+PARENT = 4\n+NEW\t = 5\n+LENGTH = 1\n+\n+def pack(input):\n+\treturn struct.pack("l", long(input))\n+def unpack(input):\n+\treturn struct.unpack("l", input)[0]\n+\n+\n+class NCList(object):\n+\n+\tdef __init__(self, verbosity):\n+\t\tself._verbosity\t\t = verbosity\n+\t\tself._subPos\t\t\t = 0\n+\t\tself._parentPos\t\t = 0\n+\t\tself._nbLines\t\t\t = 0\n+\t\tself._nbLists\t\t\t = 0\n+\t\tself._chromosome\t\t = None\n+\t\tself._transcriptFileName = None\n+\t\tself._lHandle\t\t\t = None\n+\t\tself._hHandle\t\t\t = None\n+\t\tself._tHandle\t\t\t = None\n+\t\tself._parser\t\t\t = None\n+\t\tself._sizeDict\t\t = {H: H_CELL_SIZE, L: L_CELL_SIZE, T: T_CELL_SIZE}\n+\t\tself._offsets\t\t\t = {H: 0, L: 0, G: 0}\n+\t\tself._fileNameDict\t = {}\n+\t\tself._handleDict\t\t = {}\n+\t\tself._createIndex\t\t = False\n+\t\tself._missingValues\t = dict([table, {}] for table in self._sizeDict)\n+\t\tself._missingValues[T][LIST] = -1\n+\t\tself._missingValues[L][LIST] = 0\n+\t\tself._missingValues[T][NEW] = -1\n+\n+\tdef __del__(self):\n+\t\tfor handle in (self._lHandle, self._hHandle):\n+\t\t\tif handle != 
None:\n+\t\t\t\thandle.close()\n+\n+\tdef createIndex(self, boolean):\n+\t\tself._createIndex = boolean\n+\n+\tdef setChromosome(self, chromosome):\n+\t\tself._chromosome = chromosome\n+\n+\tdef setFileName(self, fileName):\n+\t\tself._transcriptFileName = fileName\n+\t\tself._parser = NCListFileUnpickle(fileName, self._verbosity)\n+\t\tself._setFileNames(fileName)\n+\n+\tdef setNbElements(self, nbElements):\n+\t\tself._nbLines = nbElements\n+\n+\tdef setOffset(self, fileType, offset):\n+\t\tself._offsets[fileType] = offset\n+\n+\tdef _setFileNames(self, fileName):\n+\t\tif self._chromosome != None and fileName != None:\n+\t\t\tcoreName = os.path.splitext(fileName)[0]\n+\t\t\tif "SMARTTMPPATH" in os.environ:\n+\t\t\t\tcoreName = os.path.join(os.environ["SMARTTMPPATH"], coreName)\n+\t\t\tself._hFileName = "%s_H.bin" % (coreName)\n+\t\t\tself._lFileName = "%s_L.bin" % (coreName)\n+\t\t\tself._tFileName = "%s_T.bin" % (coreName)\n+\t\t\tself._fileNameDict = {H: self._hFileName, L: self._lFileName, T: self._tFileName'..b's, "Filling table T", self._verbosity-5)\n+\t\tfor i, transcript in enumerate(self._parser.getIterator()):\n+\t\t\tself._writeValue(T, i, START, transcript.getStart())\n+\t\t\tself._writeValue(T, i, END,\t transcript.getEnd())\n+\t\t\tself._writeValue(T, i, ADDRESS, self._parser.getCurrentTranscriptAddress())\n+\t\t\tself._writeValue(T, i, PARENT, -1)\n+\t\t\tself._writeValue(T, i, LIST,\t-1)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\t\tprogress = Progress(self._nbLists, "Filling table H", self._verbosity-5)\n+\t\tfor i in xrange(self._nbLists):\n+\t\t\tself._writeValue(H, i, LENGTH, 0)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\n+\tdef _labelLists(self):\n+\t\tprogress = Progress(self._nbLines, "Getting table structure", self._verbosity-5)\n+\t\tnextL = 0\n+\t\tfor i in xrange(self._nbLines):\n+\t\t\tp\t = i - 1\n+\t\t\tstart = self._readValue(T, i, START)\n+\t\t\tend = self._readValue(T, i, END)\n+\t\t\twhile p != -1 and (start < 
self._readValue(T, p, START) or end > self._readValue(T, p, END)):\n+\t\t\t\tp = self._readValue(T, p, PARENT)\n+\t\t\tthisL = self._readValue(T, p, LIST)\n+\t\t\tif thisL == -1:\n+\t\t\t\t#print "entering"\n+\t\t\t\tthisL = nextL\n+\t\t\t\tnextL += 1\n+\t\t\t\tlength = 0\n+\t\t\t\tself._writeValue(T, p, LIST, thisL)\n+\t\t\telse:\n+\t\t\t\tlength = self._readValue(H, thisL, LENGTH)\n+\t\t\tself._writeValue(T, i,\t PARENT, p)\n+\t\t\tself._writeValue(H, thisL, LENGTH, length + 1)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\n+\tdef _computeSubStart(self):\n+\t\tprogress = Progress(self._nbLines, "Getting table sub-lists", self._verbosity-5)\n+\t\ttotal = 0\n+\t\tfor i in xrange(self._nbLists):\n+\t\t\tself._writeValue(H, i, START, total)\n+\t\t\ttotal += self._readValue(H, i, LENGTH)\n+\t\t\tself._writeValue(H, i, LENGTH, 0)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\n+\tdef _computeAbsPosition(self):\n+\t\tprogress = Progress(self._nbLines, "Writing table", self._verbosity-5)\n+\t\tself._sizeFirstList = 0\n+\t\tfor i in xrange(self._nbLines):\n+\t\t\ts = self._readValue(T, i, START)\n+\t\t\te = self._readValue(T, i, END)\n+\t\t\ta = self._readValue(T, i, ADDRESS)\n+\t\t\tpt = self._readValue(T, i, PARENT)\n+\t\t\th = self._readValue(T, pt, LIST)\n+\t\t\tpl = self._readValue(T, pt, NEW)\n+\t\t\tnb = self._readValue(H, h, LENGTH)\n+\t\t\tl = self._readValue(H, h, START) + nb\n+\t\t\tself._writeValue(T, i, NEW,\t l)\n+\t\t\tself._writeValue(L, l, START, s)\n+\t\t\tself._writeValue(L, l, END,\t e)\n+\t\t\tself._writeValue(L, l, ADDRESS, a)\n+\t\t\tself._writeValue(L, l, LIST,\t-1)\n+\t\t\tself._writeValue(L, l, PARENT, pl)\n+\t\t\tself._writeValue(H, h, LENGTH, nb+1)\n+\t\t\tif nb == 0:\n+\t\t\t\t#print "adding it"\n+\t\t\t\tself._writeValue(L, pl, LIST, h)\n+\t\t\tif pl == -1:\n+\t\t\t\tself._sizeFirstList += 1\n+\t\t\t\tif self._createIndex:\n+\t\t\t\t\tself._index.addTranscript(e, l)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n+\n+\tdef 
closeFiles(self):\n+\t\tfor handle in self._handleDict.values():\n+\t\t\thandle.close()\n+\t\tdel self._handleDict\n+\t\tself._lHandle = None\n+\t\tself._hHandle = None\n+\t\tself._tHandle = None\n+\t\tself._parser = None\n+\n+\tdef openFiles(self):\n+\t\tself._lHandle = open(self._fileNameDict[L], "rb")\n+\t\tself._hHandle = open(self._fileNameDict[H], "rb")\n+\t\tself._handleDict = {H: self._hHandle, L: self._lHandle}\n+\t\tself._parser = NCListFileUnpickle(self._transcriptFileName, self._verbosity)\n+\n+\tdef _cleanFiles(self):\n+\t\tself.closeFiles()\n+\t\tos.remove(self._fileNameDict[T])\n+\n+\tdef _getPosition(self, table, line, key):\n+\t\thandle = self._handleDict[table]\n+\t\thandle.seek(self._sizeDict[table] * line * LONG_SIZE + key * LONG_SIZE)\n+\t\treturn handle\n+\n+\tdef _writeValue(self, table, line, key, value):\n+\t\t#print "writing", table, line, key, "<-", value\n+\t\tif line == -1:\n+\t\t\tself._missingValues[table][key] = value\n+\t\t\treturn\n+\t\thandle = self._getPosition(table, line, key)\n+\t\thandle.write(pack(value))\n+\n+\tdef _readValue(self, table, line, key):\n+\t\t#print "reading", table, line, key, "->",\n+\t\tif line == -1:\n+\t\t\t#print self._missingValues[table][key]\n+\t\t\treturn self._missingValues[table][key]\n+\t\thandle = self._getPosition(table, line, key)\n+\t\tr = unpack(handle.read(LONG_SIZE))\n+\t\t#print r\n+\t\treturn r\n+\n+\tdef getIndex(self):\n+\t\treturn self._index\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCList.pyc

Binary file SMART/Java/Python/ncList/NCList.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListCursor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListCursor.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,325 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, os.path, struct\n+from commons.core.parsing.GffParser import GffParser\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+\n+class Data(object):\n+ def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):\n+ self.hIndex = hIndex\n+ self.transcript = transcript\n+ self.firstChildLIndex = firstChildLIndex\n+ self.lastChildLIndex = lastChildLIndex\n+ self.start = start\n+ self.end = end\n+\n+class NCListCursor(object):\n+\n+ def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):\n+ self._verbosity = verbosity\n+ self._mainListData = []\n+ if cursor:\n+ self.copy(cursor)\n+ else:\n+ self._ncList = ncList\n+ self.setLIndex(lIndex)\n+\n+ def setLIndex(self, lIndex):\n+ self._lIndex = lIndex\n+ self._start = None\n+ self._end = None\n+ self._hIndex = None\n+ self._gffIndex = None\n+ self._parentGffIndex = None\n+ self._parentLIndex = None\n+ self._parentHIndex = None\n+ self._parentStart = None\n+ self._parentEnd = None\n+ self._transcript = None\n+ self._firstSiblingLIndex = None\n+ self._lastSiblingLIndex = None\n+ self._firstChildLIndex = None\n+ self._lastChildLIndex = None\n+ self._mainListIndex = lIndex if lIndex < self._ncList.getSizeFirstList() else None\n+\n+ def precompute(self):\n+ self._mainListIndex = 0\n+ progress = Progress(self._ncList.getSizeFirstList(), "Precomputing data", self._verbosity)\n+ for i in range(self._ncList.getSizeFirstList()):\n+ gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)\n+ transcript = 
self._ncList.getIntervalFromAdress(gffIndex)\n+ firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)\n+ lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren-1\n+ self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))\n+ progress.inc()\n+ progress.done()\n+\n+ def _updateFromMainListData(self):\n+ if not self._mainListData or self._lIndex >= self._ncList.getSizeFirstList():\n+ #p'..b'+ self._gffIndex = None\n+ self._transcript = None\n+ self._firstChildLIndex = None\n+ self._lastChildLIndex = None\n+\n+ def moveSibling(self, lIndex):\n+ if self._lIndex < self._ncList.getSizeFirstList() - 1:\n+ self._mainListIndex = lIndex\n+ self._updateFromMainListData()\n+ self._lIndex = lIndex\n+ self._hIndex = None\n+ self._start = None\n+ self._end = None\n+ self._gffIndex = None\n+ self._transcript = None\n+ self._firstChildLIndex = None\n+ self._lastChildLIndex = None\n+\n+ def moveLastSibling(self):\n+ if self._lIndex < self._ncList.getSizeFirstList() - 1:\n+ self._mainListIndex = self._ncList.getSizeFirstList() - 1\n+ self._updateFromMainListData()\n+ if self._lastSiblingLIndex == None:\n+ self._getSiblingData()\n+ self._lIndex = self._lastSiblingLIndex\n+ self._hIndex = None\n+ self._start = None\n+ self._end = None\n+ self._gffIndex = None\n+ self._transcript = None\n+ self._firstChildLIndex = None\n+ self._lastChildLIndex = None\n+\n+ def moveDown(self):\n+ if self._firstChildLIndex == None:\n+ self._getChildrenData()\n+ self._parentLIndex = self._lIndex\n+ self._parentHIndex = self._hIndex\n+ self._parentGffIndex = self._gffIndex\n+ self._lIndex = self._firstChildLIndex\n+ self._lastSiblingLIndex = self._lastChildLIndex\n+ self._hIndex = None\n+ self._gffIndex = None\n+ self._transcript = None\n+ self._firstChildLIndex = None\n+ self._lastChildLIndex = None\n+ self._parentStart = self._start\n+ self._parentEnd = self._end\n+ self._start = None\n+ self._end = None\n+\n+ def 
isOut(self):\n+ return (self._lIndex == -1)\n+\n+ def isTop(self):\n+ if self._parentLIndex == None:\n+ self._getCurrentData()\n+ return (self._parentLIndex == -1)\n+\n+ def hasChildren(self):\n+ if self._hIndex == None:\n+ self._getCurrentData()\n+ if self._hIndex == -1:\n+ return False\n+ if self._firstChildLIndex == None:\n+ self._getChildrenData()\n+ return (self._firstChildLIndex != -1)\n+\n+ def copy(self, cursor):\n+ self._ncList = cursor._ncList\n+ self._lIndex = cursor._lIndex\n+ self._hIndex = cursor._hIndex\n+ self._gffIndex = cursor._gffIndex\n+ self._parentLIndex = cursor._parentLIndex\n+ self._parentHIndex = cursor._parentHIndex\n+ self._parentGffIndex = cursor._parentGffIndex\n+ self._transcript = cursor._transcript\n+ self._firstSiblingLIndex = cursor._firstSiblingLIndex\n+ self._lastSiblingLIndex = cursor._lastSiblingLIndex\n+ self._firstChildLIndex = cursor._firstChildLIndex\n+ self._lastChildLIndex = cursor._lastChildLIndex\n+ self._mainListData = cursor._mainListData\n+ self._mainListIndex = cursor._mainListIndex\n+ self._verbosity = cursor._verbosity\n+ self._parentStart = cursor._parentStart\n+ self._parentEnd = cursor._parentEnd\n+ self._start = cursor._start\n+ self._end = cursor._end\n+\n+ def __str__(self):\n+ return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListCursor.pyc

Binary file SMART/Java/Python/ncList/NCListCursor.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListFilePickle.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListFilePickle.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,123 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+try:
+ import cPickle as pickle
+except:
+ import pickle
+from SMART.Java.Python.structure.Transcript import Transcript
+
+
+class NCListFilePickle(object):
+    """Write transcripts to a file as a sequence of consecutive pickles.
+
+    The output file is opened in binary write mode when the object is
+    created; each call to addTranscript() appends one pickle to it.
+    """
+
+    def __init__(self, fileName, verbosity = 1):
+        self.fileName  = fileName
+        self.handle    = open(fileName, "wb")
+        self.verbosity = verbosity
+
+    def __del__(self):
+        self.close()
+
+    def addTranscript(self, transcript):
+        """Append *transcript* to the file with the highest pickle protocol (-1)."""
+        pickle.dump(transcript, self.handle, -1)
+
+    def write(self):
+        """No-op: addTranscript() already dumps each transcript to the handle."""
+        pass
+
+    def close(self):
+        """Close the output file; calling it several times is harmless."""
+        # getattr() keeps this safe when open() failed in __init__ and the
+        # attribute was therefore never set (close() is also run by __del__).
+        handle = getattr(self, "handle", None)
+        if handle is not None:
+            handle.close()
+
+
+class NCListFileUnpickle(object):
+    """Read back objects stored as consecutive pickles in a file.
+
+    Reading starts at ``initAddress`` (0 unless changed with
+    setInitAddress()).  When a chromosome is set with setChromosome(),
+    reading stops at the first transcript whose getChromosome() differs.
+    """
+
+    def __init__(self, fileName, verbosity = 1):
+        self.handle        = open(fileName, "rb")
+        self.verbosity     = verbosity
+        self.initAddress   = 0
+        self.address       = self.initAddress
+        self.nbTranscripts = None
+        self.fileName      = fileName
+        self.over          = False
+        self.chromosome    = None
+
+    def __del__(self):
+        # getattr() keeps the finalizer safe when open() failed in __init__.
+        handle = getattr(self, "handle", None)
+        if handle is not None:
+            handle.close()
+
+    def reset(self):
+        """Move the file position and the initial address back to offset 0."""
+        self.handle.seek(0)
+        self.initAddress = 0
+
+    def setChromosome(self, chromosome):
+        """Restrict reading to transcripts of *chromosome* (None: no filter)."""
+        self.chromosome = chromosome
+
+    def getNbTranscripts(self):
+        """Return the number of transcripts yielded by getIterator().
+
+        The count is computed once, by iterating over the file (which moves
+        the file position and sets the "over" flag), and then cached.
+        """
+        if self.nbTranscripts is not None:
+            return self.nbTranscripts
+        count = 0
+        for transcript in self.getIterator():
+            count += 1
+        self.nbTranscripts = count
+        return self.nbTranscripts
+
+    def gotoAddress(self, address):
+        """Seek to byte offset *address* and record it as the current address."""
+        self.handle.seek(address)
+        self.address = address
+
+    def getNextTranscript(self):
+        """Unpickle and return the next object.
+
+        Returns False, and sets the "over" flag, at end of file or when a
+        chromosome filter is set and the object read belongs to another
+        chromosome.
+        """
+        # Remember where this object starts: getCurrentTranscriptAddress()
+        # reports this offset.
+        self.address = self.handle.tell()
+        try:
+            # NOTE(review): pickle.load() can execute arbitrary code; only
+            # files written by this tool should be read here.
+            transcript = pickle.load(self.handle)
+        except EOFError:
+            self.over = True
+            return False
+        if self.chromosome is not None and transcript.getChromosome() != self.chromosome:
+            self.over = True
+            return False
+        return transcript
+
+    def getIterator(self):
+        """Yield the objects read from initAddress until reading is over."""
+        self.gotoAddress(self.initAddress)
+        while True:
+            transcript = self.getNextTranscript()
+            # Any falsy value (False included) ends the iteration.
+            if not transcript:
+                self.over = True
+                return
+            yield transcript
+
+    def setInitAddress(self, address):
+        """Set the byte offset at which getIterator() starts reading."""
+        self.initAddress = address
+
+    def getCurrentTranscriptAddress(self):
+        """Return the byte offset of the object read last."""
+        return self.address
+
+    def isOver(self):
+        """Tell whether the end of the data has been reached."""
+        return self.over

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListFilePickle.pyc

Binary file SMART/Java/Python/ncList/NCListFilePickle.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListHandler.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListHandler.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,125 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import struct
+try:
+ import cPickle as pickle
+except:
+ import pickle
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCIndex import NCIndex
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+
+LONG_SIZE = struct.calcsize('l')
+
+INFO_PER_NCLIST = 5
+H_FILE     = 0
+L_FILE     = 1
+G_FILE     = 2
+FIRST_LIST_SIZE = 3
+INDEX     = 4
+
+H = 0
+L = 1
+T = 2
+G = 3
+
+def pack(input):
+    """Convert *input* to an integer and pack it as a native C long ("l")."""
+    # int() replaces long(): on Python 2 it promotes to long by itself and
+    # packs to the same bytes, and unlike long() it also exists on Python 3.
+    return struct.pack("l", int(input))
+def unpack(input):
+    """Decode one native C long ("l") from the byte string *input*."""
+    (value,) = struct.unpack("l", input)
+    return value
+
+
+class NCListHandler(object):
+    """Read a merged NC-list file and rebuild one NCList per chromosome.
+
+    Layout of the file, as read by loadData():
+      * a pickled list of chromosome names;
+      * one long per chromosome: its number of elements;
+      * the header: INFO_PER_NCLIST longs per chromosome, indexed by
+        H_FILE, L_FILE, G_FILE, FIRST_LIST_SIZE and INDEX.  The INDEX
+        field is the file position of a pickled index, or -1 if absent.
+    """
+
+    def __init__(self, verbosity):
+        self._verbosity = verbosity
+        self._index     = False
+
+    def setFileName(self, fileName):
+        """Remember *fileName* and open it for binary reading."""
+        self._fileName = fileName
+        self._handle   = open(fileName, "rb")
+
+    def _readHeaderValue(self, i, info):
+        """Return the header field *info* of the i-th chromosome."""
+        self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + info * LONG_SIZE)
+        return unpack(self._handle.read(LONG_SIZE))
+
+    def loadData(self):
+        """Read the chromosome list, the element counts and the header, and
+        build one NCList per chromosome that shares this object's handle."""
+        # NOTE(review): pickle.load() can execute arbitrary code; only files
+        # written by this tool should be read here.
+        self._chromosomes = pickle.load(self._handle)
+        self._nbElements = 0
+        self._nbElementsPerChromosome = {}
+        self._ncLists = {}
+        for chromosome in self._chromosomes:
+            nbElements = unpack(self._handle.read(LONG_SIZE))
+            self._nbElementsPerChromosome[chromosome] = nbElements
+            self._nbElements += nbElements
+        # The header starts right after the per-chromosome element counts.
+        self._headerPos = self._handle.tell()
+        for i, chromosome in enumerate(self._chromosomes):
+            ncList = NCList(self._verbosity)
+            ncList._hHandle = self._handle
+            ncList._lHandle = self._handle
+            ncList._parser  = NCListFileUnpickle(self._fileName)
+            ncList.setOffset(H, self._readHeaderValue(i, H_FILE))
+            ncList.setOffset(L, self._readHeaderValue(i, L_FILE))
+            ncList.setOffset(G, self._readHeaderValue(i, G_FILE))
+            ncList._sizeFirstList = self._readHeaderValue(i, FIRST_LIST_SIZE)
+            indices = self._readHeaderValue(i, INDEX)
+            if indices != -1:
+                self._handle.seek(indices)
+                data = pickle.load(self._handle)
+                index = NCIndex(self._verbosity)
+                index._indices = data
+                ncList._index = index
+            self._ncLists[chromosome] = ncList
+
+    def getChromosomes(self):
+        return self._chromosomes
+
+    def getNbElements(self):
+        return self._nbElements
+
+    def getNbElementsPerChromosome(self):
+        return self._nbElementsPerChromosome
+
+    def getNCLists(self):
+        return self._ncLists
+
+    def getParser(self, chromosome = None):
+        """Return a new NCListFileUnpickle positioned on the transcript data.
+
+        Without *chromosome*, the parser starts at the G_FILE offset of the
+        first chromosome and is not filtered; otherwise it starts at the
+        G_FILE offset of *chromosome* and is restricted to it.
+        """
+        parser = NCListFileUnpickle(self._fileName)
+        if chromosome is None:
+            # The offset has to be read from the file: unpack() takes the
+            # bytes of one long, not a handle and a position.
+            parser.setInitAddress(self._readHeaderValue(0, G_FILE))
+            return parser
+        i = self._chromosomes.index(chromosome)
+        parser.setInitAddress(self._readHeaderValue(i, G_FILE))
+        parser.setChromosome(chromosome)
+        return parser

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListMerger.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListMerger.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,126 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import struct, os, shutil
+try:
+ import cPickle as pickle
+except:
+ import pickle
+
+LONG_SIZE = struct.calcsize('l')
+
+INFO_PER_NCLIST = 5
+H_FILE     = 0
+L_FILE     = 1
+G_FILE     = 2
+FIRST_LIST_SIZE = 3
+INDEX     = 4
+
+def pack(input):
+    """Convert *input* to an integer and pack it as a native C long ("l")."""
+    # int() replaces long(): on Python 2 it promotes to long by itself and
+    # packs to the same bytes, and unlike long() it also exists on Python 3.
+    return struct.pack("l", int(input))
+def unpack(input):
+    """Decode one native C long ("l") from the byte string *input*."""
+    decoded = struct.unpack("l", input)
+    return decoded[0]
+
+
+class NCListMerger(object):
+    """Merge the per-chromosome NC-list files into a single binary file.
+
+    Layout of the file written by merge():
+      * a pickled, sorted list of chromosome names;
+      * one long per chromosome: the _nbLines of its NC-list;
+      * the header: INFO_PER_NCLIST longs per chromosome (H_FILE, L_FILE,
+        G_FILE, FIRST_LIST_SIZE, INDEX), first filled with -1;
+      * all H files, all L files, the pickled indices (if requested with
+        addIndex()), then all transcript files.
+    The input files are removed once the merge is complete.
+    """
+
+    def __init__(self, verbosity):
+        self._verbosity = verbosity
+        self._index = False
+
+    def setFileName(self, fileName):
+        """Open *fileName* for binary writing as the merged output."""
+        self._handle = open(fileName, "wb")
+
+    def setNCLists(self, ncLists):
+        """Register the NC-lists to merge, keyed by chromosome name."""
+        self._ncLists = ncLists
+        self._chromosomes = sorted(self._ncLists.keys())
+
+    def addIndex(self, boolean):
+        """Choose whether the NC-list indices are stored in the output."""
+        self._index = boolean
+
+    def merge(self):
+        """Write the merged file, close it and remove the input files."""
+        try:
+            self._writeHeader()
+            self._addNCLists()
+        finally:
+            # Do not leave the output handle open when merging fails.
+            self._handle.close()
+        self._removeInputFiles()
+
+    def _writeHeader(self):
+        pickle.dump(self._chromosomes, self._handle, -1)
+        for chromosome in self._chromosomes:
+            self._handle.write(pack(self._ncLists[chromosome]._nbLines))
+        self._headerPos = self._handle.tell()
+        # Reserve the header: every field is -1 until _addInHeader() sets it.
+        self._handle.write(pack(-1) * (INFO_PER_NCLIST * len(self._chromosomes)))
+
+    def _addInHeader(self, i, info, value = None):
+        """Store *value* in header field *info* of the i-th chromosome.
+
+        When *value* is None, the current file position is stored.  The
+        file position is restored afterwards.
+        """
+        currentPos = self._handle.tell()
+        if value is None:
+            value = currentPos
+        self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + info * LONG_SIZE)
+        self._handle.write(pack(value))
+        self._handle.seek(currentPos)
+
+    def _appendFile(self, fileName):
+        """Copy the binary file *fileName* to the output and mark it for removal."""
+        # The inputs hold packed longs and pickles: they must be read in
+        # binary mode, like the output they are copied to.
+        with open(fileName, "rb") as inputFile:
+            shutil.copyfileobj(inputFile, self._handle)
+        self._inputFileNames.append(fileName)
+
+    def _addNCLists(self):
+        self._inputFileNames = []
+        for i, chromosome in enumerate(self._chromosomes):
+            self._addInHeader(i, H_FILE)
+            self._appendFile(self._ncLists[chromosome]._hFileName)
+        for i, chromosome in enumerate(self._chromosomes):
+            self._addInHeader(i, L_FILE)
+            self._appendFile(self._ncLists[chromosome]._lFileName)
+        for i, chromosome in enumerate(self._chromosomes):
+            self._addInHeader(i, FIRST_LIST_SIZE, self._ncLists[chromosome].getSizeFirstList())
+        if self._index:
+            for i, chromosome in enumerate(self._chromosomes):
+                self._addInHeader(i, INDEX)
+                pickle.dump(self._ncLists[chromosome].getIndex()._indices, self._handle, -1)
+        for i, chromosome in enumerate(self._chromosomes):
+            self._addInHeader(i, G_FILE)
+            self._appendFile(self._ncLists[chromosome]._transcriptFileName)
+
+    def _removeInputFiles(self):
+        for fileName in self._inputFileNames:
+            os.remove(fileName)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/NCListParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,74 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import random, os, time
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+try:
+   import cPickle as pickle
+except:
+   import pickle
+
+class NCListParser(object):
+    """Load the NC-list data pickled in a file.
+
+    The file holds four consecutive pickles: the sorted file names, the
+    total number of elements, the number of elements per chromosome and
+    the NC-lists themselves.
+    """
+
+    def __init__(self, fileName, verbosity = 1):
+        self._fileName                = fileName
+        self._ncLists                 = {}
+        self._sortedFileNames         = {}
+        self._nbElements              = 0
+        self._nbElementsPerChromosome = {}
+        self._verbosity               = verbosity
+
+    def parse(self):
+        """Read the four pickles, then call _reopenFiles() on every NC-list."""
+        # Pickles are binary data, so the file is opened in "rb" mode; the
+        # try/finally closes it even when unpickling fails.
+        # NOTE(review): pickle.load() can execute arbitrary code; only files
+        # written by this tool should be read here.
+        handle = open(self._fileName, "rb")
+        try:
+            self._sortedFileNames         = pickle.load(handle)
+            self._nbElements              = pickle.load(handle)
+            self._nbElementsPerChromosome = pickle.load(handle)
+            self._ncLists                 = pickle.load(handle)
+            for ncList in self._ncLists.values():
+                ncList._reopenFiles()
+        finally:
+            handle.close()
+
+    def getSortedFileNames(self):
+        return self._sortedFileNames
+
+    def getNbElements(self):
+        return self._nbElements
+
+    def getNbElementsPerChromosome(self):
+        return self._nbElementsPerChromosome
+
+    def getNCLists(self):
+        return self._ncLists

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/__init__.pyc

Binary file SMART/Java/Python/ncList/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/MockFindOverlapsWithSeveralIntervals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/MockFindOverlapsWithSeveralIntervals.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,197 @@\n+from SMART.Java.Python.misc import Utils\n+\n+class MockFindOverlapsWithOneInterval (object) :\n+ def write(self, inFileName):\n+ Utils.writeFile(inFileName, "chr1\\ttest\\ttest1.1\\t0\\t1000\\t.\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n+\n+class MockFindOverlapsWithServeralIntervals_case1 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName, "w")\n+\t\tf.write("chr1\\ttest\\ttest1.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.2\\t50\\t350\\t301\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.3\\t100\\t600\\t501\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.4\\t200\\t450\\t251\\t+\\t.\\tID=test1.4;Name=test1.4\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.5\\t700\\t950\\t251\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.6\\t800\\t900\\t101\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n+\t\tf.write("chr1\\ttest\\ttest1.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test1.7;Name=test1.7\\n")\n+\t\tf.close()\n+\n+class MockFindOverlapsWithServeralIntervals_case2 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest2.1\\t0\\t500\\t501\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest2.2\\t50\\t450\\t401\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest2.3\\t100\\t400\\t301\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n+\t\tf.write("chr1\\ttest\\ttest2.4\\t100\\t200\\t101\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n+\t\tf.write("chr1\\ttest\\ttest2.5\\t900\\t1200\\t301\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n+\t\tf.close()\n+\n+class MockFindOverlapsWithServeralIntervals_case3 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = 
open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest3.1\\t0\\t500\\t501\\t+\\t.\\tID=test3.1;Name=test3.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest3.2\\t50\\t450\\t401\\t+\\t.\\tID=test3.2;Name=test3.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest3.3\\t100\\t400\\t301\\t+\\t.\\tID=test3.3;Name=test3.3\\n")\n+\t\tf.write("chr1\\ttest\\ttest3.4\\t100\\t200\\t101\\t+\\t.\\tID=test3.4;Name=test3.4\\n")\n+\t\tf.write("chr1\\ttest\\ttest3.5\\t300\\t400\\t101\\t+\\t.\\tID=test3.5;Name=test3.5\\n")\n+\t\tf.write("chr1\\ttest\\ttest3.6\\t800\\t1000\\t201\\t+\\t.\\tID=test3.6;Name=test3.6\\n")\n+\t\tf.close()\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_case4_5 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest4.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test4.1;Name=test4.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest4.2\\t200\\t800\\t601\\t+\\t.\\tID=test4.2;Name=test4.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest4.3\\t400\\t600\\t201\\t+\\t.\\tID=test4.3;Name=test4.3\\n")\n+\t\tf.close()\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_case6_7 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest6.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test6.1;Name=test6.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest6.2\\t100\\t300\\t201\\t+\\t.\\tID=test6.2;Name=test6.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest6.3\\t400\\t500\\t101\\t+\\t.\\tID=test6.3;Name=test6.3\\n")\n+\t\tf.write("chr1\\ttest\\ttest6.4\\t510\\t520\\t11\\t+\\t.\\tID=test6.4;Name=test6.4\\n")\n+\t\tf.write("chr1\\ttest\\ttest6.5\\t850\\t950\\t001\\t+\\t.\\tID=test6.5;Name=test6.5\\n")\n+\t\tf.close()\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_case8 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = 
open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest8.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test8.1;Name=test8.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest8.2\\t100\\t200\\t101\\t+\\t.\\tID=test8.2;Name=test8.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest8.3\\t300\\t400\\t101\\t+\\t.\\tID=test8.3;Name=test8.3\\n")\n+\t\tf.close()\t\t\n+\n+class MockFindOverlapsWithServeralIntervals_case9 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest9.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test9.1;Name=test9.1\\n")\n+\t\tf.write("chr1\\ttest\\ttest9.2\\t600\\t700\\t101\\t+\\t.\\tID=test9.2;Name=test9.2\\n")\n+\t\tf.write("chr1\\ttest\\ttest9.3\\t800\\t1200\\t401\\t+\\t.\\tID=test9.3;Name=test9.3\\n")\n+\t\tf.close()\n+\n+class MockFindOverlapsWithServeralIntervals_case10 (object) :\n+\tdef write(self,inFileName):\n+\t\tf = open(inFileName,\'w\')\n+\t\tf.write("chr1\\ttest\\ttest10.1\\t0\\t1000\\t1001\\t+'..b'ose()\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case2 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery2.1\\t150\\t300\\t151\\t+\\t.\\tID=query_1;Name=query2.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery2.2\\t300\\t450\\t151\\t+\\t.\\tID=query_2;Name=query2.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery2.3\\t480\\t800\\t321\\t+\\t.\\tID=query_3;Name=query2.3\\n")\n+\t\tf.write("chr1\\tquery\\tquery2.4\\t560\\t800\\t241\\t+\\t.\\tID=query_4;Name=query2.4\\n")\n+\t\tf.write("chr1\\tquery\\tquery2.5\\t850\\t1000\\t151\\t+\\t.\\tID=query_5;Name=query2.5\\n")\n+\t\tf.close()\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case3 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, 
\'w\')\n+\t\tf.write("chr1\\tquery\\tquery3.1\\t150\\t250\\t101\\t+\\t.\\tID=query_1;Name=query3.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery3.2\\t380\\t400\\t21\\t+\\t.\\tID=query_2;Name=query3.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery3.3\\t480\\t520\\t41\\t+\\t.\\tID=query_3;Name=query3.3\\n")\n+\t\tf.write("chr1\\tquery\\tquery3.4\\t510\\t700\\t191\\t+\\t.\\tID=query_4;Name=query3.4\\n")\n+\t\tf.write("chr1\\tquery\\tquery3.5\\t900\\t950\\t41\\t+\\t.\\tID=query_5;Name=query3.5\\n")\n+\t\tf.close()\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case4 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=query4.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=query4.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=query4.3\\n")\n+\t\tf.close()\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case5 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=query5.1\\n")\n+\t\tf.close()\t\t\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case6 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=query6.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=query6.2\\n")\n+\t\tf.close()\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case7 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, 
\'w\')\n+\t\tf.write("chr1\\tquery\\tquery6.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=query6.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery6.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=query6.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery6.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=query6.3\\n")\n+\t\tf.close()\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case8 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery7.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=query7.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery7.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=query7.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery7.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=query7.3\\n")\n+\t\tf.write("chr1\\tquery\\tquery7.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=query7.4\\n")\n+\t\tf.close()\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case9 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery8.1\\t400\\t400\\t101\\t+\\t.\\tID=query_1;Name=query8.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery8.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=query8.2\\n")\n+\t\tf.close()\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case10 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, \'w\')\n+\t\tf.write("chr1\\tquery\\tquery10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=query10.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=query10.2\\n")\n+\t\tf.write("chr1\\tquery\\tquery10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=query10.3\\n")\n+\t\tf.close()\t\t\t\n+\t\t\n+class MockFindOverlapsWithServeralIntervals_query_case11 (object):\n+\tdef write(self, fileName):\n+\t\tf = open(fileName, 
\'w\')\n+\t\tf.write("chr1\\tquery\\tquery11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=query11.1\\n")\n+\t\tf.write("chr1\\tquery\\tquery11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=query11.2\\n")\n+\t\tf.close()\t\t\n+\t\t\n+\t\t\t\t\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,118 @@
+import os
+import random
+from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.GffParser import GffParser
+
+class MockFindOverlaps_randomExample(object):
+    """Write a random file through MockFindOverlaps_randomExample_NonOrder, then sort it in place.
+
+    The sorting is delegated to the external ``sort`` command.
+    """
+
+    def __init__(self, fileName, ID, numberOfReads, chromSize):
+        """Store the four values that write() forwards to MockFindOverlaps_randomExample_NonOrder."""
+        self._fileName = fileName
+        self._ID = ID
+        self._numberOfReads = numberOfReads
+        self._chromSize = chromSize
+
+    def write(self):
+        """Generate the unsorted file, then sort it in place with ``sort``."""
+        # Local import: the module header shown here only imports os and random.
+        import subprocess
+        unsortedMock = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
+        unsortedMock.write()
+        # The arguments are passed as a list (no shell), so a file name holding
+        # spaces or shell metacharacters reaches sort unchanged.
+        # As with the former os.system call, the exit status is not checked.
+        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', self._fileName, self._fileName])
+
+class MockFindOverlaps_randomExample_NonOrder(object):
+    """Write random regions to a file with RandomRegionsGenerator, without sorting them."""
+
+    def __init__(self, fileName, ID, numberOfReads, chromSize):
+        """Store the output file name, the ID, the number of regions and the genome size.
+
+        The ID is only stored; write() does not read it.
+        """
+        self._fileName = fileName
+        self._ID = ID
+        self._numberOfReads = numberOfReads
+        self._chromSize = chromSize
+
+    def write(self):
+        """Configure a RandomRegionsGenerator and run it with self._fileName as output file."""
+        # NOTE(review): the meaning of the constructor argument 2 is not visible here
+        # (presumably a verbosity level) - confirm against RandomRegionsGenerator.
+        generator = RandomRegionsGenerator(2)
+        generator.setMinSize(36)
+        generator.setMaxSize(100)
+        generator.setGenomeSize(self._chromSize)
+        generator.setChromosomeName("chr1")
+        generator.setStrands(False)
+        generator.setNumber(self._numberOfReads)
+        generator.setOutputFile(self._fileName)
+        generator.run()
+
+
+class MockFindOverlaps_randomExample_MOverlaps(object):
+
+ def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
+     """Store the reference/query file names, the target overlap count, the read count and the chromosome size."""
+     self._refFileName = refFileName
+     self._queryFileName = queryFileName
+     self._overlapNumber = overlapNumber
+     self._numberOfReads = numberOfReads
+     self._chromSize = chromSize
+
+ def createRandomExample(self):
+ # Write the random reference file, add random query transcripts to a GFF3
+ # TranscriptWriter, then sort the reference and query files in place with
+ # the external `sort` command.
+ # NOTE(review): the indentation of this method was lost in this rendering, so
+ # the nesting of the three `while` loops below cannot be read from the text;
+ # confirm it against the original source file before changing the logic.
+ # NOTE(review): the local name `id` shadows the builtin of the same name.
+ id = 'reference'
+ iRSS = MockFindOverlaps_randomExample(self._refFileName, id, self._numberOfReads, self._chromSize)
+ iRSS.write()
+ self.queryWriter = TranscriptWriter(self._queryFileName , 'gff3')
+ totalOverlap = 0
+ while totalOverlap != self._overlapNumber:
+ totalOverlap = 0
+ i = 0
+ while i < 10:
+ query = self.createRandomTranscript(i, id)
+ # getOverlapNumber starts counting from totalOverlap, so the value it
+ # returns is a running total rather than a per-query count.
+ overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
+ # Draw a new random query while the running total exceeds the target.
+ while overlapNumber > self._overlapNumber:
+ query = self.createRandomTranscript(i, id)
+ overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
+ totalOverlap = overlapNumber
+ i += 1
+ self.queryWriter.addTranscript(query)
+ self.queryWriter.write()
+ self.queryWriter.close()
+# os.rename("%s.gff3" % (self._queryFileName), self._queryFileName)
+
+ # Sort both files in place; the exit status of `sort` is not checked.
+ cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._refFileName, self._refFileName)
+ os.system(cmd)
+ cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._queryFileName, self._queryFileName)
+ os.system(cmd)
+
+ def createRandomTranscript(self, cpt, id):
+     """Return a random '+' strand transcript on chr1, named and tagged '<id>_<cpt>'.
+
+     The size is drawn in [36, 100] and the start in [0, 1000 - size].
+     """
+     # NOTE(review): the parameter name `id` shadows the builtin; it is kept
+     # because it is part of the method signature.
+     generator = RandomRegionsGenerator(2)
+     strand = '+'
+     chromosome = 'chr1'
+     # The size is drawn before the start, as the start range depends on it.
+     size = random.randint(36, 100)
+     generator.setSize(size)
+     # NOTE(review): the upper bound uses a fixed 1000, not self._chromSize - confirm this is intended.
+     start = random.randint(0, 1000-size)
+     transcript = generator.createTranscript(chromosome, start, size, strand, cpt)
+     IDdetail = '%s_%d'%(id,cpt)
+     transcript.setTagValue('ID', IDdetail)
+     transcript.setName(IDdetail)
+     return transcript
+
+ def isOverlap(self, query, ref):
+     """Return True when the [start, end] ranges of query and ref share at least one position.
+
+     Both bounds are inclusive: ranges that only touch at one end point overlap.
+     """
+     return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()
+
+ def getIntervalFromAdress(self, fileName, address):
+     """Return the transcript that GffParser reads from fileName after moving to address.
+
+     address is converted with int() before being handed to the parser.
+     """
+     iParser = GffParser(fileName)
+     try:
+         iParser.gotoAddress(int(address))
+         return iParser.getNextTranscript()
+     finally:
+         # Close the parser even when seeking or parsing raises.
+         iParser.close()
+
+ def getOverlapNumber(self, query, refFileName, totalOverlap):
+     """Return totalOverlap plus the number of reference intervals that overlap query.
+
+     The reference file is walked line by line; the offset of each line start is
+     handed to getIntervalFromAdress to parse the interval found there.
+     """
+     count = totalOverlap
+     # The context manager closes the file even when parsing raises.
+     with open(refFileName, 'r') as fRef:
+         # readline() is used rather than iterating the file, so that tell()
+         # gives the offset of the next line.
+         address = fRef.tell()
+         line = fRef.readline()
+         while line != '':
+             ref = self.getIntervalFromAdress(refFileName, address)
+             if self.isOverlap(query, ref):
+                 count += 1
+             address = fRef.tell()
+             line = fRef.readline()
+     return count
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_FileSorter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_FileSorter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,84 @@
+import os
+import unittest
+import struct
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+
+
+class Test_F_FileSorter(unittest.TestCase):
+    """Functional tests for FileSorter.
+
+    Each test writes transcripts to a GFF3 file, sorts them with FileSorter and
+    reads the sorted output back with NCListFileUnpickle.
+    """
+
+    def setUp(self):
+        self._inputGff3FileName = 'inputFile.gff3'
+        self._outputFileName    = 'outputFile.pkl'
+
+    def tearDown(self):
+        # Remove the two files named in setUp, when a test created them.
+        for fileName in (self._inputGff3FileName, self._outputFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def test_unique(self):
+        # A single transcript comes back unchanged.
+        transcript = self._createTranscript("chr1", 100, 200, "test1.1")
+        parser     = self._writeAndSortAndParse([transcript])
+        self.assertEqual(parser.getNbTranscripts(), 1)
+        for transcript in parser.getIterator():
+            self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")
+
+    def test_simple(self):
+        # Two transcripts written in decreasing start order come back in increasing start order.
+        transcript1 = self._createTranscript("chr1", 300, 400, "test1.1")
+        transcript2 = self._createTranscript("chr1", 100, 200, "test1.2")
+        parser = self._writeAndSortAndParse([transcript1, transcript2])
+        self.assertEqual(parser.getNbTranscripts(), 2)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            if cpt == 0:
+                self._checkTranscript(transcript, "chr1", 100, 200, "test1.2")
+            else:
+                self._checkTranscript(transcript, "chr1", 300, 400, "test1.1")
+
+    def test_same_start(self):
+        # With equal starts, the transcript with the larger end is expected first.
+        transcript1 = self._createTranscript("chr1", 100, 200, "test1.1")
+        transcript2 = self._createTranscript("chr1", 100, 300, "test1.2")
+        parser = self._writeAndSortAndParse([transcript1, transcript2])
+        self.assertEqual(parser.getNbTranscripts(), 2)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            if cpt == 0:
+                self._checkTranscript(transcript, "chr1", 100, 300, "test1.2")
+            else:
+                self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")
+
+    def _writeAndSortAndParse(self, transcripts):
+        """Write transcripts to the input GFF3 file, sort it, and return a parser on the sorted output."""
+        writer = Gff3Writer(self._inputGff3FileName, 0)
+        for transcript in transcripts:
+            writer.addTranscript(transcript)
+        writer.close()
+        parser = GffParser(self._inputGff3FileName, 0)
+        fs = FileSorter(parser, 0)
+        fs.setOutputFileName(self._outputFileName)
+        fs.sort()
+        parser = NCListFileUnpickle(self._outputFileName, 0)
+        return parser
+
+    def _createTranscript(self, chromosome, start, end, name):
+        """Return a '+' strand Transcript with the given chromosome, bounds and name."""
+        transcript = Transcript()
+        transcript.setChromosome(chromosome)
+        transcript.setStart(start)
+        transcript.setEnd(end)
+        transcript.setDirection("+")
+        transcript.setName(name)
+        return transcript
+
+    def _checkTranscript(self, transcript, chromosome, start, end, name):
+        """Assert the chromosome, bounds, direction and name of a transcript."""
+        self.assertEqual(transcript.getChromosome(), chromosome)
+        self.assertEqual(transcript.getStart(),      start)
+        self.assertEqual(transcript.getEnd(),        end)
+        # Transcripts are created with direction "+", which is expected to read back as 1.
+        self.assertEqual(transcript.getDirection(),  1)
+        self.assertEqual(transcript.getName(),       name)
+
+
+# Run the test case when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithOneInterval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithOneInterval.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,121 @@
+import unittest
+import struct
+import os
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval
+from SMART.Java.Python.misc import Utils
+
+class Test_F_FindOverlapsWithOneInterval(unittest.TestCase):
+
+    def setUp(self):
+        # File names used by every test: sorted input, observed output, expected output.
+        self._inputGff3FileName = 'sortedFile.gff3'
+        self._obsFileName = "overlap.gff3"
+        self._expFileName = "expFile.gff3"
+        self._writeGFF3File(self._inputGff3FileName)
+        # The finder reads the GFF3 input and writes to the observed file.
+        finder = FindOverlapsWithOneInterval(0)
+        finder.setFileName(self._inputGff3FileName, "gff3")
+        finder.setOutputFileName(self._obsFileName)
+        self._iFOWOI = finder
+
+    def tearDown(self):
+        # Remove only the files that exist: a test that fails early may not have
+        # written the observed or expected file, and an error raised here would
+        # hide the real failure.
+        for fileName in (self._inputGff3FileName, self._obsFileName, self._expFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def test_run_general(self):
+        # Query chr1:500-850, then compare the observed output with the expected file.
+        finder = self._iFOWOI
+        finder.setInterval("chr1", 500, 850)
+        finder.run()
+        self._writeExpGFF3File_general(self._expFileName)
+        filesMatch = Utils.diff(self._expFileName, self._obsFileName)
+        self.assertTrue(filesMatch)
+
+#   def test_run_general_asScript(self):
+#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 500 -e 850 -v 0' % (self._inputGff3FileName, self._obsFileName)
+#       os.system(cmd)
+#       self._writeExpGFF3File_general(self._expFileName)
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_one_overlap(self):
+#       self._iFOWOI.setInterval("chr1", 1250, 1450)
+#       self._iFOWOI.run()
+#       self._writeExpGFF3File_one_overlap(self._expFileName)
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_one_overlap_asScript(self):
+#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 1250 -e 1450 -v 0' % (self._inputGff3FileName, self._obsFileName)
+#       os.system(cmd)
+#       self._writeExpGFF3File_one_overlap(self._expFileName)
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_all_overlap(self):
+#       self._iFOWOI.setInterval("chr1", 300, 1250)
+#       self._iFOWOI.run()
+#       self._writeExpGff3File_all_overlap(self._expFileName)
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_all_overlap_asScript(self):
+#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 300 -e 1250 -v 0' % (self._inputGff3FileName, self._obsFileName)
+#       os.system(cmd)
+#       self._writeExpGff3File_all_overlap(self._expFileName)
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_no_overlap_right(self):
+#       self._iFOWOI.setInterval("chr1", 1400, 1500)
+#       self._iFOWOI.run()
+#       f = open(self._expFileName, "w")
+#       f.close()
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_no_overlap_right_asScript(self):
+#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 1400 -e 1500 -v 0' % (self._inputGff3FileName, self._obsFileName)
+#       os.system(cmd)
+#       f = open(self._expFileName, "w")
+#       f.close()
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_no_overlap_left(self):
+#       self._iFOWOI.setInterval("chr1", 0, 8)
+#       self._iFOWOI.run()
+#       f = open(self._expFileName, "w")
+#       f.close()
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+#
+#   def test_run_no_overlap_left_asScript(self):
+#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 0 -e 8 -v 0' % (self._inputGff3FileName, self._obsFileName)
+#       os.system(cmd)
+#       f = open(self._expFileName, "w")
+#       f.close()
+#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
+
+    def _writeExpGff3File_all_overlap(self, fileName):
+        """Write the seven S-MART records test2.1 to test2.7 to fileName."""
+        # The context manager closes the file even when a write fails.
+        with open(fileName, 'w') as f:
+            f.write("chr1\tS-MART\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
+            f.write("chr1\tS-MART\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
+            f.write("chr1\tS-MART\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
+            f.write("chr1\tS-MART\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
+            f.write("chr1\tS-MART\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
+            f.write("chr1\tS-MART\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
+            f.write("chr1\tS-MART\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
+
+    def _writeExpGFF3File_one_overlap(self, fileName):
+        """Write the single S-MART record test2.7 to fileName."""
+        # The context manager closes the file even when the write fails.
+        with open(fileName, "w") as f:
+            f.write("chr1\tS-MART\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
+
+    def _writeExpGFF3File_general(self, fileName):
+        """Write the expected result of test_run_general to fileName.
+
+        The record is one transcript line for chr1:500-850 listing four overlaps.
+        """
+        # The context manager closes the file even when the write fails.
+        with open(fileName, "w") as f:
+            f.write("chr1\tS-MART\ttranscript\t500\t850\t.\t+\t.\tnbOverlaps=4;overlapsWith=test2.1--test2.3--test2.5--test2.6\n")
+
+    def _writeGFF3File(self, fileName):
+        """Write the seven input records test2.1 to test2.7 (source 'test') to fileName."""
+        # The context manager closes the file even when a write fails.
+        with open(fileName, "w") as f:
+            f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
+            f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
+            f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
+            f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
+            f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
+            f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
+            f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
+
+# Run the test case when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithSeveralIntervals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithSeveralIntervals.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,173 @@\n+import unittest\n+import os, os.path\n+from SMART.Java.Python.ncList.FindOverlapsWithSeveralIntervals import FindOverlapsWithSeveralIntervals\n+from SMART.Java.Python.misc import Utils\n+\n+class Test_F_FindOverlapsWithSeveralIntervals(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._inputRefGff3FileName = \'sorted_Ref.gff3\'\n+ self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n+ self._outputGff3FileName = \'output.gff3\'\n+ self._expOutputFileName = \'expOutGff3.gff3\'\n+ self._writeQueryGff3File(self._inputQueryGff3FileName)\n+ self._writeGFF3File(self._inputRefGff3FileName)\n+ self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n+ self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n+ self._iFOWSI.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n+ self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n+ self._iFOWSI.prepareIntermediateFiles()\n+ self._iFOWSI.createNCLists()\n+ \n+ def tearDown(self):\n+ for fileName in (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName, self._expOutputFileName):\n+ if os.path.exists(fileName):\n+ os.remove(fileName)\n+ \n+ def test_run_general(self):\n+ self._writeQueryGff3File(self._inputQueryGff3FileName)\n+ self._writeGFF3File(self._inputRefGff3FileName)\n+ self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n+ self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n+ self._iFOWSI.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n+ self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n+ self._iFOWSI.prepareIntermediateFiles()\n+ self._iFOWSI.createNCLists()\n+ self._iFOWSI.compare()\n+ self._iFOWSI.close()\n+ self._writeExpOutFile_general(self._expOutputFileName)\n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ \n+ def test_run_general_asScript(self):\n+ cmd = \'python ../FindOverlapsWithSeveralIntervals.py -i %s -f gff3 -j %s -g gff3 -o %s -v 0\' % (self._inputQueryGff3FileName, 
self._inputRefGff3FileName, self._outputGff3FileName) \n+ os.system(cmd)\n+ self._writeExpOutFile_general(self._expOutputFileName)\n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+\n+ \n+ def test_run_overlap_special_case(self):\n+ inputQueryGff3FileName = \'query2.gff3\'\n+ self._writeQueryGff3File2(inputQueryGff3FileName)\n+ self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n+ self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n+ self._iFOWSI.setQueryFileName(inputQueryGff3FileName, "gff3")\n+ self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n+ self._iFOWSI.prepareIntermediateFiles()\n+ self._iFOWSI.createNCLists()\n+ self._iFOWSI.compare()\n+ self._iFOWSI.close()\n+ self._writeExpOutFile_special_case(self._expOutputFileName) \n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ os.remove(inputQueryGff3FileName) \n+ \n+ def test_run_overlap_special_case_asScript(self):\n+ inputQueryGff3FileName = \'query2.gff3\'\n+ self._writeQueryGff3File2(inputQueryGff3FileName)\n+ cmd = \'python ../FindOverlapsWithSeveralIntervals.py -i %s -f gff3 -j %s -g gff3 -o %s -v 0\' % (inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName) \n+ os.system(cmd) \n+ self._writeExpOutFile_special_case(self._expOutputFileName) \n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ os.remove(inputQueryGff3FileName) \n+ \n+ def _writeExpOutFile_special_case(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\tS-MART\ttest2\t1250\t1300\t781\t+\t.\tnbOverl'..b'r1\\tS-MART\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tName=test2.4;OverlapWith=query_2;score=251;feature=test2.4;ID=test2.4\\n")\n+ f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_2;score=251;feature=test2.5;ID=test2.5\\n")\n+ 
f.write("chr1\\tS-MART\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tName=test2.6;OverlapWith=query_2;score=101;feature=test2.6;ID=test2.6\\n")\n+ f.close() \n+\n+ def _writeExpOutFile_overlap_to_children(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_3;score=1001;feature=test2.1;ID=test2.1\\n") \n+ f.write("chr1\\tS-MART\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tName=test2.3;OverlapWith=query_3;score=501;feature=test2.3;ID=test2.3\\n") \n+ f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_3;score=251;feature=test2.5;ID=test2.5\\n") \n+ f.write("chr1\\tS-MART\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tName=test2.6;OverlapWith=query_3;score=101;feature=test2.6;ID=test2.6\\n") \n+ f.close() \n+\n+ def _writeExpOutFile_not_overlap_to_children(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_1;score=1001;feature=test2.1;ID=test2.1\\n")\n+ f.write("chr1\\tS-MART\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tName=test2.2;OverlapWith=query_1;score=301;feature=test2.2;ID=test2.2\\n")\n+ f.write("chr1\\tS-MART\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tName=test2.3;OverlapWith=query_1;score=501;feature=test2.3;ID=test2.3\\n")\n+ f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_4;score=1001;feature=test2.1;ID=test2.1\\n")\n+ f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_4;score=251;feature=test2.5;ID=test2.5\\n")\n+ f.close() \n+\n+ def _writeExpOutFile_no_overlap_right(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.close() \n+\n+ def _writeExpOutFile_one_overlap(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tS-MART\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tName=test2.7;OverlapWith=query_5;score=101;feature=test2.7;ID=test2.7\\n")\n+ f.close() \n+ \n+ def 
_writeQueryGff3File2(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest1\\t1100\\t1150\\t126\\t+\\t.\\tID=query_1;Name=test1.1\\n")\n+ f.write("chr1\\tquery\\ttest2\\t1250\\t1300\\t781\\t+\\t.\\tID=query_2;Name=test1.2\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest1.1\\t25\\t150\\t126\\t+\\t.\\tID=query_1;Name=test1.1\\n")\n+ f.write("chr1\\tquery\\ttest1.2\\t70\\t850\\t781\\t+\\t.\\tID=query_2;Name=test1.2\\n")\n+ f.write("chr1\\tquery\\ttest1.3\\t550\\t850\\t201\\t+\\t.\\tID=query_3;Name=test1.3\\n")\n+ f.write("chr1\\tquery\\ttest1.4\\t925\\t1025\\t101\\t+\\t.\\tID=query_4;Name=test1.4\\n")\n+ f.write("chr1\\tquery\\ttest1.5\\t1201\\t1210\\t10\\t+\\t.\\tID=query_5;Name=test1.5\\n")\n+ f.write("chr1\\tquery\\ttest1.6\\t1500\\t1600\\t101\\t+\\t.\\tID=query_6;Name=test1.6\\n")\n+ f.close()\n+ \n+ def _writeGFF3File(self, fileName):\n+ f = open(fileName, "w")\n+ f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n+ f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n+ f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n+ f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n+ f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n+ f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n+ f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n+ f.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_FindOverlaps_naif.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_FindOverlaps_naif.py Fri Jan 18 04:54:14 2013 -0500

b"@@ -0,0 +1,455 @@\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif\n+from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n+\n+class Test_F_FindOverlaps_naif(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._inputRefGff3FileName = 'ref.gff3'\n+ self._writeGFF3File(self._inputRefGff3FileName)\n+ self._inputQueryGff3FileName = 'query.gff3'\n+ self._writeQueryGff3File(self._inputQueryGff3FileName)\n+ self._outputGff3FileName = 'output.gff3'\n+ self._expOutputFileName = 'expOutGff3.gff3'\n+ self._iFON = FindOverlaps_naif(self._inputRefGff3FileName, self._inputQueryGff3FileName)\n+ self._iFON.setOutputGff3FileName(self._outputGff3FileName)\n+ \n+ def tearDown(self):\n+ os.remove(self._inputRefGff3FileName)\n+ os.remove(self._inputQueryGff3FileName)\n+ os.remove(self._outputGff3FileName)\n+ os.remove(self._expOutputFileName)\n+ \n+ def test_run_general(self):\n+ self._iFON.run()\n+ self._iFON.close()\n+ self._writeExpOutFile_general(self._expOutputFileName)\n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ \n+ def test_run_general_asScript(self):\n+ cmd = 'python ../FindOverlaps_naif.py -i %s -j %s -o %s' % (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName) \n+ os.system(cmd)\n+ self._writeExpOutFile_general(self._expOutputFileName)\n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+\n+ def test_run_overlap_special_case(self):\n+ inputQueryGff3FileName = 'query2.gff3'\n+ self._writeQueryGff3File2(inputQueryGff3FileName)\n+ iFON = FindOverlaps_naif(self._inputRefGff3FileName, inputQueryGff3FileName)\n+ iFON.setOutputGff3FileName(self._outputGff3FileName)\n+ iFON.run()\n+ iFON.close()\n+ self._writeExpOutFile_special_case(self._expOutputFileName) \n+ 
self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ os.remove(inputQueryGff3FileName) \n+ \n+ def test_run_overlap_special_case_asScript(self):\n+ inputQueryGff3FileName = 'query2.gff3'\n+ self._writeQueryGff3File2(inputQueryGff3FileName)\n+ cmd = 'python ../FindOverlaps_naif.py -i %s -j %s -o %s' % (self._inputRefGff3FileName, inputQueryGff3FileName, self._outputGff3FileName) \n+ os.system(cmd) \n+ self._writeExpOutFile_special_case(self._expOutputFileName) \n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ os.remove(inputQueryGff3FileName) \n+ \n+ def test_case_2(self):\n+ inputRefGff3FileName = 'ref_case2.gff3'\n+ iMock = MockFindOverlapsWithServeralIntervals_case2()\n+ iMock.write(inputRefGff3FileName)\n+ inputQueryGff3FileName = 'query_case2.gff3'\n+ self._writeQueryGff3File_case2(inputQueryGff3FileName) \n+ iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)\n+ iFON.setOutputGff3FileName(self._outputGff3FileName)\n+ iFON.run()\n+ iFON.close()\n+ self._writeExpOutFile_case2(self._expOutputFileName) \n+ self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName)) \n+ os.remove(inputQueryGff3FileName) \n+ os.remove(inputRefGff3FileName) \n+ \n+ def test_case_3(self):\n+ inputRefGff3FileName = 'ref_case3.gff3'\n+ iMock = MockFindOverlapsWithServeralIntervals_case3()\n+ iMock.write(inputRefGff3FileName)\n+ inputQueryGff3FileName = 'query_case3.gff3'\n+ self._writeQueryGff3File_case3(inputQueryGff3FileName) \n+ iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)\n+ "..b'(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest3.1\\t150\\t250\\t101\\t+\\t.\\tID=query_1;Name=test3.1\\n")\n+ f.write("chr1\\tquery\\ttest3.2\\t380\\t400\\t21\\t+\\t.\\tID=query_2;Name=test3.2\\n")\n+ f.write("chr1\\tquery\\ttest3.3\\t480\\t520\\t41\\t+\\t.\\tID=query_3;Name=test3.3\\n")\n+ 
f.write("chr1\\tquery\\ttest3.4\\t510\\t700\\t191\\t+\\t.\\tID=query_4;Name=test3.4\\n")\n+ f.write("chr1\\tquery\\ttest3.5\\t900\\t950\\t51\\t+\\t.\\tID=query_5;Name=test3.5\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case4(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test4.1\\n")\n+ f.write("chr1\\tquery\\ttest4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=test4.2\\n")\n+ f.write("chr1\\tquery\\ttest4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=test4.3\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case5(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=test5.1\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case6(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=test6.1\\n")\n+ f.write("chr1\\tquery\\ttest6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=test6.2\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case7(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest7.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=test7.1\\n")\n+ f.write("chr1\\tquery\\ttest7.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=test7.2\\n")\n+ f.write("chr1\\tquery\\ttest7.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=test7.3\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case8(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest8.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=test8.1\\n")\n+ f.write("chr1\\tquery\\ttest8.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=test8.2\\n")\n+ f.write("chr1\\tquery\\ttest8.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=test8.3\\n")\n+ f.write("chr1\\tquery\\ttest8.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=test8.4\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case9(self, fileName):\n+ f = open(fileName, \'w\')\n+ 
f.write("chr1\\tquery\\ttest9.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test9.1\\n")\n+ f.write("chr1\\tquery\\ttest9.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=test9.2\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case10(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=test10.1\\n")\n+ f.write("chr1\\tquery\\ttest10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=test10.2\\n")\n+ f.write("chr1\\tquery\\ttest10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=test10.3\\n")\n+ f.close()\n+ \n+ def _writeQueryGff3File_case11(self, fileName):\n+ f = open(fileName, \'w\')\n+ f.write("chr1\\tquery\\ttest11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=test11.1\\n")\n+ f.write("chr1\\tquery\\ttest11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=test11.2\\n")\n+ f.close()\n+ \n+ def _writeGFF3File(self, fileName):\n+ f = open(fileName, "w")\n+ f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n+ f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n+ f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n+ f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n+ f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n+ f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n+ f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n+ f.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_FindOverlaps_randomExample.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_FindOverlaps_randomExample.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,48 @@
+import unittest
+import os
+import time
+from commons.core.utils.FileUtils import FileUtils
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample_NonOrder
+from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+
+class Test_F_FindOverlaps_randomExample(unittest.TestCase):
+    """Timing run of FindOverlapsOptim on two small random GFF3 files."""
+
+    def setUp(self):
+        self._output_optim = 'output_optim.gff3'
+
+    def test_FindOverlaps_NonOrder(self):
+        # Generate random reference and query files, time FindOverlapsOptim on
+        # them, record the timing in a small table file, then clean up.
+        inputRefGff3FileName = 'refMOverlaps.gff3'
+        inputQueryGff3FileName = 'queryMOverlaps.gff3'
+        outputDataName = 'timeResult.dat'
+        numberOfRefReads = 10
+        chromSize = 100000
+        numberOfQReads = 10
+        createdFiles = (inputQueryGff3FileName, inputRefGff3FileName, self._output_optim, outputDataName)
+        try:
+            # The context manager closes the timing file even when the run raises.
+            with open(outputDataName, 'w') as fTime:
+                fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')
+                # A single parenthesised argument prints the same text under Python 2 and Python 3.
+                print('ref size = %d,  query size = %d' % (numberOfRefReads, numberOfQReads))
+                iMFOR_ref = MockFindOverlaps_randomExample_NonOrder(inputRefGff3FileName, 'ref', numberOfRefReads, chromSize)
+                iMFOR_ref.write()
+                iMFOR_query = MockFindOverlaps_randomExample_NonOrder(inputQueryGff3FileName, 'q', numberOfQReads, chromSize)
+                iMFOR_query.write()
+                iFOO = FindOverlapsOptim(0)
+                iFOO.setRefFileName(inputRefGff3FileName, "gff3")
+                iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
+                iFOO.setOutputFileName(self._output_optim)
+                startTime_optim = time.time()
+                iFOO.run()
+                iFOO.close()
+                nbOverlap = iFOO._nbOverlaps
+                endTime_optim = time.time()
+                totalTime_optim = endTime_optim - startTime_optim
+                print('we take %s second.' % (totalTime_optim))
+                fTime.write('%d\t%d\t%d\t%.2f\n' % (numberOfRefReads, numberOfQReads, nbOverlap, totalTime_optim))
+            # The unconditional os.remove calls used to raise when a file was
+            # missing; keep that strictness as explicit assertions.
+            for fileName in createdFiles:
+                self.assertTrue(os.path.exists(fileName))
+        finally:
+            # Clean up whatever was created, also when the run fails midway.
+            for fileName in createdFiles:
+                if os.path.exists(fileName):
+                    os.remove(fileName)
+
+
+# Run the test case when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_F_NCList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_F_NCList.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,302 @@\n+import os\n+import unittest\n+import struct\n+from SMART.Java.Python.ncList.NCList import NCList\n+from SMART.Java.Python.misc import Utils\n+from commons.core.utils.FileUtils import FileUtils\n+from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n+from commons.core.parsing.GffParser import GffParser\n+from SMART.Java.Python.ncList.FileSorter import FileSorter\n+\n+class Test_F_NCList(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._inputGff3FileName = \'sortedFile.gff3\'\n+ self._sortedFileName = \'sortedFile.pkl\'\n+ self._expHFileName = \'expH.bin\'\n+ self._expLFileName = \'expL.bin\'\n+ self._obsHFileName = \'H.bin\'\n+ self._obsLFileName = \'L.bin\'\n+ self._addressFileName = \'address.txt\'\n+ self._writeGFF3File(self._inputGff3FileName)\n+ self._ncList = NCList(0)\n+ self._ncList.setChromosome("chr1")\n+ \n+ def tearDown(self):\n+ return\n+ for fileName in (self._inputGff3FileName, self._sortedFileName, self._expHFileName, self._expLFileName, self._obsHFileName, self._obsLFileName, self._addressFileName):\n+ if os.path.exists(fileName):\n+ os.remove(fileName)\n+ \n+ def _sortAndBuild(self):\n+ parser = GffParser(self._inputGff3FileName)\n+ fs = FileSorter(parser, 0)\n+ fs.setOutputFileName(self._sortedFileName)\n+ fs.sort()\n+ self._ncList.setFileName(self._sortedFileName)\n+ self._ncList.setNbElements(parser.getNbTranscripts())\n+ self._ncList.buildLists()\n+\n+ def test_run_with_one_elementSubList(self):\n+ iMock = MockFindOverlapsWithOneInterval()\n+ iMock.write(self._inputGff3FileName)\n+ self._sortAndBuild()\n+ self._writeExpHFile_one_elementSubList()\n+ self._writeExpLFile_one_elementSubList()\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName)) \n+ \n+ def test_case1(self):\n+ iMock = MockFindOverlapsWithServeralIntervals_case1()\n+ 
iMock.write(self._inputGff3FileName)\n+ self._sortAndBuild()\n+ self._writeExpHFileCase1()\n+ self._writeExpLFileCase1()\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName)) \n+ \n+ def test_case2(self):\n+ iMock = MockFindOverlapsWithServeralIntervals_case2()\n+ iMock.write(self._inputGff3FileName)\n+ self._sortAndBuild()\n+ self._writeExpHFileCase2()\n+ self._writeExpLFileCase2()\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName)) \n+ \n+ def test_case3(self):\n+ iMock = MockFindOverlapsWithServeralIntervals_case3()\n+ iMock.write(self._inputGff3FileName)\n+ self._sortAndBuild()\n+ self._writeExpHFileCase3()\n+ self._writeExpLFileCase3()\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName)) \n+ \n+ def test_case4_5(self):\n+ iMock = MockFindOverlapsWithServeralIntervals_case4_5()\n+ iMock.write(self._inputGff3FileName)\n+ self._sortAndBuild()\n+ self._writeExpHFileCase4_5()\n+ self._writeExpLFileCase4_5()\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName)) \n+\n+ def test_case6_7(self):\n+ iMock = MockFindOverlapsWithServeralIntervals_case6_7()\n+ iMock'..b'(self._expHFileName, elements)\n+ \n+ def _writeExpHFileCase11(self):\n+ elements = [0, 2, 2, 2, 4, 2]\n+ self._writeBinFile(self._expHFileName, elements)\n+ \n+ def _writeExpHFileCase12(self):\n+ elements = [0, 1, 1, 3, 4, 1]\n+ self._writeBinFile(self._expHFileName, elements)\n+ \n+ def _writeExpLFile_one_elementSubList(self):\n+ elements = [0, 1000, 0, -1, -1]\n+ 
self._writeBinFile(self._expLFileName, elements)\n+\n+ def _writeExpLFileCase1(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 1200, 1300, 2345, -1, -1, \\\n+ 50, 350, 391, -1, 0, \\\n+ 100, 600, 781, 2, 0, \\\n+ 700, 950, 1563, 3, 0, \\\n+ 200, 450, 1172, -1, 3, \\\n+ 800, 900, 1954, -1, 4]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase2(self):\n+ elements = [ 0, 500, 0, 1, -1, \\\n+ 900, 1200, 1561, -1, -1, \\\n+ 50, 450, 389, 2, 0, \\\n+ 100, 400, 779, 3, 2, \\\n+ 100, 200, 1170, -1, 3]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase3(self):\n+ elements = [ 0, 500, 0, 1, -1, \\\n+ 800, 1000, 1952, -1, -1, \\\n+ 50, 450, 389, 2, 0, \\\n+ 100, 400, 779, 3, 2, \\\n+ 100, 200, 1170, -1, 3, \\\n+ 300, 400, 1561, -1, 3]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase4_5(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 200, 800, 391, 2, 0, \\\n+ 400, 600, 782, -1, 1]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase6_7(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 100, 300, 391, -1, 0, \\\n+ 400, 500, 782, -1, 0, \\\n+ 510, 520, 1173, -1, 0, \\\n+ 850, 950, 1563, -1, 0]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase8(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 100, 200, 391, -1, 0, \\\n+ 300, 400, 782, -1, 0]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase9(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 800, 1200, 782, -1, -1, \\\n+ 600, 700, 391, -1, 0]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase10(self):\n+ elements = [ 0, 1000, 0, 1, -1, \\\n+ 1200, 1300, 1576, -1, -1, \\\n+ 1400, 1500, 1972, -1, -1, \\\n+ 100, 200, 394, -1, 0, \\\n+ 300, 400, 788, -1, 0, \\\n+ 500, 600, 1182, -1, 0]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase11(self):\n+ elements = [ 0, 500, 0, 1, -1, \\\n+ 700, 900, 
1180, 2, -1, \\\n+ 100, 200, 392, -1, 0, \\\n+ 300, 400, 786, -1, 0, \\\n+ 710, 720, 1574, -1, 1, \\\n+ 740, 750, 1967, -1, 1]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+ def _writeExpLFileCase12(self):\n+ elements = [ 0, 1400, 0, 1, -1, \\\n+ 300, 500, 368, 2, 0, \\\n+ 800, 1100, 1106, -1, 0, \\\n+ 1200, 1300, 1476, -1, 0, \\\n+ 300, 500, 737, -1, 1]\n+ self._writeBinFile(self._expLFileName, elements)\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_FindOverlapsWithOneInterval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_FindOverlapsWithOneInterval.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,81 @@
+import unittest
+import struct
+import os
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+
+class Test_FindOverlapsWithOneInterval(unittest.TestCase):
+
+    def setUp(self):
+        self._inputGff3FileName = 'sortedFile.gff3'
+        self._writeGFF3File(self._inputGff3FileName)
+        self._obsFileName = "overlap.gff3"
+        self._iFOWOI = FindOverlapsWithOneInterval(0)
+        self._iFOWOI.setFileName(self._inputGff3FileName, "gff3")
+        self._iFOWOI._chromosome = "chr1"
+        self._iFOWOI.prepareIntermediateFiles()
+        self._iFOWOI.createNCList()
+        self._ncList = self._iFOWOI._ncList
+        self._iFOWOI.setOutputFileName(self._obsFileName)
+
+    def tearDown(self):
+        return
+        self._iFOWOI.close()
+        for file in (self._inputGff3FileName, self._obsFileName):
+            if os.path.exists(file):
+                os.remove(file)
+
+    def test_binarySearch_first_element_overlap(self):
+        self._iFOWOI.setInterval("chr1", 500, 850)
+        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 0, 0), 0, 6)
+        expReadPosition = 0
+        self._iFOWOI.dumpWriter()
+        self._iFOWOI.close()
+        self.assertEquals(expReadPosition, obsReadPosition._lIndex)
+
+    def test_binarySearch_second_element_overlap(self):
+        self._iFOWOI.setInterval("chr1", 500, 850)
+        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 2, 0), 2, 6)
+        expReadPosition = 3
+        self._iFOWOI.dumpWriter()
+        self._iFOWOI.close()
+        self.assertEquals(expReadPosition, obsReadPosition._lIndex)
+
+    def test_binarySearch_empty_subList(self):
+        self._iFOWOI.setInterval("chr1", 500, 850)
+        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 5, 0), 5, 5)
+        expReadPosition = None
+        self._iFOWOI.dumpWriter()
+        self._iFOWOI.close()
+        self.assertEquals(expReadPosition, obsReadPosition)
+
+    def test_binarySearch_no_overlap_right(self):
+        self._iFOWOI.setInterval("chr1", 1400, 1500)
+        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 0, 0), 0, 6)
+        expReadPosition = None
+        self._iFOWOI.dumpWriter()
+        self._iFOWOI.close()
+        self.assertEquals(expReadPosition, obsReadPosition)
+
+    def test_binarySearch_no_overlap_left(self):
+        self._iFOWOI.setInterval("chr1", 0, 45)
+        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 2, 0), 2, 6)
+        expReadPosition = None
+        self._iFOWOI.dumpWriter()
+        self._iFOWOI.close()
+        self.assertEquals(expReadPosition, obsReadPosition)
+
+    def _writeGFF3File(self, fileName):
+        f = open(fileName, "w")
+        f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
+        f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
+        f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
+        f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
+        f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
+        f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
+        f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
+        f.close()
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_FindOverlapsWithSeveralIntervals.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_FindOverlapsWithSeveralIntervals.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,160 @@
+import unittest
+import os
+from SMART.Java.Python.ncList.FindOverlapsWithSeveralIntervals import FindOverlapsWithSeveralIntervals
+
+class Test_FindOverlapsWithSeveralIntervals(unittest.TestCase):
+
+    def setUp(self):
+        self._inputRefGff3FileName = 'sortedFile.gff3'
+        self._writeGFF3File(self._inputRefGff3FileName)
+        self._inputQueryGff3FileName = 'sorted_Query.gff3'
+        self._writeQueryGff3File(self._inputQueryGff3FileName)
+        self._outputGff3FileName = 'overlaps.gff3'
+        self._iFOWSI = FindOverlapsWithSeveralIntervals(self._inputRefGff3FileName, self._inputQueryGff3FileName)
+        self._iFOWSI.setOutputGff3FileName(self._outputGff3FileName)
+
+    def tearDown(self):
+        os.remove(self._inputRefGff3FileName)
+        os.remove(self._inputQueryGff3FileName)
+        os.remove(self._outputGff3FileName)
+        self._iFOWSI.deletIntermediateFiles()
+
+    def test_isOverlapping_true(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 231
+        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = 0
+        self.assertEquals(exp, obs)
+
+    def test_isOverlapping_false_left(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 58
+        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = -1
+        self.assertEquals(exp, obs)
+
+    def test_isOverlapping_false_right(self):
+        queryGff3Addr = 116
+        RefGff3Addr = 347
+        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
+        exp = 1
+        self.assertEquals(exp, obs)
+
+    def test_getHisFirstChild(self):
+        firstRefLAddr = 0
+        obsFirstChildLAddr = self._iFOWSI.getHisFirstChild(firstRefLAddr)
+        expFirstChildLAddr = 48
+        self.assertEquals(expFirstChildLAddr, obsFirstChildLAddr)
+
+    def test_isLastElement_true(self):
+        refLAddr = 96
+        obsBool = self._iFOWSI.isLastElement(refLAddr)
+        expBool = True
+        self.assertEquals(expBool, obsBool)
+
+    def test_isLastElement_false(self):
+        refLAddr = 72
+        obsBool = self._iFOWSI.isLastElement(refLAddr)
+        expBool = False
+        self.assertEquals(expBool, obsBool)
+
+    def test_isLastElement_highestLevel_true(self):
+        refLAddr = 24
+        obsBool = self._iFOWSI.isLastElement(refLAddr)
+        expBool = True
+        self.assertEquals(expBool, obsBool)
+
+    def test_isLastElement_highestLevel_false(self):
+        refLAddr = 0
+        obsBool = self._iFOWSI.isLastElement(refLAddr)
+        expBool = False
+        self.assertEquals(expBool, obsBool)
+
+    def test_findOverlapIter(self):
+        queryGff3Addr = 175
+        firstRefLAddr = 0
+        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
+        expFirstOverlapLAddr = 0
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
+
+    def test_not_findOverlapIter(self):
+        queryGff3Addr = 295
+        firstRefLAddr = 24
+        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
+        expFirstOverlapLAddr = None
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
+
+    def test_findOverlapIter_not_the_first_RefOverlap(self):
+        queryGff3Addr = 235
+        firstRefLAddr = 0
+        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
+        expFirstOverlapLAddr = 24
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
+
+    def test_changeToNewSubEndLAddr(self):
+        firstChildLAddr = 48
+        subEndLAddr = 48
+        expSubEndLAddr = 120
+        obsSubEndLAddr = self._iFOWSI.changeToNewSubEndLAddr(firstChildLAddr, subEndLAddr)
+        self.assertEquals(expSubEndLAddr, obsSubEndLAddr)
+
+    def test_defineSubEndLaddr(self):
+        parentLAddr = -1
+        expSubEndLAddr = 48
+        obsSubEndLAddr = self._iFOWSI.defineSubEndLaddr(parentLAddr)
+        self.assertEquals(expSubEndLAddr, obsSubEndLAddr)
+
+    def test_getNextRefIntervalInCaseNotOverLap(self):
+        firstRefLAddr = 96
+        expRefLAddr = 24
+        obsRefLAddr = self._iFOWSI.getNextRefIntervalInCaseNotOverLap(firstRefLAddr)
+        self.assertEquals(expRefLAddr, obsRefLAddr)
+
+    def test_getNextRefIntervalInCaseOverLap(self):
+        firstChildLAddr = -1
+        firstRefLAddr = 120
+        subEndLAddr = 144
+        expRefLAddr, expSubEndLAddr = (96, 144)
+        obsRefLAddr, obsSubEndLAddr = self._iFOWSI.getNextRefIntervalInCaseOverLap(firstChildLAddr, firstRefLAddr, subEndLAddr)
+        self.assertEquals((expRefLAddr, expSubEndLAddr), (obsRefLAddr, obsSubEndLAddr))
+
+    def test_not_findOverlapIter_between2RefIntervals(self):
+        inputQueryGff3FileName = 'query2.gff3'
+        self._writeQueryGff3File2(inputQueryGff3FileName)
+        self._iFOWSI.setQueryGff3FileName(inputQueryGff3FileName)
+        queryGff3Addr = 0
+        firstRefLAddr = 0
+        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
+        expFirstOverlapLAddr = 24
+        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
+        os.remove(inputQueryGff3FileName)
+
+    def _writeQueryGff3File2(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1\t1100\t1150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest2\t1250\t1300\t781\t+\t.\tID=test1.2;Name=test1.2\n")
+        f.close()
+
+    def _writeQueryGff3File(self, fileName):
+        f = open(fileName, 'w')
+        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
+        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t+\t.\tID=test1.2;Name=test1.2\n")
+        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t+\t.\tID=test1.3;Name=test1.3\n")
+        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=test1.4;Name=test1.4\n")
+        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=test1.5;Name=test1.5\n")
+        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=test1.6;Name=test1.6\n")
+        f.close()
+
+    def _writeGFF3File(self, fileName):
+        f = open(fileName, "w")
+        f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
+        f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
+        f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
+        f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
+        f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
+        f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
+        f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
+        f.close()
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_FindOverlaps_randomExample.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_FindOverlaps_randomExample.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,99 @@
+import unittest
+import os
+import time
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import *
+from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+
+class Test_FindOverlaps_randomExample(unittest.TestCase):
+
+    def setUp(self):
+        self._output_naif = 'output_naif.gff3'
+        self._outputOptim = 'outputOptim.gff3'
+
+
+    def tearDown(self):
+        return
+        os.remove(self._output_naif)
+        os.remove(self._outputOptim)
+
+    def test_run_smallSize(self):
+        inputRefGff3FileName = 'ref_small.gff3'
+        numberOfReads = 10
+        chromSize = 1000
+        iMFO_rand = MockFindOverlaps_randomExample(inputRefGff3FileName, 'reference', numberOfReads, chromSize)
+        iMFO_rand.write()
+
+        inputQueryGff3FileName = 'query_small.gff3'
+        iMFO_rand = MockFindOverlaps_randomExample(inputQueryGff3FileName,'query', 10, 1000)
+        iMFO_rand.write()
+
+        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)
+        iFON.setOutputGff3FileName(self._output_naif)
+        iFOO = FindOverlapsOptim(0)
+        iFOO.setRefFileName(inputRefGff3FileName, "gff3")
+        iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
+        iFOO.setOutputFileName(self._outputOptim)
+        iFOO.prepareIntermediateFiles()
+        iFOO.createNCLists()
+
+        startTime_naif = time.time()
+        iFON.run()
+        iFON.close()
+        endTime_naif = time.time()
+        totalTime_naif = endTime_naif - startTime_naif
+        print 'for naive algo, we take %e second' % (totalTime_naif)
+
+        startTimeOptim = time.time()
+        iFOO.compare()
+        endTimeOptim = time.time()
+        totalTimeOptim = endTimeOptim - startTimeOptim
+        print 'for optim algo, we take %e second' % (totalTimeOptim)
+        iFOO.close()
+
+        self.assertTrue(Utils.diff(self._output_naif, self._outputOptim))
+
+        os.remove(inputRefGff3FileName)
+        os.remove(inputQueryGff3FileName)
+
+
+    def test_creatRandomExampleWithMOverlaps_smallSize(self):
+        inputRefGff3FileName = 'refMOverlaps_small.gff3'
+        inputQueryGff3FileName = 'queryMOverlaps_small.gff3'
+        numberOfReads = 10
+        chromSize = 1000
+        iRMSS = MockFindOverlaps_randomExample_MOverlaps(inputRefGff3FileName, inputQueryGff3FileName, 7, numberOfReads, chromSize)
+        iRMSS.createRandomExample()
+
+
+        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)
+        iFON.setOutputGff3FileName(self._output_naif)
+        iFOO = FindOverlapsOptim(0)
+        iFOO.setRefFileName(inputRefGff3FileName, "gff3")
+        iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
+        iFOO.setOutputFileName(self._outputOptim)
+        iFOO.prepareIntermediateFiles()
+        iFOO.createNCLists()
+
+        startTime_naif = time.time()
+        iFON.run()
+        endTime_naif = time.time()
+        totalTime_naif = endTime_naif - startTime_naif
+        print 'for naive algo, we take %e second' % (totalTime_naif)
+        iFON.close()
+
+        startTimeOptim = time.time()
+        iFOO.compare()
+        endTimeOptim = time.time()
+        totalTimeOptim = endTimeOptim - startTimeOptim
+        print 'for optim algo, we take %e second' % (totalTimeOptim)
+        iFOO.close()
+
+        self.assertTrue(Utils.diff(self._output_naif, self._outputOptim))
+
+        os.remove(inputRefGff3FileName)
+        os.remove(inputQueryGff3FileName)
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/ncList/test/Test_randExample.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/test/Test_randExample.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,51 @@
+import os
+import time
+import unittest
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import *
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+
+class Test_F_FindOverlaps_randomExample(unittest.TestCase):
+
+    def setUp(self):
+        self._output_optim = 'output_optim.gff3'
+
+    def test_creatRandomExampleWithMOverlaps(self):
+        inputRefGff3FileName = 'refMOverlaps.gff3'
+        inputQueryGff3FileName = 'queryMOverlaps.gff3'
+        outputDataName = 'timeResult.dat'
+        fTime = open(outputDataName, 'w')
+        fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')
+        numberOfRefReads = 1000
+        chromSize = 100000
+        while numberOfRefReads <= 1000:
+            numberOfQReads = 1000
+            while numberOfQReads <= 1000:
+                print 'ref size = %d,  query size = %d' %(numberOfRefReads, numberOfQReads)
+                iMFOR_ref = MockFindOverlaps_randomExample(inputRefGff3FileName, 'ref', numberOfRefReads, chromSize)
+                iMFOR_ref.write()
+                iMFOR_query = MockFindOverlaps_randomExample(inputQueryGff3FileName,'q', numberOfQReads, chromSize)
+                iMFOR_query.write()
+                iFOO = FindOverlapsOptim(0)
+                iFOO.setRefFileName(inputRefGff3FileName, "gff3")
+                iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
+                iFOO.setOutputFileName(self._output_optim)
+                iFOO.prepareIntermediateFiles()
+                iFOO.createNCLists()
+
+                startTime_optim = time.time()
+                iFOO.compare()
+                endTime_optim = time.time()
+                totalTime_optim = endTime_optim - startTime_optim
+                print 'we took %s second.' % (totalTime_optim)
+                nbOverlap = iFOO._nbOverlaps
+                iFOO.close()
+                fTime.write('%d\t%d\t%d\t%.2f\n' % (numberOfRefReads, numberOfQReads, nbOverlap, totalTime_optim))
+                numberOfQReads *= 10
+            numberOfRefReads *= 10
+        fTime.close()
+        os.remove(inputQueryGff3FileName)
+        os.remove(inputRefGff3FileName)
+        os.remove(self._output_optim)
+
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plot.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,223 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+\n+"""\n+Plot the data from the data files\n+"""\n+\n+import os, re, math\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+from commons.core.utils.FileUtils import FileUtils\n+\n+class Plot(object):\n+\n+ def __init__(self, verbosity):\n+ self.verbosity = verbosity\n+ self.keep = False\n+\n+ def keepTmpFiles(self, boolean):\n+ self.keep = boolean\n+\n+ def setShape(self, shape):\n+ self.shape = shape\n+\n+ def setInputFileName(self, fileName, format):\n+ self.parser = TranscriptContainer(fileName, format, self.verbosity)\n+\n+ def setXData(self, tag, default):\n+ self.x = tag\n+ self.xDefault = default\n+\n+ def setYData(self, tag, default):\n+ self.y = tag\n+ self.yDefault = default\n+\n+ def setZData(self, tag, default):\n+ self.z = tag\n+ self.zDefault = default\n+\n+ def setNbBars(self, nbBars):\n+ self.nbBars = nbBars\n+\n+ def setOutputFileName(self, fileName):\n+ self.outputFileName = fileName\n+\n+ def setRegression(self, regression):\n+ self.regression = regression\n+\n+ def setLog(self, log):\n+ self.log = log\n+\n+ def createPlotter(self):\n+ self.plotter = RPlotter(self.outputFileName, self.verbosity, self.keep)\n+ if self.shape == "barplot":\n+ self.plotter.setBarplot(True)\n+ elif self.shape == "line":\n+ pass\n+ elif self.shape == "points":\n+ self.plotter.setPoints(True)\n+ elif self.shape == "heatPoints":\n+ 
self.plotter.setHeatPoints(True)\n+ else:\n+ raise Exception("Do not understand shape \'%s\'\\n" % (self.shape))\n+ \n+ self.plotter.setLog(self.log)\n+ self.plotter.setRegression(self.regression)\n+\n+ def getValues(self, transcript):\n+ x = transcript.getTagValue(self.x)\n+ y = None\n+ z = None\n+ if self.y != None:\n+ y = transcript.getTagValue(self.y)\n+ if self.z != None:\n+ z = transcript.getTagValue(self.z)\n+ if x == None:\n+ if self.xDefault != None:\n+ x = self.xDefault\n+ else:\n+ raise Exception("Error! Transcript %s do not have the x-tag %s\\n" % (transcript, self.x))\n+ if '..b'= "heatPoints":\n+ self.plotter.addHeatLine(heatLine)\n+ self.plotter.plot()\n+\n+ def close(self):\n+ if self.regression:\n+ print self.plotter.getCorrelationData()\n+ if self.shape == "points":\n+ rho = self.plotter.getSpearmanRho()\n+ if rho == None:\n+ print "Cannot compute Spearman rho."\n+ else:\n+ print "Spearman rho: %f" % (rho) \n+\n+ def run(self):\n+ self.createPlotter()\n+ self.parseFile() \n+ self.close()\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Plot v1.0.2: Plot some information from a list of transcripts. 
[Category: Visualization]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")\n+ parser.add_option("-x", "--x", dest="x", action="store", type="string", help="tag for the x value [format: string]")\n+ parser.add_option("-y", "--y", dest="y", action="store", type="string", help="tag for the y value [format: string]")\n+ parser.add_option("-z", "--z", dest="z", action="store", default=None, type="string", help="tag for the z value [format: string]")\n+ parser.add_option("-X", "--xDefault", dest="xDefault", action="store", default=None, type="float", help="value for x when tag is not present [format: float]")\n+ parser.add_option("-Y", "--yDefault", dest="yDefault", action="store", default=None, type="float", help="value for y when tag is not present [format: float]")\n+ parser.add_option("-Z", "--zDefault", dest="zDefault", action="store", default=None, type="float", help="value for z when tag is not present [format: float]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file names [format: output file in PNG format]")\n+ parser.add_option("-s", "--shape", dest="shape", action="store", default="barplot", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")\n+ parser.add_option("-n", "--nbBars", dest="nbBars", action="store", default=2, type="int", help="number of bars in barplot [format: int]")\n+ parser.add_option("-k", "--keep", dest="keep", action="store_true", default=False, help="keep temporary files [format: bool]")\n+ parser.add_option("-r", "--regression", dest="regression", action="store_true", default=False, help="plot regression 
line (in \'points\' format) [format: bool]")\n+ parser.add_option("-l", "--log", dest="log", action="store", default="y", type="string", help="use log on x- or y-axis (write \'x\', \'y\' or \'xy\') [format: string]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ plot = Plot(options.verbosity)\n+ plot.setInputFileName(options.inputFileName, options.format)\n+ plot.setOutputFileName(options.outputFileName)\n+ plot.setXData(options.x, options.xDefault)\n+ plot.setYData(options.y, options.yDefault)\n+ plot.setZData(options.z, options.zDefault)\n+ plot.setShape(options.shape)\n+ plot.setNbBars(options.nbBars)\n+ plot.setRegression(options.regression)\n+ plot.setLog(options.log)\n+ plot.keepTmpFiles(options.keep)\n+ plot.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plotCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plotCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,473 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os, subprocess, glob, random\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+from SMART.Java.Python.misc.Progress import Progress\n+from commons.core.parsing.FastaParser import FastaParser\n+\n+strands = [-1, 1]\n+colors = {-1: "blue", 1: "red", 0: "black"}\n+colorLine = "black"\n+\n+def parseTargetField(field):\n+ strand = "+"\n+ splittedFieldSpace = field.split()\n+ splittedFieldPlus = field.split("+", 4)\n+ if len(splittedFieldSpace) == 3:\n+ id, start, end = splittedFieldSpace\n+ elif len(splittedFieldSpace) == 4:\n+ id, start, end, strand = splittedFieldSpace\n+ elif len(splittedFieldPlus) == 3:\n+ id, start, end = splittedFieldPlus\n+ elif len(splittedFieldPlus) == 4:\n+ id, start, end, strand = splittedFieldPlus\n+ else:\n+ raise Exception("Cannot parse Target field \'%s\'." 
% (field))\n+ return (id, int(start), int(end), strand)\n+\n+\n+class SimpleTranscript(object):\n+ def __init__(self, transcript1, transcript2, color = None):\n+ self.start = max(0, transcript1.getStart() - transcript2.getStart())\n+ self.end = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart())\n+ self.strand = transcript1.getDirection() * transcript2.getDirection()\n+ self.exons = []\n+ for exon in transcript1.getExons():\n+ if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd():\n+ start = max(0, exon.getStart() - transcript2.getStart())\n+ end = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart())\n+ self.addExon(start, end, self.strand, color)\n+\n+ def addExon(self, start, end, strand, color):\n+ exon = SimpleExon(start, end, strand, color)\n+ self.exons.append(exon)\n+\n+ def getRScript(self, yOffset, height):\n+ rString = ""\n+ previousEnd = None\n+ for exon in sorted(self.exons, key=lambda exon: exon.start):\n+ if previousEnd != None:\n+ rString += "segments(%.1f, %.1f, %.1f, %.1f, col = \\"%s\\")\\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height'..b'file 1 [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--inputFormat1", dest="inputFormat1", action="store", type="string", help="format of input file 1 [compulsory] [format: transcript file format]")\n+ parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n+ parser.add_option("-g", "--inputFormat2", dest="inputFormat2", action="store", type="string", help="format of input file 2 [compulsory] [format: transcript file format]")\n+ parser.add_option("-q", "--sequence", dest="inputSequence", action="store", default=None, type="string", help="input sequence file [format: file in FASTA format] [default: None]")\n+ 
parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in PNG format]")\n+ parser.add_option("-w", "--width", dest="width", action="store", default=1500, type="int", help="width of the plots (in px) [format: int] [default: 1500]")\n+ parser.add_option("-e", "--height", dest="height", action="store", default=1000, type="int", help="height of the plots (in px) [format: int] [default: 1000]")\n+ parser.add_option("-t", "--title", dest="title", action="store", default="", type="string", help="title of the plots [format: string]")\n+ parser.add_option("-x", "--xlab", dest="xLabel", action="store", default="", type="string", help="label on the x-axis [format: string]")\n+ parser.add_option("-y", "--ylab", dest="yLabel", action="store", default="", type="string", help="label on the y-axis [format: string]")\n+ parser.add_option("-p", "--plusColor", dest="plusColor", action="store", default="red", type="string", help="color for the elements on the plus strand [format: string] [default: red]")\n+ parser.add_option("-m", "--minusColor", dest="minusColor", action="store", default="blue", type="string", help="color for the elements on the minus strand [format: string] [default: blue]")\n+ parser.add_option("-s", "--sumColor", dest="sumColor", action="store", default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")\n+ parser.add_option("-l", "--lineColor", dest="lineColor", action="store", default="black", type="string", help="color for the lines [format: string] [default: black]")\n+ parser.add_option("-1", "--merge", dest="merge", action="store_true", default=False, help="merge the 2 plots in 1 [format: boolean] [default: false]")\n+ parser.add_option("-D", "--directory", dest="working_Dir", action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")\n+ parser.add_option("-v", 
"--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ colors[1] = options.plusColor\n+ colors[-1] = options.minusColor\n+ colors[0] = options.sumColor\n+ colorLine = options.lineColor\n+\n+ pp = PlotParser(options.verbosity)\n+ pp.addInput(0, options.inputFileName1, options.inputFormat1)\n+ pp.addInput(1, options.inputFileName2, options.inputFormat2)\n+ pp.addSequence(options.inputSequence)\n+ if options.working_Dir[-1] != \'/\':\n+ path = options.working_Dir + \'/\'\n+ pp.setOutput(path + options.outputFileName)\n+ pp.setPlotSize(options.width, options.height)\n+ pp.setLabels(options.xLabel, options.yLabel)\n+ pp.setTitle(options.title)\n+ pp.setMerge(options.merge)\n+ pp.start()\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plotCsv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plotCsv.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,146 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Plot the data from the data files
+"""
+
+import os
+import re
+from optparse import OptionParser
+from SMART.Java.Python.misc.RPlotter import *
+from SMART.Java.Python.misc.Progress import *
+
+
+def mergeData(line1, line2):
+    """
+    Pair up the values of two data sets that share the same keys.
+
+    @param line1: first data set (dict: key -> value)
+    @param line2: second data set (dict: key -> value)
+    @return: a dict mapping each key to the tuple (line1[key], line2[key])
+
+    Exits the program when the two dicts do not hold the same keys.
+    """
+    # imported locally: the import block of this file does not import 'sys'
+    # explicitly (it can only come from the star imports)
+    import sys
+    # compare the keys as sets: in Python 2, keys() returns lists, and two
+    # dicts holding the same keys may list them in different orders
+    if set(line1) != set(line2):
+        sys.exit("Error! Input files do not correspond to each other! Aborting...")
+    return dict((key, (line1[key], line2[key])) for key in line1)
+
+
+
+if __name__ == "__main__":
+
+    # 'sys' is not imported explicitly at the top of this file (it can only
+    # come from the star imports), yet this script relies on sys.exit()
+    import sys
+
+    # parse command line
+    description = "Plot CSV v1.0.1: Plot the content of a CSV file. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileNames", action="store",             type="string", help="input file [compulsory] [format: file in CSV format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",             type="string", help="output file [compulsory] [format: output file in PNG format]")
+    parser.add_option("-s", "--shape",     dest="shape",          action="store",             type="string", help="shape of the plot [format: choice (line, bar, points, heatPoints)]")
+    parser.add_option("-l", "--log",       dest="log",            action="store", default="", type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string] [default: ]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,  type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # set up the plotter; no specific call is made here for the "line" shape,
+    # and unknown shapes are rejected further down, once the files are read
+    plotter = RPlotter(options.outputFileName, options.verbosity)
+    if options.shape == "bar":
+        plotter.setBarplot(True)
+    elif options.shape == "points":
+        plotter.setPoints(True)
+    elif options.shape == "heatPoints":
+        plotter.setHeatPoints(True)
+
+    plotter.setLog(options.log)
+
+    # accepted line layouts, tried in this order (compiled once, not per line)
+    labelAndTwoValues = re.compile(r"^\s*(\S+)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s*$")
+    twoValues         = re.compile(r"^\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*$")
+    labelAndOneValue  = re.compile(r"^\s*(\S+)\s+(\d+\.?\d*)\s*$")
+
+    lines      = []   # one dict per input file
+    nbsColumns = []   # layout code (3, 2 or 1) of each input file
+    for inputFileName in options.inputFileNames.split(","):
+        line      = {}
+        nbColumns = None
+
+        # the 'with' block closes the file once it is read
+        with open(inputFileName) as inputFile:
+            for point in inputFile:
+                point = point.strip()
+
+                m = labelAndTwoValues.search(point)
+                if m is not None:
+                    key, value, thisNbColumns = m.group(1), (float(m.group(2)), float(m.group(3))), 3
+                else:
+                    m = twoValues.search(point)
+                    if m is not None:
+                        key, value, thisNbColumns = float(m.group(1)), float(m.group(2)), 2
+                    else:
+                        m = labelAndOneValue.search(point)
+                        if m is None:
+                            sys.exit("Do not understand line '%s' of file '%s'! Aborting..." % (point, inputFileName))
+                        # a label followed by one value is recorded with layout code 1
+                        key, value, thisNbColumns = m.group(1), float(m.group(2)), 1
+
+                line[key] = value
+                # the first line sets the layout of the file; all others must match it
+                if nbColumns is None:
+                    nbColumns = thisNbColumns
+                    nbsColumns.append(nbColumns)
+                elif nbColumns != thisNbColumns:
+                    sys.exit("Number of columns changed around line '%s' of file '%s'! Aborting..." % (point, inputFileName))
+
+        lines.append(line)
+
+    # an empty input file adds a dict to 'lines' but no layout to 'nbsColumns'
+    if len(lines) != len(nbsColumns):
+        sys.exit("Something is wrong in the input files! Aborting...")
+
+    if options.shape == "bar":
+        if len(lines) != 1:
+            sys.exit("Error! Bar plot should have exactly one input file! Aborting...")
+        if nbsColumns[0] != 2:
+            sys.exit("Error! Bar plot input file should have exactly two columns! Aborting...")
+        plotter.addLine(lines[0])
+    elif options.shape == "points":
+        if len(lines) != 2:
+            sys.exit("Error! Points cloud should have exactly two input file! Aborting...")
+        if nbsColumns[0] != 2 or nbsColumns[1] != 2:
+            sys.exit("Error! Points cloud plot input file should have exactly two columns! Aborting...")
+        # the helper is named mergeData ('mergedData' does not exist)
+        plotter.addLine(mergeData(lines[0], lines[1]))
+    elif options.shape == "heatPoints":
+        if len(lines) != 3:
+            sys.exit("Error! Heat points cloud should have exactly three input file! Aborting...")
+        plotter.addLine(mergeData(lines[0], lines[1]))
+        plotter.addHeatLine(lines[2])
+    elif options.shape == "line":
+        for i in range(0, len(lines)):
+            if (nbsColumns[i] != 2):
+                sys.exit("Error! Curve plot input file should have exactly two columns! Aborting...")
+            plotter.addLine(lines[i])
+    else:
+        sys.exit("Do not understand shape '%s'" % (options.shape))
+
+
+    plotter.plot()
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plotGenomeCoverage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plotGenomeCoverage.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,132 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Utils import *
+
+
+class GetGenomeCoverage(object):
+    """
+    Count how many times each position of a genome is covered by the exons
+    of the input transcripts, and plot the distribution of these counts.
+    """
+
+    def __init__(self, verbosity = 1):
+        # inputs and output are provided later through the setters
+        self.coverage        = {}     # chromosome -> position -> number of covering exons
+        self.distribution    = {}     # coverage value -> number of positions
+        self.genomeSize      = None
+        self.outputFileName  = None
+        self.referenceParser = None
+        self.inputContainer  = None
+        self.verbosity       = verbosity
+
+
+    def setInputFile(self, fileName, format):
+        """Open the transcript file 'fileName', given in format 'format'."""
+        container = TranscriptContainer(fileName, format, self.verbosity)
+        self.inputContainer = container
+
+
+    def setOutputFile(self, fileName):
+        """Store the name of the file given to the plotter."""
+        self.outputFileName = fileName
+
+
+    def setReference(self, fileName):
+        """Open the FASTA file 'fileName' holding the reference sequences."""
+        fastaParser = FastaParser(fileName, self.verbosity)
+        self.referenceParser = fastaParser
+
+
+    def getReferenceSizes(self):
+        """Set the genome size to the sum of the sizes of the reference regions."""
+        total = 0
+        for region in self.referenceParser.getRegions():
+            total = total + self.referenceParser.getSizeOfRegion(region)
+        self.genomeSize = total
+
+
+    def getCoverage(self):
+        """Fill self.coverage by walking through every exon of every transcript."""
+        progress = Progress(self.inputContainer.getNbTranscripts(), "Reading reads", self.verbosity)
+        for transcript in self.inputContainer.getIterator():
+            # positions already seen on this chromosome (created on first use)
+            positions = self.coverage.setdefault(transcript.getChromosome(), {})
+            for exon in transcript.getExons():
+                position = exon.getStart()
+                last     = exon.getEnd()
+                # both ends of the exon are counted
+                while position <= last:
+                    positions[position] = positions.get(position, 0) + 1
+                    position += 1
+            progress.inc()
+        progress.done()
+
+
+    def getDistribution(self):
+        """Fill self.distribution from the per-position counts of self.coverage."""
+        nbCovered = 0
+        for positions in self.coverage.values():
+            nbCovered += len(positions)
+        progress = Progress(nbCovered, "Building distribution", self.verbosity)
+        for positions in self.coverage.values():
+            for depth in positions.values():
+                self.distribution[depth] = self.distribution.get(depth, 0) + 1
+                progress.inc()
+        progress.done()
+        # genome size minus the number of positions seen at least once
+        self.distribution[0] = self.genomeSize - nbCovered
+
+
+    def plotDistribution(self):
+        """Plot the distribution and print its min/avg/med/max summary."""
+        plotter = RPlotter(self.outputFileName, self.verbosity)
+        plotter.setFill(0)
+        plotter.addLine(self.distribution)
+        plotter.plot()
+        summary = "min/avg/med/max reads per base: %d/%.2f/%.1f/%d" % getMinAvgMedMax(self.distribution)
+        print(summary)
+
+
+    def run(self):
+        """Run the whole analysis: genome size, coverage, distribution, plot."""
+        for step in (self.getReferenceSizes, self.getCoverage, self.getDistribution, self.plotDistribution):
+            step()
+
+
+if __name__ == "__main__":
+
+    # definition of the command line
+    description = "Plot Genome Coverage v1.0.1: Get the coverage of a genome. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string",
+                      help="reads file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format", dest="format", action="store", type="string",
+                      help="format of file [compulsory] [format: transcript file format]")
+    parser.add_option("-r", "--reference", dest="reference", action="store", type="string",
+                      help="sequences file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string",
+                      help="output file [compulsory] [format: output file in PNG format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int",
+                      help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # hand the options over to the analysis (reads, output, reference), then run it
+    coverageTool = GetGenomeCoverage(options.verbosity)
+    coverageTool.setInputFile(options.inputFileName, options.format)
+    coverageTool.setOutputFile(options.outputFileName)
+    coverageTool.setReference(options.reference)
+    coverageTool.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plotRepartition.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plotRepartition.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,128 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Plot the data from the data files
+"""
+import os
+from optparse import OptionParser
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Plot Repartition v1.0.1: Plot the repartition of different data on a whole genome. (This tool uses 1 input file only, the different values being stored in the tags.    See documentation to know more about it.) [Category: Visualization]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                           type="string", help="input file name [compulsory] [format: file in GFF3 format]")
+    parser.add_option("-n", "--names",     dest="names",          action="store",      default=None,        type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                           type="string", help="output file [compulsory] [format: output file in PNG format]")
+    parser.add_option("-c", "--color",     dest="colors",         action="store",      default=None,        type="string", help="color of the lines (separated by commas and no space) [format: string]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",      default="png",       type="string", help="format of the output file [format: string] [default: png]")
+    parser.add_option("-r", "--normalize", dest="normalize",      action="store_true", default=False,                      help="normalize data (when panels are different) [format: bool] [default: false]")
+    parser.add_option("-l", "--log",       dest="log",            action="store",      default="",          type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,           type="int",    help="trace level [format: int]")
+    parser.add_option("-D", "--directory", dest="working_Dir",    action="store",      default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
+    (options, args) = parser.parse_args()
+
+    strands        = [1, -1]
+    strandToString = {1: "+", -1: "-"}
+    # without -n, a single unnamed series is built, where every transcript counts for 1
+    names          = [None] if options.names == None else options.names.split(",")
+    maxs           = {}                                # chromosome -> greatest transcript start
+    nbElements     = [0 for name in names]             # per name: sum of the values read
+    lines          = [{} for i in range(len(names))]   # per name: chromosome -> strand -> start -> signed sum
+    if options.colors == None:
+        colors = [None for i in range(len(names))]
+    else:
+        colors = options.colors.split(",")
+
+    # read the transcripts (the option parser is kept under its own name)
+    gffParser = GffParser(options.inputFileName, options.verbosity)
+    progress  = Progress(gffParser.getNbTranscripts(), "Reading %s" % (options.inputFileName), options.verbosity)
+    for transcript in gffParser.getIterator():
+        chromosome = transcript.getChromosome()
+        direction  = transcript.getDirection()
+        start      = transcript.getStart()
+        # does not depend on the tag name: done once per transcript
+        maxs[chromosome] = max(maxs.get(chromosome, start), start)
+        for i, name in enumerate(names):
+            if chromosome not in lines[i]:
+                lines[i][chromosome] = dict([(strand, {}) for strand in strands])
+            if start not in lines[i][chromosome][direction]:
+                lines[i][chromosome][direction][start] = 0
+            # value of the tag when the transcript has it, 1 otherwise
+            thisNbElements                          = float(transcript.getTagValue(name)) if name != None and name in transcript.getTagNames() else 1
+            # multiplying by the direction gives minus-strand values a negative sign
+            lines[i][chromosome][direction][start] += thisNbElements * direction
+            nbElements[i]                          += thisNbElements
+        progress.inc()
+    progress.done()
+
+    if options.normalize:
+        if options.verbosity >= 10:
+            print("Normalizing...")
+        # scale every series to the total of the largest one
+        maxNbElements = max(nbElements)
+        for i, linesPerCondition in enumerate(lines):
+            for linesPerChromosome in linesPerCondition.values():
+                for line in linesPerChromosome.values():
+                    for key in line:
+                        line[key] = line[key] / float(nbElements[i]) * maxNbElements
+        # only announce the end of the normalization when it actually took place
+        if options.verbosity >= 10:
+            print("... done.")
+
+    # one plot per chromosome
+    progress = Progress(len(maxs.keys()), "Plotting", options.verbosity)
+    for chromosome in maxs:
+        plot = RPlotter("%s%s.%s" % (options.outputFileName, chromosome.capitalize(), options.format), options.verbosity)
+        plot.setLog(options.log)
+        plot.setImageSize(2000, 500)
+        plot.setFormat(options.format)
+        # choose the unit of the x-axis from the greatest start position
+        if maxs[chromosome] <= 1000:
+            unit  = "nt."
+            ratio = 1.0
+        elif maxs[chromosome] <= 1000000:
+            unit  = "kb"
+            ratio = 1000.0
+        else:
+            unit  = "Mb"
+            ratio = 1000000.0
+        plot.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
+        plot.setYLabel("# reads")
+        plot.setLegend(True)
+        for i, name in enumerate(names):
+            for strand in strands:
+                correctedLine = dict([(key / ratio, value) for key, value in lines[i][chromosome][strand].items()])
+                # built in its own variable so that the strand suffix does not
+                # pile up on 'name' from one strand to the next
+                label = None
+                if name != None:
+                    label = "%s (%s)" % (name.replace("_", " "), strandToString[strand])
+                # NOTE(review): 'label' is not handed to addLine (None is passed
+                # as second argument) -- confirm whether it should be
+                plot.addLine(correctedLine, None, colors[i])
+        plot.plot()
+        progress.inc()
+    progress.done()
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/plotTranscriptList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/plotTranscriptList.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,255 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+"""\n+Plot the data from the data files\n+"""\n+import sys\n+import math\n+from optparse import OptionParser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.misc.RPlotter import RPlotter\n+\n+\n+class PlotTranscriptList(object):\n+\n+ def __init__(self, verbosity = 0):\n+ self.inputFileName = None\n+ self.format = None\n+ self.x = None\n+ self.y = None\n+ self.z = None\n+ self.xDefault = None\n+ self.yDefault = None\n+ self.zDefault = None\n+ self.xLabel = None\n+ self.yLabel = None\n+ self.shape = None\n+ self.bucket = None\n+ self.keep = None\n+ self.log = None\n+ self.verbosity = None\n+\n+\n+ def setPlotter(self, outputFileName, keep, log, xLabel, yLabel):\n+ self.plotter = RPlotter(outputFileName, self.verbosity, keep)\n+ if self.shape != "barplot":\n+ self.plotter.setLog(log)\n+ self.plotter.setXLabel(xLabel)\n+ self.plotter.setYLabel(yLabel)\n+\n+\n+ def setShape(self, shape):\n+ if self.shape == "line":\n+ pass\n+ elif shape == "barplot":\n+ self.plotter.setBarplot(True)\n+ elif shape == "points":\n+ self.plotter.setPoints(True)\n+ elif shape == "heatPoints":\n+ self.plotter.setHeatPoints(True)\n+ else:\n+ sys.exit("Do not understand shape \'%s\'" % (shape))\n+\n+\n+ def setInput(self, inputFileName, format):\n+ self.parser = TranscriptContainer(inputFileName, format, self.verbosity)\n+\n+\n+ def getValues(self, transcript):\n+ x, y, z = None, None, None\n+ x = transcript.getTagValue(self.x)\n+ if self.y != None:\n+ y = transcript.getTagValue(self.y)\n+ if self.z != 
None:\n+ z = transcript.getTagValue(self.z)\n+ if x == None:\n+ if self.xDefault != None:\n+ x = self.xDefault\n+ else:\n+ sys.exit("Error! Transcript %s do not have the x-tag %s" % (transcript, self.x))\n+ if y == None and self.shape != "line" and self.shape != "barplot":\n+ if self.yDefault != None:\n+ y = s'..b'line = self.clusterInBarplot(line)\n+\n+ if self.shape == "points" or self.shape == "barplot" or self.shape == "line":\n+ self.plotter.addLine(line)\n+ elif self.shape == "heatPoints":\n+ self.plotter.addLine(line)\n+ self.plotter.addHeatLine(heatLine)\n+ else:\n+ sys.exit("Do not understand shape \'%s\'" % (self.shape))\n+\n+ self.plotter.plot()\n+\n+ if self.shape == "points" or self.shape == "heatPoints":\n+ self.getSpearmanRho()\n+\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Plot v1.0.2: Plot some information from a list of transcripts. [Category: Visualization]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input",dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n+ parser.add_option("-f", "--format",dest="format", action="store",type="string", help="format of the input [compulsory] [format: transcript file format]")\n+ parser.add_option("-x", "--x",dest="x",action="store", type="string", help="tag for the x value [format: string]")\n+ parser.add_option("-y", "--y",dest="y",action="store", type="string", help="tag for the y value [format: string]")\n+ parser.add_option("-z", "--z",dest="z", action="store", default=None,type="string", help="tag for the z value [format: string]")\n+ parser.add_option("-X", "--xDefault",dest="xDefault",action="store", default=None,type="float",help="value for x when tag is not present [format: float]")\n+ parser.add_option("-Y", "--yDefault",dest="yDefault",action="store",default=None,type="float",help="value for y when tag is not present [format: float]")\n+ 
parser.add_option("-Z", "--zDefault",dest="zDefault", action="store",default=None,type="float",help="value for z when tag is not present [format: float]")\n+ parser.add_option("-n", "--xLabel",dest="xLabel",action="store",default="",type="string", help="label on the x-axis [format: string] [default: ]")\n+ parser.add_option("-m", "--yLabel",dest="yLabel",action="store",default="", type="string", help="label on the y-axis [format: string] [default: ]")\n+ parser.add_option("-o", "--output",dest="outputFileName",action="store",type="string", help="output file names [format: output file in PNG format]")\n+ parser.add_option("-s", "--shape",dest="shape",action="store", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")\n+ parser.add_option("-b", "--bucket",dest="bucket",action="store",default=None,type="float",help="bucket size (for the line plot) [format: int] [default: 1]")\n+ parser.add_option("-k", "--keep",dest="keep",action="store_true", default=False, help="keep temporary files [format: bool]")\n+ parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write \'x\', \'y\' or \'xy\') [format: string] [default: ]")\n+ parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1, type="int",help="trace level [format: int]")\n+ (options, args) = parser.parse_args()\n+\n+ plotTranscriptList = PlotTranscriptList(options.verbosity)\n+ plotTranscriptList.x = options.x\n+ plotTranscriptList.y = options.y\n+ plotTranscriptList.z = options.z\n+ plotTranscriptList.xDefault = options.xDefault\n+ plotTranscriptList.yDefault = options.yDefault\n+ plotTranscriptList.zDefault = options.zDefault\n+ plotTranscriptList.shape = options.shape\n+ plotTranscriptList.bucket = options.bucket\n+ plotTranscriptList.log = options.log\n+ plotTranscriptList.setPlotter(options.outputFileName, options.keep, options.log, options.xLabel, options.yLabel)\n+ 
plotTranscriptList.setShape(options.shape)\n+ plotTranscriptList.setInput(options.inputFileName, options.format)\n+ plotTranscriptList.run()\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/qualToFastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/qualToFastq.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,87 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from optparse import OptionParser
+from commons.core.parsing.SequenceListParser import SequenceListParser
+from SMART.Java.Python.misc.Progress import Progress
+
+"""
+Transform qual and fasta files to a single fastq file
+"""
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Qual To FastQ v1.0.2: Convert a file in FASTA/Qual format to FastQ format. [Category: Conversion]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-f", "--fasta",     dest="fastaFileName",  action="store",               type="string", help="input fasta file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-q", "--qual",      dest="qualFileName",   action="store",               type="string", help="input qual file [compulsory] [format: file in TXT format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store", default=None, type="string", help="output file [compulsory] [format: output file in FASTQ format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    fastaFile = open(options.fastaFileName)
+    qualFile  = open(options.qualFileName)
+    fastqFile = open(options.outputFileName, "w")
+
+    fastaLine = fastaFile.readline().strip()
+    qualLine  = qualFile.readline().strip()
+    header    = None
+    cpt       = 0
+    while fastaLine:
+        if not qualLine:
+            raise Exception("Qual file is shorter!")
+        if fastaLine[0] == ">":
+            header = fastaLine[1:]
+            if qualLine[0] != ">":
+                raise Exception("Discrepencies around %s!" % (header))
+            fastqFile.write("@%s\n" % (header))
+        else:
+            if qualLine[0] == ">":
+                raise Exception("Discrepencies around %s!" % (qualLine[1:]))
+            intQualities = qualLine.split()
+            if len(intQualities) != len(fastaLine):
+                raise Exception("Sizes of read and quality diverge in %s!" % (header))
+            chrQualities = [chr(min(int(quality), 93) + 33) for quality in intQualities]
+            fastqFile.write("%s\n+\n%s\n" % (fastaLine, "".join(chrQualities)))
+        fastaLine = fastaFile.readline().strip()
+        qualLine  = qualFile.readline().strip()
+        if cpt % 1000 == 0 and options.verbosity > 1:
+            sys.stdout.write("%d lines read\r" % (cpt))
+            sys.stdout.flush()
+        cpt += 1
+    if options.verbosity > 0:
+        print "%d lines read" % (cpt)
+
+    if qualLine:
+        raise Exception("Qual file is longer!")
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/removeAllTmpTables.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/removeAllTmpTables.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,64 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Remove all tmp tables in the MySQL database"""
+
+import os
+import glob
+from optparse import OptionParser
+from SMART.Java.Python.mySql.MySqlConnection import *
+
+
+if __name__ == "__main__":
+
+    description = "Remove Tables v1.0.2: Remove tables in the local MySQL database. [Category: Other]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-t", "--tmp",     dest="tmp",     action="store_true",    default=False, help="Remove temporary tables only [format: bool] [default: false]")
+    parser.add_option("-f", "--files", dest="files", action="store_false", default=True,    help="Do not remove temporary files [format: bool] [default: true]")
+    (options, args) = parser.parse_args()
+
+    print "Removing temporary databases:"
+    if options.files:
+        for tmpFile in glob.glob("smartdb*"):
+            print "    removing %s" % (tmpFile)
+            os.unlink(tmpFile)
+    print "Removing temporary files:"
+    if options.files:
+        for tmpFile in glob.glob("tmp*.dat"):
+            print "    removing %s" % (tmpFile)
+            os.unlink(tmpFile)
+        for tmpFile in glob.glob("tmp*.R"):
+            print "    removing %s" % (tmpFile)
+            os.unlink(tmpFile)
+        for tmpFile in glob.glob("tmp*.Rout"):
+            print "    removing %s" % (tmpFile)
+            os.unlink(tmpFile)
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/removeEmptySequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/removeEmptySequences.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,135 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Remove empty sequences from a FASTA or FASTQ file
+"""
+
+import os, random
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import *
+from commons.core.parsing.FastqParser import *
+from commons.core.writer.FastaWriter import *
+from commons.core.writer.FastqWriter import *
+from SMART.Java.Python.misc.Progress import *
+
+
+class EmptySequenceRemover(object):
+
+    def __init__(self, verbosity = 1):
+        self.verbosity            = verbosity
+        self.inputFileName    = None
+        self.parser                 = None
+        self.format                 = None
+        self.writer                 = None
+        self.forbiddenNames = {}
+        self.removedNames     = {}
+
+
+    def setInputFileName(self, fileName, format):
+        self.inputFileName = fileName
+        self.format                = format
+        if options.format == "fasta":
+            self.parser = FastaParser(self.inputFileName, self.verbosity)
+        elif options.format == "fastq":
+            self.parser = FastqParser(self.inputFileName, self.verbosity)
+        else:
+            sys.exit("Do not understand '%s' file format." % (self.format))
+
+
+    def setOutputFileName(self, fileName):
+        if options.format == "fasta":
+            self.writer = FastaWriter("%s.mfa" % (fileName), self.verbosity)
+        elif options.format == "fastq":
+            self.writer = FastqWriter("%s.mfq" % (fileName), self.verbosity)
+
+
+    def parse(self):
+        progress = Progress(self.parser.getNbSequences(), "Reading sequences in %s" % (options.inputFileName), options.verbosity)
+        for sequence in self.parser.getIterator():
+            name = sequence.name.split("/")[0]
+            if name not in self.forbiddenNames:
+                if sequence.sequence == "":
+                    self.removedNames[name] = 1
+                else:
+                    self.writer.addSequence(sequence)
+            progress.inc()
+        progress.done()
+        self.writer.write()
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Remove Empty Sequences v1.0.2: Remove all the empty sequences in a list. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",         dest="inputFileName",     action="store",                                         type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",        dest="format",                    action="store",                                         type="string", help="format of the input file [compulsory] [format: sequence file format]")
+    parser.add_option("-j", "--input2",        dest="inputFileName2",    action="store",                                         type="string", help="input file 2 (in case of pair end reads) [format: file in sequence format given by -f] [default: None]")
+    parser.add_option("-o", "--output",        dest="outputFileName",    action="store",            default=None,    type="string", help="output file [compulsory] [format: output file in format given by -f]")
+    parser.add_option("-p", "--output2",     dest="outputFileName2", action="store",            default=None,    type="string", help="output file 2 (in case of pair end reads) [format: output file in sequence format given by -f] [default: None]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",             action="store",            default=1,         type="int",        help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",             dest="log",                         action="store_true", default=False,                                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    if options.log:
+        logHandle = open("%s.log" % options.outputFileName, "w")
+
+    remover = EmptySequenceRemover(options.verbosity)
+    remover.setInputFileName(options.inputFileName, options.format)
+    remover.setOutputFileName(options.outputFileName)
+    remover.parse()
+    removedNames = remover.removedNames
+    if options.log:
+        for name in removedNames:
+            logHandle.write("Removed '%s' in %s\n" % (name, options.inputFileName))
+    nbSequences = remover.parser.getNbSequences()
+
+    newRemovedNames = {}
+    if options.inputFileName2 != None:
+        remover = EmptySequenceRemover(options.verbosity)
+        remover.setInputFileName(options.inputFileName2, options.format)
+        remover.setOutputFileName(options.outputFileName2)
+        remover.forbiddenNames = removedNames
+        remover.parse()
+        newRemovedNames = remover.removedNames
+        if options.log:
+            for name in newRemovedNames:
+                logHandle.write("Removed '%s' in %s\n" % (name, options.inputFileName2))
+
+        remover = EmptySequenceRemover(options.verbosity)
+        remover.setInputFileName(options.inputFileName, options.format)
+        remover.setOutputFileName(options.outputFileName)
+        remover.forbiddenNames = newRemovedNames
+        remover.parse()
+
+    nbRemoved = len(removedNames.keys()) + len(newRemovedNames.keys())
+    print "%d over %d sequences are empty (%.2f%%)." % (nbRemoved, nbSequences, float(nbRemoved) / nbSequences * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/removeExonLines.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/removeExonLines.sh Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,2 @@
+#!/bin/bash
+# Print the file given as first argument, dropping every line that contains "exon".
+# The argument is quoted so that paths with spaces or glob characters work.
+sed '/exon/d' "$1"

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/repetGffConverter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/repetGffConverter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,71 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Convert a GFF with REPET format to BED format"""
+
+import os
+from optparse import OptionParser
+from commons.core.parsing.GffParser import *
+from commons.core.writer.BedWriter import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Repet GFF Convert v1.0.1: Convert REPET-flavored GFF to normal GFF. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",                        dest="inputFileName",    action="store",                                         type="string", help="input file [compulsory] [format: file in GFF3 format]")
+    parser.add_option("-o", "--output",                     dest="outputFileName", action="store",                                         type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-v", "--verbosity",                dest="verbosity",            action="store",            default=1,         type="int",        help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    parser            = GffParser(options.inputFileName, options.verbosity)
+    transcripts = dict()
+    progress        = Progress(parser.getNbTranscripts(), "Analyzing file %s" % (options.inputFileName), options.verbosity)
+    for transcript in parser.getIterator():
+        if transcript.feature.endswith("range"):
+            transcripts[transcript.name] = transcript
+        elif transcript.feature.endswith("hsp"):
+            if transcript.name in transcripts:
+                transcripts[transcript.name].addExon(transcript)
+            else:
+                sys.exit("Transcript %s is not defined\n" % (transcript.name))
+        else:
+            sys.exit("Do not understand feature %s" % (transcript.feature))
+        progress.inc()
+    progress.done()
+
+    writer = BedWriter(options.outputFileName, options.verbosity)
+    for name in transcripts:
+        writer.addTranscript(transcripts[name])
+
+    print "%d transcripts out of %d written (%.2f%%)" % (len(transcripts.keys()), parser.getNbTranscripts(), float(len(transcripts.keys())) / parser.getNbTranscripts() * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/restrictFromNucleotides.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/restrictFromNucleotides.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,78 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Remove all dirty sequences"""
+
+import os
+import sys
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import *
+from commons.core.writer.FastaWriter import *
+from commons.core.parsing.FastqParser import *
+from commons.core.writer.FastqWriter import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc.RPlotter import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Restrict from nucleotide v1.0.1: Remove the sequences with ambiguous nucleotides. [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",         dest="inputFileName",    action="store",                                                type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",        dest="inputFileName",    action="store",            default="fasta",    type="string", help="format of the input and output files [compulsory] [format: sequence file format]")
+    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                                                type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",            action="store",            default=1,                type="int",        help="trace level [format: int]")
+    parser.add_option("-l", "--log",             dest="log",                        action="store_true", default=False,                                     help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    # treat items
+    if options.format == "fasta":
+        parser     = FastaParser(options.inputFileName, options.verbosity)
+        writer     = FastaWriter(options.outputFileName, options.verbosity)
+    elif options.format == "fastq":
+        parser     = FastqParser(options.inputFileName, options.verbosity)
+        writer     = FastqWriter(options.outputFileName, options.verbosity)
+    else:
+        sys.exit("Do not understand '%s' format." % (options.format))
+    nbSequences = parser.getNbSequences()
+    print "sequences: %d" % (nbSequences)
+
+    progress = Progress(nbSequences, "Analyzing sequences of %s" % (options.inputFileName), options.verbosity)
+    nbKept     = 0
+    for sequence in parser.getIterator():
+        if not sequence.containsAmbiguousNucleotides():
+            writer.addSequence(sequence)
+            nbKept += 1
+        progress.inc()
+    progress.done()
+
+    print "%d items, %d kept (%.2f%%)" % (nbSequences, nbKept, float(nbKept) / nbSequences * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/restrictFromSize.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/restrictFromSize.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,94 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Select the sequences or transcripts of a file whose size lies in a given range"""
+
+import os
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import *
+from commons.core.parsing.FastqParser import *
+from SMART.Java.Python.structure.TranscriptContainer import *
+from commons.core.writer.TranscriptWriter import *
+from commons.core.writer.FastaWriter import *
+from commons.core.writer.FastqWriter import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc.RPlotter import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Restrict from Size v1.0.1: Select the elements of a list of sequences or transcripts with a given size. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the input [compulsory] [format: sequence or transcript file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in transcript or sequence format given by -f]")
+    parser.add_option("-m", "--minSize",   dest="minSize",        action="store",      default=None,  type="int",    help="minimum size [format: int]")
+    parser.add_option("-M", "--maxSize",   dest="maxSize",        action="store",      default=None,  type="int",    help="maximum size [format: int]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    if options.format == "fasta":
+        parser = FastaParser(options.inputFileName, options.verbosity)
+        writer = FastaWriter(options.outputFileName, options.verbosity)
+    elif options.format == "fastq":
+        parser = FastqParser(options.inputFileName, options.verbosity)
+        writer = FastqWriter(options.outputFileName, options.verbosity)
+    else:
+        parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+        writer = TranscriptWriter(options.outputFileName, options.format, options.verbosity)
+
+
+    # treat items
+    nbItems  = parser.getNbItems()
+    progress = Progress(nbItems, "Analyzing sequences of %s" % (options.inputFileName), options.verbosity)
+    nbKept   = 0
+    nbRead   = 0
+    nbClKept = 0
+    nbClRead = 0
+    for item in parser.getIterator():
+        size      = item.getSize()
+        nb        = 1 if options.format in ("fasta", "fastq") or "nbElements" not in item.getTagNames() else float(item.getTagValue("nbElements"))
+        nbRead   += nb
+        nbClRead += 1
+        if (options.minSize == None or options.minSize <= size) and (options.maxSize == None or options.maxSize >= size):
+            writer.addElement(item)
+            nbKept   += nb
+            nbClKept += 1
+        progress.inc()
+    progress.done()
+
+    writer.write()
+
+    print "%d items,    %d kept (%.2f%%)" % (nbRead, nbKept, 0 if nbItems == 0 else float(nbKept) / nbItems * 100)
+    if nbKept != nbClKept or nbRead != nbClRead:
+        print "%d clusters, %d kept (%.2f%%)" % (nbClRead, nbClKept, 0 if nbClRead == 0 else float(nbClKept) / nbClRead * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/restrictSequenceList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/restrictSequenceList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,113 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Restrict a sequence list with some names"""
+
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.WriterChooser import WriterChooser
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+
+class RestrictSequenceList(object):
+
+    def __init__(self, verbosity):
+        self.verbosity = verbosity
+        self.exclude   = False
+
+    def setInputFileName(self, fileName, format):
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.parser = chooser.getParser(fileName)
+
+    def setExclusion(self, boolean):
+        self.exclude = boolean
+
+    def setOutputFileName(self, fileName, format):
+        chooser = WriterChooser(self.verbosity)
+        chooser.findFormat(format)
+        self.writer = chooser.getWriter(fileName)
+
+    def setNamesFileName(self, fileName):
+        self.namesFileName = fileName
+
+    def _readNames(self):
+        self.names = []
+        handle = open(self.namesFileName)
+        for name in handle:
+            self.names.append(name.strip())
+        handle.close()
+
+    def _write(self):
+        nbElements = self.parser.getNbItems()
+        progress   = Progress(nbElements, "Parsing input file", self.verbosity)
+        nbRead     = 0
+        nbWritten  = 0
+        for element in self.parser.getIterator():
+            name    = element.getName()
+            nbRead += 1
+            if Utils.xor(name in self.names, self.exclude):
+                self.writer.addElement(element)
+                nbWritten += 1
+            if name in self.names:
+                self.names.remove(name)
+            progress.inc()
+        progress.done()
+        if self.verbosity > 0:
+            print "%d read" % (nbRead)
+            print "%d written (%d%%)" % (nbWritten, 0 if nbRead == 0 else round(float(nbWritten) / nbRead * 100))
+
+    def run(self):
+        self._readNames()
+        self._write()
+        if self.names:
+            print "Some names are not present in the file: %s" % ", ".join(self.names)
+
+
+
+if __name__ == "__main__":
+
+    description = "Restrict Sequence List v1.0.1: Keep the elements of a list of sequences whose name is mentionned in a given file. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFile",  action="store",                       type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",     action="store",      default="fasta", type="string", help="format of the input and output files [compulsory] [format: sequence file format] [default: fasta]")
+    parser.add_option("-n", "--name",      dest="names",      action="store",                       type="string", help="names of the transcripts [compulsory] [format: file in TXT format]")
+    parser.add_option("-o", "--output",    dest="outputFile", action="store",                       type="string", help="output file [format: output file in sequence format given by -f]")
+    parser.add_option("-x", "--exclude",   dest="exclude",    action="store_true", default=False,                  help="output all those whose name is NOT on the list [format: boolean]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",  action="store",      default=1,       type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    rsl = RestrictSequenceList(options.verbosity)
+    rsl.setInputFileName(options.inputFile, options.format)
+    rsl.setOutputFileName(options.outputFile, options.format)
+    rsl.setNamesFileName(options.names)
+    rsl.setExclusion(options.exclude)
+    rsl.run()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/restrictTranscriptList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/restrictTranscriptList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,85 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Restrict a transcript list with some parameters (regions)"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+STRAND2DIRECTION = {"+": 1, "-": -1, None: None}
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Restrict Transcript List v1.0.2: Keep the coordinates which are located in a given position. [Category: Data Selection]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",                             type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",     dest="format",         action="store",                             type="string", help="format [compulsory] [format: transcript file format]")
+    parser.add_option("-c", "--chromosome", dest="chromosome",     action="store",            default=None,    type="string", help="chromosome [format: string]")
+    parser.add_option("-s", "--start",      dest="start",          action="store",            default=None,    type="int",    help="start [format: int]")
+    parser.add_option("-e", "--end",        dest="end",            action="store",            default=None,    type="int",    help="end [format: int]")
+    parser.add_option("-t", "--strand",     dest="strand",         action="store",            default=None,    type="string", help="strand (+ or -) [format: string]")
+    parser.add_option("-o", "--output",     dest="outputFileName", action="store",                             type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store",            default=1,       type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    writer = TranscriptWriter(options.outputFileName, options.format, options.verbosity)
+
+    direction = STRAND2DIRECTION[options.strand]
+
+    nbTranscripts = parser.getNbTranscripts()
+    progress      = Progress(nbTranscripts, "Parsing file %s" % (options.inputFileName), options.verbosity)
+
+    nbTotal = 0
+    nbKept    = 0
+    for transcript in parser.getIterator():
+        progress.inc()
+        nbTotal += 1
+        if options.chromosome != None and options.chromosome != transcript.getChromosome():
+            continue
+        if options.start != None and options.start > transcript.getEnd():
+            continue
+        if options.end != None and options.end < transcript.getStart():
+            continue
+        if options.end != None and options.end < transcript.getStart():
+            continue
+        if direction != None and direction != transcript.getDirection():
+            continue
+        nbKept += 1
+        writer.addTranscript(transcript)
+    progress.done()
+
+    writer.write()
+
+    print "%d out of %d are kept (%f%%)" % (nbKept, nbTotal, (float(nbKept) / nbTotal * 100))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/runRandomJobs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/runRandomJobs.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,46 @@
+import unittest
+import os
+import time
+from optparse import OptionParser
+from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample
+from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
+
+if __name__ == '__main__':
+    # Benchmark script: generate random reference and query GFF3 files, sort
+    # them, run the overlap search on them, and append the timing to
+    # 'timeResult.dat'.
+    # NOTE(review): the description mentions a cluster and runJobs.sh, but
+    # nothing in this block submits a job; everything visible runs locally.
+    description = "runRandomJobs: create random ref/query files (with size given), and run the jobs on cluster with help of runJobs.sh"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
+    parser.add_option("-m", "--inputRefSize", dest="numberOfRefReads", action="store", type="int", help="The number of Reference")
+    parser.add_option("-n", "--inputQuerySize", dest="numberOfQReads", action="store", type="int", help="The number of Query")
+    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
+    (options, args) = parser.parse_args()
+
+    # The timing report is a tab-separated file, written in the current
+    # working directory; the handle is only closed on the last line of the
+    # script.
+    outputDataName = 'timeResult.dat'
+    fTime = open(outputDataName, 'w')
+    fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')
+    chromSize = 100000
+    print 'ref size = %d,  query size = %d' %(options.numberOfRefReads, options.numberOfQReads)
+    # The "input" files given by -i and -j are written here, then sorted in
+    # place by the external 'sort' command.
+    iMFOR_ref = MockFindOverlaps_randomExample(options.inputRefGff3FileName, 'ref', options.numberOfRefReads, chromSize)
+    iMFOR_ref.write()
+    cmd_ref = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputRefGff3FileName, options.inputRefGff3FileName)
+    os.system(cmd_ref)
+    iMFOR_query = MockFindOverlaps_randomExample(options.inputQueryGff3FileName,'q', options.numberOfQReads, chromSize)
+    iMFOR_query.write()
+    cmd_query = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputQueryGff3FileName, options.inputQueryGff3FileName)
+    os.system(cmd_query)
+    # NOTE(review): 'FindOverlaps_optim' is neither imported nor defined in
+    # this file (the import above brings in 'FindOverlapsOptim'), so this line
+    # raises a NameError as written. Confirm which class is intended, and
+    # whether it provides the methods used below.
+    iFOO = FindOverlaps_optim(options.inputRefGff3FileName, options.inputQueryGff3FileName)
+    iFOO.setOutputGff3FileName(options.outputGff3FileName)
+
+    # The measured time covers run(), close() and getNbOverlap(); the sort of
+    # the output file below is not included.
+    startTime_optim = time.time()
+    iFOO.run()
+    iFOO.close()
+    nbOverlap = iFOO.getNbOverlap()
+    endTime_optim = time.time()
+    cmd = 'sort -f -n -k4 -k5.4rn -k9.5 -t ";" -o %s %s' % (options.outputGff3FileName, options.outputGff3FileName)
+    os.system(cmd)
+    totalTime_optim = endTime_optim - startTime_optim
+    print 'we take %s second.' % (totalTime_optim)
+    fTime.write('%d\t%d\t%d\t%.2f\n'%(options.numberOfRefReads, options.numberOfQReads, nbOverlap, totalTime_optim))
+    iFOO.deletIntermediateFiles()
+    fTime.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/selectByNbOccurrences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/selectByNbOccurrences.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Select the transcript that have not more that a given number of occurrences"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import *
+from commons.core.writer.Gff3Writer import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc.RPlotter import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Select by # of Occurrences v1.0.1: Keep the reads which have mapped less than a given number of times. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",             dest="inputFileName",    action="store",                                                type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",            dest="format",                 action="store",                                                type="string", help="format of the input [compulsory] [format: transcript file format]")
+    parser.add_option("-n", "--occurrences", dest="occurrences",        action="store",            default=1,                type="int",        help="maximum number of occurrences allowed [format: int] [default: 1]")
+    parser.add_option("-o", "--output",            dest="outputFileName", action="store",                                                type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-y", "--mysql",             dest="mysql",                    action="store_true", default=False,                                     help="mySQL output [format: bool] [default: false]")
+    parser.add_option("-v", "--verbosity",     dest="verbosity",            action="store",            default=1,                type="int",        help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",                 dest="log",                        action="store_true", default=False,                                     help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+
+    # get occurrences of the transcripts
+    names        = dict()
+    progress = Progress(parser.getNbTranscripts(), "Reading names of %s" % (options.inputFileName), options.verbosity)
+    for transcript in parser.getIterator():
+        name = transcript.name
+        if name not in names:
+            names[name] = 1
+        else:
+            names[name] += 1
+        progress.inc()
+    progress.done()
+
+    # write output file
+    nbWritten = 0
+    writer        = Gff3Writer(options.outputFileName, options.verbosity)
+    if options.mysql:
+        mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
+    progress    = Progress(parser.getNbTranscripts(), "Writing transcripts", options.verbosity)
+    for transcript in parser.getIterator():
+        name = transcript.name
+        if names[name] <= options.occurrences:
+            nbWritten += 1
+            writer.addTranscript(transcript)
+            if options.mysql:
+                mysqlWriter.addTranscript(transcript)
+        progress.inc()
+    progress.done()
+
+    if options.mysql:
+        mysqlWriter.write()
+    print "%d input" % (parser.getNbTranscripts())
+    print "%d output (%.2f%%)" % (nbWritten, float(nbWritten) / parser.getNbTranscripts() * 100)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/sequenceListSplitter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/sequenceListSplitter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,73 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Split a FASTA file into several shorter ones"""
+
+from optparse import OptionParser
+from commons.core.parsing.SequenceListParser import *
+from commons.core.writer.FastaWriter import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Sequence List Splitter v1.0.1: Split a list of big sequences into small chunks. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",   action="store",                         type="string", help="input file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-o", "--output",    dest="outputFileNames", action="store",                         type="string", help="output files [compulsory] [format: output file in FASTA format]")
+    parser.add_option("-n", "--number",    dest="number",          action="store",      default=10,        type="int",    help="number of splits [compulsory] [format: int] [default: 10]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
+    parser.add_option("-l", "--log",       dest="log",             action="store_true", default=False,                    help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    if options.log:
+        logHandle = open(options.outputFileNames + ".log", "w")
+
+    # split file
+    sequenceListParser = SequenceListParser(options.inputFileName, options.verbosity)
+    nbSequences                = sequenceListParser.getNbSequences()
+    nbSequencesByFile    = math.ceil(nbSequences / options.number)
+
+    # write into files
+    currentFileNumber = 1
+    writer            = FastaWriter("%s%i.fasta" % (options.outputFileNames, currentFileNumber), options.verbosity)
+    nbSequencesHere   = 0
+    progress          = Progress(nbSequences, "Writing files", options.verbosity)
+    for sequence in sequenceListParser.getIterator():
+        writer.addSequence(sequence)
+        nbSequencesHere += 1
+        if nbSequencesHere == nbSequencesByFile:
+            currentFileNumber += 1
+            writer             = FastaWriter("%s%i.fasta" % (options.outputFileNames, currentFileNumber), options.verbosity)
+            nbSequencesHere    = 0
+        progress.inc()
+    progress.done()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/splitByTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/splitByTag.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,68 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Read a file and split it into several, depending on a tag"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import *
+from commons.core.writer.Gff3Writer import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc import Utils
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Split By Tag v1.0.1: Read a file and split it into several, depending on a tag. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",                     dest="inputFileName",         action="store",                                        type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",                    dest="format",                        action="store",                                        type="string", help="format of file 1 [compulsory] [format: transcript file format]")
+    parser.add_option("-t", "--tag",                         dest="tag",                             action="store",                                        type="string", help="tag on which the split is made [compulsory] [format: string]")
+    parser.add_option("-o", "--output",                    dest="outputFileName",        action="store",                                        type="string", help="output file [format: output file in CSV format]")
+    parser.add_option("-v", "--verbosity",             dest="verbosity",                 action="store",            default=1,        type="int",        help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
+    writers                         = dict()
+
+    progress = Progress(transcriptContainer.getNbTranscripts(), "Reading file %s" % (options.inputFileName), options.verbosity)
+    for transcript in transcriptContainer.getIterator():
+        value = transcript.getTagValue(options.tag)
+        if value == None:
+            value = "noTag"
+        value = str(value).replace(" ", "_").title()
+        if value not in writers:
+            writers[value] = Gff3Writer("%s.gff3" % (os.path.join(options.outputFileName, value)))
+        writers[value].addTranscript(transcript)
+
+        progress.inc()
+    progress.done()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/splitMultiFasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/splitMultiFasta.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,64 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Split a Multi-Fasta file to several Fasta files"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import *
+from commons.core.writer.Gff3Writer import *
+from SMART.Java.Python.misc.Progress import *
+from SMART.Java.Python.misc import Utils
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Split Multi-Fasta v1.0.1: Split a Multi-Fasta file to several Fasta files. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",                     dest="inputFileName",         action="store",                                        type="string", help="input file 1 [compulsory] [format: file in FASTA format]")
+    parser.add_option("-o", "--output",                    dest="outputFileName",        action="store",                                        type="string", help="output file [format: output file in FASTA format]")
+    (options, args) = parser.parse_args()
+
+    inputHandle    = open(options.inputFileName)
+    outputHandle = None
+
+    for line in inputHandle:
+        line = line.strip()
+        if line[0] == ">":
+            if outputHandle != None:
+                outputHandle.close()
+            name = line[1:].split(" ")[0]
+            outputHandle = open("%s%s.fasta" % (options.outputFileName, name), "w")
+        outputHandle.write("%s\n" % (line))
+
+    inputHandle.close()
+    outputHandle.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Bins.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/Bins.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,77 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Some functions about bins
+"""
+
+def getMinBin():
+    """
+    Get the smallest bin level
+    The other functions of this module use bins of width 10 ** level, for
+    every level between this one and getMaxBin() (both included)
+    @return: the smallest level (3)
+    """
+    return 3
+
+
+def getMaxBin():
+    """
+    Get the greatest bin level
+    The other functions of this module use bins of width 10 ** level, for
+    every level between getMinBin() and this one (both included)
+    @return: the greatest level (7)
+    """
+    return 7
+
+
+def getBin(start, end):
+    for i in range(getMinBin(), getMaxBin() + 1):
+        binLevel = 10 ** i
+        if int(start / binLevel) == int(end / binLevel):
+            return int(i * 10 ** (getMaxBin() + 1) + int(start / binLevel))
+    return int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
+
+
+def getOverlappingBins(start, end):
+    array  = []
+    bigBin = int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
+    for i in range(getMinBin(), getMaxBin() + 1):
+        binLevel = 10 ** i
+        array.append((int(i * 10 ** (getMaxBin() + 1) + int(start / binLevel)), int(i * 10 ** (getMaxBin() + 1) + int(end / binLevel))))
+    array.append((bigBin, bigBin))
+    return array
+
+
+def getIterator(maxValue = None):
+    if maxValue == None:
+        maxValue = 10 ** (getMaxBin() + getMinBin()) - 1
+    for i in range(getMinBin(), getMaxBin() + 1):
+        binLevel = 10 ** i
+        binBit   = i * 10 ** (getMaxBin() + 1)
+        for j in range(0, maxValue / binLevel+1):
+            yield binBit + j
+    yield int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
+
+
+def getNbBins(maxValue = None):
+    if maxValue == None:
+        maxValue = 10 ** (getMaxBin() + getMinBin()) - 1
+    nbBins = 0
+    for i in range(getMinBin(), getMaxBin() + 1):
+        nbBins += maxValue / 10 ** i
+    return nbBins + 1

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Bins.pyc

Binary file SMART/Java/Python/structure/Bins.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Interval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/Interval.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,707 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+\n+from SMART.Java.Python.structure.Bins import *\n+from commons.core.coord.Range import Range\n+\n+class Interval(Range):\n+ """\n+ Store a genomic interval\n+ @ivar name: name of the interval [optional]\n+ @type name: string\n+ @ivar id: id of the interval [optional]\n+ @type id: int\n+ @ivar bin: bin in which the interval should be if stored in a database [computed]\n+ @type bin: int \n+ @ival tags: information about the transcript [optional]\n+ @type tags: dict\n+ @ivar verbosity: verbosity\n+ @type verbosity: int [default: 0]\n+ """\n+\n+ def __init__(self, interval = None, verbosity = 0):\n+ """\n+ Constructor\n+ @param interval: interval to be copied\n+ @type interval: class L{Interval<Interval>}\n+ @param verbosity: verbosity\n+ @type verbosity: int\n+ """\n+ Range.__init__(self)\n+ self.name = None\n+ self.id = None\n+ self.bin = None\n+ self.verbosity = verbosity\n+ self.tags = {}\n+ if interval != None:\n+ self.copy(interval)\n+\n+ #!!!! 
Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!#\n+ #In case strand = "+", start < end; strand = "-", start > end \n+ def getStart(self):\n+ if self.start == -1:\n+ return -1\n+ if self.end == -1:\n+ return self.start\n+ return self.getMin()\n+\n+ \n+ def getEnd(self):\n+ if self.end == -1:\n+ return -1\n+ if self.start == -1:\n+ return self.end\n+ return self.getMax()\n+\n+\n+ def getChromosome(self):\n+ return self.getSeqname()\n+\n+\n+ def getDirection(self):\n+ return 1 if self.getStrand() == "+" else -1\n+\n+\n+ def getName(self):\n+ return self.name\n+\n+\n+ def isSet(self):\n+ """\n+ Check if the interval is set\n+ """\n+ return self.getStart() == None and self.getEnd() == None\n+\n+\n+ def copy(self, interval):\n+ """\n+ Copy method\n+ @param interval: interval to be copied\n+ @type interval: class L{Interval<Interval>}\n+ """\n+ self.setStart(interval.getStart())\n+ self.setEnd(interval.getEnd())\n+ self.setChromosome(interval.getChromosome())\n+ self.setDirection(interval.getDirection()'..b'+ variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"]\n+ return variables\n+ getSqlVariables = classmethod(getSqlVariables)\n+\n+\n+ def setSqlValues(self, array):\n+ """\n+ Set the values of the properties of this object as given by a results line of a SQL query\n+ """\n+ self.id = array[0]\n+ self.name = array[1].strip("\'")\n+ self.setChromosome(array[2].strip("\'"))\n+ self.setStart(array[3])\n+ self.setEnd(array[4])\n+ self.setDirection(array[5])\n+ self.setTagValues(array[6].strip("\'"), ";", "=")\n+ self.bin = array[7]\n+\n+\n+ def getSqlValues(self):\n+ """\n+ Get the values of the properties that should be saved in a database\n+ """\n+ values = dict()\n+ values["name"] = self.name\n+ values["chromosome"] = self.getChromosome()\n+ values["start"] = self.getStart()\n+ values["end"] = self.getEnd()\n+ values["direction"] = self.getDirection()\n+ values["tags"] = 
self.getTagValues(";", "=")\n+ values["bin"] = self.getBin()\n+ return values\n+\n+\n+ def getSqlTypes(cls):\n+ """\n+ Get the values of the properties that should be saved in a database\n+ """\n+ types = dict()\n+ types["name"] = "varchar"\n+ types["chromosome"] = "varchar"\n+ types["start"] = "int"\n+ types["end"] = "int"\n+ types["direction"] = "tinyint"\n+ types["tags"] = "varchar"\n+ types["bin"] = "int"\n+ return types\n+ getSqlTypes = classmethod(getSqlTypes)\n+ \n+\n+ def getSqlSizes(cls):\n+ """\n+ Get the sizes of the properties that should be saved in a database\n+ """\n+ sizes = dict()\n+ sizes["name"] = 255\n+ sizes["chromosome"] = 255\n+ sizes["start"] = 11\n+ sizes["end"] = 11\n+ sizes["direction"] = 4\n+ sizes["tags"] = 1023\n+ sizes["bin"] = 11\n+ return sizes\n+ getSqlSizes = classmethod(getSqlSizes)\n+ \n+\n+ def printCoordinates(self):\n+ """\n+ Print start and end positions (depending on the direction of the interval)\n+ """\n+ if self.getDirection() == 1:\n+ return "%d-%d" % (self.getStart(), self.getEnd())\n+ else:\n+ return "%d-%d" % (self.getEnd(), self.getStart())\n+\n+ \n+ def extractSequence(self, parser):\n+ """\n+ Get the sequence corresponding to this interval\n+ @param parser: a parser to a FASTA file\n+ @type parser: class L{SequenceListParser<SequenceListParser>}\n+ @return : a instance of L{Sequence<Sequence>}\n+ """\n+ return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name)\n+ \n+ \n+ def extractWigData(self, parser):\n+ """\n+ Get the data retrieved from a wig file\n+ @param parser: a parser class to a WIG file\n+ @type parser: class L{WigParser<WigParser>}\n+ """\n+ data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd())\n+ if self.getDirection() == -1:\n+ if parser.strands:\n+ newData = {}\n+ for strand in data:\n+ data[strand].reverse()\n+ newData[-strand] = data[strand]\n+ data = newData\n+ else:\n+ data.reverse()\n+ return data\n+\n+\n+ def 
__str__(self):\n+ """\n+ Output a simple representation of this interval\n+ """\n+ direction = "+"\n+ if self.getDirection() == -1:\n+ direction = "-"\n+ string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction)\n+ if self.name != "":\n+ string = "(%s) %s" % (self.name, string)\n+ return string\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Interval.pyc

Binary file SMART/Java/Python/structure/Interval.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Mapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/Mapping.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,255 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from SMART.Java.Python.structure.SubMapping import SubMapping\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.Interval import Interval\n+\n+class Mapping(object):\n+ """A class that represents a mapping"""\n+\n+ def __init__(self):\n+ self.targetInterval = None\n+ self.queryInterval = None\n+ self.subMappings = []\n+ self.size = None\n+ self.transcript = None\n+ self.tags = {}\n+\n+\n+ def copy(self, mapping):\n+ for subMapping in mapping.subMappings:\n+ newSubMapping = SubMapping(subMapping)\n+ self.addSubMapping(newSubMapping)\n+ self.targetInterval = Interval(mapping.targetInterval)\n+ self.queryInterval = Interval(mapping.queryInterval)\n+ self.size = mapping.size\n+ self.tags = {}\n+ for tag in mapping.tags:\n+ self.tags[tag] = mapping[tag]\n+ self.transcript.copy(mapping.transcript)\n+\n+\n+ def setTargetInterval(self, interval):\n+ self.targetInterval = Interval(interval)\n+ if self.queryInterval != None:\n+ self.setDirection(self.targetInterval.getDirection() * self.queryInterval.getDirection())\n+\n+\n+ def setQueryInterval(self, interval):\n+ self.queryInterval = Interval(interval)\n+ if self.targetInterval != None:\n+ self.setDirection(self.targetInterval.getDirection() * self.queryInterval.getDirection())\n+\n+\n+ def getQueryInterval(self):\n+ return self.queryInterval\n+\n+\n+ def addSubMapping(self, subMapping):\n+ subMappingCopy = SubMapping(subMapping)\n+ self.subMappings.append(subMappingCopy)\n+\n+ if self.targetInterval:\n+ 
self.targetInterval.setStart(min(self.targetInterval.getStart(), subMapping.targetInterval.getStart()))\n+ self.targetInterval.setEnd(max(self.targetInterval.getEnd(), subMapping.targetInterval.getEnd()))\n+ else:\n+ self.setTargetInterval(subMapping.targetInterval)\n+ if self.queryInterval:\n+ self.queryInterval.setStart(min(self.queryInterval.getStart(), subMapping.queryInterval.getStart()))\n+ self.queryInterval.setEnd(max(self.queryInterval.getEnd(), subMapping.queryInterval.getEnd()))\n+ else:\n+ self.setQueryInterval(subMapping.queryInterval)\n+\n+ if self.getDirection() != 0:\n+ subMapping.setDirection(self.getDirection'..b'ccurrences(self, nbOccurrences):\n+ self.setTagValue("nbOccurrences", nbOccurrences)\n+\n+\n+ def setNbMismatches(self, nbMismatches):\n+ self.setTagValue("nbMismatches", nbMismatches)\n+ if self.size != None and "identity" not in self.getTagNames():\n+ identity = 100 if self.size == 0 else (self.size - self.getTagValue("nbMismatches")) / float(self.size) * 100\n+ self.setTagValue("identity", identity)\n+\n+\n+ def setNbGaps(self, nbGaps):\n+ self.setTagValue("nbGaps", nbGaps)\n+ \n+ \n+ def setRank(self, rank):\n+ self.setTagValue("rank", rank)\n+ \n+\n+ def setEvalue(self, evalue):\n+ self.setTagValue("evalue", evalue)\n+ \n+\n+ def setOccurrence(self, occurrence):\n+ self.setTagValue("occurrence", occurrence)\n+ \n+ \n+ def setBestRegion(self, bestRegion):\n+ self.setTagValue("bestRegion", bestRegion)\n+\n+\n+ def mergeExons(self, distance):\n+ previousSubMapping = None\n+ subMappings = []\n+ for subMapping in self.subMappings:\n+ if previousSubMapping == None:\n+ subMappings.append(subMapping)\n+ previousSubMapping = subMapping\n+ else:\n+ targetDistance = subMapping.targetInterval.getDistance(previousSubMapping.targetInterval)\n+ queryDistance = subMapping.queryInterval.getDistance(previousSubMapping.queryInterval)\n+ if targetDistance <= distance:\n+ self.setTagValue("nbGaps", self.getTagValue("nbGaps") + queryDistance)\n+ 
previousSubMapping.merge(subMapping)\n+ else:\n+ subMappings.append(subMapping)\n+ previousSubMapping = subMapping\n+ self.subMappings = subMappings\n+ \n+ \n+ def getTranscript(self):\n+ """\n+ Extract a transcript from this mapping\n+ @return: a transcript\n+ """\n+ if self.transcript != None:\n+ return self.transcript\n+ self.transcript = Transcript()\n+ self.transcript.copy(self.targetInterval)\n+ self.transcript.setDirection(self.getDirection())\n+ self.transcript.setName(self.queryInterval.getName())\n+ self.transcript.removeExons()\n+ if len(self.subMappings) > 1:\n+ for subMapping in self.subMappings:\n+ self.transcript.addExon(subMapping.targetInterval)\n+ cpt = 1\n+ for exon in self.transcript.exons:\n+ exon.setDirection(self.transcript.getDirection())\n+ exon.setName("%s-exon%d" % (self.transcript.getName(), cpt))\n+ exon.setChromosome(self.transcript.getChromosome())\n+ cpt += 1\n+ self.transcript.setDirection(self.getDirection())\n+ self.transcript.sortExons()\n+ for tag in self.tags:\n+ if "bestRegion" not in self.getTagNames():\n+ self.transcript.setTagValue("bestRegion", "(self)")\n+ self.transcript.setTagValue(tag, self.getTagValue(tag))\n+ return self.transcript\n+ \n+\n+ def getChromosome(self):\n+ if not self.subMappings:\n+ raise Exception("Error! Mapping \'%s\' has no submapping" % (self))\n+ return self.subMappings[0].targetInterval.getChromosome()\n+\n+\n+ \n+ def getErrorScore(self):\n+ return self.getTagValue("nbGaps") * 3 + self.getTagValue("nbMismatches") + (len(self.subMappings) - 1) * 0.1\n+ \n+\n+ def printGBrowseReference(self):\n+ return self.getTranscript().printGBrowseReference()\n+\n+\n+ def printGBrowseLine(self):\n+ return self.getTranscript().printGBrowseLine()\n+\n+\n+ def printGBrowse(self):\n+ return self.getTranscript().printGBrowse()\n+\n+\n+ def printBed(self):\n+ return self.getTranscript().printBed()\n+\n+\n+ def __str__(self):\n+ return "%s ---- %s" % (str(self.getTranscript()), ", ". 
join([str(submapping) for submapping in self.subMappings]))\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Mapping.pyc

Binary file SMART/Java/Python/structure/Mapping.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Sequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/Sequence.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,184 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+import re
+from commons.core.seq.Bioseq import Bioseq
+
+# Complement of every IUPAC nucleotide code: the two strings are aligned
+# position by position (code -> complement), upper case first, then the same
+# table in lower case.
+reverseComplementString = dict(zip(
+    "ACGTUMRWSYKVHDBN" "acgtumrwsykvhdbn",
+    "TGCAAKYWSRMBDHVN" "tgcaakywsrmbdhvn"
+))
+
+class Sequence(Bioseq):
+    """
+    A class that codes for a sequence
+    @ivar name: name of the sequence (copy of the header kept by the parent class)
+    @type name: string
+    @ivar quality: qualities of the nucleotides, None if not provided [optional]
+    @type quality: list of strings
+    @ivar chunkedSequence: sequence cut in lines of 60 nucleotides [computed]
+    @type chunkedSequence: list of strings
+    @ivar chunkedQuality: qualities cut in lines of 60 elements [computed]
+    @type chunkedQuality: list of lists of strings
+    @ivar integerQuality: whether the qualities were given as space-separated values
+    @type integerQuality: boolean
+    """
+
+    def __init__(self, name = "", sequence = ""):
+        """
+        Constructor
+        @param name: name of the sequence
+        @type name: string
+        @param sequence: nucleotides of the sequence
+        @type sequence: string
+        """
+        super(Sequence, self).__init__(name, sequence)
+        self.name            = self.header
+        self.quality         = None
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+        self.integerQuality  = False
+
+    def setName(self, name=""):
+        """
+        Set the name (header) of the sequence
+        @param name: the new name
+        @type name: string
+        """
+        super(Sequence, self).setHeader(name)
+        # mirror the header exactly as the constructor does, so that "name" never gets stale
+        self.name = self.header
+
+    def getName(self):
+        """
+        Get the name (header) of the sequence
+        @return: the name
+        """
+        return self.getHeader()
+
+    def setSequence(self, seq=""):
+        """
+        Set the nucleotides of the sequence
+        @param seq: the new nucleotides
+        @type seq: string
+        """
+        super(Sequence, self).setSequence(seq)
+        # the cached lines describe the previous content
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+
+    def setQuality(self, quality):
+        """
+        Set the qualities of the nucleotides
+        A string that contains spaces is read as space-separated values, any other string as one character per nucleotide
+        @param quality: the qualities, or None to remove them
+        @type quality: string
+        """
+        self.chunkedQuality = None
+        if quality is None:
+            self.quality        = None
+            self.integerQuality = False
+            return
+        # recomputed at each call: the format of a previous quality must not leak into this one
+        self.integerQuality = " " in quality
+        if self.integerQuality:
+            self.quality = quality.split()
+        else:
+            self.quality = list(quality)
+
+    def getQuality(self):
+        """
+        Get the qualities, in the format they were given
+        @return: a string, or None if there is no quality
+        """
+        if self.quality is None:
+            return None
+        if self.integerQuality:
+            return " ".join(self.quality)
+        return "".join(self.quality)
+
+    def getSize(self):
+        """
+        Get the number of nucleotides
+        @return: the size of the sequence
+        """
+        return len(self.getSequence())
+
+
+    def copy(self, sequence):
+        """
+        Copy method
+        @param sequence: sequence to be copied
+        @type sequence: class L{Sequence<Sequence>}
+        """
+        self.setName(sequence.getName())
+        self.setSequence(sequence.getSequence())
+        self.setQuality(sequence.getQuality())
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+
+
+    def chunkSequence(self):
+        """
+        Cut the sequence (and the qualities, if any) into lines of 60 elements
+        The results are stored in chunkedSequence and chunkedQuality
+        """
+        lineSize = 60
+        sequence = self.getSequence()
+        size     = len(sequence)
+        # stepping through the start positions never produces an empty trailing
+        # line when the size is a multiple of 60, and does not depend on how
+        # "/" divides integers
+        starts = range(0, size, lineSize)
+        self.chunkedSequence = [sequence[start : start + lineSize] for start in starts]
+        if self.quality is not None:
+            self.chunkedQuality = [self.quality[start : min(size, start + lineSize)] for start in starts]
+
+    def concatenate(self, seq):
+        """
+        Append another sequence to this one
+        If this sequence has qualities, those of the other sequence are appended too
+        @param seq: the sequence to append
+        @type seq: class L{Sequence<Sequence>}
+        """
+        # read the separator first: setQuality recomputes integerQuality
+        sep = " " if self.integerQuality else ""
+        self.setSequence(self.getSequence() + seq.getSequence())
+        if self.quality is not None:
+            self.setQuality(self.getQuality() + sep + seq.getQuality())
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+
+
+    def printFasta(self):
+        """
+        Export this sequence using FASTA format (60 nucleotides per line)
+        @return: a string
+        """
+        if self.chunkedSequence is None:
+            self.chunkSequence()
+        return ">%s\n%s\n" % (self.getHeader(), "\n".join(self.chunkedSequence))
+
+
+    def printFastq(self):
+        """
+        Export this sequence using FASTQ format (sequence and qualities on one line each)
+        @return: a string
+        """
+        return "@%s\n%s\n+%s\n%s\n" % (self.getHeader(), self.getSequence(), self.getHeader(), self.getQuality())
+
+
+    def reverseComplement(self):
+        """
+        Replace the sequence by its reverse-complement, and reverse the qualities
+        Exit the program if a character has no known complement
+        """
+        sequence    = self.getSequence()
+        complements = []
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+        # complement left to right (so that the first unknown character is the
+        # one reported), then reverse once: linear instead of quadratic
+        for char in sequence:
+            if char not in reverseComplementString:
+                sys.exit("Cannot understand character %s from string %s" % (char, sequence))
+            complements.append(reverseComplementString[char])
+        complements.reverse()
+        self.setSequence("".join(complements))
+        if self.quality is not None:
+            self.quality = self.quality[::-1]
+
+
+    def containsAmbiguousNucleotides(self):
+        """
+        Check whether the sequence contains anything but A, C, G, T or U (in either case)
+        @return: a boolean
+        """
+        return re.search("[^ACGTUacgtu]", self.getSequence()) is not None
+
+
+    def shrinkToFirstNucleotides(self, nbNucleotides):
+        """
+        Keep the first nucleotides only (and the matching qualities)
+        @param nbNucleotides: number of nucleotides to keep
+        @type nbNucleotides: int
+        """
+        self.setSequence(self.getSequence()[0:nbNucleotides])
+        if self.quality is not None:
+            self.quality = self.quality[0:nbNucleotides]
+        self.chunkedSequence = None
+        self.chunkedQuality  = None
+
+
+    def shrinkToLastNucleotides(self, nbNucleotides):
+        """
+        Keep the last nucleotides only (and the matching qualities)
+        @param nbNucleotides: number of nucleotides to keep
+        @type nbNucleotides: int
+        """
+        # "[-0:]" would select the whole sequence, so zero is handled on its own
+        if nbNucleotides == 0:
+            lastElements = slice(0, 0)
+        else:
+            lastElements = slice(-nbNucleotides, None)
+        self.setSequence(self.getSequence()[lastElements])
+        if self.quality is not None:
+            self.quality = self.quality[lastElements]
+        self.chunkedSequence = None
+        self.chunkedQuality  = None

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Sequence.pyc

Binary file SMART/Java/Python/structure/Sequence.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/SequenceList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/SequenceList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,72 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import math
+
+class SequenceList(object):
+    """
+    A class that codes for a list of sequences
+    @ivar sequences: the sequences, in insertion order
+    @type sequences: list
+    @ivar verbosity: verbosity
+    @type verbosity: int [default: 0]
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.sequences = []
+        self.verbosity = verbosity
+
+
+    def nbSequences(self):
+        """
+        Get the number of sequences in the list
+        @return: an int
+        """
+        return len(self.sequences)
+
+
+    def getSequence(self, index):
+        """
+        Get a sequence given its position in the list
+        @param index: position of the sequence
+        @type index: int
+        @return: the sequence
+        """
+        return self.sequences[index]
+
+
+    def addSequence(self, sequence):
+        """
+        Add a sequence at the end of the list
+        @param sequence: the sequence to add
+        """
+        self.sequences.append(sequence)
+
+
+    def split(self, number):
+        """
+        Split this list into at most "number" lists of consecutive sequences
+        Each list but possibly the last one holds ceil(nbSequences / number) sequences
+        @param number: maximum number of lists to produce
+        @type number: int
+        @return: a list of L{SequenceList<SequenceList>} (empty if this list is empty)
+        """
+        if not self.sequences:
+            return []
+        # divide as floats before rounding up: an integer division would already
+        # have rounded down, which yields too many (or unbounded) sub-lists
+        size = int(math.ceil(self.nbSequences() / float(number)))
+        sequenceLists = []
+        for start in range(0, self.nbSequences(), size):
+            sequenceList = SequenceList(self.verbosity)
+            sequenceList.sequences = self.sequences[start : start + size]
+            sequenceLists.append(sequenceList)
+        return sequenceLists
+
+
+    def printFasta(self):
+        """
+        Export all the sequences using FASTA format
+        @return: a string
+        """
+        return "".join([sequence.printFasta() for sequence in self.sequences])
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/SequenceList.pyc

Binary file SMART/Java/Python/structure/SequenceList.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/SubMapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/SubMapping.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,258 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from SMART.Java.Python.structure.Interval import Interval\n+from commons.core.coord.Align import Align\n+\n+class SubMapping(Align):\n+ """\n+ A class that represents a part of a mapping, more precisely, a pair (target interval, query interval) that match together\n+ @ivar targetInterval: the target interval\n+ @type targetInterval: class L{Interval<Interval>}\n+ @ivar queryInterval: the query interval\n+ @type queryInterval: class L{Interval<Interval>}\n+ @ivar size: size of this sub-mapping\n+ @type size: int\n+ @ivar tags: various information\n+ @type tags: dict\n+ """\n+\n+ def __init__(self, subMapping = None):\n+ """\n+ Constructor\n+ @param subMapping: a sub-mapping to be copied\n+ @type subMapping: class L{SubMapping<SubMapping>}\n+ """\n+ self.targetInterval = Interval()\n+ self.queryInterval = Interval()\n+ Align.__init__(self, self.queryInterval, self.targetInterval)\n+ self.size = None\n+ self.tags = {}\n+ if subMapping != None:\n+ self.copy(subMapping)\n+ \n+ def __eq__(self, o):\n+ if o == None:\n+ return False\n+ areAlignAttributesEquals = Align.__eq__(self, o)\n+ return areAlignAttributesEquals and (self.targetInterval == o.targetInterval) and (self.queryInterval == o.queryInterval) and self.size == o.getSize() and self.tags == o.getTags()\n+ \n+ def getSuperAdress(self):\n+ return hex(id(super(Align, self)))\n+ \n+# def setRangesAlignToRangesInterval(self):\n+# self.range_query = super(Range, self.queryInterval)\n+# self.range_subject = super(Range, self.targetInterval)\n+ \n+ def copy(self, subMapping):\n+ """\n+ 
Copy method\n+ @param subMapping: a sub-mapping to be copied\n+ @type subMapping: class L{SubMapping<SubMapping>}\n+ """\n+ self.setQueryName(subMapping.getQueryName())\n+ self.setQueryStart(subMapping.getQueryStart())\n+ self.setQueryEnd(subMapping.getQueryEnd())\n+ self.setSubjectName(subMapping.getSubjectName())\n+ self.setSubjectStart(subMapping.getSubjectStart())\n+ self.setSubjectEnd(subMapping.getSubjectEnd())\n+ self.e_value = subMapping.getEvalue()\n+ self.score = subMapping.getScore()\n+ self.identity = subMapping.getIdentity()\n+ \n+ self.targetInterval.copy(subMapping.targetInterval)\n+ sel'..b' @type name: string\n+ @param value: value of the tag\n+ @type value: string or int\n+ """\n+ self.tags[name] = value\n+\n+\n+ def getTagValue(self, name):\n+ """\n+ Get the value of a tag\n+ @param name: name of the tag\n+ @type name: string\n+ @return: value of the tag\n+ """\n+ return self.tags[name]\n+\n+ \n+ def getTagNames(self):\n+ """\n+ Get all the names of the tags\n+ @return: the names of the tags\n+ """\n+ return self.tags.keys()\n+\n+ def getTargetInterval(self):\n+ return self.targetInterval\n+ \n+ def getQueryInterval(self):\n+ return self.queryInterval\n+ \n+ def getSize(self):\n+ return self.size\n+ \n+ def getTags(self):\n+ return self.tags\n+\n+ def setIdentity(self, identity):\n+ """\n+ Set the percentage of identity of the sub-mapping\n+ Possibly also set number of mismatches\n+ @param identity: the percentage of identity of the sub-mapping\n+ @type identity: float\n+ """\n+ self.identity = identity\n+ self.setTagValue("identity", identity)\n+ if self.size != None and "nbMismatches" not in self.getTagNames():\n+ self.setTagValue("nbMismatches", self.size - round(self.size * self.getTagValue("identity") / 100.0))\n+\n+\n+ def setNbMismatches(self, nbMismatches):\n+ """\n+ Set the number of mismatches of the sub-mapping\n+ Possibly also set percentage of identity\n+ @param nbMismatches: the number of mismatches of the sub-mapping\n+ @type 
nbMismatches: int\n+ """\n+ self.nbMismatches = nbMismatches\n+ if self.size != None and "identity" not in self.getTagNames():\n+ self.setTagValue("identity", (self.size - self.getTagValue("nbMismatches")) / float(self.size) * 100)\n+\n+\n+ def setNbGaps(self, nbGaps):\n+ """\n+ Set the number of gaps of the sub-mapping\n+ @param nbGaps: the number of gaps of the sub-mapping\n+ @type nbGaps: int\n+ """\n+ self.setTagValue("nbGaps", nbGaps)\n+ \n+ \n+ def merge(self, subMapping):\n+ """\n+ Merge two subMappings\n+ @param subMapping: another sub-mapping\n+ @type subMapping: class L{SubMapping<SubMapping>}\n+ """\n+ self.targetInterval.merge(subMapping.targetInterval)\n+ self.queryInterval.merge(subMapping.queryInterval)\n+\n+\n+ def printCoordinates(self):\n+ """\n+ Print the coordinates of the sub-mapping (considering the direction)\n+ @return: a string\n+ """\n+ if self.getDirection() == 1:\n+ return "%d-%d" % (self.targetInterval.getStart(), self.targetInterval.getEnd())\n+ else:\n+ return "%d-%d" % (self.targetInterval.getEnd(), self.targetInterval.getStart())\n+\n+\n+ def __str__(self):\n+ """\n+ Return a representation of this object\n+ @return: a string\n+ """\n+\n+ if "match" in self.getTagNames() and not self.getTagValue("match"):\n+ return "%s ---" % self.queryName\n+\n+ direction = "+"\n+ if self.getDirection() == -1:\n+ direction = "-"\n+ string = "%s:%d-%d -- %s:%d-%d (%s)" % (self.targetInterval.getChromosome(), self.targetInterval.getStart(), self.targetInterval.getEnd(), self.queryInterval.name, self.queryInterval.getStart(), self.queryInterval.getEnd(), direction)\n+ if "nbMismatches" in self.getTagNames():\n+ string += "(%i mm)" % (self.getTagValue("nbMismatches"))\n+ if "identity" in self.getTagNames():\n+ string += "(id: %i%%)" % (self.getTagValue("identity"))\n+ if self.targetInterval.getSize() != None and self.queryInterval.getSize() != None and self.size != None:\n+ string += "(sizes: %d, %d -> %d)" % (self.targetInterval.getSize(), 
self.queryInterval.getSize(), self.size)\n+ return string\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/SubMapping.pyc

Binary file SMART/Java/Python/structure/SubMapping.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Transcript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/Transcript.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,851 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import sys\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Sequence import Sequence\n+\n+\n+class Transcript(Interval):\n+\t"""\n+\tA class that models an transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals)\n+\t@ivar exons: a list of exons (intervals)\n+\t@type exons: list of L{Interval{Interval}}\n+\t"""\n+\n+\tdef __init__(self, transcript = None, verbosity = 0):\n+\t\t"""\n+\t\tConstructor\n+\t\t@param transcript: transcript to be copied\n+\t\t@type transcript: class L{Transcript<Transcript>}\n+\t\t@param verbosity: verbosity\n+\t\t@type verbosity: int\n+\t\t"""\n+\t\tsuper(Transcript, self).__init__(None, verbosity)\n+\t\tself.exons = []\n+\t\tself.introns = None\n+\t\tif transcript != None:\n+\t\t\tself.copy(transcript)\n+\n+\n+\tdef copy(self, transcript):\n+\t\t"""\n+\t\tCopy method\n+\t\t@param transcript: transcript to be copied\n+\t\t@type\ttranscript: class L{Transcript<Transcript>} or L{Interval<Interval>}\n+\t\t"""\n+\t\tsuper(Transcript, self).copy(transcript)\n+\t\tif transcript.__class__.__name__ == "Transcript":\n+\t\t\texons = transcript.getExons()\n+\t\t\tif len(exons) > 1:\n+\t\t\t\tfor exon in exons:\n+\t\t\t\t\texonCopy = Interval(exon)\n+\t\t\t\t\tself.addExon(exonCopy)\n+\n+\n+\tdef setDirection(self, direction):\n+\t\t"""\n+\t\tSet the direction of the interval\n+\t\tPossibly parse different formats\n+\t\tImpact all exons\n+\t\t@param direction: direction of the transcript (+ / 
-)\n+\t\t@type\tdirection: int or string\n+\t\t"""\n+\t\tsuper(Transcript, self).setDirection(direction)\n+\t\tfor exon in self.exons:\n+\t\t\texon.setDirection(direction)\n+\t\t\t\n+\n+\tdef setChromosome(self, chromosome):\n+\t\t"""\n+\t\tSet the chromosome\n+\t\t@param chromosome: chromosome on which the transcript is\n+\t\t@type chromosome: string\n+\t\t"""\n+\t\tsuper(Transcript, self).setChromosome(chromosome)\n+\t\tfor exon in self.exons:\n+\t\t\texon.setChromosome(chromosome)\n+\n+\t\n+\tdef addExon(self, exon):\n+\t\t"""\n+\t\tAdd an exon to the list of exons\n+\t\t@param exon: a new exon\n+\t\t@type exon: class L{Interval<Interval>}\n+\t\t"""\n+\t\tif not self.exons and not exon.overlapWith(self):\n+\t\t\tfirstExon = Interval()\n+\t\t\tfirstExon.setStart(self.getStart())\n+\t\t\tfirstExon.setEnd(self.getEnd())\n+\t\t\tfirstExon.setDirection(self.getDirection())\n+\t\t\tfirstExon.setChromosome(self.getChromosome())\n+\t\t\tself.exons.append(firstExon)\n+\t\tnewExon = Interval(exon)\n+\t\tnewExon.setDirection(self.getDirection())\n+\t\tself.exons.append(newExon)\n+\t\tif newExon.getStart() < self.getStart():\n+\t\t\tself.setSta'..b'\tif i == 0:\n+\t\t\t\tcontinue\n+\t\t\tcigar += "%dN" % (exon.getStart() - lastExonEnd - 1)\n+\t\t\tcigar += "%dM" % (exon.getSize())\n+\n+\t\treturn "%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags)\n+\n+\n+\tdef printUcsc(self):\n+\t\t"""\n+\t\tExport this transcript using UCSC BED format\n+\t\t@return: a string\n+\t\t"""\n+\t\tif self.getChromosome().find("Het") != -1:\n+\t\t\treturn ""\n+\t\tname\t = self.name\n+\t\tcomment = self.getTagValues(";", "")\n+\t\tsizes\t = []\n+\t\tstarts\t= []\n+\t\tdirection = "+"\n+\t\tif self.getDirection() == -1:\n+\t\t\tdirection = "-"\n+\t\tself.sortExonsIncreasing()\n+\t\tfor exon in self.getExons():\n+\t\t\tsizes.append("%d" % 
(exon.getSize()))\n+\t\t\tstarts.append("%d" % (exon.getStart() - self.getStart()))\n+\t\treturn "%s\\t%d\\t%d\\t%s\\t1000\\t%s\\t%d\\t%d\\t0\\t%d\\t%s,\\t%s,\\n" % (self.getChromosome().replace("arm_", "chr"), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts))\n+\n+\n+\tdef printGBrowseReference(self):\n+\t\t"""\n+\t\tExport this transcript using GBrowse format (1st line only)\n+\t\t@return: a string\n+\t\t"""\n+\t\treturn "reference = %s\\n" % (self.getChromosome())\n+\n+\n+\tdef printGBrowseLine(self):\n+\t\t"""\n+\t\tExport this transcript using GBrowse format (2nd line only)\n+\t\t@return: a string\n+\t\t"""\n+\t\tself.sortExons()\n+\t\tcoordinates = []\n+\t\tfor exon in self.getExons():\n+\t\t\tcoordinates.append(exon.printCoordinates())\n+\t\tcoordinatesString = ",".join(coordinates)\n+\t\tcomment = self.getTagValues(";", "=")\n+\t\tif comment:\n+\t\t\tcomment = "\\t\\"%s\\"" % (comment)\n+\t\treturn "User_data\\t%s\\t%s%s\\n" % (self.name, coordinatesString, comment)\n+\n+\t\n+\tdef printGBrowse(self):\n+\t\t"""\n+\t\tExport this transcript using GBrowse format\n+\t\t@return: a string\n+\t\t"""\n+\t\treturn "%s%s" % (self.printGBrowseReference(), self.printGBrowseLine())\n+\n+\n+\tdef printCsv(self):\n+\t\t"""\n+\t\tExport this transcript using CSV format\n+\t\t@return: a string\n+\t\t"""\n+\t\tself.sortExons()\n+\t\tstring = "%s,%d,%d,\\"%s\\"," % (self.getChromosome(), self.getStart(), self.getEnd(), "+" if self.getDirection() == 1 else "-")\n+\t\tif len(self.getExons()) == 1:\n+\t\t\tstring += "None"\n+\t\telse:\n+\t\t\tfor exon in self.getExons():\n+\t\t\t\tstring += "%d-%d " % (exon.getStart(), exon.getEnd())\n+\t\tfor tag in sorted(self.tags.keys()):\n+\t\t\tstring += ",%s=%s" % (tag, str(self.tags[tag]))\n+\t\tstring += "\\n"\n+\t\treturn string\n+\n+\n+\tdef extractSequence(self, parser):\n+\t\t"""\n+\t\tGet the sequence corresponding to this 
transcript\n+\t\t@param parser: a parser to a FASTA file\n+\t\t@type parser: class L{SequenceListParser<SequenceListParser>}\n+\t\t@return:\t an instance of L{Sequence<Sequence>}\n+\t\t"""\n+\t\tself.sortExons()\n+\t\tname = self.name\n+\t\tif "ID" in self.getTagNames() and self.getTagValue("ID") != self.name:\n+\t\t\tname += ":%s" % (self.getTagValue("ID"))\n+\t\tsequence = Sequence(name)\n+\t\tfor exon in self.getExons():\n+\t\t\tsequence.concatenate(exon.extractSequence(parser))\n+\t\treturn sequence\n+\t\n+\t\n+\tdef extractWigData(self, parser):\n+\t\t"""\n+\t\tGet some wig data corresponding to this transcript\n+\t\t@param parser: a parser to a wig file\n+\t\t@type parser: class L{WigParser<WigParser>}\n+\t\t@return: a sequence of float\n+\t\t"""\n+\t\tself.sortExons()\n+\t\tif parser.strands:\n+\t\t\tstrands = (-1, 1)\n+\t\t\tvalues = dict([(strand, []) for strand in strands])\n+\t\t\tfor exon in self.getExons():\n+\t\t\t\ttheseValues = exon.extractWigData(parser)\n+\t\t\t\tif self.getDirection() == -1:\n+\t\t\t\t\tfor strand in strands:\n+\t\t\t\t\t\ttheseValues[strand].reverse()\n+\t\t\t\tfor strand in strands:\n+\t\t\t\t\tvalues[strand].extend(theseValues[strand])\n+\t\t\tif self.getDirection() == -1:\n+\t\t\t\tfor strand in strands:\n+\t\t\t\t\tvalues[strand].reverse()\n+\t\t\treturn values\n+\t\telse:\n+\t\t\tvalues = []\n+\t\t\tfor exon in self.getExons():\n+\t\t\t\ttheseValues = exon.extractWigData(parser)\n+\t\t\t\t#if self.getDirection() == -1:\n+\t\t\t\t#\ttheseValues.reverse()\n+\t\t\t\tvalues.extend(theseValues)\n+\t\t\t#if self.getDirection() == -1:\n+\t\t\t#\tvalues.reverse()\n+\t\t\treturn values\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/Transcript.pyc

Binary file SMART/Java/Python/structure/Transcript.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptContainer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/TranscriptContainer.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,236 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re\n+import sys\n+from commons.core.parsing.ParserChooser import ParserChooser\n+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n+\n+class TranscriptContainer(object):\n+ """\n+ An interface class that contains a list of transcripts, handle different formats\n+ @ivar container: container of the data\n+ @type container: string \n+ @ivar format: format of the data\n+ @type format: string \n+ @ivar transcriptListParser: possibly contains a parser to a list of transcripts\n+ @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None\n+ @ivar mappingListParser: possibly contains a parser to a list of mappings\n+ @type mappingListParser: L{MapperParser<MapperParser>} or None\n+ @ivar transcriptTables: possibly contains the mySQL tables\n+ @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>} or None\n+ @ivar mySqlConnection: connection to a MySQL database\n+ @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n+ @ivar type: type of the data (transcripts, mappings or mySQL)\n+ @type type: string\n+ @ivar verbosity: verbosity\n+ @type verbosity: int \n+ """\n+\n+ def __init__(self, container, format, verbosity = 0):\n+ """\n+ Constructor\n+ @param container: container of the data\n+ @type container: string\n+ @param format: format of the data\n+ @type format: string\n+ @param verbosity: verbosity\n+ @type verbosity: int\n+ """\n+ self.container = container\n+ 
self.format = format\n+ self.verbosity = verbosity\n+ self.transcriptListParser = None\n+ self.mappingListParser = None\n+ self.transcriptTables = {}\n+ self.mySqlConnection = None\n+ self.foundData = False\n+ self.nbTranscripts = None\n+ self.nbNucleotides = None\n+ self.chromosomes = None\n+ self.type = None\n+ if self.container == None:\n+ sys.exit("Error! Container input file name is empty!")\n+ if self.format == None:\n+ sys.exit("Error! Container input format is empty!")\n+ \n+ \n+ def findData(self):\n+ """\n+ Load data\n+ """\n+ if self.format == None:\n+ sys.ex'..b'ndle format \'%s\'!" % (self.format))\n+\n+ if self.transcriptListParser != None:\n+ if self.type == "transcript":\n+ self.nbTranscripts = self.transcriptListParser.getNbTranscripts()\n+ self.nbNucleotides = self.transcriptListParser.getNbNucleotides()\n+ self.chromosomes = self.transcriptListParser.getChromosomes()\n+ if self.mappingListParser != None:\n+ if self.type == "mapping":\n+ self.nbTranscripts = self.mappingListParser.getNbMappings()\n+ self.nbNucleotides = self.mappingListParser.getNbNucleotides()\n+ self.chromosomes = self.mappingListParser.getChromosomes()\n+\n+ self.foundData = True\n+\n+\n+ def getNbTranscripts(self):\n+ """\n+ Get the number of transcripts\n+ @return: the number of transcripts\n+ """\n+ if not self.foundData:\n+ self.findData()\n+ return self.nbTranscripts\n+ \n+ \n+ def getNbItems(self):\n+ """\n+ Same as getNbTranscripts\n+ """\n+ return self.getNbTranscripts()\n+\n+\n+ def getNbNucleotides(self):\n+ """\n+ Get the number of nucleotides\n+ @return: the number of nucleotides\n+ """\n+ if not self.foundData:\n+ self.findData()\n+ return self.nbNucleotides\n+\n+\n+ def getChromosomes(self):\n+ """\n+ Get the chromosomes\n+ @return: the chromosomes\n+ """\n+ if not self.foundData:\n+ self.findData()\n+ return self.chromosomes\n+ \n+\n+ def getIterator(self):\n+ """\n+ An iterator\n+ @return: an iterator to a list of transcripts\n+ """\n+ if not self.foundData:\n+ 
self.findData()\n+ if self.type == "sql":\n+ for chromosome in self.transcriptTables:\n+ for transcript in self.transcriptTables[chromosome].getIterator():\n+ yield transcript\n+ return\n+ if self.type == "transcript":\n+ for transcript in self.transcriptListParser.getIterator():\n+ yield transcript\n+ return\n+ if self.type == "mapping":\n+ for mapping in self.mappingListParser.getIterator():\n+ yield mapping.getTranscript()\n+ return\n+ sys.exit("Error! No valid transcript container given!")\n+ \n+ \n+ def storeIntoDatabase(self, name = None):\n+ """\n+ Store the current transcript / mapping list into database\n+ """\n+ if not self.foundData:\n+ self.findData()\n+\n+ if (self.transcriptListParser == None and self.mappingListParser == None) or len(self.transcriptTables.keys()) != 0:\n+ return\n+ \n+ mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)\n+ mySqlTranscriptWriter.addTranscriptList(self.transcriptListParser if self.transcriptListParser else self.mappingListParser)\n+ mySqlTranscriptWriter.write()\n+ self.transcriptTables = mySqlTranscriptWriter.getTables()\n+ self.type = "sql"\n+ \n+ \n+ def getTables(self):\n+ """\n+ Accessor to the mySQL tables\n+ @return: the mySQL tables\n+ """\n+ return self.transcriptTables\n+ \n+\n+ def setDefaultTagValue(self, name, value):\n+ """\n+ Set the given tag to the value for all transcripts\n+ @param name: name of the tag\n+ @type name: string\n+ @param value: value of the tag\n+ @type value: string\n+ """\n+ if self.type == "sql":\n+ for chromosome in self.transcriptTables:\n+ self.transcriptTables[chromosome].setDefaultTagValue(name, value)\n+ elif self.type == "transcript":\n+ self.transcriptListParser.setDefaultTagValue(name, value)\n+ elif self.type == "mapping":\n+ self.mappingListParser.setDefaultTagValue(name, value)\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptContainer.pyc

Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptList.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/TranscriptList.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,253 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.mySql.MySqlTable import MySqlTable
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class TranscriptList(object):
+    """
+    A class that codes for a list of transcripts, grouped by chromosome
+    @ivar transcripts: the transcripts, indexed by chromosome name
+    @type transcripts: dict of lists of L{Transcript<Transcript>}
+    @ivar longestTranscript: greatest (end - start) among the transcripts given to addTranscript
+    @type longestTranscript: int
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.transcripts = dict()
+        self.longestTranscript = 0
+        self.verbosity = verbosity
+
+
+    def getTranscript(self, chromosome, index):
+        """
+        Get one transcript
+        @param chromosome: name of the chromosome
+        @type chromosome: string
+        @param index: rank of the transcript in the list of its chromosome
+        @type index: int
+        @return: the transcript stored at this position
+        """
+        return self.transcripts[chromosome][index]
+
+
+    def getChromosomes(self):
+        """
+        Get the chromosomes
+        @return: the names of the chromosomes that have a list of transcripts
+        """
+        return self.transcripts.keys()
+
+
+    def getTranscriptsOnChromosome(self, chromosome):
+        """
+        Get the transcripts of a chromosome
+        @param chromosome: name of the chromosome
+        @type chromosome: string
+        @return: the stored list of transcripts, or a new empty list if the chromosome is unknown
+        """
+        if chromosome not in self.transcripts:
+            return []
+        return self.transcripts[chromosome]
+
+
+    def addTranscript(self, transcript):
+        """
+        Add a transcript to the list of its chromosome, and update longestTranscript
+        @param transcript: a new transcript
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        self.transcripts.setdefault(transcript.getChromosome(), []).append(transcript)
+        self.longestTranscript = max(self.longestTranscript, transcript.getEnd() - transcript.getStart())
+
+
+    def removeTranscript(self, chromosome, i):
+        """
+        Remove one transcript (longestTranscript is not recomputed)
+        @param chromosome: name of the chromosome
+        @type chromosome: string
+        @param i: rank of the transcript in the list of its chromosome
+        @type i: int
+        """
+        del self.transcripts[chromosome][i]
+
+
+    def removeAll(self):
+        """
+        Remove all the transcripts (longestTranscript is not reset)
+        """
+        self.transcripts = {}
+
+
+    def getNbTranscripts(self):
+        """
+        Get the number of transcripts
+        @return: the cumulated length of the lists of all the chromosomes
+        """
+        return sum(len(transcripts) for transcripts in self.transcripts.values())
+
+
+    def getSize(self):
+        """
+        Get the size of the list
+        @return: the sum of the getSize() of every transcript
+        """
+        size = 0
+        for transcripts in self.transcripts.values():
+            for transcript in transcripts:
+                size += transcript.getSize()
+        return size
+
+
+    def sort(self):
+        """
+        Sort the transcripts of each chromosome by increasing start position
+        """
+        for transcripts in self.transcripts.values():
+            # a key function works with Python 2 and 3, a positional comparison function with Python 2 only
+            transcripts.sort(key = lambda transcript: transcript.getStart())
+
+
+    def _removeOverlapping(self, transcriptList, overlaps):
+        """
+        Remove the transcripts for which the given test is true with at least one transcript of another list
+        The other list is sorted by start position as a side effect
+        @param transcriptList: the other list of transcripts
+        @type transcriptList: class L{TranscriptList<TranscriptList>}
+        @param overlaps: test called with (a transcript of this list, a transcript of the other list)
+        @type overlaps: function returning a boolean
+        """
+        transcriptList.sort()
+        for chromosome in self.transcripts:
+            theseTranscripts = self.transcripts[chromosome]
+            # a chromosome absent from the other list gives an empty list: nothing is removed
+            thoseTranscripts = transcriptList.getTranscriptsOnChromosome(chromosome)
+            progress = Progress(len(theseTranscripts), "Handling chromosome %s" % (chromosome), self.verbosity)
+            keptTranscripts = []
+            for thisTranscript in theseTranscripts:
+                progress.inc()
+                if thisTranscript is None:
+                    continue
+                found = False
+                for thatTranscript in thoseTranscripts:
+                    if overlaps(thisTranscript, thatTranscript):
+                        found = True
+                        break
+                    # the other list is sorted by start: every following transcript also starts after this one ends
+                    # (assumes the test requires the two spans to intersect -- TODO confirm against overlapWith / overlapWithExon)
+                    if thatTranscript.getStart() > thisTranscript.getEnd():
+                        break
+                if not found:
+                    keptTranscripts.append(thisTranscript)
+            self.transcripts[chromosome] = keptTranscripts
+            # one progress bar is opened per chromosome, so it is closed per chromosome
+            progress.done()
+
+
+    def removeOverlapWith(self, transcriptList):
+        """
+        Remove the transcripts for which overlapWith is true with a transcript of another list
+        @param transcriptList: the other list of transcripts (sorted as a side effect)
+        @type transcriptList: class L{TranscriptList<TranscriptList>}
+        """
+        self._removeOverlapping(transcriptList, lambda thisTranscript, thatTranscript: thisTranscript.overlapWith(thatTranscript))
+
+
+    def removeOverlapWithExon(self, transcriptList):
+        """
+        Remove the transcripts for which overlapWithExon is true with a transcript of another list
+        @param transcriptList: the other list of transcripts (sorted as a side effect)
+        @type transcriptList: class L{TranscriptList<TranscriptList>}
+        """
+        self._removeOverlapping(transcriptList, lambda thisTranscript, thatTranscript: thisTranscript.overlapWithExon(thatTranscript))
+
+
+    def setDefaultTagValue(self, name, value):
+        """
+        Set the given tag to the value for all transcripts
+        @param name: name of the tag
+        @param value: value of the tag
+        """
+        for transcript in self.getIterator():
+            # NOTE(review): the tag readers used in Transcript are getTagValue / getTagNames -- confirm that setTag exists
+            transcript.setTag(name, value)
+
+
+    def storeDatabase(self, mySqlConnection):
+        """
+        Store the transcripts and their exons into the tables TmpTranscriptsTable and TmpIntervalsTable
+        Each exon line holds, as idTranscript, the value returned by addLine for its transcript
+        @param mySqlConnection: connection given to the two MySqlTable objects
+        """
+        transcriptsTable = MySqlTable("TmpTranscriptsTable", mySqlConnection)
+        transcriptsTable.create(Transcript.getSqlVariables(), Transcript.getSqlTypes())
+        # work on copies, so that the objects returned by Interval are left untouched
+        intervalsVariables = list(Interval.getSqlVariables())
+        intervalsVariables.append("idTranscript")
+        intervalsTypes = dict(Interval.getSqlTypes())
+        intervalsTypes["idTranscript"] = "int"
+        intervalsTable = MySqlTable("TmpIntervalsTable", mySqlConnection)
+        intervalsTable.create(intervalsVariables, intervalsTypes)
+        for chromosome in self.transcripts:
+            for transcript in self.transcripts[chromosome]:
+                idTranscript = transcriptsTable.addLine(transcript.getSqlValues())
+                for exon in transcript.getExons():
+                    intervalValues = exon.getSqlValues()
+                    intervalValues["idTranscript"] = idTranscript
+                    intervalsTable.addLine(intervalValues)
+
+
+    def getIterator(self):
+        """
+        An iterator
+        @return: an iterator to the transcripts, chromosome after chromosome, skipping the None entries
+        """
+        for chromosome in list(self.transcripts.keys()):
+            for transcript in self.transcripts[chromosome]:
+                if transcript is not None:
+                    yield transcript
+
+
+    def __str__(self):
+        """
+        Concatenate the str() of all the transcripts
+        @return: a string
+        """
+        return "".join(str(transcript) for transcript in self.getIterator())
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptList.pyc

Binary file SMART/Java/Python/structure/TranscriptList.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptListIterator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/TranscriptListIterator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,94 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+class TranscriptListIterator(object):
+    """
+    A class that iterates on a list of transcript
+    The names of the chromosomes are read once, when the iterator is built
+    @ivar transcriptList: the list to iterate on (only its transcripts dict is read)
+    @type transcriptList: class L{TranscriptList<TranscriptList>}
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    @ivar chromosomes: names of the chromosomes, in visiting order
+    @type chromosomes: list
+    @ivar currentChromosome: rank of the current chromosome in chromosomes
+    @type currentChromosome: int
+    @ivar currentTranscript: rank of the last returned transcript in the list of its chromosome (-1 before the first call)
+    @type currentTranscript: int
+    """
+
+    def __init__(self, transcriptList, verbosity = 0):
+        """
+        Constructor
+        @param transcriptList: the list to iterate on
+        @type transcriptList: class L{TranscriptList<TranscriptList>}
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.transcriptList = transcriptList
+        self.verbosity = verbosity
+        # a real list is needed, since next() reads it by rank (keys() is a view in Python 3)
+        self.chromosomes = list(self.transcriptList.transcripts.keys())
+        self.currentChromosome = 0
+        self.currentTranscript = -1
+
+
+    def __iter__(self):
+        """
+        Iterator protocol
+        @return: the iterator itself
+        """
+        return self
+
+
+    def next(self):
+        """
+        Get the next transcript, skipping the None entries and the exhausted chromosomes
+        @return: the next transcript
+        @raise StopIteration: when all the chromosomes have been visited
+        """
+        self.currentTranscript += 1
+        while True:
+            # compare with the stored names, not with the current size of the dict, which may have changed since
+            if self.currentChromosome >= len(self.chromosomes):
+                raise StopIteration
+            transcripts = self.transcriptList.transcripts.get(self.chromosomes[self.currentChromosome], [])
+            if self.currentTranscript >= len(transcripts):
+                self.currentTranscript = 0
+                self.currentChromosome += 1
+            elif transcripts[self.currentTranscript] is None:
+                self.currentTranscript += 1
+            else:
+                return transcripts[self.currentTranscript]
+
+
+    # Python 3 name of the iterator protocol method
+    __next__ = next
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/TranscriptListsComparator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/TranscriptListsComparator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1198 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import sys\n+import random\n+from SMART.Java.Python.misc import Utils\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.structure.TranscriptList import TranscriptList\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n+from SMART.Java.Python.misc.Progress import Progress\n+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n+\n+\n+\n+class TranscriptListsComparator(object):\n+ """\n+ Compare two transcript lists, using a database for one of the list\n+ Uses one TranscriptContainer for query data, \n+ one TranscriptContainer exported to MySqlTranscriptTable for reference data, \n+ one MySqlTranscriptTable for transformed reference data\n+ @ivar inputTranscriptContainers: parsers to the list of query transcripts\n+ @type inputTranscriptContainers: list of 2 L{TranscriptContainer<TranscriptContainer>}\n+ @ivar writer: transcript list writer\n+ @type writer: class L{TranscriptListWriter<TranscriptListWriter>}\n+ @ivar mySqlConnection: connection to a MySQL database (to compute the ovelapping efficiently)\n+ @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n+ @ivar introns: compare transcripts or exons only\n+ @type introns: list of 2 boolean\n+ @ivar starts: restrict the query transcripts to first nucleotides\n+ @type starts: list of 2 int or None\n+ @ivar fivePrimes: extend a list 
of transcripts by their 5\' end\n+ @type fivePrimes: list of 2 int or None\n+ @ivar threePrimes: extend a list of transcripts by their 3\' end\n+ @type threePrimes: list of 2 int or None\n+ @ivar minDistance: min distance between two transcripts [default: 0]\n+ @type minDistance: int\n+ @ivar maxDistance: max distance between two transcripts [default: 0]\n+ @type maxDistance: int\n+ @ivar minOverlap: minimum number of overlapping nucleotides to declare an overlap\n+ @type minOverlap: int\n+ @ivar pcOverlap: percentage of overlapping nucleotides to declare an ove'..b' for index2, transcript2 in self.getTables(self.REFERENCE)[chromosome1].selectTranscripts(command):\n+ transcripts2.append(transcript2)\n+ command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)\n+ self.mySqlConnection.executeQuery(command)\n+\n+ # compare sets\n+ toBeRemoved1 = []\n+ for index1, transcript1 in enumerate(transcripts1):\n+ newTranscript1 = Transcript()\n+ newTranscript1.copy(transcript1)\n+ for transcript2 in transcripts2:\n+ newTranscript1 = newTranscript1.getDifference(transcript2)\n+ if newTranscript1 == None:\n+ toBeRemoved1.append(index1)\n+ break\n+ transcripts1[index1] = newTranscript1\n+\n+ # check if query transcript extends bounds of the chunk\n+ if newTranscript1 != None and newTranscript1.getEnd() < end:\n+ if self.splitDifference:\n+ for exon in newTranscript1.getExons():\n+ transcript = Transcript()\n+ transcript.copy(exon)\n+ self.writeTranscript(transcript)\n+ else:\n+ self.writeTranscript(newTranscript1)\n+ toBeRemoved1.append(index1)\n+\n+ # update list of query transcripts\n+ for index1 in reversed(toBeRemoved1):\n+ del transcripts1[index1]\n+\n+ # check if the reference transcripts extends bounds of the chunk\n+ toBeRemoved2 = []\n+ for index2, transcript2 in enumerate(transcripts2):\n+ if transcript2.getEnd() + distance < end:\n+ toBeRemoved2.append(index2)\n+ for index2 in reversed(toBeRemoved2):\n+ del 
transcripts2[index2]\n+\n+ progress.inc()\n+\n+ for transcript1 in transcripts1:\n+ if self.splitDifference:\n+ for exon in transcript1.getExons():\n+ transcript = Transcript()\n+ transcript.copy(exon)\n+ self.writeTranscript(transcript)\n+ else:\n+ self.writeTranscript(transcript1)\n+ progress.done()\n+ self.getTables(self.QUERY)[chromosome1].remove()\n+ if chromosome1 in self.getTables(self.REFERENCE):\n+ self.getTables(self.REFERENCE)[chromosome1].remove()\n+ self.getTables(self.WORKING)[chromosome1].remove()\n+\n+ self.flushData()\n+ if self.writer != None:\n+ self.writer.close()\n+ self.writer = None\n+\n+ if self.verbosity > 0:\n+ print "query: %d elements" % (self.nbTranscripts[self.QUERY])\n+ print "reference: %d elements" % (self.nbTranscripts[self.REFERENCE])\n+ print "# printed: %d (%.2f%%)" % (self.nbPrinted, self.nbPrinted / float(self.nbTranscripts[self.QUERY]) * 100)\n+\n+\n+ def getOddsPerTranscript(self):\n+ """\n+ Return overlap results\n+ @return a dict of data\n+ """\n+ if not self.odds:\n+ raise Exception("Did not compute odds!")\n+ return self.overlapResults\n+\n+\n+ def getOdds(self):\n+ """\n+ Return odds about the overlap\n+ @return a dict of data\n+ """\n+ if not self.odds:\n+ raise Exception("Did not compute odds!")\n+ if self.oddResults != None:\n+ return self.oddResults\n+ self.oddResults = {}\n+ for name, value in self.overlapResults.iteritems():\n+ self.oddResults[value] = self.oddResults.get(value, 0) + 1\n+ return self.oddResults\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/__init__.pyc

Binary file SMART/Java/Python/structure/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_Interval.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_Interval.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,369 @@\n+import unittest\n+from SMART.Java.Python.structure.Interval import Interval\n+\n+class Test_Interval(unittest.TestCase):\n+\n+ def setUp(self):\n+ self.iInterval = Interval()\n+ self.iInterval1 = Interval()\n+ self.iInterval2 = Interval()\n+ \n+ def test__init__(self):\n+ self.iInterval.setChromosome("chromosome")\n+ self.iInterval.setName("sequence")\n+ self.iInterval.setStart(0)\n+ self.iInterval.setEnd(123)\n+ obsStart = self.iInterval.getStart()\n+ obsEnd = self.iInterval.getEnd()\n+ expStart = 0\n+ expEnd = 123\n+ \n+ self.assertEqual(expStart, obsStart)\n+ self.assertEqual(expEnd, obsEnd)\n+\n+ def test_copy(self):\n+ self.iInterval1.setName("interval1")\n+ self.iInterval1.setChromosome("chr1")\n+ self.iInterval1.setStart(100)\n+ self.iInterval1.setEnd(300)\n+ self.iInterval1.setDirection("+")\n+\n+ self.iInterval2.copy(self.iInterval1)\n+ self.assertEqual(self.iInterval2.getName(), "interval1")\n+ self.assertEqual(self.iInterval2.getChromosome(), "chr1")\n+ self.assertEqual(self.iInterval2.getStart(), 100)\n+ self.assertEqual(self.iInterval2.getEnd(), 300)\n+ self.assertEqual(self.iInterval2.getDirection(), 1)\n+\n+ self.iInterval1.setStart(200)\n+ self.assertEqual(self.iInterval2.getStart(), 100)\n+ \n+ def test_getDirection(self):\n+ self.iInterval1.setName("interval1")\n+ self.iInterval1.setChromosome("chr1")\n+ self.iInterval1.setStart(100)\n+ self.iInterval1.setEnd(300)\n+ self.iInterval1.setDirection("+")\n+ expDirect = 1\n+ self.assertEquals(expDirect,self.iInterval1.getDirection())\n+\n+ #!!!! 
Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!#\n+ #In case strand = "+", start < end; strand = "-", start > end \n+ def test_setStartEnd(self):\n+ self.iInterval1 = Interval()\n+ self.iInterval1.setName("interval1")\n+ self.iInterval1.setChromosome("chr1")\n+ self.iInterval1.setStart(100)\n+ self.iInterval1.setEnd(300)\n+ self.iInterval1.setDirection("+")\n+ \n+ self.assertEqual(self.iInterval1.getName(), "interval1")\n+ self.assertEqual(self.iInterval1.getChromosome(), "chr1")\n+ self.assertEqual(self.iInterval1.getStart(),100)\n+ self.assertEqual(self.iInterval1.getEnd(), 300)\n+ self.assertEqual(self.iInterval1.getDirection(), 1)\n+\n+ self.iInterval1.setStart(200)\n+ self.assertEqual(self.iInterval1.getStart(), 200)\n+ self.assertEqual(self.iInterval1.getEnd(), 300)\n+\n+ self.iInterval1.setEnd(300)\n+ self.iInterval1.setStart(100)\n+ self.assertEqual(self.iInterval1.getStart(), 100)\n+ self.assertEqual(self.iInterval1.getEnd(), 300)\n+\n+ self.iInterval1.setEnd(1200)\n+ self.iInterval1.setStart(1000)\n+ self.assertEqual(self.iInterval1.getStart(), 1000)\n+ self.assertEqual(self.iInterval1.getEnd(), 1200)\n+\n+ self.iInterval1.reverse()\n+ self.assertEqual(self.iInterval1.getDirection(), -1)\n+ self.assertEqual(self.iInterval1.getStart(), 1000)\n+ self.assertEqual(self.iInterval1.getEnd(), 1200)\n+\n+ self.iInterval1.setStart(1100)\n+ self.assertEqual(self.iInterval1.getStart(), 1100)\n+ self.assertEqual(self.iInterval1.getEnd(), 1200)\n+\n+ self.iInterval1.setEnd(2200)\n+ self.iInterval1.setStart(2000)\n+ self.assertEqual(self.iInterval1.getStart(), 2000)\n+ self.assertEqual(self.iInterval1.getEnd(), 2200)\n+\n+ self.iInterval1.setStart(1000)\n+ self.iInterval1.setEnd(1200)\n+ self.assertEqual(self.iInterval1.getStart(), 1000)\n+ self.assertEqual(self.iInterval1.getEnd(), 1200)\n+\n+ def test_reverse(self):\n+ self.iInterval1 = Interval()\n+ self.iInterval1.setName("interval1")\n+ self.i'..b'\n+\n+ 
iInterval2.setChromosome("chr2")\n+ results = iInterval1.getDifference(iInterval2)\n+ self.assertEqual(len(results), 1)\n+ resultInterval = results[0]\n+ self.assertEqual(resultInterval.getStart(), iInterval1.getStart())\n+ self.assertEqual(resultInterval.getEnd(), iInterval1.getEnd())\n+ self.assertEqual(resultInterval.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n+\n+ iInterval2.setChromosome("chr1")\n+ iInterval2.setEnd(300)\n+ results = iInterval1.getDifference(iInterval2)\n+ self.assertEqual(len(results), 1)\n+ resultInterval = results[0]\n+ self.assertEqual(resultInterval.getStart(), 301)\n+ self.assertEqual(resultInterval.getEnd(), iInterval1.getEnd())\n+ self.assertEqual(resultInterval.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n+ \n+ iInterval2.setDirection("-")\n+ results = iInterval1.getDifference(iInterval2, True)\n+ self.assertEqual(len(results), 1)\n+ resultInterval = results[0]\n+ self.assertEqual(resultInterval.getStart(), iInterval1.getStart())\n+ self.assertEqual(resultInterval.getEnd(), iInterval1.getEnd())\n+ self.assertEqual(resultInterval.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n+ \n+ iInterval2.setDirection("+")\n+ iInterval2.setStart(200)\n+ results = iInterval1.getDifference(iInterval2)\n+ self.assertEqual(len(results), 2)\n+ resultInterval1, resultInterval2 = results\n+ self.assertEqual(resultInterval1.getStart(), iInterval1.getStart())\n+ self.assertEqual(resultInterval1.getEnd(), 199)\n+ self.assertEqual(resultInterval1.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval1.getChromosome(), iInterval1.getChromosome())\n+ self.assertEqual(resultInterval2.getStart(), 301)\n+ self.assertEqual(resultInterval2.getEnd(), iInterval1.getEnd())\n+ 
self.assertEqual(resultInterval2.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval2.getChromosome(), iInterval1.getChromosome())\n+\n+ iInterval2.setEnd(2000)\n+ iInterval2.setStart(1000)\n+ results = iInterval1.getDifference(iInterval2)\n+ self.assertEqual(len(results), 1)\n+ resultInterval = results[0]\n+ self.assertEqual(resultInterval.getStart(), iInterval1.getStart())\n+ self.assertEqual(resultInterval.getEnd(), iInterval1.getEnd())\n+ self.assertEqual(resultInterval.getDirection(), iInterval1.getDirection())\n+ self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n+ \n+ def test_mergeWithDifferentStrand(self):\n+ self.iInterval1 = Interval()\n+ self.iInterval1.setName("interval1")\n+ self.iInterval1.setChromosome("chr1")\n+ self.iInterval1.setStart(100)\n+ self.iInterval1.setEnd(200)\n+ self.iInterval1.setDirection("+")\n+ \n+ self.iInterval2 = Interval()\n+ self.iInterval2.setName("interval2")\n+ self.iInterval2.setChromosome("chr1")\n+ self.iInterval2.setStart(300)\n+ self.iInterval2.setEnd(400)\n+ self.iInterval2.setDirection("-")\n+\n+ expMessage = "Cannot merge \'%s\' and \'%s\' for they are on different strands." % (str(self.iInterval2), str(self.iInterval1))\n+ isExceptionRaised = False\n+ try:\n+ self.iInterval2.merge(self.iInterval1)\n+ except Exception, e:\n+ isExceptionRaised = True\n+ obsMessage = str(e)\n+\n+ self.assertTrue(isExceptionRaised)\n+ self.assertEquals(expMessage, obsMessage)\n+\n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_Mapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_Mapping.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,59 @@
+import unittest
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.structure.Mapping import Mapping
+
+class Test_Mapping(unittest.TestCase):
+    """Unit test for Mapping.getTranscript() on a mapping made of two sub-mappings."""
+
+    @staticmethod
+    def _buildInterval(start, end, name=None, chromosome=None):
+        """Return a new Interval with direction 1; name and chromosome are set only if given."""
+        interval = Interval()
+        if name is not None:
+            interval.setName(name)
+        if chromosome is not None:
+            interval.setChromosome(chromosome)
+        interval.setStart(start)
+        interval.setEnd(end)
+        interval.setDirection(1)
+        return interval
+
+    @staticmethod
+    def _buildSubMapping(queryInterval, targetInterval):
+        """Return a new SubMapping holding the given query and target intervals."""
+        subMapping = SubMapping()
+        subMapping.setQueryInterval(queryInterval)
+        subMapping.setTargetInterval(targetInterval)
+        return subMapping
+
+    def test__getTranscript(self):
+        # One row per sub-mapping: (read name, query start, query end, target start, target end).
+        rows = (("read1_1", 1, 10, 100, 110), ("read1_2", 11, 20, 200, 210))
+        subMappings = []
+        for readName, queryStart, queryEnd, targetStart, targetEnd in rows:
+            queryInterval = self._buildInterval(queryStart, queryEnd, name=readName)
+            targetInterval = self._buildInterval(targetStart, targetEnd, chromosome="chr1")
+            subMappings.append(self._buildSubMapping(queryInterval, targetInterval))
+
+        mapping = Mapping()
+        for subMapping in subMappings:
+            mapping.addSubMapping(subMapping)
+
+        # The transcript must span both target intervals on chr1 ...
+        transcript = mapping.getTranscript()
+        self.assertEqual(transcript.getStart(), 100)
+        self.assertEqual(transcript.getEnd(), 210)
+        self.assertEqual(transcript.getChromosome(), "chr1")
+
+        # ... and expose one exon per target interval.
+        exons = transcript.getExons()
+        self.assertEqual(len(exons), 2)
+        firstExon, secondExon = exons
+        for exon, expStart, expEnd in ((firstExon, 100, 110), (secondExon, 200, 210)):
+            self.assertEqual(exon.getStart(), expStart)
+            self.assertEqual(exon.getEnd(), expEnd)
+
+
+if __name__ == '__main__':
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_Sequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_Sequence.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,90 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+from SMART.Java.Python.structure.Sequence import Sequence
+
+
+class Test_Sequence(unittest.TestCase):
+    """Unit tests for Sequence; setUp() gives every test two fresh Sequence instances."""
+
+    def setUp(self):
+        self._bs, self._bs1 = Sequence(), Sequence()
+
+    def test_getSize(self):
+        # The sequence literal below is 30 nucleotides long.
+        self._bs.setName("sequence1")
+        self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGATA")
+        self.assertEqual(30, self._bs.getSize())
+
+    def test_concatenate(self):
+        # concatenate() must append the second sequence's nucleotides and qualities to the first.
+        self._bs.setName("sequence")
+        self._bs.setSequence("GATGTGCAGACTTTTCACGCAGGACTACATCACTGT")
+        self._bs.setQuality("WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ")
+        self._bs1.setName("sequence1")
+        self._bs1.setSequence("GGAAACATATGCACATAAACGTTGAAATCATGCTTA")
+        self._bs1.setQuality("WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU")
+        self._bs.concatenate(self._bs1)
+        expSeq = "GATGTGCAGACTTTTCACGCAGGACTACATCACTGTGGAAACATATGCACATAAACGTTGAAATCATGCTTA"
+        expQal = "WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQWWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU"
+        self.assertEqual(expSeq, self._bs.getSequence())
+        self.assertEqual(expQal, self._bs.getQuality())
+
+    def test_reverseComplement(self):
+        # reverseComplement() works in place; the result is read back with getSequence().
+        self._bs.setName("seq1")
+        self._bs.setSequence("TACGGC")
+        self._bs.reverseComplement()
+        self.assertEqual("GCCGTA", self._bs.getSequence())
+
+    def test_containsAmbiguousNucleotides(self):
+        # The sequence holds letters other than A, C, G and T (W and U): expected to be ambiguous.
+        self._bs.setName("seq1")
+        self._bs.setSequence("WCGTUacgtu")
+        self.assertTrue(self._bs.containsAmbiguousNucleotides())
+
+    def test_shrinkToFirstNucleotides(self):
+        # Only the first three letters must remain.
+        self._bs.setName("seq1")
+        self._bs.setSequence("WCGTUacgtu")
+        self._bs.shrinkToFirstNucleotides(3)
+        self.assertEqual("WCG", self._bs.getSequence())
+
+    def test_shrinkToLastNucleotides(self):
+        # Only the last five letters must remain.
+        self._bs.setName("seq1")
+        self._bs.setSequence("WCGTUacgtu")
+        self._bs.shrinkToLastNucleotides(5)
+        self.assertEqual("acgtu", self._bs.getSequence())
+
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_SubMapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_SubMapping.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,292 @@\n+import unittest\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.SubMapping import SubMapping\n+\n+class Test_SubMapping(unittest.TestCase):\n+\n+ def test__init__(self):\n+ expEvalue = 0.00\n+ expScore = 0\n+ expIdentity = 0.00\n+ expTargetInterval = Interval()\n+ expQueryInterval = Interval()\n+ expQueryRange = expQueryInterval\n+ expSubjectRange = expTargetInterval\n+ expSize = None\n+ expTags = {}\n+ \n+ iSubMapping = SubMapping()\n+ obsQueryRange = iSubMapping.getQueryAsRange()\n+ obsSubjectRange = iSubMapping.getSubjectAsRange()\n+ obsEvalue = iSubMapping.getEvalue()\n+ obsScore = iSubMapping.getScore()\n+ obsIdentity = iSubMapping.getIdentity()\n+ obsTargetInterval = iSubMapping.getTargetInterval()\n+ obsQueryInterval = iSubMapping.getQueryInterval()\n+ obsSize = iSubMapping.getSize()\n+ obsTags = iSubMapping.getTags()\n+ \n+ self.assertEquals(expEvalue, obsEvalue)\n+ self.assertEquals(expIdentity, obsIdentity)\n+ self.assertEquals(expQueryInterval, obsQueryInterval)\n+ self.assertEquals(expQueryRange, obsQueryRange)\n+ self.assertEquals(expScore, obsScore)\n+ self.assertEquals(expSize, obsSize)\n+ self.assertEquals(expSubjectRange, obsSubjectRange)\n+ self.assertEquals(expTags, obsTags)\n+ self.assertEquals(expTargetInterval, obsTargetInterval)\n+ \n+ def test__init__change_values_by_Interval(self):\n+ iSubMapping = SubMapping()\n+ \n+ expSeqName = ""\n+ \n+ obsRangeSubject = iSubMapping.range_subject.getSeqname()\n+ obsRangeQuery = iSubMapping.range_query.getSeqname()\n+ obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n+ obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n+ \n+ self.assertEquals(expSeqName, obsRangeSubject)\n+ self.assertEquals(expSeqName, obsRangeQuery)\n+ self.assertEquals(expSeqName, obsIntervalTarget)\n+ self.assertEquals(expSeqName, obsIntervalQuery)\n+ \n+ iSubMapping.getTargetInterval().setChromosome("intervalTarget")\n+ 
iSubMapping.getQueryInterval().setChromosome("intervalQuery")\n+ \n+ expTargetSeqName = "intervalTarget"\n+ expQuerySeqName = "intervalQuery"\n+ \n+ obsRangeSubject = iSubMapping.range_subject.getSeqname()\n+ obsRangeQuery = iSubMapping.range_query.getSeqname()\n+ obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n+ obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n+ \n+ self.assertEquals(expTargetSeqName, obsRangeSubject)\n+ self.assertEquals(expQuerySeqName, obsRangeQuery)\n+ self.assertEquals(expTargetSeqName, obsIntervalTarget)\n+ self.assertEquals(expQuerySeqName, obsIntervalQuery)\n+ \n+ def test__init__change_values_by_Align(self):\n+ iSubMapping = SubMapping()\n+ \n+ expSeqName = ""\n+ \n+ obsRangeSubject = iSubMapping.range_subject.getSeqname()\n+ obsRangeQuery = iSubMapping.range_query.getSeqname()\n+ obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n+ obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n+ \n+ self.assertEquals(expSeqName, obsRangeSubject)\n+ self.assertEquals(expSeqName, obsRangeQuery)\n+ self.assertEquals(expSeqName, obsIntervalTarget)\n+ self.assertEquals(expSeqName, obsIntervalQuery)\n+ \n+ iSubMapping.range_subject.setSeqName("intervalTarget")\n+ iSubMapping.range_query.setSeqName("intervalQuery")\n+ \n+ expTargetSeqName = "intervalTarget"\n+ expQuerySeqName = "intervalQuery"\n+ \n+ obsRangeSubject = iSubMapping.range_subject.getSeqname()\n+ obsRangeQuery = iSubMapping.range_query.getSeqname'..b'rt(0)\n+ iIntervalTarget.setEnd(123)\n+ iIntervalTarget.setDirection("+")\n+ iIntervalQuery = Interval()\n+ iIntervalQuery.setChromosome("chromosomeQuery")\n+ iIntervalQuery.setName("sequenceQuery")\n+ iIntervalQuery.setStart(200)\n+ iIntervalQuery.setEnd(323)\n+ iIntervalQuery.setDirection("+") \n+ \n+ iTestSubMapping.setQueryInterval(iIntervalTarget)\n+ iTestSubMapping.setTargetInterval(iIntervalQuery)\n+ iTestSubMapping.setTagValue("identity", 50)\n+ 
iTestSubMapping.setSize(10)\n+ \n+ iSubMappingWithCopy = SubMapping(iTestSubMapping)\n+ self.assertEquals(iSubMappingWithCopy, iTestSubMapping) \n+\n+ \n+ def test_copy(self):\n+ iSubMapping = SubMapping()\n+ iSubMapping.setQueryName("Query")\n+ iSubMapping.setQueryStart(50)\n+ iSubMapping.setQueryEnd(150)\n+ iSubMapping.setSubjectName("Subject")\n+ iSubMapping.setSubjectStart(100)\n+ iSubMapping.setSubjectEnd(200)\n+ iSubMapping.e_value = 1e-20\n+ iSubMapping.score = 30\n+ iSubMapping.identity = 90.2 \n+ \n+ iInterval1 = Interval()\n+ iInterval1.setChromosome("chromosome1")\n+ iInterval1.setName("sequence1")\n+ iInterval1.setStart(0)\n+ iInterval1.setEnd(123)\n+ iInterval1.setDirection("+")\n+ iInterval2 = Interval()\n+ iInterval2.setChromosome("chromosome2")\n+ iInterval2.setName("sequence2")\n+ iInterval2.setStart(200)\n+ iInterval2.setEnd(300) \n+ iInterval2.setDirection("+") \n+ iSubMapping.setQueryInterval(iInterval1)\n+ iSubMapping.setTargetInterval(iInterval2)\n+ iSubMapping.setTagValue("identity", 50)\n+ iSubMapping.setSize(10)\n+\n+ iSubMappingCopy = SubMapping()\n+ iSubMappingCopy.copy(iSubMapping)\n+ self.assertEqual(iSubMappingCopy, iSubMapping)\n+ \n+ \n+ def test_setTags(self):\n+ iSubMapping = SubMapping()\n+ iSubMapping.getQueryInterval().setSize(50)\n+ iSubMapping.getTargetInterval().setSize(2)\n+ iSubMapping.setTagValue("identity", 50)\n+ iSubMapping.setSize(10)\n+ \n+ expQueryIntervalSize = 50\n+ expTargetIntervalSize = 2\n+ expTags = {"identity" : 50,\n+ "nbMismatches" : 5}\n+ \n+ obsTags = iSubMapping.getTags()\n+ self.assertEquals(expTags, obsTags)\n+ \n+ \n+ def test_setIdentity(self):\n+ iSubMapping = SubMapping() \n+ iSubMapping.setIdentity(10)\n+ expIdentity = 10\n+ expTags = {"identity": 10}\n+ \n+ obsIdentity = iSubMapping.getIdentity()\n+ obsTags = iSubMapping.getTags()\n+ \n+ self.assertEquals(expIdentity,obsIdentity)\n+ self.assertEquals(expTags,obsTags)\n+ \n+ \n+ def test_setIdentity_with_size(self):\n+ iSubMapping = SubMapping() 
\n+ iSubMapping.setSize(10)\n+ iSubMapping.setIdentity(50)\n+ \n+ expIdentity = 50\n+ expTags = {"identity" : 50,\n+ "nbMismatches" : 5}\n+ \n+ obsIdentity = iSubMapping.getIdentity()\n+ obsTags = iSubMapping.getTags()\n+ \n+ self.assertEquals(expIdentity,obsIdentity)\n+ self.assertEquals(expTags,obsTags)\n+ \n+ \n+ def test_setIdentity_with_sizeAndMismatchTag(self):\n+ iSubMapping = SubMapping() \n+ iSubMapping.setSize(10)\n+ iSubMapping.setTagValue("nbMismatches", 8)\n+ iSubMapping.setIdentity(50)\n+ \n+ expIdentity = 50\n+ expTags = {"identity" : 50,\n+ "nbMismatches" : 8}\n+ \n+ obsIdentity = iSubMapping.getIdentity()\n+ obsTags = iSubMapping.getTags()\n+ \n+ self.assertEquals(expIdentity,obsIdentity)\n+ self.assertEquals(expTags,obsTags)\n+ \n+ \n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_Transcript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_Transcript.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,351 @@\n+import unittest\n+from SMART.Java.Python.structure.Interval import Interval\n+from SMART.Java.Python.structure.Transcript import Transcript\n+\n+class Test_Transcript(unittest.TestCase):\n+ \n+ def test_getSize(self):\n+ transcript1 = Transcript()\n+ transcript1.setDirection("+")\n+ transcript1.setStart(2000)\n+ transcript1.setEnd(3000)\n+ transcript1.setChromosome("arm_X")\n+ \n+ self.assertEqual(transcript1.getSize(), 1001)\n+ \n+ transcript2 = Transcript()\n+ transcript2.copy(transcript1)\n+ self.assertEqual(transcript1.getSize(), 1001)\n+ \n+ transcript3 = Transcript()\n+ transcript3.setDirection("+")\n+ transcript3.setChromosome("arm_X")\n+ \n+ exon1 = Interval()\n+ exon1.setDirection("+")\n+ exon1.setChromosome("arm_X")\n+ exon1.setStart(100)\n+ exon1.setEnd(200)\n+ transcript3.addExon(exon1)\n+ \n+ exon2 = Interval()\n+ exon2.setDirection("+")\n+ exon2.setChromosome("arm_X")\n+ exon2.setStart(300)\n+ exon2.setEnd(400)\n+ transcript3.addExon(exon2)\n+ \n+ self.assertEqual(transcript3.getSize(), 203)\n+\n+\n+ def test_overlapWithExons(self):\n+ exon1_1 = Interval()\n+ exon1_1.setChromosome("chr1")\n+ exon1_1.setStart(100)\n+ exon1_1.setEnd(200)\n+ exon1_1.setDirection("+")\n+\n+ exon1_2 = Interval()\n+ exon1_2.setChromosome("chr1")\n+ exon1_2.setStart(500)\n+ exon1_2.setEnd(600)\n+ exon1_2.setDirection("+")\n+\n+ transcript1 = Transcript()\n+ transcript1.setChromosome("chr1")\n+ transcript1.setStart(100)\n+ transcript1.setEnd(600)\n+ transcript1.setDirection("+")\n+ transcript1.addExon(exon1_1)\n+ transcript1.addExon(exon1_2)\n+\n+ exon2_1 = Interval()\n+ exon2_1.copy(exon1_1)\n+\n+ transcript2 = Transcript()\n+ transcript2.setChromosome("chr1")\n+ transcript2.setStart(100)\n+ transcript2.setEnd(200)\n+ transcript2.setDirection("+")\n+ transcript2.addExon(exon2_1)\n+ \n+ self.assertTrue(transcript1.overlapWithExon(transcript2))\n+\n+ transcript2.reverse()\n+ try:\n+ self.assertFalse(transcript1.overlapWithExon(transcript2))\n+ except 
Exception:\n+ pass\n+ \n+ transcript2.reverse()\n+ transcript2.setChromosome("chr2")\n+ self.assertFalse(transcript1.overlapWithExon(transcript2))\n+\n+ exon3_1 = Interval()\n+ exon3_1.copy(exon1_1)\n+ exon3_1.setEnd(400)\n+ exon3_1.setStart(300)\n+\n+ transcript3 = Transcript()\n+ transcript3.setChromosome("chr1")\n+ transcript3.setStart(300)\n+ transcript3.setEnd(400)\n+ transcript3.setDirection("+")\n+ transcript3.addExon(exon3_1)\n+ self.assertFalse(transcript1.overlapWithExon(transcript3))\n+\n+\n+ def test_merge(self):\n+ exon1_1 = Interval()\n+ exon1_1.setChromosome("chr1")\n+ exon1_1.setStart(100)\n+ exon1_1.setEnd(200)\n+ exon1_1.setDirection("+")\n+\n+ exon1_2 = Interval()\n+ exon1_2.setChromosome("chr1")\n+ exon1_2.setStart(500)\n+ exon1_2.setEnd(600)\n+ exon1_2.setDirection("+")\n+\n+ transcript1 = Transcript()\n+ transcript1.setChromosome("chr1")\n+ transcript1.setEnd(600)\n+ transcript1.setStart(100)\n+ transcript1.setDirection("+")\n+ transcript1.addExon(exon1_1)\n+ transcript1.addExon(exon1_2)\n+\n+ exon2_1 = Interval()\n+ exon2_1.copy(exon1_1)\n+\n+ transcript2 = Transcript()\n+ transcript2.setChromosome("chr1")\n+ transcript2.setEnd(200)\n+ transcript2.setStart(100)\n+ transcript2.setDirection("+")\n+ transcript2.addExon(exon2_1)\n+ \n+ transcript1.merge(transcript2)\n+ transcript1.sortExonsIncreasing()\n+ exons = transcript1.getExons()\n+ self.assertEqual'..b'ctStart(301)\n+ exons = transcript1.getExons()\n+ self.assertEqual(len(exons), 2)\n+ exon1, exon2 = exons\n+ self.assertEqual(exon1.getStart(), 100)\n+ self.assertEqual(exon1.getEnd(), 200)\n+ self.assertEqual(exon2.getStart(), 300)\n+ self.assertEqual(exon2.getEnd(), 400)\n+\n+\n+ def test__include(self):\n+ iTranscript1 = Transcript()\n+ iTranscript1.setName("transcript1")\n+ iTranscript1.setChromosome("chr1")\n+ iTranscript1.setStart(100)\n+ iTranscript1.setEnd(200)\n+ iTranscript1.setDirection("+")\n+ \n+ iTranscript2 = Transcript()\n+ iTranscript2.copy(iTranscript1)\n+ 
iTranscript2.setName("transcript2")\n+ self.assertTrue(iTranscript1.include(iTranscript2))\n+ self.assertTrue(iTranscript2.include(iTranscript1))\n+\n+ iTranscript2.setChromosome("chr2")\n+ self.assertFalse(iTranscript1.include(iTranscript2))\n+ self.assertFalse(iTranscript2.include(iTranscript1))\n+\n+ iTranscript2.setChromosome("chr1")\n+ exon = Interval()\n+ exon.setChromosome("chr1")\n+ exon.setDirection("+")\n+ exon.setStart(300)\n+ exon.setEnd(400)\n+ iTranscript1.addExon(exon)\n+ self.assertTrue(iTranscript1.include(iTranscript2))\n+ self.assertFalse(iTranscript2.include(iTranscript1))\n+ \n+ exon = Interval()\n+ exon.setChromosome("chr1")\n+ exon.setDirection("+")\n+ exon.setStart(500)\n+ exon.setEnd(600)\n+ iTranscript2.addExon(exon)\n+ self.assertFalse(iTranscript1.include(iTranscript2))\n+ self.assertFalse(iTranscript2.include(iTranscript1))\n+ \n+\n+ def test__getDifference(self):\n+ iTranscript1 = Transcript()\n+ iTranscript1.setName("transcript1")\n+ iTranscript1.setChromosome("chr1")\n+ iTranscript1.setStart(100)\n+ iTranscript1.setEnd(400)\n+ iTranscript1.setDirection("+")\n+ \n+ iTranscript2 = Transcript()\n+ iTranscript2.setName("transcript1")\n+ iTranscript2.setChromosome("chr1")\n+ iTranscript2.setStart(200)\n+ iTranscript2.setEnd(400)\n+ iTranscript2.setDirection("+")\n+\n+ newTranscript = iTranscript1.getDifference(iTranscript2)\n+ self.assertTrue(newTranscript.getStart(), 100)\n+ self.assertTrue(newTranscript.getEnd(), 199)\n+ exons = newTranscript.getExons()\n+ self.assertTrue(len(exons), 1)\n+ exon1 = exons[0]\n+ self.assertTrue(exon1.getStart(), 100)\n+ self.assertTrue(exon1.getEnd(), 199)\n+\n+ iTranscript2 = Transcript()\n+ iTranscript2.setName("transcript1")\n+ iTranscript2.setChromosome("chr1")\n+ iTranscript2.setStart(100)\n+ iTranscript2.setEnd(200)\n+ iTranscript2.setDirection("+")\n+\n+ newTranscript = iTranscript1.getDifference(iTranscript2)\n+ self.assertTrue(newTranscript.getStart(), 201)\n+ 
self.assertTrue(newTranscript.getEnd(), 400)\n+ exons = newTranscript.getExons()\n+ self.assertTrue(len(exons), 1)\n+ exon1 = exons[0]\n+ self.assertTrue(exon1.getStart(), 201)\n+ self.assertTrue(exon1.getEnd(), 400)\n+\n+ iTranscript2 = Transcript()\n+ iTranscript2.setName("transcript1")\n+ iTranscript2.setChromosome("chr1")\n+ iTranscript2.setStart(200)\n+ iTranscript2.setEnd(300)\n+ iTranscript2.setDirection("+")\n+\n+ newTranscript = iTranscript1.getDifference(iTranscript2)\n+ self.assertTrue(newTranscript.getStart(), 100)\n+ self.assertTrue(newTranscript.getEnd(), 400)\n+ exons = newTranscript.getExons()\n+ self.assertTrue(len(exons), 2)\n+ exon1, exon2 = exons\n+ self.assertTrue(exon1.getStart(), 100)\n+ self.assertTrue(exon1.getEnd(), 199)\n+ self.assertTrue(exon2.getStart(), 301)\n+ self.assertTrue(exon2.getEnd(), 400)\n+\n+\n+if __name__ == \'__main__\':\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/structure/test/Test_TranscriptListsComparator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/structure/test/Test_TranscriptListsComparator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,262 @@\n+import os\n+import unittest\n+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n+from commons.core.writer.Gff3Writer import Gff3Writer\n+from commons.core.parsing.GffParser import GffParser\n+from commons.core.utils.FileUtils import FileUtils\n+\n+SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n+\n+class Test_TranscriptListsComparator(unittest.TestCase):\n+ \n+\n+ def test_compareTranscriptList(self):\n+ container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed" % SMART_PATH, "bed", 0)\n+ container2 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed" % SMART_PATH, "bed", 0)\n+ outputContainer = "output.gff3"\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.computeOdds(True)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n+ comparator.setOutputWriter(Gff3Writer(outputContainer, 0))\n+ comparator.compareTranscriptList()\n+ parser = GffParser("output.gff3", 0)\n+ self.assertEqual(parser.getNbTranscripts(), 2)\n+ cpt = 0\n+ for transcript in parser.getIterator():\n+ if cpt == 0:\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 1000)\n+ self.assertEqual(transcript.getEnd(), 1999)\n+ self.assertEqual(transcript.getDirection(), 1)\n+ elif cpt == 1:\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 1000)\n+ self.assertEqual(transcript.getEnd(), 1999)\n+ self.assertEqual(transcript.getDirection(), -1)\n+ cpt += 1\n+\n+\n+ def test_compareTranscriptListDistanceSimple(self):\n+ container1 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3" % SMART_PATH, "gff", 0)\n+ 
container2 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3" % SMART_PATH, "gff", 0)\n+\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.computeOdds(True)\n+ comparator.setMaxDistance(1000)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n+ distances = comparator.compareTranscriptListDistance()\n+\n+ self.assertEqual(distances, {0: 1})\n+\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.computeOdds(True)\n+ comparator.setMaxDistance(1000)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container2)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container1)\n+ distances = comparator.compareTranscriptListDistance()\n+\n+ self.assertEqual(distances, {0: 1, -1000: 1})\n+\n+\n+ def test_compareTranscriptListDistanceAntisense(self):\n+ container1 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3" % SMART_PATH, "gff", 0)\n+ container2 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3" % SMART_PATH, "gff", 0)\n+\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.computeOdds(True)\n+ comparator.setMaxDistance(10000)\n+ comparator.getAntisenseOnly(True)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n+ distances = comparator.compareTranscriptListDistance()\n+\n+ self.assertEqual(distances, {1000: 1})\n+\n+\n+\n+ def '..b'orCompareTranscriptListSelfMergeDifferentClusters1.bed" % SMART_PATH, "bed", 0)\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n+ comparator.compareTranscriptListSelfMerge()\n+\n+ parser = GffParser("output.gff3", 
0)\n+ self.assertEquals(parser.getNbTranscripts(), 1)\n+ for transcript in parser.getIterator():\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 100)\n+ self.assertEqual(transcript.getEnd(), 100099)\n+ self.assertEqual(transcript.getDirection(), 1)\n+ self.assertEqual(transcript.getNbExons(), 1)\n+ self.assertEqual(transcript.getSize(), 100000)\n+\n+\n+ def test_compareTranscriptListgetDifferenceTranscriptList(self):\n+ container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3" % SMART_PATH, "gff", 0)\n+ container2 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3" % SMART_PATH, "gff", 0)\n+\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n+ comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n+ comparator.getDifferenceTranscriptList()\n+\n+ parser = GffParser("output.gff3", 0)\n+ self.assertEqual(parser.getNbTranscripts(), 1)\n+ for transcript in parser.getIterator():\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 1000)\n+ self.assertEqual(transcript.getEnd(), 4000)\n+ self.assertEqual(transcript.getDirection(), 1)\n+ self.assertEqual(transcript.getNbExons(), 2)\n+ exon1, exon2 = transcript.getExons()\n+ self.assertEqual(exon1.getStart(), 1000)\n+ self.assertEqual(exon1.getEnd(), 1999)\n+ self.assertEqual(exon2.getStart(), 3001)\n+ self.assertEqual(exon2.getEnd(), 4000)\n+\n+\n+\n+ def test_compareTranscriptListgetDifferenceTranscriptListSplit(self):\n+ container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3" % SMART_PATH, "gff", 0)\n+ container2 = 
TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3" % SMART_PATH, "gff", 0)\n+\n+ comparator = TranscriptListsComparator(None, 0)\n+ comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n+ comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n+ comparator.setSplitDifference(True)\n+ comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n+ comparator.getDifferenceTranscriptList()\n+\n+ parser = GffParser("output.gff3", 0)\n+ self.assertEqual(parser.getNbTranscripts(), 2)\n+ for id, transcript in enumerate(parser.getIterator()):\n+ if id == 0:\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 1000)\n+ self.assertEqual(transcript.getEnd(), 1999)\n+ self.assertEqual(transcript.getDirection(), 1)\n+ self.assertEqual(transcript.getNbExons(), 1)\n+ else:\n+ self.assertEqual(transcript.getChromosome(), "arm_X")\n+ self.assertEqual(transcript.getStart(), 3001)\n+ self.assertEqual(transcript.getEnd(), 4000)\n+ self.assertEqual(transcript.getDirection(), 1)\n+ self.assertEqual(transcript.getNbExons(), 1)\n+\n+\n+if __name__ == \'__main__\':\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/testInstall.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/testInstall.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,103 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Test if the configuration is sound
+"""
+
+import sys
+import os
+import subprocess
+
+# Test Python files
+# Only ImportError is caught (instead of a bare "except:") so that Ctrl-C and
+# genuine errors inside the imported modules are not reported as a path problem.
+try:
+    from SMART.Java.Python.misc.RPlotter import *
+except ImportError:
+    print "Cannot find Python scripts! Update PYTHONPATH (currently %s) environment variable and see configuration in the documentation!" % (os.environ.get("PYTHONPATH", "empty"))
+    sys.exit(3)
+
+try:
+    from SMART.Java.Python.mySql.MySqlTranscriptTable import *
+    from SMART.Java.Python.mySql.MySqlConnection import *
+except ImportError:
+    print "SQLite is not installed ! Please read the documentation!"
+    sys.exit(4)
+
+
+if __name__ == "__main__":
+
+    print "Python scripts are correctly read."
+
+    # Test the database layer (the classes are named MySql*, the messages say SQLite).
+    try:
+        connection = MySqlConnection()
+        table      = MySqlTranscriptTable(connection)
+        table.createTranscriptTable()
+    except Exception:
+        print "Cannot connect to the SQLite database! See configuration in the documentation!"
+        sys.exit(5)
+
+    print "SQLite database is correctly set up."
+
+
+    # Test R: run a one-line script in batch mode and check the exit status.
+    fileName       = "tmpFile.R"
+    outputFileName = "%sout" % (fileName)  # transcript left behind by R CMD BATCH
+    with open(fileName, "w") as handle:
+        handle.write("?licence\n")
+    rCommand = os.environ.get("SMARTRPATH", "R")  # SMARTRPATH overrides the R binary
+    command  = "\"%s\" CMD BATCH %s" % (rCommand, fileName)
+    status   = subprocess.call(command, shell=True)
+    os.remove(fileName)
+    if os.path.exists(outputFileName):
+        os.remove(outputFileName)
+    if status != 0:
+        print "Problem with the execution of R script (command '%s' did not work, current directory is %s, status is %d)! See configuration in the documentation!" % (command, os.getcwd(), status)
+        sys.exit(6)
+
+    # Test the plotting chain: draw a two-point line into a temporary PNG.
+    line        = {0: 1, 1: 2}
+    pngFileName = "tmpFile.png"
+    plotter     = RPlotter(pngFileName)
+    plotter.addLine(line)
+    try:
+        plotter.plot()
+    except Exception:
+        print "Problem with the execution of R script: library 'RColorBrewer' is missing! See configuration in the documentation!"
+        sys.exit(7)
+    # Do not crash on clean-up in case plot() returned without creating the file.
+    if os.path.exists(pngFileName):
+        os.remove(pngFileName)
+
+    print "R is available."
+
+    print "Set up is fine! Enjoy S-MART!"

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/toolLauncher/RnaFoldLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/toolLauncher/RnaFoldLauncher.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,379 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os\n+import sys\n+import random\n+import subprocess\n+from SMART.Java.Python.structure.TranscriptList import TranscriptList\n+from SMART.Java.Python.structure.Transcript import Transcript\n+from SMART.Java.Python.misc.Progress import Progress\n+from commons.core.parsing.FastaParser import FastaParser\n+\n+\n+class RnaFoldStructure(object):\n+ """\n+ A structure to store the output of RNAFold\n+ @ivar name: the name of the sequence\n+ @type name: string\n+ @ivar sequence: the sequence (with gaps)\n+ @type sequence: string\n+ @ivar structure: the bracket structure\n+ @type structure: string\n+ @ivar energy: the energy of the fold\n+ @type energy: float\n+ @ivar interactions: the interactions inside the structure\n+ @ivar interactions: the interactions inside the structure\n+ """\n+\n+ def __init__(self, name, sequence, structure, energy):\n+ """\n+ Initialize the structure\n+ @param name the name of the sequence\n+ @type name: string\n+ @param sequence: the sequence (with gaps)\n+ @type sequence: string\n+ @param structure: the bracket structure\n+ @type structure: string\n+ @param energy: the energy of the fold\n+ @type energy: float\n+ """\n+ self.name = name \n+ self.sequence = sequence\n+ self.structure = structure\n+ self.energy = energy\n+ self.interactions = None\n+\n+ \n+ def analyze(self):\n+ """\n+ Analyze the output, assign the interactions\n+ """\n+ if len(self.sequence) != len(self.structure):\n+ sys.exit("Sizes of sequence and structure differ (\'%s\' and \'%s\')!\\n" % (self.sequence, self.structure))\n+ stack = 
[]\n+ self.interactions = [None for i in range(len(self.sequence))]\n+ for i in range(len(self.sequence)):\n+ if self.structure[i] == "(":\n+ stack.append(i)\n+ elif self.structure[i] == ")":\n+ if not stack:\n+ sys.exit("Something wrong in the interaction line \'%s\'!\\n" % (self.structure))\n+ otherI = stack.pop()\n+ self.interactions[i] = otherI\n+ self.interactions[otherI] = i\n+ if stack:\n+ sys.exit("Something wrong in the interaction line \'%s\'!\\n" % (self.structure))\n+\n+\n+ def getNbBulges(self, sta'..b'\n+ @type rnaFoldOutput: class L{RnaFoldStructure<RnaFoldStructure>}\n+ @ivar reverse: invert the extensions\n+ @type reverse: bool\n+ @return: a t-uple of energy, number of insertions, number of bulges, strand\n+ """\n+ rnaFoldOutput.analyze()\n+ transcriptSize = transcript.end - transcript.start + 1\n+ start = fivePrimeExtension if not reverse else threePrimeExtension\n+ end = start + transcriptSize\n+ energy = rnaFoldOutput.energy\n+ nbBulges = rnaFoldOutput.getNbBulges(start, end)\n+ (minStar, maxStar) = rnaFoldOutput.getStar(start, end)\n+ minStar += transcript.start - start\n+ maxStar += transcript.start - start\n+ if self.verbosity > 100:\n+ print "Getting structure with energy %d, nbBulges %d, miRna* %d-%d, strand %s" % (energy, nbBulges, minStar, maxStar, "-" if reverse else "+")\n+ return (energy, nbBulges, minStar, maxStar, reverse)\n+\n+ \n+ def fold(self, transcript):\n+ """\n+ Fold a transcript (in each strand)\n+ @ivar transcript: a transcript\n+ @type transcript: class L{Transcript<Transcript>}\n+ @return: a t-uple of energy, number of insertions, number of bulges, strand\n+ """\n+ results = [None] * self.nbStrands\n+ strands = [False, True] if self.nbStrands == 2 else [False]\n+ minNbBulges = 1000000\n+ for i, reverse in enumerate(strands):\n+ self.writeInputFile(transcript, reverse, self.fivePrimeExtension, self.threePrimeExtension)\n+ self.startRnaFold()\n+ output = self.parseRnaFoldOutput()\n+ results[i] = 
self.analyzeRnaFoldOutput(transcript, output, reverse, self.fivePrimeExtension, self.threePrimeExtension)\n+ minNbBulges = min(minNbBulges, results[i][1])\n+ for result in results:\n+ if result[1] == minNbBulges:\n+ return result\n+ return None\n+\n+\n+ def refold(self, transcript):\n+ """\n+ Fold a transcript, knowing where the miRNA starts and end\n+ @ivar transcript: a transcript\n+ @type transcript: class L{Transcript<Transcript>}\n+ @return: the energy\n+ """\n+ miStar = transcript.getTagValue("miRnaStar")\n+ startMiStar = int(miStar.split("-")[0])\n+ endMiStart = int(miStar.split("-")[1])\n+ fivePrimeExtension = max(0, transcript.start - startMiStar) + 5\n+ threePrimeExtension = max(0, endMiStart - transcript.end) + 5\n+ self.writeInputFile(transcript, False, fivePrimeExtension, threePrimeExtension)\n+ self.startRnaFold()\n+ output = self.parseRnaFoldOutput()\n+ result = self.analyzeRnaFoldOutput(transcript, output, False, fivePrimeExtension, threePrimeExtension)\n+ return result[0]\n+\n+\n+ def computeResults(self):\n+ """\n+ Fold all and fill an output transcript list with the values\n+ """\n+ progress = Progress(self.inputTranscriptList.getNbTranscripts(), "Handling transcripts", self.verbosity)\n+ self.outputTranscriptList = TranscriptList()\n+ for transcript in self.inputTranscriptList.getIterator():\n+ result = self.fold(transcript)\n+ transcript.setTagValue("nbBulges", result[1])\n+ transcript.setTagValue("miRnaStar", "%d-%d" % (result[2], result[3]))\n+ transcript.setTagValue("miRNAstrand", result[4])\n+ transcript.setTagValue("energy", self.refold(transcript))\n+ self.outputTranscriptList.addTranscript(transcript)\n+ progress.inc()\n+ progress.done()\n+\n+\n+ def getResults(self):\n+ """\n+ Get an output transcript list with the values\n+ """\n+ if self.outputTranscriptList == None:\n+ self.computeResults()\n+ return self.outputTranscriptList\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/trimAdaptor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/trimAdaptor.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,107 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Trim the sequences from a 3' adaptor"""
+
+import sys
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.parsing.FastqParser import FastqParser
+from commons.core.writer.FastaWriter import FastaWriter
+from commons.core.writer.FastqWriter import FastqWriter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Trim Adaptor v1.0.1: Remove the 3' adaptor of a list of reads. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: sequence file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")
+    parser.add_option("-a", "--adaptor",   dest="adaptor",        action="store",               type="string", help="adaptor [compulsory] [format: string]")
+    parser.add_option("-e", "--errors",    dest="errors",         action="store", default=0,    type="int" ,   help="number of errors in percent [format: int] [default: 0]")
+    parser.add_option("-n", "--noAdaptor", dest="noAdaptor",      action="store", default=None, type="string", help="file name where to print sequences with no adaptor [format: output file in sequence format given by -f]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # The adaptor is compulsory: stop with a usage message instead of failing
+    # later with a TypeError on len(None).
+    if not options.adaptor:
+        parser.error("the adaptor (-a) is compulsory and cannot be empty")
+
+    # The last minSize start positions of a read are never tried.
+    minSize = 2
+
+    # Resolve the parser and writer classes once instead of repeating the
+    # fasta/fastq test for the input, the output and the "no adaptor" output.
+    formats = {"fasta": (FastaParser, FastaWriter), "fastq": (FastqParser, FastqWriter)}
+    if options.format not in formats:
+        sys.exit("Cannot handle files with '%s' format." % (options.format))
+    parserClass, writerClass = formats[options.format]
+
+    sequenceParser  = parserClass(options.inputFileName, options.verbosity)
+    writer          = writerClass(options.outputFileName, options.verbosity)
+    writerNoAdaptor = None
+    if options.noAdaptor is not None:
+        writerNoAdaptor = writerClass(options.noAdaptor, options.verbosity)
+
+    adaptor     = options.adaptor
+    nbFound     = 0
+    nbSequences = sequenceParser.getNbSequences()
+    progress    = Progress(nbSequences, "Reading %s" % (options.inputFileName), options.verbosity)
+    for sequence in sequenceParser.getIterator():
+        progress.inc()
+        nucleotides = sequence.getSequence()
+        found       = False
+        for i in range(len(nucleotides) - minSize):
+            # Compare the adaptor with the read from position i on; near the end
+            # of the read only the adaptor prefix that still fits is compared.
+            nucleotidesPart = nucleotides[i:i + len(adaptor)]
+            adaptorPart     = adaptor[:len(nucleotidesPart)]
+            if Utils.getHammingDistance(adaptorPart, nucleotidesPart) <= int(options.errors / 100.0 * len(adaptorPart)):
+                # First match wins: keep only the i nucleotides before the adaptor.
+                nbFound += 1
+                sequence.shrinkToFirstNucleotides(i)
+                found = True
+                break
+        # Every read goes to the main output, trimmed or not.
+        writer.addSequence(sequence)
+        if not found and writerNoAdaptor is not None:
+            writerNoAdaptor.addSequence(sequence)
+    progress.done()
+
+    # An empty input reports 0% instead of raising ZeroDivisionError.
+    percentage = float(nbFound) / nbSequences * 100 if nbSequences else 0.0
+    print "%d sequences with adaptors on %d (%.2f%%)" % (nbFound, nbSequences, percentage)
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/trimSequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/trimSequence.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,102 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Remove sequences with low reliability"""
+
+from optparse import OptionParser
+from commons.core.parsing.SequenceListParser import *
+from commons.core.writer.FastaWriter import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Trim Sequences v1.0.1: Remove sequences with low reliability: low occurrences and highly repeted. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    sequenceParser = SequenceListParser(options.inputFileName, options.verbosity)
+    nbSequences    = sequenceParser.getNbSequences()
+    progress       = Progress(nbSequences, "Parsing file %s" % (options.inputFileName), options.verbosity)
+
+    writer = FastaWriter(options.outputFileName, options.verbosity)
+    # The log always goes to log.txt in the current directory.
+    logHandle = open("log.txt", "w") if options.log else None
+
+    letters              = ("A", "C", "G", "T")
+    nbLowComplexity      = 0
+    nbTooManyOccurrences = 0
+
+    # A sequence is kept only if it contains all four nucleotides and none of
+    # them makes up more than half of the sequence.
+    # Fix: the original called "getIteractor()"; every other parser used in this
+    # changeset exposes getIterator(), so this looks like a typo that raised
+    # AttributeError (to be confirmed against SequenceListParser).
+    try:
+        for sequence in sequenceParser.getIterator():
+            # Count A/C/G/T only; any other character is ignored.
+            nbOccurrences = dict((letter, 0) for letter in letters)
+            for char in sequence.sequence:
+                if char in nbOccurrences:
+                    nbOccurrences[char] += 1
+
+            if 0 in nbOccurrences.values():
+                # At least one of the four nucleotides never appears.
+                nbLowComplexity += 1
+                if logHandle is not None:
+                    logHandle.write("Low complexity for %s\n" % (sequence.sequence))
+            elif max(nbOccurrences.values()) > len(sequence.sequence) // 2:
+                # One nucleotide covers more than half of the sequence.
+                nbTooManyOccurrences += 1
+                if logHandle is not None:
+                    logHandle.write("Too many occurrences for %s\n" % (sequence.sequence))
+            else:
+                writer.addSequence(sequence)
+
+            progress.inc()
+        progress.done()
+    finally:
+        # Close the log even if parsing or writing fails part-way through.
+        if logHandle is not None:
+            logHandle.close()
+
+    # Share of the input, in percent; an empty input gives 0% instead of
+    # raising ZeroDivisionError.
+    def percentage(nb):
+        return float(nb) / nbSequences * 100 if nbSequences else 0.0
+
+    print "%d out of %d have low complexity (%f%%)" % (nbLowComplexity, nbSequences, percentage(nbLowComplexity))
+    print "%d out of %d have too many occurrences (%f%%)" % (nbTooManyOccurrences, nbSequences, percentage(nbTooManyOccurrences))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/trimSequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/trimSequences.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,149 @@\n+#! /usr/bin/env python\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+from optparse import OptionParser\n+from commons.core.parsing.FastaParser import FastaParser\n+from commons.core.parsing.FastqParser import FastqParser\n+from commons.core.writer.FastaWriter import FastaWriter\n+from commons.core.writer.FastqWriter import FastqWriter\n+from SMART.Java.Python.misc.Progress import Progress\n+from SMART.Java.Python.misc import Utils\n+\n+\n+if __name__ == "__main__":\n+ \n+ # parse command line\n+ description = "Trim Sequences v1.0.3: Remove the 5\' and/or 3\' adaptors of a list of reads. [Category: Data Modification]"\n+\n+ parser = OptionParser(description = description)\n+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in sequence format given by -f]")\n+ parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: sequence file format]")\n+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")\n+ parser.add_option("-3", "--threePAdaptor", dest="threePAdaptor", action="store", default=None, type="string", help="3\' adaptor [format: string] [default: None]")\n+ parser.add_option("-5", "--fivePAdaptor", dest="fivePAdaptor", action="store", default=None, type="string", help="5\' adaptor [format: string] [default: None]")\n+ parser.add_option("-e", "--errors", dest="errors", action="store", default=0, type="int", help="number of errors in 
percent [format: int] [default: 0]")\n+ parser.add_option("-d", "--indels", dest="indels", action="store_true", default=False, help="also accept indels [format: bool] [default: False]")\n+ parser.add_option("-n", "--noAdaptor5p", dest="noAdaptor5p", action="store", default=None, type="string", help="print sequences with no 5\' adaptor [format: output file in sequence format given by -f]")\n+ parser.add_option("-m", "--noAdaptor3p", dest="noAdaptor3p", action="store", default=None, type="string", help="print sequences with no 3\' adaptor [format: output file in sequence format given by -f]")\n+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default'..b'ormat))\n+\n+\n+ if options.noAdaptor5p != None:\n+ if options.format == "fasta":\n+ writer5pNoAdaptor = FastaWriter(options.noAdaptor5p, options.verbosity)\n+ elif options.format == "fastq":\n+ writer5pNoAdaptor = FastqWriter(options.noAdaptor5p, options.verbosity)\n+ else:\n+ raise Exception("Cannot handle files with \'%s\' format." % (options.format))\n+ nbFound5p = 0\n+ \n+ if options.noAdaptor3p != None:\n+ if options.format == "fasta":\n+ writer3pNoAdaptor = FastaWriter(options.noAdaptor3p, options.verbosity)\n+ elif options.format == "fastq":\n+ writer3pNoAdaptor = FastqWriter(options.noAdaptor3p, options.verbosity)\n+ else:\n+ raise Exception("Cannot handle files with \'%s\' format." 
% (options.format))\n+ nbFound3p = 0\n+ \n+ progress = Progress(parser.getNbSequences(), "Reading %s" % (options.inputFileName), options.verbosity)\n+ for sequence in parser.getIterator():\n+ progress.inc()\n+ if options.threePAdaptor != None:\n+ nucleotides = sequence.sequence\n+ found = False\n+ bestScore = 10000\n+ bestRegion = 0\n+ for i in range(len(nucleotides) - minSize):\n+ nucleotidesPart = nucleotides[i:]\n+ adaptorPart = options.threePAdaptor if len(nucleotidesPart) >= len(options.threePAdaptor) else options.threePAdaptor[:len(nucleotidesPart)]\n+ nucleotidesPart = nucleotidesPart if len(adaptorPart) == len(nucleotidesPart) else nucleotidesPart[:len(adaptorPart)]\n+ if options.indels:\n+ score = Utils.getLevenshteinDistance(adaptorPart, nucleotidesPart)\n+ else:\n+ score = Utils.getHammingDistance(adaptorPart, nucleotidesPart)\n+ if score <= int(options.errors / 100.0 * len(adaptorPart)) and score < bestScore:\n+ bestScore = score\n+ bestRegion = i\n+ found = True\n+ if found:\n+ nbFound3p += 1\n+ sequence.shrinkToFirstNucleotides(bestRegion)\n+ elif options.noAdaptor3p:\n+ writer3pNoAdaptor.addSequence(sequence)\n+ if options.fivePAdaptor != None:\n+ nucleotides = sequence.sequence\n+ found = False\n+ bestScore = 10000\n+ bestRegion = 0\n+ for i in reversed(range(minSize, len(nucleotides))):\n+ nucleotidesPart = nucleotides[:i]\n+ adaptorPart = options.fivePAdaptor if len(nucleotidesPart) >= len(options.fivePAdaptor) else options.fivePAdaptor[-len(nucleotidesPart):]\n+ nucleotidesPart = nucleotidesPart if len(adaptorPart) == len(nucleotidesPart) else nucleotidesPart[-len(adaptorPart):]\n+ if options.indels:\n+ score = Utils.getLevenshteinDistance(adaptorPart, nucleotidesPart)\n+ else:\n+ score = Utils.getHammingDistance(adaptorPart, nucleotidesPart)\n+ if score <= int(options.errors / 100.0 * len(adaptorPart)) and score < bestScore:\n+ bestScore = score\n+ bestRegion = i\n+ found = True\n+ if found:\n+ nbFound5p += 1\n+ 
sequence.shrinkToLastNucleotides(len(nucleotides) - bestRegion)\n+ elif options.noAdaptor5p:\n+ writer5pNoAdaptor.addSequence(sequence)\n+ writer.addSequence(sequence)\n+ progress.done()\n+ writer.close()\n+\n+ print "%d sequences" % (parser.getNbSequences())\n+ if options.fivePAdaptor != None:\n+ print "%d sequences with 5\' adaptors (%.2f%%)" % (nbFound5p, float(nbFound5p) / parser.getNbSequences() * 100)\n+ if options.threePAdaptor != None:\n+ print "%d sequences with 3\' adaptors (%.2f%%)" % (nbFound3p, float(nbFound3p) / parser.getNbSequences() * 100)\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/txtToFasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/txtToFasta.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,63 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Transform a plain text file to a FASTA file"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.Sequence import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Txt to Fasta v1.0.1: Convert a Txt file (one sequence per line) into Fasta file. [Category: Personnal]"
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",            type="string", help="input file [compulsory] [format: file in TXT format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",            type="string", help="output file [compulsory] [format: output file in FASTA format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1, type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+    if options.inputFileName is None or options.outputFileName is None:
+        parser.error("both --input and --output are compulsory")
+
+    # Each non-empty input line is "<sequence> [<count>]"; the sequence is
+    # written <count> times (once when no count is given), using the sequence
+    # itself as the FASTA header.
+    # Nested "with" blocks close both files even if a line cannot be parsed.
+    with open(options.inputFileName) as inputFile:
+        with open(options.outputFileName, "w") as outputFile:
+            for line in inputFile:
+                splittedLine = line.split()
+                if not splittedLine:
+                    continue  # a blank line used to raise IndexError
+                sequence = splittedLine[0]
+                nb = 1 if len(splittedLine) == 1 else int(splittedLine[1])
+                outputFile.write((">%s\n%s\n" % (sequence, sequence)) * nb)

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/updateQual.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/updateQual.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,86 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Update a .qual file given a .fasta file"""
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import *
+from SMART.Java.Python.misc.Progress import *
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Update Qual v1.0.1: Remove the sequence in a Qual file which are not in the corresponding Fasta file. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-f", "--fasta",         dest="fastaFile",    action="store",                                         type="string", help="fasta file [compulsory] [format: file in FASTA format]")
+    parser.add_option("-q", "--qual",            dest="qualFile",     action="store",                                         type="string", help="qual file [compulsory] [format: file in QUAL format]")
+    parser.add_option("-o", "--output",        dest="outputFile", action="store",                                         type="string", help="output file [compulsory] [format: output file in QUAL format]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",    action="store",            default=1,         type="int",        help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    parser             = SequenceListParser(options.fastaFile, options.verbosity)
+    nbSequences    = parser.getNbSequences()
+    progress         = Progress(nbSequences, "Parsing file %s" % (options.fastaFile), options.verbosity)
+    qualHandle     = open(options.qualFile)
+    outputHandle = open(options.outputFile, "w")
+    nbRefused        = 0
+    nbTotal            = 0
+
+    names = []
+    while parser.getNextSequence():
+        sequence = parser.getCurrentSequence()
+        nbTotal += 1
+
+        found = False
+        name    = None
+        for line in qualHandle:
+            line = line.strip()
+            if line[0] == ">":
+                name = line[1:]
+                if name == sequence.name:
+                    found = True
+                else:
+                    nbRefused += 1
+            else:
+                if found:
+                    outputHandle.write(">%s\n%s\n" % (name, line))
+                    found = False
+                    name    = None
+                    break
+        progress.inc()
+    progress.done()
+
+
+    outputHandle.close()
+    qualHandle.close()
+
+    print "%d out of %d are refused (%f%%)"             % (nbRefused, nbTotal, (float(nbRefused) / nbTotal * 100))

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/wigExploder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/wigExploder.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,99 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Explode wig files into several files, one for each chromosome"""
+
+import os, re, sys
+from optparse import OptionParser
+
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Wig Exploder v1.0.1: Explode a big WIG file into several smaller WIG files (one per chromosome). [Category: Personal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",                        dest="inputFileName", action="store",                                         type="string", help="input file [compulsory] [format: file in WIG format]")
+    parser.add_option("-o", "--output",                     dest="output",                action="store",            default=None,    type="string", help="output directory [compulsory] [format: directory]")
+    parser.add_option("-s", "--strand",                     dest="strand",                action="store",            default=None,    type="string", help="strand of the input WIG file (if any) [format: choice (+, -)]")
+    parser.add_option("-v", "--verbosity",                dest="verbosity",         action="store",            default=1,         type="int",        help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    inputFile = open(options.inputFileName)
+
+    files         = {}
+    file            = None
+    trackLine = None
+    strand        = ""
+    if options.strand != None:
+        strand = options.strand
+
+    for line in inputFile:
+        line = line.strip()
+
+        if line.startswith("track"):
+            trackLine = line
+            continue
+
+        m1 = re.search(r"^\s*fixedStep\s+chrom=(\S+)\s+start=\d+\s+step=\d+\s*$", line)
+        m2 = re.search(r"^\s*fixedStep\s+chrom=(\S+)\s+start=\d+\s+step=\d+\s+span=\d+\s*$", line)
+        m3 = re.search(r"^\s*variableStep\s+chrom=(\S+)\s*$", line)
+        m4 = re.search(r"^\s*variableStep\s+chrom=(\S+)span=\d+\s*$", line)
+
+        m = None
+        if m1 != None:
+            m = m1
+        elif m2 != None:
+            m = m2
+        elif m3 != None:
+            m = m3
+        elif m4 != None:
+            m = m4
+
+        if m != None:
+            chromosome = m.group(1)
+
+            if chromosome in files:
+                file = files[chromosome]
+            else:
+                file = open("%s%s%s%s.wig" % (options.output, os.sep, chromosome, strand), "w")
+                files[chromosome] = file
+                if trackLine != None:
+                    file.write("%s\n" % (trackLine))
+
+        if file == None:
+            sys.exit("Header is missing (current first line is '%s')! Aborting..." % (line))
+
+        file.write("%s\n" % (line))
+
+    inputFile.close()
+
+    for chromosome in files:
+        files[chromosome].close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Python/wrongFastqToQual.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/wrongFastqToQual.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,81 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Convert a pseudo-FASTQ file to QUAL files"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.misc.Progress import *
+from math import *
+
+if __name__ == "__main__":
+
+    # parse command line
+    description = "Wrong FastQ to Qual v1.0.1: Convert a pseudo-FastQ (i.e. a FastQ file with a wrong format) into a Qual file. [Category: Personnal]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",            dest="inputFileName",         action="store",                                                type="string", help="input file [compulsory] [format: file in FASTQ format]")
+    parser.add_option("-o", "--output",         dest="outputFileName",        action="store",                                                type="string", help="output file [compulsory] [format: output file in QUAL format]")
+    parser.add_option("-v", "--verbosity",    dest="verbosity",                 action="store",            default=1,                type="int",        help="trace level [format: int] [default: 1]")
+    (options, args) = parser.parse_args()
+
+    inputFile             = open(options.inputFileName)
+    outputFastaFile = open("%s.fasta" % (options.outputFileName), "w")
+    outputQualFile    = open("%s.qual" % (options.outputFileName), "w")
+
+    inSequence     = False
+    inQuality        = True
+    sequenceName = None
+    for line in inputFile:
+        line = line.strip()
+        if line[0] == "@":
+            if inQuality == False:
+                sys.exit("Quality of %s is missing" % (sequenceName))
+            inSequence     = True
+            inQuality        = False
+            sequenceName = line[1:]
+            outputFastaFile.write(">%s\n" % (sequenceName))
+        elif line[0] == "+":
+            if inSequence == False:
+                sys.exit("Sequence of %s is missing" % (line[1:]))
+            inSequence     = False
+            inQuality        = True
+            if sequenceName != line[1:]:
+                sys.exit("Names in sequence and qual are different (%s, %s)" % (sequenceName, line[1:]))
+            outputQualFile.write(">%s\n" % (sequenceName))
+        else:
+            if inSequence:
+                outputFastaFile.write("%s\n" % (line))
+            elif inQuality:
+                outputQualFile.write("%s\n" % (line))
+
+    inputFile.close()
+    outputFastaFile.close()
+    outputQualFile.close()

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/PythonHelperReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/PythonHelperReader.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,336 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.io.File;\n+import java.io.*;\n+import java.util.regex.*;\n+\n+public class PythonHelperReader {\n+\n+ String fileName;\n+ Program program;\n+ BufferedReader reader;\n+ String message;\n+\n+ public PythonHelperReader(String fileName) {\n+ this.fileName = fileName; \n+ this.reader = reader;\n+ this.message = null;\n+ }\n+\n+ public void setReader(BufferedReader reader) {\n+ this.reader = reader;\n+ }\n+ \n+ public void run() {\n+ this.program = new Program();\n+ boolean inBeginning = true;\n+ boolean inUsage = false;\n+ boolean afterUsage = false;\n+ boolean inDescription = false;\n+ boolean afterDescription = false;\n+ boolean inOptions = false;\n+ boolean inOptionBlank = false;\n+ boolean inError = false;\n+ String usage = null;\n+ String description = null;\n+ String option = null;\n+ Vector <String> options = new Vector < String > ();\n+ String[] optionSplitted;\n+\n+ // Parse file\n+ try {\n+ String line = null;\n+\n+ while ((line = reader.readLine()) != null) {\n+ line = line.trim();\n+ if (line.startsWith("Traceback")) {\n+ this.message = "Problem with header of \'" + this.fileName + "\':\\n" + line + "\\n";\n+ inError = true;\n+ inBeginning = false;\n+ inUsage = false;\n+ afterUsage = false;\n+ inDescription = false;\n+ afterDescription = false;\n+ inOptions = false;\n+ inOptionBlank = false;\n+ }\n+ else if (inError) {\n+ this.message += line + "\\n";\n+ }\n+ else if (inBeginning) {\n+ if (line.startsWith("Usage:")) {\n+ inUsage = true;\n+ inBeginning = false;\n+ usage = line;\n+ 
}\n+ }\n+ else if (inUsage) {\n+ if ("".equals(line)) {\n+ inUsage = false;\n+ afterUsage = true;\n+ }\n+ else {\n+ usage += " " + line;\n+ }\n+ }\n+ else if (afterUsage) {\n+ if (! "".equals(line)) {\n+ description = line;\n+ afterUsage = false;\n+ inDescription = true;\n+ }\n+ '..b' int pos = value.indexOf(" ");\n+ currentWord = value.substring(0, pos);\n+ rest = value.substring(pos+1);\n+ }\n+ else {\n+ currentWord = value;\n+ }\n+ // Output file type\n+ if ("output".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setInput(false);\n+ int pos = rest.indexOf(" ");\n+ currentWord = rest.substring(0, pos).trim();\n+ rest = rest.substring(pos+1).trim();\n+ }\n+ // File (input or output file)\n+ if ("file".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setType("file");\n+ // Format given by an associated option (to be found later)\n+ if (rest.startsWith("in format given by ")) {\n+ associatedOption.put(programOption, rest.substring(rest.indexOf("format given by ") + "format given by ".length() + 1).trim());\n+ }\n+ else {\n+ if (! 
rest.startsWith("in ")) {\n+ this.message = "Descriptor " + option + " does not have a proper format.\\n";\n+ return;\n+ }\n+ rest = rest.substring("in ".length());\n+ int pos = rest.indexOf(" format");\n+ if (pos == -1) {\n+ this.message = "Descriptor " + option + " does not have a proper format.\\n";\n+ return;\n+ }\n+ programOption.setFormat(rest.substring(0, pos).trim().toLowerCase().split(" or "));\n+ }\n+ }\n+ // Format type\n+ else if (rest.endsWith("file format")) {\n+ programOption.setFormat((currentWord + " " + rest.substring(0, rest.indexOf("file format"))).trim().toLowerCase().split(" or "));\n+ programOption.setType("format");\n+ }\n+ // Choice type\n+ else if ("choice".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setChoices(rest.replace("(", "").replace(")", "").split(", "));\n+ programOption.setType("choice");\n+ }\n+ // Boolean type\n+ else if ("bool".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setType("boolean");\n+ }\n+ // Other type\n+ else {\n+ if (currentWord == null) {\n+ this.message = "Program \'" + this.fileName + "\' has a problem concerning the type of option \'" + identifier + "\'.\\n";\n+ return;\n+ }\n+ programOption.setType(currentWord);\n+ }\n+ }\n+ // Default value\n+ else if ("default".compareToIgnoreCase(type) == 0) {\n+ programOption.setDefault(value);\n+ }\n+ else {\n+ this.message = "Do not understand option descriptor \'" + inner + "\'.\\n";\n+ return;\n+ }\n+ }\n+ else {\n+ // Compulsory option\n+ if ("compulsory".compareToIgnoreCase(inner) == 0) {\n+ programOption.setCompulsory(true);\n+ }\n+ else {\n+ this.message = "Do not understand option descriptor \'" + inner + "\'.\\n";\n+ return;\n+ }\n+ }\n+ }\n+ if (! 
programOption.checkSettings()) {\n+ this.message = "Program \'" + this.fileName + "\' has a problem concerning option \'" + identifier + "\'.\\n";\n+ return;\n+ }\n+ program.addOption(programOption);\n+ }\n+\n+ // Set associated option\n+ Iterator it = associatedOption.keySet().iterator();\n+ while (it.hasNext()) {\n+ ProgramOption programOption = (ProgramOption) it.next();\n+ programOption.setAssociatedOption(identifierToOptions.get(associatedOption.get(programOption)));\n+ }\n+ }\n+\n+ public String getMessage () {\n+ return this.message;\n+ }\n+\n+ public Program getProgram () {\n+ return this.program;\n+ }\n+}\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/PythonProgramFinder.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/PythonProgramFinder.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,92 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.io.*;
+import java.util.*;
+
+public class PythonProgramFinder {
+
+  String             dirName;
+  Vector < Program > programs;
+
+  public PythonProgramFinder(String dirName) {
+    this.dirName = dirName;
+  }
+
+  public String findPrograms() {
+    java.io.File directory = new java.io.File(this.dirName);
+    String[] files         = directory.list(new FilenameFilter() {public boolean accept(java.io.File dir, String name) {return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")) && (name.compareToIgnoreCase("__init__.py") != 0));}});
+    this.programs          = new Vector < Program > ();
+
+    for (int i = 0; i < files.length; i++) {
+      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
+      String command = "";
+      for (int j = 0; j < commandList.length; j++) {
+        command += commandList[j] + " ";
+      }
+      ProcessBuilder pb = new ProcessBuilder(commandList);
+      pb = pb.redirectErrorStream(true);
+      Map<String, String> env = pb.environment();
+      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+      env.put("SMARTRPATH", Global.rCommand);
+
+      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
+      try {
+        final Process process = pb.start();
+        InputStream is        = process.getInputStream();
+        InputStreamReader isr = new InputStreamReader(is);
+        BufferedReader br     = new BufferedReader(isr);
+        helperReader.setReader(br);
+        helperReader.run();
+      }
+      catch (IOException e) {
+        final Writer result = new StringWriter();
+        final PrintWriter printWriter = new PrintWriter(result);
+        e.printStackTrace(printWriter);
+        return "Command '" + command + "' failed (I/O error)...\n" + result.toString();
+      }
+      String comments = helperReader.getMessage();
+      if (comments != null) return comments;
+      Program program = helperReader.getProgram();
+      if (("Personnal".compareToIgnoreCase(program.getSection()) != 0) && ("Personal".compareToIgnoreCase(program.getSection()) != 0)) {
+        this.programs.add(program);
+      }
+    }
+    return null;
+  }
+
+  public Vector <Program> getPrograms () {
+    return this.programs;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/File.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/File.java Fri Jan 18 04:54:14 2013 -0500

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/Files.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Files.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,71 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+public class Files {
+  HashMap <String, File> files;
+
+  public Files () {
+    files = new HashMap < String, File> ();
+  }
+
+  public void addFile(String fileName, String type, String format) {
+    this.addFile(new File(fileName, type, format));
+  }
+
+  public void addFile(File file) {
+    files.put(file.name, file);
+  }
+
+  public String getType(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format type of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).formatType;
+  }
+
+  public String getFormat(String fileName) {
+    if (fileName == null) {
+      System.out.println("Error! Looking for format of empty file name!");
+    }
+    if (! files.containsKey(fileName)) {
+      System.out.println("Oops! Format of file " + fileName + " is not found!");
+      return null;
+    }
+    return files.get(fileName).format;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/FormatType.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatType.java Fri Jan 18 04:54:14 2013 -0500

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/FormatsContainer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatsContainer.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,81 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+
+/**
+ * Maps each format type name to the FormatType holding its formats.
+ */
+public class FormatsContainer {
+
+  HashMap < String, FormatType > formatTypes;
+
+
+  public FormatsContainer() {
+    this.formatTypes = new HashMap < String, FormatType > ();
+  }
+
+
+  /** Adds a format to the given type, creating the type on first use. */
+  public void addFormat(String type, String format) {
+    if (! formatTypes.containsKey(type)) {
+      this.formatTypes.put(type, new FormatType(type));
+    }
+    this.formatTypes.get(type).addFormat(format);
+  }
+
+
+  /** @return a new vector holding the names of all known types */
+  public Vector < String > getFormatTypes () {
+    return new Vector < String > (this.formatTypes.keySet());
+  }
+
+
+  /** @return the FormatType registered under this name, or null if there is none */
+  public FormatType getFormats (String type) {
+    return formatTypes.get(type);
+  }
+
+
+  /** @return the name of the first type found which contains this format, or null */
+  public String getFormatType (String format) {
+    for (Map.Entry < String, FormatType > entry : formatTypes.entrySet()) {
+      if (entry.getValue().containsFormat(format)) {
+        return entry.getKey();
+      }
+    }
+    return null;
+  }
+}
+
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/FormatsReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/FormatsReader.java Fri Jan 18 04:54:14 2013 -0500

[

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/Global.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Global.java Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,70 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.Vector;
+import java.util.HashMap;
+import javax.swing.DefaultListModel;
+import javax.swing.JButton;
+import javax.swing.JTextField;
+
+/**
+ * Application-wide shared state, exposed as public static (mutable) fields.
+ */
+public class Global {
+
+  // Maximum number of lines ProgramLauncher.process() keeps in the log area;
+  // Smart also passes it as the column count of the log JTextArea.
+  public static int logAreaSize = 100;
+
+  // Name of the configuration file (its reader is not visible here --
+  // presumably read at start-up; confirm against the caller).
+  public static String smartConfFileName = "smart.conf";
+
+  // Name of the programs file (presumably the list of available programs;
+  // confirm against ProgramFileReader).
+  public static String smartProgramsFileName = "programs.txt";
+
+  // Name of the file handed to FormatsReader by the Smart constructor.
+  public static String smartFormatsFileName = "formats.txt";
+
+  // Empty by default; NOTE(review): no use of this field is visible here.
+  public static String pythonPath = new String();
+
+  // Executable PythonProgramFinder uses to run each script with "-h".
+  public static String pythonCommand = "python";
+
+  // Exported to child processes as the SMARTMYSQLPATH environment variable.
+  public static String mysqlCommand = "mysql";
+
+  // Exported to child processes as the SMARTRPATH environment variable.
+  public static String rCommand = "R";
+
+  // Files registered through the main file chooser (Smart calls addFile()).
+  public static Files files = new Files();
+
+  // Paths of the files picked in the main file chooser (Smart adds them).
+  public static DefaultListModel fileNames = new DefaultListModel();
+
+  // Known file formats, grouped by format type.
+  public static FormatsContainer formats = new FormatsContainer();
+
+  // NOTE(review): presumably true while a program is being executed; neither
+  // the writer nor the reader of this flag is visible here.
+  public static boolean programRunning = false;
+
+  // "Browse" button -> text field which receives the chosen file path.
+  public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >();
+
+  // "Browse" button -> text field, registered by ProgramOption for
+  // directory options.
+  public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >();
+
+  // "Browse" button -> text field (presumably for options which take several
+  // files; confirm against ProgramOption).
+  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
+
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/Program.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Program.java Fri Jan 18 04:54:14 2013 -0500

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/ProgramFileReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramFileReader.java Fri Jan 18 04:54:14 2013 -0500

[

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/ProgramLauncher.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramLauncher.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,191 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.util.*;
+import java.io.*;
+import javax.swing.SwingUtilities;
+import javax.swing.*;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * Background worker which runs an external command and mirrors its output in
+ * the GUI.
+ *
+ * The command is started in doInBackground(); each output line (stdout and
+ * stderr merged) is published and later handled by process() on the event
+ * dispatch thread. Lines which look like a progress bar update the message
+ * label, the progress bar and the ETA label; every other line ends up in the
+ * log area.
+ */
+public class ProgramLauncher extends SwingWorker<Boolean, String>  {
+
+  // "message [===   ] current/total"
+  private static final String PROGRESS_PATTERN  = ".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*";
+  // "message [===   ] current/total ETA: time"
+  private static final String ETA_PATTERN       = ".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*";
+  // "message [======] total completed in time"
+  private static final String COMPLETED_PATTERN = ".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*";
+
+  String[]     command;
+  JTextArea    logArea;
+  JLabel       messageField;
+  JProgressBar progressBar;
+  JLabel       etaField;
+  int          exitValue;
+
+
+  /**
+   * @param c  command and arguments to execute
+   * @param la text area which receives the log
+   * @param mf label which receives the current progress message
+   * @param pb progress bar to update
+   * @param ef label which receives the estimated remaining time
+   */
+  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = new String[c.size()];
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+    c.toArray(command);
+  }
+
+
+  /**
+   * @param c  command and arguments to execute
+   * @param la text area which receives the log
+   * @param mf label which receives the current progress message
+   * @param pb progress bar to update
+   * @param ef label which receives the estimated remaining time
+   */
+  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
+    command       = c;
+    logArea       = la;
+    messageField  = mf;
+    progressBar   = pb;
+    etaField      = ef;
+    exitValue     = -1;
+  }
+
+
+  /**
+   * Run the command, publish every line it prints, and wait for its end.
+   *
+   * @return Boolean.TRUE if the command ran and exited with status 0,
+   *         Boolean.FALSE otherwise (a message is published in that case)
+   */
+  @Override
+  public Boolean doInBackground() {
+    ProcessBuilder pb           = new ProcessBuilder(command);
+    Process process             = null;
+    BufferedReader outputReader = null;
+    pb                          = pb.redirectErrorStream(true);
+    Map<String, String> env     = pb.environment();
+    env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+    env.put("SMARTRPATH", Global.rCommand);
+    String commandJoined = Arrays.toString(command);
+
+    try {
+      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
+      process      = pb.start();
+      outputReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+    }
+    catch (Exception exception) {
+      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
+      exception.printStackTrace();
+      return Boolean.FALSE;
+    }
+    try {
+      String line;
+      while ((line = outputReader.readLine()) != null) {
+        publish(line + "\n");
+      }
+    }
+    catch (IOException e) {
+      e.printStackTrace();
+      publish("!Cannot get the output of the command!\n");
+      return Boolean.FALSE;
+    }
+    finally {
+      // Always release the pipe to the child process.
+      try {
+        outputReader.close();
+      }
+      catch (IOException e) {
+        // Best effort: nothing useful can be done if closing fails.
+      }
+    }
+    try {
+      // waitFor() returns the exit status once the process has terminated.
+      exitValue = process.waitFor();
+      System.out.println(exitValue);
+    }
+    catch (InterruptedException e) {
+      // Keep the interruption visible to the code which runs this worker.
+      Thread.currentThread().interrupt();
+      e.printStackTrace();
+      publish("!Cannot wait for the end of the command!\n");
+      return Boolean.FALSE;
+    }
+    if (exitValue != 0) {
+      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
+      return Boolean.FALSE;
+    }
+    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
+    return Boolean.TRUE;
+  }
+
+
+  /**
+   * Handle freshly published output: the current content of the log area and
+   * the new chunks are re-read line by line; progress lines update the
+   * progress widgets, the other lines are written back to the log area
+   * (only the last Global.logAreaSize lines are kept).
+   *
+   * @param chunks pieces of output published since the last call
+   */
+  @Override
+  protected void process(List<String> chunks) {
+    StringBuilder text = new StringBuilder(logArea.getText());
+    for (String chunk: chunks) {
+      text.append(chunk);
+    }
+    StringBuilder message = new StringBuilder();
+    for (String lineSeparatedByCarriageReturn: text.toString().split("\n")) {
+      // Progress bars are redrawn in place with carriage returns.
+      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
+        if (line.matches(PROGRESS_PATTERN)) {
+          showProgress(line, line.split("\\]")[1], "");
+        }
+        else if (line.matches(ETA_PATTERN)) {
+          String ratio = line.split("\\]")[1].split("E")[0];
+          showProgress(line, ratio, "ETA: " + textAfter(line, "ETA:"));
+        }
+        else if (line.matches(COMPLETED_PATTERN)) {
+          String title      = line.split("\\[")[0].trim();
+          String nbElements = line.split("\\]")[1].split("completed")[0].trim();
+          String timeSpent  = textAfter(line, "completed in");
+          message.append(title).append(": ").append(nbElements).append(" elements completed in ").append(timeSpent).append("\n");
+          messageField.setText(title);
+        }
+        else {
+          message.append(line).append("\n");
+        }
+      }
+    }
+    String lines[]            = message.toString().split("\n");
+    StringBuilder toBeWritten = new StringBuilder();
+    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
+      toBeWritten.append(lines[i]).append("\n");
+    }
+    logArea.setText(toBeWritten.toString());
+  }
+
+
+  /**
+   * Display a progress line: text before the first '[' goes to the message
+   * label, the ratio drives the progress bar, and the ETA label is replaced.
+   * The progress bar is left untouched when the ratio cannot be computed.
+   */
+  private void showProgress(String line, String ratio, String eta) {
+    messageField.setText(line.split("\\[")[0].trim());
+    int percentage = toPercentage(ratio);
+    if (percentage >= 0) {
+      progressBar.setValue(percentage);
+    }
+    etaField.setText(eta);
+  }
+
+
+  /**
+   * Convert a "current/total" string to a percentage.
+   *
+   * @return the percentage (at most 100), or -1 if a number is missing or
+   *         malformed, or if the total is zero
+   */
+  private static int toPercentage(String ratio) {
+    String[] ratioElements = ratio.trim().split("/");
+    if (ratioElements.length != 2) {
+      return -1;
+    }
+    try {
+      // Use long arithmetic: "current * 100" overflows an int above ~21 million.
+      long current = Long.parseLong(ratioElements[0].trim());
+      long aim     = Long.parseLong(ratioElements[1].trim());
+      if (aim == 0) {
+        return -1;
+      }
+      return (int) Math.min(100L, current * 100L / aim);
+    }
+    catch (NumberFormatException e) {
+      return -1;
+    }
+  }
+
+
+  /**
+   * Give the trimmed text found after the first occurrence of a marker (up
+   * to the next occurrence, if any), or an empty string if nothing follows.
+   */
+  private static String textAfter(String line, String marker) {
+    String[] parts = line.split(marker);
+    return (parts.length > 1)? parts[1].trim(): "";
+  }
+
+
+  /**
+   * @return the exit status of the command, or -1 if it has not been
+   *         collected (yet)
+   */
+  public int getExitValue() {
+    return exitValue;
+  }
+}

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/ProgramOption.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/ProgramOption.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,329 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.awt.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+\n+\n+public class ProgramOption {\n+ boolean input;\n+ String identifier;\n+ String type;\n+ String comment;\n+ boolean compulsory;\n+ String[] format;\n+ String formatIdentifier;\n+ ProgramOption associatedOption;\n+ String defaultValue;\n+ String[] choices;\n+ JComponent component;\n+ JPanel panel;\n+\n+\n+ public ProgramOption() {\n+ this.input = true;\n+ this.identifier = null;\n+ this.type = null;\n+ this.comment = null;\n+ this.compulsory = false;\n+ this.format = null;\n+ this.formatIdentifier = null;\n+ this.associatedOption = null;\n+ this.defaultValue = "";\n+ this.choices = null;\n+ this.component = null;\n+ this.panel = null;\n+ }\n+\n+\n+ public void setInput(boolean input) {\n+ this.input = input;\n+ }\n+\n+\n+ public void setIdentifier(String identifier) {\n+ this.identifier = identifier;\n+ }\n+\n+\n+ public void setType(String type) {\n+ this.type = type;\n+ }\n+\n+\n+ public void setComment(String comment) {\n+ this.comment = comment;\n+ }\n+\n+\n+ public void setCompulsory(boolean compulsory) {\n+ this.compulsory = compulsory;\n+ }\n+\n+\n+ public void setFormat(String[] format) {\n+ this.format = format;\n+ }\n+\n+\n+ public void setFormat(String format) {\n+ this.format = new String[1];\n+ this.format[0] = format;\n+ }\n+\n+\n+ public 
void setFormatIdentifier(String formatIdentifier) {\n+ this.formatIdentifier = formatIdentifier;\n+ }\n+\n+\n+ public void setAssociatedOption(ProgramOption option) {\n+ this.associatedOption = option;\n+ }\n+\n+\n+ public void setChoices(String[] choices) {\n+ this.choices = choices;\n+ }\n+\n+\n+ public void setDefault(String defaultValue) {\n+ this.defaultValue = defaultValue;\n+ }\n+\n+\n+ public boolean isInput() {\n+ return this.input;\n+ }\n+\n+\n+ public JPanel getPanel() {\n+ if (this.panel != null) {\n+ return this.panel;\n+ }\n+ String comment = this.comment;\n+ if (this.compulsory) {\n+ comment += " [*]";\n+ }\n+\n+ GridLayout horizontalLayout = new GridLayout(1, 0);\n+ this.panel = ne'..b'component = new JTextField();\n+ label.setLabelFor(this.component);\n+ this.panel.add(label);\n+ JPanel rightPanel = new JPanel(false);\n+ rightPanel.setLayout(new BoxLayout(rightPanel, BoxLayout.LINE_AXIS));\n+ rightPanel.add(this.component);\n+ rightPanel.add(button);\n+ this.panel.add(rightPanel);\n+ Global.otherDirectoriesChooser.put(button, (JTextField) this.component);\n+ }\n+ else if ("choice".compareToIgnoreCase(this.type) == 0) {\n+ this.component = new JComboBox(this.choices);\n+ label.setLabelFor(this.component);\n+ this.panel.add(label);\n+ this.panel.add(this.component);\n+ }\n+ else {\n+ System.out.println("Do not know how to read type " + this.type);\n+ }\n+\n+ return this.panel;\n+ }\n+\n+\n+ public JComponent getComponent() {\n+ if (component == null) {\n+ this.getPanel();\n+ }\n+ return this.component;\n+ }\n+\n+\n+ private String getValue() {\n+ if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type))) {\n+ String s = ((JTextField) this.component).getText();\n+ if ("None".equals(s)) {\n+ return "";\n+ }\n+ return s;\n+ }\n+ if ("file".equals(this.type)) {\n+ return (String) ((JList) this.component).getSelectedValue();\n+ }\n+ if ("boolean".equals(this.type)) {\n+ return ((JCheckBox) this.component).isSelected()? "true": "false";\n+ }\n+ if ("format".equals(this.type)) {\n+ return (String) ((JComboBox) this.component).getSelectedItem();\n+ }\n+ return null;\n+ }\n+\n+\n+ public String checkValue() {\n+ String value = this.getValue();\n+ if ((this.compulsory) && ((value == null) || ("".equals(value)))) {\n+ return "Option \'" + this.comment + "\' has no value... Please specify it.\\n";\n+ }\n+ if ("int".equals(this.type)) {\n+ if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) {\n+ try {\n+ int i = Integer.parseInt(value);\n+ }\n+ catch (NumberFormatException e) {\n+ return "Option \'" + this.comment + "\' should be an integer... Please correct it.\\n";\n+ }\n+ }\n+ }\n+ else if ("float".equals(this.type)) {\n+ if ((value != null) && (! "".equals(value))) {\n+ try {\n+ float i = Float.parseFloat(value);\n+ }\n+ catch (NumberFormatException e) {\n+ return "Option \'" + this.comment + "\' should be a float... Please correct it.\\n";\n+ }\n+ }\n+ }\n+ return null;\n+ }\n+\n+\n+ public LinkedList <String> getCommand() {\n+ LinkedList <String> list = new LinkedList <String> ();\n+\n+ if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type))) {\n+ String value = this.getValue();\n+ if (value.length() == 0) {\n+ return list;\n+ }\n+ list.add(this.identifier);\n+ list.add(value);\n+ return list;\n+ }\n+ if ("file".equals(this.type)) {\n+ String fileName = (String) ((JList) this.component).getSelectedValue();\n+ if (fileName == null) {\n+ return list;\n+ }\n+ list.add(this.identifier);\n+ list.add(this.getValue());\n+ return list;\n+ }\n+ if ("boolean".equals(this.type)) {\n+ if ("true".equals(this.getValue())) {\n+ list.add(this.identifier);\n+ }\n+ return list;\n+ }\n+ return null;\n+ }\n+\n+\n+ public File getOutputFile() {\n+ if (this.input) return null;\n+ String format = "";\n+ if (this.format != null) {\n+ format = this.format[0];\n+ }\n+ if (this.associatedOption != null) {\n+ format = this.associatedOption.getValue();\n+ }\n+ return new File(this.getValue() + "." + format, Global.formats.getFormatType(format), format);\n+ }\n+}\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/PythonHelperReader.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/PythonHelperReader.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,323 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.io.File;\n+import java.io.*;\n+import java.util.regex.*;\n+\n+public class PythonHelperReader {\n+\n+ String fileName;\n+ Program program;\n+ BufferedReader reader;\n+ String message;\n+\n+ public PythonHelperReader(String fileName) {\n+ this.fileName = fileName; \n+ this.reader = reader;\n+ this.message = null;\n+ }\n+\n+ public void setReader(BufferedReader reader) {\n+ this.reader = reader;\n+ }\n+ \n+ public void run() {\n+ this.program = new Program();\n+ boolean inBeginning = true;\n+ boolean inUsage = false;\n+ boolean afterUsage = false;\n+ boolean inDescription = false;\n+ boolean afterDescription = false;\n+ boolean inOptions = false;\n+ boolean inOptionBlank = false;\n+ boolean inError = false;\n+ String usage = null;\n+ String description = null;\n+ String option = null;\n+ Vector <String> options = new Vector < String > ();\n+ String[] optionSplitted;\n+\n+ // Parse file\n+ try {\n+ String line = null;\n+\n+ while ((line = reader.readLine()) != null) {\n+ line = line.trim();\n+ if (line.startsWith("Traceback")) {\n+ this.message = "Problem with header of \'" + this.fileName + "\':\\n" + line + "\\n";\n+ inError = true;\n+ inBeginning = false;\n+ inUsage = false;\n+ afterUsage = false;\n+ inDescription = false;\n+ afterDescription = false;\n+ inOptions = false;\n+ inOptionBlank = false;\n+ }\n+ else if (inError) {\n+ this.message += line + "\\n";\n+ }\n+ else if (inBeginning) {\n+ if (line.startsWith("Usage:")) {\n+ inUsage = true;\n+ inBeginning = false;\n+ usage = line;\n+ 
}\n+ }\n+ else if (inUsage) {\n+ if ("".equals(line)) {\n+ inUsage = false;\n+ afterUsage = true;\n+ }\n+ else {\n+ usage += " " + line;\n+ }\n+ }\n+ else if (afterUsage) {\n+ if (! "".equals(line)) {\n+ description = line;\n+ afterUsage = false;\n+ inDescription = true;\n+ }\n+ '..b'her.end()-1);\n+ if (inner.contains(":")) {\n+ String type = inner.substring(0, inner.indexOf(":")).trim();\n+ String value = inner.substring(inner.indexOf(":")+1).trim();\n+ // Types of the options\n+ if ("format".compareToIgnoreCase(type) == 0) {\n+ String currentWord = "";\n+ String rest = "";\n+ if (value.contains(" ")) {\n+ int pos = value.indexOf(" ");\n+ currentWord = value.substring(0, pos);\n+ rest = value.substring(pos+1);\n+ }\n+ else {\n+ currentWord = value;\n+ }\n+ // Output file type\n+ if ("output".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setInput(false);\n+ int pos = rest.indexOf(" ");\n+ currentWord = rest.substring(0, pos).trim();\n+ rest = rest.substring(pos+1).trim();\n+ }\n+ // File (input or output file)\n+ if ("file".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setType("file");\n+ // Format given by an associated option (to be found later)\n+ if (rest.startsWith("in format given by ")) {\n+ associatedOption.put(programOption, rest.substring(rest.indexOf("format given by ") + "format given by ".length() + 1).trim());\n+ }\n+ else {\n+ if (! 
rest.startsWith("in ")) {\n+ this.message = "Descriptor " + option + " does not have a proper format.";\n+ return;\n+ }\n+ rest = rest.substring("in ".length());\n+ int pos = rest.indexOf(" format");\n+ if (pos == -1) {\n+ this.message = "Descriptor " + option + " does not have a proper format.";\n+ return;\n+ }\n+ programOption.setFormat(rest.substring(0, pos).trim().toLowerCase().split(" or "));\n+ }\n+ }\n+ // Format type\n+ else if (rest.endsWith("file format")) {\n+ programOption.setFormat((currentWord + " " + rest.substring(0, rest.indexOf("file format"))).trim().toLowerCase().split(" or "));\n+ programOption.setType("format");\n+ }\n+ // Choice type\n+ else if ("choice".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setChoices(rest.replace("(", "").replace(")", "").split(", "));\n+ programOption.setType("choice");\n+ }\n+ // Boolean type\n+ else if ("bool".compareToIgnoreCase(currentWord) == 0) {\n+ programOption.setType("boolean");\n+ }\n+ // Other type\n+ else {\n+ programOption.setType(currentWord);\n+ }\n+ }\n+ // Default value\n+ else if ("default".compareToIgnoreCase(type) == 0) {\n+ programOption.setDefault(value);\n+ }\n+ else {\n+ this.message = "Do not understand option descriptor \'" + inner + "\'.";\n+ return;\n+ }\n+ }\n+ else {\n+ // Compulsory option\n+ if ("compulsory".compareToIgnoreCase(inner) == 0) {\n+ programOption.setCompulsory(true);\n+ }\n+ else {\n+ this.message = "Do not understand option descriptor \'" + inner + "\'.";\n+ return;\n+ }\n+ }\n+ }\n+ program.addOption(programOption);\n+ }\n+\n+ // Set associated option\n+ Iterator it = associatedOption.keySet().iterator();\n+ while (it.hasNext()) {\n+ ProgramOption programOption = (ProgramOption) it.next();\n+ programOption.setAssociatedOption(identifierToOptions.get(associatedOption.get(programOption)));\n+ }\n+ }\n+\n+ public String getMessage () {\n+ return this.message;\n+ }\n+\n+ public Program getProgram () {\n+ return this.program;\n+ }\n+}\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/PythonProgramFinder.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/PythonProgramFinder.java Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,90 @@
+/**
+ *
+ * Copyright INRA-URGI 2009-2010
+ *
+ * This software is governed by the CeCILL license under French law and
+ * abiding by the rules of distribution of free software. You can use,
+ * modify and/ or redistribute the software under the terms of the CeCILL
+ * license as circulated by CEA, CNRS and INRIA at the following URL
+ * "http://www.cecill.info".
+ *
+ * As a counterpart to the access to the source code and rights to copy,
+ * modify and redistribute granted by the license, users are provided only
+ * with a limited warranty and the software's author, the holder of the
+ * economic rights, and the successive licensors have only limited
+ * liability.
+ *
+ * In this respect, the user's attention is drawn to the risks associated
+ * with loading, using, modifying and/or developing or reproducing the
+ * software by the user in light of its specific status of free software,
+ * that may mean that it is complicated to manipulate, and that also
+ * therefore means that it is reserved for developers and experienced
+ * professionals having in-depth computer knowledge. Users are therefore
+ * encouraged to load and test the software's suitability as regards their
+ * requirements in conditions enabling the security of their systems and/or
+ * data to be ensured and, more generally, to use and operate it in the
+ * same conditions as regards security.
+ *
+ * The fact that you are presently reading this means that you have had
+ * knowledge of the CeCILL license and that you accept its terms.
+ *
+ */
+import java.io.*;
+import java.util.*;
+
+/**
+ * Discovers the available programs: every Python script of a directory is
+ * run with "-h" and its help output is parsed by a PythonHelperReader.
+ */
+public class PythonProgramFinder {
+
+  String             dirName;
+  Vector < Program > programs;
+
+  /**
+   * @param dirName directory which is scanned for Python scripts
+   */
+  public PythonProgramFinder(String dirName) {
+    this.dirName = dirName;
+  }
+
+  /**
+   * Scan the directory and build the list of programs.
+   *
+   * Hidden files, files starting with "test" and files which do not end with
+   * ".py" are skipped, as well as programs of the "Personnal" section.
+   *
+   * @return null on success, or an error message if the directory cannot be
+   *         listed, a script cannot be started, or its help cannot be parsed
+   */
+  public String findPrograms() {
+    // Set first, so that getPrograms() never returns null after a failure.
+    this.programs          = new Vector < Program > ();
+    java.io.File directory = new java.io.File(this.dirName);
+    String[] files         = directory.list(new FilenameFilter() {
+      public boolean accept(java.io.File dir, String name) {
+        return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")));
+      }
+    });
+    // File.list() gives null when the path is not a readable directory.
+    if (files == null) {
+      return "Cannot list the content of directory '" + this.dirName + "'...\n";
+    }
+
+    for (int i = 0; i < files.length; i++) {
+      // NOTE(review): the script path is built from the literal "Python"
+      // directory, not from dirName -- confirm that this is intended.
+      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
+      ProcessBuilder pb = new ProcessBuilder(commandList);
+      pb = pb.redirectErrorStream(true);
+      Map<String, String> env = pb.environment();
+      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
+      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
+      env.put("SMARTRPATH", Global.rCommand);
+
+      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
+      BufferedReader br               = null;
+      try {
+        final Process process = pb.start();
+        br                    = new BufferedReader(new InputStreamReader(process.getInputStream()));
+        helperReader.setReader(br);
+        helperReader.run();
+      }
+      catch (IOException e) {
+        e.printStackTrace();
+        StringBuilder command = new StringBuilder();
+        for (int j = 0; j < commandList.length; j++) {
+          command.append(commandList[j]).append(" ");
+        }
+        return "Command '" + command + "' failed (I/O error)...\n";
+      }
+      finally {
+        // Release the pipe to the child process, whatever the outcome.
+        if (br != null) {
+          try {
+            br.close();
+          }
+          catch (IOException e) {
+            // Best effort: nothing useful can be done if closing fails.
+          }
+        }
+      }
+      String comments = helperReader.getMessage();
+      if (comments != null) return comments;
+      Program program = helperReader.getProgram();
+      if ("Personnal".compareToIgnoreCase(program.getSection()) != 0) {
+        this.programs.add(program);
+      }
+    }
+    return null;
+  }
+
+  /**
+   * @return the programs collected by the last call to findPrograms(), or
+   *         null if findPrograms() has not been called
+   */
+  public Vector <Program> getPrograms () {
+    return this.programs;
+  }
+}
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Sav/Smart.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Sav/Smart.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,489 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.awt.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+\n+\n+public class Smart extends JPanel implements ActionListener {\n+\n+ String version = "1.0.2";\n+\n+ JFrame mainFrame;\n+ JButton openButton;\n+ JButton comparisonGoButton;\n+\n+ JComboBox formatTypes;\n+ JComboBox fileFormats;\n+ String[] emptyFormats = {"Choose a type first..."};\n+\n+ JFrame askFrame;\n+ JButton pythonButton;\n+ JButton mySqlButton;\n+ JButton rButton;\n+\n+ HashMap <JButton, Program> callingProgram;\n+\n+ // comparison\n+ JList comparisonFile1List;\n+ JList comparisonFile2List;\n+ JTextField comparisonOutputTextField;\n+ JTextField comparisonFiveQueryExtensionTextField;\n+ JCheckBox comparisonColinearCheckBox;\n+ JCheckBox comparisonAntisenseCheckBox;\n+ JCheckBox comparisonInvertCheckBox;\n+\n+ JList fileList;\n+ JTextArea logArea;\n+\n+ // progress bar\n+ JLabel messageField;\n+ JProgressBar progressBar;\n+ JLabel etaField;\n+\n+ // process\n+ Program currentProgram;\n+ Process process;\n+ javax.swing.Timer processTimer;\n+\n+\n+ int previousStatus;\n+\n+ public Smart() {\n+ super(new BorderLayout());\n+\n+ callingProgram = new HashMap <JButton, Program> ();\n+\n+ previousStatus = -1;\n+\n+ processTimer = new javax.swing.Timer(1000, this);\n+ processTimer.stop();\n+\n+ // Ask frame buttons\n+ pythonButton = new 
JButton("find...");\n+ mySqlButton = new JButton("find...");\n+ rButton = new JButton("find...");\n+\n+ // Get available formats\n+ FormatsReader formatReader = new FormatsReader(Global.smartFormatsFileName);\n+ if (! formatReader.read()) {\n+ System.out.println("Something was wrong while reading file format...");\n+ }\n+\n+ // Get screen size\n+ Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n+\n+ // Log\n+ logArea = new JTextArea(512, Global.logAreaSize);\n+ logArea.setPreferredSize(new Dimension(screenSize.width, (int) (screenSize.height * 0.22)));\n+ logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));\n+ JScrollPane logScroll = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);\n+ TitledBorder logBorder '..b'\\n");\n+ }\n+ }\n+ this.checkConfiguration();\n+ }\n+ // Format type\n+ else if (e.getSource() == formatTypes) {\n+ fileFormats.removeAllItems();\n+ Vector < String > selectedFormats = Global.formats.getFormats((String) formatTypes.getSelectedItem()).getFormats();\n+ for (int i = 0; i < selectedFormats.size(); i++) {\n+ fileFormats.addItem(selectedFormats.get(i));\n+ }\n+ }\n+ // Main file chooser\n+ else if (e.getSource() == openButton) {\n+ JFileChooser chooser = new JFileChooser();\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ String fileName = chooser.getSelectedFile().getPath();\n+ Global.fileNames.addElement(fileName);\n+ Global.files.addFile(fileName, (String) formatTypes.getSelectedItem(), (String) fileFormats.getSelectedItem());\n+ }\n+ }\n+ // Other file choosers\n+ else if (Global.otherFilesChooser.containsKey(e.getSource())) {\n+ JTextField textField = Global.otherFilesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ // 
Other directories choosers\n+ else if (Global.otherDirectoriesChooser.containsKey(e.getSource())) {\n+ JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ else if (Global.otherFileConcatenationChooser.containsKey(e.getSource())) {\n+ JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ String text = textField.getText();\n+ if ((text == null) || ("".equals(text))) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ }\n+ else {\n+ textField.setText(text + "," + chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ }\n+ // Programs\n+ else {\n+ currentProgram = callingProgram.get(e.getSource());\n+ String comment = currentProgram.checkValues();\n+ if (comment != null) {\n+ logArea.append(comment);\n+ return;\n+ }\n+ LinkedList <String> command = currentProgram.getCommand();\n+ ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n+ launcher.execute();\n+ Vector < File > outputFiles = currentProgram.getOutputFiles();\n+ for (int i = 0; i < outputFiles.size(); i++) {\n+ File file = outputFiles.get(i);\n+ if (file.getFormatType().compareToIgnoreCase("other") != 0) {\n+ Global.fileNames.addElement(file.getName());\n+ Global.files.addFile(file);\n+ }\n+ }\n+ currentProgram = null;\n+ }\n+ }\n+\n+\n+ private static void createAndShowGUI() {\n+ // Create and set up the window.\n+ JFrame mainFrame = new JFrame("S-Mart");\n+ mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);\n+\n+ //Create and set up the content pane.\n+ JComponent 
newContentPane = new Smart();\n+ newContentPane.setOpaque(true);\n+ mainFrame.setContentPane(newContentPane);\n+\n+ // Display the window.\n+ mainFrame.pack();\n+ mainFrame.setVisible(true);\n+ Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n+ mainFrame.setBounds(0, 0, screenSize.width, screenSize.height);\n+ }\n+\n+\n+ public static void main(String[] args) {\n+ javax.swing.SwingUtilities.invokeLater(new Runnable() {\n+ public void run() {\n+ createAndShowGUI();\n+ }\n+ });\n+ }\n+}\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Smart.jar

Binary file SMART/Java/Smart.jar has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/Smart.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Smart.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,668 @@\n+/**\n+ *\n+ * Copyright INRA-URGI 2009-2010\n+ * \n+ * This software is governed by the CeCILL license under French law and\n+ * abiding by the rules of distribution of free software. You can use,\n+ * modify and/ or redistribute the software under the terms of the CeCILL\n+ * license as circulated by CEA, CNRS and INRIA at the following URL\n+ * "http://www.cecill.info".\n+ * \n+ * As a counterpart to the access to the source code and rights to copy,\n+ * modify and redistribute granted by the license, users are provided only\n+ * with a limited warranty and the software\'s author, the holder of the\n+ * economic rights, and the successive licensors have only limited\n+ * liability.\n+ * \n+ * In this respect, the user\'s attention is drawn to the risks associated\n+ * with loading, using, modifying and/or developing or reproducing the\n+ * software by the user in light of its specific status of free software,\n+ * that may mean that it is complicated to manipulate, and that also\n+ * therefore means that it is reserved for developers and experienced\n+ * professionals having in-depth computer knowledge. 
Users are therefore\n+ * encouraged to load and test the software\'s suitability as regards their\n+ * requirements in conditions enabling the security of their systems and/or\n+ * data to be ensured and, more generally, to use and operate it in the\n+ * same conditions as regards security.\n+ * \n+ * The fact that you are presently reading this means that you have had\n+ * knowledge of the CeCILL license and that you accept its terms.\n+ *\n+ */\n+import java.util.*;\n+import java.awt.*;\n+import java.awt.event.ActionEvent;\n+import java.awt.event.ActionListener;\n+import java.awt.event.WindowEvent;\n+import java.awt.event.WindowAdapter;\n+import java.io.*;\n+import javax.swing.*;\n+import javax.swing.filechooser.*;\n+import javax.swing.border.*;\n+import javax.swing.SwingUtilities;\n+import java.util.prefs.BackingStoreException;\n+\n+\n+public class Smart extends JPanel implements ActionListener {\n+\n+ String version = "1.1.0";\n+\n+ JFrame mainFrame;\n+ JButton openButton;\n+ JButton resetFileButton;\n+\n+ JComboBox formatTypes;\n+ JComboBox fileFormats;\n+ String[] emptyFormats = {"Choose a type first..."};\n+\n+ JFrame askFrame;\n+ JButton pythonButton;\n+ JButton mySqlButton;\n+ JButton rButton;\n+\n+ HashMap <JButton, Program> callingProgram;\n+\n+ static JList fileList;\n+ static JTextArea logArea;\n+\n+ // progress bar\n+ static JLabel messageField;\n+ static JProgressBar progressBar;\n+ static JLabel etaField;\n+\n+ // process\n+ Program currentProgram;\n+ Process process;\n+ javax.swing.Timer processTimer;\n+\n+\n+ int previousStatus;\n+\n+ public Smart() {\n+ super(new BorderLayout());\n+\n+ callingProgram = new HashMap <JButton, Program> ();\n+\n+ previousStatus = -1;\n+\n+ processTimer = new javax.swing.Timer(1000, this);\n+ processTimer.stop();\n+\n+ // Ask frame buttons\n+ pythonButton = new JButton("find...");\n+ mySqlButton = new JButton("find...");\n+ rButton = new JButton("find...");\n+\n+ // Get available formats\n+ FormatsReader formatReader 
= new FormatsReader(Global.smartFormatsFileName);\n+ if (! formatReader.read()) {\n+ System.out.println("Something was wrong while reading file format...");\n+ }\n+\n+ // Get screen size\n+ Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n+\n+ // Log\n+ logArea = new JTextArea(512, Global.logAreaSize);\n+ logArea.setPreferredSize(new Dimension(screenSize.width, (int) (screenSize.height * 0.22)));\n+ logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));\n+ JScrollPane logScroll = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);\n+ TitledBorder logBorder = BorderFactory.createTitledBorder("Log");\n+ logScroll.setBorder(logBorder);\n+ logArea.append("Using S-MART " + version + "\\n");\n+\n+ '..b'extField = Global.otherFilesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ // Other directories choosers\n+ else if (Global.otherDirectoriesChooser.containsKey(e.getSource())) {\n+ JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ else if (Global.otherFileConcatenationChooser.containsKey(e.getSource())) {\n+ JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n+ JFileChooser chooser = new JFileChooser();\n+ chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n+ if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n+ String text = textField.getText();\n+ if ((text == null) || ("".equals(text))) {\n+ textField.setText(chooser.getSelectedFile().getPath());\n+ 
}\n+ else {\n+ textField.setText(text + "," + chooser.getSelectedFile().getPath());\n+ }\n+ }\n+ }\n+ // Programs\n+ else {\n+ currentProgram = callingProgram.get(e.getSource());\n+ String comment = currentProgram.checkValues();\n+ if (comment != null) {\n+ logArea.append(comment);\n+ return;\n+ }\n+ LinkedList <String> command = currentProgram.getCommand();\n+ ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n+ launcher.execute();\n+ Vector < File > outputFiles = currentProgram.getOutputFiles();\n+ for (int i = 0; i < outputFiles.size(); i++) {\n+ File file = outputFiles.get(i);\n+ if (file.getFormatType().compareToIgnoreCase("other") != 0) {\n+ Global.fileNames.addElement(file.getName());\n+ Global.files.addFile(file);\n+ }\n+ }\n+ currentProgram = null;\n+ }\n+ }\n+\n+\n+ private static void removeTmpFiles() {\n+ logArea.append("You want to quit already?\\nRemoving temporary files...");\n+ String[] command = {Global.pythonCommand, "Python" + java.io.File.separator + "removeAllTmpTables.py"};\n+ ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n+ launcher.execute();\n+ logArea.append(" done.\\nNow quitting.\\nBye!");\n+ }\n+\n+\n+ private static void printJavaVersions() {\n+ String[] pro = {"java.version", "java.vm.version", "java.runtime.version"};\n+\n+ Properties properties = System.getProperties();\n+ for (int i = 0; i < pro.length; i++) {\n+ logArea.append(pro[i] + ": " + properties.getProperty(pro[i]) + "\\n");\n+ }\n+ }\n+ \n+ private static void createAndShowGUI() {\n+ // Create and set up the window.\n+ JFrame mainFrame = new JFrame("S-Mart");\n+ mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);\n+\n+ //Create and set up the content pane.\n+ JComponent newContentPane = new Smart();\n+ newContentPane.setOpaque(true);\n+ mainFrame.setContentPane(newContentPane);\n+\n+ // Display the window.\n+ mainFrame.pack();\n+ 
mainFrame.setVisible(true);\n+ Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n+ mainFrame.setBounds(0, 0, screenSize.width, screenSize.height);\n+ printJavaVersions();\n+\n+ // Remove tmp file while quitting.\n+ mainFrame.addWindowListener(new WindowAdapter() {\n+ @Override\n+ public void windowClosing(WindowEvent e) {\n+ removeTmpFiles();\n+ }\n+ });\n+ }\n+\n+\n+ public static void main(String[] args) {\n+ javax.swing.SwingUtilities.invokeLater(new Runnable() {\n+ public void run() {\n+ createAndShowGUI();\n+ }\n+ });\n+ }\n+}\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/SmartInstaller.jar

Binary file SMART/Java/SmartInstaller.jar has changed

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/WindowsRegistry.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/WindowsRegistry.java Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,387 @@\n+import java.lang.reflect.InvocationTargetException;\n+import java.lang.reflect.Method;\n+import java.util.HashMap;\n+import java.util.Map;\n+import java.util.ArrayList;\n+import java.util.List;\n+import java.util.prefs.Preferences;\n+\n+public class WindowsRegistry {\n+ public static final int HKEY_CURRENT_USER = 0x80000001;\n+ public static final int HKEY_LOCAL_MACHINE = 0x80000002;\n+ public static final int REG_SUCCESS = 0;\n+ public static final int REG_NOTFOUND = 2;\n+ public static final int REG_ACCESSDENIED = 5;\n+\n+ private static final int KEY_ALL_ACCESS = 0xf003f;\n+ private static final int KEY_READ = 0x20019;\n+ private static Preferences userRoot = Preferences.userRoot();\n+ private static Preferences systemRoot = Preferences.systemRoot();\n+ private static Class<? extends Preferences> userClass = userRoot.getClass();\n+ private static Method regOpenKey = null;\n+ private static Method regCloseKey = null;\n+ private static Method regQueryValueEx = null;\n+ private static Method regEnumValue = null;\n+ private static Method regQueryInfoKey = null;\n+ private static Method regEnumKeyEx = null;\n+ private static Method regCreateKeyEx = null;\n+ private static Method regSetValueEx = null;\n+ private static Method regDeleteKey = null;\n+ private static Method regDeleteValue = null;\n+\n+ static {\n+ try {\n+ regOpenKey = userClass.getDeclaredMethod("WindowsRegOpenKey",\n+ new Class[] { int.class, byte[].class, int.class });\n+ regOpenKey.setAccessible(true);\n+ regCloseKey = userClass.getDeclaredMethod("WindowsRegCloseKey",\n+ new Class[] { int.class });\n+ regCloseKey.setAccessible(true);\n+ regQueryValueEx = userClass.getDeclaredMethod("WindowsRegQueryValueEx",\n+ new Class[] { int.class, byte[].class });\n+ regQueryValueEx.setAccessible(true);\n+ regEnumValue = userClass.getDeclaredMethod("WindowsRegEnumValue",\n+ new Class[] { int.class, int.class, int.class });\n+ regEnumValue.setAccessible(true);\n+ regQueryInfoKey = 
userClass.getDeclaredMethod("WindowsRegQueryInfoKey1",\n+ new Class[] { int.class });\n+ regQueryInfoKey.setAccessible(true);\n+ regEnumKeyEx = userClass.getDeclaredMethod( \n+ "WindowsRegEnumKeyEx", new Class[] { int.class, int.class, \n+ int.class }); \n+ regEnumKeyEx.setAccessible(true);\n+ regCreateKeyEx = userClass.getDeclaredMethod( \n+ "WindowsRegCreateKeyEx", new Class[] { int.class, \n+ byte[].class }); \n+ regCreateKeyEx.setAccessible(true); \n+ regSetValueEx = userClass.getDeclaredMethod( \n+ "WindowsRegSetValueEx", new Class[] { int.class, \n+ byte[].class, byte[].class }); \n+ regSetValueEx.setAccessible(true); \n+ regDeleteValue = userClass.getDeclaredMethod( \n+ "WindowsRegDeleteValue", new Class[] { int.class, \n+ byte[].class }); \n+ regDeleteValue.setAccessible(true); \n+ regDeleteKey = userClass.getDeclaredMethod( \n+ "WindowsRegDeleteKey", new Class[] { int.class, \n+ byte[].class }); \n+ regDeleteKey.setAccessible(true); \n+ }\n+ catch (Exception e) {\n+ e.printStackTrace();\n+ }\n+ }\n+\n+ private WindowsRegistry() { }\n+\n+ /**\n+ * Read a value from key and value name\n+ * @param hkey HKEY_CURRENT_USER/HKEY_LOCAL_MACHINE\n+ * @param key\n+ * @param valueName\n+ * @return the value\n+ * @throws IllegalArgumentException\n+ * @throws IllegalAccessException\n+ * @throws InvocationTargetException\n+ */\n+ public static String readString(int hkey, String key, String valueName) \n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ {\n+ if (hkey == HKEY_LOCAL_MACHINE) {\n+ return readString(systemRoot, hkey, key, valueName);\n+ }\n+ else if (hkey == HKEY_CURRENT_USER) {\n+ return readString(userRoot, hkey, key, valueName);\n+ }\n+ else {\n+ throw new Illeg'..b' return rc; // can REG_NOTFOUND, REG_ACCESSDENIED, REG_SUCCESS\n+ }\n+\n+ private static String readString(Preferences root, int hkey, String key, String value)\n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ 
{\n+ int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n+ new Integer(hkey), toCstr(key), new Integer(KEY_READ) });\n+ if (handles[1] != REG_SUCCESS) {\n+ return null; \n+ }\n+ byte[] valb = (byte[]) regQueryValueEx.invoke(root, new Object[] {\n+ new Integer(handles[0]), toCstr(value) });\n+ regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n+ return (valb != null ? new String(valb).trim() : null);\n+ }\n+\n+ private static Map<String,String> readStringValues\n+ (Preferences root, int hkey, String key)\n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ {\n+ HashMap<String, String> results = new HashMap<String,String>();\n+ int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n+ new Integer(hkey), toCstr(key), new Integer(KEY_READ) });\n+ if (handles[1] != REG_SUCCESS) {\n+ return null;\n+ }\n+ int[] info = (int[]) regQueryInfoKey.invoke(root,\n+ new Object[] { new Integer(handles[0]) });\n+\n+ int count = info[2]; // count \n+ int maxlen = info[3]; // value length max\n+ for(int index=0; index<count; index++) {\n+ byte[] name = (byte[]) regEnumValue.invoke(root, new Object[] {\n+ new Integer\n+ (handles[0]), new Integer(index), new Integer(maxlen + 1)});\n+ String value = readString(hkey, key, new String(name));\n+ results.put(new String(name).trim(), value);\n+ }\n+ regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n+ return results;\n+ }\n+\n+ private static List<String> readStringSubKeys\n+ (Preferences root, int hkey, String key)\n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ {\n+ List<String> results = new ArrayList<String>();\n+ int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n+ new Integer(hkey), toCstr(key), new Integer(KEY_READ) \n+ });\n+ if (handles[1] != REG_SUCCESS) {\n+ return null;\n+ }\n+ int[] info = (int[]) regQueryInfoKey.invoke(root,\n+ new Object[] { new Integer(handles[0]) });\n+\n+ 
int count = info[2]; // count \n+ int maxlen = info[3]; // value length max\n+ for(int index=0; index<count; index++) {\n+ byte[] name = (byte[]) regEnumKeyEx.invoke(root, new Object[] {\n+ new Integer\n+ (handles[0]), new Integer(index), new Integer(maxlen + 1)\n+ });\n+ results.add(new String(name).trim());\n+ }\n+ regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n+ return results;\n+ }\n+\n+ private static int [] createKey(Preferences root, int hkey, String key)\n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ {\n+ return (int[]) regCreateKeyEx.invoke(root,\n+ new Object[] { new Integer(hkey), toCstr(key) });\n+ }\n+\n+ private static void writeStringValue \n+ (Preferences root, int hkey, String key, String valueName, String value) \n+ throws IllegalArgumentException, IllegalAccessException,\n+ InvocationTargetException \n+ {\n+ int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n+ new Integer(hkey), toCstr(key), new Integer(KEY_ALL_ACCESS) });\n+\n+ regSetValueEx.invoke(root, \n+ new Object[] { \n+ new Integer(handles[0]), toCstr(valueName), toCstr(value) \n+ }); \n+ regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n+ }\n+\n+ // utility\n+ private static byte[] toCstr(String str) {\n+ byte[] result = new byte[str.length() + 1];\n+\n+ for (int i = 0; i < str.length(); i++) {\n+ result[i] = (byte) str.charAt(i);\n+ }\n+ result[str.length()] = 0;\n+ return result;\n+ }\n+}\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/formats.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/formats.txt Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,4 @@
+sequence: fasta, fastq
+transcript: bed, gff, gff2, gff3, csv
+mapping: axt, blast, bowtie, exo, maq, nucmer, psl, sam, seqmap, shrimp, soap, soap2
+other: txt, wig, png, nclist

diff -r ea3082881bf8 -r 769e306b7933 SMART/Java/manifest.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/manifest.txt Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Created-By: Matthias Zytnicki
+Main-Class: Smart

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/CleanTranscriptFile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CleanTranscriptFile.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,60 @@
+<tool id="CleanTranscriptFile" name="Clean Transcript File">
+ <description> Clean a transcript file so that it is useable for S-MART.</description>
+ <command interpreter="python"> ../Java/Python/CleanTranscriptFile.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+ #if $optionType.type == 'Yes':
+ -t $optionType.value
+ #end if
+ -o $outputFile
+ </command>
+ <!-- The command above runs CleanTranscriptFile.py with: -i the input dataset, -f its format (gff, gtf or gff3), -t the list of types to keep (only when the option below is set to Yes) and -o the output dataset. -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName"  type="select" label="Input File Format">
+ <option value="gff">gff</option>
+ <option value="gtf">gtf</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+          <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+          <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+ <!-- Optional type filter: when "Yes" is selected, the comma-separated list typed by the user is handed to the script through -t; when "No" is selected, -t is not written at all. -->
+ <conditional name="optionType">
+
+ <param name="type" type="select" label="You can choose the tag that you are interested in, like tRNA,rRNA,ncRNA,CDS,exon, etc." help="Name of the types you want to keep in GFF/GTF (list separated by commas)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="value" type="text" value="tRNA,rRNA,ncRNA,CDS,exon"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+ <!-- The single output is declared as gtf; the change_format rules below switch it to gff or gff3 so that it follows the input format selected above. -->
+
+ <outputs>
+ <data name="outputFile" format="gtf">
+ <change_format>
+ <when input="formatType.FormatInputFileName" value="gff" format="gff" />
+ <when input="formatType.FormatInputFileName" value="gff3" format="gff3" />
+ </change_format>
+ </data>
+
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/Clusterize.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/Clusterize.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,73 @@
+<tool id="MergingDataClusterize" name="Clusterize">
+ <description>Clusterizes the reads when their genomic intervals overlap.</description>
+ <command interpreter="python">
+ ../Java/Python/clusterize.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'csv':
+ -f csv
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -o $outputFileGff
+ $colinear
+ $normalize
+ -d $distance
+ $log $outputFileLog
+ </command>
+ <!-- Command: clusterize.py receives -i and -f (input dataset and its format), -o (the GFF3 output), the optional -c and -n flags, -d (distance), then the log flag followed by the log dataset. NOTE(review): $outputFileLog is referenced unconditionally although that output is filtered on "log"; confirm the behaviour when the box is unchecked. -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+ <!-- Each boolean below renders its flag (-c, -n, -l) when checked and an empty string otherwise. The distance is declared as an integer so that a non-numeric entry is rejected by the form instead of reaching the script through -d. -->
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
+ <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
+ <param name="distance" type="integer" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
+ </inputs>
+ <!-- Outputs: the clusters as a GFF3 dataset, plus a text log dataset that is filtered on the "log" checkbox. -->
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ <data name="outputFileLog" format="txt">
+ <filter>log</filter>
+ </data>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/CollapseReads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CollapseReads.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,59 @@
+<tool id="collapseReads" name="collapse reads">
+ <description>Merges two reads if they have exactly the same genomic coordinates.</description>
+ <command interpreter="python">
+ ../Java/Python/CollapseReads.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ $strand
+ -o $outputFileGff
+ </command>
+ <!-- Command: CollapseReads.py receives -i and -f (input dataset and its format), the strand flag and -o (output dataset). $strand is inserted as-is because the boolean parameter below already renders "-s" when checked and an empty string otherwise; prefixing it with a dash would produce a malformed option. -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
+ </inputs>
+ <!-- Output: a single dataset declared as GFF3, written to the path given to -o. -->
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/CompareOverlappingSmallQuery.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,134 @@
+<tool id="CompareOverlappingSmallQuery" name="Compare Overlapping Small Query">
+ <description>Provide the queries that overlap with a reference, when the query is small.</description>
+ <command interpreter="python">
+ ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+     -g gtf
+ #end if
+ -o $outputFileGff
+ #if $OptionDistance.Dist == 'Yes':
+ -d $OptionDistance.distance
+ #end if
+ #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
+ -c
+ #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
+ -a
+ #end if
+ $InvertMatch
+ $NotOverlapping
+ </command>
+ <!-- Command: -i and -f give the query dataset and its format, -j and -g the reference dataset and its format, -o the output dataset. -d is written only when a maximum distance is requested, -c or -a only when Colinear or AntiSens is selected, and -x / -O only when the matching checkboxes are ticked. -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input Query File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+                         </when>
+ </conditional>
+ <!-- Second conditional: the same format choices, applied to the reference file (passed with -j and -g). -->
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input Reference File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+ <conditional name="OptionDistance">
+ <param name="Dist" type="select" label="Maximum Distance between two reads">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="distance" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <conditional name="OptionColinearOrAntiSens">
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
+ <option value="AntiSens">AntiSens</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="Colinear">
+ </when>
+ <when value="AntiSens">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by default."/>
+ </inputs>
+ <!-- Output: a single dataset declared as GFF3, written to the path given to -o. -->
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/CompareOverlappingSmallRef.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,137 @@
+<tool id="CompareOverlappingSmallRef" name="Compare Overlapping Small Reference">
+ <description>Provide the queries that overlap with a reference, when the reference is small.</description>
+ <command interpreter="python">
+ ../Java/Python/CompareOverlappingSmallRef.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+ -g gtf
+ #end if
+ -o $outputFileGff
+ #if $OptionDistance.Dist == 'Yes':
+ -d $OptionDistance.distance
+ #end if
+ #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
+ -c
+ #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
+ -a
+ #end if
+ $InvertMatch
+ $NotOverlapping
+ </command>
+ <!-- Inputs map onto the command as: query file (-i, format -f), reference file (-j, format -g), optional distance (-d), strand mode (-c or -a), and the -x / -O flags. -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input Query File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+ <!-- Reference file: the format selector drives -g and restricts which datasets can be chosen for -j. -->
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input Reference File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+ <conditional name="OptionDistance">
+ <param name="Dist" type="select" label="Maximum Distance between two reads">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="distance" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <conditional name="OptionColinearOrAntiSens">
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
+ <option value="AntiSens">AntiSens</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="Colinear">
+ </when>
+ <when value="AntiSens">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+
+ <!-- Plain top-level flags: the command template reads them as $InvertMatch and $NotOverlapping, so they must not sit inside a conditional. -->
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,119 @@
+<tool id="ConvertTranscriptFile" name="Convert transcript file">
+  <description>Convert a file from a format to another.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName
+   #if $inputFormatType.FormatInputFileName == 'gff3':
+    -f gff3
+   #elif $inputFormatType.FormatInputFileName == 'bed':
+    -f bed
+   #elif $inputFormatType.FormatInputFileName == 'gff2':
+    -f gff2
+   #elif $inputFormatType.FormatInputFileName == 'bam':
+    -f bam
+   #elif $inputFormatType.FormatInputFileName == 'sam':
+    -f sam
+   #elif $inputFormatType.FormatInputFileName == 'gtf':
+    -f gtf
+   #end if
+
+   -g $outputFormatType.outFormat
+   #if $optionSequence.choose == 'Yes':
+    -s $optionSequence.value
+   #end if
+
+
+   -n "$name"
+   $strand
+   -o $outputFile
+
+  </command>
+  <inputs>
+   <conditional name="inputFormatType">
+   <param name="FormatInputFileName"  type="select" label="Input File Format">
+   <option value="gff3">GFF3</option>
+   <option value="bed">BED</option>
+   <option value="gff2">GFF2</option>
+   <option value="bam">BAM</option>
+   <option value="sam">SAM</option>
+   <option value="gtf">GTF</option>
+   </param>
+   <when value="gff3">
+   <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+   </when>
+   <when value="bed">
+   <param name="inputFileName" format="bed" type="data" label="Input File"/>
+   </when>
+   <when value="gff2">
+   <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+   </when>
+   <when value="bam">
+   <param name="inputFileName" format="bam" type="data" label="Input File"/>
+   </when>
+   <when value="sam">
+   <param name="inputFileName" format="sam" type="data" label="Input File"/>
+   </when>
+   <when value="gtf">
+   <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+   </when>
+ </conditional>
+
+ <!-- Target format: the selected value is passed verbatim to -g and also picks the output datatype through change_format below. -->
+   <conditional name="outputFormatType">
+   <param name="outFormat"  type="select" label="Please choose the format that you want to convert to (corresponding to your input file format).">
+   <option value="gff3">GFF3</option>
+   <option value="bed">BED</option>
+   <option value="gff2">GFF2</option>
+   <option value="wig">WIG</option>
+   <option value="sam">SAM</option>
+   <option value="csv">CSV</option>
+   <option value="gtf">GTF</option>
+   </param>
+   <when value="gff3">
+   </when>
+   <when value="bed">
+   </when>
+   <when value="gff2">
+   </when>
+   <when value="wig">
+   </when>
+   <when value="sam">
+   </when>
+   <when value="csv">
+   </when>
+   <when value="gtf">
+   </when>
+ </conditional>
+ <!-- Free-text name given to -n; it is quoted in the command so that a value containing spaces stays a single argument. -->
+ <param name="name" type="text" value="SMART" label="name for the transcripts"/>
+
+ <conditional name="optionSequence">
+ <param name="choose" type="select" label="give the corresponding Multi-Fasta file (useful for EMBL format)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="value" type="data" format="mfa" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <!-- Boolean flag: contributes -t to the command when checked and nothing otherwise. -->
+ <param name="strand" type="boolean" truevalue="-t" falsevalue="" checked="false" label="consider the 2 strands as different (only useful for writing WIG files)"/>
+
+  </inputs>
+
+  <outputs>
+   <data name="outputFile" format="gff3" label="$inputFormatType.FormatInputFileName to $outputFormatType.outFormat">
+ <change_format>
+ <when input="outputFormatType.outFormat" value="bed" format="bed" />
+ <when input="outputFormatType.outFormat" value="gff2" format="gff2" />
+ <when input="outputFormatType.outFormat" value="wig" format="wig" />
+ <when input="outputFormatType.outFormat" value="sam" format="sam" />
+ <when input="outputFormatType.outFormat" value="csv" format="csv" />
+ <when input="outputFormatType.outFormat" value="gtf" format="gtf" />
+ </change_format>
+ </data>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToCsv" name="Bed -> Csv">
+  <description>Convert Bed File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="bed" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted CSV file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="csv" label="[bed -> csv] Output File"/>
+    <data name="logFile" format="txt" label="[bed -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff2" name="Bed -> Gff2">
+  <description>Convert Bed File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+  <!-- The output format is requested as gff2, the same value the other *ToGff2 converters in this directory pass to -g. -->
+  <outputs>
+    <data format="gff" name="outputFile" label="[bed -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> gff2] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff3" name="Bed -> Gff3">
+  <description>Convert Bed File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="bed" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF3 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff3" label="[bed -> gff3] Output File"/>
+    <data name="logFile" format="txt" label="[bed -> gff3] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToSam.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToSam" name="Bed -> Sam">
+  <description>Convert Bed File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="bed" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted SAM file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="sam" label="[bed -> sam] Output File"/>
+    <data name="logFile" format="txt" label="[bed -> sam] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToCsv" name="Blast (-m 8) -> Csv">
+  <description>Convert Blast (-m 8) File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted CSV file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="csv" label="[blast -> csv] Output File"/>
+    <data name="logFile" format="txt" label="[blast -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff2" name="Blast (-m 8) -> Gff2">
+  <description>Convert Blast (-m 8) File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF2 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff" label="[blast -> gff2] Output File"/>
+    <data name="logFile" format="txt" label="[blast -> gff2] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff3" name="Blast (-m 8) -> Gff3">
+  <description>Convert Blast (-m 8) File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF3 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff3" label="[blast -> gff3] Output File"/>
+    <data name="logFile" format="txt" label="[blast -> gff3] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToSam" name="Blast (-m 8) -> Sam">
+  <description>Convert Blast (-m 8) File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted SAM file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="sam" label="[blast -> sam] Output File"/>
+    <data name="logFile" format="txt" label="[blast -> sam] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_FastqToFasta" name="Fastq -> Fasta">
+  <description>Convert Fastq File to Fasta File.</description>
+  <command interpreter="python"> ../Java/Python/fastqToFasta.py -i $inputFile -o $outputFile 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="fastq" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the FASTA file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="fasta" label="[fastq -> fasta] Output File"/>
+    <data name="logFile" format="txt" label="[fastq -> fasta] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToCsv" name="Gff2 -> Csv">
+  <description>Convert Gff2 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted CSV file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="csv" label="[gff2 -> csv] Output File"/>
+    <data name="logFile" format="txt" label="[gff2 -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToGff3" name="Gff2 -> Gff3">
+  <description>Convert Gff2 File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF3 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff3" label="[gff2 -> gff3] Output File"/>
+    <data name="logFile" format="txt" label="[gff2 -> gff3] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToSam" name="Gff2 -> Sam">
+  <description>Convert Gff2 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted SAM file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="sam" label="[gff2 -> sam] Output File"/>
+    <data name="logFile" format="txt" label="[gff2 -> sam] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToCsv" name="Gff3 -> Csv">
+  <description>Convert Gff3 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff3" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted CSV file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="csv" label="[gff3 -> csv] Output File"/>
+    <data name="logFile" format="txt" label="[gff3 -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToGff2" name="Gff3 -> Gff2">
+  <description>Convert Gff3 File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff3" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF2 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff" label="[gff3 -> gff2] Output File"/>
+    <data name="logFile" format="txt" label="[gff3 -> gff2] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToSam" name="Gff3 -> Sam">
+  <description>Convert Gff3 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff3" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted SAM file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="sam" label="[gff3 -> sam] Output File"/>
+    <data name="logFile" format="txt" label="[gff3 -> sam] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToWig" name="Gff3 -> Wig">
+  <description>Convert Gff3 File to Wig File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g wig yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="gff3" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted WIG file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="wig" label="[gff3 -> wig] Output File"/>
+    <data name="logFile" format="txt" label="[gff3 -> wig] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToCsv" name="Sam -> Csv">
+  <description>Convert Sam File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="sam" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted CSV file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="csv" label="[sam -> csv] Output File"/>
+    <data name="logFile" format="txt" label="[sam -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff2" name="Sam -> Gff2">
+  <description>Convert Sam File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="sam" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF2 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff" label="[sam -> gff2] Output File"/>
+    <data name="logFile" format="txt" label="[sam -> gff2] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff3" name="Sam -> Gff3">
+  <description>Convert Sam File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" format="sam" label="Input File"/>
+  </inputs>
+  <!-- Two datasets: the converted GFF3 file, and a log receiving the script's stderr via the 2> redirection in the command. -->
+  <outputs>
+    <data name="outputFile" format="gff3" label="[sam -> gff3] Output File"/>
+    <data name="logFile" format="txt" label="[sam -> gff3] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/CountReadGCPercent.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CountReadGCPercent.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,16 @@
+<tool id="CountReadGCPercent" name="count read GCpercent">
+  <description>Count GC percent for each read against a genome.</description>
+  <command interpreter="python"> ../Java/Python/CountReadGCPercent.py -i $inputFastaFile -j $inputGffFile -o $outputFile</command>
+  <inputs>
+    <param name="inputFastaFile" type="data" format="fasta" label="Input reference fasta File"/>
+    <param name="inputGffFile" type="data" format="gff3" label="Input File"/>
+  </inputs>
+  <!-- Single GFF3 result; the script receives the FASTA through -i and the GFF3 input through -j. -->
+  <outputs>
+    <data name="outputFile" format="gff3" label="[CountReadGCPercent] Output File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/DiffExpAnal.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/DiffExpAnal.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,19 @@
+<tool id="testDiffExpAnal" name="Differential Expression Analysis">
+  <description>Differential expression analysis for sequence count data</description>
+  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+    <param name="columnsOfGeneName" type="text" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0." value="0"/>
+    <param name="columnsOfCondition1" type="text" label="Please indicate the column numbers of condition1 with ',' separator." value="1,2"/>
+    <param name="columnsOfCondition2" type="text" label="Please indicate the column numbers of condition2 with ',' separator." value="3,4"/>
+  </inputs>
+  <!-- Arguments are positional: input, the three column lists, then the CSV and PNG outputs; stderr goes to the log via 2>. -->
+  <outputs>
+    <data name="outputFileCSV" format="tabular" label="[DiffExpAnal] Output CSV File"/>
+    <data name="outputFilePNG" format="png" label="[DiffExpAnal] Output PNG File"/>
+    <data name="outputLog" format="tabular" label="[DiffExpAnal] Log File"/>
+  </outputs>
+  <!-- The help section below is currently empty. -->
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/FindOverlaps_optim.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/FindOverlaps_optim.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,16 @@
+<tool id="findOverlaps" name="findOverlaps">
+  <description>Finds the overlapped reference reads.</description>
+  <command interpreter="python">
+ ../Java/Python/FindOverlaps_optim.py -i $inputRef -j $inputQ -o $outputFileGff
+  </command>
+  <!-- Both inputs must be GFF3: the reference is handed to -i and the query to -j. -->
+  <inputs>
+    <param name="inputRef" type="data" format="gff3" label="Input Reference File"/>
+    <param name="inputQ" type="data" format="gff3" label="Input Query File"/>
+  </inputs>
+  <!-- One GFF3 dataset, written by the script to the path given with -o. -->
+  <outputs>
+    <data format="gff3" name="outputFileGff"/>
+  </outputs>
+
+</tool>
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/GetDifferentialExpression.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/GetDifferentialExpression.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,210 @@
+<tool id="GetDifferentialExpression" name="get differential expression">
+ <description>Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file.</description>
+ <command interpreter="python">
+ ../Java/Python/GetDifferentialExpression.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+ -g gtf
+ #end if
+
+ -k $formatTypeRef.inputFileNameRef
+ #if $formatTypeRef.FormatInputFileNameRef == 'bed':
+ -l bed
+ #elif $formatTypeRef.FormatInputFileNameRef == 'gff':
+ -l gff
+ #elif $formatTypeRef.FormatInputFileNameRef == 'gff2':
+ -l gff2
+ #elif $formatTypeRef.FormatInputFileNameRef == 'gff3':
+ -l gff3
+ #elif $formatTypeRef.FormatInputFileNameRef == 'sam':
+ -l sam
+ #elif $formatTypeRef.FormatInputFileNameRef == 'gtf':
+ -l gtf
+ #end if
+
+ -o $outputFileGff
+
+ $simple
+ $adjusted
+ #if $optionSimplePara.simplePara == 'Yes':
+ -S $optionSimplePara.paraValue
+ #end if
+
+ #if $optionFixedSizeFactor.FSF == 'Yes':
+ -x $optionFixedSizeFactor.FSFValue
+ #end if
+
+ #if $optionFDR.FDR == 'Yes':
+ -d $optionFDR.FDRValue
+ #end if
+ #if str($plot) == '-p':
+ -p $outputFilePNG
+ #end if
+ </command>
+ <!-- Three input files, each with its own format selector: condition 1 (-i/-f), condition 2 (-j/-g) and the reference regions (-k/-l). -->
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format 1">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input File Format 2">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatTypeRef">
+ <param name="FormatInputFileNameRef" type="select" label="Input Ref File Format ">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileNameRef" format="bed" type="data" label="Input Ref File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileNameRef" format="gff" type="data" label="Input Ref File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileNameRef" format="gff2" type="data" label="Input Ref File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileNameRef" format="gff3" type="data" label="Input Ref File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileNameRef" format="sam" type="data" label="Input Ref File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileNameRef" format="gtf" type="data" label="Input Ref File"/>
+ </when>
+ </conditional>
+ <!-- Normalization flags: each checkbox contributes its truevalue (-s or -a) to the command, or nothing when unchecked. -->
+ <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="normalize using the number of reads in each condition"/>
+ <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="normalize using the number of reads of 'mean' regions"/>
+
+ <conditional name="optionSimplePara">
+ <param name="simplePara" type="select" label="provide the number of reads" >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="paraValue" type="text" value="None" label="provide the number of reads" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionFixedSizeFactor">
+ <param name="FSF" type="select" label="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="FSFValue" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionFDR">
+ <param name="FDR" type="select" label="use FDR">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="FDRValue" type="float" value="0.0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <!-- When checked, the command adds "-p" followed by the PNG output path; when unchecked neither is emitted. -->
+ <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="plot cloud plot"/>
+
+ </inputs>
+ <!-- The PNG dataset is only created when "plot" is checked (see its filter), so the command references it only in that case. -->
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[GetDifferentialExpression]out file"/>
+ <data name="outputFilePNG" format="png" label="[GetDifferentialExpression]PNG file">
+ <filter>plot</filter>
+ </data>
+ </outputs>
+
+ <help>
+ example: python GetDifferentialExpression.py -i input1 -f gff3 -j input2 -g gff3 -k ref -l gff3 -o output.gff3
+ </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/GetFlanking.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/GetFlanking.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,179 @@
+<tool id="GetFlanking" name="get flanking">
+ <description>Get the flanking regions of a set of reference.</description>
+ <command interpreter="python">
+ ../Java/Python/GetFlanking.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+ -g gtf
+ #end if
+
+   #if $OptionUpDownStream.OptionUD == 'UpStream':
+ -5
+ #elif $OptionUpDownStream.OptionUD == 'DownStream':
+ -3
+ #end if
+
+
+   #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
+ -c
+ #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
+ -a
+ #end if
+
+ #if $OptionMax.maximum == "Yes":
+ -D $OptionMax.max
+ #end if
+ #if $OptionMin.minimum == "Yes":
+ -d $OptionMin.min
+ #end if
+
+   -o $outputFile
+
+
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="query File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Reference File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+
+ <conditional name="OptionUpDownStream">
+ <param name="OptionUD" type="select" label="UpStream or DownStream">
+ <option value="UpStream">UpStream</option>
+ <option value="DownStream">DownStream</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="UpStream">
+ </when>
+ <when value="DownStream">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+
+ <conditional name="OptionColinearOrAntiSens">
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
+ <option value="AntiSens">AntiSens</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="Colinear">
+ </when>
+ <when value="AntiSens">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+
+ <conditional name="OptionMax">
+ <param name="maximum" type="select" label="maximum distance between 2 elements">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="max" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionMin">
+ <param name="minimum" type="select" label="minimum distance between 2 elements">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="min" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+
+   <outputs>
+    <data format="gff3" name="outputFile" label="[GetFlanking] Output File"/>
+   </outputs>
+
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/SelectByTag.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/SelectByTag.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,122 @@
+<tool id="SelectByTag" name="select by tag">
+ <description>Keeps the genomic coordinates whose value for a given tag matches a given value or range.</description>
+ <command interpreter="python">
+ ../Java/Python/SelectByTag.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+
+ -g $Tag
+ #if $OptionValue.Value == "Yes":
+ -a $OptionValue.valeur
+ #end if
+ #if $OptionMax.maximum == "Yes":
+ -M $OptionMax.max
+ #end if
+ #if $OptionMin.minimum == "Yes":
+ -m $OptionMin.min
+ #end if
+
+ #if $OptionDefault.default == "Yes":
+ -d $OptionDefault.defaultValue
+ #end if
+
+ -o $outputFileGff
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
+
+ <conditional name="OptionValue">
+ <param name="Value" type="select" label="value of tag">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="valeur" type="integer" value="1" help="Be careful! The value must be greater than 0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionMax">
+ <param name="maximum" type="select" label="maximum value of tag">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="max" type="integer" value="1" help="Be careful! The value must be greater than 0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionMin">
+ <param name="minimum" type="select" label="minimum value of tag">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="min" type="integer" value="1" help="Be careful! The value must be greater than 0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionDefault">
+ <param name="default" type="select" label="gives this value if tag is not present">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="defaultValue" type="float" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[SelectByTag] Output File"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/WrappGetLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/WrappGetLetterDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,97 @@
+#! /usr/bin/env python
+
+import getopt
+import glob
+import os
+import shutil
+import subprocess
+import sys
+
+from pyRepetUnit.commons.checker.CheckerException import CheckerException
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
+class WrappGetLetterDistribution(object):
+
+    def __init__(self):
+        self._inputFileName = ""
+        self._inputFileFormat = ""
+        self._outputFileName = "tmpOutputFile"
+        self._csv = False
+
+    def help( self ):
+        print
+        print "usage: %s [ options ]" % ( sys.argv[0] )
+        print "options:"
+        print "     -h: this help"
+        print "     -i: input file"
+        print "     -f: 'fasta' or 'fastq'"
+        print "     -c: CSV output file"
+        print "     -a: first PNG output file"
+        print "     -b: second PNG output file"
+        print
+        print "Exemple:"
+        print
+        print "1:\n\tpython WrappGetLetterDistribution.py -i inputFile.fasta -f fasta -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
+        print
+        print "2:\n\tpython WrappGetLetterDistribution.py -i inputFile.fastq -f fastq -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
+        print
+        print
+
+
+    def setAttributesFromCommandLine(self):
+        try:
+            opts, args = getopt.getopt( sys.argv[1:], "hi:f:a:b:c:" )
+        except getopt.GetoptError, err:
+            print str(err); sys.exit(1)
+        for o, a in opts:
+            if o == "-h":
+                self.help()
+                sys.exit(0)
+            if o == "-i":
+                self._inputFileName = a
+            elif o == "-f":
+                self._inputFileFormat = a
+            elif o == "-c":
+                self._outputFileNameCSV = a
+                self._csv = True
+            elif o == "-a":
+                self._outputFileNamePNG = a
+            elif o == "-b":
+                self._outputFileNamePerNtPNG = a
+
+    def checkAttributes(self):
+        lMsg = []
+        if self._inputFileName == "" and not os.path.exists(self._inputFileName):
+            lMsg.append("ERROR: This input file doesn't exist!")
+        if self._inputFileFormat == "":
+            lMsg.append("ERROR: No input file format specified in option!")
+        if self._outputFileNamePNG == "":
+            lMsg.append("ERROR: No output file.png specified in option!")
+        if self._outputFileNamePerNtPNG == "":
+            lMsg.append("ERROR: No output filePerNt.png specified in option!")
+        if self._outputFileNameCSV == "" and self._csv == True :
+            lMsg.append("ERROR: No output file.csv specified in option!")
+
+        print ">>> lMsg " + str(lMsg)
+        if lMsg != []:
+            exp = CheckerException()
+            exp.setMessages(lMsg)
+            raise (exp)
+
+    def _cleanWorkingDir(self, cDir):
+        os.system("rm %s/tmpData* %s/tmpScript*" % (cDir, cDir))
+
+    def wrapp(self):
+        self.checkAttributes()
+ cDir = os.getcwd()
+
+        if self._csv == True:
+            os.system("python %s/Java/Python/getLetterDistribution.py -i %s -f %s -o %s/%s -c" % (SMART_PATH, self._inputFileName, self._inputFileFormat, cDir, self._outputFileName))
+            os.system("mv %s/%s.csv %s" % (cDir, self._outputFileName, self._outputFileNameCSV))
+            os.system("mv %s/%s.png %s" % (cDir, self._outputFileName, self._outputFileNamePNG))
+            os.system("mv %s/%sPerNt.png %s" % (cDir, self._outputFileName, self._outputFileNamePerNtPNG))
+
+        self._cleanWorkingDir(cDir)
+
+if __name__ == '__main__':
+    launcher = WrappGetLetterDistribution()
+    launcher.setAttributesFromCommandLine()
+    launcher.wrapp()
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/WrappGetLetterDistribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/WrappGetLetterDistribution.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,33 @@
+<tool id="getLetterDistribution1" name="Get Letter Distribution">
+    <description>Calculate distribution for each nucleotide per position for all short reads (S-MART)</description>
+    <!-- The input dataset is declared inside the "formatType" conditional, so the command refers to it as $formatType.inputFileName. -->
+    <command interpreter="python">
+        WrappGetLetterDistribution.py -i $formatType.inputFileName
+        #if $formatType.FormatInputFileName == 'fasta':
+            -f fasta
+        #else :
+            -f fastq
+        #end if
+        -c $ouputFileNameCSV -a $ouputFileNamePNG1 -b $ouputFileNamePNG2
+    </command>
+    <inputs>
+        <conditional name="formatType">
+            <param name="FormatInputFileName" type="select" label="Input File Format">
+                <option value="fasta">fasta</option>
+                <option value="fastq" selected="true">fastq</option>
+            </param>
+            <when value="fasta">
+                <param name="inputFileName" format="fasta" type="data" label="Fasta Input File"/>
+            </when>
+            <when value="fastq">
+                <param name="inputFileName" format="fastq" type="data" label="Fastq Input File"/>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <!-- The output names keep their "ouput" spelling because the command above refers to them by these names. -->
+        <data name="ouputFileNameCSV" format="tabular" label="[getLetterDistribution] CSV File"/>
+        <data name="ouputFileNamePNG1" format="png" label="[getLetterDistribution] PNG File 1"/>
+        <data name="ouputFileNamePNG2" format="png" label="[getLetterDistribution] PNG File 2"/>
+    </outputs>
+</tool>
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/changeGffFeatures.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/changeGffFeatures.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,16 @@
+<tool id="changeGffFeatures" name="change gff Features">
+ <description>Replaces one feature name with another (the feature name is found in the 3rd column).</description>
+ <command interpreter="bash">
+ ../Java/Python/changeGffFeatures.sh $inputFile $inputFeature $outputFeature >$outputFile
+ </command>
+      <inputs>
+       <param name="inputFile" type="data" label="Input File" format="gff"/>
+       <param name="inputFeature" type="text" value="exon" label="A given feature, you must choose a feature name(on the 3rd column)."/>
+       <param name="outputFeature" type="text" value="exon" label="You must choose an other feature name(on the 3rd column)."/>
+      </inputs>
+
+      <outputs>
+             <data name="outputFile" format="gff" label="[changeGffFeatures] Output File"/>
+      </outputs>
+</tool>
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/changeTagName.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/changeTagName.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,54 @@
+<tool id="changeTagName" name="change tag name">
+ <description>Changes the name of tag of a list of transcripts.</description>
+ <command interpreter="python">
+ ../Java/Python/changeTagName.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ -t $Tag
+ -n $name
+
+ -o $outputFileGff
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
+ <param name="name" type="text" value="None" label="name option" help="new name for the tag, you must choose a new name."/>
+
+
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[changeTagName] Output File"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/cleanGff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/cleanGff.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,18 @@
+<tool id="cleanGff" name="clean Gff">
+   <description>Cleans a GFF file as given by NCBI and outputs a GFF3 file.</description>
+   <command interpreter="python"> ../Java/Python/cleanGff.py -i $inputFile
+   -t $type
+   -o $outputFile
+   </command>
+
+       <inputs>
+       <param name="inputFile" type="data" label="Input File" format="gff"/>
+       <param name="type" type="text" value="tRNA,rRNA,ncRNA,CDS" label="tag option, compulsory option" help="lists of comma separated types that you want to keep.EX: ncRNA,tRNA,rRNA,CDS"/>
+       </inputs>
+
+       <outputs>
+           <data format="gff3" name="outputFile" label="[cleanGff] Output File"/>
+       </outputs>
+
+</tool>
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/clusterize.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/clusterize.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,67 @@
+<tool id="MergingDataClusterize" name="Clusterize">
+ <description>Clusterizes the reads when their genomic intervals overlap.</description>
+ <command interpreter="python">
+ ../Java/Python/clusterize.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'csv':
+ -f csv
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #end if
+ -o $outputFileGff
+ $colinear
+ $normalize
+ -d $distance
+ $log $outputFileLog
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
+ <option value="sam">sam</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
+ <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
+ <param name="distance" type="integer" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[clusterize]output file"/>
+ <data name="outputFileLog" format="txt" label="[clusterize]log file">
+ <filter>log</filter>
+ </data>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/clusterizeBySlidingWindows.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/clusterizeBySlidingWindows.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,132 @@
+<tool id="clusterizeBySlidingWindows" name="clusterize By SlidingWindows">
+ <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.</description>
+ <command interpreter="python">
+ ../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -s $size
+ -e $overlap
+ -o $outputFileGff
+ $normalize
+ $strands
+
+ #if $OptionTag.tag == "Yes":
+ -g $OptionTag.value
+ #end if
+
+ #if $OptionsOperation.operation == "Yes":
+ -r $OptionsOperation.value
+ #end if
+
+ #if $OptionWriteTag.writeTag == "Yes":
+ -w $OptionWriteTag.value
+ #end if
+
+ $strand
+ $plot $plotPng
+ $excel $excelOutput
+
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+
+ <param name="size" type="text" value="50000" label="Size option" help="Size of the regions."/>
+ <param name="overlap" type="text" value="50" label="Overlap option" help="Overlap between two consecutive regions."/>
+ <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="This option normalizes (Warning!! Only for GFF3 file!)"/>
+ <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strands option" help="Consider the two strands separately."/>
+
+ <conditional name="OptionTag">
+ <param name="tag" type="select" label="use a given tag as input (instead of summing number of features)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="value" type="text" value="None" label="tag option" help="write a tag name you want to observe."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+
+ <conditional name="OptionsOperation">
+ <param name="operation" type="select" label="combine tag value with given operation [choice (sum, avg, med, min, max)]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="value" type="text" value="None" label="operation option" help="You can ONLY choose one of the following operations: sum, avg, med, min, max."/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+
+ <conditional name="OptionWriteTag">
+ <param name="writeTag" type="select" label="write a new tag in output file">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="value" type="text" value="nbElements" label="write tag option" help="print the result in the given tag (default usually is 'nbElements')"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <param name="strand" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strand option" help="This option considers the two strands separately."/>
+ <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="This option creates a png file."/>
+ <param name="excel" type="boolean" truevalue="-x" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ <data name="excelOutput" format="csv">
+ <filter>excel</filter>
+ </data>
+ <data name="plotPng" format="png">
+ <filter>plot</filter>
+ </data>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/compareOverlapping.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/compareOverlapping.xml Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,288 @@\n+<tool id="CompareOverlapping" name="Compare Overlapping">\n+\t<description>Print all the transcripts from a first file which overlap with the transcripts from a second file.</description>\n+\t<command interpreter="python">\n+\t\t../Java/Python/CompareOverlapping.py -i $formatType.inputFileName1\n+\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n+\t\t\t-f bed\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n+\t\t\t-f gff\t\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n+\t\t\t-f gff2\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n+\t\t\t-f gff3\n+\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n+\t\t\t-f sam\n+\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n+\t\t\t-f gtf\n+\t\t#end if\n+\t\t\t\n+\t\t-j $formatType2.inputFileName2\n+\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n+\t\t\t-g bed\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n+\t\t\t-g gff\t\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n+\t\t\t-g gff2\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n+\t\t\t-g gff3\n+\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n+\t\t\t-g sam\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n+\t\t -g gtf\n+\t\t#end if\n+\n+\t\t-o $outputFileGff \n+\n+\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n+\t\t\t-S $optionNFirstFile1.firstNtFile1\n+\t\t#end if\n+\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n+\t\t\t-s $optionNFirstFile2.firstNtFile2\n+\t\t#end if\n+\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n+\t\t\t-U $optionNLastFile1.lastNtFile1\n+\t\t#end if\n+\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n+\t\t\t-u $optionNLastFile2.lastNtFile2\n+\t\t#end if\n+\t\n+\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n+\t\t\t-E $optionExtentionCinqFile1.extention51\n+\t\t#end if\n+\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n+\t\t\t-e $optionExtentionCinqFile2.extention52\n+\t\t#end 
if\n+\n+\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n+\t\t\t-N $optionExtentionTroisFile1.extention31\n+\t\t#end if\n+\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n+\t\t\t-n $optionExtentionTroisFile2.extention32\n+\t\t#end if\t\n+\n+\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n+\t\t\t-c \n+\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n+\t\t\t-a\n+\t\t#end if\t\n+\n+\t\t#if $OptionDistance.Dist == \'Yes\':\n+\t\t\t-d $OptionDistance.distance\n+\t\t#end if\n+\n+\t\t#if $OptionMinOverlap.MO == \'Yes\':\n+\t\t\t-m $OptionMinOverlap.minOverlap\n+\t\t#end if\n+\n+\t\t$InvertMatch\n+\t\t$ReportIntron\n+\t\t$NotOverlapping\n+\t\t\n+\t</command>\n+\n+\t<inputs>\n+\t\t<conditional name="formatType">\n+\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gtf">\n+\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n+\t\t\t\t\t\t\t\t </when>\n+\t\t</conditional>\n+\n+\t\t<conditional 
name="formatType2">\n+\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Inp'..b'e="integer" value="1" label="n last nucleotides for input file 1" help="only consider the n last nucleotides of the transcripts in file 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<conditional name="optionNLastFile2">\n+\t\t\t<param name="NLastForFile2" type="select" label="NLast for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\n+\n+\t\t<conditional name="optionExtentionCinqFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\n+\t\t<conditional name="optionExtentionCinqFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when 
value="Yes">\n+\t\t\t\t<param name="extention52" type="integer" value="1" label="in file 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionExtentionTroisFile2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionColinearOrAntiSens">\n+\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n+\t\t\t\t<option value="Colinear">Colinear</option>\n+\t\t\t\t<option value="AntiSens">AntiSens</option>\n+\t\t\t\t<option value="NONE" selected="true">NONE</option>\n+\t\t\t</param>\n+\t\t\t<when value="Colinear">\n+\t\t\t</when>\n+\t\t\t<when value="AntiSens">\n+\t\t\t</when>\n+\t\t\t<when value="NONE">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionDistance">\n+\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="distance" type="integer" value="0"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional 
name="OptionMinOverlap">\n+\t\t\t<param name="MO" type="select" label="Minimum number of overlapping between two reads">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n+\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n+\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n+\t\t\n+\t</inputs>\n+\n+\t<outputs>\n+\t\t<data name="outputFileGff" format="gff3"/>\n+\t</outputs> \n+\t\n+</tool>\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/computeCoverage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/computeCoverage.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,107 @@
+<tool id="ComputeCoverage" name="Compute coverage">
+    <description>Compute the coverage of a set with respect to another set.</description>
+    <command interpreter="python">
+        ## Both format selectors only offer bed, gff, gff2, gff3, sam and gtf,
+        ## and each of those values is handed to -f / -g unchanged, so the
+        ## selected value is forwarded directly.
+        ../Java/Python/ComputeCoverage.py
+            -i $formatType.inputFileName1
+            -f $formatType.FormatInputFileName1
+            -j $formatType2.inputFileName2
+            -g $formatType2.FormatInputFileName2
+            $ReportIntron
+            -o $outputFileGff
+    </command>
+
+    <inputs>
+        <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format 1">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input File Format 2">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+                </conditional>
+
+                <param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Include introns."/>
+
+        </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[computeCoverage] OUTPUT file"/>
+ </outputs>
+
+</tool>
+

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/coordinatesToSequence.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/coordinatesToSequence.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,62 @@
+<tool id="coordinatesToSequence" name="coordinates to sequence">
+ <description>Coordinates to Sequences: Extract the sequences from a list of coordinates.</description>
+ <command interpreter="python">
+  ## The format selector only offers bed, gff, gff2, gff3, sam and gtf, and
+  ## each of those values is handed to -f unchanged, so it is forwarded directly.
+  ../Java/Python/coordinatesToSequence.py
+   -i $formatType.inputFileName1
+   -f $formatType.FormatInputFileName1
+   -s $sequence
+   -o $outputFileFasta
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="sequence" type="data" label="Reference fasta File" format="fasta"/>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileFasta" format="fasta" label="coordinates to sequences output"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/findTss.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/findTss.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,59 @@
+<tool id="findTss" name="findTss">
+ <description>Find the transcription start site of a list of transcripts.</description>
+ <command interpreter="python">
+  ## The format selector only offers bed, gff, gff2 and gff3, and each of
+  ## those values is handed to -f unchanged, so it is forwarded directly.
+  ../Java/Python/findTss.py
+   -i $formatType.inputFileName
+   -f $formatType.FormatInputFileName
+   -o $outputFileGff
+   $colinear
+   $normalize
+   -d $distance
+  ## excelOutput is a filtered output: it is only produced when the "excel"
+  ## box is ticked. Reference it only in that case so that no stray path
+  ## (or unresolved name) ends up on the command line otherwise.
+  #if str($excel) == '-c':
+   -c $excelOutput
+  #end if
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="colinear" type="boolean" truevalue="-e" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (Warning!! Only for GFF3 file!!!!!)"/>
+ <!-- distance is a numeric limit (see help); an integer field, as used for the other distance parameters of these wrappers, rejects free text before the job starts. -->
+ <param name="distance" type="integer" value="10" label="distance option" help="Limit the maximum distance between two reads"/>
+ <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[findTss] Output File"/>
+ <data name="excelOutput" format="csv" label="[findTss] CSV File">
+ <filter>excel</filter>
+ </data>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getDifference.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getDifference.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,127 @@
+<tool id="getDifference" name="get Difference">
+ <description>Gets all the regions of the genome except the ones given, or gets all the elements from the first set which do not overlap with the second set (at the nucleotide level).</description>
+ <command interpreter="python">
+  ## Both format selectors only offer bed, gff, gff2, gff3, sam and gtf, and
+  ## each of those values is handed to -f / -g unchanged, so the selected
+  ## value is forwarded directly.
+  ../Java/Python/getDifference.py
+   -i $formatType.inputFileName1
+   -f $formatType.FormatInputFileName1
+   -j $formatType2.inputFileName2
+   -g $formatType2.FormatInputFileName2
+   $split
+  ## The reference fasta is only passed when the user asked for it.
+  #if $OptionSequence.option == "Yes":
+   -s $OptionSequence.sequence
+  #end if
+   -o $outputFileGff
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format 1">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File "/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File "/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File "/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File "/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File "/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File "/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input File Format 2">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="reference file"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="reference file"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="reference file"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="reference file"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="reference file"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="reference file"/>
+ </when>
+ </conditional>
+
+ <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="split option" help="When comparing to a set of genomic coordinates, do not join."/>
+
+
+ <conditional name="OptionSequence">
+ <param name="option" type="select" label="Compare with a reference fasta file.">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="sequence" type="data" label="Fasta File" format="fasta"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[getDifference]output File."/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getDistance.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getDistance.xml Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,275 @@\n+<tool id="GetDistance" name="get distance">\n+\t<description>Give the distances between every data from the first input set and the data from the second input set</description>\n+\t<command interpreter="python">\n+\t\t../Java/Python/getDistance.py -i $formatType.inputFileName1\n+\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n+\t\t\t-f bed\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n+\t\t\t-f gff\t\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n+\t\t\t-f gff2\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n+\t\t\t-f gff3\n+\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n+\t\t\t-f sam\n+\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n+\t\t\t-f gtf\n+\t\t#end if\n+\t\t\t\n+\t\t-j $formatType2.inputFileName2\n+\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n+\t\t\t-g bed\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n+\t\t\t-g gff\t\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n+\t\t\t-g gff2\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n+\t\t\t-g gff3\n+\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n+\t\t\t-g sam\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n+\t\t\t-g gtf\n+\t\t#end if\n+\n+\n+\t\t$absolute $proportion\n+\n+\t\t#if $OptionColinearOrAntiSens.OptionCA == "Colinear":\n+\t\t\t-c \n+\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n+\t\t\t-a\n+\t\t#end if\n+\n+\t\t#if $OptionFirstNucl5.FirstNu5 == "Yes":\n+\t\t\t-s $OptionFirstNucl5.first5File1\n+\t\t\t-S $OptionFirstNucl5.first5File2\n+\t\t#end if\t\t\n+\n+\t\t#if $OptionFirstNucl3.FirstNu3 == "Yes":\n+\t\t\t-e $OptionFirstNucl3.first3File1\n+\t\t\t-E $OptionFirstNucl3.first3File2\n+\t\t#end if\n+\n+\t\t#if $OptionMinDistance.MinD == "Yes":\n+\t\t\t-m $OptionMinDistance.minDistance\n+\t\t#end if\n+\n+\t\t#if $OptionMaxDistance.MaxD == "Yes":\n+\t\t\t-M $OptionMaxDistance.maxDistance\n+\t\t#end if\n+\n+\t\t$fivePrime $threePrime 
$spearMan\n+\n+\t\t#if $OptionBuckets.OBuckets == "Yes":\n+\t\t\t-u $OptionBuckets.buckets\n+\t\t#end if\n+\n+\t\t#if $OptionMinXaxis.MinX == "Yes":\n+\t\t\t-x $OptionMinXaxis.minXaxis\n+\t\t#end if\n+\n+\t\t#if $OptionMaxXaxis.MaxX == "Yes":\n+\t\t\t-X $OptionMaxXaxis.maxXaxis\n+\t\t#end if\n+\n+\t\t#if $OptionTitle.OTitle == "Yes":\n+\t\t\t-t $OptionTitle.title\n+\t\t#end if\n+\t\t\n+\t\t-o $outputFilePng\n+\t\t$outputDistance $outputFileDistance\n+\n+\t</command>\n+\n+\t<inputs>\n+\t\t<conditional name="formatType">\n+\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gtf">\n+\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="formatType2">\n+\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option 
value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Input File 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>\n+\t\t\t</when>'..b'ption>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="first5File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>\n+\t\t\t\t<param name="first5File2" type="integer" value="1" label="in file 2" help="Be Careful! The value must be upper than 0"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionFirstNucl3">\n+\t\t\t<param name="FirstNu3" type="select" label="only consider the n first 3\' nucleotides for input files">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="first3File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>\n+\t\t\t\t<param name="first3File2" type="integer" value="1" label="in file 2" help="Be Careful! 
The value must be upper than 0"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMinDistance">\n+\t\t\t<param name="MinD" type="select" label="minimum distance considered between two transcripts">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minDistance" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMaxDistance">\n+\t\t\t<param name="MaxD" type="select" label="maximum distance considered between two transcripts">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="maxDistance" type="integer" value="1000"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<param name="fivePrime" type="boolean" truevalue="-5" falsevalue="" checked="false" label="five prime option" help="Consider the elements from input file 1 which are upstream of elements of input file 2"/>\n+\t\t<param name="threePrime" type="boolean" truevalue="-3" falsevalue="" checked="false" label="three prime option" help="Consider the elements from input file1 which are downstream of elements of input file 2"/>\n+\t\t<param name="spearMan" type="boolean" truevalue="-r" falsevalue="" checked="false" label="spearman option" help="Compute Spearman rho."/>\n+\n+\n+\t\t<conditional name="OptionBuckets">\n+\t\t\t<param name="OBuckets" type="select" label="Plots histogram instead of line plot with given interval size.">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="buckets" type="integer" value="1" label="Interval size"/>\n+\t\t\t</when>\n+\t\t\t<when 
value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMinXaxis">\n+\t\t\t<param name="MinX" type="select" label="Minimum value on the x-axis to plot.">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minXaxis" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionMaxXaxis">\n+\t\t\t<param name="MaxX" type="select" label="Maximum value on the x-axis to plot.">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="maxXaxis" type="integer" value="1"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="OptionTitle">\n+\t\t\t<param name="OTitle" type="select" label="Title for the graph.">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="title" type="text" value=""/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t</inputs>\n+\n+\n+\t<outputs>\n+\t\t<data name="outputFilePng" format="png"/>\n+\t\t<data name="outputFileDistance" format="gff3">\n+\t\t\t<filter>outputDistance</filter>\n+\t\t</data>\n+\t</outputs> \n+\n+</tool>\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getDistribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getDistribution.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,237 @@
+<tool id="getDistribution" name="get distribution">
+ <description>Get Distribution: Get the distribution of the genomic coordinates on a genome.</description>
+ <command interpreter="python">
+  ## The format selector only offers bed, gff, gff2, gff3, csv, sam and gtf,
+  ## and each of those values is handed to -f unchanged, so it is forwarded
+  ## directly.
+  ../Java/Python/GetDistribution.py
+   -i $formatType.inputFileName
+   -f $formatType.FormatInputFileName
+   -r $refFile
+
+  ## Optional settings: each flag is emitted only when its selector is "Yes".
+  #if $optionNbBin.Nb == 'Yes':
+   -b $optionNbBin.nbBins
+  #end if
+
+  #if $optionStart.start == 'Yes':
+   -s $optionStart.startValue
+  #end if
+
+  #if $optionEnd.end == 'Yes':
+   -e $optionEnd.endValue
+  #end if
+
+  #if $optionHeight.height == 'Yes':
+   -H $optionHeight.heightValue
+  #end if
+
+  #if $optionWidth.width == 'Yes':
+   -W $optionWidth.widthValue
+  #end if
+
+  #if $optionYMin.YMin == 'Yes':
+   -y $optionYMin.YMinValue
+  #end if
+
+  #if $optionYMax.YMax == 'Yes':
+   -Y $optionYMax.YMaxValue
+  #end if
+
+  #if $optionChrom.chrom == 'Yes':
+   -c $optionChrom.chromValue
+  #end if
+
+  #if $optionColor.color == 'Yes':
+   -l $optionColor.colorValue
+  #end if
+
+   $bothStrands
+   $average
+   -n $names
+   $normalize
+
+  ## outputCSV and outputGFF are filtered outputs: they are only produced
+  ## when the matching box is ticked. Reference them only in that case so
+  ## that no stray path (or unresolved name) ends up on the command line.
+  #if str($csv) == '-x':
+   -x $outputCSV
+  #end if
+  #if str($gff) == '-g':
+   -g $outputGFF
+  #end if
+
+   -m
+   -o $outputFile
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="csv">csv</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="csv">
+ <param name="inputFileName" format="csv" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="refFile" format="fasta" type="data" label="reference genome file"/>
+
+ <conditional name="optionNbBin">
+ <param name="Nb" type="select" label="number of bins">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="nbBins" type="integer" value="1000" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionStart">
+ <param name="start" type="select" label="start from a given region">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="startValue" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionEnd">
+ <param name="end" type="select" label="end from a given region">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="endValue" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionHeight">
+ <param name="height" type="select" label="height of the graphics">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="heightValue" type="integer" value="300" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionWidth">
+ <param name="width" type="select" label="width of the graphics">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="widthValue" type="integer" value="1000" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionYMin">
+ <param name="YMin" type="select" label="minimum value on the y-axis to plot">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="YMinValue" type="integer" value="1000" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionYMax">
+ <param name="YMax" type="select" label="maximum value on the y-axis to plot">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="YMaxValue" type="integer" value="1000" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionChrom">
+ <param name="chrom" type="select" label="plot only one given chromosome">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="chromValue" type="text" value="chromName" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionColor">
+ <param name="color" type="select" label="color of the lines (separated by commas and no space)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="colorValue" type="text" value="red,blue" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+
+ <param name="bothStrands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="plot one curve per strand"/>
+ <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot average (instead of sum)"/>
+ <param name="names" type="text" value="nbElements" label="name for the tags (separated by commas and no space)"/>
+ <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
+ <param name="csv" type="boolean" truevalue="-x" falsevalue="" checked="false" label="write a .csv file."/>
+ <param name="gff" type="boolean" truevalue="-g" falsevalue="" checked="false" label="write a .gff file."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="png" label="[getDistribution] out png file"/>
+ <data name="outputCSV" format="csv" label="[getDistribution] output csv file">
+ <filter>csv</filter>
+ </data>
+
+ <data name="outputGFF" format="gff" label="[getDistribution] output gff file">
+ <filter>gff</filter>
+ </data>
+ </outputs>
+
+    <help>
+        This script gives a .tar out file, if you want to take look at the results, you have to download it.
+    </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getExons.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getExons.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,72 @@
+<tool id="getExons" name="get exons">
+    <description>Get the exons of a set of transcripts.</description>
+    <command interpreter="python">
+        ## The format selector only offers bed, gff, gff2, gff3, sam and gtf,
+        ## and each of those values is handed to -f unchanged, so it is
+        ## forwarded directly.
+        ../Java/Python/getExons.py
+            -i $formatType.inputFileName
+            -f $formatType.FormatInputFileName
+        ## The exon selection is only passed when the user enabled it.
+        #if $optionSelect.Value == "Yes":
+            -s $optionSelect.selectValue
+        #end if
+            -o $outputFileGff
+    </command>
+
+    <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <conditional name="optionSelect">
+ <param name="Value" type="select" label="select some of the exons (like '1,2,5..-3,-1')">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="selectValue" type="text" value="None" label="select option" help="like '1,2,5..-3,-1'"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="gff3" name="outputFileGff" label="[getExons -> gff3] Output File"/>
+    </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getIntrons.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getIntrons.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,56 @@
+<tool id="getIntrons" name="get introns">
+    <description>Get the introns of a set of transcripts.</description>
+    <command interpreter="python">
+ ../Java/Python/getIntrons.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -o $outputFileGff
+ </command>
+
+    <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+    </inputs>
+
+    <outputs>
+        <data format="gff3" name="outputFileGff" label="[getIntrons -> gff3] Output File"/>
+    </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getNb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getNb.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,78 @@
+<tool id="getNumber" name="get number">
+ <description>Get the distribution of exons per transcripts, or mapping per read, or transcript per cluster.</description>
+ <command interpreter="python">
+ ../Java/Python/getNb.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -o $outputFilePNG
+ -q $query
+ $barPlot
+ #if $optionXMAX.XMAX == 'Yes':
+ -x $optionXMAX.xMaxValue
+ #end if
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName" format="gtf" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="query" type="text" value="None" label="compulsory option, choice (exon, transcript, cluster)" />
+ <param name="barPlot" type="boolean" truevalue="-b" falsevalue="" checked="false" label="use barplot representation"/>
+
+ <conditional name="optionXMAX">
+ <param name="XMAX" type="select" label="maximum value on the x-axis to plot ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="xMaxValue" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFilePNG" format="png" label="[getNB]out file"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getReadDistribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getReadDistribution.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,67 @@
+<tool id="getReadDistribution" name="get read distribution">
+ <description>Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented.</description>
+ <command interpreter="python">
+ ../Java/Python/WrappGetReadDistribution.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'fasta':
+ -f fasta
+ #elif $formatType.FormatInputFileName == 'fastq':
+ -f fastq
+ #end if
+
+ #if $optionnumber.number == 'Yes':
+ -n $optionnumber.bestNumber
+ #end if
+ #if $optionpercent.percent == 'Yes':
+ -p $optionpercent.percentage
+ #end if
+ -o $outputFile
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Sequence input File Format ">
+ <option value="fasta">fasta</option>
+ <option value="fastq">fastq</option>
+ </param>
+ <when value="fasta">
+ <param name="inputFileName" format="fasta" type="data" label="Sequence input File"/>
+ </when>
+ <when value="fastq">
+ <param name="inputFileName" format="fastq" type="data" label="Sequence input File"/>
+ </when>
+ </conditional>
+
+ <conditional name="optionnumber">
+ <param name="number" type="select" label="keep the best n">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="bestNumber" type="integer" value="0"  />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionpercent">
+ <param name="percent" type="select" label="keep the best n percentage">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="percentage" type="integer" value="0" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="tar" label="[getReadDistribution] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>
+ </outputs>
+
+    <help>
+        This script produces a .tar output file; to look at the results, you have to download it.
+    </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getSequence.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getSequence.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,21 @@
+<tool id="getSequence" name="get sequence">
+  <description>Get a single sequence in a FASTA file.</description>
+  <command interpreter="python"> ../Java/Python/getSequence.py -i $inputFile
+ -n $name
+   -o $outputFile
+
+  </command>
+
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+   <param name="name" type="text" value="None" label="name of the sequence [compulsory option]"/>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[getSequence] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getSizes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getSizes.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,135 @@
+<tool id="GetSizes" name="get sizes">
+ <description>Get the sizes of a set of genomic coordinates.</description>
+ <command interpreter="python">
+ ../Java/Python/getSizes.py -i $formatType.inputFileName $formatType.FormatInputFileName
+
+ #if $OptionQuery.OptionQ == 'NONE':
+ -q size
+ #else:
+ $OptionQuery.OptionQ
+ #end if
+
+ -o $outputFile
+
+ #if $OptionXMax.xMax == "Yes":
+ -x $OptionXMax.maxValue
+ #end if
+ #if $OptionX.xLab == "Yes":
+         -a $OptionX.xLabValue
+ #end if
+                #if $OptionY.yLab == "Yes":
+         -b $OptionY.yLabValue
+ #end if
+ $barPlot
+ $excel $excelOutput
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="-f bed">bed</option>
+ <option value="-f gff">gff</option>
+ <option value="-f gff2">gff2</option>
+ <option value="-f gff3">gff3</option>
+ <option value="-f sam">sam</option>
+ <option value="-f gtf">gtf</option>
+ <option value="-f fasta">fasta</option>
+ <option value="-f fastq">fastq</option>
+ </param>
+ <when value="-f bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="-f gff">
+ <param name="inputFileName" format="gff" type="data" label="Input gff File"/>
+ </when>
+ <when value="-f gff2">
+ <param name="inputFileName" format="gff" type="data" label="Input gff2 File"/>
+ </when>
+ <when value="-f gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input gff3 File"/>
+ </when>
+ <when value="-f sam"> <!-- label must name the format chosen in this branch -->
+ <param name="inputFileName" format="sam" type="data" label="Input sam File"/>
+ </when>
+ <when value="-f gtf"> <!-- label must name the format chosen in this branch -->
+ <param name="inputFileName" format="gtf" type="data" label="Input gtf File"/>
+ </when>
+ <when value="-f fasta">
+ <param name="inputFileName" format="fasta" type="data" label="Input fasta File"/>
+ </when>
+ <when value="-f fastq">
+ <param name="inputFileName" format="fastq" type="data" label="Input fastq File"/>
+ </when>
+ </conditional>
+
+ <conditional name="OptionQuery">
+ <param name="OptionQ" type="select" label="measure type">
+ <option value="-q size">size</option>
+ <option value="-q intron size">intron size</option>
+ <option value="-q exon size">exon size</option>
+ <option value="-q 1st exon size">1st exon size</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="-q size">
+ </when>
+ <when value="-q intron size">
+ </when>
+ <when value="-q exon size">
+ </when>
+ <when value="-q 1st exon size">
+ </when>
+ <when value="NONE">
+
+ </when>
+ </conditional>
+
+ <conditional name="OptionXMax">
+ <param name="xMax" type="select" label="maximum value on the x-axis to plot [format: int]">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="maxValue" type="integer" value="1000"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionX">
+ <param name="xLab" type="select" label="X label title">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="xLabValue" type="text" value="Size" label="Notice: The title should not have spaces. EX. Size_of_transcript"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionY">
+ <param name="yLab" type="select" label="Y label title">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="yLabValue" type="text" value="#_reads" label="Notice: The title should not have spaces. EX. Number_of_reads"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+
+
+ <param name="barPlot" type="boolean" truevalue="-B" falsevalue="" checked="false" label="use barplot representation"/>
+
+ <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFile" format="png" label="[Get size] Output file"/>
+ <data name="excelOutput" format="csv">
+ <filter>excel</filter>
+ </data>
+ </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getWigData.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getWigData.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,17 @@
+<tool id="getWigData" name="get wig data">
+    <description>Compute the average data for some genomic coordinates using WIG files</description>
+    <command interpreter="python">
+ ../Java/Python/getWigData.py -i $inputGff3File -f gff3 -w $inputWigFile -t $tagName $strand -o $outputFile
+ </command> <!-- $strand already expands to "-s" (checked) or "" (unchecked); do not prefix it with another dash -->
+
+    <inputs>
+     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
+ <param name="tagName" type="text" value="None" label="tag option (compulsory option)" help="choose a tag name to write the wig information to output file."/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>
+    </inputs>
+
+    <outputs>
+        <data format="gff3" name="outputFile" label="[getWigData -> gff3] Output File"/>
+    </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getWigDistance.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getWigDistance.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,17 @@
+<tool id="getWigDistance" name="get wig distance">
+    <description>Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
+    <command interpreter="python">
+ ../Java/Python/getWigDistance.py -i $inputGff3File -f gff3 -w $inputWigFile -a 0.0 -d $distance $strand -o $outputFile
+ </command>
+
+    <inputs>
+     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
+ <param name="distance" type="integer" value="1000" label="distance option (compulsory option)" help="Distance around position. Be careful! The value must be greater than 0."/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>
+    </inputs>
+
+    <outputs>
+        <data name="outputFile" format="png" label="[getWigDistance] PNG output File"/>
+    </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/getWigProfile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/getWigProfile.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,70 @@
+<tool id="getWigProfile" name="get wig profile">
+ <description>Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
+ <command interpreter="python">
+ ../Java/Python/getWigProfile.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+ -w $inputWigFile
+ -p $nbPoints
+ -d $distance
+ $strands
+ -o $outputFilePNG
+ #if $optionSMO.SMO == 'Yes':
+ -m $optionSMO.smoothen
+ #end if
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
+ <param name="nbPoints" type="integer" value="1000" label="number of points on the x-axis"/>
+ <param name="distance" type="integer" value="0" label="distance around genomic coordinates"/>
+ <param name="strands" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately"/>
+
+ <conditional name="optionSMO">
+ <param name="SMO" type="select" label="smoothen the curve">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="smoothen" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFilePNG" format="png" label="[getWigProfile]out file"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/mapperAnalyzer.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/mapperAnalyzer.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,186 @@
+<tool id="mapperAnalyzer" name="mapper analyzer">
+ <description>Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. </description>
+ <command interpreter="python">
+ ../Java/Python/mapperAnalyzer.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'bam':
+ -f bam
+ #elif $formatType.FormatInputFileName1 == 'seqmap':
+ -f seqmap
+ #end if
+
+ -q $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'fasta':
+ -k fasta
+ #elif $formatType2.FormatInputFileName2 == 'fastq':
+ -k fastq
+ #end if
+
+
+ #if $optionnumber.number == 'Yes':
+ -n $optionnumber.numberVal
+ #end if
+ #if $optionsize.size == 'Yes':
+ -s $optionsize.sizeVal
+ #end if
+ #if $optionidentity.identity == 'Yes':
+ -d $optionidentity.identityVal
+ #end if
+ #if $optionmismatch.mismatch == 'Yes':
+ -m $optionmismatch.mismatchVal
+ #end if
+ #if $optiongap.gap == 'Yes':
+ -p $optiongap.gapVal
+ #end if
+ #if $optiontitle.title == 'Yes':
+ -t "$optiontitle.titleVal"
+ #end if
+ #if $optionappend.append == 'Yes':
+ -a $optionappend.appendfile
+ #end if
+
+ $merge
+ $remove
+ $remain
+ -o $outputFileGFF
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File mapping Format">
+ <option value="bed">bed</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="bam">bam</option>
+ <option value="seqmap" selected="true">seqmap</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="bam">
+ <param name="inputFileName1" format="bam" type="data" label="Input File"/>
+ </when>
+ <when value="seqmap">
+ <param name="inputFileName1" format="seqmap" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2"> <!-- selects the format (-k) and the file (-q) of the sequence input -->
+ <param name="FormatInputFileName2" type="select" label="Reference sequence File Format">
+ <option value="fasta" selected="true">fasta</option>
+ <option value="fastq">fastq</option>
+ </param>
+ <when value="fasta">
+ <param name="inputFileName2" format="fasta" type="data" label="Reference sequence File"/>
+ </when>
+ <when value="fastq">
+ <param name="inputFileName2" format="fastq" type="data" label="Reference sequence File"/>
+ </when>
+ </conditional>
+
+
+ <conditional name="optionnumber">
+ <param name="number" type="select" label="max. number of occurrences of a sequence">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="numberVal" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionsize">
+ <param name="size" type="select" label="minimum percentage of size ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="sizeVal" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionidentity">
+ <param name="identity" type="select" label="minimum percentage of identity ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="identityVal" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionmismatch">
+ <param name="mismatch" type="select" label="maximum number of mismatches">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="mismatchVal" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optiongap">
+ <param name="gap" type="select" label="maximum number of gaps">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="gapVal" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optiontitle">
+ <param name="title" type="select" label="title of the plots ">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="titleVal" type="text" value="title of the UCSC track" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="optionappend">
+ <param name="append" type="select" label="append to GFF3 file">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="appendfile" type="data" format="gff3" label="append a file"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <param name="merge" type="boolean" truevalue="-e" falsevalue="" checked="false" label="merge exons when introns are short "/>
+ <param name="remove" type="boolean" truevalue="-x" falsevalue="" checked="false" label="remove transcripts when exons are short"/>
+ <param name="remain" type="boolean" truevalue="-r" falsevalue="" checked="false" label="print the unmatched sequences "/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGFF" format="gff3" label="[mapperAnalyzer] out file"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/mappingToCoordinates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/mappingToCoordinates.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,57 @@
+<tool id="mappingToCoordinates" name="mapping to coordinates">
+ <description>Converts a mapping type file(given by a mapping tool) to a GFF3 type file.</description>
+ <command interpreter="python">
+ ../Java/Python/mappingToCoordinates.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'blast -8':
+ -f blast
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ -o $outputFileGff
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="sam">sam</option>
+ <option value="blast -8">blast</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName" format="sam" type="data" label="Input File"/>
+ </when>
+ <when value="blast -8">
+ <param name="inputFileName" format="blast" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/mergeSlidingWindowsClusters.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/mergeSlidingWindowsClusters.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,103 @@
+<tool id="mergeSlidingWindowsClusters" name="merge sliding windows clusters">
+ <description>Merges two files containing the results of a sliding windows clustering.</description>
+ <command interpreter="python">
+ ../Java/Python/mergeSlidingWindowsClusters.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+ -g gtf
+ #end if
+
+ -o $outputFileGff
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format 1">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input File Format 2">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/mergeTranscriptLists.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/mergeTranscriptLists.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,148 @@
+<tool id="mergeTranscriptLists" name="merge transcript lists">
+ <description>Merge the elements of two lists of genomic coordinates.</description>
+ <command interpreter="python">
+ ../Java/Python/mergeTranscriptLists.py -i $formatType.inputFileName1
+ #if $formatType.FormatInputFileName1 == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+ -g gtf
+ #end if
+
+ $all
+ $normalize
+
+ #if $OptionDistance.dis == 'Yes':
+ -d $OptionDistance.disVal
+ #end if
+
+ #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
+ -c
+ #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
+ -a
+ #end if
+
+ -o $outputFileGff
+
+
+
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input File Format 1">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+ </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input File Format 2">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+
+
+ <param name="all" type="boolean" truevalue="-k" falsevalue="" checked="false" label="print all the transcripts, not only those overlapping"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize the number of reads per cluster by the number of mappings per read "/>
+
+ <conditional name="OptionDistance"> <!-- when "Yes", the command line receives -d with the value of disVal -->
+ <param name="dis" type="select" label="set the max. distance between two transcripts" >
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="disVal" type="integer" value="0" label="max. distance between two transcripts" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <conditional name="OptionColinearOrAntiSens">
+ <param name="OptionCA" type="select" label="Colinear or anti-sens">
+ <option value="Colinear">Colinear</option>
+ <option value="AntiSens">AntiSens</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="Colinear">
+ </when>
+ <when value="AntiSens">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3" label="[mergeTranscriptLists]out file"/>
+ </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/modifyFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/modifyFasta.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,50 @@
+<tool id="modifyFasta" name="modify fasta">
+  <!-- Wraps modifyFasta.py: the input (-i) and output (-o) files are always passed;
+       -s and -e are passed only when the matching option is set to "Yes". -->
+  <description>Extend or shrink a list of sequences.</description>
+  <command interpreter="python"> ../Java/Python/modifyFasta.py -i $inputFile
+    #if $OptionStart.start == "Yes":
+      -s $OptionStart.startValue
+    #end if
+
+    #if $OptionEnd.end == "Yes":
+      -e $OptionEnd.endValue
+    #end if
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+
+    <conditional name="OptionStart">
+      <param name="start" type="select" label="keep first nucleotides">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="startValue" type="integer" value="0"/>
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+
+    <conditional name="OptionEnd">
+      <param name="end" type="select" label="keep last nucleotides">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="endValue" type="integer" value="0"/>
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[modifyFasta] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/modifyGenomicCoordinates.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/modifyGenomicCoordinates.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,83 @@
+<tool id="modifyGenomicCoordinates" name="modify genomic coordinates">
+  <!-- Wraps modifyGenomicCoordinates.py. The selected input format is passed as -f;
+       -s, -e, -5 and -3 are passed only when the matching option is set to "Yes". -->
+  <description>Extend or shrink a list of genomic coordinates.</description>
+  <command interpreter="python"> ../Java/Python/modifyGenomicCoordinates.py -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    #if $OptionStart.start == "Yes":
+      -s $OptionStart.startValue
+    #end if
+    #if $OptionEnd.end == "Yes":
+      -e $OptionEnd.endValue
+    #end if
+    #if $OptionFivePrim.five == "Yes":
+      -5 $OptionFivePrim.fivePValue
+    #end if
+    #if $OptionTroisP.TroisP == "Yes":
+      -3 $OptionTroisP.ThreePValue
+    #end if
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="bed">bed</option>
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+        <option value="sam">sam</option>
+        <option value="gtf">gtf</option>
+      </param>
+      <when value="bed"><param name="inputFileName" format="bed" type="data" label="Input File"/></when>
+      <when value="gff"><param name="inputFileName" format="gff" type="data" label="Input File"/></when>
+      <when value="gff2"><param name="inputFileName" format="gff2" type="data" label="Input File"/></when>
+      <when value="gff3"><param name="inputFileName" format="gff3" type="data" label="Input File"/></when>
+      <when value="sam"><param name="inputFileName" format="sam" type="data" label="Input File"/></when>
+      <when value="gtf"><param name="inputFileName" format="gtf" type="data" label="Input File"/></when>
+    </conditional>
+
+    <conditional name="OptionStart">
+      <param name="start" type="select" label="restrict to the start of the transcript">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="startValue" type="integer" value="0"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="OptionEnd">
+      <param name="end" type="select" label="restrict to the end of the transcript">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="endValue" type="integer" value="0"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="OptionFivePrim">
+      <param name="five" type="select" label="extend to the 5' direction">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="fivePValue" type="integer" value="0"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="OptionTroisP">
+      <param name="TroisP" type="select" label="extend to the 3' direction">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="ThreePValue" type="integer" value="0"/></when>
+      <when value="No"/>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[modifyGenomicCoordinates] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/modifySequenceList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/modifySequenceList.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,47 @@
+<tool id="modifySequenceList" name="modify sequence list">
+  <!-- Wraps modifySequenceList.py with a fixed "-f fasta" format; -s and -e are
+       passed only when the matching option is set to "Yes". -->
+  <description>Extend or shrink a list of sequences.</description>
+  <command interpreter="python"> ../Java/Python/modifySequenceList.py -i $inputFile -f fasta
+    #if $OptionStart.Start == "Yes":
+      -s $OptionStart.StartVal
+    #end if
+    #if $OptionEnd.End == "Yes":
+      -e $OptionEnd.EndVal
+    #end if
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <param name="inputFile" type="data" format="fasta" label="input file"/>
+
+    <conditional name="OptionStart">
+      <param name="Start" type="select" label="keep first nucleotides">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="StartVal" type="integer" value="0" />
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+
+    <conditional name="OptionEnd">
+      <param name="End" type="select" label="keep last nucleotides">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="EndVal" type="integer" value="0"/>
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[modifySequenceList] Output File"/>
+  </outputs>
+
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plot.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,52 @@
+<tool id="plot" name="Plot">
+  <!-- Wraps plot.py. The selected input format is passed as -f. Free-text values
+       (-x, -y, -l, -s) are double-quoted so that a value containing spaces stays a
+       single command-line argument instead of shifting the options after it. -->
+  <description>Plot some information from a list of transcripts.</description>
+  <command interpreter="python">
+    ../Java/Python/plot.py -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    -x "${xLabel}"
+    -y "${yLabel}"
+    -X $XVal
+    -Y $YVal
+    #if $optionLog.log == 'Yes':
+      -l "${optionLog.logOnAxisLabel}"
+    #end if
+    -s "${shape}"
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+      </param>
+      <when value="gff"><param name="inputFileName" format="gff" type="data" label="Input File"/></when>
+      <when value="gff2"><param name="inputFileName" format="gff2" type="data" label="Input File"/></when>
+      <when value="gff3"><param name="inputFileName" format="gff3" type="data" label="Input File"/></when>
+    </conditional>
+
+    <param name="xLabel" type="text" value="value1" label="x label option" help="Choose one of the tags of 9th column in GFF file to be plotted as X-axis. Warning: You can only choose the tag value is digital."/>
+    <param name="yLabel" type="text" value="value2" label="y label option" help="Choose one of the tags of 9th column in GFF file to be plotted as Y-axis. You can only choose the tag value is digital."/>
+    <param name="XVal" type="float" value="0.0" label="value for x when tag is not present "/>
+    <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
+
+    <conditional name="optionLog">
+      <param name="log" type="select" label="calculate log option" help="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="logOnAxisLabel" type="text" value="y" label="use log on x- or y-axis (write 'x', 'y' or 'xy')"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
+  </inputs>
+
+  <outputs>
+    <data name="outputFile" format="png" label="[plot] Output file"/>
+  </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/plotCoverage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotCoverage.xml Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,265 @@\n+<tool id="plotCoverage" name="plot coverage">\n+\t<description>Plot the coverage of the first data with respect to the second one.</description>\n+\t<command interpreter="python">\n+\t\t../Java/Python/WrappPlotCoverage.py -i $formatType.inputFileName1\n+\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n+\t\t\t-f bed\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n+\t\t\t-f gff\t\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n+\t\t\t-f gff2\n+\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n+\t\t\t-f gff3\n+\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n+\t\t\t-f sam\n+\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n+\t\t\t-f gtf\n+\t\t#end if\n+\t\t\t\n+\t\t-j $formatType2.inputFileName2\n+\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n+\t\t\t-g bed\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n+\t\t\t-g gff\t\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n+\t\t\t-g gff2\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n+\t\t\t-g gff3\n+\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n+\t\t\t-g sam\n+\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n+\t\t\t-g gtf\n+\t\t#end if\n+\n+\n+\t\t#if $optionRef.Ref == \'Yes\':\n+\t\t\t-q $optionRef.inputSequenceFile\n+\t\t#end if\n+\n+\t\t#if $optionwidth.width == \'Yes\':\n+\t\t\t-w $optionwidth.widthVal\n+\t\t#end if\n+\t\t#if $optionheight.height == \'Yes\':\n+\t\t\t-e $optionheight.heightVal\n+\t\t#end if\n+\t\t#if $optionXlab.Xlab == \'Yes\':\n+\t\t\t-x $optionXlab.XlabVal\n+\t\t#end if\n+\t\t#if $optionYlab.Ylab == \'Yes\':\n+\t\t\t-y $optionYlab.YlabVal\n+\t\t#end if\n+\t\t#if $optiontitle.title == \'Yes\':\n+\t\t\t-t $optiontitle.titleVal\n+\t\t#end if\t\n+\t\n+\t\t#if $optionplusColor.plusColor == \'Yes\':\n+\t\t\t-p $optionplusColor.plusColorVal\n+\t\t#end if\n+\t\t#if $optionminusColor.minusColor == \'Yes\':\n+\t\t\t-m $optionminusColor.minusColorVal\n+\t\t#end if\n+\n+\t\t#if 
$optionsumColor.sumColor == \'Yes\':\n+\t\t\t-s $optionsumColor.sumColorVal\n+\t\t#end if\n+\t\t#if $optionlineColor.lineColor == \'Yes\':\n+\t\t\t-l $optionlineColor.lineColorVal\n+\t\t#end if\t\n+\n+\t\t$merge\n+\t\t-o $outputFile\n+\t</command>\n+\n+\t<inputs>\n+\t\t<conditional name="formatType">\n+\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="sam">sam</option>\n+\t\t\t\t<option value="gtf">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t\t<when value="gtf">\n+\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="formatType2">\n+\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n+\t\t\t\t<option value="bed">bed</option>\n+\t\t\t\t<option value="gff">gff</option>\n+\t\t\t\t<option value="gff2">gff2</option>\n+\t\t\t\t<option value="gff3">gff3</option>\n+\t\t\t\t<option value="gff2">sam</option>\n+\t\t\t\t<option value="gff3">gtf</option>\n+\t\t\t</param>\n+\t\t\t<when value="bed">\n+\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Input File 
2"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff">\n+\t\t\t\t<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff2">\n+\t\t\t\t<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="gff3">\n+\t\t\t\t<param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>\n+\t\t\t</when>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param name="inputFileName2" format="sam" type="data" label="'..b'n value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="inputSequenceFile" format="fasta" type="data" value="None"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t\n+\n+\n+\t\t<conditional name="optionwidth">\n+\t\t\t<param name="width" type="select" label="width of the plots (in px)">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="widthVal" type="integer" value="1500" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t\n+\t\t<conditional name="optionheight">\n+\t\t\t<param name="height" type="select" label="height of the plots (in px)">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="heightVal" type="integer" value="1000" />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optiontitle">\n+\t\t\t<param name="title" type="select" label="title of the plots ">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="titleVal" type="text" value=" " />\n+\t\t\t</when>\n+\t\t\t<when 
value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t\n+\t\t<conditional name="optionXlab">\n+\t\t\t<param name="Xlab" type="select" label="label on the x-axis">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="XlabVal" type="text" value=" "/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionYlab">\n+\t\t\t<param name="Ylab" type="select" label="label on the y-axis">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="YlabVal" type="text" value=" " />\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionplusColor">\n+\t\t\t<param name="plusColor" type="select" label="color for the elements on the plus strand">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="plusColorVal" type="text" value="red"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionminusColor">\n+\t\t\t<param name="minusColor" type="select" label="color for the elements on the minus strand">\n+\t\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="minusColorVal" type="text" value="blue"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionsumColor">\n+\t\t\t<param name="sumColor" type="select" label="color for 2 strands coverage line">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param 
name="sumColorVal" type="text" value="black"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\n+\t\t<conditional name="optionlineColor">\n+\t\t\t<param name="lineColor" type="select" label="color for the lines">\n+\t\t\t\t<option value="Yes">Yes</option>\n+\t\t\t\t<option value="No" selected="true">No</option>\n+\t\t\t</param>\n+\t\t\t<when value="Yes">\n+\t\t\t\t<param name="lineColorVal" type="text" value="black"/>\n+\t\t\t</when>\n+\t\t\t<when value="No">\n+\t\t\t</when>\n+\t\t</conditional>\n+\t\t\n+\t\t<param name="merge" type="boolean" truevalue="-1" falsevalue="" checked="false" label="merge the 2 plots in 1"/>\n+\t</inputs>\n+\n+\t<outputs>\n+\t\t<data name="outputFile" format="tar" label="[plotCoverage] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>\n+\t</outputs> \n+\t\n+ <help>\n+ This script gives a .tar out file, if you want to take look at the results, you have to download it.\n+ </help>\t\t\n+</tool>\n'

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/plotGenomeCoverage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotGenomeCoverage.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,38 @@
+<tool id="plotGenomeCoverage" name="plot genome coverage">
+  <!-- Wraps plotGenomeCoverage.py: passes the input file (-i), its selected format (-f),
+       the reference FASTA (-r) and the PNG output path (-o). -->
+  <description>Get the coverage of a genome. </description>
+  <command interpreter="python"> ../Java/Python/plotGenomeCoverage.py -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    -r $reference
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="bed">bed</option>
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+        <option value="sam">sam</option>
+        <option value="gtf">gtf</option>
+      </param>
+      <when value="bed"><param name="inputFileName" format="bed" type="data" label="Input File"/></when>
+      <when value="gff"><param name="inputFileName" format="gff" type="data" label="Input File"/></when>
+      <when value="gff2"><param name="inputFileName" format="gff2" type="data" label="Input File"/></when>
+      <when value="gff3"><param name="inputFileName" format="gff3" type="data" label="Input File"/></when>
+      <when value="sam"><param name="inputFileName" format="sam" type="data" label="Input File"/></when>
+      <when value="gtf"><param name="inputFileName" format="gtf" type="data" label="Input File"/></when>
+    </conditional>
+
+    <param name="reference" type="data" label="reference Fasta File" format="fasta"/>
+  </inputs>
+
+  <outputs>
+    <data format="png" name="outputFile" label="[plotGenomeCoverage] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/plotRepartition.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotRepartition.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,52 @@
+<tool id="plotRepartition" name="plot repartition">
+  <!-- Wraps WrappPlotRepartition.py. Free-text values (-n, -c, -f, -l) are double-quoted:
+       logVal defaults to a single space, which unquoted would leave -l without a value
+       of its own and shift the -o option that follows it. -->
+  <description>Plot the repartition of different data on a whole genome. (This tool uses only 1 input file, the different values are stored in the tags. )</description>
+  <command interpreter="python">
+    ../Java/Python/WrappPlotRepartition.py -i $inputFileName
+    -n "${names}"
+    $normalize
+    #if $optionColor.Color == 'Yes':
+      -c "${optionColor.colValue}"
+    #end if
+    -f "${format}"
+    #if $optionLog.log == 'Yes':
+      -l "${optionLog.logVal}"
+    #end if
+    -o $outputFilePNG
+  </command>
+
+  <inputs>
+    <param name="inputFileName" type="data" label="Input Gff3 File" format="gff3"/>
+    <param name="names" type="text" value="None" label="name for the tags (separated by commas and no space) [compulsory option]"/>
+    <param name="normalize" type="boolean" truevalue="-r" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
+    <param name="format" type="text" value="png" label="format of the output file[default: png]"/>
+
+    <conditional name="optionColor">
+      <param name="Color" type="select" label="color of the lines (separated by commas and no space) ">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="colValue" type="text" value="None"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="optionLog">
+      <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="logVal" type="text" value=" "/></when>
+      <when value="No"/>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="outputFilePNG" format="tar" label="[plotRepartition]out file"/>
+  </outputs>
+
+  <help>
+ This script gives a .tar out file, if you want to take look at the results, you have to download it.
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/plotTranscriptList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/plotTranscriptList.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,99 @@
+<tool id="plotTranscriptList" name="plot transcript list">
+  <!-- Wraps plotTranscriptList.py. The selected input format is passed as -f. Free-text
+       values (-x, -y, -z, -n, -m, -l, -s) are double-quoted: several default to a single
+       space, which unquoted would leave the flag without a value and shift later options. -->
+  <description>Plot some information from a list of transcripts. </description>
+  <command interpreter="python">
+    ../Java/Python/plotTranscriptList.py -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    -x "${xVal}"
+    -y "${yVal}"
+    #if $optionz.z == 'Yes':
+      -z "${optionz.zVal}"
+    #end if
+    -X $XVal
+    -Y $YVal
+    -Z $ZVal
+    #if $optionxLab.xLab == 'Yes':
+      -n "${optionxLab.labVal}"
+    #end if
+    #if $optionyLab.yLab == 'Yes':
+      -m "${optionyLab.labVal}"
+    #end if
+    #if $optionyLog.log == 'Yes':
+      -l "${optionyLog.logVal}"
+    #end if
+    -s "${shape}"
+    -b $bucket
+    -o $outputFilePNG
+  </command>
+
+  <inputs>
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="bed">bed</option>
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+        <option value="sam">sam</option>
+        <option value="gtf">gtf</option>
+      </param>
+      <when value="bed"><param name="inputFileName" format="bed" type="data" label="Input File"/></when>
+      <when value="gff"><param name="inputFileName" format="gff" type="data" label="Input File"/></when>
+      <when value="gff2"><param name="inputFileName" format="gff2" type="data" label="Input File"/></when>
+      <when value="gff3"><param name="inputFileName" format="gff3" type="data" label="Input File"/></when>
+      <when value="sam"><param name="inputFileName" format="sam" type="data" label="Input File"/></when>
+      <when value="gtf"><param name="inputFileName" format="gtf" type="data" label="Input File"/></when>
+    </conditional>
+
+    <param name="xVal" type="text" value="None" label="tag for the x value [compulsory option]"/>
+    <param name="yVal" type="text" value="None" label="tag for the y value [compulsory option]"/>
+
+    <conditional name="optionz">
+      <param name="z" type="select" label="tag for the z value ">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="zVal" type="text" value="None"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <param name="XVal" type="float" value="0.0" label="value for x when tag is not present "/>
+    <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
+    <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present"/>
+
+    <conditional name="optionxLab">
+      <param name="xLab" type="select" label="label on the x-axis ">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="labVal" type="text" value=" "/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="optionyLab">
+      <param name="yLab" type="select" label="label on the y-axis ">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="labVal" type="text" value=" "/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="optionyLog">
+      <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="logVal" type="text" value=" "/></when>
+      <when value="No"/>
+    </conditional>
+
+    <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
+    <param name="bucket" type="float" value="1.0" label="bucket size (for the line plot)"/>
+  </inputs>
+
+  <outputs>
+    <data name="outputFilePNG" format="png" label="[plotTranscriptList]out file"/>
+  </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/qualToFastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/qualToFastq.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,19 @@
+<tool id="qualToFastq" name="qual -> Fastq">
+  <!-- Wraps qualToFastq.py: passes the FASTA file (-f), the qual file (-q) and the output path (-o). -->
+  <description>Convert a file in FASTA/Qual format to FastQ format.</description>
+  <command interpreter="python">
+    ../Java/Python/qualToFastq.py -f $inputFastaFile -q $inputQualFile -o $outputFile
+  </command>
+
+  <inputs>
+    <param name="inputFastaFile" type="data" label="Input fasta File" format="fasta"/>
+    <param name="inputQualFile" type="data" label="Input qual File" format="txt"/>
+  </inputs>
+
+  <outputs>
+    <data format="fastq" name="outputFile" label="[qual -> Fastq] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/removeExonLines.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/removeExonLines.sh Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,3 @@
+#!/bin/bash
+# Print the file given as $1 to stdout, dropping every line that contains "exon".
+sed '/exon/d' "$1"

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/removeExonLines.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/removeExonLines.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,19 @@
+<tool id="removeExonLines" name="remove exon lines">
+  <!-- Runs removeExonLines.sh on the input file and redirects its stdout to the output dataset.
+       NOTE(review): the command points at ../Java/Python/removeExonLines.sh; confirm the
+       script is actually present at that location relative to this wrapper. -->
+  <description>Removes the lines containing Exon.</description>
+  <command interpreter="sh"> ../Java/Python/removeExonLines.sh $inputFile > $outputFile  </command>
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[removeExonLines] Output File"/>
+  </outputs>
+
+  <help>
+ command example: sh removeExonLines.sh input.gff3
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/restrictFromSize.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/restrictFromSize.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,61 @@
+<tool id="restrictFromSize" name="restrict from size">
+  <!-- Wraps restrictFromSize.py. The selected input format is passed as -f;
+       -M and -m are passed only when the matching option is set to "Yes". -->
+  <description>Select the elements of a list of sequences or transcripts with a given size.</description>
+  <command interpreter="python">
+    ../Java/Python/restrictFromSize.py -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    #if $OptionMax.maximum == "Yes":
+      -M $OptionMax.max
+    #end if
+    #if $OptionMin.minimum == "Yes":
+      -m $OptionMin.min
+    #end if
+    -o $outputFileGff
+  </command>
+
+  <inputs>
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="bed">bed</option>
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+        <option value="sam">sam</option>
+        <option value="gtf">gtf</option>
+      </param>
+      <when value="bed"><param name="inputFileName" format="bed" type="data" label="Input File"/></when>
+      <when value="gff"><param name="inputFileName" format="gff" type="data" label="Input File"/></when>
+      <when value="gff2"><param name="inputFileName" format="gff2" type="data" label="Input File"/></when>
+      <when value="gff3"><param name="inputFileName" format="gff3" type="data" label="Input File"/></when>
+      <when value="sam"><param name="inputFileName" format="sam" type="data" label="Input File"/></when>
+      <when value="gtf"><param name="inputFileName" format="gtf" type="data" label="Input File"/></when>
+    </conditional>
+
+    <conditional name="OptionMax">
+      <param name="maximum" type="select" label="maximum number of np">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="max" type="integer" value="1" help="Be Careful! The value must be upper than 0"/></when>
+      <when value="No"/>
+    </conditional>
+
+    <conditional name="OptionMin">
+      <param name="minimum" type="select" label="minimum number of np">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes"><param name="min" type="integer" value="1" help="Be Careful! The value must be upper than 0"/></when>
+      <when value="No"/>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="outputFileGff" format="gff3" label="[restrictFromSize] Output File"/>
+  </outputs>
+
+  <help>
+ command example: restrictFromSize.py -i cis_e10_cluster20InSeed2515_nbEUp10.gff3 -f gff -o cis_e10_cluster20InSeed2515_nbEUp10_lgUp50 -m 50
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/restrictSequenceList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/restrictSequenceList.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,16 @@
+<tool id="restrictSequenceList" name="restrict sequence list">
+  <!-- Galaxy wrapper around restrictSequenceList.py: takes a FASTA dataset and a text
+       dataset of names, and produces a FASTA dataset. -->
+  <description>Keep the elements of a list of sequences whose name is mentioned in a given file.</description>
+  <!-- -i: input sequences, -f: their format (always fasta here), -n: dataset holding the
+       names (see the label of the "name" parameter), -o: output dataset. -->
+  <command interpreter="python"> ../Java/Python/restrictSequenceList.py -i $inputFile -f fasta -n $name -o $outputFile </command>
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
+    <param name="name" type="data" label="The txt file contains the names of the transcripts." format="txt"/>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[restrictSequenceList] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/restrictTranscriptList.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/restrictTranscriptList.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,108 @@
+<tool id="restrictTranscriptList" name="restrict transcript list">
+  <description>Keep the coordinates which are located in a given position.</description>
+  <!-- The select values of FormatInputFileName (bed, gff, gff2, gff3, sam, gtf) are the very
+       strings that have to follow -f, so the selected value is passed through directly.
+       -c / -s / -e are only emitted when the matching Yes/No selector is set to "Yes". -->
+  <command interpreter="python">
+    ../Java/Python/restrictTranscriptList.py
+    -i $formatType.inputFileName
+    -f $formatType.FormatInputFileName
+    #if $OptionChrom.Chrom == "Yes":
+      -c $OptionChrom.ChromName
+    #end if
+    #if $OptionStart.start == "Yes":
+      -s $OptionStart.startValue
+    #end if
+    #if $OptionEnd.end == "Yes":
+      -e $OptionEnd.endValue
+    #end if
+    -o $outputFile
+  </command>
+
+  <inputs>
+    <!-- Input dataset; the accepted datatype follows the selected format. -->
+    <conditional name="formatType">
+      <param name="FormatInputFileName" type="select" label="Input File Format">
+        <option value="bed">bed</option>
+        <option value="gff">gff</option>
+        <option value="gff2">gff2</option>
+        <option value="gff3">gff3</option>
+        <option value="sam">sam</option>
+        <option value="gtf">gtf</option>
+      </param>
+      <when value="bed">
+        <param name="inputFileName" type="data" format="bed" label="Input File"/>
+      </when>
+      <when value="gff">
+        <param name="inputFileName" type="data" format="gff" label="Input File"/>
+      </when>
+      <when value="gff2">
+        <param name="inputFileName" type="data" format="gff2" label="Input File"/>
+      </when>
+      <when value="gff3">
+        <param name="inputFileName" type="data" format="gff3" label="Input File"/>
+      </when>
+      <when value="sam">
+        <param name="inputFileName" type="data" format="sam" label="Input File"/>
+      </when>
+      <when value="gtf">
+        <param name="inputFileName" type="data" format="gtf" label="Input File"/>
+      </when>
+    </conditional>
+
+    <!-- Optional restriction on the chromosome name (-c). -->
+    <conditional name="OptionChrom">
+      <param name="Chrom" type="select" label="chromosome name">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="ChromName" type="text" value="None"/>
+      </when>
+      <when value="No"/>
+    </conditional>
+
+    <!-- Optional restriction on the start (-s). -->
+    <conditional name="OptionStart">
+      <param name="start" type="select" label="restrict to the start of the transcript">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="startValue" type="integer" value="0"/>
+      </when>
+      <when value="No"/>
+    </conditional>
+
+    <!-- Optional restriction on the end (-e). -->
+    <conditional name="OptionEnd">
+      <param name="end" type="select" label="restrict to the end of the transcript">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="endValue" type="integer" value="0"/>
+      </when>
+      <when value="No"/>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="outputFile" format="gff3" label="[restrictTranscriptList] Output File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/test/CollapseReads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/test/CollapseReads.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,49 @@
+<tool id="collapseReads" name="collapseReads">
+ <description>Merges two reads if they have exactly the same genomic coordinates.</description>
+ <!-- $strand is a boolean parameter that already expands to "-s" (checked) or to an empty
+      string (unchecked). It is therefore written without an extra leading dash; with
+      one, the command line would receive "-\-s" or a lone "-". -->
+ <command interpreter="python">
+ ../Java/Python/CollapseReads.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #end if
+
+ $strand
+ -o $outputFileGff
+ --galaxy
+ </command>
+
+ <inputs>
+ <!-- Input dataset; the accepted datatype follows the selected format. -->
+ <conditional name="formatType">
+ <param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName" format="bed" type="data" label="Input File"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName" format="gff" type="data" label="Input File"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName" format="gff2" type="data" label="Input File"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName" format="gff3" type="data" label="Input File"/>
+ </when>
+ </conditional>
+
+ <!-- Expands to the complete "-s" flag when checked, to nothing otherwise. -->
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs>
+
+</tool>
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,91 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from SMART.galaxy.WrappGetLetterDistribution import WrappGetLetterDistribution
+
+# Root of the S-MART tree, built from the REPET_PATH environment variable
+# (os.environ[...] raises KeyError when that variable is not set).
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+# Directory holding the input data used by the tests (e.g. SR1.fasta).
+SMART_DATA = SMART_PATH + "/data"
+
+# Functional test running WrappGetLetterDistribution on a FASTA input file.
+class Test_F_WrappGetLetterDistribution(unittest.TestCase):
+
+
+    # Build the wrapper instances and the name of the expected CSV file.
+    def setUp(self):
+        self._dirTest = "%s/galaxy/test" % SMART_PATH
+        self._iwrappFastq = WrappGetLetterDistribution()
+        self._iwrappFasta = WrappGetLetterDistribution()
+        # NOTE(review): bare file name, hence resolved against the current working
+        # directory; presumably the file lives in self._dirTest -- confirm.
+        self._expOutputCSV = "expOutputTomate.csv"
+
+    # Run the wrapper on SR1.fasta with CSV output enabled, then check that the two PNG
+    # files and the CSV file exist and that the CSV equals the expected one.
+    def test_wrappFasta(self):
+        wrapper = self._iwrappFasta
+        wrapper._inputFileName = "%s/SR1.fasta" % SMART_DATA
+        wrapper._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
+        wrapper._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
+        wrapper._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
+        wrapper._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
+        wrapper._inputFileFormat = "fasta"
+        wrapper._csv = True
+        if not(FileUtils.isEmpty(wrapper._inputFileName)):
+            wrapper.wrapp()
+            self.assertTrue(os.path.exists(wrapper._outputFileNamePNG))
+            self.assertTrue(os.path.exists(wrapper._outputFileNamePerNtPNG))
+            self.assertTrue(os.path.exists(wrapper._outputFileNameCSV))
+            self.assertTrue(FileUtils.are2FilesIdentical(wrapper._outputFileNameCSV, self._expOutputCSV))
+        else:
+            # Report the file that was actually tested: the test case has no
+            # _inputFileFasta attribute, so reading it raised AttributeError here.
+            print "Problem : the input fasta file %s is empty!" % wrapper._inputFileName
+
+
+#    def test_wrappFasta_withoutCSV_Opt(self):
+#        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
+#        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
+#        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
+#        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
+#        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
+#        self._iwrappFasta._inputFileFormat = "fasta"
+#        self._iwrappFasta._csv = False
+#        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
+#            self._iwrappFasta.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG))
+#        else:
+#            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
+#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
+#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
+#
+#
+#    def test_wrappFastq(self):
+#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
+#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
+#        self._iwrappFastq._inputFileFormat = "fastq"
+#        self._iwrappFastq._csv = True
+#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
+#            self._iwrappFastq.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNameCSV))
+#            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFastq._outputFileNameCSV,self._expOutputCSV))
+#        else:
+#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
+#
+#
+#    def test_wrappFastq_withoutCSV_Opt(self):
+#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
+#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
+#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
+#        self._iwrappFastq._inputFileFormat = "fastq"
+#        self._iwrappFastq._csv = False
+#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
+#            self._iwrappFastq.wrapp()
+#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
+#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))
+#        else:
+#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
+#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
+#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
+
+
+# Run the test case through unittest when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/testArgum.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testArgum.xml Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,24 @@
+<tool id="test_argument" name="test_argu" version="1.0.0">
+  <description>To test the arguments from shell.</description>
+  <!-- Runs testArgu.sh with the test_out dataset as first argument, followed by the
+       datasets of every replicate of every replicate group: each one is written with a
+       trailing ":" and #slurp removes the line break after it. Standard output is appended
+       to the Log_File dataset. -->
+  <command>
+../testArgu.sh $test_out
+#for $i in $replicate_groups
+#for $j in $i.replicates
+$j.bam_alignment:#slurp
+#end for
+#end for
+    >> $Log_File </command>
+  <inputs>
+    <!-- NOTE(review): anno_input_selected is not referenced by the command above. -->
+ <param format="gff3" name="anno_input_selected" type="data" label="Genome annotation in GFF3 file" help="A tab delimited format for storing sequence features and annotations"/>
+   <repeat name="replicate_groups" title="Replicate group" min="2">
+     <repeat name="replicates" title="Replicate">
+      <!-- NOTE(review): declared format is fastq while label and help describe a BAM file; confirm which one is intended. -->
+      <param format="fastq" name="bam_alignment" type="data" label="BAM alignment file" help="BAM alignment file. Can be generated from SAM files using the SAM Tools."/>
+     </repeat>
+   </repeat>
+  </inputs>
+
+  <outputs>
+    <!-- NOTE(review): both outputs carry the same label "DESeq result". -->
+    <data format="txt" name="test_out" label="DESeq result"/>
+ <data format="txt" name="Log_File" label="DESeq result"/>
+  </outputs>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/testR.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testR.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,19 @@
+<tool id="testDiffExpAnal" name="Differential Expression Analysis">
+  <description>Differential expression analysis for sequence count data (DESeq)</description>
+  <!-- Positional arguments of testR.sh, in order: input table, gene-name columns,
+       condition-1 columns, condition-2 columns, CSV output, PNG output.
+       Standard error is redirected to the log dataset. -->
+  <command interpreter="sh">
+    ../DiffExpAnal/testR.sh
+    $inputFile
+    $columnsOfGeneName
+    $columnsOfCondition1
+    $columnsOfCondition2
+    $outputFileCSV
+    $outputFilePNG
+    2>$outputLog
+  </command>
+
+  <inputs>
+    <param name="inputFile" type="data" format="tabular" label="Input File"/>
+    <!-- Column lists are free text, comma separated. -->
+    <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0."/>
+    <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition1 with ',' separator."/>
+    <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition2 with ',' separator."/>
+  </inputs>
+
+  <outputs>
+    <data name="outputFileCSV" format="tabular" label="[DiffExpAnal] Output CSV File"/>
+    <data name="outputFilePNG" format="png" label="[DiffExpAnal] Output PNG File"/>
+    <data name="outputLog" format="tabular" label="[DiffExpAnal] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/trimAdaptor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/trimAdaptor.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,39 @@
+<tool id="trimAdaptor" name="trim adaptors">
+  <description>Remove the 3' adaptor of a list of reads.</description>
+  <!-- Command line: -i/-f input fastq, -a adaptor, -e number of errors in percent (only
+       when the "Error" selector is "Yes"), -n file receiving the sequences with no
+       adaptor, -o output.
+       "-n $noAdaptorFile" is emitted as a unit and only when the "log option" box is
+       checked; emitting $noAdaptorFile unconditionally passed the path of a dataset that
+       is filtered out as a stray positional argument. -->
+  <command interpreter="python"> ../Java/Python/trimAdaptor.py -i $inputFile -f fastq
+    -a $adaptor
+    #if $OptionError.Error == "Yes":
+      -e $OptionError.ErrorVal
+    #end if
+    #if str($noAdaptor) == "-n":
+      -n $noAdaptorFile
+    #end if
+    -o $outputFile
+  </command>
+
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
+    <param name="adaptor" type="text" value="None" label="adaptor [compulsory option]"/>
+    <conditional name="OptionError">
+      <param name="Error" type="select" label="number of errors in percent">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="ErrorVal" type="integer" value="0" />
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+    <!-- Checked: expands to "-n" and enables the log output below. -->
+    <param name="noAdaptor" type="boolean" truevalue="-n" falsevalue="" checked="false" label="log option" help="file name where to print sequences with no adaptor"/>
+  </inputs>
+
+  <outputs>
+    <data format="fastq" name="outputFile" label="[trimAdaptor] Output File"/>
+    <!-- Only produced when the "noAdaptor" box is checked. -->
+    <data name="noAdaptorFile" format="fastq" label="[trimAdaptor] Log File">
+      <filter>noAdaptor</filter>
+    </data>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 SMART/galaxy/trimSequences.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/trimSequences.xml Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,81 @@
+<tool id="trimSequences" name="trim sequences">
+  <description>Remove the 5' and/or 3' adaptors of a list of reads.</description>
+  <!-- Command line: -i/-f input fastq, -5 / -3 adaptors and -e number of errors in percent
+       (each only when its selector is "Yes"), -d accept indels, -n / -m files receiving
+       the sequences with no 5' / 3' adaptor, -o output.
+       "-n $noAdaptorFile5p" and "-m $noAdaptorFile3p" are emitted as units and only when
+       the matching box is checked; emitting the file names unconditionally passed the
+       paths of filtered-out datasets as stray positional arguments. -->
+  <command interpreter="python"> ../Java/Python/trimSequences.py -i $inputFile -f fastq
+    #if $OptionFPADP.FPADP == "Yes":
+      -5 $OptionFPADP.fivePAdaptor
+    #end if
+    #if $OptionTPADP.TPADP == "Yes":
+      -3 $OptionTPADP.threePAdaptor
+    #end if
+    #if $OptionError.Error == "Yes":
+      -e $OptionError.ErrorVal
+    #end if
+
+    $indels
+    #if str($noAdaptor5p) == "-n":
+      -n $noAdaptorFile5p
+    #end if
+    #if str($noAdaptor3p) == "-m":
+      -m $noAdaptorFile3p
+    #end if
+    -o $outputFile
+
+  </command>
+
+
+  <inputs>
+    <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
+
+    <conditional name="OptionFPADP">
+      <param name="FPADP" type="select" label="5'adaptor">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="fivePAdaptor" type="text" value="None" />
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+
+    <conditional name="OptionTPADP">
+      <param name="TPADP" type="select" label="3'adaptor">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="threePAdaptor" type="text" value="None" />
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+
+    <conditional name="OptionError">
+      <param name="Error" type="select" label="number of errors in percent">
+        <option value="Yes">Yes</option>
+        <option value="No" selected="true">No</option>
+      </param>
+      <when value="Yes">
+        <param name="ErrorVal" type="integer" value="0" />
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+
+    <!-- Boolean flags: each expands to its complete option string when checked. -->
+    <param name="indels" type="boolean" truevalue="-d" falsevalue="" checked="false" label="indels option" help="also accept indels"/>
+    <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adaptor "/>
+    <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adaptor "/>
+
+
+
+  </inputs>
+
+  <outputs>
+    <data format="fastq" name="outputFile" label="[trimSequences] Output File"/>
+    <!-- Each log dataset is only produced when its box is checked. -->
+    <data name="noAdaptorFile5p" format="fastq" label="[trimSequences] noAdaptor5p File">
+      <filter>noAdaptor5p</filter>
+    </data>
+    <data name="noAdaptorFile3p" format="fastq" label="[trimSequences] noAdaptor3p File">
+      <filter>noAdaptor3p</filter>
+    </data>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r ea3082881bf8 -r 769e306b7933 commons/__init__.pyc

Binary file commons/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/__init__.pyc

Binary file commons/core/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/AbstractChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/AbstractChecker.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,61 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.checker.IChecker import IChecker
+from commons.core.LoggerFactory import LoggerFactory
+
+
+## Base class giving a Checker its own logger.
+#
+#  Derives from IChecker and keeps a logger in the self._log attribute.
+#  A subclass obtains that logger by calling this __init__() from its own constructor.
+class AbstractChecker( IChecker ):
+
+    ## Constructor
+    #
+    # @param logFileName value handed to LoggerFactory.createLogger() to build the logger
+    #
+    def __init__(self, logFileName):
+        logger = LoggerFactory.createLogger(logFileName)
+        self._log = logger
+
+
+    ## Replace the current logger
+    #
+    # @param logger logger instance to use from now on
+    #
+    def setLogger(self, logger):
+        self._log = logger
+
+
+    ## Accessor for the current logger
+    #
+    # @return the object currently stored in self._log
+    #
+    def getLogger(self):
+        return self._log

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/CheckerException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/CheckerException.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,52 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Exception raised when a check fails
+#
+# Exception subclass that carries, besides its message, a list of detail messages.
+#
+class CheckerException( Exception ):
+
+    ## Constructor
+    #
+    # @param msg  text passed on to Exception and also kept in self.msg
+    def __init__(self, msg=""):
+        Exception.__init__(self, msg)
+        self.msg = msg
+        # Starts empty; filled through setMessages().
+        self.messages = []
+
+
+    ## Store the list of detail messages
+    #
+    # @param lMessages  list kept as-is (it is not copied)
+    def setMessages(self, lMessages):
+        self.messages = lMessages
+
+
+    ## Return the list of detail messages
+    #
+    # @return the list given to setMessages(), or an empty list if it was never called
+    def getMessages(self):
+        return self.messages

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/CheckerUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/CheckerUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,316 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import re\n+import glob\n+import ConfigParser\n+from ConfigParser import NoOptionError\n+from ConfigParser import NoSectionError\n+from commons.core.checker.CheckerException import CheckerException\n+\n+\n+## A set of static methods used to perform checks.\n+#\n+#\n+class CheckerUtils( object ):\n+ \n+ ## Check if blastName param is in ["blastn", "blastp", "blastx", "tblastn", "tblastx"]\n+ # \n+ # @param blastName name to check\n+ # @return True if name is in list False otherwise\n+ #\n+ def isBlastNameNotInBlastValues( blastName ):\n+ blastValuesSet = set( ["blastn", "blastp", "blastx", "tblastn", "tblastx"] )\n+ blastNameSet = set( [ blastName ] )\n+ return not blastNameSet.issubset( blastValuesSet )\n+ \n+ isBlastNameNotInBlastValues = staticmethod( isBlastNameNotInBlastValues )\n+ \n+ \n+ ## Check if param is NOT "TRUE" and NOT false "FALSE"\n+ #\n+ # @param param str to check\n+ # @return True if param is not eq to "TRUE" AND not eq to "FALSE", false otherwise \n+ #\n+ def isNotTRUEisNotFALSE( param ):\n+ return param != "TRUE" and param != "FALSE"\n+ \n+ isNotTRUEisNotFALSE = staticmethod( isNotTRUEisNotFALSE )\n+ \n+ \n+ ## Check if resource (file or dir) do NOT exists\n+ # \n+ # @param resource file or dir to check\n+ # @return True if resource exists False otherwise\n+ #\n+ def isRessourceNotExits( resource ):\n+ return not os.path.exists( resource )\n+ \n+ isRessourceNotExits = staticmethod( isRessourceNotExits )\n+ \n+ \n+ ## Check a specific E-value format: de-dd \n+ #\n+ # @param param E-value to check\n+ # @return True if format is de-dd False otherwise\n+ #\n+ def isNotAeValueWithOneDigit2DecimalsAtLeast( param ):\n+ # \\d\\d stands for 2 digits and more ???\n+ return not re.match( "\\de\\-\\d\\d", param )\n+ \n+ isNotAeValueWithOneDigit2DecimalsAtLeast = staticmethod( 
isNotAeValueWithOneDigit2DecimalsAtLeast )\n+ \n+ \n+ ## Check a number format\n+ #\n+ # @param param value to check\n+ # @return True if param is a number (d+) False otherwise\n+ #\n+ def isNotANumber( param ):\n+ return not re.match( "\\d+", param )\n+ \n+ isNotANumber = staticmethod( isNotANumber )\n+ \n+\n+ ## Check if an executable is in the user\'s PATH\n+ #\n+ # @param exeName name of t'..b'me)\n+ \n+ checkSectionInConfigFile = staticmethod( checkSectionInConfigFile )\n+ \n+ \n+ ## Check if an option is in a specified section in the configuration file\n+ #\n+ # @param config filehandle of configuration file\n+ # @param sectionName string of section name\n+ # @param optionName string of option name to check\n+ # @exception NoOptionError: if option not found raise a NoOptionError\n+ #\n+ def checkOptionInSectionInConfigFile( config, sectionName, optionName ):\n+ config.get( sectionName, optionName )\n+ \n+ checkOptionInSectionInConfigFile = staticmethod( checkOptionInSectionInConfigFile )\n+ \n+ \n+ ## Check version number coherency between configFile and CHANGELOG\n+ #\n+ # @param config ConfigParser Instance of configuration file\n+ # @param changeLogFileHandle CHANGELOG file handle\n+ # @exception NoOptionError: if option not found raise a NoOptionError\n+ #\n+ def checkConfigVersion( changeLogFileHandle, config ):\n+ line = changeLogFileHandle.readline()\n+ while not line.startswith("REPET release "):\n+ line = changeLogFileHandle.readline()\n+ numVersionChangeLog = line.split()[2]\n+ \n+ numVersionConfig = config.get("repet_env", "repet_version")\n+ \n+ if not numVersionChangeLog == numVersionConfig:\n+ message = "*** Error: wrong config file version. 
Expected version num is " + numVersionChangeLog + " but actual in config file is " + numVersionConfig\n+ raise CheckerException(message)\n+ \n+ checkConfigVersion = staticmethod( checkConfigVersion )\n+ \n+ \n+ ## Get version number from CHANGELOG\n+ #\n+ # @param changeLogFile CHANGELOG file name\n+ #\n+ def getVersionFromChangelogFile(changeLogFileName):\n+ with open(changeLogFileName) as changeLogFileHandle:\n+ line = changeLogFileHandle.readline()\n+ while not line.startswith("REPET release "):\n+ line = changeLogFileHandle.readline()\n+ numVersionChangeLog = line.split()[2]\n+ return numVersionChangeLog\n+ \n+ \n+ getVersionFromChangelogFile = staticmethod( getVersionFromChangelogFile )\n+ \n+ \n+ ## Check if headers of an input file contain only alpha numeric characters and "_ : . -"\n+ #\n+ # @param fileHandler file handle\n+ # @exception CheckerException if bad header raise a CheckerException\n+ #\n+ def checkHeaders( fileHandler ):\n+ lHeaders = CheckerUtils._getHeaderFromFastaFile(fileHandler)\n+ p = re.compile(\'[^a-zA-Z0-9_:\\.\\-]\', re.IGNORECASE)\n+ lWrongHeaders = []\n+ for header in lHeaders:\n+ errList=p.findall(header)\n+ if len( errList ) > 0 :\n+ lWrongHeaders.append(header)\n+ if lWrongHeaders != []:\n+ exception = CheckerException()\n+ exception.setMessages(lWrongHeaders)\n+ raise exception\n+ \n+ checkHeaders = staticmethod( checkHeaders ) \n+ \n+ \n+ def _getHeaderFromFastaFile( inFile ):\n+ lHeaders = []\n+ while True:\n+ line = inFile.readline()\n+ if line == "":\n+ break\n+ if line[0] == ">":\n+ lHeaders.append( line[1:-1] )\n+ return lHeaders\n+ \n+ _getHeaderFromFastaFile = staticmethod( _getHeaderFromFastaFile ) \n+\n+\n+ ## Return True if an option is in a specified section in the configuration file, False otherwise\n+ #\n+ # @param config handler of configuration file\n+ # @param sectionName string of section name\n+ # @param optionName string of option name to check\n+ #\n+ def isOptionInSectionInConfig( configHandler, section, 
option ):\n+ try:\n+ CheckerUtils.checkOptionInSectionInConfigFile( configHandler, section, option ) \n+ except NoOptionError:\n+ return False\n+ return True\n+ \n+ isOptionInSectionInConfig = staticmethod( isOptionInSectionInConfig )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigChecker.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,226 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import re\n+import sys\n+from commons.core.utils.RepetConfigParser import RepetConfigParser\n+from commons.core.checker.ConfigValue import ConfigValue\n+from commons.core.checker.IChecker import IChecker\n+from commons.core.checker.RepetException import RepetException\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Rule(object):\n+ \n+ def __init__(self, mandatory= False, isPattern=False, type="", set=(), help =""):\n+ self.mandatory = mandatory\n+ self.isPattern = isPattern\n+ self.type = type\n+ self.set = set\n+ self.help = help\n+ \n+class ConfigRules(object):\n+ \n+ def __init__(self, configName = "", configDescription = ""):\n+ self.configName = configName\n+ self.configDescription = configDescription\n+ self.dRules4Sections={}\n+ \n+ def _addRule(self, section, option="DEFAULT", mandatory=False, isPattern=False, type="", set=(), help =""):\n+ if not self.dRules4Sections.has_key(section):\n+ self.dRules4Sections[section] = {}\n+ self.dRules4Sections[section][option]=Rule(mandatory, isPattern, type.lower(), set) \n+ \n+ def addRuleSection(self, section, mandatory=False, isPattern=False, help = ""):\n+ self._addRule(section = section, option = "DEFAULT", mandatory = mandatory, isPattern = isPattern, help = "")\n+ \n+ def addRuleOption(self, section, option, mandatory=False, isPattern=False, type="", set=(), help = ""):\n+ self._addRule(section = section, option = option, mandatory = mandatory, isPattern = isPattern, type = type, set=set , help = "")\n+ \n+ def isSectionMandatory(self, section):\n+ if self.dRules4Sections.has_key(section):\n+ if self.dRules4Sections[section].has_key("DEFAULT"):\n+ return self.dRules4Sections[section]["DEFAULT"].mandatory\n+ return False\n+ \n+ def isOptionMandatory(self, section, option):\n+ if self.dRules4Sections.has_key(section):\n+ if 
self.dRules4Sections[section].has_key(option):\n+ return self.dRules4Sections[section][option].mandatory\n+ return False\n+ \n+ def getRule(self, section, option):\n+ if self.dRules4Sections.has_key(section):\n+ if self.dRules4Sections[section].has_key(option):\n+ return self.dRules4Sections[section][option]\n+ '..b'on(sectionName, optionName):\n+ missingOption += "\\n - [%s]: %s" % (sectionName, optionName)\n+ if missingOption != "":\n+ raise RepetException ("Error in configuration file %s, following options are missing: %s\\n" % (self._configFileName, missingOption))\n+ \n+ def getSectionNamesAccordingPatternRules (self, sectionWordOrPattern, isPattern): \n+ lSectionsFoundAccordingPatternRules=[]\n+ if isPattern == False:\n+ if self._iRawConfig.has_section(sectionWordOrPattern):\n+ lSectionsFoundAccordingPatternRules.append(sectionWordOrPattern)\n+ else:\n+ for sectionName in self._iRawConfig.sections():\n+ if re.search(sectionWordOrPattern, sectionName, re.IGNORECASE):\n+ lSectionsFoundAccordingPatternRules.append(sectionName)\n+ return lSectionsFoundAccordingPatternRules\n+ \n+ def getOptionsNamesAccordingPatternRules(self, sectionName, optionWordOrPattern, isPattern):\n+ lOptionsFoundAccordingPatternRules=[]\n+ if isPattern == False:\n+ if self._iRawConfig.has_option(sectionName, optionWordOrPattern):\n+ lOptionsFoundAccordingPatternRules.append(optionWordOrPattern)\n+ else :\n+ for optionName in self._iRawConfig.options(sectionName):\n+ if re.search(optionWordOrPattern, optionName, re.IGNORECASE)!= None:\n+ lOptionsFoundAccordingPatternRules.append(optionName)\n+ return lOptionsFoundAccordingPatternRules\n+ \n+ def extendConfigRulesWithPatternRules(self):\n+ for sectionName in self._iConfigRules.dRules4Sections.keys():\n+ dRules4OptionsOfThisSection = self._iConfigRules.dRules4Sections[sectionName] \n+ lRawSections=[]\n+ if dRules4OptionsOfThisSection.has_key("DEFAULT"):\n+ mandatorySection = dRules4OptionsOfThisSection["DEFAULT"].mandatory\n+ 
isPatternSection = dRules4OptionsOfThisSection["DEFAULT"].isPattern\n+ lRawSections=self.getSectionNamesAccordingPatternRules(sectionName, isPatternSection)\n+ for rawSectionName in lRawSections:\n+ self._iExtendedConfigRules.addRuleSection(rawSectionName, "DEFAULT", mandatorySection )\n+ if mandatorySection and (len(lRawSections)==0):\n+ self._iExtendedConfigRules.addRuleSection(sectionName, "DEFAULT", mandatorySection )\n+ else:\n+ lRawSections.append(sectionName) \n+ for optionName in dRules4OptionsOfThisSection.keys():\n+ setOption = dRules4OptionsOfThisSection[optionName].set\n+ isPatternOption = dRules4OptionsOfThisSection[optionName].isPattern\n+ mandatoryOption = dRules4OptionsOfThisSection[optionName].mandatory\n+ typeOption = dRules4OptionsOfThisSection[optionName].type\n+ if optionName != "DEFAULT":\n+ for rawSectionName in lRawSections:\n+ lRawOptions=self.getOptionsNamesAccordingPatternRules(rawSectionName, optionName, isPatternOption)\n+ for rawOptionName in lRawOptions:\n+ self._iExtendedConfigRules.addRuleOption(rawSectionName, rawOptionName, mandatoryOption, False, typeOption, setOption)\n+ if mandatoryOption and (len(lRawOptions)==0):\n+ self._iExtendedConfigRules.addRuleOption(rawSectionName, optionName, mandatoryOption, False, typeOption, setOption)\n+ \n+ def getConfig(self):\n+ self.checkIfExistsConfigFile()\n+ iConfig = self.readConfigFile()\n+ self.setRawConfig(iConfig)\n+ self.extendConfigRulesWithPatternRules()\n+ self.checkMandatorySections()\n+ self.checkMandatoryOptions()\n+ self.setConfig(iConfig)\n+ return self._iRawConfig\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/ConfigException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigException.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,53 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.checker.RepetException import RepetException
+
+## An exception raised by the check() method of class ConfigChecker
+#
+# This class allows storage of multiple messages (see the messages attribute).
+# Example: use one instance of ConfigException for one section of a
+# configuration file; all messages relative to this section are stored in
+# the messages attribute.
+class ConfigException( RepetException ):
+
+    ## Constructor
+    #
+    # @param msg message passed on to RepetException.__init__
+    # @param messages optional list of detailed messages; a new empty list
+    #        is created for this instance when the argument is omitted
+    #
+    def __init__(self, msg, messages = None):
+        RepetException.__init__(self, msg)
+        # A literal [] default would be one single list shared by every
+        # instance built without this argument, so allocate it per instance.
+        if messages is None:
+            messages = []
+        self.messages = messages
+
+    ## Return the list of detailed messages
+    #
+    def getMessages(self):
+        return self.messages
+
+    ## Replace the list of detailed messages
+    #
+    # @param messages new list of messages (stored by reference, not copied)
+    #
+    def setMessages(self, messages):
+        self.messages = messages
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/ConfigValue.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/ConfigValue.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,70 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Container of option values grouped by section
+#
+# Values are kept in a two-level dictionary:
+# section name -> option name -> option value.
+#
+class ConfigValue(object):
+
+    def __init__(self):
+        self.dOptionsValues4Sections={}
+
+    ## Tell whether a section is present
+    #
+    # @param sectionName name of the section
+    # @return True if the section exists, False otherwise
+    #
+    def has_section(self,sectionName):
+        # 'in' replaces dict.has_key(), which no longer exists in Python 3
+        return sectionName in self.dOptionsValues4Sections
+
+    ## Tell whether an option is present in a section
+    #
+    # @param sectionName name of the section
+    # @param optionName name of the option
+    # @return True if both the section and the option exist, False otherwise
+    #
+    def has_option(self, sectionName, optionName):
+        if not self.has_section(sectionName):
+            return False
+        return optionName in self.dOptionsValues4Sections[sectionName]
+
+    ## Return the list of section names
+    #
+    def sections(self):
+        # list() guarantees a real list whatever the Python version
+        return list(self.dOptionsValues4Sections)
+
+    ## Return the list of option names of a section
+    #
+    # @param sectionName name of the section
+    # @return list of option names, empty if the section does not exist
+    #
+    def options(self, sectionName):
+        if not self.has_section(sectionName):
+            return []
+        return list(self.dOptionsValues4Sections[sectionName])
+
+    ## Return the value of an option
+    #
+    # @param sectionName name of the section
+    # @param optionName name of the option
+    # @return the stored value, or None if the section or the option is missing
+    #
+    def get(self, sectionName, optionName):
+        if self.has_option(sectionName, optionName):
+            return self.dOptionsValues4Sections[sectionName][optionName]
+        return None
+
+    ## Store the value of an option, creating the section if needed
+    #
+    # @param sectionName name of the section
+    # @param optionName name of the option
+    # @param optionValue value to store
+    #
+    def set(self, sectionName, optionName, optionValue):
+        if not (self.has_section(sectionName)):
+            self.dOptionsValues4Sections[sectionName] = {}
+        self.dOptionsValues4Sections[sectionName][optionName] = optionValue
+
+    ## Replace the whole section/option dictionary
+    #
+    # @param dOptionsValues4Sections dictionary used as new storage (kept by reference)
+    #
+    def setdOptionsValues4Sections(self, dOptionsValues4Sections):
+        self.dOptionsValues4Sections = dOptionsValues4Sections
+
+    ## Two instances are equal when their section/option dictionaries are equal
+    #
+    # @param o object to compare with
+    #
+    def __eq__(self, o):
+        try:
+            dOther = o.dOptionsValues4Sections
+        except AttributeError:
+            # Not comparable: let Python fall back to its default comparison
+            # instead of raising on e.g. "config == None".
+            return NotImplemented
+        return self.dOptionsValues4Sections == dOther
+
+    ## Counterpart of __eq__
+    #
+    # Python 2 does not derive "!=" from __eq__; without this method two
+    # equal instances would still compare as different.
+    #
+    # @param o object to compare with
+    #
+    def __ne__(self, o):
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return isEqual
+        return not isEqual

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/IChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/IChecker.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,45 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for a checker
+#
+# This base class stands in for an interface declaration.
+#
+# All checkers are subclasses of IChecker.
+#
+class IChecker( object ):
+
+    ## Run the check; implementations raise a CheckerException if an error occurred
+    #
+    # This default implementation does nothing.
+    #
+    # @param arg a collecting parameter: whatever is needed to perform the check
+    #
+    def check(self, arg=""):
+        return None

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/OldConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/OldConfigChecker.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,101 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import ConfigParser
+from ConfigParser import NoOptionError
+from commons.core.checker.IChecker import IChecker
+from commons.core.checker.ConfigException import ConfigException
+
+
+## A checker for a configuration file
+#
+#
+# A configuration file is formatted as follows:
+#
+# [section1]
+#
+# option_name1: option_value1
+#
+# option_name2: option_value2
+#
+# option_name3: option_value3
+#
+# [section2]
+#
+# ...
+#
+#
+# This class performs 3 checks on a configuration file:
+#
+# (i) check if file exists
+#
+# (ii) check if section exists
+#
+# (iii) check if option exists
+#
+class ConfigChecker( IChecker ):
+
+    ## Constructor A checker for configuration file.
+    #
+    # @param  sectionName name of section to check in configuration file
+    # @param  optionsDict dictionary with option(s) to check as keys and empty strings ("") as values
+    def __init__ (self, sectionName, optionsDict):
+        self._sectionName = sectionName
+        self._optionsDict = optionsDict
+
+
+    ## Perform 3 checks : file exists, section exists, options exist
+    #
+    # The value of each option found is stored under its key in the
+    # options dictionary given to the constructor.
+    #
+    # @param configFile configuration file to check
+    # @exception ConfigException with a list of messages
+    def check (self, configFile):
+        config = ConfigParser.ConfigParser()
+        msg = []
+        try:
+            # 'with' closes the file handle even when parsing fails
+            with open(configFile) as configFileHandler:
+                config.readfp( configFileHandler )
+        except IOError as e:
+            # str(e) carries the system error text; the deprecated e.message
+            # attribute is typically empty for an IOError raised by open()
+            msg.append("CONFIG FILE not found - " + str(e))
+            raise ConfigException("", msg)
+
+        if not (config.has_section(self._sectionName)):
+            msg.append("[" + self._sectionName + "]" + " section not found - ")
+            raise ConfigException("", msg)
+
+        # Collect every missing option before raising, so that the caller
+        # gets the complete list in a single exception.
+        isExceptionOccured = False
+        for key in list(self._optionsDict):
+            try:
+                self._optionsDict[key] = config.get(self._sectionName, key)
+            except NoOptionError as e:
+                msg.append("[" + self._sectionName + "]" + " - " + str(e))
+                isExceptionOccured = True
+
+        if (isExceptionOccured):
+            raise ConfigException("", msg)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/RepetException.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/RepetException.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,51 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Exception carrying a single message
+#
+class RepetException(Exception):
+
+    ## Constructor
+    #
+    # @param msg message describing the error
+    #
+    def __init__(self, msg):
+        # Hand msg to Exception so that args, repr() and pickling carry it:
+        # with empty args, unpickling re-invokes __init__ without msg and fails.
+        Exception.__init__(self, msg)
+        self._message = msg
+
+    ## Return the message as the string form of the exception
+    #
+    def __str__(self):
+        return self._message
+
+    ## Return the message
+    #
+    def getMessage(self):
+        return self._message
+
+    ## Replace the message
+    #
+    # @param msg new message
+    #
+    def setMessage(self, msg):
+        self._message = msg
+
+
+## RepetException subclass with the same constructor
+#
+class RepetDataException(RepetException):
+
+    ## Constructor
+    #
+    # @param msg message handed over to the parent constructor
+    #
+    def __init__(self, msg):
+        super(RepetDataException, self).__init__(msg)
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/TestSuite_Checker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/TestSuite_Checker.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+import unittest
+import Test_CheckerUtils
+import Test_ConfigChecker
+
+
+## Gather the tests of the checker package and run them, reporting on standard error
+#
+def main():
+
+    # The default loader collects methods whose name starts with "test",
+    # the same prefix the suite was built with before.
+    loader = unittest.TestLoader()
+    lTestCases = [ Test_CheckerUtils.Test_CheckerUtils,
+                   Test_ConfigChecker.Test_ConfigChecker ]
+
+    suite = unittest.TestSuite()
+    for testCase in lTestCases:
+        suite.addTest( loader.loadTestsFromTestCase( testCase ) )
+
+    runner = unittest.TextTestRunner( stream = sys.stderr, descriptions = 2, verbosity = 2 )
+    runner.run( suite )
+
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/Test_CheckerUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_CheckerUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,535 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import ConfigParser\n+from commons.core.checker.CheckerUtils import CheckerUtils\n+from commons.core.checker.CheckerException import CheckerException\n+from ConfigParser import NoOptionError\n+from ConfigParser import NoSectionError\n+\n+class Test_CheckerUtils( unittest.TestCase ):\n+ \n+ def setUp(self):\n+ self.queueFileName = "queueName.txt"\n+ self.configFileName = "dummyConfig.cfg"\n+ \n+ def tearDown(self):\n+ if os.path.exists(self.queueFileName):\n+ os.remove(self.queueFileName)\n+ if os.path.exists(self.configFileName):\n+ os.remove(self.configFileName)\n+ \n+ def test_isBlastNameInBlastValues( self ):\n+ correctValueList = [ "blastn", "blastp", "blastx", "tblastn", "tblastx" ]\n+ for value in correctValueList:\n+ self.assertFalse( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n+ \n+ incorrectValueList = [ "badbalst", "wublast" ]\n+ for value in incorrectValueList:\n+ self.assertTrue( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n+ \n+ def test_isNotTRUEisNotFALSE( self ):\n+ correctValueList = [ "TRUE", "FALSE" ]\n+ for value in correctValueList:\n+ self.assertFalse( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n+ \n+ incorrectValueList = [ "True", "False" ]\n+ for value in incorrectValueList:\n+ self.assertTrue( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n+ \n+ def test_isRessourceNotExists( self ):\n+ fileName = "dummyFile.txt"\n+ self.assertTrue( CheckerUtils.isRessourceNotExits( fileName ) )\n+ os.system( "touch %s" % ( fileName ) )\n+ self.assertFalse( CheckerUtils.isRessourceNotExits( fileName ) )\n+ os.remove( fileName )\n+ \n+ def test_isNotAeValueWithOneDigit2DecimalsAtLeast( self ):\n+ correctEValueList = [ "5e-32", "7e-45", "1e-2122", "9e-32" ]\n+ for value in correctEValueList:\n+ self.assertFalse( 
CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n+ \n+ incorrecEValueStr = [ "10e-32", "2e-3", "2e-2", "1", "cxhhe" ]\n+ for value in incorrecEValueStr:\n+ self.assertTrue( CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n+ \n+ def test_isNotADigit( self ):\n'..b'CACCTTCAAA\\n")\n+ fastaFileHandler.write(">DmelC:hr4_Blas-ter_Piler_1.0_Map_9\\n")\n+ fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+ fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+ fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+ fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+ fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+ fastaFileHandler.close()\n+ \n+ def _writeFastaFile_with_pipe(self, fastaFileName): \n+ fastaFileHandler = open(fastaFileName, "w")\n+ fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n+ fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n+ fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n+ fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n+ fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n+ fastaFileHandler.write(">DmelC|hr4_Blas-ter_Piler_1.0_Map_9\\n")\n+ fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+ fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+ fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+ fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+ fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+ fastaFileHandler.close()\n+ \n+ def 
_writeFastaFile_with_equal(self, fastaFileName): \n+ fastaFileHandler = open(fastaFileName, "w")\n+ fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n+ fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n+ fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n+ fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n+ fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n+ fastaFileHandler.write(">DmelC:hr4_Blas=ter_Piler_1.0_Map_9\\n")\n+ fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n+ fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n+ fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n+ fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n+ fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n+ fastaFileHandler.close()\n+\n+ def _writeChangeLogFile(self, changeLogFileName ):\n+ changeLogFileHandler = open(changeLogFileName, "w")\n+ changeLogFileHandler.write("ChangeLog of REPET\\n")\n+ changeLogFileHandler.write("\\n")\n+ changeLogFileHandler.write("\\n")\n+ changeLogFileHandler.write("\\n")\n+ changeLogFileHandler.write("REPET release 1.3.6\\n")\n+ changeLogFileHandler.write("(release date XX/XX/2010)\\n")\n+ changeLogFileHandler.write("\\n")\n+ changeLogFileHandler.close()\n+\n+ def _writeConfigFile(self, lineVersion):\n+ configFileHandler = open(self.configFileName, "w")\n+ configFileHandler.write("[repet_env]\\n")\n+ configFileHandler.write(lineVersion)\n+ configFileHandler.write("repet_host: <your_MySQL_host>\\n")\n+ configFileHandler.close()\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_CheckerUtils ) )\n+if __name__ == "__main__":\n+ 
unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/Test_ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_ConfigChecker.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,569 @@\n+from commons.core.checker.ConfigChecker import ConfigChecker \n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.RepetException import RepetException\n+import os\n+import unittest\n+\n+class Test_ConfigChecker(unittest.TestCase):\n+ \n+ def setUp(self):\n+ self._configFileName = "testConfigChecker.cfg"\n+ self._iMock = MockConfig()\n+ \n+ def test_checkIfExistsConfigFile_file_exist(self):\n+ f=open(self._configFileName, "w")\n+ f.close()\n+ \n+ doesFileExists = True\n+ iConfigRules = ConfigRules()\n+ try:\n+ iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n+ iConfigChecker.checkIfExistsConfigFile()\n+ except RepetException:\n+ doesFileExists = False\n+ os.remove(self._configFileName) \n+ self.assertTrue(doesFileExists)\n+ \n+ def test_checkIfExistsConfigFile_file_not_exist(self):\n+ iConfigRules = ConfigRules()\n+ expMsg ="CONFIG FILE not found - \'%s\'" %self._configFileName\n+ doesFileExists = True\n+ try:\n+ iConfigChecker = ConfigChecker(self._configFileName, iConfigRules) \n+ iConfigChecker.checkIfExistsConfigFile() \n+ except RepetException, re:\n+ doesFileExists = False\n+ self.assertFalse(doesFileExists)\n+ self.assertEqual(expMsg, re.getMessage())\n+ \n+ def test_readConfigFile(self):\n+ self._iMock.write_config(self._configFileName)\n+ iConfigRules = ConfigRules()\n+ expDictRawConfigValues = {"dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""},\n+ \'analysis1\': {\'description\': \'\',\n+ \'gff_name\': \'BLASTX.gff2\',\n+ \'name\': \'BLASTXWheat2\',\n+ \'program\': \'BLASTX2\',\n+ \'programversion\': \'3.32\',\n+ \'sourcename\': \'dummyDesc_BLASTX2\'}\n+ }\n+ isNoExceptionRaised = True\n+ try: \n+ iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n+ iConfig = iConfigChecker.readConfigFile()\n+ iConfigChecker.setRawConfig(iConfig)\n+ 
obsDictRawConfigValues = iConfigChecker._iRawConfig.dOptionsValues4Sections\n+ except RepetException:\n+ isNoExceptionRaised = False\n+ os.remove(self._configFileName)\n+ self.assertTrue(isNoExceptionRaised)\n+ self.assertEquals(obsDictRawConfigValues, expDictRawConfigValues)\n+ \n+ def test_readConfigFile_section_define_twice(self):\n+ self._iMock.write_case_section_define_twice(self._configFileName)\n+ iConfigRules = ConfigRules()\n+ expMsg = "Duplicate section exist in config file %s" %self._configFileName\n+ expDictRawConfigValues = {"dir_name": {"work_dir":"toto"},\n+ "analysis1" : {"name": "BLASTXWheat2",\n+ "program" : "BLASTX2",\n+ "programversion" : "3.32",\n+ "sourcename" :"dummyDesc_BLASTX2",\n+ "description" : "",\n+ "gff_name" :"BLASTX.gff2"}\n+ }\n+ doesNoExceptionRaised = True\n+ try:\n+ iConfigChecker = ConfigChecker(self._configFileName, iConfigRu'..b' configF.write( "sourcename: dummyDesc_BLASTX\\n")\n+ configF.write( "program: BLASTX2\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff\\n")\n+ configF.write( "\\n")\n+ configF.write( "\\n")\n+ configF.close()\n+ \n+ #configuration file with section with option depends on presence of other options\n+ def write_with_one_option_depends_of_an_other_one(self, configFileName ):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto\\n") \n+ configF.write( "\\n")\n+ configF.write( "[organism]\\n")\n+ configF.write( "abbreviation: T.aestivum\\n")\n+ configF.write( "genus: Triticum\\n")\n+ configF.write( "species: aestivum\\n")\n+ configF.write( "common_name: wheat\\n")\n+ configF.write( "comment: \\n")\n+ configF.write( "\\n")\n+ configF.write( "[analysis1]\\n")\n+ configF.write( "name: BLASTXWheat\\n")\n+ configF.write( "program: BLASTX\\n")\n+ configF.write( "programversion: 3.3\\n")\n+ configF.write( "sourcename: src_BLASTX\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff\\n")\n+ 
configF.write( "\\n")\n+ configF.write( "[analysis2]\\n")\n+ configF.write( "name: GMHMMWheat\\n")\n+ configF.write( "program: GMHMM\\n")\n+ configF.write( "programversion: 4.3\\n")\n+ configF.write( "sourcename: src_GMHMM\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: GMHMM.gff\\n")\n+ configF.write( "\\n")\n+ configF.write( "[target]\\n")\n+ configF.write( "target_used: yes\\n")\n+ configF.write( "target_used_list: target.lst\\n")\n+ configF.close()\n+ \n+ def write_case_pattern_rule(self, configFileName ):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto\\n" ) \n+ configF.write( "\\n")\n+ configF.write( "[organism]\\n")\n+ configF.write( "abbreviation: T.aestivum\\n")\n+ configF.write( "genus: Triticum\\n")\n+ configF.write( "species: aestivum\\n")\n+ configF.write( "common_name: wheat\\n")\n+ configF.write( "comment: \\n")\n+ configF.write( "\\n")\n+ configF.write( "[analysis1]\\n")\n+ configF.write( "name: BLASTXWheat\\n")\n+ configF.write( "program: BLASTX\\n")\n+ configF.write( "programversion: 3.3\\n")\n+ configF.write( "sourcename: src_BLASTX\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff\\n")\n+ configF.write( "\\n")\n+ configF.write( "[analysis2]\\n")\n+ configF.write( "name: GMHMMWheat\\n")\n+ configF.write( "program: GMHMM\\n")\n+ configF.write( "programversion: 4.3\\n")\n+ configF.write( "sourcename: src_GMHMM\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: GMHMM.gff\\n")\n+ configF.write( "\\n")\n+ configF.write( "[target]\\n")\n+ configF.write( "target_used: yes\\n")\n+ configF.write( "target_used_list: target.lst\\n")\n+ configF.write( "\\n")\n+ configF.write( "[section_with_option_pattern]\\n")\n+ configF.write( "option1: value1\\n")\n+ configF.write( "option2: value2\\n")\n+ configF.write( "[second_section_with_option_pattern]\\n")\n+ configF.write( "option1: value1\\n")\n+ configF.write( "option2: 
value2\\n")\n+ configF.close()\n+ \n+ def write_config_case(self, configFileName):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto \\n") \n+ configF.write( "\\n")\n+ configF.write( "[organism]\\n")\n+ configF.write( "min_SSR_coverage: 0.50\\n")\n+ configF.write( "\\n")\n+ configF.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/Test_ConfigValue.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_ConfigValue.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,217 @@\n+import unittest\n+from commons.core.checker.ConfigValue import ConfigValue\n+\n+class Test_ConfigValue(unittest.TestCase):\n+ \n+ def setUp(self):\n+ self._iConfigValue = ConfigValue()\n+ \n+ def test__eq__True(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ iConfigValue1 = ConfigValue() \n+ iConfigValue1.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ \n+ self.assertEqual(self._iConfigValue, iConfigValue1)\n+ \n+ def test__eq__False_not_same_section(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organisms" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ iConfigValue1 = ConfigValue() \n+ iConfigValue1.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ \n+ self.assertNotEqual(self._iConfigValue, iConfigValue1)\n+ \n+ \n+ def test__eq__False_not_same_option(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "family":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ iConfigValue1 = ConfigValue() \n+ iConfigValue1.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ \n+ self.assertNotEqual(self._iConfigValue, iConfigValue1)\n+ \n+ 
def test__eq__False_not_same_value(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"vitis",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n'..b'+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ \n+ obsOptionExist = self._iConfigValue.has_option("organism","toto")\n+ self.assertFalse(obsOptionExist)\n+ obsOptionExist = self._iConfigValue.has_option("toto","genus")\n+ self.assertFalse(obsOptionExist)\n+\n+ def test_sections(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ expListSections = ["dir_name", "organism"]\n+ obsListSections = self._iConfigValue.sections()\n+ self.assertEquals(expListSections, obsListSections)\n+ \n+ def test_sections_empty_config(self):\n+ self._iConfigValue.dOptionsValues4Sections = {}\n+ expListSections = []\n+ obsListSections = self._iConfigValue.sections()\n+ self.assertEquals(expListSections, obsListSections)\n+\n+ def test_options(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ expListOptions = [\'abbreviation\', \'common_name\', \'genus\', \'species\', \'comment\']\n+ obsListOptions = self._iConfigValue.options("organism")\n+ self.assertEquals(expListOptions, obsListOptions)\n+ \n+ expListOptions = ["work_dir"]\n+ obsListOptions = self._iConfigValue.options("dir_name")\n+ self.assertEquals(expListOptions, obsListOptions)\n+ \n+ def 
test_options_empty_config(self):\n+ self._iConfigValue.dOptionsValues4Sections = {}\n+ expListOptions = []\n+ obsListOptions = self._iConfigValue.options("toto")\n+ self.assertEquals(expListOptions, obsListOptions)\n+\n+ def test_set(self):\n+ self._iConfigValue.dOptionsValues4Sections = {}\n+ expDictOptionsValue = {"dir_name" : {"work_dir":"toto"}}\n+ self._iConfigValue.set("dir_name", "work_dir", "toto")\n+ obsDictOptionsValue = self._iConfigValue.dOptionsValues4Sections\n+ self.assertEquals(expDictOptionsValue, obsDictOptionsValue)\n+ \n+ def test_get(self):\n+ self._iConfigValue.dOptionsValues4Sections = {\n+ "dir_name" : {"work_dir":"toto"},\n+ "organism" : {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""}\n+ }\n+ expValue = "aestivum"\n+ obsValue = self._iConfigValue.get("organism", "species")\n+ self.assertEquals(expValue, obsValue)\n+ expValue = None\n+ obsValue = self._iConfigValue.get("toto", "species")\n+ self.assertEquals(expValue, obsValue)\n+ expValue = None\n+ obsValue = self._iConfigValue.get("organism", "dummyopt")\n+ self.assertEquals(expValue, obsValue) \n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/Test_F_ConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_F_ConfigChecker.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,214 @@\n+from commons.core.checker.ConfigChecker import ConfigChecker \n+from commons.core.checker.ConfigChecker import ConfigRules\n+from commons.core.checker.ConfigValue import ConfigValue\n+from commons.core.checker.RepetException import RepetException\n+import unittest\n+import os\n+\n+class Test_F_ConfigChecker(unittest.TestCase):\n+ \n+ #TODO: AJouter test (wrong type, etc..)\n+ def setUp(self):\n+ self._configFileName = "test_conf_checker"\n+ \n+ def tearDown(self):\n+ os.remove(self._configFileName)\n+ \n+ def test_run(self):\n+ iMock = MockConfig()\n+ iMock.write_config(self._configFileName)\n+ \n+ iConfigRules = ConfigRules()\n+ iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n+ iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n+ iConfigRules.addRuleSection(section="organism", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="comment")\n+ iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="description")\n+ iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n+ \n+ iConfigChecker = ConfigChecker(self._configFileName,iConfigRules)\n+ \n+ obsValidatedConfig = iConfigChecker.getConfig()\n+ \n+ expValidatedConfig = ConfigValue()\n+ d = {"dir_name" : {"work_dir":"toto"},\n+ "organism" 
: {"abbreviation":"T.aestivum",\n+ "genus":"triticum",\n+ "species":"aestivum",\n+ "common_name":"wheat",\n+ "comment":""},\n+ \'analysis1\': {\'description\': \'\',\n+ \'gff_name\': \'BLASTX.gff2\',\n+ \'name\': \'BLASTXWheat2\',\n+ \'program\': \'BLASTX2\',\n+ \'programversion\': \'3.32\',\n+ \'sourcename\': \'dummyDesc_BLASTX2\'}\n+ }\n+ expValidatedConfig.setdOptionsValues4Sections(d)\n+ \n+ self.assertEquals(expValidatedConfig, obsValidatedConfig)\n+ \n+ \n+ def test_run_exception_section_missing(self):\n+ iMock = MockConfig()\n+ iMock.write_config_section_missing(self._configFileName)\n+ \n+ iConfigRules = ConfigRules()\n+ iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n+ iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n+ iConfigRules.addRuleSection(section="organism", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="comment")\n+ iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+ iConfigRules.addRuleOption('..b'on ="work_dir", mandatory=True)\n+ iConfigRules.addRuleSection(section="organism", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n+ iConfigRules.addRuleOption(section="organism", option ="comment")\n+ iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n+ 
iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n+ iConfigRules.addRuleOption(section="analysis", option ="description")\n+ iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n+ \n+ iConfigChecker = ConfigChecker(self._configFileName,iConfigRules)\n+ \n+ expMessage = "Error in configuration file %s, following options are missing: \\n - [organism]: abbreviation\\n"% self._configFileName\n+ \n+ try :\n+ obsValidatedConfig = iConfigChecker.getConfig()\n+ except RepetException, e:\n+ obsMessage = e.getMessage()\n+\n+ self.assertEquals(expMessage, obsMessage)\n+ \n+class MockConfig (object):\n+ \n+ def write_config(self, configFileName):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto \\n") \n+ configF.write( "\\n")\n+ configF.write( "[organism]\\n")\n+ configF.write( "abbreviation: T.aestivum\\n")\n+ configF.write( "genus: triticum\\n")\n+ configF.write( "species: aestivum\\n")\n+ configF.write( "common_name: wheat\\n")\n+ configF.write( "comment: \\n")\n+ configF.write( "[analysis1]\\n")\n+ configF.write( "name: BLASTXWheat2\\n")\n+ configF.write( "program: BLASTX2\\n")\n+ configF.write( "programversion: 3.32\\n")\n+ configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff2\\n")\n+ configF.write( "\\n")\n+ configF.close()\n+ \n+ def write_config_section_missing(self, configFileName):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto \\n") \n+ configF.write( "\\n")\n+ configF.write( "[analysis1]\\n")\n+ configF.write( "name: BLASTXWheat2\\n")\n+ configF.write( "program: BLASTX2\\n")\n+ configF.write( "programversion: 3.32\\n")\n+ configF.write( "sourcename: 
dummyDesc_BLASTX2\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff2\\n")\n+ configF.write( "\\n")\n+ configF.close()\n+ \n+ def write_config_option_missing(self, configFileName):\n+ configF = open(configFileName, "w" )\n+ configF.write( "[dir_name]\\n")\n+ configF.write( "work_dir : toto \\n") \n+ configF.write( "\\n")\n+ configF.write( "[organism]\\n")\n+ configF.write( "genus: triticum\\n")\n+ configF.write( "species: aestivum\\n")\n+ configF.write( "common_name: wheat\\n")\n+ configF.write( "comment: \\n")\n+ configF.write( "[analysis1]\\n")\n+ configF.write( "name: BLASTXWheat2\\n")\n+ configF.write( "program: BLASTX2\\n")\n+ configF.write( "programversion: 3.32\\n")\n+ configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n+ configF.write( "description: \\n")\n+ configF.write( "gff_name: BLASTX.gff2\\n")\n+ configF.write( "\\n")\n+ configF.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/checker/test/Test_OldConfigChecker.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/checker/test/Test_OldConfigChecker.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,104 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import os
+from commons.core.checker.OldConfigChecker import ConfigChecker
+from commons.core.checker.ConfigException import ConfigException
+
+class Test_ConfigChecker( unittest.TestCase ):
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+
+    def testFileNotFound(self):
+        exceptionExpected = None
+        configChecker = ConfigChecker("",{})
+        try :
+            configChecker.check("noExistsFile.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("CONFIG FILE not found - "))
+
+
+    def testNoSectionInConfigFile (self):
+        exceptionExpected = None
+        dummyFile = open("dummyFile.cfg", "w")
+        configChecker = ConfigChecker("dummySection",{})
+        try :
+            configChecker.check("dummyFile.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("[dummySection]" + " section not found - "))
+
+        os.remove("dummyFile.cfg")
+
+
+    def testNoOptionInConfigFile (self):
+        exceptionExpected = None
+        MockConfigFile("dummyConfig.cfg",{})
+        configChecker = ConfigChecker("blaster_config",{"dummy":""})
+        try :
+            configChecker.check("dummyConfig.cfg")
+        except ConfigException, ce:
+            exceptionExpected = ce
+
+        self.assertTrue(exceptionExpected != None)
+        msg = exceptionExpected.messages[0]
+        self.assertTrue(msg.startswith("[blaster_config] - No option 'dummy' in section: 'blaster_config'"))
+        os.remove("dummyConfig.cfg")
+
+
+class MockConfigFile:
+
+    def __init__ (self, fileName, optionsDict):
+        self._fileName = fileName
+        config = open(fileName, "w");
+        config.write("[blaster_config]\n")
+        for key in optionsDict.keys():
+            config.write(key + ":" + optionsDict[key] + "\n")
+        config.close()
+
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_ConfigChecker ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Align.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,428 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import time\n+\n+from commons.core.coord.Range import Range\n+from commons.core.coord.Map import Map\n+\n+\n+## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity)\n+#\n+class Align( object ):\n+ \n+ ## Constructor\n+ #\n+ # @param range_q: a Range instance for the query\n+ # @param range_s: a Range instance for the subject\n+ # @param e_value: E-value of the match \n+ # @param identity: identity percentage of the match\n+ # @param score: score of the match\n+ #\n+ def __init__(self, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0):\n+ self.range_query = range_q\n+ self.range_subject = range_s\n+ self.e_value = float(e_value)\n+ self.score = float(score)\n+ self.identity = float(identity)\n+ \n+ ## Return True if the instance is empty, False otherwise\n+ #\n+ def isEmpty(self):\n+ return self.range_query.isEmpty() or self.range_subject.isEmpty()\n+ \n+ ## Equal operator\n+ #\n+ def __eq__(self, o):\n+ if self.range_query==o.range_query and self.range_subject==o.range_subject and \\\n+ self.e_value==o.e_value and self.score==o.score and self.identity==o.identity:\n+ return True\n+ return False\n+ \n+ ## Unequal operator\n+ #\n+ # @param o a Range instance\n+ #\n+ def __ne__(self, o):\n+ return not self.__eq__(o)\n+ \n+ ## Convert the object into a string\n+ #\n+ # @note used in \'print myObject\'\n+ #\n+ def __str__( self ):\n+ return self.toString()\n+ \n+ ## Read attributes from an Align file\n+ # \n+ # @param fileHandler: file handler of the file being read\n+ # @return: 1 on success, 0 at the end of the file \n+ #\n+ def read(self, fileHandler):\n+ self.reset()\n+ line = fileHandler.readline()\n+ if line == "":\n+ return 0\n+ tokens = line.split("\\t")\n+ if len(tokens) < len(self.__dict__.keys()):\n+ return 0\n+ 
self.setFromTuple(tokens)\n+ return 1\n+ \n+ ## Set attributes from tuple\n+ #\n+ # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity)\n+ # @note data are loaded such that the query is always on the direct strand\n+ #\n+ def setFromTuple( self, tuple ):\n+ #'..b' self.identity = max(self.identity,o.identity)\n+ \n+ ## Return a Map instance with the subject mapped on the query\n+ #\n+ def getSubjectAsMapOfQuery(self):\n+ iMap = Map()\n+ iMap.name = self.range_subject.seqname\n+ iMap.seqname = self.range_query.seqname\n+ if self.range_subject.isOnDirectStrand():\n+ iMap.start = self.range_query.start\n+ iMap.end = self.range_query.end\n+ else:\n+ iMap.start = self.range_query.end\n+ iMap.end = self.range_query.start\n+ return iMap\n+ \n+ ## Return True if query is on direct strand\n+ #\n+ def isQueryOnDirectStrand( self ):\n+ return self.range_query.isOnDirectStrand()\n+ \n+ ## Return True if subject is on direct strand\n+ #\n+ def isSubjectOnDirectStrand( self ):\n+ return self.range_subject.isOnDirectStrand()\n+ \n+ ## Return True if query and subject are on the same strand, False otherwise\n+ #\n+ def areQrySbjOnSameStrand(self):\n+ return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()\n+ \n+ ## Return False if query and subject are on the same strand, True otherwise\n+ #\n+ def areQrySbjOnOppositeStrands(self):\n+ return not self.areQrySbjOnSameStrand()\n+\n+ ## Set attributes from string\n+ #\n+ # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity\n+ # @param sep field separator\n+ #\n+ def setFromString(self, string, sep="\\t"):\n+ if string[-1] == "\\n":\n+ string = string[:-1]\n+ self.setFromTuple( string.split(sep) )\n+ \n+ ## Return a first Map instance for the query and a second for the subject\n+ #\n+ def getMapsOfQueryAndSubject(self):\n+ iMapQuery = Map( name="repet",\n+ 
seqname=self.range_query.seqname,\n+ start=self.range_query.start,\n+ end=self.range_query.end )\n+ iMapSubject = Map( name="repet",\n+ seqname=self.range_subject.seqname,\n+ start=self.range_subject.start,\n+ end=self.range_subject.end )\n+ return iMapQuery, iMapSubject\n+ \n+ ## Write query coordinates as Map in a file\n+ #\n+ # @param fileHandler: file handler of the file being filled\n+ #\n+ def writeSubjectAsMapOfQuery( self, fileHandler ):\n+ m = self.getSubjectAsMapOfQuery()\n+ m.write( fileHandler )\n+ \n+ ## Return a bin for fast database access\n+ #\n+ def getBin(self):\n+ return self.range_query.getBin()\n+ \n+ ## Switch query and subject\n+ #\n+ def switchQuerySubject( self ):\n+ tmpRange = self.range_query\n+ self.range_query = self.range_subject\n+ self.range_subject = tmpRange\n+ if not self.isQueryOnDirectStrand():\n+ self.reverse()\n+ \n+ ## Return True if the query overlaps with the query of another Align instance, False otherwise\n+ #\n+ def isQueryOverlapping( self, iAlign ):\n+ return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )\n+ \n+ ## Return True if the subject overlaps with the subject of another Align instance, False otherwise\n+ #\n+ def isSubjectOverlapping( self, iAlign ):\n+ return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )\n+ \n+ ## Return True if the Align instance overlaps with another Align instance, False otherwise\n+ #\n+ def isOverlapping( self, iAlign ):\n+ if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ):\n+ return True\n+ else:\n+ return False\n+ \n+ ## Update the score\n+ #\n+ # @note the new score is the length on the query times the percentage of identity\n+ #\n+ def updateScore( self ):\n+ newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0\n+ self.score = newScore\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Align.pyc

Binary file commons/core/coord/Align.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/AlignUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/AlignUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,359 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import shutil\n+from commons.core.coord.Align import Align\n+\n+\n+## Static methods manipulating Align instances\n+#\n+class AlignUtils( object ):\n+ \n+ ## Return a list with Align instances from the given file\n+ #\n+ # @param inFile name of a file in the Align format\n+ #\n+ def getAlignListFromFile( inFile ):\n+ lAlignInstances = []\n+ inFileHandler = open( inFile, "r" )\n+ while True:\n+ line = inFileHandler.readline()\n+ if line == "":\n+ break\n+ a = Align()\n+ a.setFromString( line )\n+ lAlignInstances.append( a )\n+ inFileHandler.close()\n+ return lAlignInstances\n+\n+ getAlignListFromFile = staticmethod( getAlignListFromFile )\n+ \n+ \n+ ## Return a list with all the scores\n+ #\n+ # @param lAlignInstances: list of Align instances\n+ #\n+ def getListOfScores( lAlignInstances ):\n+ lScores = []\n+ for iAlign in lAlignInstances:\n+ lScores.append( iAlign.score )\n+ return lScores\n+ \n+ getListOfScores = staticmethod( getListOfScores )\n+\n+ \n+ ## Return a list with all the scores from the given file\n+ #\n+ # @param inFile name of a file in the Align format\n+ #\n+ def getScoreListFromFile(inFile):\n+ lScores = []\n+ append = lScores.append\n+ with open(inFile, "r") as inFileHandler:\n+ line = inFileHandler.readline()\n+ while line:\n+ if line != "\\n":\n+ append(int(line.split(\'\\t\')[7]))\n+ line = inFileHandler.readline()\n+ return lScores\n+ \n+ getScoreListFromFile = staticmethod( getScoreListFromFile )\n+ \n+ \n+ ## for each line of a given Align file, write the coordinates on the query and the subject as two distinct lines in a Map file\n+ #\n+ # @param alignFile: name of the input Align file\n+ # @param mapFile: name of the output Map file\n+ #\n+ def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):\n+ alignFileHandler = open( alignFile, "r" )\n+ 
mapFileHandler = open( mapFile, "w" )\n+ iAlign = Align()\n+ while True:\n+ line = alignFileHandler.readline()\n+ if line == "":\n+ break\n+ iAlign.setFromString( line )\n+ iMapQ, iMap'..b'Dir)\n+ \n+ createAlignFiles = staticmethod( createAlignFiles )\n+ \n+ \n+ ## Return a list with Align instances sorted by query name, subject name, query start, query end and score\n+ #\n+ def sortList( lAligns ):\n+ return sorted( lAligns, key=lambda iAlign: ( iAlign.getQueryName(),\n+ iAlign.getSubjectName(),\n+ iAlign.getQueryStart(),\n+ iAlign.getQueryEnd(),\n+ iAlign.getScore() ) )\n+ \n+ sortList = staticmethod( sortList )\n+ \n+ \n+ ## Return a list after merging all overlapping Align instances\n+ #\n+ def mergeList( lAligns ):\n+ lMerged = []\n+ \n+ lSorted = AlignUtils.sortList( lAligns )\n+ \n+ prev_count = 0\n+ for iAlign in lSorted:\n+ if prev_count != len(lSorted):\n+ for i in lSorted[ prev_count + 1: ]:\n+ if iAlign.isOverlapping( i ):\n+ iAlign.merge( i )\n+ IsAlreadyInList = False\n+ for newAlign in lMerged:\n+ if newAlign.isOverlapping( iAlign ):\n+ IsAlreadyInList = True\n+ newAlign.merge( iAlign )\n+ lMerged [ lMerged.index( newAlign ) ] = newAlign\n+ if not IsAlreadyInList:\n+ lMerged.append( iAlign )\n+ prev_count += 1\n+ \n+ return lMerged\n+ \n+ mergeList = staticmethod( mergeList )\n+ \n+ \n+ ## Merge all Align instance in a given Align file\n+ #\n+ def mergeFile( inFile, outFile="" ):\n+ if outFile == "":\n+ outFile = "%s.merged" % ( inFile )\n+ if os.path.exists( outFile ):\n+ os.remove( outFile )\n+ \n+ tmpFile = "%s.sorted" % ( inFile )\n+ AlignUtils.sortAlignFile( inFile, tmpFile )\n+ \n+ tmpF = open( tmpFile, "r" )\n+ dQrySbj2Aligns = {}\n+ prevPairQrySbj = ""\n+ while True:\n+ line = tmpF.readline()\n+ if line == "":\n+ break\n+ iAlign = Align()\n+ iAlign.setFromString( line )\n+ pairQrySbj = "%s_%s" % ( iAlign.getQueryName(), iAlign.getSubjectName() )\n+ if not dQrySbj2Aligns.has_key( pairQrySbj ):\n+ if prevPairQrySbj != "":\n+ lMerged = 
AlignUtils.mergeList( dQrySbj2Aligns[ prevPairQrySbj ] )\n+ AlignUtils.writeListInFile( lMerged, outFile, "a" )\n+ del dQrySbj2Aligns[ prevPairQrySbj ]\n+ prevPairQrySbj = pairQrySbj\n+ else:\n+ prevPairQrySbj = pairQrySbj\n+ dQrySbj2Aligns[ pairQrySbj ] = []\n+ dQrySbj2Aligns[ pairQrySbj ].append( iAlign )\n+ lMerged = []\n+ if len(dQrySbj2Aligns.keys()) > 0:\n+ lMerged = AlignUtils.mergeList( dQrySbj2Aligns[ prevPairQrySbj ] )\n+ AlignUtils.writeListInFile( lMerged, outFile, "a" )\n+ tmpF.close()\n+ os.remove( tmpFile )\n+ \n+ mergeFile = staticmethod( mergeFile )\n+\n+\n+ ## Update the scores of each match in the input file\n+ #\n+ # @note the new score is the length on the query times the percentage of identity\n+ #\n+ def updateScoresInFile( inFile, outFile ):\n+ inHandler = open( inFile, "r" )\n+ outHandler = open( outFile, "w" )\n+ iAlign = Align()\n+ \n+ while True:\n+ line = inHandler.readline()\n+ if line == "":\n+ break\n+ iAlign.reset()\n+ iAlign.setFromString( line, "\\t" )\n+ iAlign.updateScore()\n+ iAlign.write( outHandler )\n+ \n+ inHandler.close()\n+ outHandler.close()\n+ \n+ updateScoresInFile = staticmethod( updateScoresInFile )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/ConvCoord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/ConvCoord.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,504 @@\n+#!/usr/bin/env python\n+\n+##@file\n+# Convert coordinates from chunks to chromosomes or the opposite.\n+#\n+# usage: ConvCoord.py [ options ]\n+# options:\n+# -h: this help\n+# -i: input data with coordinates to convert (file or table)\n+# -f: input data format (default=\'align\'/\'path\')\n+# -c: coordinates to convert (query, subject or both; default=\'q\'/\'s\'/\'qs\')\n+# -m: mapping of chunks on chromosomes (format=\'map\')\n+# -x: convert from chromosomes to chunks (opposite by default)\n+# -o: output data (file or table, same as input)\n+# -C: configuration file (for database connection)\n+# -v: verbosity level (default=0/1/2)\n+\n+\n+import os\n+import sys\n+import getopt\n+import time\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.coord.MapUtils import MapUtils\n+from commons.core.sql.TableMapAdaptator import TableMapAdaptator\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Range import Range\n+\n+\n+## Class to handle coordinate conversion\n+#\n+class ConvCoord( object ):\n+ \n+ ## Constructor\n+ #\n+ def __init__( self, inData="", mapData="", outData="", configFile="", verbosity=0):\n+ self._inData = inData\n+ self._formatInData = "align"\n+ self._coordToConvert = "q"\n+ self._mapData = mapData\n+ self._mergeChunkOverlaps = True\n+ self._convertChunks = True\n+ self._outData = outData\n+ self._configFile = configFile\n+ self._verbose = verbosity\n+ self._typeInData = "file"\n+ self._typeMapData = "file"\n+ self._tpa = None\n+ if self._configFile != "" and os.path.exists(self._configFile):\n+ self._iDb = DbFactory.createInstance(self._configFile)\n+ else:\n+ self._iDb = DbFactory.createInstance()\n+ \n+ \n+ ## Display the help on stdout\n+ #\n+ def help( self ):\n+ print\n+ print "usage: ConvCoord.py [ options ]"\n+ print 
"options:"\n+ print " -h: this help"\n+ print " -i: input data with coordinates to convert (file or table)"\n+ print " -f: input data format (default=\'align\'/\'path\')"\n+ print " -c: coordinates to convert (query, subject or both; default=\'q\'/\'s\'/\'qs\')"\n+ print " -m: mapping of chunks on chromosomes (format=\'map\')"\n+ print " -M: merge chunk overlaps (default=yes/no)"\n+ print " -x: convert from chromosomes to chunks (opposite by default)"\n+ print " -o: output data (file or table, same as input)"\n+ print " -C: configuration file (for database connection)"\n+ print " -v: verbosity level (default=0/1/2)"\n+ print\n+ \n+ \n+ ## Set the attributes from the command-line\n+ #\n+ def setAttributesFromCmdLine( self ):\n+ try:\n+ opts, args = getopt.getopt(sys.argv[1:],"hi:f:c:m:M:xo:C:v:")\n+ except getopt.GetoptError, err:\n+ sys.stderr.write( "%s\\n" % ( str(err) ) )\n+ self.help(); sys.exit(1)\n+ for o,a in opts:\n+ if o == "-h":\n+ self.help(); sys.exit(0)\n+ elif o == "-i":\n+ self.setInputData( a )\n+ elif o == "-f":\n+ self.setInputFormat( a )\n+ elif o == "-c":\n+ self.setCoordinatesToConvert( a )\n+ elif o == "-m":\n+ self.setMapData( a )\n+ elif o == "-M":\n+ self.setMergeChunkOverlaps( a )\n+ elif o == "-o":\n+ self.setOutputData( a )\n+ elif o == "-C":\n+ self.setConfigFile( a )\n+ elif o == "-v":\n+ self.setVerbosityLevel( a )\n+ \n+ \n+ def setInputData( self, inData ):\n+ self._inData = inData\n+ '..b'ile( tmpPathTable, tmpPathTable, False )\n+ self._iDb.dropTable( tmpPathTable )\n+ if self._formatInData == "align":\n+ PathUtils.convertPathFileIntoAlignFile( tmpPathTable, outFile )\n+ os.remove( tmpPathTable )\n+ elif self._formatInData == "path":\n+ os.rename( tmpPathTable, outFile )\n+ \n+ \n+ def saveChrCoordsAsTable( self, tmpPathTable, outTable ):\n+ if self._formatInData == "align":\n+ self._iDb.convertPathTableIntoAlignTable( tmpPathTable, outTable )\n+ self._iDb.dropTable( tmpPathTable )\n+ elif self._formatInData == "path":\n+ 
self._iDb.renameTable( tmpPathTable, outTable )\n+ \n+ \n+ ## Convert coordinates from chunks to chromosomes\n+ #\n+ def convertCoordinatesFromChunksToChromosomes( self ):\n+ dChunks2CoordMaps = self.getChunkCoordsOnChromosomes()\n+ \n+ if self._typeInData == "file":\n+ tmpPathTable = self.convCoordsChkToChrFromFile( self._inData, self._formatInData, dChunks2CoordMaps )\n+ elif self._typeInData == "table":\n+ tmpPathTable = self.convCoordsChkToChrFromTable( self._inData, self._formatInData, dChunks2CoordMaps )\n+ \n+ if self._mergeChunkOverlaps:\n+ self.mergeCoordsOnChunkOverlaps( dChunks2CoordMaps, tmpPathTable );\n+ \n+ if self._typeInData == "file":\n+ self.saveChrCoordsAsFile( tmpPathTable, self._outData )\n+ elif self._typeInData == "table":\n+ self.saveChrCoordsAsTable( tmpPathTable, self._outData )\n+ \n+ \n+ ## Convert coordinates from chromosomes to chunks\n+ #\n+ def convertCoordinatesFromChromosomesToChunks( self ):\n+ msg = "ERROR: convert coordinates from chromosomes to chunks not yet available"\n+ sys.stderr.write( "%s\\n" % ( msg ) )\n+ sys.exit(1)\n+ \n+ \n+ ## Useful commands before running the program\n+ #\n+ def start( self ):\n+ self.checkAttributes()\n+ if self._verbose > 0:\n+ msg = "START ConvCoord.py (%s)" % ( time.strftime("%m/%d/%Y %H:%M:%S") )\n+ msg += "\\ninput data: %s" % ( self._inData )\n+ if self._typeInData == "file":\n+ msg += " (file)\\n"\n+ else:\n+ msg += " (table)\\n"\n+ msg += "format: %s\\n" % ( self._formatInData )\n+ msg += "coordinates to convert: %s\\n" % ( self._coordToConvert )\n+ msg += "mapping data: %s" % ( self._mapData )\n+ if self._typeMapData == "file":\n+ msg += " (file)\\n"\n+ else:\n+ msg += " (table)\\n"\n+ if self._mergeChunkOverlaps:\n+ msg += "merge chunk overlaps\\n"\n+ else:\n+ msg += "don\'t merge chunk overlaps\\n"\n+ if self._convertChunks:\n+ msg += "convert chunks to chromosomes\\n"\n+ else:\n+ msg += "convert chromosomes to chunks\\n"\n+ msg += "output data: %s" % ( self._outData )\n+ if 
self._typeInData == "file":\n+ msg += " (file)\\n"\n+ else:\n+ msg += " (table)\\n"\n+ sys.stdout.write( msg )\n+ \n+ \n+ ## Useful commands before ending the program\n+ #\n+ def end( self ):\n+ self._iDb.close()\n+ if self._verbose > 0:\n+ msg = "END ConvCoord.py (%s)" % ( time.strftime("%m/%d/%Y %H:%M:%S") )\n+ sys.stdout.write( "%s\\n" % ( msg ) )\n+ \n+ \n+ ## Run the program\n+ #\n+ def run( self ):\n+ self.start()\n+ \n+ if self._convertChunks:\n+ self.convertCoordinatesFromChunksToChromosomes()\n+ else:\n+ self.convertCoordinatesFromChromosomesToChunks()\n+ \n+ self.end()\n+ \n+ \n+if __name__ == "__main__":\n+ i = ConvCoord()\n+ i.setAttributesFromCmdLine()\n+ i.run()\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Map.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,161 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.coord.Range import Range
+
+
+## Record a named region on a given sequence
+#
+# A Map is a Range (seqname, start, end) carrying an additional 'name' attribute.
+#
+class Map( Range ):
+
+    ## Constructor
+    #
+    # @param name the name of the region
+    # @param seqname the name of the sequence
+    # @param start the start coordinate
+    # @param end the end coordinate
+    #
+    def __init__(self, name="", seqname="", start=-1, end=-1):
+        self.name = name
+        Range.__init__( self, seqname, start, end )
+
+    ## Equal operator
+    #
+    # @param o a Map instance
+    # @return True if both names are equal and Range.__eq__ also holds, False otherwise
+    #
+    def __eq__(self, o):
+        # an object without a 'name' attribute (e.g. None) is never equal to a Map;
+        # answer False instead of failing with an AttributeError
+        if not hasattr( o, "name" ):
+            return False
+        if self.name == o.name:
+            return Range.__eq__(self, o)
+        return False
+
+    ## Return name
+    #
+    def getName( self ):
+        return self.name
+
+    ## Set attributes from tuple
+    #
+    # @param tuple: a tuple with (name,seqname,start,end)
+    #
+    def setFromTuple(self, tuple):
+        # the first field is the name, the remaining ones are handled by Range
+        self.name = tuple[0]
+        Range.setFromTuple(self, tuple[1:])
+
+    ## Set attributes from string
+    #
+    # @param string a string formatted like name<sep>seqname<sep>start<sep>end
+    # @param sep field separator
+    #
+    def setFromString(self, string, sep="\t"):
+        # drop a single trailing newline; 'endswith' is safe on an empty string
+        # whereas indexing string[-1] raises an IndexError
+        if string.endswith( "\n" ):
+            string = string[:-1]
+        self.setFromTuple( string.split(sep) )
+
+    ## Reset
+    #
+    # Put back the default values: empty names and -1 for both coordinates
+    #
+    def reset(self):
+        self.setFromTuple( [ "", "", -1, -1 ] )
+
+    ## Read attributes from a Map file
+    #
+    # @param fileHandler: file handler of the file being read
+    # @return: 1 on success, 0 at the end of the file or when the line has too few fields
+    #
+    # @note the instance is reset before reading, hence it holds the default
+    # values whenever 0 is returned
+    #
+    def read(self, fileHandler):
+        self.reset()
+        line = fileHandler.readline()
+        if line == "":
+            return 0
+        tokens = line.split("\t")
+        # a line must provide at least one field per instance attribute
+        # NOTE(review): presumably 4 (name, seqname, start, end); this depends on
+        # the attributes set by Range, to be confirmed
+        if len(tokens) < len(self.__dict__):
+            return 0
+        self.setFromTuple(tokens)
+        return 1
+
+    ## Return the attributes as a formatted string
+    #
+    # @return the name followed by a tabulation and the output of Range.toString
+    #
+    def toString(self):
+        return "%s\t%s" % ( self.name, Range.toString(self) )
+
+    ## Write attributes into a Map file
+    #
+    # @param fileHandler: file handler of the file being filled
+    #
+    def write(self, fileHandler):
+        fileHandler.write("%s\n" % (self.toString()))
+
+    ## Save attributes into a Map file
+    #
+    # @param file: name of the file being filled (opened in append mode)
+    #
+    def save(self, file):
+        # the 'with' statement closes the file even if the writing fails
+        with open( file, "a" ) as fileHandler:
+            self.write( fileHandler )
+
+    ## Return a Range instance with the attributes
+    #
+    def getRange(self):
+        return Range( self.seqname, self.start, self.end)
+
+    ## Compute the difference with another Map instance
+    #
+    # @param o a Map instance
+    # @return a new Map instance with the name and seqname of the instance and the
+    # coordinates of the Range returned by Range.diff, or a default (empty) Map
+    # instance if that Range is empty
+    #
+    # @note NOTE(review): the original comment says the overlapping region is removed
+    # from the instance itself; this is done (or not) by Range.diff, to be confirmed
+    #
+    def diff(self, o):
+        iRange = Range.diff(self, o.getRange())
+        new = Map()
+        if not iRange.isEmpty():
+            new.name = self.name
+            new.seqname = self.seqname
+            new.start = iRange.start
+            new.end = iRange.end
+        return new
+
+    ## Write attributes in a Path file, the name being the subject and the rest the Range query
+    #
+    # @param fileHandler: file handler of a Path file
+    #
+    # @note only the seqname, the min and max coordinates and the name come from
+    # the instance, all the other fields are written as constants (0 or 0.0)
+    #
+    def writeAsQueryOfPath(self, fileHandler):
+        string = "0\t%s\t%i\t%i\t%s\t0\t0\t0.0\t0\t0" % ( self.seqname,
+                                                          self.getMin(),
+                                                          self.getMax(),
+                                                          self.name )
+        fileHandler.write( "%s\n" % ( string ) )
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Map.pyc

Binary file commons/core/coord/Map.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/MapUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/MapUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,246 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import sys\n+import os\n+from commons.core.coord.Map import Map\n+from commons.core.coord.Set import Set\n+try:\n+ from commons.core.checker.CheckerUtils import CheckerUtils\n+except ImportError:\n+ pass\n+\n+\n+## static methods manipulating Map instances\n+#\n+class MapUtils( object ):\n+ \n+ ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order\n+ #\n+ # @param lMaps list of Map instances\n+ #\n+ def getMapListSortedByIncreasingMinThenMax( lMaps ):\n+ return sorted( lMaps, key=lambda iMap: ( iMap.getMin(), iMap.getMax() ) ) \n+ \n+ getMapListSortedByIncreasingMinThenMax = staticmethod( getMapListSortedByIncreasingMinThenMax )\n+ \n+ \n+ ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max\n+ #\n+ # @param lMaps list of Map instances\n+ #\n+ def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax( lMaps ):\n+ return sorted( lMaps, key=lambda iMap: ( iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax() ) ) \n+ \n+ getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax = staticmethod( getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax )\n+ \n+ \n+ ## Return a dictionary which keys are Map names and values the corresponding Map instances\n+ #\n+ def getDictPerNameFromMapFile( mapFile ):\n+ dName2Maps = {}\n+ mapFileHandler = open( mapFile, "r" )\n+ while True:\n+ line = mapFileHandler.readline()\n+ if line == "":\n+ break\n+ iMap = Map()\n+ iMap.setFromString( line, "\\t" )\n+ if dName2Maps.has_key( iMap.name ):\n+ if iMap == dName2Maps[ iMap.name ]:\n+ continue\n+ else:\n+ msg = "ERROR: in file \'%s\' two different Map instances have the same name \'%s\'" % ( mapFile, iMap.name )\n+ sys.stderr.write( "%s\\n" % ( msg ) )\n+ sys.exit(1)\n+ 
dName2Maps[ iMap.name ] = iMap\n+ mapFileHandler.close()\n+ return dName2Maps\n+ \n+ getDictPerNameFromMapFile = staticmethod( getDictPerNameFromMapFile )\n+\n+ \n+ ## Give a list of Set instances from a list of Map instances\n+ #\n+ # @param lMaps list of Map '..b's.rename( "%s.merge" % inFile,\n+ outFile )\n+ \n+ mergeCoordsInFile = staticmethod( mergeCoordsInFile )\n+ \n+ \n+ ## Return a dictionary which keys are Map seqnames and values the corresponding Map instances\n+ #\n+ def getDictPerSeqNameFromMapFile( mapFile ):\n+ dSeqName2Maps = {}\n+ mapFileHandler = open( mapFile, "r" )\n+ while True:\n+ line = mapFileHandler.readline()\n+ if line == "":\n+ break\n+ iMap = Map()\n+ iMap.setFromString( line, "\\t" )\n+ if not dSeqName2Maps.has_key( iMap.seqname ):\n+ dSeqName2Maps[ iMap.seqname ] = []\n+ dSeqName2Maps[ iMap.seqname ].append( iMap )\n+ mapFileHandler.close()\n+ return dSeqName2Maps\n+ \n+ getDictPerSeqNameFromMapFile = staticmethod( getDictPerSeqNameFromMapFile )\n+ \n+ \n+ ## Convert an Map file into a Set file\n+ #\n+ # @param mapFile string input map file name\n+ # @param setFile string output set file name\n+ #\n+ def convertMapFileIntoSetFile( mapFileName, setFileName = "" ):\n+ if setFileName == "":\n+ setFileName = "%s.set" % mapFileName\n+ mapFileHandler = open( mapFileName, "r" )\n+ setFileHandler = open( setFileName, "w" )\n+ iMap = Map()\n+ count = 0\n+ while True:\n+ line = mapFileHandler.readline()\n+ if line == "":\n+ break\n+ iMap.setFromString(line)\n+ count += 1\n+ iSet = Set()\n+ iSet.id = count\n+ iSet.name = iMap.getName()\n+ iSet.seqname = iMap.getSeqname()\n+ iSet.start = iMap.getStart()\n+ iSet.end = iMap.getEnd()\n+ iSet.write(setFileHandler)\n+ mapFileHandler.close()\n+ setFileHandler.close()\n+ \n+ convertMapFileIntoSetFile = staticmethod( convertMapFileIntoSetFile )\n+ \n+ ## Write Map instances contained in the given list\n+ #\n+ # @param lMaps list of Map instances\n+ # @param fileName a file name\n+ # @param mode the open 
mode of the file \'"w"\' or \'"a"\' \n+ #\n+ def writeListInFile(lMaps, fileName, mode="w"):\n+ fileHandler = open(fileName, mode)\n+ for iMap in lMaps:\n+ iMap.write(fileHandler)\n+ fileHandler.close()\n+ \n+ writeListInFile = staticmethod( writeListInFile )\n+\n+ \n+ ## Get the length of the shorter seq in map file\n+ #\n+ # @param mapFileName\n+ # @param mode the open mode of the file \'"w"\' or \'"a"\' \n+ #\n+ def getMinLengthOfMapFile(self, mapFileName):\n+ fileHandler = open(mapFileName, "r")\n+ line = fileHandler.readline()\n+ start = int (line.split(\'\\t\')[2])\n+ end = int (line.split(\'\\t\')[3])\n+ min = end - start + 1\n+ while True:\n+ line = fileHandler.readline()\n+ if line == "":\n+ break\n+ start = int (line.split(\'\\t\')[2])\n+ end = int (line.split(\'\\t\')[3])\n+ currentMin = end - start + 1\n+ if min >= currentMin:\n+ min = currentMin\n+ fileHandler.close()\n+ return min\n+\n+ ## Get the max length of the shorter seq in map file\n+ #\n+ # @param mapFileName\n+ # @param mode the open mode of the file \'"w"\' or \'"a"\' \n+ #\n+ def getMaxLengthOfMapFile(self, mapFileName):\n+ fileHandler = open(mapFileName, "r")\n+ line = fileHandler.readline()\n+ start = int (line.split(\'\\t\')[2])\n+ end = int (line.split(\'\\t\')[3])\n+ max = end - start + 1\n+ while True:\n+ line = fileHandler.readline()\n+ if line == "":\n+ break\n+ start = int (line.split(\'\\t\')[2])\n+ end = int (line.split(\'\\t\')[3])\n+ currentMax = end - start + 1\n+ if max <= currentMax:\n+ max = currentMax\n+ fileHandler.close()\n+ return max\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Match.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Match.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,206 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import sys\n+from commons.core.coord.Range import Range\n+from commons.core.coord.Path import Path\n+\n+\n+## Handle a chain of match(es) between two sequences, query and subject, with an identifier and the length of the input sequences\n+#\n+class Match( Path ):\n+ \n+ ## Constructor\n+ #\n+ def __init__(self):\n+ Path.__init__(self)\n+ self.query_length = -1\n+ self.query_length_perc = -1 # length of the match on the query / length of the query\n+ self.query_seqlength = -1\n+ self.match_length_perc = -1 # length of the match on the query / total length of the subject\n+ self.subject_length = -1\n+ self.subject_length_perc = -1 # length of the match on the subject / length of the subject\n+ self.subject_seqlength = -1\n+ \n+ ## Equal operator\n+ #\n+ def __eq__(self, o):\n+ if o == None \\\n+ or self.query_length != o.query_length or self.query_length_perc != o.query_length_perc\\\n+ or self.query_seqlength != o.query_seqlength or self.subject_length != o.subject_length\\\n+ or self.subject_length_perc != o.subject_length_perc or self.subject_seqlength != o.subject_seqlength\\\n+ or self.match_length_perc != o.match_length_perc:\n+ return False\n+ return Path.__eq__(self, o)\n+ \n+ ## Return the length of the match on the query divided by the total length of the query\n+ #\n+ def getLengthPercOnQuery(self):\n+ return self.query_length_perc\n+ \n+ ## Return the length of the match on the subject divided by the total length of the subject\n+ #\n+ def getLengthPercOnSubject(self):\n+ return self.subject_length_perc\n+ \n+ ## Return the length of the match on the subject\n+ #\n+ def getLengthMatchOnSubject(self):\n+ return self.subject_length\n+ \n+ ## Set attributes from a tuple\n+ # \n+ # @param tuple: a tuple with (query name,query start,query end,\n+ # query length, query length perc (between 0-1), match length 
perc (between 0-1), subject name,\n+ # subject start,subject end,subject length, subject length percentage (between 0-1), e_value,score,identity,id)\n+ #\n+ def setFromTuple( self, tuple ):\n+ queryStart = int(tuple[1])\n+ queryEnd = int(tuple[2])\n+ subjectStart = int(tuple[7])\n+ subjectEnd = int(tuple[8])\n+ if quer'..b'gth = -1\n+ self.match_length_perc = -1\n+ self.subject_length = -1\n+ self.subject_length_perc = -1\n+ self.subject_seqlength = -1\n+ \n+ ## Return a formated string of the attribute data\n+ # \n+ def toString( self ):\n+ string = "%s" % ( self.range_query.toString() )\n+ string += "\\t%i\\t%f" % ( self.query_length,\n+ self.query_length_perc )\n+ string += "\\t%f" % ( self.match_length_perc )\n+ string += "\\t%s" % ( self.range_subject.toString() )\n+ string += "\\t%i\\t%f" % ( self.subject_length,\n+ self.subject_length_perc )\n+ string += "\\t%g\\t%i\\t%f" % ( self.e_value,\n+ self.score,\n+ self.identity )\n+ string += "\\t%i" % ( self.id )\n+ return string\n+ \n+ ## Return a Path instance\n+ #\n+ def getPathInstance( self ):\n+ p = Path()\n+ tuple = ( self.id,\n+ self.range_query.seqname,\n+ self.range_query.start,\n+ self.range_query.end,\n+ self.range_subject.seqname,\n+ self.range_subject.start,\n+ self.range_subject.end,\n+ self.e_value,\n+ self.score,\n+ self.identity )\n+ p.setFromTuple( tuple )\n+ return p\n+ \n+ ## Give information about a match whose query is included in the subject\n+ # \n+ # @return string\n+ #\n+ def getQryIsIncluded( self ):\n+ string = "query %s (%d bp: %d-%d) is contained in subject %s (%d bp: %d-%d): id=%.2f - %.3f - %.3f - %.3f" %\\\n+ ( self.range_query.seqname, self.query_seqlength, self.range_query.start, self.range_query.end,\n+ self.range_subject.seqname, self.subject_seqlength, self.range_subject.start, self.range_subject.end,\n+ self.identity, self.query_length_perc, self.match_length_perc, self.subject_length_perc )\n+ return string\n+ \n+ def increaseLengthPercOnQuery(self, coverage):\n+ 
self.query_length_perc += coverage\n+ \n+ ## Compare the object with another match and see if they are equal\n+ # (same identity, E-value and score + same subsequences whether in query or subject)\n+ #\n+ # @return True if objects are equals False otherwise\n+ #\n+ def isDoublonWith( self, match, verbose=0 ):\n+\n+ # if both matches have same identity, score and E-value\n+ if self.identity == match.identity and self.score == match.score and self.e_value == match.e_value:\n+\n+ # if query and subject are identical\n+ if ( self.range_query.seqname == match.range_query.seqname \\\n+ and self.range_subject.seqname == match.range_subject.seqname ):\n+\n+ # if the coordinates are equal\n+ if self.range_query.__eq__( match.range_query ) and self.range_subject.__eq__( match.range_subject ):\n+ return True\n+\n+ else:\n+ if verbose > 0: print "different coordinates"; sys.stdout.flush()\n+ return False\n+\n+ # if query and subject are reversed but identical\n+ elif self.range_query.seqname == match.range_subject.seqname and self.range_subject.seqname == match.range_query.seqname:\n+\n+ # if the coordinates are equal\n+ if self.range_query.__eq__( match.range_subject ) and self.range_subject.__eq__( match.range_query ):\n+ return True\n+\n+ else:\n+ if verbose > 0: print "different coordinates"; sys.stdout.flush()\n+ return False\n+\n+ else:\n+ if verbose > 0: print "different sequence names"; sys.stdout.flush()\n+ return False\n+\n+ else:\n+ if verbose > 0: print "different match numbers"; sys.stdout.flush()\n+ return False\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/MatchUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/MatchUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,288 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import math\n+import os\n+import sys\n+from commons.core.coord.Match import Match\n+from commons.core.checker.RepetException import RepetException\n+\n+## Static methods for the manipulation of Match instances\n+#\n+class MatchUtils ( object ):\n+ \n+ ## Return a list with Match instances from the given file\n+ #\n+ # @param inFile name of a file in the Match format\n+ # @return a list of Match instances\n+ #\n+ def getMatchListFromFile(inFile ):\n+ lMatchInstances = []\n+ inFileHandler = open( inFile, "r" )\n+ while True:\n+ line = inFileHandler.readline()\n+ if line == "":\n+ break\n+ if line[0:10] == "query.name":\n+ continue\n+ m = Match()\n+ m.setFromString( line )\n+ lMatchInstances.append( m )\n+ inFileHandler.close()\n+ return lMatchInstances\n+ \n+ getMatchListFromFile = staticmethod( getMatchListFromFile )\n+ \n+ ## Split a Match list in several Match lists according to the subject\n+ #\n+ # @param lMatches a list of Match instances\n+ # @return a dictionary which keys are subject names and values Match lists\n+ #\n+ def getDictOfListsWithSubjectAsKey( lMatches ):\n+ dSubject2MatchList = {}\n+ for iMatch in lMatches:\n+ if not dSubject2MatchList.has_key( iMatch.range_subject.seqname ):\n+ dSubject2MatchList[ iMatch.range_subject.seqname ] = []\n+ dSubject2MatchList[ iMatch.range_subject.seqname ].append( iMatch )\n+ return dSubject2MatchList\n+ \n+ getDictOfListsWithSubjectAsKey = staticmethod( getDictOfListsWithSubjectAsKey )\n+ \n+ ## Split a Match list in several Match lists according to the query\n+ #\n+ # @param lMatches a list of Match instances\n+ # @return a dictionary which keys are query names and values Match lists\n+ #\n+ def getDictOfListsWithQueryAsKey ( lMatches ):\n+ dQuery2MatchList = {}\n+ for iMatch in lMatches:\n+ if not dQuery2MatchList.has_key( iMatch.range_query.seqname ):\n+ 
dQuery2MatchList[ iMatch.range_query.seqname ] = []\n+ dQuery2MatchList[ iMatch.range_query.seqname ].append( iMatch )\n+ return dQuery2MatchList\n+ \n+ getDictOfListsWithQueryAsKey = staticmethod( getDictOfListsWithQueryAsKey ) \n+ \n+ ## Write M'..b' else:\n+ dMatches = MatchUtils.getDictOfListsWithSubjectAsKey(lMatches)\n+ \n+ for qry in dMatches.keys():\n+ countMatch = 0\n+ for match in dMatches[ qry ]:\n+ \n+ if match.identity >= thresIdentityPerc and getattr(match,whatToCount.lower() +"_length_perc") >= thresLength:\n+ countMatch += 1\n+ if countMatch > 0:\n+ countSbj += 1\n+ return countSbj\n+ \n+ getNbDistinctSequencesInsideMatchesWithThresh = staticmethod(getNbDistinctSequencesInsideMatchesWithThresh)\n+ \n+ ## Convert a \'match\' file (output from Matcher) into an \'align\' file\n+ ## replace old parser.tab2align\n+ #\n+ # @param inFileName a string input file name\n+ #\n+ def convertMatchFileToAlignFile(inFileName):\n+ basename = os.path.splitext(inFileName)[0]\n+ outFileName = "%s.align" % basename\n+ outFile = open(outFileName, "w")\n+ \n+ lMatches = MatchUtils.getMatchListFromFile(inFileName) \n+ \n+ for match in lMatches:\n+ string = "%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n" % ( match.getQueryName(), match.getQueryStart(), match.getQueryEnd(), match.getSubjectName(), match.getSubjectStart(), match.getSubjectEnd(), match.getEvalue(), match.getScore(), match.getIdentity() )\n+ outFile.write( string )\n+ \n+ outFile.close()\n+ \n+ convertMatchFileToAlignFile = staticmethod(convertMatchFileToAlignFile)\n+ \n+ ## Convert a \'match\' file (output from Matcher) into an \'abc\' file (MCL input file)\n+ # Use coverage on query for arc value\n+ #\n+ # @param matchFileName string input match file name\n+ # @param outFileName string output abc file name\n+ # @param coverage float query coverage filter threshold\n+ #\n+ @staticmethod\n+ def convertMatchFileIntoABCFileOnQueryCoverage(matchFileName, outFileName, coverage = 0):\n+ with open(matchFileName) as 
inF:\n+ with open(outFileName, "w") as outF:\n+ inF.readline()\n+ inLine = inF.readline()\n+ while inLine:\n+ splittedLine = inLine.split("\\t")\n+ if float(splittedLine[4]) >= coverage:\n+ outLine = "\\t".join([splittedLine[0], splittedLine[6], splittedLine[4]])\n+ outLine += "\\n"\n+ outF.write(outLine)\n+ inLine = inF.readline()\n+\n+ ## Adapt the path IDs as the input file is the concatenation of several \'Match\' files, and remove the extra header lines. \n+ ## replace old parser.tabnum2id\n+ #\n+ # @param fileName a string input file name\n+ # @param outputFileName a string output file name (optional)\n+ #\n+ def generateMatchFileWithNewPathId(fileName, outputFileName=None):\n+ if outputFileName is None: \n+ outFile = open(fileName, "w")\n+ else:\n+ outFile = open(outputFileName, "w") \n+ outFile.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ \n+ lMatches = MatchUtils.getMatchListFromFile(fileName) \n+ count = 1\n+ dMatchKeyIdcount = {}\n+ \n+ for match in lMatches:\n+ key_id = str(match.getIdentifier()) + "-" + match.getQueryName() + "-" + match.getSubjectName()\n+ if not key_id in dMatchKeyIdcount.keys():\n+ newPath = count\n+ count += 1\n+ dMatchKeyIdcount[ key_id ] = newPath\n+ else:\n+ newPath = dMatchKeyIdcount[ key_id ]\n+ \n+ match.id = newPath\n+ outFile.write( match.toString()+"\\n" ) \n+ outFile.close()\n+ \n+ generateMatchFileWithNewPathId = staticmethod(generateMatchFileWithNewPathId)\n+ \n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/MergedRange.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/MergedRange.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,98 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+## Record a region on multiple sequences using Path ID information
+#
+class MergedRange(object):
+
+    ## Constructor
+    #
+    # @param lId list of Path ID (copied, so that merge() never modifies the caller's list)
+    # @param start the start coordinate
+    # @param end the end coordinate
+    #
+    def __init__(self, lId = None, start = -1, end = -1):
+        # merge() extends and sorts self._lId in place: keep a private copy
+        self._lId = list(lId) if lId else []
+        self._start = start
+        self._end = end
+
+    ## Equal operator
+    #
+    # @param o a MergedRange instance
+    # @return True if ID list, start and end are all equal; NotImplemented if 'o' is not a MergedRange
+    #         (so that 'instance == None' evaluates to False instead of raising AttributeError)
+    #
+    def __eq__(self, o):
+        if not isinstance(o, MergedRange):
+            return NotImplemented
+        return o._lId == self._lId and o._start == self._start and o._end == self._end
+
+    ## Unequal operator
+    #
+    # @param o a MergedRange instance
+    # @note defined explicitly because Python 2 does not derive '!=' from __eq__
+    #
+    def __ne__(self, o):
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return isEqual
+        return not isEqual
+
+    ## Return True if the MergedRange instance overlaps with another MergedRange instance, False otherwise
+    #
+    # @param o a MergedRange instance
+    # @return boolean False or True
+    # @note bounds are inclusive: two ranges sharing a single coordinate overlap
+    # @note coordinates are compared as stored, they are not re-ordered (presumably start <= end; verify against callers)
+    #
+    def isOverlapping(self, o):
+        # 'o' entirely covers the instance
+        if o._start <= self._start and o._end >= self._end:
+            return True
+        # one of the bounds of 'o' lies inside the instance
+        if (o._start >= self._start and o._start <= self._end) or (o._end >= self._start and o._end <= self._end):
+            return True
+        return False
+
+    ## Merge coordinates and ID of two Merged Range
+    #
+    # @param o a MergedRange instance
+    # @note the instance is modified in place, 'o' is left untouched; no overlap check is done here
+    #
+    def merge(self, o):
+        self._start = min(self._start, o._start)
+        self._end = max(self._end, o._end)
+        self._lId.extend(o._lId)
+        self._lId.sort()
+
+    ## Set a Merged Range instance using a Match instance
+    #
+    # @param iMatch instance Match instance
+    # @note the ID list is replaced by the single identifier of the match, the coordinates by those of its query range
+    #
+    def setFromMatch(self, iMatch):
+        self._lId = [iMatch.id]
+        self._start = iMatch.range_query.start
+        self._end = iMatch.range_query.end
+
+    ## Get a Merged Range instance list using a Match instance list
+    #
+    # @param lIMatch list Match instance list
+    # @return lMergedRange list MergedRange instance list (one per Match, in the same order)
+    #
+    @staticmethod
+    def getMergedRangeListFromMatchList(lIMatch):
+        lMergedRange = []
+        for iMatch in lIMatch:
+            mr = MergedRange()
+            mr.setFromMatch(iMatch)
+            lMergedRange.append(mr)
+        return lMergedRange
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Path.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Path.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,149 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.coord.Align import Align
+from commons.core.coord.Set import Set
+from commons.core.coord.Range import Range
+
+
+## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) with an identifier
+#
+class Path( Align ):
+
+    ## Constructor
+    #
+    # @param id identifier
+    # @param range_q: a Range instance for the query (a new empty Range if omitted)
+    # @param range_s: a Range instance for the subject (a new empty Range if omitted)
+    # @param e_value: E-value of the match
+    # @param score: score of the match
+    # @param identity: identity percentage of the match
+    #
+    def __init__( self, id=-1, range_q=None, range_s=None, e_value=0, score=0, identity=0 ):
+        # A 'Range()' default value would be evaluated only once, when the method is defined,
+        # and the same two objects would then be handed to every Path built without ranges.
+        if range_q is None:
+            range_q = Range()
+        if range_s is None:
+            range_s = Range()
+        self.id = int( id )
+        Align.__init__( self, range_q, range_s, e_value, score, identity )
+
+    ## Equal operator
+    #
+    # @param o a Path instance (or None)
+    # @return False if 'o' is None or has a different identifier, otherwise the result of Align.__eq__
+    #
+    def __eq__(self, o):
+        # identity test: 'o == None' would needlessly go through the __eq__ of 'o'
+        if o is None or self.id != o.id:
+            return False
+        return Align.__eq__(self, o)
+
+    ## Set attributes from tuple
+    #
+    # @param tuple a tuple with (id,queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
+    # @note data are loaded such that the query is always on the direct strand
+    #
+    def setFromTuple(self, tuple):
+        self.id = int(tuple[0])
+        # the remaining fields are handled by the parent class
+        Align.setFromTuple(self, tuple[1:])
+
+    ## Reset
+    #
+    # @note the identifier is set back to -1, the other attributes are reset by the parent class
+    #
+    def reset(self):
+        self.id = -1
+        Align.reset(self)
+
+    ## Return the attributes as a formatted string
+    #
+    # @return the identifier, a tabulation, then the string returned by Align.toString
+    #
+    def toString(self):
+        string = "%i" % ( self.id )
+        string += "\t%s" % (Align.toString(self))
+        return string
+
+
+    ## Return the identifier of the Path instance
+    #
+    def getIdentifier( self ):
+        return self.id
+
+    ## Return a Set instance with the subject mapped on the query
+    #
+    # @return a Set named after the subject, located on the query sequence, with the query coordinates
+    #         (swapped when the subject is not on the direct strand)
+    #
+    def getSubjectAsSetOfQuery(self):
+        iSet = Set()
+        iSet.id = self.id
+        iSet.name = self.range_subject.seqname
+        iSet.seqname = self.range_query.seqname
+        if self.range_subject.isOnDirectStrand():
+            iSet.start = self.range_query.start
+            iSet.end = self.range_query.end
+        else:
+            iSet.start = self.range_query.end
+            iSet.end = self.range_query.start
+        return iSet
+
+    #TODO: add tests !!!!
+    #WARNING: subject always in direct strand !!!
+    ## Return a Set instance with the query mapped on the subject
+    #
+    # @return a Set named after the query, located on the subject sequence, with the subject coordinates
+    #         (swapped when the subject is not on the direct strand; the strand of the query is not used)
+    #
+    def getQuerySetOfSubject(self):
+        iSet = Set()
+        iSet.id = self.id
+        iSet.name = self.range_query.seqname
+        iSet.seqname = self.range_subject.seqname
+        if self.range_subject.isOnDirectStrand():
+            iSet.start = self.range_subject.start
+            iSet.end = self.range_subject.end
+        else:
+            iSet.start = self.range_subject.end
+            iSet.end = self.range_subject.start
+        return iSet
+
+    ## Return True if the instance can be merged with another Path instance, False otherwise
+    #
+    # @param o a Path instance
+    # @note requires different identifiers, same query and subject names, same strands,
+    #       and overlapping ranges on both the query and the subject
+    #
+    def canMerge(self, o):
+        return o.id != self.id \
+            and o.range_query.seqname == self.range_query.seqname \
+            and o.range_subject.seqname == self.range_subject.seqname \
+            and o.range_query.isOnDirectStrand() == self.range_query.isOnDirectStrand() \
+            and o.range_subject.isOnDirectStrand() == self.range_subject.isOnDirectStrand() \
+            and o.range_query.isOverlapping(self.range_query) \
+            and o.range_subject.isOverlapping(self.range_subject)
+
+    ## Return an Align instance with the same attributes, except the identifier
+    #
+    def getAlignInstance(self):
+        iAlign = Align()
+        # same field order as the tuple passed to Align.setFromTuple in setFromTuple above
+        lAttributes = [ self.range_query.seqname,
+                        self.range_query.start,
+                        self.range_query.end,
+                        self.range_subject.seqname,
+                        self.range_subject.start,
+                        self.range_subject.end,
+                        self.e_value,
+                        self.score,
+                        self.identity ]
+        iAlign.setFromTuple( lAttributes )
+        return iAlign

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/PathUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/PathUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,858 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import copy\n+from commons.core.coord.Path import Path\n+from commons.core.coord.SetUtils import SetUtils\n+from commons.core.coord.Map import Map\n+from commons.core.coord.AlignUtils import AlignUtils\n+from commons.core.checker.RepetException import RepetDataException\n+\n+## Static methods for the manipulation of Path instances\n+#\n+class PathUtils ( object ):\n+ \n+ ## Change the identifier of each Set instance in the given list\n+ #\n+ # @param lPaths list of Path instances\n+ # @param newId new identifier\n+ #\n+ def changeIdInList(lPaths, newId):\n+ for iPath in lPaths:\n+ iPath.id = newId\n+ \n+ changeIdInList = staticmethod( changeIdInList )\n+ \n+ \n+ ## Return a list of Set instances containing the query range from a list of Path instances\n+ # \n+ # @param lPaths a list of Path instances\n+ # \n+ def getSetListFromQueries(lPaths):\n+ lSets = []\n+ for iPath in lPaths:\n+ lSets.append( iPath.getSubjectAsSetOfQuery() )\n+ return lSets\n+ \n+ getSetListFromQueries = staticmethod( getSetListFromQueries )\n+ \n+ #TODO: add tests !!!!\n+ ## Return a list of Set instances containing the query range from a list of Path instances\n+ # \n+ # @param lPaths a list of Path instances\n+ #\n+ @staticmethod\n+ def getSetListFromSubjects(lPaths):\n+ lSets = []\n+ for iPath in lPaths:\n+ lSets.append( iPath.getQuerySetOfSubject() )\n+ return lSets\n+ \n+ \n+ ## Return a sorted list of Range instances containing the subjects from a list of Path instances\n+ # \n+ # @param lPaths a list of Path instances\n+ # @note meaningful only if all Path instances have same identifier\n+ #\n+ def getRangeListFromSubjects( lPaths ):\n+ lRanges = []\n+ for iPath in lPaths:\n+ lRanges.append( iPath.range_subject )\n+ if lRanges[0].isOnDirectStrand():\n+ return sorted( lRanges, key=lambda iRange: ( 
iRange.getMin(), iRange.getMax() ) )\n+ else:\n+ return sorted( lRanges, key=lambda iRange: ( iRange.getMax(), iRange.getMin() ) )\n+ \n+ getRangeListFromSubjects = staticmethod( getRangeListFromSubjects )\n+ \n+ \n+ ## Return a tuple with min and max of query coordinates from Path instances in the given list\n+ #\n+ # @param '..b'te the \'path\' query is supposed to correspond to the \'gff\' first column\n+ #\n+ def convertPathFileIntoGffFile( pathFile, gffFile, source="REPET", verbose=0 ):\n+ dId2PathList = PathUtils.getDictOfListsWithIdAsKeyFromFile( pathFile )\n+ if verbose > 0:\n+ msg = "number of chains: %i" % ( len(dId2PathList.keys()) )\n+ sys.stdout.write( "%s\\n" % msg )\n+ sys.stdout.flush()\n+ gffFileHandler = open( gffFile, "w" )\n+ for id in dId2PathList.keys():\n+ if len( dId2PathList[ id ] ) == 1:\n+ iPath = dId2PathList[ id ][0]\n+ string = iPath.toStringAsGff( ID="%i" % iPath.getIdentifier(),\n+ source=source )\n+ gffFileHandler.write( "%s\\n" % string )\n+ else:\n+ iPathrange = PathUtils.convertPathListToPathrange( dId2PathList[ id ] )\n+ string = iPathrange.toStringAsGff( ID="ms%i" % iPathrange.getIdentifier(),\n+ source=source )\n+ gffFileHandler.write( "%s\\n" % string )\n+ count = 0\n+ for iPath in dId2PathList[ id ]:\n+ count += 1\n+ string = iPath.toStringAsGff( type="match_part",\n+ ID="mp%i-%i" % ( iPath.getIdentifier(), count ),\n+ Parent="ms%i" % iPathrange.getIdentifier(),\n+ source=source )\n+ gffFileHandler.write( "%s\\n" % string )\n+ gffFileHandler.close()\n+ \n+ convertPathFileIntoGffFile = staticmethod( convertPathFileIntoGffFile )\n+ \n+ \n+ ## Convert a Path file into a Set file\n+ # replace old parser.pathrange2set\n+ # @param pathFile: name of the input Path file\n+ # @param setFile: name of the output Set file\n+ #\n+ def convertPathFileIntoSetFile( pathFile, setFile ):\n+ pathFileHandler = open( pathFile, "r" )\n+ setFileHandler = open( setFile, "w" )\n+ iPath = Path()\n+ while True:\n+ line = pathFileHandler.readline()\n+ 
if line == "":\n+ break\n+ iPath.setFromString( line )\n+ iSet = iPath.getSubjectAsSetOfQuery()\n+ iSet.write( setFileHandler )\n+ pathFileHandler.close()\n+ setFileHandler.close()\n+ \n+ convertPathFileIntoSetFile = staticmethod( convertPathFileIntoSetFile )\n+ \n+ ## Write Path File without duplicated Path (same query, same subject and same coordinate)\n+ #\n+ # @param inputFile: name of the input Path file\n+ # @param outputFile: name of the output Path file\n+ #\n+ def removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(inputFile, outputFile):\n+ f = open(inputFile, "r")\n+ line = f.readline()\n+ previousQuery = ""\n+ previousSubject = ""\n+ lPaths = []\n+ while line:\n+ iPath = Path()\n+ iPath.setFromString(line)\n+ query = iPath.getQueryName()\n+ subject = iPath.getSubjectName()\n+ if (query != previousQuery or subject != previousSubject) and lPaths != []: \n+ lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n+ PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")\n+ lPaths = []\n+ lPaths.append(iPath)\n+ previousQuery = query\n+ previousSubject = subject\n+ line = f.readline()\n+ lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n+ PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")\n+ f.close()\n+ removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName = staticmethod(removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName)\n+ \n+ \n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Range.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Range.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,361 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+## Record a region on a given sequence\n+#\n+class Range( object ):\n+\n+ ## Constructor\n+ #\n+ # @param seqname the name of the sequence\n+ # @param start the start coordinate\n+ # @param end the end coordinate\n+ #\n+ def __init__(self, seqname="", start=-1, end=-1):\n+ self.seqname = seqname\n+ self.start = int(start)\n+ self.end = int(end)\n+ \n+ ## Equal operator\n+ #\n+ # @param o a Range instance\n+ #\n+ def __eq__(self, o):\n+ if self.seqname == o.seqname and self.start == o.start and self.end == o.end:\n+ return True\n+ return False\n+ \n+ ## Unequal operator\n+ #\n+ # @param o a Range instance\n+ #\n+ def __ne__(self, o):\n+ return not self.__eq__(o)\n+ \n+ ## Convert the object into a string\n+ #\n+ # @note used in \'print myObject\'\n+ #\n+ def __str__( self ):\n+ return self.toString()\n+ \n+ ## Convert the object into a string\n+ #\n+ # @note used in \'repr(myObject)\' for debugging\n+ #\n+ def __repr__( self ):\n+ return self.toString().replace("\\t",";")\n+ \n+ def setStart(self, start):\n+ self.start = start\n+ \n+ def setEnd(self, end):\n+ self.end = end\n+ \n+ def setSeqName(self, seqName):\n+ self.seqname = seqName\n+ \n+ ## Reset\n+ #\n+ def reset(self):\n+ self.seqname = ""\n+ self.start = -1\n+ self.end = -1\n+ \n+ ## Return the attributes as a formatted string\n+ # \n+ def toString(self):\n+ string = "%s" % (self.seqname)\n+ string += "\\t%d" % (self.start)\n+ string += "\\t%d" % (self.end)\n+ return string\n+ \n+ ## Show the attributes\n+ #\n+ def show(self):\n+ print self.toString()\n+ \n+ ## Return seqname\n+ #\n+ def getSeqname(self):\n+ return self.seqname\n+ \n+ ## Return the start coordinate\n+ #\n+ def getStart(self):\n+ return self.start\n+ \n+ ## Return the end coordinate\n+ #\n+ def getEnd(self):\n+ return self.end\n+ \n+ ## Return the lowest value between start and end 
coordinates\n+ #\n+ def getMin(self):\n+ return min(self.start, self.end)\n+ \n+ ## Return the greatest value between start and end attributes\n+ # \n+ def getMax(self):\n+ return max(self.start, self.end)\n+ \n+ ## Return Tr'..b', o ):\n+ if o.seqname != self.seqname:\n+ return False\n+ if self.getMin() >= o.getMin() and self.getMax() <= o.getMax():\n+ return True\n+ else:\n+ return False\n+\n+ \n+ ## Return the distance between the start of the instance and the start of another Range instance\n+ #\n+ # @param o a Range instance\n+ #\n+ def getDistance(self, o):\n+ if self.isOnDirectStrand() == o.isOnDirectStrand():\n+ if self.isOverlapping(o):\n+ return 0\n+ elif self.isOnDirectStrand():\n+ if self.start > o.start:\n+ return self.start - o.end\n+ else:\n+ return o.start - self.end\n+ else:\n+ if self.start > o.start:\n+ return self.end - o.start\n+ else:\n+ return o.end - self.start\n+ return -1\n+ \n+ ## Remove in the instance the region overlapping with another Range instance\n+ #\n+ # @param o a Range instance\n+ # \n+ def diff(self, o):\n+ new_range = Range(self.seqname)\n+ if not self.isOverlapping(o) or self.seqname != o.seqname:\n+ return new_range\n+\n+ istart = min(self.start, self.end)\n+ iend = max(self.start, self.end)\n+ jstart = min(o.start, o.end)\n+ jend = max(o.start, o.end)\n+ if istart < jstart:\n+ if iend <= jend:\n+ if self.isOnDirectStrand():\n+ self.start = istart\n+ self.end = jstart - 1\n+ else:\n+ self.start = jstart - 1\n+ self.end = istart\n+ else:\n+ if self.isOnDirectStrand():\n+ self.start = istart\n+ self.end = jstart - 1\n+ new_range.start = jend + 1\n+ new_range.end = iend\n+ else:\n+ self.start = jstart - 1;\n+ self.end = istart;\n+ new_range.start = iend\n+ new_range.end = jend + 1\n+ else: #istart>=jstart\n+ if iend <= jend:\n+ self.start = 0\n+ self.end = 0\n+ else:\n+ if self.isOnDirectStrand():\n+ self.start = jend + 1\n+ self.end = iend\n+ else:\n+ self.start = iend\n+ self.end = jend + 1\n+ return new_range\n+ \n+ ## Find 
the bin that contains the instance and compute its index\n+ #\n+ # @note Required for coordinate indexing via a hierarchical bin system\n+ #\n+ def findIdx(self):\n+ min_lvl = 3\n+ max_lvl = 6\n+ for bin_lvl in xrange(min_lvl, max_lvl):\n+ if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):\n+ return getIdx(self.start, bin_lvl)\n+ return getIdx(self.start, max_lvl) \n+ \n+ ## Get a bin for fast database access\n+ #\n+ # @return bin number (float)\n+ #\n+ def getBin(self):\n+ for i in xrange(3, 8):\n+ bin_lvl = pow(10, i)\n+ if int(self.start/bin_lvl) == int(self.end/bin_lvl):\n+ return float(bin_lvl+(int(self.start/bin_lvl)/1e10))\n+ bin_lvl = pow(10, 8)\n+ return float(bin_lvl+(int(self.start/bin_lvl)/1e10))\n+ \n+ \n+# Functions\n+\n+# Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system\n+# \n+def getBin(val, bin_lvl):\n+ bin_size = pow(10, bin_lvl)\n+ return long(val / bin_size)\n+ \n+# Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system\n+#\n+def getIdx(val, bin_lvl):\n+ min_lvl = 3\n+ max_lvl = 6\n+ if bin_lvl >= max_lvl:\n+ return long((bin_lvl-min_lvl+1)*pow(10,max_lvl))\n+ return long(((bin_lvl-min_lvl+1)*pow(10,max_lvl))+getBin(val,bin_lvl))\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Range.pyc

Binary file commons/core/coord/Range.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/Set.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/Set.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,125 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.coord.Map import Map
+
+
+## Record a named region on a given sequence with an identifier
+#
+class Set( Map ):
+
+    ## Constructor
+    #
+    # @param id identifier
+    # @param name the name of the region
+    # @param seqname the name of the sequence
+    # @param start the start coordinate
+    # @param end the end coordinate
+    #
+    def __init__(self, id=-1, name="", seqname="", start=-1, end=-1):
+        Map.__init__( self, name, seqname, start, end )
+        self.id = id
+
+    ## Equal operator
+    #
+    def __eq__(self, o):
+        if self.id != o.id:
+            return False
+        else:
+            return Map.__eq__(self, o)
+
+    def getId(self):
+        return self.id
+
+    ## Reset
+    #
+    def reset(self):
+        self.setFromTuple([-1, "", "", -1, -1 ])
+
+    ## Set attributes from tuple
+    #
+    # @param tuple: a tuple with (id, name, seqname, start, end)
+    #
+    def setFromTuple(self, tuple):
+        self.id = int(tuple[0])
+        Map.setFromTuple(self, tuple[1:])
+
+    ## Return the attributes as a formatted string
+    #
+    def toString(self):
+        string = "%i" % (self.id)
+        string += "\t%s" % (Map.toString(self))
+        return string
+
+    ## Merge the instance with another Set instance
+    #
+    # @param o a Set instance
+    #
+    def merge(self, o):
+        if self.seqname == o.seqname:
+            Map.merge(self, o)
+            self.id = min(self.id, o.id)
+
+    ## Return a Map instance with the attributes
+    #
+    def getMap(self):
+        return Map(self.name, self.seqname, self.start, self.end)
+
+    ## Remove in the instance the region overlapping with another Set instance
+    #
+    # @param o a Set instance
+    #
+    def diff(self, o):
+        iMap = Map.diff(self, o.getMap())
+        new = Set()
+        if not iMap.isEmpty():
+            new.id = self.id
+            new.name = self.name
+            new.seqname = self.seqname
+            new.start = iMap.start
+            new.end = iMap.end
+        return new
+
+    ## Return a Map instance with the identifier in the name
+    #
+    def set2map(self):
+        return Map(self.name+"::"+str(self.id),self.seqname,self.start,self.end)
+
+
+    def getMapInstance( self ):
+        """Return a Map filled, via Map.setFromTuple, with name, seqname, start, end."""
+        iMap = Map()
+        lAttributes = [ self.name, self.seqname, self.start, self.end ]
+        iMap.setFromTuple( lAttributes )
+        return iMap

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/SetUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/SetUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,553 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.coord.Set import Set\n+\n+## Static methods for the manipulation of Path instances\n+#\n+class SetUtils( object ):\n+ \n+ ## Change the identifier of each Set instance in the given list\n+ #\n+ # @param lSets list of Set instances\n+ # @param newId new identifier\n+ #\n+ def changeIdInList(lSets, newId):\n+ for iSet in lSets:\n+ iSet.id = newId\n+ \n+ changeIdInList = staticmethod( changeIdInList )\n+ \n+ ## Return the length of the overlap between two lists of Set instances\n+ #\n+ # @param lSets1 list of Set instances\n+ # @param lSets2 list of Set instances\n+ # @return length of overlap\n+ # @warning sequence names are supposed to be identical\n+ #\n+ def getOverlapLengthBetweenLists(lSets1, lSets2):\n+ lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)\n+ lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)\n+ osize = 0\n+ i = 0\n+ j = 0\n+ while i!= len(lSet1Sorted):\n+ while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\\\n+ and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):\n+ j+=1\n+ jj=j\n+ while jj!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[jj]):\n+ osize+=lSet1Sorted[i].getOverlapLength(lSet2Sorted[jj])\n+ jj+=1\n+ i+=1\n+ return osize\n+ \n+ getOverlapLengthBetweenLists = staticmethod( getOverlapLengthBetweenLists )\n+ \n+ ## Return True if the two lists of Set instances overlap, False otherwise \n+ #\n+ # @param lSets1 list of Set instances\n+ # @param lSets2 list of Set instances\n+ # \n+ def areSetsOverlappingBetweenLists( lSets1, lSets2 ):\n+ lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)\n+ lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)\n+ i=0\n+ j=0\n+ while i!= len(lSet1Sorted):\n+ while j!= len(lSet2Sorted) and 
lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\\\n+ and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):\n+ j+=1\n+ if j!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[j]):\n+ return True\n+ i+=1\n+ return False\n+ '..b' def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):\n+ for i in lSet1:\n+ for idx,j in enumerate(lSet2):\n+ n=j.diff(i)\n+ if not n.isEmpty() and n.getLength()>=20:\n+ lSet2.append(n)\n+ lSet2WithoutOverlaps=[]\n+ for i in lSet2:\n+ if not i.isEmpty() and i.getLength()>=20:\n+ lSet2WithoutOverlaps.append(i)\n+ return lSet2WithoutOverlaps\n+ \n+ getListOfSetWithoutOverlappingBetweenTwoListOfSet = staticmethod (getListOfSetWithoutOverlappingBetweenTwoListOfSet)\n+\n+ ## Return a Set list from a Set file\n+ #\n+ # @param setFile string name of a Set file\n+ # @return a list of Set instances\n+ #\n+ def getSetListFromFile( setFile ):\n+ lSets = []\n+ setFileHandler = open( setFile, "r" )\n+ while True:\n+ line = setFileHandler.readline()\n+ if line == "":\n+ break\n+ iSet = Set()\n+ iSet.setFromString( line )\n+ lSets.append( iSet )\n+ setFileHandler.close()\n+ return lSets\n+ \n+ getSetListFromFile = staticmethod( getSetListFromFile )\n+ \n+ \n+ def convertSetFileIntoMapFile( setFile, mapFile ):\n+ setFileHandler = open( setFile, "r" )\n+ mapFileHandler = open( mapFile, "w" )\n+ iSet = Set()\n+ while True:\n+ line = setFileHandler.readline()\n+ if line == "":\n+ break\n+ iSet.setFromString( line )\n+ iMap = iSet.getMapInstance()\n+ iMap.write( mapFileHandler )\n+ setFileHandler.close()\n+ mapFileHandler.close()\n+ \n+ convertSetFileIntoMapFile = staticmethod( convertSetFileIntoMapFile )\n+\n+\n+ def getDictOfListsWithSeqnameAsKey( lSets ):\n+ dSeqnamesToSetList = {}\n+ for iSet in lSets:\n+ if not dSeqnamesToSetList.has_key( iSet.seqname ):\n+ dSeqnamesToSetList[ iSet.seqname ] = []\n+ dSeqnamesToSetList[ iSet.seqname ].append( iSet )\n+ return dSeqnamesToSetList\n+ \n+ getDictOfListsWithSeqnameAsKey = 
staticmethod( getDictOfListsWithSeqnameAsKey )\n+ \n+ \n+ def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):\n+ if minLength == 0 and maxLength == 0:\n+ return lSets\n+ lFiltered = []\n+ for iSet in lSets:\n+ if minLength <= iSet.getLength() <= maxLength:\n+ lFiltered.append( iSet )\n+ return lFiltered\n+ \n+ filterOnLength = staticmethod( filterOnLength )\n+ \n+ \n+ def getListOfNames( setFile ):\n+ lNames = []\n+ setFileHandler = open( setFile, "r" )\n+ iSet = Set()\n+ while True:\n+ line = setFileHandler.readline()\n+ if line == "":\n+ break\n+ iSet.setFromTuple( line[:-1].split("\\t") )\n+ if iSet.name not in lNames:\n+ lNames.append( iSet.name )\n+ setFileHandler.close()\n+ return lNames\n+ \n+ getListOfNames = staticmethod( getListOfNames )\n+\n+\n+ def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):\n+ dNames2DictsId = {}\n+ setFileHandler = open( setFile, "r" )\n+ while True:\n+ line = setFileHandler.readline()\n+ if line == "":\n+ break\n+ iSet = Set()\n+ iSet.setFromTuple( line[:-1].split("\\t") )\n+ if not dNames2DictsId.has_key( iSet.name ):\n+ dNames2DictsId[ iSet.name ] = { iSet.id: [ iSet ] }\n+ else:\n+ if not dNames2DictsId[ iSet.name ].has_key( iSet.id ):\n+ dNames2DictsId[ iSet.name ][ iSet.id ] = [ iSet ]\n+ else:\n+ dNames2DictsId[ iSet.name ][ iSet.id ].append( iSet )\n+ setFileHandler.close()\n+ return dNames2DictsId\n+ \n+ getDictOfDictsWithNamesThenIdAsKeyFromFile = staticmethod( getDictOfDictsWithNamesThenIdAsKeyFromFile )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/SlidingWindow.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/SlidingWindow.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,73 @@
+class SlidingWindow(object):
+    """Window [start, end] moved along a sequence by a fixed step.
+
+    The window initially covers [1, length]. Each call to slideWindowOnce
+    shifts both bounds by 'length - overlap'. With the default arguments
+    (length=1, overlap=1) that step is 0, so the window does not move.
+    """
+
+    def __init__( self, length = 1, overlap = 1 ):
+        self._length = length
+        self._overlap = overlap
+        self._start = 1
+        self._end = length
+        self._step = length - overlap
+
+    def _updateStep(self):
+        # Keep the step consistent with the current length and overlap;
+        # without this, the setters would have no effect on sliding.
+        self._step = self._length - self._overlap
+
+    def slideWindowOnce(self):
+        """Shift both bounds of the window by one step."""
+        self._start = self._start + self._step
+        self._end = self._end + self._step
+
+    def getStart(self):
+        return self._start
+
+    def getEnd(self):
+        return self._end
+
+    def setStart(self, start):
+        self._start = start
+
+    def setEnd(self, end):
+        self._end = end
+
+    def getLength(self):
+        return self._length
+
+    def getOverlap(self):
+        return self._overlap
+
+    def setLength(self, length):
+        """Change the window length and refresh the sliding step.
+
+        The current start and end are left where they are.
+        """
+        self._length = length
+        self._updateStep()
+
+    def setOverlap(self, overlap):
+        """Change the overlap between successive windows and refresh the step."""
+        self._overlap = overlap
+        self._updateStep()
+
+    def getSlidingMsg(self):
+        """Return a progress message with the current start and end."""
+        return "Window is sliding : %s %s" %(self._start, self._end)
+
+class SlidingWindowToCountMatchingBases(SlidingWindow):
+    """Sliding window that clips a feature to the window bounds.
+
+    A feature is any object providing getMin(), getMax(), getStart(),
+    getEnd() and getLength().
+    """
+
+    def getSetLengthOnWindow( self, iSet ):
+        """Return the length of the part of iSet lying on the window.
+
+        The four cases are tried in order: feature inside the window, window
+        inside the feature, feature minimum inside the window, feature
+        maximum inside the window. None is returned when none applies.
+        """
+        if self._isSetIncludedInTheWindow(iSet):
+            return iSet.getLength()
+        if self._isWindowIncludedInTheSet(iSet):
+            return self._length
+        if self._isSetOverlapTheRightSideOfTheWindow(iSet):
+            return self._end - iSet.getMin() + 1
+        if self._isSetOverlapTheLeftSideOfTheWindow(iSet):
+            return iSet.getMax() - self._start + 1
+        return None
+
+    def getCoordSetOnWindow( self, iSet ):
+        """Return (start, end) of the part of iSet lying on the window.
+
+        The cases are tried in the same order as in getSetLengthOnWindow;
+        None is returned when none applies.
+        """
+        if self._isSetIncludedInTheWindow(iSet):
+            return iSet.getStart(), iSet.getEnd()
+        if self._isWindowIncludedInTheSet(iSet):
+            return self.getStart(), self.getEnd()
+        if self._isSetOverlapTheRightSideOfTheWindow(iSet):
+            return iSet.getStart(), self.getEnd()
+        if self._isSetOverlapTheLeftSideOfTheWindow(iSet):
+            return self.getStart(), iSet.getEnd()
+        return None
+
+    def _isSetIncludedInTheWindow(self, feature):
+        # Both extremities of the feature lie within the window.
+        return self._start <= feature.getMin() and self._end >= feature.getMax()
+
+    def _isWindowIncludedInTheSet(self, feature):
+        # The feature spans the whole window.
+        return feature.getMin() <= self._start and feature.getMax() >= self._end
+
+    def _isSetOverlapTheRightSideOfTheWindow(self, feature):
+        # The minimum of the feature falls inside the window.
+        return self._end >= feature.getMin() and self._start <= feature.getMin()
+
+    def _isSetOverlapTheLeftSideOfTheWindow(self, feature):
+        # The maximum of the feature falls inside the window.
+        return self._end >= feature.getMax() and self._start <= feature.getMax()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/__init__.pyc

Binary file commons/core/coord/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/align2set.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/align2set.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+import sys
+import getopt
+from commons.core.coord.Align import Align
+
+def help():
+    """Print the command-line usage on standard output."""
+    progName = sys.argv[0].split("/")[-1]
+    lLines = [ "",
+               "usage: %s [ options ]" % ( progName ),
+               "options:",
+               "     -h: this help",
+               "     -i: input file name (format='align')",
+               "     -o: output file name (format='set', default=inFileName+'.set')",
+               "     -v: verbosity level (default=0/1)",
+               "" ]
+    for line in lLines:
+        print line
+
+
+def align2set( inFileName, outFileName ):
+    """Convert a file in 'align' format into a file in 'set' format.
+
+    Each input line is parsed with Align.setFromString (tab separator) and
+    written as one tab-separated line: running line number (starting at 1),
+    subject name, query name, query start, query end.
+
+    @param inFileName name of the input file (format='align')
+    @param outFileName name of the output file (format='set'), overwritten
+    """
+    iAlign = Align()
+    # 'with' closes both handles even if parsing or writing raises.
+    with open( inFileName, "r" ) as alignFileHandler:
+        with open( outFileName, "w" ) as setFileHandler:
+            for countAlign, line in enumerate( alignFileHandler, 1 ):
+                iAlign.setFromString( line, "\t" )
+                setFileHandler.write( "%i\t%s\t%s\t%i\t%i\n" % ( countAlign,
+                                                                 iAlign.getSubjectName(),
+                                                                 iAlign.getQueryName(),
+                                                                 iAlign.getQueryStart(),
+                                                                 iAlign.getQueryEnd() ) )
+
+
+def main():
+    """Parse the command line and convert the 'align' file into a 'set' file.
+
+    Options: -h (help), -i (input file, mandatory), -o (output file,
+    defaults to the input name plus '.set'), -v (verbosity as an integer).
+    Exits with status 1 on an invalid option or a missing input file, and
+    with status 0 after -h. Returns 0 otherwise.
+    """
+    progName = sys.argv[0].split("/")[-1]
+    inFileName = ""
+    outFileName = ""
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:o:v:" )
+    except getopt.GetoptError as err:
+        print str(err)
+        help()
+        sys.exit(1)
+
+    for option, value in opts:
+        if option == "-h":
+            help()
+            sys.exit(0)
+        if option == "-i":
+            inFileName = value
+        elif option == "-o":
+            outFileName = value
+        elif option == "-v":
+            verbose = int(value)
+
+    if not inFileName:
+        print "ERROR: missing input file name"
+        help()
+        sys.exit(1)
+
+    beVerbose = verbose > 0
+    if beVerbose:
+        print "START %s" % ( progName )
+        sys.stdout.flush()
+
+    if not outFileName:
+        outFileName = "%s.set" % ( inFileName )
+
+#TODO: move 'align2set' into 'AlignUtils.convertAlignFileIntoPSetFile' with a test
+#    AlignUtils.convertAlignFileIntoPSetFile( inFileName, outFileName )
+
+    align2set( inFileName, outFileName )
+
+    if beVerbose:
+        print "END %s" % ( progName )
+        sys.stdout.flush()
+
+    return 0
+
+
+# Script entry point.
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/TestSuite_coord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/TestSuite_coord.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_Align
+import Test_AlignUtils
+import Test_Map
+import Test_MapUtils
+import Test_Match
+import Test_MatchUtils
+import Test_Path
+import Test_PathUtils
+import Test_Range
+import Test_Set
+import Test_SetUtils
+
+
+def main():
+    """Gather the 'test*' methods of every coord test case and run them."""
+    lTestCases = [ Test_Align.Test_Align,
+                   Test_AlignUtils.Test_AlignUtils,
+                   Test_Map.Test_Map,
+                   Test_MapUtils.Test_MapUtils,
+                   Test_Match.Test_Match,
+                   Test_MatchUtils.Test_MatchUtils,
+                   Test_Path.Test_Path,
+                   Test_PathUtils.Test_PathUtils,
+                   Test_Range.Test_Range,
+                   Test_Set.Test_Set,
+                   Test_SetUtils.Test_SetUtils ]
+
+    suite = unittest.TestSuite()
+    for testCase in lTestCases:
+        suite.addTest( unittest.makeSuite( testCase, "test" ) )
+
+    # Positional arguments of TextTestRunner: stream, descriptions, verbosity.
+    runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+    runner.run( suite )
+
+
+# Script entry point.
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Align.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Align.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,518 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Map import Map\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Range import Range\n+\n+\n+class Test_Align( unittest.TestCase ):\n+ \n+ def setUp(self):\n+ self._align = Align()\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+ \n+ def tearDown(self):\n+ self._align = None\n+ \n+ def test_isEmpty_True(self):\n+ alignInstance = Align()\n+ \n+ self.assertTrue(alignInstance.isEmpty())\n+ \n+ def test_isEmpty_True_query_is_empty(self):\n+ alignInstance = Align()\n+ line = "\\t-1\\t-1\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n+ alignInstance.setFromString(line)\n+ \n+ self.assertTrue(alignInstance.isEmpty())\n+ \n+ def test_isEmpty_True_subject_is_empty(self):\n+ alignInstance = Align()\n+ line = "chr1\\t2\\t20\\t\\t-1\\t-1\\t1e-20\\t30\\t90.2\\n"\n+ alignInstance.setFromString(line)\n+ \n+ self.assertTrue(alignInstance.isEmpty())\n+ \n+ def test_isEmpty_False(self):\n+ alignInstance = Align()\n+ line = "chr1\\t2\\t20\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n+ alignInstance.setFromString(line)\n+ \n+ self.assertFalse(alignInstance.isEmpty())\n+ \n+ def test_read(self):\n+ line = "chr2\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t90.2\\n"\n+ expReturn = 1\n+\n+ dummyMockAlignFile = "dummyMockAlignFile"\n+ mockAlignFileHandle = open(dummyMockAlignFile, "w")\n+ mockAlignFileHandle.write(line)\n+ mockAlignFileHandle.close()\n+ \n+ expAlignInstance = Align()\n+ expAlignInstance.setFromString(line)\n+\n+ mockAlignFileHandle = open(dummyMockAlignFile, "r")\n+ obsAlignInstance = Align()\n+ obsReturn = obsAlignInstance.read(mockAlignFileHandle)\n+ \n+ mockAlignFileHandle.close()\n+ os.remove(dummyMockAlignFile) \n+ \n+ self.assertEquals(expAlignInstance, 
obsAlignInstance) \n+ self.assertEquals(expReturn, obsReturn) \n+ \n+ def test_read_empty_file(self):\n+ expReturn = 0\n+ \n+ dummyMockAlignFile = "dummyMockAlignFile"\n+ mockAlignFileHandle = open(dummyMockAlignFile, "w")\n+ mockAlignFileHandle.close'..b'ject.setFromTuple( ( "repet", "sbj1", "1", "100" ) )\n+ \n+ obsMapQuery, obsMapSubject = self._align.getMapsOfQueryAndSubject()\n+ \n+ self.assertEqual( expMapQuery, obsMapQuery )\n+ self.assertEqual( expMapSubject, obsMapSubject )\n+ \n+ def test_getBin_bin_level_9(self):\n+ tuple = ("chr1","190000000","390000000","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ expRes = 100000000.0\n+ obsRes = self._align.getBin()\n+ self.assertEquals(expRes, obsRes)\n+\n+ def test_getBin_bin_level_8(self):\n+ tuple = ("chr1","19000000","39000000","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ expRes = 100000000.0\n+ obsRes = self._align.getBin()\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_7(self):\n+ tuple = ("chr1","1900000","3900000","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ expRes = 10000000.0\n+ obsRes = self._align.getBin()\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_6(self):\n+ tuple = ("chr1","190000","390000","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 1000000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_5(self):\n+ tuple = ("chr1","19000","39000","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 100000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_4(self):\n+ tuple = ("chr1","1900","3900","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 10000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_3(self):\n+ 
tuple = ("chr1","190","390","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 1000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_2(self):\n+ tuple = ("chr1","19","39","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 1000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_1(self):\n+ tuple = ("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple(tuple)\n+ obsRes = self._align.getBin()\n+ expRes = 1000.0\n+ self.assertEquals(expRes, obsRes)\n+ \n+ \n+ def test_switchQuerySubject_directS( self ):\n+ tuple = ("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n+ self._align.setFromTuple( tuple )\n+ exp = Align( Range("TE2","11","17"), Range("chr1","1","3"), "1e-20", "30", "90.2" )\n+ self._align.switchQuerySubject()\n+ self.assertEquals( exp, self._align )\n+ \n+ \n+ def test_switchQuerySubject_reverseS( self ):\n+ tuple = ("chr1","1","3","TE2","17","11","1e-20","30","90.2")\n+ self._align.setFromTuple( tuple )\n+ exp = Align( Range("TE2","11","17"), Range("chr1","3","1"), "1e-20", "30", "90.2" )\n+ self._align.switchQuerySubject()\n+ self.assertEquals( exp, self._align )\n+ \n+ \n+ def test_toStringAsGff( self ):\n+ self._align.setFromString( "chr1\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t85.2\\n" )\n+ exp = "chr1\\tREPET\\tmatch\\t1\\t10\\t1e-20\\t+\\t.\\tID=23;Target=TE3 11 17"\n+ obs = self._align.toStringAsGff( ID="23" )\n+ self.assertEqual( obs, exp )\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Align ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_AlignUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_AlignUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,777 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+import shutil\n+from commons.core.coord.AlignUtils import AlignUtils\n+from commons.core.coord.Align import Align\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Range import Range\n+\n+\n+class Test_AlignUtils( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+ \n+ \n+ def tearDown( self ):\n+ self._uniqId = ""\n+ \n+ \n+ def test_getAlignListFromFile( self ):\n+ a1 = Align()\n+ a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n+ a2 = Align()\n+ a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n+ \n+ inFileName = "dummyFile_%s" % ( self._uniqId )\n+ inFileHandler = open( inFileName, "w" )\n+ a1.write( inFileHandler )\n+ a2.write( inFileHandler )\n+ inFileHandler.close()\n+ \n+ lExp = [ a1, a2 ]\n+ lObs = AlignUtils.getAlignListFromFile( inFileName )\n+ \n+ self.assertEqual( lExp, lObs )\n+ \n+ if os.path.exists( inFileName ):\n+ os.remove( inFileName )\n+ \n+ \n+ def test_getListOfScores( self ):\n+ a1 = Align()\n+ a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n+ a2 = Align()\n+ a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n+ lAligns = [ a1, a2 ]\n+ \n+ lExp = [ 89, 95 ]\n+ lObs = AlignUtils.getListOfScores( lAligns )\n+ \n+ self.assertEqual( lExp, lObs )\n+ \n+ \n+ def test_getScoreListFromFile( self ):\n+ alignFile = "dummyAlignFile"\n+ alignFileHandler = open( alignFile, "w" )\n+ alignFileHandler.write( "chr3\\t1\\t100\\tchr5\\t11\\t110\\t1e-52\\t133\\t87.2\\n" )\n+ alignFileHandler.write( "chr7\\t1\\t200\\tchr2\\t11\\t210\\t1e-78\\t235\\t98.9\\n" )\n+ alignFileHandler.close()\n+ \n+ lExp = [ 133, 235 ]\n+ 
lObs = AlignUtils.getScoreListFromFile( alignFile )\n+ self.assertEqual( lExp, lObs )\n+ \n+ os.remove( alignFile )\n+ \n+ \n+ def test_getScoreListFromFile_empty_file( self ):\n+ alignFile = "dummyAlignFile"\n+ '..b' iAlign2 = Align( Range("chr1",51,80), Range("TE1",161,190), 1e-20, 90.2, 30 )\n+ self.assertFalse( iAlign1.isOverlapping( iAlign2 ) )\n+ \n+ \n+ def test_mergeList( self ):\n+ iAlign1 = Align( Range("chr1",81,120), Range("TE1",91,130), 1e-20, 90.2, 30 )\n+ iAlign2 = Align( Range("chr2",51,80), Range("TE1",61,90), 1e-20, 90.2, 30 ) # different query\n+ iAlign3 = Align( Range("chr1",1,100), Range("TE1",11,110), 1e-20, 90.2, 30 ) # to be merged with 1st line\n+ iAlign4 = Align( Range("chr1",1,200), Range("TE2",11,210), 1e-20, 90.2, 30 ) # different subject\n+ iAlign5 = Align( Range("chr1",1,100), Range("TE1",501,600), 1e-20, 90.2, 30 ) # non-overlapping subject\n+ lAligns = [ iAlign1, iAlign2, iAlign3, iAlign4, iAlign5 ]\n+ \n+ iAlign6 = Align( Range("chr1",1,120), Range("TE1",11,130), 1e-20, 90.2, 30 )\n+ lExp = [ iAlign6, iAlign5, iAlign4, iAlign2 ]\n+ \n+ lObs = AlignUtils.mergeList( lAligns )\n+ \n+ self.assertEquals( lExp, lObs )\n+ \n+ \n+ def test_mergeFile_empty( self ):\n+ inFile = "dummyInFile.align"\n+ inF = open( inFile, "w" )\n+ inF.close()\n+ \n+ expFile = "dummyExpFile.align"\n+ expF = open( expFile, "w" )\n+ expF.close()\n+ \n+ obsFile = "dummyObsFile.align"\n+ AlignUtils.mergeFile( inFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+ \n+ for f in [ inFile, expFile, obsFile ]:\n+ os.remove( f )\n+ \n+ \n+ def test_mergeFile( self ):\n+ iAlign = Align()\n+ \n+ inFile = "dummyInFile.align"\n+ inF = open( inFile, "w" )\n+ iAlign.setFromString( "chr1\\t81\\t120\\tTE1\\t91\\t130\\t1e-20\\t30\\t90.2\\n" )\n+ iAlign.write( inF )\n+ iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" ) # different query\n+ iAlign.write( inF )\n+ iAlign.setFromString( 
"chr1\\t1\\t100\\tTE1\\t11\\t110\\t1e-20\\t30\\t90.2\\n" ) # to be merged with 1st line\n+ iAlign.write( inF )\n+ iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" ) # different subject\n+ iAlign.write( inF )\n+ inF.close()\n+ \n+ expFile = "dummyExpFile.align"\n+ expF = open( expFile, "w" )\n+ iAlign.setFromString( "chr1\\t1\\t120\\tTE1\\t11\\t130\\t1e-20\\t30\\t90.2\\n" )\n+ iAlign.write( expF )\n+ iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" )\n+ iAlign.write( expF )\n+ iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" )\n+ iAlign.write( expF )\n+ expF.close()\n+ \n+ obsFile = "dummyObsFile.align"\n+ AlignUtils.mergeFile( inFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+ \n+ for f in [ inFile, expFile, obsFile ]:\n+ os.remove( f )\n+ \n+ \n+ def test_updateScoresInFile( self ):\n+ iAlign = Align()\n+ \n+ inFile = "dummyInFile.align"\n+ inHandler = open( inFile, "w" )\n+ iAlign.setFromString( "query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t230\\t90.2\\n" )\n+ iAlign.write( inHandler )\n+ inHandler.close()\n+ \n+ expFile = "dummyExpFile.align"\n+ expHandler = open( expFile, "w" )\n+ iAlign.setFromString( "query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t%i\\t90.2\\n" % ( ( 100 - 1 + 1 ) * 90.2 / 100.0 ) )\n+ iAlign.write( expHandler )\n+ expHandler.close()\n+ \n+ obsFile = "dummyObsFile.align"\n+ AlignUtils.updateScoresInFile( inFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+ \n+ for f in [ inFile, expFile, obsFile ]:\n+ os.remove( f )\n+ \n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_ConvCoord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_ConvCoord.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,205 @@\n+import unittest\n+import os\n+import time\n+from commons.core.coord.ConvCoord import ConvCoord\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.coord.Map import Map\n+\n+class Test_ConvCoord( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._i = ConvCoord()\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self._inData = "dummyInData_%s" % ( self._uniqId )\n+ self._mapData = "dummyMapData_%s" % ( self._uniqId )\n+ self._expData = "dummyExpData_%s" % ( self._uniqId )\n+ self._obsData = "dummyObsData_%s" % ( self._uniqId )\n+ self._iDb = DbFactory.createInstance()\n+ self._i._iDb = self._iDb\n+ \n+ def tearDown( self ):\n+ self._iDb.close()\n+\n+#TODO: handle duplicated matchs for path\n+# def test_convCoordsChkToChrFromFile_duplicated_matchs( self ):\n+# dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n+# "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n+# tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n+# self._writePathFileCoordOnChunk(tmpPathFileName)\n+# \n+# expPathFile = "dummyExpPathFile_%s" % self._uniqId\n+# self._writePathFileCoordOnChrWithOutDoublons(expPathFile)\n+# \n+# outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n+# \n+# obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n+# self._iDb.exportDataToFile(outTableName, obsPathFile)\n+# \n+# self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n+# \n+# for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n+# os.remove( f )\n+# self._iDb.dropTable(outTableName)\n+ \n+#TODO: handle matchs out of chunk overlap ? For one side (=> path 128, remove path 152) ? 
For two sides (path 129, fusion with path 154) ?\n+# def test_convCoordsChkToChrFromFile_matchs_out_of_overlap( self ):\n+# dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n+# "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n+# tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n+# self._writePathFileCoordOnChunk_outOfOverlap(tmpPathFileName)\n+# \n+# expPathFile = "dummyExpPathFile_%s" % self._uniqId\n+# self._writePathFileCoordOnChrWithOutDoublons_outOfOverlap(expPathFile)\n+# \n+# outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n+# \n+# obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n+# self._iDb.exportDataToFile(outTableName, obsPathFile)\n+# \n+# self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n+# \n+# for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n+# os.remove( f )\n+# self._iDb.dropTable(outTableName)\n+ \n+ def test_mergeCoordsOnChunkOverlaps( self ):\n+ dChunks2CoordMaps = { "chunk1": Map( "chunk1", "chromosome1", 1, 100 ),\n+ "chunk2": Map( "chunk2", "chromosome1", 91, 190 ),\n+ "chunk3": Map( "chunk3", "chromosome2", 1, 100 ) }\n+ tmpPathTable = "dummyTmpPathTable"\n+ linesToProcess = [\n+ "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n", # hit within the 1st chunk\n+ "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n", # hit included within the chunk overlap, on the 2nd chunk\n+ "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" '..b'tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+ pathFile.write("152\\tchunk2\\t3866\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t891\\t5e-21\\t4\\t34.98\\n")\n+ 
pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+ pathFile.write("154\\tchunk2\\t3866\\t3889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+ pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+ pathFile.close()\n+ \n+# def _writePathFileCoordOnChunk_outOfOverlap(self, pathFileName):\n+# pathFile = open( pathFileName, "w" )\n+# pathFile.write("123\\tchunk1\\t108397\\t108531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n+# pathFile.write("123\\tchunk1\\t108545\\t109120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n+# pathFile.write("124\\tchunk1\\t59607\\t59714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n+# pathFile.write("124\\tchunk1\\t59695\\t60156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n+# pathFile.write("125\\tchunk1\\t193027\\t193101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n+# pathFile.write("126\\tchunk1\\t102131\\t102178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n+# pathFile.write("127\\tchunk1\\t59520\\t59606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n+# pathFile.write("128\\tchunk1\\t183866\\t193889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+# pathFile.write("129\\tchunk1\\t183866\\t200000\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+# pathFile.write("150\\tchunk2\\t21176\\t21250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n+# pathFile.write("151\\tchunk2\\t116603\\t116698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+# pathFile.write("152\\tchunk2\\t1\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+# pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+# 
pathFile.write("154\\tchunk2\\t1\\t13889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+# pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+# pathFile.close()\n+# \n+# def _writePathFileCoordOnChrWithOutDoublons_outOfOverlap(self, pathFileName):\n+# file = open( pathFileName, "w" )\n+# file.write("123\\tdmel_chr4\\t868397\\t868531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n+# file.write("123\\tdmel_chr4\\t868545\\t869120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n+# file.write("124\\tdmel_chr4\\t819607\\t819714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n+# file.write("124\\tdmel_chr4\\t819695\\t820156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n+# file.write("125\\tdmel_chr4\\t953027\\t953101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n+# file.write("126\\tdmel_chr4\\t862131\\t862178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n+# file.write("127\\tdmel_chr4\\t819520\\t819606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n+# file.write("128\\tdmel_chr4\\t943866\\t953889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n+# file.write("129\\tdmel_chr4\\t943866\\t963889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n+# file.write("150\\tdmel_chr4\\t971176\\t971250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n+# file.write("151\\tdmel_chr4\\t1066603\\t1066698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n+# file.write("153\\tdmel_chr4\\t953951\\t954343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n+# file.write("155\\tdmel_chr4\\t953102\\t953199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n+# file.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_F_ConvCoord.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_F_ConvCoord.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,213 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.coord.ConvCoord import ConvCoord\n+import time\n+import subprocess\n+import os\n+import unittest\n+\n+class Test_F_ConvCoord(unittest.TestCase):\n+ \n+ def setUp( self ):\n+ self._i = ConvCoord()\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self._inData = "dummyInData_%s" % ( self._uniqId )\n+ self._mapData = "dummyMapData_%s" % ( self._uniqId )\n+ self._expData = "dummyExpData_%s" % ( self._uniqId )\n+ self._obsData = "dummyObsData_%s" % ( self._uniqId )\n+ self._iDb = DbFactory.createInstance()\n+ self._i._iDb = self._iDb\n+ \n+ def tearDown( self ):\n+ self._iDb.close()\n+ \n+ def test_run_as_script_alignFile_query( self ):\n+ configFile = "%s/dummyConfigFile_%s" % ( os.getcwd(), self._uniqId )\n+ configF = open( configFile, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+ self._writeMapFile( self._mapData )\n+ \n+ linesToProcess = [ "chunk1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n", # hit within the 1st chunk\n+ "chunk1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit included within the chunk overlap, on the 1st chunk\n+ "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit included within the chunk overlap, on the 2nd chunk\n+ "chunk2" + "\\t" 
+ "51" + "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit inside the 2nd chunk\n+ "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n" # subject on reverse strand\n+ ]\n+ FileUtils.writeLineListInFile( self._inData, linesToProcess )\n+ \n+ refLines = [ "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n",\n+ "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n+ "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n+ "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"\n+ ]\n+ FileUtils.writeLineListInFile( self._expData, refLines )\n+ \n+ cmd = "ConvCoord.py"\n+ cmd += " -i %s" % ( self._inData )\n+ cmd += " -f %s" % ( "align" )\n+ cmd += " -c %s" % ( "q" )\n+ cmd += " -m %s" % ( self._mapData )\n+ cmd += " -o %s" % ( self._obsData )\n+ cmd += " -C %s" % ( configFile )\n+ process = subprocess.Popen(cmd, shell = True)\n+ process.communicate()\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( self._expData, self._obsData ) )\n+ \n+ os.remove( self._inData )\n+ os.remove(configFile)\n+ os.remove( self._mapData )\n+ os.r'..b'"8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit included within the chunk overlap, on the 1st chunk\n+ "3" + "\\t" + "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit included within the chunk overlap, on the 2nd chunk\n+ "4" + "\\t" + "chunk2" + "\\t" + "51" 
+ "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n", # hit inside the 2nd chunk\n+ "5" + "\\t" + "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n" # subject on reverse strand\n+ ]\n+ FileUtils.writeLineListInFile( self._inData, linesToProcess )\n+ self._iDb.createTable( self._inData, "path", self._inData, True )\n+ os.remove( self._inData )\n+ \n+ refLines = [ "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n",\n+ "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",\n+ "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n", # hit included within the chunk overlap, on the 2nd chunk\n+ "4" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",\n+ "5" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n"\n+ ]\n+ FileUtils.writeLineListInFile( self._expData, refLines )\n+ \n+ cmd = "ConvCoord.py"\n+ cmd += " -i %s" % ( self._inData )\n+ cmd += " -f %s" % ( "path" )\n+ cmd += " -c %s" % ( "q" )\n+ cmd += " -m %s" % ( self._mapData )\n+ cmd += " -M %s" % ( "no" )\n+ cmd += " -o %s" % ( self._obsData )\n+ process = subprocess.Popen(cmd, shell = True)\n+ process.communicate()\n+ \n+ self._iDb.exportDataToFile( self._obsData, self._obsData )\n+ self.assertTrue( FileUtils.are2FilesIdentical( self._expData, self._obsData ) )\n+ \n+ os.remove( self._obsData )\n+ os.remove( self._expData )\n+ 
self._iDb.dropTable( self._mapData )\n+ self._iDb.dropTable( self._inData )\n+ self._iDb.dropTable( self._expData )\n+ self._iDb.dropTable( self._obsData )\n+\n+ def test_run(self):\n+ inFileName = "DmelChr4_chk.align.not_over.filtered"\n+ expFileName = "%s/Tools/DmelChr4_chr.align.not_over.filtered" % os.environ["REPET_DATA"]\n+ obsFileName = "obs.align"\n+ os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName), inFileName)\n+ iConvCoord = ConvCoord()\n+ iConvCoord.setInputData(inFileName)\n+ iConvCoord.setMapData("%s/Tools/DmelChr4_chunks.map" % os.environ["REPET_DATA"])\n+ iConvCoord.setCoordinatesToConvert("qs")\n+ iConvCoord.setMergeChunkOverlaps(False)\n+ iConvCoord.setOutputData(obsFileName)\n+ iConvCoord.run()\n+ \n+ self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+ \n+ os.remove(inFileName)\n+ os.remove(obsFileName)\n+ \n+ def _writeMapFile( self, mapFile ):\n+ mapF = open( mapFile, "w" )\n+ mapF.write( "chunk1\\tchromosome1\\t1\\t100\\n" )\n+ mapF.write( "chunk2\\tchromosome1\\t91\\t190\\n" )\n+ mapF.write( "chunk3\\tchromosome2\\t1\\t100\\n" )\n+ mapF.close()\n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Map.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,183 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import os
+from commons.core.coord.Map import Map
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_Map( unittest.TestCase ):
+    """Unit tests for Map: string/tuple parsing, equality, file I/O and diff()."""
+
+    def setUp(self):
+        # Every test starts from a fresh, default-constructed Map.
+        self._map = Map()
+
+    def _writeFile(self, fileName, content=""):
+        """Create (or truncate) fileName and write content into it."""
+        with open(fileName, "w") as fileHandle:
+            fileHandle.write(content)
+
+    def _readMapFromFile(self, fileName):
+        """Return self._map.read() applied to fileName; always delete the file."""
+        try:
+            with open(fileName, "r") as fileHandle:
+                return self._map.read(fileHandle)
+        finally:
+            # Runs after the handle is closed, even if read() raised.
+            os.remove(fileName)
+
+    def test_setFromString(self):
+        """setFromString() fills name, seqname, start and end from one line."""
+        line = "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"   # test with '\t' separator
+        self._map.setFromString(line)
+        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
+        self.assertEqual( self._map.seqname, "consensus1" )
+        self.assertEqual( self._map.start, 51 )
+        self.assertEqual( self._map.end, 1230 )
+        line = "MbQ12Gr2Cl2;consensus1;51;1230"   # test with ';' separator
+        self._map.setFromString(line,";")
+        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
+        self.assertEqual( self._map.seqname, "consensus1" )
+        self.assertEqual( self._map.start, 51 )
+        self.assertEqual( self._map.end, 1230 )
+
+    def test___eq__(self):
+        """A Map equals one with identical fields and differs when any field changes."""
+        self._map.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
+        o = Map()
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
+        self.assertEqual( self._map, o )   # same data
+        o.setFromString( "MbQ12Gr2Cl1\tconsensus1\t51\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different name
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus2\t51\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different seqname
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t15\t1230\n" )
+        self.assertNotEqual( self._map, o )   # different start
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t123000\n" )
+        self.assertNotEqual( self._map, o )   # different end
+        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t1230\t51\n" )
+        self.assertNotEqual( self._map, o )   # same start/end but in different order
+
+    def test_setFromTuple(self):
+        """setFromTuple() with string coordinates must equal a Map built from ints."""
+        # Named 'fields' rather than 'tuple' so the builtin is not shadowed.
+        fields = ("MbQ12Gr2Cl2", "consensus1","51","1230")
+        self._map.setFromTuple(fields)
+
+        expMap = Map("MbQ12Gr2Cl2", "consensus1",51,1230)
+        self.assertEqual(expMap, self._map)
+
+    def test_read_empty_file(self):
+        """read() is expected to return 0 on an empty file."""
+        fileName = "dummyFile"
+        # Plain Python instead of shelling out to 'touch' (portable, no shell).
+        self._writeFile(fileName)
+
+        obsResult = self._readMapFromFile(fileName)
+
+        self.assertEqual(0, obsResult)
+
+    def test_read_uncompleted_line( self):
+        """read() is expected to return 0 when the line lacks its fourth field."""
+        uncompletedLine = "MbQ12Gr2Cl2\tconsensus1\t51"
+        fileName = "dummyFile"
+        self._writeFile(fileName, uncompletedLine)
+
+        obsResult = self._readMapFromFile(fileName)
+
+        self.assertEqual(obsResult, 0)
+
+    def test_read(self):
+        """read() on a complete line must yield the same Map as setFromString()."""
+        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
+        fileName = "dummyFile"
+        self._writeFile(fileName, line)
+
+        self._readMapFromFile(fileName)
+
+        expResult = Map()
+        expResult.setFromString(line)
+
+        self.assertEqual(self._map, expResult)
+
+    def test_write(self):
+        """write() must produce a file identical to one holding the parsed line."""
+        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
+        expFileName = "expFileName"
+        obsFileName = "obsFileName"
+        self._writeFile(expFileName, line)
+        self._map.setFromString(line)
+
+        try:
+            with open(obsFileName, "w") as fileHandle:
+                self._map.write(fileHandle)
+            self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
+        finally:
+            # Remove both files even when the comparison fails.
+            for fileName in (obsFileName, expFileName):
+                if os.path.exists(fileName):
+                    os.remove(fileName)
+
+    def test_diff1(self):
+        """diff() against a map covering map1's right end: empty Map expected back."""
+        map1 = Map("seq1","DmelChr4", 190000, 390000)
+        map2 = Map("seq2","DmelChr4", 290000, 590000)
+
+        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
+        expReturnedMap = Map()
+
+        obsReturnedMap = map1.diff(map2)
+        obsMap1 = map1   # diff() is expected to have truncated map1 in place
+
+        self.assertEqual(expMap1, obsMap1)
+        self.assertEqual(expReturnedMap, obsReturnedMap)
+
+    def test_diff2(self):
+        """diff() against a map inside map1: the right-hand remainder is expected back."""
+        map1 = Map("seq1","DmelChr4", 190000, 590000)
+        map2 = Map("seq2","DmelChr4", 290000, 390000)
+
+        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
+        expReturnedMap = Map("seq1", "DmelChr4", 390001, 590000)
+
+        obsReturnedMap = map1.diff(map2)
+        obsMap1 = map1   # diff() is expected to have kept only the left part in map1
+
+        self.assertEqual(expMap1, obsMap1)
+        self.assertEqual(expReturnedMap, obsReturnedMap)
+
+
+test_suite = unittest.TestSuite()   # built at import time; run below when executed as a script
+test_suite.addTest( unittest.TestLoader().loadTestsFromTestCase( Test_Map ) )   # makeSuite() is deprecated
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_MapUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MapUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,384 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import sys\n+from commons.core.coord.MapUtils import MapUtils\n+from commons.core.coord.Map import Map\n+from commons.core.coord.Set import Set\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_MapUtils( unittest.TestCase ):\n+ \n+ def test_getMapListSortedByIncreasingMinThenMax( self ):\n+ iMap1 = Map("name1", "chr1", 1, 350)\n+ iMap2 = Map("name2", "chr1", 1, 100)\n+ iMap3 = Map("name3", "chr1", 50, 350)\n+ iMap4 = Map("name4", "chr1", 5, 450)\n+ lMaps = [ iMap1, iMap2, iMap3, iMap4 ]\n+ \n+ expLMaps = [ iMap2, iMap1, iMap4, iMap3 ]\n+ \n+ obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+ \n+ self.assertEquals( expLMaps, obsLMaps )\n+ \n+ \n+ def test_getMapListSortedByIncreasingMinThenMax_ordered( self ):\n+ iMap1 = Map("name1", "chr1", 1, 100)\n+ iMap2 = Map("name2", "chr1", 1, 350)\n+ \n+ lMaps = [ iMap1, iMap2 ]\n+ expLMaps = [ iMap1, iMap2 ]\n+ \n+ obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+ \n+ self.assertEquals( expLMaps, obsLMaps )\n+ \n+ \n+ def test_getMapListSortedByIncreasingMinThenMax_unordered( self ):\n+ iMap1 = Map("name1", "chr1", 1, 350)\n+ iMap2 = Map("name2", "chr1", 1, 100)\n+ \n+ lMaps = [ iMap1, iMap2 ]\n+ expLMaps = [ iMap2, iMap1 ]\n+ \n+ obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+ \n+ self.assertEquals( expLMaps, obsLMaps )\n+ \n+ \n+ def test_getMapListSortedByIncreasingMinThenMax_nonOverlapping( self ):\n+ iMap1 = Map("name1", "chr1", 1, 350)\n+ iMap2 = Map("name2", "chr1", 400, 600)\n+ \n+ lMaps = [ iMap2, iMap1 ]\n+ expLMaps = [ iMap1, iMap2 ]\n+ \n+ obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n+ \n+ self.assertEquals( expLMaps, obsLMaps )\n+ \n+ \n+ def test_getMapListSortedByIncreasingMinThenMax_sameMinThreeMaps( self ):\n+ iMap1 = 
Map("name1", "chr1", 350, 1)\n+ iMap2 = Map("name2", "chr1", 400, 1)\n+ iMap3 = Map("name3", "chr1", 500, 1)\n+ \n+ lMaps = [ iMap2, iMap1, iMap3 ]\n+ expLMaps = [ iMap1, iMap2, iMap3 ]\n+ \n+ obsLM'..b'SetFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n+ expSetFileHandler.write( "3\\tseq40\\tchr2\\t600\\t700\\n" )\n+ expSetFileHandler.write( "4\\tseq2\\tchr3\\t301\\t500\\n" )\n+ expSetFileHandler.close()\n+ \n+ obsFile = "dummyObsFile"\n+ \n+ MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n+ \n+ for f in [ expSetFile, mapInputFile, obsFile ]:\n+ os.remove( f )\n+\n+ def test_convertMapFileIntoSetFile_one_line(self):\n+ mapInputFile = "dummyExpFile"\n+ mapFileHandler = open( mapInputFile, "w" )\n+ mapFileHandler.write( "seq31\\tchr1\\t151\\t250\\n" )\n+ mapFileHandler.close()\n+\n+ expSetFile = "dummyexpSetFile"\n+ expSetFileHandler = open( expSetFile, "w" )\n+ expSetFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n+ expSetFileHandler.close()\n+ \n+ obsFile = "dummyObsFile"\n+ \n+ MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n+ \n+ for f in [ expSetFile, mapInputFile, obsFile ]:\n+ os.remove( f )\n+\n+ def test_convertMapFileIntoSetFile_empty_file(self):\n+ mapInputFile = "dummyFile.map"\n+ mapFileHandler = open( mapInputFile, "w" )\n+ mapFileHandler.close()\n+ \n+ expFile = "dummyExpFile.map.set"\n+ expFileHandler = open( expFile, "w" )\n+ expFileHandler.close()\n+ \n+ obsFile = "dummyFile.map.set"\n+ \n+ MapUtils.convertMapFileIntoSetFile( mapInputFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+ \n+ for f in [ expFile, mapInputFile, obsFile ]:\n+ os.remove( f )\n+ \n+ def test_writeListInFile_empty_list(self):\n+ lMaps = [ ]\n+ expFileName = "expFileName"\n+ fileHandle = open(expFileName, "w")\n+ fileHandle.close()\n+ \n+ 
obsFileName = "obsFileName"\n+ fileHandle = open(obsFileName, "w")\n+ MapUtils.writeListInFile(lMaps, obsFileName, "w")\n+ fileHandle.close()\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n+ \n+ os.remove(obsFileName)\n+ os.remove(expFileName)\n+ \n+ def test_writeListInFile_list_one_set(self):\n+ lMaps = [ Map( "map1", "map1seq", 1, 10 ) ]\n+ line = "map1\\tmap1seq\\t1\\t10\\n"\n+ \n+ expFileName = "expFileName"\n+ \n+ fileHandle = open(expFileName, "w")\n+ fileHandle.write(line)\n+ fileHandle.close()\n+ \n+ obsFileName = "obsFileName"\n+ fileHandle = open(obsFileName, "w")\n+ MapUtils.writeListInFile(lMaps, obsFileName, "w")\n+ fileHandle.close()\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n+ \n+ os.remove(obsFileName)\n+ os.remove(expFileName)\n+\n+ def test_getMinLengthOfMapFile(self):\n+ mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n+ expMinLengthofMapFile = 20\n+ iMap = MapUtils()\n+ obsMinLengthofMapFile = iMap.getMinLengthOfMapFile(mapFileName)\n+ self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n+ \n+ def test_getMaxLengthOfMapFile(self):\n+ mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n+ expMinLengthofMapFile = 6344\n+ iMap = MapUtils()\n+ obsMinLengthofMapFile = iMap.getMaxLengthOfMapFile(mapFileName)\n+ self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n+ \n+\n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_MapUtils ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Match.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Match.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,363 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+from commons.core.coord.Match import Match\n+from commons.core.coord.Path import Path\n+\n+\n+class Test_Match( unittest.TestCase ):\n+ \n+ def test_eq_match_equals( self ):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertEquals( match1, match2 )\n+ \n+ def test_eq_match_not_equals_query_name( self ):\n+ tuple1 = ("Name", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertNotEquals( match1, match2 )\n+ \n+ def test_eq_match_not_equals_query_start( self ):\n+ tuple1 = ("QName", 2, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertNotEquals( match1, match2 )\n+ \n+ def test_eq_match_not_equals_query_end( self ):\n+ tuple1 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertNotEquals( match1, match2 )\n+ \n+ def test_eq_match_not_equals_query_length( self ):\n+ tuple1 = ("QName", 1, 5, 6, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 
1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertNotEquals( match1, match2 )\n+ \n+ def test_eq_match_not_equals_query_length_perc( self ):\n+ tuple1 = ("QName", 1, 5, 5, 0.15, 0.2, "SName'..b'ple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match = Match()\n+ match.setFromTuple(tuple)\n+ expString = "QName\\t1\\t5\\t5\\t%f\\t%f\\tSName\\t5\\t25\\t20\\t%f\\t%g\\t15\\t%f\\t1" % (0.1,0.2,0.15,1e-20, 87.2)\n+ obsString = match.toString()\n+ self.assertEquals(expString, obsString)\n+ \n+ def test_getPathInstance( self ):\n+ tuple = ( "QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1 )\n+ match = Match()\n+ match.setFromTuple( tuple )\n+ tuple = ( 1, "QName", 1, 5, "SName", 5, 25, 1e-20, 15, 87.2 )\n+ exp = Path()\n+ exp.setFromTuple( tuple )\n+ obs = match.getPathInstance()\n+ self.assertEqual( exp, obs )\n+ \n+ def test_getQryIsIncluded(self):\n+ tuple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match = Match()\n+ match.setFromTuple(tuple)\n+ expString = "query QName (50 bp: 1-5) is contained in subject SName (133 bp: 5-25): id=87.20 - 0.100 - 0.200 - 0.150"\n+ obsString = match.getQryIsIncluded()\n+ self.assertEquals(expString, obsString)\n+ \n+ def test_isDoublonWith_Matchs_equals(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertTrue(match1.isDoublonWith(match2))\n+ \n+ def test_isDoublonWith_Matchs_unequals_on_MatchNumbers(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 86.2, 
1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertFalse(match1.isDoublonWith(match2))\n+ \n+ def test_isDoublonWith_Matchs_unequals_on_SeqNames(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "Name", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertFalse(match1.isDoublonWith(match2))\n+ \n+ def test_isDoublonWith_Matchs_unequals_on_Coordinates(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertFalse(match1.isDoublonWith(match2))\n+ \n+ def test_isDoublonWith_Reversed_Matchs_equals(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 5, 5, 0.1, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertTrue(match1.isDoublonWith(match2))\n+ \n+ def test_isDoublonWith_Reversed_Matchs_unequals(self):\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ match1 = Match()\n+ match1.setFromTuple(tuple1)\n+ tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 6, 5, 0.1, 1e-20, 15, 87.2, 1)\n+ match2 = Match()\n+ match2.setFromTuple(tuple2)\n+ self.assertFalse(match1.isDoublonWith(match2))\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Match ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_MatchUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MatchUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,439 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.MatchUtils import MatchUtils\n+from commons.core.coord.Match import Match\n+from commons.core.seq.BioseqDB import BioseqDB\n+\n+\n+class Test_MatchUtils( unittest.TestCase ):\n+ \n+ def test_getMatchListFromFile( self ):\n+ inFile = "dummyInFile"\n+ inFileHandler = open( inFile, "w" )\n+ inFileHandler.write( "query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n" )\n+ m1 = Match()\n+ m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ m1.write( inFileHandler )\n+ m2 = Match()\n+ m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ m2.write( inFileHandler )\n+ inFileHandler.close()\n+ \n+ lExp = [ m1, m2 ]\n+ \n+ lObs = MatchUtils.getMatchListFromFile( inFile )\n+ \n+ self.assertEquals( lExp, lObs )\n+ \n+ os.remove( inFile )\n+ \n+ def test_getDictOfListsWithSubjectAsKey( self ):\n+ m1 = Match()\n+ m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ m2 = Match()\n+ m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ lMatch = [ m1, m2 ]\n+ \n+ dExp = { "SName1": [ m1 ], "SName2": [ m2 ] }\n+ \n+ dObs = MatchUtils.getDictOfListsWithSubjectAsKey( lMatch )\n+ \n+ self.assertEquals( dExp, dObs )\n+ \n+ def test_getDictOfListsWithQueryAsKey( self ):\n+ m1 = Match()\n+ m1.setFromTuple( ("QName1", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ m2 = Match()\n+ m2.setFromTuple( ("QName2", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ m3 = 
Match()\n+ m3.setFromTuple( ("QName1", 1, 5, 5, 0.1, 0.2, "SName3", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n+ lMatch = [ m1, m2, m3 ]\n+ \n+ dExp = { "QName1": [ m1, m3 ], "QName2": [ m2 ] }\n+ \n+ dObs = MatchUtils.getDictOfListsWithQueryAsKey( lMatch )\n+ \n+ self.assertEquals'..b'TTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGAATAAAAAATGATTATTTG\\n")\n+ f.write("CATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGGGATTCGTTTCATTCACCG\\n")\n+ f.close()\n+\n+ def _writeMatchFile2(self, fileName):\n+ f = open(fileName, "w")\n+ f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ f.write("header2\\t1\\t120\\t120\\t1\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+ f.close()\n+ \n+ def _writeMatchFile3(self, fileName):\n+ f = open(fileName, "w")\n+ f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+ f.write("header3\\t1\\t120\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-68\\t238\\t100\\t1\\n")\n+ f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t67\\t1\\n")\n+ f.close()\n+ \n+ def _writeMatchFile4(self, fileName):\n+ f = open(fileName, "w")\n+ f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+ 
f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n+ f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n+ f.close()\n+ \n+ def _writeExpAlignFile(self,fileName):\n+ f = open(fileName, "w")\n+ f.write("header2\\t1\\t120\\tBS31790\\t19\\t138\\t3e-68\\t238.0\\t100.0\\n")\n+ f.write("header3\\t120\\t220\\tBS31790\\t19\\t138\\t3e-65\\t238.0\\t100.0\\n")\n+ f.write("header4\\t1\\t120\\tBS31790\\t19\\t138\\t3e-67\\t244.0\\t90.0\\n")\n+ f.close()\n+ \n+ def _writeMatchFile5(self,fileName):\n+ f = open(fileName, "w")\n+ f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+ f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n+ f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n+ f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n+ f.close()\n+ \n+ def _writeExpMatchFile(self,fileName):\n+ f = open(fileName, "w")\n+ f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n+ f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n+ f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n+ 
f.write("header3\\t120\\t220\\t120\\t0.990000\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100.000000\\t2\\n")\n+ f.write("header4\\t1\\t120\\t120\\t1.000000\\t0.941570\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90.000000\\t3\\n")\n+ f.close()\n+ \n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_MatchUtils ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_MergedRange.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_MergedRange.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,119 @@
+import unittest
+from commons.core.coord.MergedRange import MergedRange
+from commons.core.coord.Match import Match
+
+class Test_MergedRange(unittest.TestCase):
+    """Tests MergedRange equality, overlap detection, merging and construction from Match objects."""
+
+    def test_eq_True(self):
+        """Same id list, start and end compare equal."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1], 6, 10)
+        self.assertEqual(mr1, mr2)
+
+    def test_eq_different_list(self):
+        """A different id list makes two ranges unequal."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1, 2], 6, 10)
+        self.assertNotEqual(mr1, mr2)
+
+    def test_eq_different_start(self):
+        """A different start makes two ranges unequal."""
+        mr1 = MergedRange([1], 5, 10)
+        mr2 = MergedRange([1], 6, 10)
+        self.assertNotEqual(mr1, mr2)
+
+    def test_eq_different_end(self):
+        """A different end makes two ranges unequal."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([1], 6, 11)
+        self.assertNotEqual(mr1, mr2)
+
+    def test_isOverlapping_no( self ):
+        """Disjoint ranges (6-10 and 16-20) do not overlap."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 16, 20)
+        self.assertEqual( False, mr1.isOverlapping( mr2 ) )
+
+    def test_isOverlapping_yes( self ):
+        """A range enclosed in another one (6-10 inside 5-20) overlaps it."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 5, 20)
+        self.assertEqual( True, mr1.isOverlapping( mr2 ) )
+
+    def test_isOverlapping_range1_before_range2( self ):
+        """Partial overlap where the first range starts first (6-10 vs 8-15)."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 8, 15)
+        self.assertEqual( True, mr1.isOverlapping( mr2 ) )
+
+    def test_isOverlapping_range1_after_range2( self ):
+        """Partial overlap where the first range starts last (6-10 vs 1-8)."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 1, 8)
+        self.assertEqual( True, mr1.isOverlapping( mr2 ) )
+
+    def test_isOverlapping_range1_equal_range2( self ):
+        """Ranges with identical coordinates but different ids overlap."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 6, 10)
+        self.assertEqual( True, mr1.isOverlapping( mr2 ) )
+
+    def test_merge_mr1_with_mr2(self):
+        """Merging [2] 1-15 into a default-constructed [1] 6-10 gives [1, 2] 1-15."""
+        # NOTE(review): otherMergedRange is never asserted on; presumably it guards against
+        # default-constructed instances sharing one id list -- confirm before removing it.
+        otherMergedRange = MergedRange()
+        otherMergedRange._lId.append(3)
+        otherMergedRange._start = 1
+        otherMergedRange._end = 10
+
+        mr1 = MergedRange()
+        mr1._lId.append(1)
+        mr1._start = 6
+        mr1._end = 10
+
+        mr2 = MergedRange([2], 1, 15)
+        mr1.merge(mr2)
+        self.assertEqual(MergedRange([1, 2], 1, 15), mr1)
+
+    def test_merge_mr2_with_mr1(self):
+        """Merging in the opposite direction yields the same [1, 2] 1-15 range."""
+        mr1 = MergedRange([1], 6, 10)
+        mr2 = MergedRange([2], 1, 15)
+        mr2.merge(mr1)
+        self.assertEqual(MergedRange([1, 2], 1, 15), mr2)
+
+    def test_setFromMatch(self):
+        """A Match with query coordinates 1-5 and last field 1 is expected to give MergedRange([1], 1, 5)."""
+        matchFields = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
+        iMatch = Match()
+        iMatch.setFromTuple(matchFields)
+
+        obsMergedRange = MergedRange()
+        obsMergedRange.setFromMatch(iMatch)
+
+        self.assertEqual(MergedRange([1], 1, 5), obsMergedRange)
+
+    def test_getMergedRangeListFromMatchList(self):
+        """Each Match of the input list is expected to give one MergedRange, in input order."""
+        matchFields1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
+        iMatch1 = Match()
+        iMatch1.setFromTuple(matchFields1)
+        matchFields2 = ("QName", 10, 15, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 2)
+        iMatch2 = Match()
+        iMatch2.setFromTuple(matchFields2)
+
+        explMergedRange = [MergedRange([1], 1, 5), MergedRange([2], 10, 15)]
+        obslMergedRange = MergedRange.getMergedRangeListFromMatchList([iMatch1, iMatch2])
+
+        self.assertEqual(explMergedRange, obslMergedRange)
+
+    def test_getMergedRangeListFromMatchList_empty_list(self):
+        """An empty Match list is expected to give an empty MergedRange list."""
+        obslMergedRange = MergedRange.getMergedRangeListFromMatchList([])
+
+        self.assertEqual([], obslMergedRange)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Path.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Path.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,146 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+from commons.core.coord.Path import Path
+from commons.core.coord.Align import Align
+from commons.core.coord.Set import Set
+
+
+class Test_Path( unittest.TestCase ):
+    """Tests Path parsing, equality, merge eligibility and conversion to Set / Align / string."""
+
+    def setUp( self ):
+        """Give every test a fresh, empty Path in self._path."""
+        self._path = Path()
+
+    def test_setFromTuple( self ):
+        """String fields from a tab-split line must compare equal to the expected numeric/str values."""
+        line = "1\tchr1\t1\t10\tTE2\t11\t17\t1e-20\t30\t90.2"
+        self._path.setFromTuple( line.split("\t") )
+        self.assertEqual( self._path.id, 1 )
+        self.assertEqual( self._path.range_query.seqname, "chr1" )
+        self.assertEqual( self._path.range_query.start, 1 )
+        self.assertEqual( self._path.range_query.end, 10 )
+        self.assertEqual( self._path.range_subject.seqname, "TE2" )
+        self.assertEqual( self._path.range_subject.start, 11 )
+        self.assertEqual( self._path.range_subject.end, 17 )
+        self.assertEqual( self._path.e_value, float("1e-20") )
+        self.assertEqual( self._path.score, float("30") )
+        self.assertEqual( self._path.identity, float("90.2") )
+
+    def test___eq__( self ):
+        """Paths are equal on identical lines; a different id or a different score breaks equality."""
+        self._path.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        o = Path()
+        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        self.assertEqual( self._path,  o )
+        o.setFromString( "2\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
+        self.assertNotEqual( self._path,  o )
+        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t3000000\t90.2\n" )
+        self.assertNotEqual( self._path,  o )
+
+    def test_canMerge( self ):
+        """Distinct ids, same query and subject names, overlapping ranges: merge expected."""
+        self._path.setFromTuple( ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("2", "chr1","2", "9","TE2","10","13","1e-20","30","90.2") )
+        self.assertTrue(self._path.canMerge(other))
+
+    def test_canMerge_on_same_id ( self ):
+        """Two paths carrying the same id are expected not to merge."""
+        self._path.setFromTuple( ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("1", "chr1","2", "9","TE2","10","13","1e-20","30","90.2") )
+        self.assertFalse(self._path.canMerge(other))
+
+    def test_canMerge_on_same_chr( self ):
+        """Despite its name, this case uses different query sequences (chr1 vs chr2): no merge."""
+        self._path.setFromTuple( ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("2", "chr2","2", "9","TE2","10","13","1e-20","30","90.2") )
+        self.assertFalse(self._path.canMerge(other))
+
+    def test_canMerge_on_diff_subj( self ):
+        """Different subject names (TE2 vs TE3) are expected to prevent merging."""
+        self._path.setFromTuple( ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("2", "chr1","2", "9","TE3","10","13","1e-20","30","90.2") )
+        self.assertFalse(self._path.canMerge(other))
+
+    def test_canMerge_on_queries_that_do_not_overlap( self ):
+        """Disjoint query ranges (5-11 vs 1-4) are expected to prevent merging."""
+        self._path.setFromTuple( ("1", "chr1","5", "11","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("2", "chr1","1", "4","TE2","10","13","1e-20","30","90.2") )
+        self.assertFalse(self._path.canMerge(other))
+
+    def test_canMerge_on_subjects_that_do_not_overlap( self ):
+        """Disjoint subject ranges (11-17 vs 1-10) are expected to prevent merging."""
+        self._path.setFromTuple( ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2") )
+        other = Path()
+        other.setFromTuple( ("2", "chr1","2", "9","TE2","1","10","1e-20","30","90.2") )
+        self.assertFalse(self._path.canMerge(other))
+
+    def test_getSubjectAsSetOfQuery( self ):
+        """Expected Set is built from the path id, subject name, query name and query coordinates."""
+        self._path.setFromTuple( ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2") )
+        exp = Set(1,"TE2","chr1",1,10)
+        obs = self._path.getSubjectAsSetOfQuery()
+        self.assertEqual( exp, obs )
+
+    def test_getSubjectAsSetOfQuery_on_neg_strand( self ):
+        """Reverse-ordered query coordinates (10 to 1) are expected to be kept in that order."""
+        self._path.setFromTuple( ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2") )
+        exp = Set(1,"TE2","chr1",10,1)
+        obs = self._path.getSubjectAsSetOfQuery()
+        self.assertEqual( exp, obs )
+
+    def test_toString( self ):
+        """Output is tab-separated, with the e-value rendered via %g and the identity via %f."""
+        self._path.setFromString( "1\tchr1\t1\t10\tTE3\t11\t17\t1e-20\t30\t85.2\n" )
+        exp = "1\tchr1\t1\t10\tTE3\t11\t17\t%g\t30\t%f" % ( 1e-20, 85.2 )
+        obs = self._path.toString()
+        self.assertEqual( obs, exp )
+
+    def test_getAlignInstance( self ):
+        """Expected Align has no id, ascending query coordinates (151-250) and subject given as 100-1."""
+        self._path.setFromTuple( ( "2", "chr3", "250", "151", "seq5", "1", "100", "1e-32", "147", "87.9" ) )
+        expAlign = Align()
+        expAlign.setFromTuple( ( "chr3", "151", "250", "seq5", "100", "1", "1e-32", "147", "87.9" ) )
+        obsAlign = self._path.getAlignInstance()
+        self.assertEqual( expAlign, obsAlign )
+
+
+# Module-level suite, still exposed as test_suite; TestLoader replaces the deprecated unittest.makeSuite().
+test_suite = unittest.TestSuite( [ unittest.TestLoader().loadTestsFromTestCase( Test_Path ) ] )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_PathUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_PathUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1667 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Set import Set\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Range import Range\n+from commons.core.coord.Align import Align\n+\n+\n+class Test_PathUtils ( unittest.TestCase ):\n+\n+ def test_getSetListFromQueries( self ):\n+ set1 = Set(1,"TE2","chr1",1,10)\n+ set2 = Set(1,"TE2","chr1",10,1)\n+ set3 = Set(1,"TE3","chr4",12,22)\n+ \n+ expList = [set1, set2, set3]\n+\n+ tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")\n+ tuple2 = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2")\n+ tuple3 = ("1","chr4","12","22","TE3","11","17","1e-20","30","90.2")\n+\n+ pathList = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] )\n+\n+ obsList = PathUtils.getSetListFromQueries( pathList )\n+\n+ self.assertEquals( expList, obsList )\n+ \n+ \n+ def test_getSetListFromQueries_on_empty_list( self ):\n+ expList = []\n+ obsList = PathUtils.getSetListFromQueries( [] )\n+\n+ self.assertEquals( expList, obsList )\n+ \n+ \n+ def test_getSetListFromQueries_on_list_size1( self ):\n+ set1 = Set(1,"TE2","chr1",1,10)\n+ \n+ expList = [set1]\n+ \n+ tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")\n+ path1 = Path()\n+ path1.setFromTuple(tuple1)\n+ \n+ pathList = [path1]\n+ obsList = PathUtils.getSetListFromQueries( pathList )\n+ \n+ self.assertEquals( expList, obsList )\n+ \n+ \n+ def test_getRangeListFromSubjects_initiallyOrdered_directStrand( self ):\n+ tuple1 = ("1","chr1","1","10","TE2","1","10","1e-20","30","90.2")\n+ tuple2 = ("1","chr1","21","30","TE2","11","20","1e-20","30","90.2")\n+ tuple3 = ("1","chr1","41","50","TE2","21","30","1e-20","30","90.2")\n+ lPaths = 
self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] )\n+ \n+ iSet1 = Range( "TE2", 1, 10 )\n+ iSet2 = Range( "TE2", 11, 20 )\n+ iSet3 = Range( "TE2", 21, 30 )\n+ lExp = [ iSet1, iSet2, iSet3 ]\n+ \n+ lObs = PathUtils.getRangeListFromSubjects( lPaths )\n+ \n+ self.assertEquals( lExp, lObs )\n+'..b'\\t4641\\t0\\t585\\t97.3607\\n")\n+ f.write("9\\taurora-element\\t2265\\t2483\\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\\t3999\\t4218\\t0\\t361\\t96.347\\n")\n+ f.write("10\\taurora-element\\t2834\\t4045\\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\\t4800\\t6011\\t0\\t2074\\t97.0248\\n")\n+ f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t205\\t317\\t8.5e-37\\t157\\t93.75\\n")\n+ f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t305\\t417\\t8.5e-37\\t157\\t93.75\\n")\n+ f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t305\\t417\\t8.5e-37\\t157\\t93.75\\n")\n+ f.close() \n+ \n+ obsPathFile = "obsDummyPathFile"\n+ PathUtils.removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(pathFile, obsPathFile)\n+ \n+ expPathFile = "expDummyPathFile"\n+ f = open(expPathFile, "w")\n+ f.write("1\\tG4\\t1\\t3856\\tAtha5Chr4_Pals_Piler_3590_69_MAP_3\\t1\\t3856\\t0\\t7642\\t99.974100\\n")\n+ f.write("2\\trooA\\t1\\t386\\tAtha5Chr4_Pals_Piler_3589_69_MAP_3\\t1\\t386\\t6.3e-220\\t758\\t99.481900\\n")\n+ f.write("3\\trooA\\t7236\\t7621\\tAtha5Chr4_Pals_Piler_3536_69_MAP_3\\t1\\t386\\t6.3e-220\\t758\\t99.481900\\n")\n+ f.write("4\\trooA\\t387\\t7235\\tAtha5Chr4_Pals_Piler_3596_69_MAP_3\\t1\\t6849\\t0\\t13580\\t99.985400\\n")\n+ f.write("5\\taurora-element\\t4046\\t4257\\tAtha5Chr4_Pals_Piler_3540_69_MAP_3\\t1\\t204\\t6.1e-80\\t300\\t96.568600\\n")\n+ f.write("6\\taurora-element\\t274\\t381\\tAtha5Chr4_Pals_Piler_3595_23_MAP_3\\t177\\t284\\t0\\t191\\t97.222200\\n")\n+ f.write("6\\taurora-element\\t116\\t287\\tAtha5Chr4_Pals_Piler_3595_30_MAP_3\\t3\\t170\\t0\\t290\\t98.809500\\n")\n+ 
f.write("7\\taurora-element\\t393\\t902\\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\\t1467\\t1945\\t0\\t873\\t97.244100\\n")\n+ f.write("7\\taurora-element\\t1387\\t2271\\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\\t276\\t10780\\t0\\t1576\\t97.624400\\n")\n+ f.write("8\\taurora-element\\t2486\\t2828\\tAtha5Chr4_Pals_Piler_3595_50_MAP_3\\t4301\\t4641\\t0\\t585\\t97.360700\\n")\n+ f.write("9\\taurora-element\\t2265\\t2483\\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\\t3999\\t4218\\t0\\t361\\t96.347000\\n")\n+ f.write("10\\taurora-element\\t2834\\t4045\\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\\t4800\\t6011\\t0\\t2074\\t97.024800\\n")\n+ f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t205\\t317\\t8.5e-37\\t157\\t93.750000\\n")\n+ f.close()\n+ \n+ self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n+ \n+ os.remove(pathFile)\n+ os.remove(expPathFile)\n+ os.remove(obsPathFile)\n+ \n+ \n+ def test_getPathListWithoutDuplicatesOnQueryCoord(self):\n+ iPath1 = Path(1, Range("qry1",398,491), Range("sbj1",10,112), 0.0, 10, 98.7)\n+ iPath2 = Path(1, Range("qry1",451,492), Range("sbj1",124,169), 0.0, 10, 98.7)\n+ iPath3 = Path(1, Range("qry1",451,492), Range("sbj1",249,294), 0.0, 10, 98.7)\n+ lPaths = [iPath3, iPath2, iPath1]\n+ \n+ obslPaths = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n+ \n+ explPaths = [iPath1, iPath3]\n+ \n+ self.assertEquals(explPaths, obslPaths)\n+ \n+ \n+ def _makePathListFromTupleList ( self, tupleList ):\n+ pathList = []\n+ for tuple in tupleList:\n+ path = Path()\n+ path.setFromTuple(tuple)\n+ pathList.append(path)\n+ return pathList\n+ \n+ def _makePathListFromStringList (self, stringList):\n+ pathList = []\n+ for string in stringList:\n+ path = Path()\n+ path.setFromString(string)\n+ pathList.append(path)\n+ return pathList\n+ \n+ def _show (self, list):\n+ for item in list:\n+ print item.toString()\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_PathUtils ) )\n+if 
__name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Range.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Range.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,671 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+from commons.core.coord.Range import Range\n+from commons.core.coord.Range import getBin, getIdx\n+\n+\n+class Test_Range( unittest.TestCase ):\n+ \n+ def setUp(self):\n+ self._range = Range()\n+ \n+ def test_setFromString(self):\n+ line = "chunk1\\t190000\\t390000"\n+ self._range.setFromString( line )\n+ self.assertEqual( self._range.seqname, "chunk1" )\n+ self.assertEqual( self._range.start, 190000 )\n+ self.assertEqual( self._range.end, 390000 )\n+ \n+ line = "chunk1\\t190000\\t390000\\n"\n+ self._range.setFromString( line )\n+ self.assertEqual( self._range.seqname, "chunk1" )\n+ self.assertEqual( self._range.start, 190000 )\n+ self.assertEqual( self._range.end, 390000 )\n+ \n+ line = "chunk1;190000;390000"\n+ self._range.setFromString( line, ";" )\n+ self.assertEqual( self._range.seqname, "chunk1" )\n+ self.assertEqual( self._range.start, 190000 )\n+ self.assertEqual( self._range.end, 390000 )\n+ \n+ def test_setFromTuple(self):\n+ tuple = ("chunk1","190000","390000")\n+ self._range.setFromTuple( tuple)\n+ \n+ self.assertEqual( self._range.seqname, "chunk1" )\n+ self.assertEqual( self._range.start, 190000 )\n+ self.assertEqual( self._range.end, 390000 )\n+ \n+ def test___eq__(self):\n+ self._range.setFromString( "chunk1\\t190000\\t390000\\n" )\n+ o = Range()\n+ o.setFromString( "chunk1\\t190000\\t390000\\n" )\n+ self.assertEqual( self._range, o )\n+ \n+ o.setFromString( "chunk1\\t190000\\t39" )\n+ self.assertNotEquals( self._range, o )\n+ \n+ o.setFromString( "chromosome1\\t190000\\t390000" )\n+ self.assertNotEquals( self._range, o )\n+ \n+ o.setFromString( "chunk1\\t390000\\t190000" )\n+ self.assertNotEquals( self._range, o )\n+ \n+ o.setFromString( "chromosome1\\t390000\\t190000" )\n+ self.assertNotEquals( self._range, o )\n+ \n+ def test_getMin(self):\n+ self._range.setFromTuple( ("chunk1", 
190000, 390000) )\n+ expMin = 190000\n+ obsMin = self._range.getMin() \n+ self.assertTrue(expMin, obsMin)\n+ \n+ def test_getMax(self):\n+ self._range.setFromTuple( ("chunk1", 190000, 390000) )\n+ expMax = 390000\n+ obsMa'..b'2)\n+ obsRange1 = range1\n+ \n+ self.assertEquals(expRange1, obsRange1)\n+ self.assertEquals(expReturnedRange, obsReturnedRange)\n+ \n+ def test_getIdx(self):\n+ self.assertEqual(getIdx(1000,3),1000001)\n+ self.assertEqual(getIdx(999,3),1000000)\n+ self.assertEqual(getIdx(2000,3),1000002)\n+ self.assertEqual(getIdx(2000,4),2000000)\n+ self.assertEqual(getIdx(2000,5),3000000)\n+ self.assertEqual(getIdx(20000000,6),4000000)\n+ self.assertEqual(getIdx(20000000,5),3000200)\n+ self.assertEqual(getIdx(20000000,4),2002000)\n+ self.assertEqual(getIdx(20000000,3),1020000)\n+ \n+ def test_getBin_bin_level_9(self):\n+ tuple1 = ("chunk1", 190000000, 390000000)\n+ range1 =Range()\n+ range1.setFromTuple(tuple1)\n+ \n+ expRes = 100000000.0\n+ obsRes = range1.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_8(self):\n+ tuple1 = ("chunk1", 19000000, 39000000)\n+ range1 =Range()\n+ range1.setFromTuple(tuple1)\n+ \n+ expRes = 100000000.0\n+ obsRes = range1.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_7(self):\n+ tuple1 = ("chunk1", 1900000, 3900000)\n+ range1 =Range()\n+ range1.setFromTuple(tuple1)\n+ \n+ expRes = 10000000.0\n+ obsRes = range1.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_6(self):\n+ tuple1 = ("chunk1", 190000, 390000)\n+ range1 =Range()\n+ range1.setFromTuple(tuple1)\n+ \n+ expRes = 1000000.0\n+ obsRes = range1.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_5(self):\n+ tuple = ("chunk1", 19000, 39000)\n+ range =Range()\n+ range.setFromTuple(tuple)\n+ expRes = 100000.0\n+ obsRes = range.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_4(self):\n+ tuple = ("chunk1", 
1900, 3900)\n+ range =Range()\n+ range.setFromTuple(tuple)\n+ \n+ expRes = 10000.0\n+ obsRes = range.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_3(self):\n+ tuple = ("chunk1", 190, 390)\n+ range =Range()\n+ range.setFromTuple(tuple)\n+ \n+ expRes = 1000.0\n+ obsRes = range.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_2(self):\n+ tuple = ("chunk1", 19, 39)\n+ range =Range()\n+ range.setFromTuple(tuple)\n+ \n+ expRes = 1000.0\n+ obsRes = range.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ def test_getBin_bin_level_1(self):\n+ tuple = ("chunk1", 1, 3)\n+ range =Range()\n+ range.setFromTuple(tuple)\n+ \n+ expRes = 1000.0\n+ obsRes = range.getBin()\n+ \n+ self.assertEquals(expRes, obsRes)\n+ \n+ \n+ def test_getBin_function(self):\n+ expBin = 2L\n+ obsBin = getBin(200, 2)\n+ \n+ self.assertEquals(expBin, obsBin)\n+ \n+ def test_findIdx(self):\n+ o = Range()\n+ o.setFromString( "chunk1\\t1000\\t2000\\n" )\n+ self.assertEqual(o.findIdx(),2000000)\n+ \n+ o.setFromString( "chunk1\\t2000\\t1000\\n" ) \n+ self.assertEqual(o.findIdx(),2000000)\n+ \n+ o.setFromString( "chunk1\\t200\\t999\\n" ) \n+ self.assertEqual(o.findIdx(),1000000)\n+ \n+ o.setFromString( "chunk1\\t1\\t20000000\\n" ) \n+ self.assertEqual(o.findIdx(),4000000)\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Range ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_Set.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_Set.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,282 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+from commons.core.coord.Set import Set\n+from commons.core.coord.Map import Map\n+\n+\n+class Test_Set( unittest.TestCase ):\n+ \n+ def test__eq__sets_equals(self):\n+ set1 = Set( 1, "set1", "seq1", 1, 2 )\n+ set2 = Set( 1, "set1", "seq1", 1 ,2 ) \n+ self.assertEquals( set1, set2 )\n+ \n+ def test__eq__sets_not_equals_ids(self):\n+ set1 = Set( 1, "set1", "seq1", 1, 2 )\n+ set2 = Set( 2, "set1", "seq1", 1 ,2 ) \n+ self.assertNotEquals( set1, set2 )\n+ \n+ def test__eq__sets_not_equals_name(self):\n+ set1 = Set( 1, "set1", "seq1", 1, 2 )\n+ set2 = Set( 1, "set2", "seq1", 1 ,2 ) \n+ self.assertNotEquals( set1, set2 )\n+ \n+ def test__eq__on_empty_set(self):\n+ set1 = Set()\n+ set2 = Set()\n+ self.assertEquals( set1, set2 )\n+ \n+ def test_setFromTuple_equals_instances(self):\n+ tuple = ( 1, "set1", "seq1", 1, 2 )\n+ obsSet = Set()\n+ obsSet.setFromTuple(tuple)\n+ expSet = Set( 1, "set1", "seq1", 1, 2 )\n+ self.assertEquals( expSet, obsSet )\n+ \n+ def test_setFromTuple_not_equals_instances(self):\n+ tuple = ( 1, "set1", "seq1", 1, 2 )\n+ obsSet = Set()\n+ obsSet.setFromTuple(tuple)\n+ expSet = Set( 2, "set1", "seq1", 1, 2 )\n+ self.assertNotEquals( expSet, obsSet )\n+ \n+ def test_read_empty_line_file(self):\n+ fileName = "dummyFile"\n+ \n+ os.system(" touch " + fileName)\n+ \n+ fileHandler = open(fileName, "r")\n+ \n+ obsSet = Set()\n+ \n+ obsRes = obsSet.read( fileHandler )\n+ expRes = 0\n+ \n+ fileHandler.close()\n+ os.remove(fileName)\n+ \n+ self.assertEquals( expRes, obsRes )\n+\n+ def test_read_one_line_file(self):\n+ line = ( "1\\tset1\\tseq1\\t1\\t2" )\n+ fileName = "dummyFile"\n+ \n+ fileHandler = open( fileName, "w" )\n+ fileHandler.write( line )\n+ fileHandler.close()\n+ \n+ fileHandler = open( fileName, "r" )\n+ \n+ tuple = line.split("\\t")\n+ expSet = Set()\n+ 
expSet.setFromTuple(tuple)\n+ \n+ obsSet = Set()\n+ \n+ expRes = 1\n+ obsRes = obsSet.read(fileHandler)\n+ \n+ fileHandler.close()\n+ os.remove(fileName)\n+ '..b'et\n+ \n+ self.assertEquals( expSet, obsSet)\n+ \n+ def test_merge_first_id_smaller_than_second_id(self):\n+ firstSet = Set( 1, "set1", "seq1", 10, 40 )\n+ secondSet = Set( 2, "set2", "seq1", 20, 60 )\n+ \n+ firstSet.merge( secondSet )\n+ \n+ expSet = Set( 1, "set1", "seq1", 10, 60)\n+ obsSet = firstSet\n+ \n+ self.assertEquals( expSet, obsSet)\n+\n+ def test_merge_first_id_equals_second_id(self):\n+ firstSet = Set( 1, "set1", "seq1", 10, 40 )\n+ secondSet = Set( 1, "set2", "seq1", 20, 60 )\n+ \n+ firstSet.merge( secondSet )\n+ \n+ expSet = Set( 1, "set1", "seq1", 10, 60)\n+ obsSet = firstSet\n+ \n+ self.assertEquals( expSet, obsSet)\n+ \n+ def test_merge_different_seqnames(self):\n+ firstSet = Set( 2, "set1", "seq1", 10, 40 )\n+ secondSet = Set( 1, "set1", "seq2", 20, 60 )\n+ expSet = Set( 2, "set1", "seq1", 10, 40 )\n+ firstSet.merge( secondSet )\n+ obsSet = firstSet\n+ self.assertEquals( expSet, obsSet )\n+ \n+ def test_diff_on_empty_sets(self):\n+ firstSet = Set()\n+ firstSet.seqname = "seq1"\n+ secondSet = Set()\n+ secondSet.seqname = "seq2"\n+ \n+ obsSet = firstSet.diff( secondSet )\n+ expSet = Set()\n+ \n+ self.assertEquals( expSet, obsSet )\n+ \n+ def test_diff(self):\n+ firstSet = Set( 2, "set1", "seq1", 10, 80 )\n+ secondSet = Set( 1, "set2", "seq1", 20, 60 )\n+ \n+ expSet1 = Set( 2, "set1", "seq1", 10, 19 )\n+ expSet2 = Set( 2, "set1", "seq1", 61, 80 ) \n+ \n+ obsSet2 = firstSet.diff( secondSet )\n+ obsSet1 = firstSet\n+ \n+ self.assertEquals( expSet1, obsSet1 ) \n+ self.assertEquals( expSet2, obsSet2 )\n+ \n+ def test_diff_reverse(self):\n+ firstSet = Set( 2, "set1", "seq1", 20, 60 )\n+ secondSet = Set( 1, "set2", "seq1", 10, 80 )\n+ \n+ expSet1 = Set( 2, "set1", "seq1", 0, 0 )\n+ expSet2 = Set( ) \n+ \n+ obsSet2 = firstSet.diff( secondSet )\n+ obsSet1 = firstSet\n+ \n+ self.assertEquals( 
expSet1, obsSet1 ) \n+ self.assertEquals( expSet2, obsSet2 )\n+ \n+ def test_diff_list1_overlap_end_list2(self):\n+ firstSet = Set( 2, "set1", "seq1", 20, 100 )\n+ secondSet = Set( 1, "set2", "seq1", 10, 80 )\n+ \n+ expSet1 = Set( 2, "set1", "seq1", 81, 100 ) \n+ expSet2 = Set( ) \n+ \n+ obsSet2 = firstSet.diff( secondSet )\n+ obsSet1 = firstSet\n+ \n+ self.assertEquals( expSet1, obsSet1 ) \n+ self.assertEquals( expSet2, obsSet2 )\n+ \n+ def test_diff_with_empty_set1(self):\n+ set2 = Set( 1, "set1", "seq1", 2, 45 )\n+ set1 = Set( )\n+ \n+ expSet1 = Set( )\n+ expSet2 = Set( )\n+ \n+ obsSet2 = set1.diff( set2 )\n+ obsSet1 = set1\n+ \n+ self.assertEquals( expSet1, obsSet1 ) \n+ self.assertEquals( expSet2, obsSet2 )\n+ \n+ def test_diff_list2_overlap_end_list1(self):\n+ firstSet = Set( 2, "set1", "seq1", 10, 70 )\n+ secondSet = Set( 1, "set2", "seq1", 40, 100 )\n+ \n+ expSet1 = Set( 2, "set1", "seq1", 10, 39 )\n+ expSet2 = Set( ) \n+ \n+ obsSet2 = firstSet.diff( secondSet )\n+ obsSet1 = firstSet\n+ \n+ self.assertEquals( expSet1, obsSet1 ) \n+ self.assertEquals( expSet2, obsSet2 )\n+ \n+ def test_set2map(self):\n+ set = Set( 1, "set", "seq", 1, 2 )\n+ \n+ expMap = Map( "set::1", "seq", 1, 2 )\n+ obsMap = set.set2map()\n+ \n+ self.assertEquals( expMap, obsMap )\n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Set ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_SetUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_SetUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1689 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.coord.Set import Set\n+from commons.core.coord.Map import Map\n+from commons.core.coord.SetUtils import SetUtils\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_SetUtils( unittest.TestCase ):\n+ \n+ def test_changeIdInList_on_empty_list(self):\n+ lSets = []\n+ SetUtils.changeIdInList( lSets , 1 )\n+ obsLSets = lSets\n+ expLSets = []\n+ self.assertEquals( expLSets , obsLSets )\n+ \n+ def test_changeIdInList_on_list_size_one(self):\n+ set1 = Set( 1, "set1", "seq1", 1, 2 )\n+ lSets = [ set1 ]\n+ SetUtils.changeIdInList( lSets , 9 )\n+ obsLSets = lSets\n+ set1 = Set( 9, "set1", "seq1", 1, 2 )\n+ expLSets = [ set1 ]\n+ self.assertEquals( expLSets , obsLSets )\n+ \n+ def test_changeIdInList(self):\n+ set1 = Set( 1, "set1", "seq1", 1, 2 )\n+ set2 = Set( 2, "set2", "seq2", 2, 3 )\n+ lSets = [ set1, set2 ]\n+ SetUtils.changeIdInList( lSets , 9 )\n+ obsLSets = lSets\n+ set1 = Set( 9, "set1", "seq1", 1, 2 )\n+ set2 = Set( 9, "set2", "seq2", 2, 3 )\n+ expLSets = [ set1, set2 ]\n+ \n+ self.assertEquals( expLSets , obsLSets )\n+ \n+ def test_getOverlapLengthBetweenLists_all_list_are_empty (self):\n+ lSets1 = []\n+ lSets2 = []\n+ \n+ expOverlapSize = 0\n+ obsOverlapSize = SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n+ \n+ self.assertEquals( expOverlapSize, obsOverlapSize )\n+ \n+ def test_getOverlapLengthBetweenLists_list1_empty_list2_size_one (self):\n+ lSets1 = []\n+ lSets2 = [ Set( 9, "set1", "seq1", 1, 2 ) ]\n+ \n+ expOverlapSize = 0\n+ obsOverlapSize = SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n+ \n+ self.assertEquals( expOverlapSize, obsOverlapSize )\n+ \n+ def test_getOverlapLengthBetweenLists_list1_empty_list2_size_two (self):\n+ lSets1 = []\n+ lSets2 = [ Set( 9, "set1", "seq1", 1, 2 ), Set( 9, "set2", "seq2", 
2, 3 ) ]\n+ \n+ expOverlapSize = 0\n+ obsOverlapSize = SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n+ \n+ self.assertEquals( expOverlapSize, obsOverlapSize )\n+ \n+ def test_getOverlapLengthBetweenLists_list1_si'..b'\n+ obsLSet = SetUtils.getSetListFromFile(file)\n+ os.remove(file)\n+ self.assertEqual( expLSet, obsLSet )\n+ \n+ \n+ def test_convertSetFileIntoMapFile( self ):\n+ setFile = "dummySetFile"\n+ setFileHandler = open( setFile, "w" )\n+ setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n+ setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n+ setFileHandler.close()\n+ \n+ expFile = "dummyExpFile"\n+ expFileHandler = open( expFile, "w" )\n+ expFileHandler.write( "seq31\\tchr1\\t151\\t250\\n" )\n+ expFileHandler.write( "seq27\\tchr2\\t301\\t500\\n" )\n+ expFileHandler.close()\n+ \n+ obsFile = "dummyObsFile"\n+ \n+ SetUtils.convertSetFileIntoMapFile( setFile, obsFile )\n+ \n+ self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n+ \n+ for f in [ setFile, expFile, obsFile ]:\n+ os.remove( f )\n+ \n+ \n+ def test_getDictOfListsWithSeqnameAsKey_empty( self ):\n+ lSets = []\n+ dExp = {}\n+ dObs = SetUtils.getDictOfListsWithSeqnameAsKey( lSets )\n+ self.assertEquals( dExp, dObs )\n+ \n+ \n+ def test_getDictOfListsWithSeqnameAsKey( self ):\n+ lSets = [ Set( 1, "TE3", "chr2", 10, 50 ),\n+ Set( 2, "gene74", "chr1", 31, 800 ),\n+ Set( 3, "TE1", "chr1", 1, 30 ) ]\n+ dExp = { "chr1": [ Set( 2, "gene74", "chr1", 31, 800 ),\n+ Set( 3, "TE1", "chr1", 1, 30 ) ],\n+ "chr2": [ Set( 1, "TE3", "chr2", 10, 50 ) ] }\n+ dObs = SetUtils.getDictOfListsWithSeqnameAsKey( lSets )\n+ self.assertEquals( dExp, dObs )\n+ \n+ \n+ def test_filterOnLength( self ):\n+ lSets = [ Set( 1, "TE3", "chr2", 10, 50 ),\n+ Set( 2, "gene74", "chr1", 31, 800 ),\n+ Set( 3, "TE1", "chr1", 1, 30 ) ]\n+ lExp = [ Set( 2, "gene74", "chr1", 31, 800 ) ]\n+ lObs = SetUtils.filterOnLength( lSets, 100 )\n+ self.assertEqual( lExp, lObs )\n+ \n+ \n+ def test_getListOfNames( 
self ):\n+ setFile = "dummySetFile"\n+ setFileHandler = open( setFile, "w" )\n+ setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n+ setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n+ setFileHandler.close()\n+ \n+ lExp = [ "seq31", "seq27" ]\n+ lObs = SetUtils.getListOfNames( setFile )\n+ \n+ self.assertEquals( lExp, lObs )\n+ \n+ os.remove( setFile )\n+ \n+ \n+ def test_getDictOfDictsWithNamesThenIdAsKeyFromFile( self ):\n+ setFile = "dummySetFile"\n+ setFileHandler = open( setFile, "w" )\n+ setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n+ setFileHandler.write( "3\\tseq27\\tchr3\\t1\\t100\\n" )\n+ setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n+ setFileHandler.write( "2\\tseq27\\tchr2\\t601\\t650\\n" )\n+ setFileHandler.close()\n+ \n+ dExp = { "seq31": { 1: [ Set( 1, "seq31", "chr1", 151, 250 ) ] },\n+ "seq27": { 2: [ Set( 2, "seq27", "chr2", 301, 500 ),\n+ Set( 2, "seq27", "chr2", 601, 650 ) ],\n+ 3: [ Set( 3, "seq27", "chr3", 1, 100 ) ]\n+ }\n+ }\n+ dObs = SetUtils.getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile )\n+ \n+ self.assertEquals( dExp, dObs )\n+ \n+ os.remove( setFile )\n+ \n+ \n+ def _makeSetListFromTupleList (self, tupleList):\n+ setList = []\n+ for tuple in tupleList:\n+ set = Set()\n+ set.setFromTuple(tuple)\n+ setList.append(set)\n+ return setList\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_SetUtils ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/coord/test/Test_SlidingWindow.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/coord/test/Test_SlidingWindow.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,109 @@
+import unittest
+from commons.core.coord.SlidingWindow import SlidingWindow
+from commons.core.coord.SlidingWindow import SlidingWindowToCountMatchingBases
+from commons.core.coord.Set import Set
+
+class Test_SlidingWindow( unittest.TestCase ):
+    """Check the window coordinates exposed by SlidingWindow after sliding.
+
+    The tests read the private attributes _start and _end directly.
+    NOTE(review): the expected values are consistent with the second
+    constructor argument being an overlap between consecutive windows
+    (step = first argument - second argument) - confirm against SlidingWindow.
+    """
+
+    def test_slideWindowOnce( self ):
+        """One slide of SlidingWindow(100, 10) must give the window [91, 190]."""
+        expStart = 91
+        expEnd = 190
+        self.sw = SlidingWindow(100, 10)
+        self.sw.slideWindowOnce()
+        obsStart = self.sw._start
+        obsEnd = self.sw._end
+
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+
+    def test_slideWindowOnceFourTime( self ):
+        """Four slides of SlidingWindow(100, 50) must give the window [201, 300]."""
+        expStart = 201
+        expEnd = 300
+        self.sw = SlidingWindow(100, 50)
+        # The loop index is not used: range(4) alone drives the four slides.
+        for _ in range(4):
+            self.sw.slideWindowOnce()
+        obsStart = self.sw._start
+        obsEnd = self.sw._end
+
+        self.assertEqual(expStart, obsStart)
+        self.assertEqual(expEnd, obsEnd)
+
+
+class Test_SlidingWindowToCountMatchingBases(unittest.TestCase):
+    """Check how a Set is clipped against the current window.
+
+    getSetLengthOnWindow() is expected to return the number of positions of
+    the Set that fall inside the window, and getCoordSetOnWindow() the
+    (start, end) pair of that clipped part.
+    """
+
+    def _window( self, slides, *ctorArgs ):
+        # Build the window under test, keep it on self.sw as the tests always
+        # did, and slide it the requested number of times.
+        self.sw = SlidingWindowToCountMatchingBases( *ctorArgs )
+        for _ in range( slides ):
+            self.sw.slideWindowOnce()
+        return self.sw
+
+    def test_getSetLengthOnWindow_featureIncluded( self ):
+        window = self._window( 0, 100, 1 )
+        feature = Set( 1, "TE3", "chr1", 21, 30 )
+        self.assertEqual( 10, window.getSetLengthOnWindow( feature ) )
+
+    def test_getSetLengthOnWindow_windowIncluded( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 21, 530 )
+        self.assertEqual( 100, window.getSetLengthOnWindow( feature ) )
+
+    def test_getSetLengthOnWindow_featureOverlapLeft( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 21, 130 )
+        self.assertEqual( 40, window.getSetLengthOnWindow( feature ) )
+
+    def test_getSetLengthOnWindow_featureOverlapRight( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 121, 230 )
+        self.assertEqual( 70, window.getSetLengthOnWindow( feature ) )
+
+    def test_getCoordSetOnWindow_featureIncluded( self ):
+        window = self._window( 0, 100, 1 )
+        feature = Set( 1, "TE3", "chr1", 21, 30 )
+        clippedStart, clippedEnd = window.getCoordSetOnWindow( feature )
+        self.assertEqual( 21, clippedStart )
+        self.assertEqual( 30, clippedEnd )
+
+    def test_getCoordSetOnWindow_windowIncluded( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 21, 530 )
+        clippedStart, clippedEnd = window.getCoordSetOnWindow( feature )
+        self.assertEqual( 91, clippedStart )
+        self.assertEqual( 190, clippedEnd )
+
+    def test_getCoordSetOnWindow_featureOverlapLeft( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 21, 130 )
+        clippedStart, clippedEnd = window.getCoordSetOnWindow( feature )
+        self.assertEqual( 91, clippedStart )
+        self.assertEqual( 130, clippedEnd )
+
+    def test_getCoordSetOnWindow_featureOverlapRight( self ):
+        window = self._window( 1, 100, 10 )
+        feature = Set( 1, "TE3", "chr1", 121, 230 )
+        clippedStart, clippedEnd = window.getCoordSetOnWindow( feature )
+        self.assertEqual( 121, clippedStart )
+        self.assertEqual( 190, clippedEnd )
+
+# Module-level suite: it must list every TestCase class of this file,
+# otherwise running the file as a script silently skips the missing ones.
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_SlidingWindow ) )
+test_suite.addTest( unittest.makeSuite( Test_SlidingWindowToCountMatchingBases ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/JobScriptTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptTemplate.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+# Job script template (Python 2 syntax).
+# The names delimited by double at-signs are placeholders, so this file is not
+# runnable as it stands; they are presumably filled in by
+# commons.core.launcher.WriteScript before the script is submitted - confirm.
+# Flow: check the temporary directory, create a per-job working directory,
+# mark the job "running" in the job table, run the job command, clean up and
+# mark the job "finished"; any failure marks the job "error" and exits with 1.
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ # newDir stays None until the working directory name is known; the generic
+ # error handler below uses that to decide whether there is anything to save.
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '@@tmpDir@@'"
+ sys.stdout.flush()
+ if not os.path.exists( "@@tmpDir@@" ):
+ raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+ # Require at least minFreeGigaInTmpDir GiB available to unprivileged users:
+ # f_bavail blocks times f_frsize bytes per block, divided by 2**30.
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("@@tmpDir@@")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of input file in '@@tmpDir@@'" % minFreeGigaInTmpDir)
+
+ # Work in a fresh directory named after the group id, job name and time;
+ # a leftover directory with the same name is deleted first.
+ os.chdir("@@tmpDir@@")
+ newDir = "@@groupId@@_@@jobName@@_@@time@@"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ # Record the job as "running". The connection is opened and closed around
+ # the update rather than kept open while the job command runs.
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ # The start command substituted on the next line is expected to assign the
+ # variable "log", which is read immediately afterwards (non-zero = failure).
+ @@cmdStart@@
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ @@cmdFinish@@
+
+ # Success: leave and delete the working directory.
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+# I/O failure (for instance the missing temporary directory raised above):
+# report it, mark the job "error" and exit with status 1.
+except IOError, e :
+ print e
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+# Any other failure: additionally move the working directory to the cDir
+# location so its content can be inspected, unless a directory of that name
+# is already there.
+except Exception, e :
+ print "tmpDir is : @@tmpDir@@"
+ print "cDir is : @@cDir@@"
+ print e
+ # NOTE(review): the "../" test only finds the directory when the current
+ # directory is still the working directory itself - confirm.
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("@@cDir@@/%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "@@cDir@@/%s" % newDir)
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/JobScriptTemplateLight.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptTemplateLight.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+# Light job script template (Python 2 syntax): same steps as the full job
+# script but without any job-table bookkeeping.
+# The names delimited by double at-signs are placeholders, so this file is not
+# runnable as it stands; they are presumably filled in by
+# commons.core.launcher.WriteScript before the script is submitted - confirm.
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '@@tmpDir@@'"
+ sys.stdout.flush()
+ if not os.path.exists( "@@tmpDir@@" ):
+ raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+ # Require at least minFreeGigaInTmpDir GiB available to unprivileged users:
+ # f_bavail blocks times f_frsize bytes per block, divided by 2**30.
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("@@tmpDir@@")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of input file in '@@tmpDir@@'" % minFreeGigaInTmpDir)
+
+ # Work in a fresh directory named after the group id, job name and time;
+ # a leftover directory with the same name is deleted first.
+ os.chdir("@@tmpDir@@")
+ newDir = "@@groupId@@_@@jobName@@_@@time@@"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ # The start command substituted on the next line is expected to assign the
+ # variable "log", which is read immediately afterwards (non-zero = failure).
+ @@cmdStart@@
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ @@cmdFinish@@
+
+ # Success: leave and delete the working directory, then report timings.
+ os.chdir("..")
+ shutil.rmtree(newDir)
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+# Only IOError is handled here; a RepetException raised above is not caught by
+# this handler and propagates out of the script.
+except IOError, e :
+ print e
+ sys.stdout.flush()
+ sys.exit(1)
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/JobScriptWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/JobScriptWithFilesCopyTemplate.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+# Job script template with input-file copy (Python 2 syntax).
+# Compared with the plain job script, it first sizes and copies input files
+# into a per-group directory, then runs the job in a sub-directory of it.
+# The names delimited by double at-signs are placeholders, so this file is not
+# runnable as it stands; they are presumably filled in by
+# commons.core.launcher.WriteScript before the script is submitted - confirm.
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ # newDir stays None until the working directory name is known; the generic
+ # error handler below uses that to decide whether there is anything to save.
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '@@tmpDir@@'"
+ sys.stdout.flush()
+ if not os.path.exists("@@tmpDir@@"):
+ raise IOError("ERROR: temporary directory '@@tmpDir@@' doesn't exist")
+
+ # fileSize defaults to 0; the size command substituted below presumably
+ # updates it (in GiB, given how it is used) - confirm.
+ # NOTE(review): this existence test runs before the chdir into the temporary
+ # directory further down, so the relative path is resolved against the
+ # directory the script was started from - confirm that this is intended.
+ fileSize = 0
+ if not os.path.exists("@@groupId@@"):
+ @@cmdSize@@
+ # Require 1 GiB plus fileSize available to unprivileged users:
+ # f_bavail blocks times f_frsize bytes per block, divided by 2**30.
+ freeGigaNeededInTmpDir = float(1 + fileSize)
+ freeSpace = os.statvfs("@@tmpDir@@")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+ raise RepetException("ERROR: less than %.2fG of input file in '@@tmpDir@@'" % freeGigaNeededInTmpDir)
+
+ os.chdir("@@tmpDir@@")
+ if not os.path.exists("@@groupId@@"):
+ try:
+ os.mkdir("@@groupId@@")
+ except OSError, e :
+ # Error number 17 is EEXIST on Linux: the directory already exists
+ # (presumably created meanwhile by another job of the group), which
+ # is tolerated; any other error aborts the job.
+ if e.args[0] != 17:
+ raise RepetException("ERROR: can't create '@@groupId@@'")
+ os.chdir("@@groupId@@")
+ @@cmdCopy@@
+ else:
+ os.chdir("@@groupId@@")
+
+ # Work in a fresh directory named after the group id, job name and time;
+ # a leftover directory with the same name is deleted first.
+ newDir = "@@groupId@@_@@jobName@@_@@time@@"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ # Record the job as "running". The connection is opened and closed around
+ # the update rather than kept open while the job command runs.
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ # The start command substituted on the next line is expected to assign the
+ # variable "log", which is read immediately afterwards (non-zero = failure).
+ @@cmdStart@@
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ @@cmdFinish@@
+
+ # Success: leave and delete the working directory.
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+# I/O failure (for instance the missing temporary directory raised above):
+# report it, mark the job "error" and exit with status 1.
+except IOError, e :
+ print e
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+# Any other failure: additionally move the working directory to the cDir
+# location so its content can be inspected, unless a directory of that name
+# is already there.
+except Exception, e :
+ print "tmpDir is : @@tmpDir@@"
+ print "cDir is : @@cDir@@"
+ print e
+ # NOTE(review): the "../" test only finds the directory when the current
+ # directory is still the working directory itself - confirm.
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("@@cDir@@/%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "@@cDir@@/%s" % newDir)
+ iJob = Job(jobname = "@@jobName@@", groupid = "@@groupId@@", launcherFile = "@@launcher@@", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "@@jobTableName@@")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/Launcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/Launcher.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,229 @@\n+from commons.tools.CleanClusterNodesAfterRepet import CleanClusterNodesAfterRepet\n+from commons.core.stat.Stat import Stat\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.sql.Job import Job\n+import stat\n+import os\n+import re\n+import sys\n+import time\n+import glob\n+\n+class Launcher(object):\n+\n+ #TODO: remove unused parameters : query="", subject="", param="", job_table=""\n+ def __init__( self, jobdb, query="", subject="", param="", cdir="",\n+ tmpdir="", job_table="", queue="", groupid="", acro="X",\n+ chooseTemplateWithCopy = False, chooseTemplateLight = False):\n+ if jobdb.__class__.__name__ == "RepetJob":\n+ self.jobdb = TableJobAdaptatorFactory.createInstance(jobdb, "jobs")\n+ else:\n+ self.jobdb = jobdb\n+ self.jobdb.checkJobTable()\n+ if cdir == "":\n+ cdir = os.getcwd()\n+ self.cdir = cdir\n+ self.tmpdir = tmpdir\n+ self.groupid = groupid\n+ self.acronyme = acro\n+ self._chooseTemplateWithCopy = chooseTemplateWithCopy\n+ self._chooseTemplateLight = chooseTemplateLight\n+ self.queue, self.lResources = self.getQueueNameAndResources(queue)\n+ self._createJobInstance()\n+ self._nbJobs = 0\n+ \n+ def getQueueNameAndResources(self, configQueue):\n+ tokens = configQueue.replace("\'","").split(" ")\n+ queueName = ""\n+ lResources = []\n+ if tokens[0] != "":\n+ if re.match(".*\\.q", tokens[0]):\n+ queueName = tokens[0]\n+ lResources = tokens[1:]\n+ else:\n+ lResources = tokens\n+ return queueName, lResources\n+\n+ def createGroupidIfItNotExist(self):\n+ if self.groupid == "":\n+ self.job.groupid = str(os.getpid())\n+ else:\n+ self.job.groupid = self.groupid\n+\n+ def beginRun( self ):\n+ self.createGroupidIfItNotExist()\n+ if self.jobdb.hasUnfinishedJob(self.job.groupid):\n+ self.jobdb.waitJobGroup(self.job.groupid)\n+ else:\n+ self.jobdb.cleanJobGroup(self.job.groupid)\n+\n+ ## Launch one job in parallel\n+ #\n+ # @param 
cmdStart string command-line for the job to be launched\n+ # @param cmdFinish string command to retrieve result files\n+ # @warning the jobname has to be defined outside from this method\n+ #\n+ def runSingleJob(self, cmdStart, cmdFinish = "", cmdSize = "", cmdCopy = ""):\n+ if self._nbJobs == 0:\n+ self._nbJobs = 1\n+ pid = str(os.getpid())\n+ now = time.localtime()\n+ #TODO: rename ClusterLauncher_ ...\n+ pyFileName = self.cdir + "/ClusterLauncher_" + self.job.groupid + "_" +\\\n+ self.job.jobname + "_" + str(now[0]) + "-" + str(now[1]) +\\\n+ "-" + str(now[2]) + "_" + pid + ".py"\n+ self.job.launcher = pyFileName\n+ \n+ #TODO: to remove when refactoring is done\n+ cmdStart = self._indentCmd(cmdStart)\n+ cmdFinish = self._indentCmd(cmdFinish)\n+ \n+ iWriteScript = WriteScript(self.job, self.jobdb, self.cdir, self.tmpdir, self._chooseTemplateWithCopy, self._chooseTemplateLight)\n+ iWriteScript.run(cmdStart, cmdFinish, pyFileName, cmdSize, cmdCopy)\n+ os.chmod(pyFileName, stat.S_IRWXU+stat.S_IRGRP+stat.S_IXGRP+stat.S_IROTH+stat.S_IXOTH)\n+ sys.stdout.flush()\n+ log = self.jobdb.submitJob(self.job)\n+ if log != 0:\n+ print "ERROR while submitting job to the cluster"\n+ sys.exit(1)\n+ \n+ def endRun(self, cleanNodes = False):\n+ string = "waiting for %i job(s) with groupid \'%s\' (%s)" % (self._nbJobs, self.job.groupid, time.strftime("%Y-%m-%d %H:%M:%S"))\n+ print string; sys.stdout.flush()\n+ self.jobdb.waitJobGroup(self.job.groupid)\n+ if self._nbJobs > 1:\n+ '..b'()\n+ return stat \n+\n+ def clean( self, acronyme = "", stdout = True, stderr = True ):\n+ lFileToRemove = []\n+ if acronyme == "":\n+ acronyme = self.acronyme \n+ pattern = "ClusterLauncher*%s*.py" % ( acronyme )\n+ lFileToRemove.extend(glob.glob( pattern ))\n+ if stdout:\n+ pattern = "%s*.o*" % ( acronyme )\n+ lFileToRemove.extend(glob.glob( pattern )) \n+ if stderr:\n+ pattern = "%s*.e*" % ( acronyme )\n+ lFileToRemove.extend(glob.glob( pattern )) \n+ for file in lFileToRemove:\n+ os.remove(file)\n+ 
\n+ #TODO: handle of nodesMustBeCleaned => class attribute ?\n+ def runLauncherForMultipleJobs(self, acronymPrefix, lCmdsTuples, cleanMustBeDone = True, nodesMustBeCleaned = False):\n+ self.beginRun()\n+ print "submitting job(s) with groupid \'%s\' (%s)" % (self.job.groupid, time.strftime("%Y-%m-%d %H:%M:%S"))\n+ for cmdsTuple in lCmdsTuples:\n+ self._nbJobs += 1\n+ self.acronyme = "%s_%s" % (acronymPrefix, self._nbJobs)\n+ self.job.jobname = self.acronyme\n+ if len(cmdsTuple) == 2:\n+ self.runSingleJob(cmdsTuple[0], cmdsTuple[1])\n+ else:\n+ self.runSingleJob(cmdsTuple[0], cmdsTuple[1], cmdsTuple[2], cmdsTuple[3])\n+ self._createJobInstance()\n+ self.createGroupidIfItNotExist()\n+ self.acronyme = acronymPrefix\n+ self.endRun(nodesMustBeCleaned)\n+ if cleanMustBeDone:\n+ self.clean("%s_" % acronymPrefix)\n+ self.jobdb.close()\n+\n+ def prepareCommands(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []):\n+ cmdStart = ""\n+ for cmd in lCmdStart:\n+ cmdStart += "%s\\n\\t" % cmd\n+ for cmd in lCmds:\n+ cmdStart += "%s\\n\\t" % cmd\n+ cmdFinish = ""\n+ for cmd in lCmdFinish:\n+ cmdFinish += "%s\\n\\t" % cmd\n+ cmdSize = ""\n+ for cmd in lCmdSize:\n+ cmdSize += "%s\\n\\t\\t" % cmd\n+ cmdCopy = ""\n+ for cmd in lCmdCopy:\n+ cmdCopy += "%s\\n\\t\\t" % cmd\n+ return (cmdStart, cmdFinish, cmdSize, cmdCopy)\n+\n+ #TODO: to remove when refactoring is done\n+ def prepareCommands_withoutIndentation(self, lCmds, lCmdStart = [], lCmdFinish = [], lCmdSize = [], lCmdCopy = []):\n+ cmdStart = ""\n+ for cmd in lCmdStart:\n+ cmdStart += "%s\\n" % cmd\n+ for cmd in lCmds:\n+ cmdStart += "%s\\n" % cmd\n+ cmdFinish = ""\n+ for cmd in lCmdFinish:\n+ cmdFinish += "%s\\n" % cmd\n+ cmdSize = ""\n+ for cmd in lCmdSize:\n+ cmdSize += "%s\\n\\t\\t" % cmd\n+ cmdCopy = ""\n+ for cmd in lCmdCopy:\n+ cmdCopy += "%s\\n\\t\\t" % cmd\n+ return (cmdStart, cmdFinish, cmdSize, cmdCopy)\n+ \n+ def getSystemCommand(self, prg, lArgs):\n+ systemCmd = "log = os.system(\\"" + prg \n+ 
for arg in lArgs:\n+ systemCmd += " " + arg\n+ systemCmd += "\\")"\n+ return systemCmd\n+\n+ def cleanNodes(self):\n+ iCleanClusterNodeAfterRepet = CleanClusterNodesAfterRepet()\n+ iCleanClusterNodeAfterRepet.setLNodes(self.jobdb.getNodesListByGroupId(self.groupid))\n+ iCleanClusterNodeAfterRepet.setTempDirectory(self.tmpdir)\n+ iCleanClusterNodeAfterRepet.setPattern("%s*" % self.groupid)\n+ iCleanClusterNodeAfterRepet.run()\n+\n+ #TODO: to remove when refactoring is done\n+ def _indentCmd(self, cmd):\n+ lCmd = cmd.split("\\n")\n+ cmd_Tab = "%s\\n" % lCmd[0]\n+ for line in lCmd[1:-1]:\n+ cmd_Tab += "\\t%s\\n" % line\n+ return cmd_Tab\n+ \n+ def _createJobInstance(self):\n+ if self.lResources == []:\n+ #To have mem_free=1G:\n+ self.job = Job(queue=self.queue)\n+ else:\n+ self.job = Job(queue=self.queue, lResources=self.lResources)\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/LauncherUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/LauncherUtils.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,31 @@
+class LauncherUtils(object):
+    """Static helper functions of the launcher package."""
+
+    @staticmethod
+    def createHomogeneousSizeList(lStringSizeTuples, maxSize):
+        """Group names so that the summed size of each group stays below maxSize.
+
+        @param lStringSizeTuples list of (name, size) tuples; the list itself is
+               not modified, the method works on a sorted copy
+        @param maxSize number, exclusive upper bound for the summed size of a
+               group holding several items
+        @return list of lists of names. An item whose own size is >= maxSize
+                is returned alone in its own list.
+
+        Items are visited from the largest to the smallest. When the current
+        item does not fit into the open group, the group is first topped up
+        with the smallest remaining items (taken from the end of the sorted
+        copy) and then closed; the current item opens the next group.
+        """
+        # Largest size first; equal sizes are ordered by name, also descending.
+        lStringSizeTuplesSorted = sorted(lStringSizeTuples, key=lambda stringSizeTuple:(stringSizeTuple[1], stringSizeTuple[0]), reverse = True)
+        lStringSizeList = []
+        # Names and summed size of the group currently being filled.
+        lStringSize = []
+        sumTupleSize = 0
+        # Index of the item visited by the for loop / index of the last item
+        # still present in the sorted copy.
+        iteratorFromBegin = 0
+        iteratorFromEnd = len(lStringSizeTuplesSorted) - 1
+        # NOTE(review): the loop variable shadows the builtin 'tuple'.
+        for tuple in lStringSizeTuplesSorted:
+            if sumTupleSize + tuple[1] < maxSize:
+                # Strict comparison: a group never reaches maxSize exactly.
+                lStringSize.append(tuple[0])
+                sumTupleSize += tuple[1]
+            elif tuple[1] >= maxSize:
+                # Too big to share a group: emitted alone, the open group is
+                # left untouched.
+                lStringSizeList.append([tuple[0]])
+            else:
+                # The item does not fit: fill the open group with the smallest
+                # remaining items before closing it.
+                tupleFromEnd = lStringSizeTuplesSorted[iteratorFromEnd]
+                # 'iteratorFromBegin < iteratorFromEnd' stops the back-fill
+                # before it reaches the item currently visited.
+                while sumTupleSize + tupleFromEnd[1] < maxSize and iteratorFromBegin < iteratorFromEnd:
+                    lStringSize.append(tupleFromEnd[0])
+                    sumTupleSize += tupleFromEnd[1]
+                    # Removing the consumed item from the end of the list being
+                    # iterated prevents the for loop from visiting it later.
+                    del lStringSizeTuplesSorted[iteratorFromEnd]
+                    iteratorFromEnd -= 1
+                    tupleFromEnd = lStringSizeTuplesSorted[iteratorFromEnd]
+                lStringSizeList.append(lStringSize)
+                # The current item starts the next group.
+                lStringSize = [tuple[0]]
+                sumTupleSize = tuple[1]
+            iteratorFromBegin += 1
+        # Flush the last, still open group.
+        if lStringSize:
+            lStringSizeList.append(lStringSize)
+        return lStringSizeList
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/WriteScript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/WriteScript.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,76 @@
+import os
+import time
+
+class WriteScript(object):
+    """Write a job script by filling one of the launcher templates.
+
+    Templates are read from "$REPET_PATH/commons/core/launcher/". Every
+    "@@key@@" placeholder found in the template is replaced by the value
+    stored under "key" in the substitution dictionary.
+    """
+
+    def __init__(self, job = None, jobdb = None, cdir = "", tmpdir = "", chooseTemplateWithCopy = False, chooseTemplateLight = False):
+        """Store the job description and the template options.
+
+        @param job object providing the 'groupid', 'jobname' and 'launcher' attributes
+        @param jobdb object providing the '_table' attribute (name of the job table)
+        @param cdir string substituted for the 'cDir' placeholder
+        @param tmpdir string substituted for the 'tmpDir' placeholder
+        @param chooseTemplateWithCopy boolean select "JobScriptWithFilesCopyTemplate.py"
+        @param chooseTemplateLight boolean select "JobScriptTemplateLight.py";
+               takes precedence over chooseTemplateWithCopy
+        """
+        self._iJob = job
+        self._iJobdb = jobdb
+        self._cDir = cdir
+        self._tmpDir = tmpdir
+        self._chooseTemplateWithCopy = chooseTemplateWithCopy
+        self._chooseTemplateLight = chooseTemplateLight
+
+    def run(self, cmdStart, cmdFinish, pyFileName, cmdSize = "", cmdCopy = ""):
+        """Build the substitution dictionary and write the script 'pyFileName'.
+
+        @param cmdStart string substituted for the 'cmdStart' placeholder
+        @param cmdFinish string substituted for the 'cmdFinish' placeholder
+        @param pyFileName string path of the script to write
+        @param cmdSize string substituted for the 'cmdSize' placeholder
+        @param cmdCopy string substituted for the 'cmdCopy' placeholder
+        """
+        if self._chooseTemplateLight:
+            d = self.createJobScriptLightDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+        else:
+            d = self.createJobScriptDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+        self.fillTemplate(pyFileName, d)
+
+    def fillTemplate(self, outputFileName, dict):
+        """Copy the selected template to 'outputFileName', replacing the placeholders.
+
+        @param outputFileName string path of the file to write (overwritten)
+        @param dict dictionary placeholder name -> replacement string; the
+               parameter keeps its historical name for keyword callers
+        @raise KeyError if the REPET_PATH environment variable is not set
+        """
+        # 'with' closes both files even when reading, replacing or writing fails.
+        with open(self._getTemplateFileName(), "r") as templateFile:
+            data = templateFile.read()
+        for key, value in dict.items():
+            data = data.replace("@@%s@@" % key, value)
+        with open(outputFileName, "w") as outputFile:
+            outputFile.write(data)
+
+    def createJobScriptDict(self, cmdStart, cmdFinish, cmdSize, cmdCopy):
+        """Return the substitutions of the full templates.
+
+        Same entries as createJobScriptLightDict() plus the database settings
+        read from REPET_HOST, REPET_USER, REPET_PW, REPET_DB and REPET_PORT.
+
+        @raise KeyError if one of the REPET_* environment variables is not set
+        """
+        d = self._createCommonDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+        for name in ("host", "user", "pw", "db", "port"):
+            d["repet_%s" % name] = os.environ["REPET_%s" % name.upper()]
+        return d
+
+    def createJobScriptLightDict(self, cmdStart, cmdFinish, cmdSize, cmdCopy):
+        """Return the substitutions of the light template (no database settings).
+
+        @raise KeyError if the REPET_PATH environment variable is not set
+        """
+        return self._createCommonDict(cmdStart, cmdFinish, cmdSize, cmdCopy)
+
+    def _getTemplateFileName(self):
+        """Return the path of the template selected by the constructor flags."""
+        if self._chooseTemplateLight:
+            templateName = "JobScriptTemplateLight.py"
+        elif self._chooseTemplateWithCopy:
+            templateName = "JobScriptWithFilesCopyTemplate.py"
+        else:
+            templateName = "JobScriptTemplate.py"
+        return "%s/commons/core/launcher/%s" % (os.environ["REPET_PATH"], templateName)
+
+    def _createCommonDict(self, cmdStart, cmdFinish, cmdSize, cmdCopy):
+        """Return the entries shared by the full and the light dictionaries."""
+        return {
+            "tmpDir" : self._tmpDir,
+            "jobTableName" : self._iJobdb._table,
+            "groupId" : self._iJob.groupid,
+            "jobName" : self._iJob.jobname,
+            "launcher" : self._iJob.launcher,
+            "time" : time.strftime("%Y%m%d-%H%M%S"),
+            "repet_path" : os.environ["REPET_PATH"],
+            "cmdStart" : cmdStart,
+            "cmdFinish" : cmdFinish,
+            "cDir" : self._cDir,
+            "cmdSize" : cmdSize,
+            "cmdCopy" : cmdCopy
+        }

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/Test_Launcher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_Launcher.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,333 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.launcher.Launcher import Launcher\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.stat.Stat import Stat\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.Job import Job\n+import unittest\n+import os\n+import shutil\n+import time\n+import stat\n+\n+#TODO: Test_F_Launcher.py : to execute prepareCommands() and runSingleJob()\n+# to test runLauncherForMultipleJobs()\n+#TODO: check clean of "Test_runSingleJob"\n+#TODO: refactoring => choose between "self._queue" or "lResources" to set resources\n+class Test_Launcher(unittest.TestCase):\n+\n+ SARUMAN_NAME = "compute-2-46.local"\n+ \n+ def setUp(self):\n+ self._cDir = os.getcwd()\n+ self._tmpDir = self._cDir\n+ self._groupid = "test"\n+ self._jobTable = "dummyJobTable"\n+ self._iDb = DbFactory.createInstance()\n+ self._iDb.createTable(self._jobTable, "jobs", overwrite = True)\n+ self._jobdb = TableJobAdaptatorFactory.createInstance(self._iDb, self._jobTable)\n+ self._queue = ""\n+ self._configFileName = "dummyConfigFile"\n+ \n+ def tearDown(self):\n+ self._iDb.dropTable(self._jobTable)\n+ self._iDb.close()\n+ FileUtils.removeFilesByPattern(\'*.e*\')\n+ FileUtils.removeFilesByPattern(\'*.o*\')\n+ FileUtils.removeFilesByPattern(\'launcherFileTest_BeginRun.py\')\n+ FileUtils.removeFilesByPattern(self._configFileName)\n+ FileUtils.removeFilesByPattern(\'ClusterLauncher_*\')\n+ \n+ def test__init__wrong_fields_for_job_table(self):\n+ self._iDb.dropTable(self._jobTable)\n+ sqlCmd = "CREATE TABLE " + self._jobTable \n+ sqlCmd += " ( jobid INT UNSIGNED"\n+ sqlCmd += ", jobname VARCHAR(255)"\n+ sqlCmd += ", groupid VARCHAR(255)"\n+ sqlCmd += ", command TEXT"\n+ sqlCmd += ", launcher VARCHAR(1024)"\n+ sqlCmd += ", queue VARCHAR(255)"\n+ sqlCmd += ", status VARCHAR(255)"\n+ sqlCmd += ", time DATETIME"\n+ 
sqlCmd += ", node VARCHAR(255) )"\n+ self._iDb.execute(sqlCmd)\n+ acronym = "Test__init__"\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, self._groupid, acronym)\n+ lExpFields = sorted(["jobid", "jobname", "groupid", "launcher", "queue", "resources", "status", "time", "node"])\n+ lObsFields = sorted(self._iDb.getFieldList(self._jobTable))\n+ self.assertEquals(lExpFields, lObsFields)\n+ expJob = Job(queue = self._queue)\n+ obsJob = iLauncher.job\n+ self.assertEquals(expJob, obsJob)\n+ \n+ def test__init__withResources(self):\n+ queue = "main.q mem_free=3G"\n+ acronym = "Test__init__"\n+ expQueue = "main.q"\n+ explResources = [\'mem_free=3G\']\n+ expJob = Job(queue = expQueue, lResources = explResources)\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", queue, self._groupid, acronym)\n+ obsJob = iLauncher.job\n+ self.assertEquals(expJob, obsJob)\n+\n+ def test_createGroupidIfItNotExist(self):\n+ acronym = "checkGroupID"\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, self._groupid, acronym)\n+ iLauncher.createGroupidIfItNotExist()\n+ obsGroupid = iLauncher.job.groupid\n+ self.assertEquals(self._groupid, obsGroupid)\n+\n+ def test_createGroupidIfItNotExist_without_groupid(self):\n+ groupid = ""\n+ acronym = "checkGroupID"\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", self._cDir, self._tmpDir, "", self._queue, groupid, acronym)\n+ iLauncher.createGroupidIfItNotExist()\n+ obsGroupid = iLauncher.job.groupid\n+ self.assertTrue(obsGroupid != "")\n+ \n+ def test_begi'..b'r)\n+ shutil.rmtree(acronym)\n+ self.assertEqual(jobStatus, "finished")\n+ \n+ def test_runSingleJob_catch_error_wrong_tmpDir(self):\n+ acronym = "Test_runSingleJob_catch_error"\n+ os.mkdir(acronym)\n+ os.chdir(acronym)\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", os.getcwd(), "%s/toto" % self._tmpDir, "", self._queue, self._groupid, 
acronym)\n+ iLauncher.job.groupid = self._groupid\n+ iLauncher.job.jobname = acronym\n+ iLauncher.job.queue = self._queue\n+ if Test_Launcher.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+ iLauncher.job.lResources = ["test=TRUE"]\n+ cmd = "log = os.system(\\"touch \'YuFei\'\\")\\n"\n+ iLauncher.runSingleJob(cmd)\n+ time.sleep(20)\n+ jobStatus = self._jobdb.getJobStatus(iLauncher.job) \n+ os.chdir(self._cDir)\n+ shutil.rmtree(acronym)\n+ self.assertEqual(jobStatus, "error")\n+ \n+ def test_runSingleJob_catch_error_wrong_cmd(self):\n+ acronym = "Test_runSingleJob_catch_error"\n+ os.mkdir(acronym)\n+ os.chdir(acronym)\n+ iLauncher = Launcher(self._jobdb, os.getcwd(), "", "", os.getcwd(), self._tmpDir, "", self._queue, self._groupid, acronym)\n+ iLauncher.job.groupid = self._groupid\n+ iLauncher.job.jobname = acronym\n+ iLauncher.job.queue = self._queue\n+ if Test_Launcher.SARUMAN_NAME == os.getenv("HOSTNAME"):\n+ iLauncher.job.lResources = ["test=TRUE"]\n+ cmd = "log = os.system(\\"truc -i toto\\")\\n"\n+ iLauncher.runSingleJob(cmd)\n+ time.sleep(20)\n+ jobStatus = self._jobdb.getJobStatus(iLauncher.job) \n+ self._jobdb.cleanJobGroup(self._groupid)\n+ os.chdir(self._cDir)\n+ shutil.rmtree(acronym)\n+ self.assertEqual(jobStatus, "error")\n+\n+ def test_prepareCommands(self):\n+ expCmdStart = "os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")\\n\\tos.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")\\n\\tlog = os.system(\\"touch file\\")\\n\\t" \n+ expCmdFinish = "if os.path.exists(\\"yufei.align\\"):\\n\\t\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )\\n\\t"\n+ expCmdSize = "fileSize = 3.2\\n\\t\\t"\n+ expCmdCopy = "shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")\\n\\t\\tshutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")\\n\\t\\t"\n+ \n+ lCmdStart = []\n+ lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa\\", \\"Yufei_chunks.fa\\")")\n+ lCmdStart.append("os.symlink(\\"../Yufei_chunks.fa_cut\\", \\"Yufei_chunks.fa_cut\\")")\n+ 
lCmds = []\n+ lCmds.append("log = os.system(\\"touch file\\")")\n+ lCmdFinish = []\n+ lCmdFinish.append("if os.path.exists(\\"yufei.align\\"):")\n+ lCmdFinish.append("\\tshutil.move(\\"yufei.align\\", \\"yufeiLuo/.\\" )") \n+ lCmdSize = []\n+ lCmdSize.append("fileSize = 3.2") \n+ lCmdCopy = []\n+ lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa\\", \\".\\")")\n+ lCmdCopy.append("shutil.copy(\\"PY/Yufei_db/Yufei_chunks.fa_cut\\", \\".\\")")\n+\n+ iLauncher = Launcher(self._jobdb)\n+ obsCmdStart, obsCmdFinish, obsCmdSize, obsCmdCopy = iLauncher.prepareCommands(lCmds, lCmdStart, lCmdFinish, lCmdSize, lCmdCopy) \n+ \n+ self.assertEquals(expCmdStart, obsCmdStart)\n+ self.assertEquals(expCmdFinish, obsCmdFinish) \n+ self.assertEquals(expCmdSize, obsCmdSize)\n+ self.assertEquals(expCmdCopy, obsCmdCopy)\n+ \n+ def test_getSystemCommand(self):\n+ prg = "touch"\n+ lArgs = []\n+ lArgs.append("file")\n+ expCmd = "log = os.system(\\"touch file\\")"\n+ iLauncher = Launcher(self._jobdb)\n+ obsCmd = iLauncher.getSystemCommand(prg, lArgs)\n+ self.assertEquals(expCmd, obsCmd)\n+\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Launcher ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite ) \n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/Test_LauncherUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_LauncherUtils.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,102 @@
+import unittest
+from commons.core.launcher.LauncherUtils import LauncherUtils
+
+class Test_LauncherUtils(unittest.TestCase):
+    """Unit tests of LauncherUtils.createHomogeneousSizeList()."""
+
+    def _assertGroups(self, lHeadersSizeTuples, expLHeadersList, maxSize = 500):
+        """Group the given (header, size) tuples and compare with the expected groups."""
+        obsLHeadersList = LauncherUtils.createHomogeneousSizeList(lHeadersSizeTuples, maxSize)
+        # assertEqual replaces the deprecated assertEquals alias.
+        self.assertEqual(expLHeadersList, obsLHeadersList)
+
+    def test_createHomogeneousSizeList_empty(self):
+        self._assertGroups([], [])
+
+    def test_createHomogeneousSizeList_one_item_upper_mean(self):
+        self._assertGroups([("h1", 300)], [["h1"]])
+
+    def test_createHomogeneousSizeList_one_item_under_mean(self):
+        self._assertGroups([("h1", 100)], [["h1"]])
+
+    def test_createHomogeneousSizeList_3items(self):
+        # 250 + 250 reaches maxSize exactly, which is not accepted: no item can share a group.
+        lHeadersSizeTuples = [("h1", 250),
+                              ("h2", 250),
+                              ("h3", 300)]
+        expLHeadersList = [["h3"], ["h2"], ["h1"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_4items(self):
+        # The group of the largest item is completed with the smallest item.
+        lHeadersSizeTuples = [("h1", 100),
+                              ("h2", 200),
+                              ("h3", 10),
+                              ("h4", 400)]
+        expLHeadersList = [["h4", "h3"], ["h2", "h1"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_5items(self):
+        # The first group is completed with the two smallest items.
+        lHeadersSizeTuples = [("h1", 300),
+                              ("h2", 300),
+                              ("h3", 250),
+                              ("h4", 100),
+                              ("h5", 90)]
+        expLHeadersList = [["h2", "h5", "h4"], ["h1"], ["h3"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_all_upper_max(self):
+        # Every size is >= maxSize: each header is alone, largest first.
+        lHeadersSizeTuples = [("h1", 600),
+                              ("h2", 500),
+                              ("h3", 700),
+                              ("h4", 900),
+                              ("h5", 500)]
+        expLHeadersList = [["h4"], ["h3"], ["h1"], ["h5"], ["h2"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_all_upper_mean(self):
+        # Equal sizes are ordered by header name, descending.
+        lHeadersSizeTuples = [("h1", 300),
+                              ("h2", 300),
+                              ("h3", 300),
+                              ("h4", 300),
+                              ("h5", 300)]
+        expLHeadersList = [["h5"], ["h4"], ["h3"], ["h2"], ["h1"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_all_under_mean(self):
+        # Five items of 100 sum to maxSize exactly, so the fifth one opens a new group.
+        lHeadersSizeTuples = [("h1", 100),
+                              ("h2", 100),
+                              ("h3", 100),
+                              ("h4", 100),
+                              ("h5", 100)]
+        expLHeadersList = [["h5", "h4", "h3", "h2"], ["h1"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+    def test_createHomogeneousSizeList_floats(self):
+        lHeadersSizeTuples = [("h1", 99.1),
+                              ("h2", 100.7),
+                              ("h3", 100.1),
+                              ("h4", 100.1),
+                              ("h5", 100)]
+        expLHeadersList = [['h2', 'h4', 'h3', 'h5'], ["h1"]]
+        self._assertGroups(lHeadersSizeTuples, expLHeadersList)
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/Test_WriteScript.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/Test_WriteScript.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,365 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.launcher.WriteScript import WriteScript\n+from commons.core.sql.Job import Job\n+from commons.core.sql.DbFactory import DbFactory\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+import unittest\n+import os\n+import shutil\n+import time\n+import threading\n+\n+class Test_WriteScript(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._testDir = os.getcwd()\n+ self._acronym = "dummyAcronym"\n+ self._jobTable = "dummyJobsTable"\n+ self._iDb = DbFactory.createInstance()\n+ self._iDb.createTable(self._jobTable, "jobs", overwrite = True)\n+ self._jobdb = TableJobAdaptatorFactory.createInstance(self._iDb, self._jobTable)\n+ self._job = Job()\n+ self._job.groupid = "groupid"\n+ self._job.jobname = self._acronym\n+ self._job.launcher = "ClusterLauncher"\n+ self._jobdb.recordJob(self._job)\n+ self._dummyScratch = "dummyScratch"\n+ os.mkdir(self._dummyScratch)\n+ os.chdir(self._dummyScratch)\n+ self._tmpDir = os.getcwd()\n+ self._iScriptWriter = WriteScript(self._job, self._jobdb, self._testDir, self._tmpDir)\n+ \n+ def tearDown(self):\n+ self._iDb.dropTable(self._jobTable)\n+ self._iDb.close()\n+ if FileUtils.isRessourceExists(self._dummyScratch):\n+ shutil.rmtree(self._dummyScratch)\n+\n+ def test_run(self):\n+ isScriptAsRun = False\n+ fileToCreate = \'dummyFile\'\n+ cmdStart = "log = os.system( \\"touch %s\\" )\\n" % fileToCreate\n+ cmdFinish = "os.system(\\"mv %s %s\\" )\\n" % (fileToCreate, self._testDir)\n+ pyFileName = "%s/ClusterLauncher_%s.py" % (os.getcwd(), self._acronym) \n+ \n+ self._iScriptWriter.run(cmdStart, cmdFinish, pyFileName)\n+ os.system("python %s" % pyFileName)\n+\n+ os.chdir(self._testDir)\n+ if FileUtils.isRessourceExists(fileToCreate):\n+ os.remove(fileToCreate)\n+ isScriptAsRun = True\n+ expJobStatus = "finished" \n+ obsJobStatus = self._jobdb.getJobStatus(self._job)\n+ \n+ self.assertTrue(isScriptAsRun)\n+ 
self.assertEquals(expJobStatus, obsJobStatus)\n+ \n+ def test_run_with_cmdSize_and_cmdCopy(self):\n+ isScriptAsRun = False\n+ fileToCreate = \'dummyFile\'\n+ fileSize = 0.5\n+ cmdSize = "fileSize = %f\\n" % fileSize\n+ cmdCopy = "os.system(\\"touch bank.fa\\")\\n"\n+ cmdStart = "log = os.system(\\"touch %s\\")\\n" % fileToCreate\n+ cmdFinish = "shutil.move(\\"%s\\", \\"%s\\")" % (fileToCreate, self._testDir)\n+ pyFileName = "%s/ClusterLauncher_%s.py" % (os.getcwd(), self._acronym) \n+ \n+ iWriteScript = WriteScript(self._job, self._jobdb, self._testDir, self._tmpDir, True)\n+ iWriteScript.run(cmdStart, cmdFinish, pyFileName, cmdSize, cmdCopy)\n+ os.system("python %s" % pyFileName)\n+\n+ os.chdir(self._testDir)\n+ if FileUtils.isRessourceExists(fileToCreate):\n+ os.remove(fileToCreate)\n+ isScriptAsRun = True\n+ expJobStatus = "finished" \n+ obsJobStatus = self._jobdb.getJobStatus(self._job)\n+ \n+ self.assertTrue(isScriptAsRun)\n+ self.assertEquals(expJobStatus, obsJobStatus)\n+\n+#TODO: how to test ?\n+# def test_run_2_jobs_trying_to_create_same_groupIdDir(self):\n+# fileToCreate1 = \'dummyFile1\'\n+# fileToCreate2 = \'dummyFile2\'\n+# flagFileOSError = "osErrorRaised"\n+# \n+# fileSize = 0.5\n+# cmd_checkSize = ""\n+# cmd_checkSize += "if not os.path.exists( \\"%s\\" ):\\n" % self._job.groupid\n+# cmd_checkSize += "\\tfileSize = %f\\n" % fileSize\n+# \n+# cmd_checkGroupidDir1 = ""\n+# cmd_checkGroupidDir1 += "if not os.path.exists(\\"%s\\"):\\n" % self._job.groupid\n+# cmd_checkGroupidDir1 += "\\ttry:\\n"\n+# cmd_checkGroupidDir1 += "\\t\\ttime.sleep('..b'JobsTable",\n+ "groupId" : "groupid",\n+ "jobName" : "job1",\n+ "launcher" : "ClusterLauncher",\n+ "time" : "20110505-105353",\n+ "repet_path" : "/home/user/workspace/repet_pipe",\n+ "cmdStart" : "log = os.system(\\"touch dummyFile1\\")",\n+ "cmdFinish" : "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")",\n+ "cDir" : 
"/home/user/workspace/repet_pipe/commons/core/launcher/test/",\n+ "cmdSize" : "fileSize = 0.500000",\n+ "cmdCopy" : "os.system(\\"touch bank.fa\\")"\n+ }\n+ expFileName = "expFiles/expJobScriptTemplateLight.py"\n+ obsFileName = "obs.py"\n+ \n+ iWS = WriteScript(chooseTemplateLight = True)\n+ iWS.fillTemplate(obsFileName, d)\n+ self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+ os.remove(obsFileName)\n+ \n+ def test_createJobScriptDict(self):\n+ os.chdir("..")\n+ cmd_start = "log = os.system(\\"touch dummyFile1\\")"\n+ cmd_finish = "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")"\n+ cmd_size = ""\n+ cmd_copy = ""\n+ expDict = {\n+ "tmpDir" : self._tmpDir,\n+ "jobTableName" : self._jobTable,\n+ "groupId" : self._job.groupid,\n+ "jobName" : self._acronym,\n+ "launcher" : self._job.launcher,\n+ "time" : time.strftime("%Y%m%d-%H%M%S"),\n+ "repet_path" : os.environ["REPET_PATH"],\n+ "repet_host" : os.environ["REPET_HOST"],\n+ "repet_user" : os.environ["REPET_USER"],\n+ "repet_pw" : os.environ["REPET_PW"],\n+ "repet_db" : os.environ["REPET_DB"],\n+ "repet_port" : os.environ["REPET_PORT"],\n+ "cmdStart" : cmd_start,\n+ "cmdFinish" : cmd_finish,\n+ "cDir" : self._testDir,\n+ "cmdSize" : cmd_size,\n+ "cmdCopy" : cmd_copy\n+ }\n+ obsDict = self._iScriptWriter.createJobScriptDict(cmd_start, cmd_finish, cmd_size, cmd_copy)\n+ self.assertEquals(expDict, obsDict)\n+ \n+ def test_createJobScriptDict_with_cmdSize_and_cmdCopy(self):\n+ os.chdir("..")\n+ cmd_start = "log = os.system(\\"touch dummyFile1\\")"\n+ cmd_finish = "shutil.move(\\"dummyFile1\\", \\"/home/user/workspace/repet_pipe/commons/core/launcher/test\\")"\n+ cmd_size = "fileSize = 0.500000"\n+ cmd_copy = "os.system(\\"touch bank.fa\\")"\n+ expDict = {\n+ "tmpDir" : self._tmpDir,\n+ "jobTableName" : self._jobTable,\n+ "groupId" : self._job.groupid,\n+ "jobName" : self._acronym,\n+ "launcher" : self._job.launcher,\n+ "time" : 
time.strftime("%Y%m%d-%H%M%S"),\n+ "repet_path" : os.environ["REPET_PATH"],\n+ "repet_host" : os.environ["REPET_HOST"],\n+ "repet_user" : os.environ["REPET_USER"],\n+ "repet_pw" : os.environ["REPET_PW"],\n+ "repet_db" : os.environ["REPET_DB"],\n+ "repet_port" : os.environ["REPET_PORT"],\n+ "cmdStart" : cmd_start,\n+ "cmdFinish" : cmd_finish,\n+ "cDir" : self._testDir,\n+ "cmdSize" : cmd_size,\n+ "cmdCopy" : cmd_copy\n+ }\n+ obsDict = self._iScriptWriter.createJobScriptDict(cmd_start, cmd_finish, cmd_size, cmd_copy)\n+ self.assertEquals(expDict, obsDict)\n+ \n+class CreateFileThread(threading.Thread):\n+\n+ def __init__(self, pyFileName):\n+ threading.Thread.__init__(self)\n+ self._pyFileName = pyFileName\n+ \n+ def run(self):\n+ os.system("python %s" % self._pyFileName)\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_WriteScript ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite ) \n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/expFiles/expJobScriptSQLiteWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptSQLiteWithFilesCopyTemplate.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbMySql import DbMySql
+from commons.core.sql.DbSQLite import DbSQLite
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ fileSize = 0
+ if not os.path.exists("groupid"):
+ fileSize = 0.500000
+ freeGigaNeededInTmpDir = float(1 + fileSize)
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+ raise RepetException("ERROR: less than %.2fG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % freeGigaNeededInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if not os.path.exists("groupid"):
+ try:
+ os.mkdir("groupid")
+ except OSError, e :
+ if e.args[0] != 17:
+ raise RepetException("ERROR: can't create 'groupid'")
+ os.chdir("groupid")
+ os.system("touch bank.fa")
+ else:
+ os.chdir("groupid")
+
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+
+except IOError, e :
+ print e
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ queue = "main.q"
+ iJob = Job("jobs", jobname = "job1", groupid = "groupid", queue = queue, node = os.getenv("HOSTNAME"))
+ iDb = DbSQLite("/home/user/workspace/repet_pipe/commons/core/launcher/test/jobs")
+ iTJA = TableJobAdaptator(iDb, "jobs")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/expFiles/expJobScriptTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplate.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+except IOError, e :
+ print e
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/expFiles/expJobScriptTemplateLight.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplateLight.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+except IOError, e :
+ print e
+ sys.stdout.flush()
+ sys.exit(1)
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/expFiles/expJobScriptTemplate_cmdWith2Lines.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptTemplate_cmdWith2Lines.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists( "/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch" ):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ minFreeGigaInTmpDir = 1
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < minFreeGigaInTmpDir):
+ raise RepetException("ERROR: less than %iG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % minFreeGigaInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ print "Hello Yufei"
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+except IOError, e :
+ print e
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/launcher/test/expFiles/expJobScriptWithFilesCopyTemplate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/launcher/test/expFiles/expJobScriptWithFilesCopyTemplate.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import time
+import shutil
+from commons.core.checker.RepetException import RepetException
+from commons.core.sql.TableJobAdaptator import TableJobAdaptator
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.Job import Job
+
+try:
+ newDir = None
+ print os.uname()
+ beginTime = time.time()
+ print 'beginTime=%f' % beginTime
+ print "work in dir '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'"
+ sys.stdout.flush()
+ if not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"):
+ raise IOError("ERROR: temporary directory '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch' doesn't exist")
+
+ fileSize = 0
+ if not os.path.exists("groupid"):
+ fileSize = 0.500000
+ freeGigaNeededInTmpDir = float(1 + fileSize)
+ freeSpace = os.statvfs("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if ((freeSpace.f_bavail * freeSpace.f_frsize) / 1073741824.0 < freeGigaNeededInTmpDir):
+ raise RepetException("ERROR: less than %.2fG of input file in '/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch'" % freeGigaNeededInTmpDir)
+
+ os.chdir("/home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch")
+ if not os.path.exists("groupid"):
+ try:
+ os.mkdir("groupid")
+ except OSError, e :
+ if e.args[0] != 17:
+ raise RepetException("ERROR: can't create 'groupid'")
+ os.chdir("groupid")
+ os.system("touch bank.fa")
+ else:
+ os.chdir("groupid")
+
+ newDir = "groupid_job1_20110505-105353"
+ if os.path.exists(newDir):
+ shutil.rmtree(newDir)
+ os.mkdir(newDir)
+ os.chdir(newDir)
+
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "running")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ log = os.system("touch dummyFile1")
+ if log != 0:
+ raise RepetException("ERROR: job returned %i" % log)
+ else:
+ print "job finished successfully"
+ sys.stdout.flush()
+ shutil.move("dummyFile1", "/home/user/workspace/repet_pipe/commons/core/launcher/test")
+
+ os.chdir("..")
+ shutil.rmtree(newDir)
+
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "finished")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+
+ endTime = time.time()
+ print 'endTime=%f' % endTime
+ print 'executionTime=%f' % (endTime - beginTime)
+ print os.uname()
+ sys.stdout.flush()
+
+except IOError, e :
+ print e
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)
+
+except Exception, e :
+ print "tmpDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/dummyScratch"
+ print "cDir is : /home/user/workspace/repet_pipe/commons/core/launcher/test/"
+ print e
+ if newDir != None and os.path.exists("../%s" % newDir) and not os.path.exists("/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir):
+ os.chdir("..")
+ shutil.move(newDir, "/home/user/workspace/repet_pipe/commons/core/launcher/test//%s" % newDir)
+ iJob = Job(jobname = "job1", groupid = "groupid", launcherFile = "ClusterLauncher", node = os.getenv("HOSTNAME"))
+ iDb = DbFactory.createInstance()
+ iTJA = TableJobAdaptator(iDb, "dummyJobsTable")
+ print "current status: %s" % iTJA.getJobStatus(iJob)
+ iTJA.changeJobStatus(iJob, "error")
+ print "updated status: %s" % iTJA.getJobStatus(iJob)
+ sys.stdout.flush()
+ iDb.close()
+ sys.exit(1)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/AxtParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/AxtParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,154 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.misc.Utils import getHammingDistance
+
+
+class AxtParser(MapperParser):
+    """A class that parses AXT (as given by Mosaik)"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(AxtParser, self).__init__(fileName, verbosity)
+        self.queryLine = None
+        self.subjectLine = None
+
+    def __del__(self):
+        super(AxtParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["axt"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        pass
+
+
+    def getInfos(self):
+        self.chromosomes = set()
+        self.nbMappings  = 0
+        self.size        = 0
+        cpt              = 0
+        self.reset()
+        for line in self.handle:
+            line = line.strip()
+            if line == "": continue
+            if cpt % 3 == 0:
+                line    = line.strip()
+                parts = line.split(" ")
+                self.chromosomes.add(parts[1])
+                self.size       += int(parts[6])
+                self.nbMappings += 1
+            cpt += 1
+            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:
+                sys.stdout.write("    %d mappings read\r" % (self.nbMappings))
+                sys.stdout.flush()
+        self.reset()
+        if self.verbosity >= 10:
+            print "    %d mappings read" % (self.nbMappings)
+            print "Done."
+
+
+    def parseLine(self, line):
+
+        if line.strip() == "":
+            for line in self.handle:
+                self.currentLineNb += 1
+                break
+        if line.strip() == "":
+            return None
+
+        m = re.search(r"^\s*\d+\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s*$", line)
+        if m != None:
+            #sys.exit("\nLine %d '%s' does not have an AXT format" % (self.currentLineNb, line))
+
+            mapping = Mapping()
+            subMapping = SubMapping()
+
+            offset = -1 if m.group(7) == "-" else 0
+            subMapping.queryInterval.setName(m.group(4))
+            subMapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
+            subMapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
+            subMapping.queryInterval.setDirection(m.group(7))
+
+            subMapping.targetInterval.setChromosome(m.group(1))
+            subMapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
+            subMapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
+            subMapping.targetInterval.setDirection(1)
+
+            subMapping.setSize(min(subMapping.targetInterval.getSize(), subMapping.queryInterval.getSize()))
+            subMapping.setDirection(m.group(7))
+
+            mapping.addSubMapping(subMapping)
+
+            mapping.setDirection(m.group(7))
+            mapping.targetInterval.setChromosome(m.group(1))
+            mapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
+            mapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
+
+            mapping.queryInterval.setName(m.group(4))
+            mapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
+            mapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
+
+            mapping.setSize(min(mapping.targetInterval.getSize(), mapping.queryInterval.getSize()))
+
+            for line in self.handle:
+                string1 = line.strip()
+                self.currentLineNb += 1
+                break
+            for line in self.handle:
+                string2 = line.strip()
+                self.currentLineNb += 1
+                break
+            mapping.setNbMismatches(Utils.getHammingDistance(string1, string2))
+            mapping.setNbGaps(0)
+
+            self.currentMapping = mapping
+        else:
+            if self.queryLine == None:
+                self.queryLine = line
+            else:
+                self.subjectLine = line
+                seqLen = float(len(self.subjectLine))
+                dist = float(getHammingDistance(self.queryLine, self.subjectLine))
+                identity = ((seqLen-dist)/seqLen) *100
+                self.currentMapping.setIdentity(identity)
+                self.queryLine = None
+                self.subjectLine = None
+                return self.currentMapping
+
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/AxtParser.pyc

Binary file commons/core/parsing/AxtParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BamParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BamParser.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,483 @@\n+#\n+# Copyright INRA-URGI 2009-2012\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re, sys, gzip, struct\n+from commons.core.parsing.MapperParser import MapperParser\n+from SMART.Java.Python.structure.Mapping import Mapping\n+from SMART.Java.Python.structure.SubMapping import SubMapping\n+from SMART.Java.Python.structure.Interval import Interval\n+\n+\n+BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"\n+\n+BAM_CIGAR_LOOKUP = "MIDNSHP=X"\n+BAM_CIGAR_SHIFT = 4\n+BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1)\n+\n+\n+\n+def pack_int32(x):\n+\treturn struct.pack(\'<i\', x)\n+\n+def pack_uint32(x):\n+\treturn struct.pack(\'<I\', x)\n+\n+def unpack_int8(x):\n+\treturn struct.unpack(\'<b\', x)[0]\n+\n+def unpack_int16(x):\n+\treturn struct.unpack(\'<h\', x)[0]\n+\n+def unpack_int32(x):\n+\treturn struct.unpack(\'<i\', x)[0]\n+\n+def unpack_int64(x):\n+\treturn struct.unpack(\'<q\', x)[0]\n+\n+def unpack_uint8(x):\n+\treturn struct.unpack(\'<B\', x)[0]\n+\n+def unpack_uint16(x):\n+\treturn struct.unpack(\'<H\', x)[0]\n+\n+def unpack_uint32(x):\n+\treturn struct.unpack(\'<I\', x)[0]\n+\n+def unpack_uint64(x):\n+\treturn struct.unpack(\'<Q\', x)[0]\n+\n+def unpack_float(x):\n+\treturn struct.unpack(\'<f\', x)[0]\n+\n+def unpack_string(x):\n+\tlength = len(x)\n+\tformat_string = "<{0}s".format(length)\n+\tstring = struct.unpack(format_string, x)[0]\n+\tif string[-1] == \'\\0\':\n+\t\treturn string[:-1]\n+\telse:\n+\t\treturn string\n+\n+\n+BAM_TAG_CODE = {"c": unpack_int8, \\\n+\t\t\t\t"C": unpack_uint8, \\\n+\t\t\t\t"s": unpack_int16, \\\n+\t\t\t\t"S": unpack_uint16, \\\n+\t\t\t\t"i": unpack_int32, \\\n+\t\t\t\t"I": unpack_uint32, 
\\\n+\t\t\t\t"f": unpack_float, \\\n+\t\t\t\t#"A": unpack_int8, \\\n+\t\t\t\t"A": lambda x: x, \\\n+\t\t\t\t"Z": unpack_int8, \\\n+\t\t\t\t"H": unpack_int8}\n+\n+BAM_TAG_VALUE = {"c": int, \\\n+\t\t\t\t "C": int, \\\n+\t\t\t\t "s": int, \\\n+\t\t\t\t "S": int, \\\n+\t\t\t\t "i": int, \\\n+\t\t\t\t "I": int, \\\n+\t\t\t\t "f": float, \\\n+\t\t\t\t "A": lambda x: x}\n+\n+BAM_TAG_SIZE = {"c": 1, \\\n+\t\t\t\t"C": 1, \\\n+\t\t\t\t"s": 2, \\\n+\t\t\t\t"S": 2, \\\n+\t\t\t\t"i": 4, \\\n+\t\t\t\t"I": 4, \\\n+\t\t\t\t"f": 4, \\\n+\t\t\t\t"A": 1}\n+\n+\n+class CigarOp(object):\n+\tdef __init__(self, data):\n+\t\tself._length = data >> BAM_CIGAR_SHIFT\n+\t\tself._type = BAM_CIGAR_LOOKUP[ data & BAM_CIGAR_MASK ]\n+\n+\n+class CigarData(object):\n+\tdef __init__(self, data, num_ops):\n+\t\tself._ops = []\n+\t\tfor i in range(num_ops):\n+\t\t\tcigar_data = unpack_uint32(data[i*4: (i+1)*4])\n+\t\t\tself._ops.append(CigarOp(cigar_data))\t\t\n+\n+\tdef getCigarData(self):\n+\t\treturn self._ops\n+\t\n+\tdef __str__(self):\n+\t\treturn "".join(["%d%s" % (op._length, op._type) for op in self._ops])\n+\n+\n+class TagsData(object):\n+\tdef __init__(self):\n+\t\tself._tags = {}\n+\n+\tdef add(self, tag):\n+\t\tself._tags[tag._ta'..b'nbGaps\t\t = 0\n+\tsubMapping\t = None\n+\tqueryOffset = 0\n+\ttargetOffset = 0\n+\treadStart\t = None\n+\n+\tfor tag, value in read._tags.iteritems():\n+\t\tif tag == "X0":\n+\t\t\tnbOccurrences = value._value\n+\t\telif tag == "X1":\n+\t\t\tnbOccurrences += value._value\n+\t\telif tag == "XM":\n+\t\t\tnbMismatches = value._value\n+\tmapping.setTagValue("nbOccurrences", nbOccurrences)\n+\tmapping.setTagValue("quality", read._mappingQuality)\n+\n+\tfor operation in read._cigar:\n+\t\tif operation._type == "M":\n+\t\t\tif readStart == None:\n+\t\t\t\treadStart = queryOffset\n+\t\t\tif subMapping == None:\n+\t\t\t\tsubMapping = 
SubMapping()\n+\t\t\t\tsubMapping.setSize(operation._length)\n+\t\t\t\tsubMapping.setDirection(direction)\n+\t\t\t\tsubMapping.queryInterval.setName(read._name)\n+\t\t\t\tsubMapping.queryInterval.setStart(queryOffset)\n+\t\t\t\tsubMapping.queryInterval.setDirection(direction)\n+\t\t\t\tsubMapping.targetInterval.setChromosome(read._chromosome)\n+\t\t\t\tsubMapping.targetInterval.setStart(genomeStart + targetOffset)\n+\t\t\t\tsubMapping.targetInterval.setDirection(1)\n+\t\t\tnbMatches\t += operation._length\n+\t\t\ttargetOffset += operation._length\n+\t\t\tqueryOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "I":\n+\t\t\tnbGaps\t += 1\n+\t\t\tqueryOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "D":\n+\t\t\tif subMapping != None:\n+\t\t\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n+\t\t\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+\t\t\t\tmapping.addSubMapping(subMapping)\n+\t\t\tsubMapping\t = None\n+\t\t\tnbGaps\t += 1\n+\t\t\ttargetOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "N":\n+\t\t\tif subMapping != None:\n+\t\t\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n+\t\t\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+\t\t\t\tmapping.addSubMapping(subMapping)\n+\t\t\tsubMapping\t= None\n+\t\t\ttargetOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "S":\n+\t\t\tnbMismatches += operation._length\n+\t\t\ttargetOffset += operation._length\n+\t\t\tqueryOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "H":\n+\t\t\ttargetOffset += operation._length\n+\t\t\tqueryOffset += operation._length\n+\t\t\tcurrentNumber = 0\n+\t\t\tcontinue\n+\t\tif operation._type == "P":\n+\t\t\tcontinue\n+\t\traise Exception("Do not understand parameter \'%s\'" % 
(operation._type))\n+\n+\tif subMapping != None:\n+\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n+\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+\t\tmapping.addSubMapping(subMapping)\n+\tmapping.queryInterval.setStart(readStart)\n+\tmapping.queryInterval.setEnd(queryOffset - 1)\n+\tmapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+\tmapping.setNbMismatches(nbMismatches)\n+\tmapping.setNbGaps(nbGaps)\n+\tmapping.queryInterval.setName(read._name)\n+\tmapping.queryInterval.setDirection(direction)\n+\tmapping.targetInterval.setChromosome(read._chromosome)\n+\tmapping.targetInterval.setStart(genomeStart)\n+\tmapping.targetInterval.setDirection(direction)\n+\tmapping.setSize(len(read._sequence))\n+\tmapping.setDirection(direction)\n+\treturn mapping\n+\n+\t\n+class BamParser(MapperParser):\n+\t"""A class that parses BAM format"""\n+\n+\tdef __init__(self, fileName, verbosity = 0):\n+\t\tself.verbosity = verbosity\n+\t\tself.handle = gzip.open(fileName, "rb")\n+\t\tself.reader = FileReader(self.handle)\n+\t\tself.nbMappings = None\n+\t\tself.fileName = fileName\n+\n+\n+\tdef __del__(self):\n+\t\tself.handle.close()\n+\n+\n+\tdef getFileFormats():\n+\t\treturn ["bam"]\n+\tgetFileFormats = staticmethod(getFileFormats)\n+\n+\n+\tdef reset(self):\n+\t\tself.reader.reset()\n+\n+\n+\tdef getNextMapping(self):\n+\t\tself.currentMapping = None\n+\t\twhile self.currentMapping == None:\n+\t\t\tread = self.reader.getNextAlignment()\n+\t\t\tif not read:\n+\t\t\t\tself.currentMapping = False\n+\t\t\t\treturn False\n+\t\t\tread.parse()\n+\t\t\tself.currentMapping = parseAlignedRead(read)\n+\t\treturn self.currentMapping\n+\t\t\n+\t\t\n+\tdef setDefaultTagValue(self, name, value):\n+\t\tpass\n+\n+\n+\tdef skipFirstLines(self):\n+\t\tpass\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BamParser.pyc

Binary file commons/core/parsing/BamParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BedParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BedParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,139 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Interval import Interval
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+from SMART.Java.Python.structure.Transcript import Transcript
+
+
+class BedParser(TranscriptListParser):
+    """A class that parses a BED file and creates a transcript list.
+
+    Four line layouts are recognised: 3, 4 and 5 columns (a single interval,
+    always given direction 1) and 12 columns (a transcript with its exons).
+    For every layout the stored start is min(col2, col3 - 1) and the stored
+    end is max(col2, col3 - 1).
+    """
+
+    # Exon-less layouts, tried in this order: 3, 4 and 5 columns.
+    # Each entry is (compiled pattern, whether group 4 holds a name).
+    _SIMPLE_LINE_PATTERNS = (
+        (re.compile(r"^\s*(\S+)\t+(\d+)\t+(\d+)\s*$"), False),
+        (re.compile(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\s*$"), True),
+        (re.compile(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+\d+\.?\d*\s*$"), True),
+    )
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """Store an empty track title, then let the base class open fileName."""
+        self.title = None
+        TranscriptListParser.__init__(self, fileName, verbosity)
+
+
+#    def __del__(self):
+#        super(BedParser, self).__del__()
+
+
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["bed"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        """Consume an optional leading 'track name=...' line.
+
+        When the first line is such a header, its name is stored in
+        self.title and the line counter is advanced; otherwise the handle is
+        rewound so that the line is parsed as data.
+        """
+        mark = self.handle.tell()
+        line = self.handle.readline().strip()
+        # The line has just been stripped, so the name may be its very last
+        # token: do not require whitespace after it (the previous pattern did,
+        # and therefore missed headers such as "track name=foo").
+        m = re.search(r"^\s*track\s+name\s*=\s*(\S+)", line)
+        if m is not None:
+            self.title = m.group(1)
+            self.currentLineNb += 1
+        else:
+            self.handle.seek(mark)
+
+
+    def _makeBedTranscript(self, m, name, direction):
+        """Build a Transcript from groups 1-3 of match m plus name and direction."""
+        transcript = Transcript()
+        transcript.setChromosome(m.group(1))
+        first = int(m.group(2))
+        last = int(m.group(3)) - 1
+        transcript.setStart(min(first, last))
+        transcript.setEnd(max(first, last))
+        transcript.setName(name)
+        transcript.setDirection(direction)
+        return transcript
+
+
+    def parseLine(self, line):
+        """Convert one BED line into a Transcript.
+
+        The 3-, 4- and 5-column layouts yield an exon-less transcript with
+        direction 1 (named "Unnamed" when there is no name column).  Any other
+        line must follow the 12-column layout, from which the strand, the
+        optional tags / occurrence number embedded in the name, and the exons
+        are extracted.
+
+        Raises Exception when the line matches none of the layouts, and exits
+        the program when the exon bounds disagree with the transcript bounds.
+        """
+        for pattern, hasName in self._SIMPLE_LINE_PATTERNS:
+            m = pattern.search(line)
+            if m is not None:
+                name = m.group(4) if hasName else "Unnamed"
+                return self._makeBedTranscript(m, name, 1)
+
+        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+\d+\t+([+-])\t+\d+\t+\d+\t+0\t+(\d+)\t+(\S+)\t+(\S+)\s*$", line)
+        if m is None:
+            raise Exception("\nLine %d '%s' does not has a BED format." % (self.currentLineNb, line))
+        transcript = self._makeBedTranscript(m, m.group(4), m.group(5))
+        nbExons = int(m.group(6))
+        sizes = m.group(7).split(",")
+        starts = m.group(8).split(",")
+
+        # check for comment in name
+        # NOTE(review): group 2 runs from the "(" to the end of the name, so
+        # a closing parenthesis, if any, is passed on to setTagValues; the
+        # pattern may have been meant to end with "\)$" -- confirm before
+        # changing it.
+        m = re.search(r"^([^$]*)\((\S+)$$", transcript.getName())
+        if m is not None:
+            transcript.setName(m.group(1))
+            transcript.setTagValues(m.group(2), ";", "=")
+
+        # check for nb occurrences in name
+        m = re.search(r"(.*)-(\d+)$", transcript.getName())
+        if m is not None:
+            transcript.setName(m.group(1))
+            transcript.setOccurrence(int(m.group(2)))
+
+        # Block starts are relative to the transcript start.
+        for i in range(nbExons):
+            exonStart = transcript.getStart() + int(starts[i])
+            exonSize = int(sizes[i])
+            exon = Interval(transcript)
+            exon.setStart(exonStart)
+            exon.setEnd(exonStart + exonSize - 1)
+            exon.setSize(exonSize)
+            transcript.addExon(exon)
+
+        if transcript.exons[0].getStart() != transcript.getStart():
+            sys.exit("There is something wrong with the start of transcript line '%s': transcript starts at %d whereas first exon starts at %d" % (line.strip(), transcript.start, transcript.exons[0].start))
+        if transcript.exons[-1].getEnd() != transcript.getEnd():
+            sys.exit("There is something wrong with the end of transcript line '%s': transcript ends at %d whereas last exon ends at %d" % (line.strip(), transcript.end, transcript.exons[-1].end))
+
+        return transcript
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BedParser.pyc

Binary file commons/core/parsing/BedParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlastParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlastParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,88 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.structure.Mapping import Mapping
+
+
+class BlastParser(MapperParser):
+    """Parser for the tabular output of Blast (-m 8 format)"""
+
+    def __init__(self, fileName, verbosity = 0):
+        """Delegate construction to the base mapper parser."""
+        super(BlastParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        """Delegate clean-up to the base mapper parser."""
+        super(BlastParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["blast"]
+
+
+    def skipFirstLines(self):
+        """Nothing to skip: this parser expects no header lines."""
+        pass
+
+
+    def parseLine(self, line):
+        """Turn one 12-column Blast line into a Mapping with one sub-mapping.
+
+        Columns used: 1 query name, 2 target name, 3 identity, 4 size,
+        5 mismatches, 6 gaps, 7-8 query bounds, 9-10 target bounds,
+        11 e-value.  The program exits when the line does not match.
+        """
+        fields = re.search(r"^(\S+)\s+(\S+)\s+(\d+\.?\d*)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([-+]?\d+\.?\d*[eE]?[-+]?\d*)\s+(\d+\.?\d*)\s*$", line)
+        if fields is None:
+            sys.exit("\nLine %d '%s' does not have an Blast format" % (self.currentLineNb, line))
+
+        queryFirst, queryLast = int(fields.group(7)), int(fields.group(8))
+        targetFirst, targetLast = int(fields.group(9)), int(fields.group(10))
+
+        alignment = Mapping()
+
+        # Bounds are stored in increasing order whatever the reported order.
+        query = Interval()
+        query.setName(fields.group(1))
+        query.setStart(min(queryFirst, queryLast))
+        query.setEnd(max(queryFirst, queryLast))
+
+        target = Interval()
+        target.setChromosome(fields.group(2))
+        target.setStart(min(targetFirst, targetLast))
+        target.setEnd(max(targetFirst, targetLast))
+
+        part = SubMapping()
+        part.setQueryInterval(query)
+        part.setTargetInterval(target)
+        alignment.addSubMapping(part)
+
+        alignment.setIdentity(round(float(fields.group(3))))
+        alignment.setSize(int(fields.group(4)))
+        alignment.setNbMismatches(int(fields.group(5)))
+        alignment.setNbGaps(int(fields.group(6)))
+        # The sign of this product tells whether query and target run the same way.
+        alignment.setDirection((queryLast - queryFirst) * (targetLast - targetFirst))
+        alignment.setEvalue(float(fields.group(11)))
+
+        return alignment

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlastParser.pyc

Binary file commons/core/parsing/BlastParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlatFileParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlatFileParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,63 @@
+from commons.core.parsing.BlatParser import BlatParser
+import os
+
+class BlatFileParser(object):
+    """Reads a Blat result file and stores one BlatParser object per hit line.
+
+    A line is treated as a hit when its first tab-separated field is an
+    integer; every other line (header, separators, blank lines) is ignored.
+    """
+
+    def __init__(self, blatFileName = None):
+        self._blatFileName = blatFileName
+        # hits in file order, filled by parseBlatFile()
+        self._lBlatHits = []
+        # query name -> list of hits, filled by parseBlatFileByQueries()
+        self._dBlatHitsByQueries = {}
+        # query name -> 1, filled by both parse methods
+        self._dQueries = {}
+
+    def getDictOfQueries(self):
+        """Return the dictionary whose keys are the query names seen so far."""
+        return self._dQueries
+
+    def getResultLinesOfOneQuery(self, queryName):
+        """Return the hits of queryName (KeyError if the query is unknown)."""
+        return self._dBlatHitsByQueries[queryName]
+
+    def getDictOfBlatHitsByQueries(self):
+        """Return the dictionary mapping each query name to its hits."""
+        return self._dBlatHitsByQueries
+
+    def getListsOfHits(self):
+        """Return the list of hits in file order."""
+        return self._lBlatHits
+
+    def _iterHits(self):
+        """Yield a populated BlatParser for each hit line of the Blat file."""
+        # The 'with' block closes the file even when a line fails to parse.
+        with open(self._blatFileName, 'r') as blatFile:
+            for n, line in enumerate(blatFile, 1):
+                if self._isInteger(line.split("\t")[0]):
+                    iBlatParser = BlatParser()
+                    iBlatParser.setAttributesFromString(line, n)
+                    yield iBlatParser
+
+    def parseBlatFile(self):
+        """Parse the file, append every hit to the hit list and return it."""
+        for iBlatParser in self._iterHits():
+            self._dQueries[iBlatParser.getQName()] = 1
+            self._lBlatHits.append(iBlatParser)
+        return self._lBlatHits
+
+    def parseBlatFileByQueries(self):
+        """Parse the file, group the hits by query name and return the groups."""
+        for iBlatParser in self._iterHits():
+            queryHeader = iBlatParser.getQName()
+            self._dQueries[queryHeader] = 1
+            # setdefault replaces dict.has_key(), which Python 3 no longer has.
+            self._dBlatHitsByQueries.setdefault(queryHeader, []).append(iBlatParser)
+        return self._dBlatHitsByQueries
+
+    def _isInteger(self, string):
+        """Return True when string can be converted with int(), else False."""
+        try:
+            int(string)
+            return True
+        except ValueError:
+            return False

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlatParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlatParser.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,351 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import sys\n+\n+## this class can parse a Blat results output file\n+#\n+class BlatParser(object):\n+\n+\n+ def __init__(self, match=\'\', mismatch=\'\', repMatch=\'\', N=\'\', QGapCount=\'\', QGapBases=\'\', TGapCount=\'\', TGapBases=\'\', strand=\'\', QName=\'\', QSize=\'\', QStart=\'\', QEnd=\'\', TName=\'\', TSize=\'\', TStart=\'\', TEnd=\'\', blockCount=\'\', blockSizes=\'\', qStarts=\'\', tStarts=\'\'):\n+ self._match = match\n+ self._mismatch = mismatch\n+ self._repMatch = repMatch\n+ self._N = N\n+ self._QGapCount = QGapCount\n+ self._QGapBases = QGapBases\n+ self._TGapCount = TGapCount\n+ self._TGapBases = TGapBases\n+ self._strand = strand\n+ self._QName = QName\n+ self._QSize = QSize\n+ self._QStart = QStart\n+ self._QEnd = QEnd\n+ self._TName = TName\n+ self._TSize = TSize\n+ self._TStart = TStart\n+ self._TEnd = TEnd\n+ self._blockCount = blockCount\n+ self._blockSizes = blockSizes\n+ self._qStarts = qStarts\n+ self._tStarts = tStarts\n+ \n+ def __eq__(self, o):\n+ return self._TName == o._TName and self._TSize == o._TSize and self._TStart == o._TStart and self._TEnd == o._TEnd\n+ \n+ def setMatch(self, match):\n+ self._match = match\n+ \n+ def setMismatch(self, mismatch):\n+ self._mismatch = mismatch\n+ \n+ def setRepMatch(self, repMatch):\n+ self._repMatch = repMatch\n+ \n+ def setN(self, N):\n+ self._N = N\n+ \n+ def setQGapCount(self, QGapCount):\n+ self._QGapCount = QGapCount\n+ \n+ def setQGapBases(self, QGapBases):\n+ self._QGapBases = QGapBases\n+ \n+ def setTGapCount(self, TGapCount):\n+ self._TGapCount = TGapCount\n+ \n+ def setTGapBases(self, TGapBases):\n+ self._TGapBases = TGapBases\n+ \n+ def setStrand(self, strand):\n+ self._strand = strand\n+ \n+ def setQName(self, QName):\n+ self._QName = QName\n+ \n+ def setQSize(self, QSize):\n+ self._QSize = QSize\n+ \n+ def setQStart(self, QStart):\n+ 
self._QStart = QStart\n+ \n+ def setQEnd(self, QEnd):\n+ self._QEnd = QEnd\n+ \n+ def setTName(self, TName):\n+ self._TName = TName\n+ \n+ def setTSize(self, TSize):\n+ self._TSize = TSize\n+ \n+ def setTStart(self'..b'e:\n+ sys.stderr.write("WARNING: The field QName is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[10] != \'\':\n+ self.setQSize(lResults[10])\n+ else:\n+ sys.stderr.write("WARNING: The field QSize is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[11] != \'\':\n+ self.setQStart(lResults[11])\n+ else:\n+ sys.stderr.write("WARNING: The field QStart is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[12] != \'\':\n+ self.setQEnd(lResults[12])\n+ else:\n+ sys.stderr.write("WARNING: The field QEnd is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[13] != \'\':\n+ self.setTName(lResults[13])\n+ else:\n+ sys.stderr.write("WARNING: The field TName is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[14] != \'\':\n+ self.setTSize(lResults[14])\n+ else:\n+ sys.stderr.write("WARNING: The field TSize is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[15] != \'\':\n+ self.setTStart(lResults[15])\n+ else:\n+ sys.stderr.write("WARNING: The field TStart is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[16] != \'\':\n+ self.setTEnd(lResults[16])\n+ else:\n+ sys.stderr.write("WARNING: The field TEnd is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[17] != \'\':\n+ self.setBlockCount(lResults[17])\n+ else:\n+ sys.stderr.write("WARNING: The field BlockCount is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[18] != \'\':\n+ self.setBlockSizes(lResults[18])\n+ else:\n+ 
sys.stderr.write("WARNING: The field BlockSizes is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[19] != \'\':\n+ self.setQStarts(lResults[19])\n+ else:\n+ sys.stderr.write("WARNING: The field QStarts is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if lResults[20] != \'\':\n+ self.setTStarts(lResults[20])\n+ else:\n+ sys.stderr.write("WARNING: The field TStarts is empty in blat file in line %s\\n" % iCurrentLineNumber)\n+ error = True\n+ \n+ if error == True:\n+ self._setAllToNull()\n+ \n+ def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\\t"):\n+ blatLine = blatLine.rstrip()\n+ lBlatLineItem = blatLine.split(fieldSeparator)\n+ if not len(lBlatLineItem) == 21:\n+ sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\\n" % (iCurrentLineNumber, len(lBlatLineItem)))\n+ else:\n+ self.setAttributes(lBlatLineItem, iCurrentLineNumber)\n+ \n+ def _setAllToNull(self):\n+ self._match = \'\'\n+ self._mismatch = \'\'\n+ self._repMatch = \'\'\n+ self._N = \'\'\n+ self._QGapCount = \'\'\n+ self._QGapBases = \'\'\n+ self._TGapCount = \'\'\n+ self._TGapBases = \'\'\n+ self._strand = \'\'\n+ self._QName = \'\'\n+ self._QSize = \'\'\n+ self._QStart = \'\'\n+ self._QEnd = \'\'\n+ self._TName = \'\'\n+ self._TSize = \'\'\n+ self._TStart = \'\'\n+ self._TEnd = \'\'\n+ self._blockCount = \'\'\n+ self._blockSizes = \'\'\n+ self._qStarts = \'\'\n+ self._tStarts = \'\'\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlatToGff.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,116 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import optparse
+import os
+from commons.core.parsing.BlatParser import BlatParser
+
+class BlatToGff(object):
+    """Command-line tool converting a Blat tabular result file to GFF3.
+
+    Typical use: setAttributesFromCmdLine() then run().
+    """
+
+
+    def __init__(self):
+        pass
+
+    def setAttributesFromCmdLine(self):
+        """Parse the -i / -o / -n command-line options into self._options."""
+        usage = '\
+        \nThis Script Launch BlatToGff.\n\n\
+        Example 1: python BlatToGff.py -i blatResultsFile.tab -o outputFile.gff3\n\n'
+        # The version string used to name another script (copy-paste slip).
+        parser = optparse.OptionParser(usage= usage, version="BlatToGff.py v1.0")
+        parser.add_option( '-i', '--input', dest='inputBLAT', help='Blat Input File Name [Format: tabular]', default= None )
+        parser.add_option( '-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default= None )
+        parser.add_option( '-n', '--methodname', dest='methodName', help='Method name in col. 3 [Default: None]', default= None )
+        ( options, args ) = parser.parse_args()
+        self._options = options
+
+    def checkOptions(self):
+        """Validate the parsed options and copy them to instance attributes.
+
+        Raises Exception when the input file is missing or does not exist,
+        or when no output file is given.
+        """
+        # Both options default to None, so test for "not given" rather than
+        # for the empty string only.
+        if not self._options.inputBLAT:
+            raise Exception("ERROR: No Blat file specified for -i !")
+        elif not os.path.exists(self._options.inputBLAT):
+            raise Exception("ERROR: Blat Input File doesn't exist !")
+        else:
+            self._inputFileBlat = self._options.inputBLAT
+
+        if not self._options.output:
+            raise Exception("ERROR: No Output file specified for -o !")
+        else:
+            self._outputFileGFF = self._options.output
+
+        self._methodName = self._options.methodName
+
+    def run(self):
+        """Convert every line after the 5 header lines into one GFF3 line."""
+        self.checkOptions()
+        self._createGFFOutputFile()
+        nbHeaderLines = 5
+        # The 'with' block closes the input file even if a conversion fails.
+        with open(self._inputFileBlat, 'r') as BLATFile:
+            for numberLine, blatLine in enumerate(BLATFile, 1):
+                if numberLine <= nbHeaderLines:
+                    continue
+                gffLine = self.convertBlatObjectToGffLine(blatLine, numberLine)
+                self._printGFFLinesToOutputFile(gffLine)
+
+    def convertBlatObjectToGffLine(self, blatLine, numberLine):
+        """Return the GFF3 line built from one Blat line.
+
+        The feature is placed on the Blat target, between the target start
+        and end, with type 'BES' (prefixed by the method name when one was
+        given) and strand '+'.  numberLine is only passed to the Blat parser.
+        """
+        iBlatHit = BlatParser()
+        iBlatHit.setAttributesFromString(blatLine, numberLine)
+        col1 = iBlatHit.getTName()
+        col2 = 'BlatToGff'
+        if self._methodName == '' or self._methodName is None:
+            col3 = 'BES'
+        else:
+            col3 = '%s:BES' % self._methodName
+        col4 = iBlatHit.getTStart()
+        col5 = iBlatHit.getTEnd()
+        col6 = '.'
+        col7 = '+'
+        col8 = '.'
+        col9 = 'ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s' % (iBlatHit.getQName(), iBlatHit.getQName(), iBlatHit.getTStart(), iBlatHit.getTEnd(), iBlatHit.getTSize())
+        gffLine = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)
+        return gffLine
+
+    def _createGFFOutputFile(self):
+        """Create (or truncate) the output file and write the GFF3 header."""
+        with open(self._outputFileGFF, 'w') as GFFfile:
+            GFFfile.write("##gff-version 3\n")
+
+    def _printGFFLinesToOutputFile(self, line):
+        """Append line to the output file."""
+        with open(self._outputFileGFF, 'a') as GFFfile:
+            GFFfile.write(line)
+
+if __name__ == '__main__':
+    # Script entry point: read the command-line options, then convert.
+    converter = BlatToGff()
+    converter.setAttributesFromCmdLine()
+    converter.run()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BlatToGffForBesPaired.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlatToGffForBesPaired.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,266 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import optparse\n+import os\n+import sys\n+import re\n+import datetime\n+from commons.core.parsing.BlatParser import BlatParser\n+from commons.core.seq.FastaUtils import FastaUtils \n+\n+class BlatToGffForBesPaired(object):\n+\n+\n+ def __init__(self):\n+ pass\n+ \n+ def setAttributesFromCmdLine(self):\n+ help = \'\\\n+ \\nThis Script Launch BlatToGffForBesPaired.\\n\\n\\\n+ Example 1: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3\\n\\\n+ Example 2: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3 -n muscadine:filtre1\\n\\n\\\n+ Note 1: In blat input file, all BAC-Ends must be paired. In addition, they must be one above the other.\\nFor example, if you have the BES MRRE1H032F08FM1 (forward), we must have the BES MRRE1H032F08RM1 (reverse) just after, like:\\n\\\n+ 554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMRRE1H032F08FM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n\\\n+ 620\\t23\\t0\\t0\\t0\\t0\\t0\\t0\\t-\\tMRRE1H032F08RM1\\t643\\t0\\t643\\tchr11\\t19818926\\t3794984\\t3795627\\t1\\t643,\\t0,\\t3794984,\\n\\\n+ Note 2: the header in Blat results output file must be present (5 lines).\\n\\n\'\n+ \n+ parser = optparse.OptionParser(usage= help, version="CovertSamToFastq.py v1.0")\n+ parser.add_option( \'-i\', \'--input\', dest=\'inputBLAT\', help=\'Blat Input File Name, with BES paired (1 Forward and 1 Reverse) [Format: tabular]\', default= None )\n+ parser.add_option( \'-f\', \'--fasta\', dest=\'inputFASTA\', help=\'Fasta Input File Name, with all sequences of BES [Format: fasta]\', default= None )\n+ parser.add_option( \'-o\', \'--output\', dest=\'output\', help=\'Output File Name [Format: GFF3]\', default= None )\n+ parser.add_option( \'-n\', \'--methodname\', 
dest=\'methodName\', help=\'Method name in col. 3 [Default: None]\', default= None )\n+ ( options, args ) = parser.parse_args()\n+ self._options = options\n+ \n+ def checkOptions(self):\n+ if self._options.inputBLAT == \'\':\n+ raise Exception("ERROR: No Blat file specified for -i !")\n+ elif not os.path.exists(self._options.inputBLAT):\n+ raise Exception("ERROR: Blat Input File doesn\'t exist !")\n+ else:\n+ self._inputFileBlat = self._options.inputBLAT\n+ '..b' col9 = \'ID=%s;Name=%s;bac_start=%s;bac_end=%s;bac_size=%s;besFM_name=%s;muscadine_besFM_seq=%s;besRM_name=%s;muscadine_besRM_seq=%s\' % (bacName, bacName, startBacPos, endBacPos, sizeBacPos, nameBesFM, seqBesFM, nameBesRM, seqBesRM)\n+ gffLine = \'%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n\' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)\n+ return gffLine\n+ return None\n+ \n+ def getBesFmAndRmNamesAndSequences(self, besName1, seqBes1, typeBes1, besName2, seqBes2, typeBes2):\n+ if typeBes1 == \'FM\' and typeBes2 == \'RM\':\n+ return besName1, seqBes1, besName2, seqBes2\n+ elif typeBes1== \'RM\' and typeBes2 == \'FM\':\n+ return besName2, seqBes2, besName1, seqBes1\n+\n+ def getBesName(self, col9):\n+ lCol9 = col9.split(\';\')\n+ ID = lCol9[0]\n+ besName = ID[3:]\n+ return besName\n+ \n+ def getBacName(self, besName):\n+ bacName = besName[:-3]\n+ return bacName\n+\n+ def checkBesNames(self, besName1, besName2, line):\n+ bacName1 = besName1[:-3]\n+ bacName2 = besName2[:-3]\n+ if bacName1 == bacName2:\n+ return True\n+ else:\n+ sys.stderr.write("WARNING: Lines %s and %s the two Bes (%s AND %s) do not belong to the same BAC !!!\\n -> you have to filter this Blat file...\\n" % (int(line)-1, line, besName1, besName2))\n+ return False\n+ \n+ def checkBesPositions(self, tBes1, tBes2):\n+ if tBes1[0] == tBes2[0]:\n+ minBes1 = min(tBes1[1], tBes1[2])\n+ maxBes1 = max(tBes1[1], tBes1[2])\n+ minBes2 = min(tBes2[1], tBes2[2])\n+ maxBes2 = max(tBes2[1], tBes2[2])\n+ if (minBes1 < minBes2 and maxBes1 < 
minBes2) or (minBes2 < minBes1 and maxBes2 < minBes1):\n+ return True\n+ return False\n+ \n+ def getBacPositions(self, tBes1, tBes2):\n+ startBacPos = 0\n+ endBacPos = 0\n+ minBes1 = min(tBes1[1], tBes1[2])\n+ maxBes1 = max(tBes1[1], tBes1[2])\n+ minBes2 = min(tBes2[1], tBes2[2])\n+ maxBes2 = max(tBes2[1], tBes2[2])\n+ if minBes1 < minBes2:\n+ startBacPos = minBes1\n+ endBacPos = maxBes2\n+ else:\n+ startBacPos = minBes2\n+ endBacPos = maxBes1\n+ return startBacPos, endBacPos\n+ \n+ def extractBesSequenceFromFastaFile(self, besName, numberLine):\n+ seq = \'\'\n+ date = datetime.datetime.now()\n+ date = date.strftime("%d%m%Y_%H%M%S")\n+ tmpFileName = \'tmp_BlatToGffForBesPaired_%s.fasta\' % date\n+ iFastaUtils = FastaUtils()\n+ iFastaUtils.dbExtractByPattern(besName, self._inputFileFasta, tmpFileName)\n+ \n+ if os.path.exists(tmpFileName):\n+ newFastaFile = open(tmpFileName, \'r\')\n+ line = newFastaFile.readline()\n+ if line != \'\':\n+ while line != \'\':\n+ if line[0] != \'>\':\n+ line = line.replace(\'\\n\', \'\')\n+ seq += line\n+ line = newFastaFile.readline()\n+ newFastaFile.close()\n+ os.remove(tmpFileName)\n+ return seq\n+ os.remove(tmpFileName)\n+ \n+ sys.stderr.write("WARNING: At line %s, the BAC-Ends (%s) hasn\'t got sequence in fasta file (%s) !!\\n" % (numberLine, besName, os.path.basename(self._inputFileFasta)))\n+ return \'NA\'\n+ \n+ def _createGFFOutputFile(self):\n+ GFFfile = open(self._outputFileGFF, \'w\')\n+ GFFfile.write("##gff-version 3\\n")\n+ GFFfile.close()\n+ \n+ def _printGFFLinesToOutputFile(self, lLines):\n+ GFFfile = open(self._outputFileGFF, \'a\')\n+ for line in lLines:\n+ GFFfile.write(line)\n+ GFFfile.close()\n+\n+if __name__ == \'__main__\':\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ iBlatToGffForBesPaired.setAttributesFromCmdLine()\n+ iBlatToGffForBesPaired.run()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BowtieParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BowtieParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,91 @@
+#
+# Copyright INRA-URGI 2009-2011
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.structure.Interval import Interval
+
+class BowtieParser(MapperParser):
+    """A class that parses BowTie format
+
+    Each input line is a tab-separated record of 7 or 8 fields, read in this
+    order: read name, strand, reference name, offset on the reference, read
+    sequence, qualities, an integer count, and (optionally) a comma-separated
+    list of mismatch descriptors.
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        super(BowtieParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(BowtieParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["bowtie"]
+
+
+    def skipFirstLines(self):
+        """Do nothing: no leading line is skipped for this format."""
+        pass
+
+
+    def parseLine(self, line):
+        """Convert one BowTie line into a Mapping holding a single SubMapping.
+
+        The query interval covers the whole read (1 .. read length) and the
+        target interval covers the same number of bases on the reference.
+
+        Raises an Exception when the line does not contain 7 or 8 fields, and
+        a ValueError when the offset or the count field is not an integer.
+        """
+        line   = line.strip()
+        fields = line.split("\t")
+        if len(fields) not in (7, 8):
+            raise Exception("Line %d '%s' does not look like a BowTie line (number of fields is %d instead of 7 or 8)" % (self.currentLineNb, line, len(fields)))
+        name         = fields[0]
+        direction    = 1 if fields[1] == "+" else -1
+        chromosome   = fields[2]
+        # the offset found in the file is shifted by one
+        # (presumably BowTie reports 0-based offsets -- confirm against the BowTie manual)
+        genomeStart  = int(fields[3]) + 1
+        sequence     = fields[4]
+        size         = len(sequence)
+        # not used afterwards; still parsed so that a non-integer 7th field keeps being rejected
+        number       = int(fields[6])
+        nbMismatches = 0
+        if len(fields) == 8:
+            # one comma-separated descriptor per mismatch
+            nbMismatches = len(fields[7].split(","))
+
+        mapping = Mapping()
+        queryInterval = Interval()
+        queryInterval.setName(name)
+        queryInterval.setStart(1)
+        # the read spans 1..size; the previous 'size + 1' made the query
+        # interval one base longer than the target interval and the mapping size
+        queryInterval.setEnd(size)
+        targetInterval = Interval()
+        targetInterval.setChromosome(chromosome)
+        targetInterval.setStart(genomeStart)
+        targetInterval.setEnd(genomeStart + size - 1)
+        subMapping = SubMapping()
+        subMapping.setQueryInterval(queryInterval)
+        subMapping.setTargetInterval(targetInterval)
+        mapping.addSubMapping(subMapping)
+        mapping.setSize(size)
+        mapping.setNbMismatches(nbMismatches)
+        mapping.setDirection(direction)
+        return mapping
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/BowtieParser.pyc

Binary file commons/core/parsing/BowtieParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/CoordsParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/CoordsParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,137 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.misc import Utils
+
+class CoordsParser(MapperParser):
+    """A class that parses the .coords output of Nucmer"""
+
+    def __init__(self, fileName, verbosity = 0):
+        # One regular expression per supported line layout; 'lineType' (1 to 4) tells which
+        # one parseLine uses. They are set before the parent constructor is called.
+        # layout 1: '|'-separated columns, identity only
+        self._lineParseRe = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+\|\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+\|\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+\|\s+(?P<identity>\d+\.?\d*)\s+\|\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
+        # layout 2: whitespace-separated columns with lengths, coverages and frames
+        self._lineParseRe2 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+(?P<identity>\d+\.?\d*)\s+(?P<rlen>\d+\.?\d*)\s+(?P<qlen>\d+\.?\d*)\s+(?P<rcov>\d+\.?\d*)\s+(?P<qcov>\d+\.?\d*)\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
+        # layout 3: '|'-separated columns with identity, similarity, stops and frames
+        self._lineParseRe3 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+\|\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+\|\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+\|\s+(?P<identity>\d+\.?\d*)\s+(?P<sim>\d+\.?\d*)\s+(?P<stp>\d+\.?\d*)\s+\|\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
+        # layout 4: whitespace-separated columns with similarity, stops, lengths, coverages and frames
+        self._lineParseRe4 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+(?P<identity>\d+\.?\d*)\s+(?P<sim>\d+\.?\d*)\s+(?P<stp>\d+\.?\d*)\s+(?P<rlen>\d+\.?\d*)\s+(?P<qlen>\d+\.?\d*)\s+(?P<rcov>\d+\.?\d*)\s+(?P<qcov>\d+\.?\d*)\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
+        self.lineType = 1
+        MapperParser.__init__(self, fileName, verbosity)
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["coords"]
+
+    def skipFirstLines(self):
+        """Read the header lines and deduce the line layout from the column titles.
+
+        Reading stops at the end of the file, at a '=====' separator line, or
+        right after a tab-separated title line (layouts 2 and 4).
+        """
+        while True:
+            header = self.handle.readline()
+            self.currentLineNb += 1
+            if header == "" or "=====" in header:
+                break
+            if "[S1]\t[E1]\t[S2]\t[E2]\t[LEN 1]\t[LEN 2]\t[% IDY]\t[LEN R]\t[LEN Q]\t[COV R]\t[COV Q]\t[FRM]\t[TAGS]" in header:
+                self.lineType = 2
+                break
+            # no 'break' for layout 3: reading goes on until one of the stop conditions is met
+            if "[S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  [% SIM]  [% STP]  | [FRM]  [TAGS]" in header:
+                self.lineType = 3
+
+            if "[% IDY]\t[% SIM]\t[% STP]" in header and "[LEN Q]" in header:
+                self.lineType = 4
+                break
+
+    def parseLine(self, line):
+        """Convert one alignment line into a Mapping holding a single SubMapping.
+
+        The regular expression is chosen according to 'lineType'; the program
+        exits when the line does not match it.
+        """
+        if self.lineType == 1:
+            m = self._lineParseRe.search(line)
+        elif self.lineType == 2:
+            m = self._lineParseRe2.search(line)
+        elif self.lineType == 3:
+            m = self._lineParseRe3.search(line)
+        elif self.lineType == 4:
+            m = self._lineParseRe4.search(line)
+        if m is None:
+            sys.exit("\nLine %d '%s' does not have a NucMer format" % (self.currentLineNb, line))
+
+        # convert every captured column once
+        qName   = m.group("qName")
+        tName   = m.group("tName")
+        qStart  = int(m.group("qStart"))
+        qEnd    = int(m.group("qEnd"))
+        tStart  = int(m.group("tStart"))
+        tEnd    = int(m.group("tEnd"))
+        qLength = int(m.group("qLength"))
+        tLength = int(m.group("tLength"))
+        identity = float(m.group("identity"))
+
+        # intervals are stored with start <= end; the sign of (end - start) gives the direction
+        qLow, qHigh = min(qStart, qEnd), max(qStart, qEnd)
+        tLow, tHigh = min(tStart, tEnd), max(tStart, tEnd)
+        qDirection  = qEnd - qStart
+        tDirection  = tEnd - tStart
+        size        = min(qLength, tLength)
+
+        mapping = Mapping()
+
+        subMapping = SubMapping()
+        subMapping.queryInterval.setName(qName)
+        subMapping.queryInterval.setStart(qLow)
+        subMapping.queryInterval.setEnd(qHigh)
+        subMapping.queryInterval.setSize(qLength)
+        subMapping.queryInterval.setDirection(qDirection)
+
+        subMapping.targetInterval.setChromosome(tName)
+        subMapping.targetInterval.setStart(tLow)
+        subMapping.targetInterval.setEnd(tHigh)
+        subMapping.targetInterval.setSize(tLength)
+        subMapping.targetInterval.setDirection(tDirection)
+
+        subMapping.setDirection(qDirection)
+        subMapping.setSize(size)
+        subMapping.setIdentity(identity)
+
+        mapping.addSubMapping(subMapping)
+        mapping.targetInterval.setStart(tLow)
+        mapping.targetInterval.setEnd(tHigh)
+        mapping.targetInterval.setSize(tLength)
+        mapping.targetInterval.setChromosome(tName)
+
+        mapping.queryInterval.setStart(qLow)
+        mapping.queryInterval.setEnd(qHigh)
+        mapping.queryInterval.setSize(qLength)
+        mapping.queryInterval.setName(qName)
+        mapping.setDirection(qDirection)
+        mapping.setSize(size)
+        mapping.setIdentity(identity)
+        mapping.setTagValue("feature", "match")
+        mapping.setTagValue("Target", "%s %d %d" % (qName, qStart, qEnd))
+
+        # only layouts 2 and 4 carry the query length and coverage columns
+        if self.lineType in (2, 4):
+            mapping.setTagValue("target_pident", identity)
+            mapping.setTagValue("target_pcover", float(m.group("qcov")))
+            mapping.setTagValue("target_length", int(m.group("qlen")))
+
+        # The number of mismatches is not set here: the former computation (Hamming distance
+        # between the two lines following the record, "specific to Mark work") is disabled
+        # because of a possible slowdown.
+        mapping.setNbGaps(0)
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/CoordsParser.pyc

Binary file commons/core/parsing/CoordsParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,197 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import optparse\n+from commons.core.parsing.SsrParser import SsrParser\n+from commons.core.parsing.BlatParser import BlatParser\n+\n+class CrossSsrAndBesMappedByBlatToGff(object):\n+\n+\n+ def __init__(self):\n+ self._inputFileSSR = \'\'\n+ self._inputFileBlat = \'\'\n+ self._outputFileGFF = \'\'\n+ \n+ def setAttributesFromCmdLine(self):\n+ help = \'\\\n+ \\nThis Script Launch CrossSsrAndBesMappedByBlatToGff.\\n\\n\\\n+ Example 1: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3\\n\\\n+ Example 2: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3 -n muscadine:filtre1\\n\\n\'\n+ \n+ parser = optparse.OptionParser(usage= help, version="CovertSamToFastq.py v1.0")\n+ parser.add_option( \'-s\', \'--ssr\', dest=\'inputSSR\', help=\'SSR Input File Name [Format: tabular]\', default= None )\n+ parser.add_option( \'-b\', \'--blat\', dest=\'inputBLAT\', help=\'Blat Input File Name [Format: tabular]\', default= None )\n+ parser.add_option( \'-o\', \'--output\', dest=\'output\', help=\'Output File Name [Format: GFF3]\', default= None )\n+ parser.add_option( \'-n\', \'--methodName\', dest=\'methodName\', help=\'Method name in col. 
3 [Default: None]\', default= None )\n+ ( options, args ) = parser.parse_args()\n+ self.options = options\n+ \n+ def checkOptions(self):\n+ if self.options.inputSSR == \'\':\n+ raise Exception("ERROR: No SSR file specified for -s !")\n+ elif not os.path.exists(self.options.inputSSR):\n+ raise Exception("ERROR: SSR Input File doesn\'t exist !")\n+ else:\n+ self._inputFileSSR = self.options.inputSSR\n+ \n+ if self.options.inputBLAT == \'\':\n+ raise Exception("ERROR: No Blat file specified for -b !")\n+ elif not os.path.exists(self.options.inputBLAT):\n+ raise Exception("ERROR: Blat Input File doesn\'t exist !")\n+ else:\n+ self._inputFileBlat = self.options.inputBLAT\n+ \n+ if self.options.output == \'\':\n+ raise Exception("ERROR: No Output file specified for -o !")\n+ else:\n+ self._outputFileGFF = self.options.output\n+ \n+ self._methodName = self.options.methodName\n+ \n+ def run(self):\n+ '..b'\n+ besNameToKeep = BlatHitObject.getQName()\n+ lOfSSRHitObject = dictSsrParser[besNameToKeep]\n+ \n+ for SSRHitObject in lOfSSRHitObject:\n+ posSSRStart = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrStart(), BlatHitObject.getTStart(), BlatHitObject.getTEnd(), BlatHitObject.getStrand())\n+ posSSREnd = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrEnd(), BlatHitObject.getTStart(), BlatHitObject.getTEnd(), BlatHitObject.getStrand())\n+ ssrSeq = self.getSsrSeq(SSRHitObject.getSsrMotif(), SSRHitObject.getSsrMotifNumber())\n+ \n+ col1 = BlatHitObject.getTName()\n+ col2 = \'CrossSsrAndBesAlignedByBlat\'\n+ if self._methodName != \'\' and self._methodName != None:\n+ col3 = \'%s:SSR\' %self._methodName\n+ else:\n+ col3 = \'SSR\'\n+ col4 = posSSRStart\n+ col5 = posSSREnd\n+ col6 = \'.\'\n+ col7 = BlatHitObject.getStrand()\n+ col8 = \'.\'\n+ col9 = \'ID=SSR_%s_%s;Name=SSR_%s_%s;bes_name=%s;bes_size=%s;bes_matchstart=%s;bes_matchend=%s;bes_redundancy=%s;ssr_type=%s;ssr_motif=%s;ssr_motif_number=%s;ssr_start=%s;ssr_end=%s;muscadine_seq=%s\' % 
(besNameToKeep, SSRHitObject.getBesRedundancy(), \n+ besNameToKeep, SSRHitObject.getBesRedundancy(),\n+ besNameToKeep, BlatHitObject.getQSize(),\n+ BlatHitObject.getQStart(), BlatHitObject.getQEnd(), \n+ SSRHitObject.getBesRedundancy(), SSRHitObject.getSsrNbNucleotides(),\n+ SSRHitObject.getSsrMotif(), SSRHitObject.getSsrMotifNumber(),\n+ SSRHitObject.getSsrStart(), SSRHitObject.getSsrEnd(), ssrSeq)\n+ gffLine = \'%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n\' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)\n+ listGffLines.append(gffLine)\n+ \n+ return listGffLines\n+ \n+ def convertSSRPositionsToChromPositions(self, ssrPos, chromPosStart, chromPosEnd, strand):\n+ if strand == \'+\':\n+ newPos = int(chromPosStart) + int(ssrPos) - 1\n+ elif strand == \'-\':\n+ newPos = int(chromPosEnd) - int(ssrPos) + 1\n+ return newPos\n+ \n+ def getSsrSeq(self, motif, nbMotif):\n+ ssrSeq = motif * int(nbMotif)\n+ return ssrSeq\n+ \n+ def _createGFFOutputFile(self):\n+ GFFfile = open(self._outputFileGFF, \'w\')\n+ GFFfile.write("##gff-version 3\\n")\n+ GFFfile.close()\n+ \n+ def _printGFFLinesToOutputFile(self, lLinesToPrint):\n+ GFFfile = open(self._outputFileGFF, \'a\')\n+ for line in lLinesToPrint:\n+ GFFfile.write(line)\n+ GFFfile.close()\n+\n+if __name__ == \'__main__\':\n+ iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()\n+ iCrossSsrAndBesMappedByBlatToGff.setAttributesFromCmdLine()\n+ iCrossSsrAndBesMappedByBlatToGff.run()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ElandParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/ElandParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,126 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure import Mapping
+
+class ElandParser(MapperParser):
+    """A class that parses ELAND format
+
+    Each input line is a tab-separated record of at least 22 fields.
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        super(ElandParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(ElandParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["eland"]
+
+
+    def skipFirstLines(self):
+        """Do nothing: no leading line is skipped for this format."""
+        pass
+
+
+    def getInfos(self):
+        """Delegate to the parent class and hand back whatever it returns."""
+        return super(ElandParser, self).getInfos()
+
+
+    def parseLine(self, line):
+        """Convert one ELAND line into a Mapping.
+
+        Returns None when the read has no position or when the last parsed
+        field (filtering flag) is not "Y". The program exits when the line has
+        fewer than 22 fields or when the read number is not "1".
+        """
+        # The module-level statement 'from SMART.Java.Python.structure import Mapping' binds
+        # the name to the module, which cannot be called; the class lives in that module.
+        from SMART.Java.Python.structure.Mapping import Mapping
+
+        line = line.strip()
+
+        fields = line.split("\t")
+        if len(fields) < 22:
+            sys.exit("Line %d '%s' does not look like a ELAND line (number of fields is %d instead of 22)" % (self.currentLineNb, line, len(fields)))
+
+        # only the fields used below are extracted
+        flowCell    = fields[0]
+        run         = fields[1]
+        lane        = fields[2]
+        tile        = fields[3]
+        xcoord      = fields[4]
+        ycoord      = fields[5]
+        number      = fields[7]
+        read        = fields[8]
+        quality     = fields[9]
+        chromosome  = fields[10]
+        position    = fields[12]
+        strand      = fields[13]
+        description = fields[14]
+        singleScore = fields[15]
+        filtering   = fields[21]
+
+        if number != "1":
+            sys.exit("S-MART cannot handle pair-end reads yet!")
+
+        # nothing found
+        if position == "":
+            return None
+
+        # mapping filtered out: checked before anything else is converted, so that
+        # a discarded line cannot fail on its score field
+        if filtering != "Y":
+            return None
+
+        name = "%s_%s:%s:%s:%s:%s#0/1" % (flowCell, run, lane, tile, xcoord, ycoord)
+        direction = 1 if strand == "F" else -1
+        size = len(read)
+        start = int(position)
+        # every upper-case letter of the description counts as one mismatch
+        nbMismatches = sum(1 for char in description if "A" <= char <= "Z")
+
+        mapping = Mapping()
+        mapping.setTagValue("qualityString", quality)
+
+        mapping.queryInterval.setName(name)
+        mapping.queryInterval.setDirection(direction)
+        mapping.queryInterval.setStart(1)
+        mapping.queryInterval.setEnd(size)
+
+        mapping.targetInterval.setChromosome(chromosome)
+        mapping.targetInterval.setStart(start)
+        # same number of bases as the query interval (1..size); the previous
+        # 'start + size' made the target interval one base too long
+        mapping.targetInterval.setEnd(start + size - 1)
+        mapping.targetInterval.setDirection(1)
+
+        mapping.setSize(size)
+        mapping.setDirection(direction)
+
+        mapping.setNbGaps(0)
+        mapping.setNbMismatches(nbMismatches)
+        mapping.setTagValue("score", int(singleScore))
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ElandParser.pyc

Binary file commons/core/parsing/ElandParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ExoParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/ExoParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,137 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+
+class ExoParser(MapperParser):
+    """A class that parses the output of Exonerate - roll your own format"""
+
+    # one alignment line: query name, two query coordinates, a strand, target name, two target
+    # coordinates, a strand (kept as direction), a number (ignored), the number of mismatches,
+    # then a list of blocks
+    _LINE_RE  = re.compile(r"^\s*(\S+)\s+(\d+)\s+(\d+)\s+[+-]\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s+(\d+)\s+(\S.*)$")
+    # one block: a type character, then the distance covered on the query and on the target
+    _BLOCK_RE = re.compile(r"^(\w)\s+(\d+)\s+(\d+)")
+
+    def __init__(self, fileName, verbosity = 0):
+        super(ExoParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(ExoParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["exo", "exonerate"]
+
+
+    def skipFirstLines(self):
+        """Skip every line up to (and including) the one containing "Hostname".
+
+        Reading also stops at the end of the file: readline() then returns an
+        empty string, which used to make the loop run forever when no line
+        contains "Hostname".
+        """
+        while True:
+            line = self.handle.readline()
+            if line == "" or "Hostname" in line:
+                break
+            # as before, the "Hostname" line itself is not counted
+            self.currentLineNb += 1
+
+
+    def parseLine(self, line):
+        """Convert one alignment line into a Mapping, with one SubMapping per run of blocks.
+
+        Returns None for the final "-- completed exonerate analysis" line.
+        The program exits when the line or one of its block types is not
+        recognized.
+        """
+        if line == "-- completed exonerate analysis\n":
+            return None
+
+        m = self._LINE_RE.search(line)
+        if m is None:
+            sys.exit("\nLine %d '%s' does not have a RYO format" % (self.currentLineNb, line))
+
+        mapping = Mapping()
+        name = m.group(1)
+        # coordinates may come in any order; the larger one is decremented by one
+        queryStart = min(int(m.group(2)), int(m.group(3)))
+        queryEnd = max(int(m.group(2)), int(m.group(3)))-1
+        chromosome = m.group(4)
+        targetStart = min(int(m.group(5)), int(m.group(6)))
+        targetEnd = max(int(m.group(5)), int(m.group(6)))-1
+        direction = m.group(7)
+        nbMismatches = int(m.group(8))
+        rest = m.group(9).strip()
+
+        nbGaps = 0
+        # distances already consumed on the query and on the target
+        queryOffset = 0
+        targetOffset = 0
+
+        # sub-mapping currently open, if any
+        subMapping = None
+        block = self._BLOCK_RE.search(rest)
+        while block is not None:
+            blockType      = block.group(1)
+            queryDistance  = int(block.group(2))
+            targetDistance = int(block.group(3))
+            if blockType == "M":
+                # a match block opens a sub-mapping unless one is already open
+                # NOTE(review): the size is taken from the block that opens the sub-mapping only
+                if subMapping is None:
+                    subMapping = SubMapping()
+
+                    subMapping.setSize(queryDistance)
+                    subMapping.setDirection(direction)
+
+                    subMapping.queryInterval.setName(name)
+                    subMapping.queryInterval.setStart(queryStart + queryOffset)
+                    subMapping.queryInterval.setDirection(direction)
+
+                    subMapping.targetInterval.setChromosome(chromosome)
+                    subMapping.targetInterval.setStart(targetStart + targetOffset)
+                    subMapping.targetInterval.setDirection(1)
+
+            elif blockType == "G":
+                # a gap block does not close the open sub-mapping
+                nbGaps += max(queryDistance, targetDistance)
+
+            elif blockType in ("I", "5", "3"):
+                # these blocks close the open sub-mapping just before their own start
+                if subMapping is not None:
+                    subMapping.queryInterval.setEnd(queryStart + queryOffset - 1)
+                    subMapping.targetInterval.setEnd(targetStart + targetOffset - 1)
+                    mapping.addSubMapping(subMapping)
+                    subMapping = None
+            else:
+                sys.exit("Cannot understand sign '%s' in line %s" % (blockType, line))
+
+            queryOffset += queryDistance
+            targetOffset += targetDistance
+            # drop the block just read and look for the next one
+            rest = rest[block.end():].strip()
+            block = self._BLOCK_RE.search(rest)
+
+        # close the sub-mapping still open at the end of the line
+        if subMapping is not None:
+            subMapping.queryInterval.setEnd(queryStart + queryOffset - 1)
+            subMapping.targetInterval.setEnd(targetStart + targetOffset - 1)
+            mapping.addSubMapping(subMapping)
+
+        mapping.setNbMismatches(nbMismatches)
+        mapping.setNbGaps(nbGaps)
+        mapping.setDirection(direction)
+
+        mapping.queryInterval.setName(name)
+        mapping.queryInterval.setStart(queryStart)
+        mapping.queryInterval.setEnd(queryEnd)
+
+        mapping.targetInterval.setChromosome(chromosome)
+        mapping.targetInterval.setStart(targetStart)
+        mapping.targetInterval.setEnd(targetEnd)
+
+        return mapping
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ExoParser.pyc

Binary file commons/core/parsing/ExoParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/FastaParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/FastaParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,173 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from commons.core.parsing.SequenceListParser import SequenceListParser
+from SMART.Java.Python.structure.Sequence import Sequence
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+class FastaParser(SequenceListParser):
+    """A class that reads a list of sequences in FASTA
+
+    Besides sequential parsing (parseOne), the parser can index the file
+    (setTags) and extract a region of a sequence without reading the
+    whole file (getSubSequence).
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        super(FastaParser, self).__init__(fileName, verbosity)
+        # sequence name (first word of the header) -> offset of the header line
+        self.tags = {}
+
+
+    def getTags(self):
+        """
+        Get the dictionary mapping the sequence names to their file offsets
+        """
+        return self.tags
+
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["fasta", "mfa", "fas"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def getInfos(self):
+        """
+        Get some generic information about the sequences
+
+        Counts the header lines (nbSequences) and the total number of
+        characters of the other non-blank lines (size).
+        """
+        self.nbSequences = 0
+        self.size = 0
+        self.reset()
+        progress = UnlimitedProgress(100000, "Reading input file", self.verbosity - 9)
+        for line in self.handle:
+            line = line.strip()
+            if line == "":
+                continue
+            if line[0] == ">":
+                self.nbSequences += 1
+            else:
+                self.size += len(line)
+            progress.inc()
+        progress.done()
+        self.reset()
+
+
+    def parseOne(self):
+        """
+        Parse only one element in the file
+        @return: the next Sequence, or None if no header line is left
+        """
+        name = None
+        chunks = []
+
+        # a header read ahead by the previous call names the current sequence
+        if self.currentLine is not None:
+            if self.currentLine[0] != ">":
+                raise Exception("First line is weird: %s" % (self.currentLine))
+            name = self.currentLine[1:].split()[0]
+            self.currentLine = None
+
+        for line in self.handle:
+            line = line.strip()
+            if line == "":
+                continue
+            if line[0] != ">":
+                chunks.append(line)
+            elif name is None:
+                name = line[1:].split()[0]
+            else:
+                # header of the next sequence: keep it for the next call
+                self.currentLine = line
+                return Sequence(name, "".join(chunks))
+
+        if name is None:
+            return None
+        return Sequence(name, "".join(chunks))
+
+
+    def setTags(self):
+        """
+        Record the file offset of every header line found from the current
+        position to the end of the file, then restore the position
+        """
+        mark = self.handle.tell()
+        thisTag = mark
+
+        line = self.handle.readline()
+        while line != "":
+            if line[0] == ">":
+                words = line[1:].split()
+                # a bare ">" has no name to index
+                if words:
+                    self.tags[words[0]] = thisTag
+            thisTag = self.handle.tell()
+            line = self.handle.readline()
+
+        self.handle.seek(mark)
+
+
+    def getSubSequence(self, chromosome, start, end, direction, name = None):
+        """
+        Extract a region of a sequence
+        @param chromosome: name of the sequence (first word of its header)
+        @param start:      first position of the region (1-based)
+        @param end:        last position of the region (1-based, included)
+        @param direction:  -1 to get the reverse-complemented region
+        @param name:       name of the returned sequence (built from the
+                           coordinates if not given)
+        @return: a Sequence
+        Raises an Exception if the sequence is unknown or the region empty.
+        The offset computation assumes every line of the sequence but the
+        last one has the same length.
+        """
+        if not self.tags:
+            self.setTags()
+
+        if chromosome not in self.tags:
+            raise Exception("Cannot find " + chromosome)
+
+        if name is None:
+            name = "%s:%d-%d (%d)" % (chromosome, start, end, direction)
+        sequence = Sequence(name)
+
+        # switch from 1-based to 0-based coordinates
+        start -= 1
+        end   -= 1
+        # a start before the sequence would make the seek land in the header
+        if start < 0:
+            start = 0
+
+        self.handle.seek(self.tags[chromosome])
+        line = self.handle.readline().strip()
+        # tags are keyed on the first word of the header: compare that word
+        # only, since the header may carry a description after the name
+        if line[:1] != ">" or line[1:].split()[:1] != [chromosome]:
+            raise Exception("Arrived in a wrong place (got %s)" % (line))
+
+        position1 = self.handle.tell()
+        line      = self.handle.readline().strip()
+        position2 = self.handle.tell()
+        size      = len(line)
+        if size == 0:
+            raise Exception("Error, sequence %s is empty" % (name))
+        # jump to the beginning of the line that contains the start position
+        address = position1 + (start // size) * (position2 - position1)
+
+        # number of nucleotides located before the line we jump to
+        count = start - (start % size)
+        self.handle.seek(address)
+
+        chunks = []
+        for line in self.handle:
+            line = line.strip()
+            # blank lines (e.g. at the end of the file) carry no nucleotide
+            if line == "":
+                continue
+            if line[0] == ">":
+                break
+            subEnd = end - count
+            if subEnd < 0:
+                break
+            subStart = max(0, start - count)
+            chunks.append(line[subStart:subEnd+1])
+            count += len(line)
+
+        newSequence = "".join(chunks)
+        if newSequence == "":
+            raise Exception("Error, sequence %s is empty" % (name))
+        sequence.sequence = newSequence
+        if direction == -1:
+            sequence.reverseComplement()
+        return sequence

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/FastaParser.pyc

Binary file commons/core/parsing/FastaParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/FastqParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/FastqParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,104 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from commons.core.parsing.SequenceListParser import SequenceListParser
+from SMART.Java.Python.structure.Sequence import Sequence
+
+class FastqParser(SequenceListParser):
+    """A class that reads a list of sequences in FASTQ format"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(FastqParser, self).__init__(fileName, verbosity)
+
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["fastq", "mfq"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def getInfos(self):
+        """
+        Get some generic information about the sequences
+
+        The number of sequences is the number of non-blank lines divided
+        by 4 (each record is expected to span exactly 4 lines).
+        """
+        self.nbSequences = 0
+        self.reset()
+        if self.verbosity >= 10:
+            print("Getting information on %s." % (self.fileName))
+
+        nbLines = 0
+        for line in self.handle:
+            if line.strip() == "":
+                continue
+            nbLines += 1
+            if self.verbosity >= 10 and nbLines % 400000 == 0:
+                sys.stdout.write("    %d sequences read\r" % (nbLines // 4))
+                sys.stdout.flush()
+        self.reset()
+        self.nbSequences = nbLines // 4
+        if self.verbosity >= 10:
+            print("    %d sequences read" % (self.nbSequences))
+            print("Done.")
+
+
+    def parseOne(self):
+        """
+        Parse only one element in the file
+        @return: the next Sequence (with its quality), or None if the file
+                 holds no further complete record
+        """
+        name = None
+        string = ""
+        quality = ""
+        # position of the current line in the 4-line record
+        lineType = 0
+
+        for line in self.handle:
+            line = line.strip()
+            if lineType == 0:
+                # blank lines between records (or at the end of the file)
+                # are skipped, as getInfos does; inside a record they are
+                # kept, since they may be an empty read or quality
+                if line == "":
+                    continue
+                if line[0] != "@":
+                    raise Exception("Line '%s' should start with '@'!" % (line))
+                name = line[1:]
+            elif lineType == 1:
+                string = line
+            elif lineType == 2:
+                if not line.startswith("+"):
+                    sys.exit("Line '%s' should start with '+'!" % (line))
+                if line[1:] != name and line != "+":
+                    sys.exit("Weird difference in sequence and quality names (%s and %s) while parsing FASTQ file %s." % (name, line[1:], self.fileName))
+            elif lineType == 3:
+                quality = line
+            lineType += 1
+            if lineType == 4:
+                sequence = Sequence(name, string)
+                sequence.setQuality(quality)
+                return sequence
+
+        return None

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/FastqParser.pyc

Binary file commons/core/parsing/FastqParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/FindRep.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/FindRep.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,113 @@
+import re
+from xml.sax.handler import ContentHandler
+
+class FindRep( ContentHandler ):
+    """SAX handler writing one tab-separated line per <repeat> element.
+
+    The handler reads the text of the <sequence-name>, <start>, <end>,
+    <period>, <unit> and <score> elements.  At the end of each <repeat>
+    it writes: counter, "(most frequent unit)number of copies", sequence
+    name, start and end.
+
+    outfileName -- path of the output file (opened in startDocument)
+    filter      -- repeats whose score is greater than this value are skipped
+    count       -- initial value of the repeat counter
+    """
+    def __init__(self,outfileName, filter=0,count=0):
+        self.inWindowContent = 0
+        self.inSeqNameContent = 0
+        self.inRepContent = 0
+        self.inStartContent = 0
+        self.inEndContent = 0
+        self.inPeriodContent = 0
+        self.inUnitContent = 0
+        self.inScoreContent = 0
+        self.count = count
+        self._outfileName = outfileName
+        self.filter=filter
+
+    def startDocument(self):
+        """Open the output file."""
+        self._fileout = open(self._outfileName,"w")
+
+    def startElement(self,name,attrs):
+        """Flag the element being read and reset the text it accumulates."""
+        if name=="window":
+            self.inWindowContent=1
+        elif name=="sequence-name":
+            self.inSeqNameContent=1
+            self.seqname=""
+        elif name=="repeat":
+            self.inRepContent=1
+            self.start=""
+            self.end=""
+            self.period=""
+            # unit -> number of occurrences in the current repeat
+            self.type={}
+        elif name=="start":
+            self.inStartContent=1
+        elif name=="end":
+            self.inEndContent=1
+        elif name=="period":
+            self.inPeriodContent=1
+        elif name=="unit":
+            self.inUnitContent=1
+            self.unit=""
+        elif name=="score":
+            self.inScoreContent=1
+            self.score=""
+
+    def characters(self,ch):
+        """Append the text to the value of the element being read."""
+        if self.inSeqNameContent:
+            self.seqname+=ch
+        elif self.inStartContent:
+            self.start+=ch
+        elif self.inEndContent:
+            self.end+=ch
+        elif self.inPeriodContent:
+            self.period+=ch
+        elif self.inUnitContent:
+            self.unit+=ch
+        elif self.inScoreContent:
+            self.score+=ch
+
+    def endElement(self,name):
+        """Clear the element flag; write the output line when a repeat ends."""
+        if name=="window":
+            self.inWindowContent=0
+        elif name=="sequence-name":
+            self.inSeqNameContent=0
+        elif name=="repeat":
+            self.inRepContent=0
+            start=int(self.start)
+            end=int(self.end)
+            period=int(self.period)
+            score=float(self.score)
+            if score>self.filter:
+                return
+            self.count+=1
+            # most frequent unit of the repeat; empty if no unit was given
+            k_max=""
+            n_max=0
+            for k,n in self.type.items():
+                if n>n_max:
+                    n_max=n
+                    k_max=k
+
+            # names such as "<number> <name> {Cut} <offset>..<...>" refer to a
+            # chunk of a sequence: coordinates are shifted by the offset
+            m=re.match(r"^[0-9]+.+\{Cut\}",self.seqname)
+            if m is not None:
+                # drop the trailing "{Cut}" (5 characters) and the number
+                seqname=self.seqname[m.start(0):m.end(0)-5].rstrip()
+                seqname=re.sub("^[0-9]+ ","",seqname).lstrip()
+                tok=self.seqname[m.end(0):].split("..")
+                astart=start+int(tok[0])-1
+                aend=end+int(tok[0])-1
+            else:
+                astart=start
+                aend=end
+                seqname=self.seqname
+            # long units are shortened to their first 48 and last 51 characters
+            if len(k_max) > 100:
+                k_max=k_max[:48]+"..."+k_max[-51:]
+            strout="%d\t(%s)%d\t%s\t%d\t%d"%\
+                               (self.count,k_max,(abs(start-end)+1)//period,\
+                                seqname,astart,aend)
+            self._fileout.write("%s\n"%(strout))
+
+        elif name=="start":
+            self.inStartContent=0
+        elif name=="end":
+            self.inEndContent=0
+        elif name=="period":
+            self.inPeriodContent=0
+        elif name=="score":
+            self.inScoreContent=0
+        elif name=="unit":
+            self.inUnitContent=0
+            self.type[self.unit]=self.type.get(self.unit,0)+1
+
+    def endDocument(self):
+        """Close the output file."""
+        self._fileout.close()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/GbParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/GbParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,111 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+
+
+class GbParser(TranscriptListParser):
+    """A class that parses a GBrowse file and create a transcript list"""
+
+
+    def __init__(self, fileName, verbosity = 0):
+        # value of the last "reference = ..." line read by parseLine
+        self.reference = None
+        # value of the "bgcolor = ..." line read by skipFirstLines
+        self.color = None
+        super(GbParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(GbParser, self).__del__()
+
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["gb", "gbrowse"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        """
+        Read the header up to (and including) the first blank line,
+        remembering the background color if the header gives one
+        """
+        for rawLine in self.handle:
+            self.currentLineNb += 1
+            content = rawLine.strip()
+            if content == "":
+                return
+            colorMatch = re.search(r"^\s*bgcolor\s*=\s*(\S+)\s*$", content)
+            if colorMatch is not None:
+                self.color = colorMatch.group(1)
+
+
+    def parseLine(self, line):
+        """
+        Build a transcript from a READS line
+
+        If the line is a "reference = ..." line, the reference is stored
+        and the next line of the file is read as the READS line.  Exits
+        the program if the line or one of its exons cannot be read.
+        """
+        transcript = Transcript()
+
+        # optional reference line: store it and move to the next line
+        referenceMatch = re.search(r"^\s*reference\s*=\s*(\S+)\s*$", line)
+        if referenceMatch is not None:
+            self.reference = referenceMatch.group(1)
+            for line in self.handle:
+                self.currentLineNb += 1
+                line = line.strip()
+                break
+
+        # READS line: name, comma-separated exons, quoted comment
+        readsMatch = re.search(r"^\s*READS\s+(\S+)\s+(\S+)\s+\"([^\"]*)\"\s*$", line)
+        if readsMatch is None:
+            sys.exit("\nLine %d '%s' does not have a GBrowse format" % (self.currentLineNb, line))
+        if self.reference is None:
+            sys.exit("Cannot get reference of GBrowse line %d '%s'" % (self.currentLineNb, line))
+        transcript.setChromosome(self.reference)
+        transcript.setName(readsMatch.group(1))
+        transcript.setComment(readsMatch.group(3))
+
+        # exons: the transcript spans from the smallest to the largest bound
+        transcriptStart = 1000000000
+        transcriptEnd = 0
+        # sum of the signed exon lengths (second bound minus first bound)
+        direction = 0
+        for exon in readsMatch.group(2).split(","):
+            exonMatch = re.search(r"^(\d+)-(\d+)$", exon)
+            if exonMatch is None:
+                sys.exit("\nCannot read GBrowse exon line %d '%s'" % (self.currentLineNb, exon))
+            first = int(exonMatch.group(1))
+            second = int(exonMatch.group(2))
+            interval = Interval()
+            interval.setChromosome(transcript.chromosome)
+            direction += second - first
+            start, end = min(first, second), max(first, second)
+            interval.setStart(start)
+            interval.setEnd(end)
+            if start < transcriptStart:
+                transcriptStart = start
+            if end > transcriptEnd:
+                transcriptEnd = end
+            transcript.addExon(interval)
+
+        transcript.setStart(transcriptStart)
+        transcript.setEnd(transcriptEnd)
+        transcript.setDirection(direction)
+        for exon in transcript.getExons():
+            exon.setDirection(direction)
+        return transcript
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/GffParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/GffParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,149 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+
+
+class GffParser(TranscriptListParser):
+    """A class that parses a GFF file and create a transcript list"""
+
+
+    def __init__(self, fileName, verbosity = 0):
+        super(GffParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(GffParser, self).__del__()
+
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["gff", "gff2", "gff3"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        pass
+
+
+    def getInfos(self):
+        """
+        Get some generic information about the file
+
+        Collects the chromosome names, counts the lines without "Parent"
+        in their last field as transcripts, and sums the lengths of the
+        other lines.  Raises an Exception if a line does not have 9
+        tab-separated fields.
+        """
+        self.chromosomes = set()
+        self.nbTranscripts = 0
+        self.size = 0
+        self.reset()
+        if self.verbosity >= 10:
+            print("Getting information on %s." % (self.fileName))
+        for line in self.handle:
+            line = line.strip()
+            if line == "" or line[0] == "#":
+                continue
+            parts = line.split("\t")
+            if len(parts) != 9:
+                raise Exception("Error! Line '%s' has %d tab-separated fields instead of 9!" % (line, len(parts)))
+            self.chromosomes.add(parts[0])
+            if parts[8].find("Parent") == -1:
+                self.nbTranscripts += 1
+            else:
+                self.size += max(int(parts[3]), int(parts[4])) - min(int(parts[3]), int(parts[4])) + 1
+            if self.verbosity >= 10 and self.nbTranscripts % 100000 == 0:
+                sys.stdout.write(" %d transcripts read\r" % (self.nbTranscripts))
+                sys.stdout.flush()
+        self.reset()
+        if self.verbosity >= 10:
+            print(" %d transcripts read" % (self.nbTranscripts))
+            print("Done.")
+
+
+    def parseLine(self, line):
+        """
+        Parse one line of the file
+
+        A line with a "Parent" tag is added as an exon to the transcript
+        being built, and None is returned.  Any other line starts a new
+        transcript, and the previously built transcript (None for the
+        first one) is returned.  Empty and comment lines return None.
+        Raises an Exception if the line is not in GFF format, or if an
+        exon does not directly follow its transcript.
+        """
+        if not line or line[0] == "#":
+            return None
+        # the hyphen is escaped: "[+-.]" is a range that would also accept ","
+        m = re.search(r"^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+\-.])\s+(\S+)\s+(\S.*)$", line)
+        if m is None:
+            raise Exception("\nLine %d '%s' does not have a GFF format\n" % (self.currentLineNb, line))
+        interval = Interval()
+        interval.setChromosome(m.group(1))
+        interval.setName("unnamed transcript")
+        interval.setStart(min(int(m.group(4)), int(m.group(5))))
+        interval.setEnd(max(int(m.group(4)), int(m.group(5))))
+        if m.group(7) == ".":
+            interval.setDirection("+")
+        else:
+            interval.setDirection(m.group(7))
+        interval.setTagValue("feature", m.group(3))
+        # NOTE(review): only integer scores are kept; decimal scores are dropped
+        if m.group(6).isdigit():
+            interval.setTagValue("score", m.group(6))
+
+        # attributes: "field=value" (GFF3) or "field value" (GFF2)
+        for remaining in m.group(9).split(";"):
+            remaining = remaining.strip()
+            if remaining == "":
+                continue
+            posSpace = remaining.find(" ")
+            posEqual = remaining.find("=")
+            if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
+                # split on the first "=" only: the value may contain "="
+                parts = remaining.split("=", 1)
+            else:
+                parts = remaining.split()
+            field = parts[0].strip()
+            value = " ".join(parts[1:]).strip(" \"")
+            if field in ("Name", "name", "Sequence", "TE", "SAT"):
+                interval.setName(value)
+            else:
+                try:
+                    interval.setTagValue(field, int(value))
+                except ValueError:
+                    interval.setTagValue(field, value)
+
+        self.currentTranscriptAddress = self.previousTranscriptAddress
+        if "Parent" in interval.getTagNames():
+            if self.currentTranscript is None:
+                raise Exception("GFF file does not start with a transcript! First line is '%s'." % (line.strip()))
+            if interval.getTagValue("Parent") != self.currentTranscript.getTagValue("ID"):
+                raise Exception("Exon '%s' is not right after its transcript in GFF file!" % (interval))
+            self.currentTranscript.addExon(interval)
+            if interval.name is None:
+                interval.name = self.currentTranscript.name
+            return None
+
+        # new transcript: return the previous one, which is now complete
+        transcript = self.currentTranscript
+        self.currentTranscript = Transcript()
+        self.currentTranscript.copy(interval)
+        self.previousTranscriptAddress = self.currentAddress
+
+        if transcript is not None and transcript.name.startswith("unnamed"):
+            if "ID" in transcript.getTagNames():
+                transcript.name = transcript.getTagValue("ID")
+            else:
+                transcript.name = "unnamed transcript %s" % (self.currentLineNb)
+        return transcript

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/GffParser.pyc

Binary file commons/core/parsing/GffParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/GtfParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/GtfParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,113 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+
+
+class GtfParser(TranscriptListParser):
+    """A class that parses a GTF file and create a transcript list"""
+
+
+    def __init__(self, fileName, verbosity = 0):
+        super(GtfParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(GtfParser, self).__del__()
+
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["gtf", "gtf2"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        pass
+
+
+    def parseLine(self, line):
+        """
+        Parse one line of the file
+
+        Only "transcript" and "exon" features are used.  When the
+        transcript_id of the line differs from the one of the transcript
+        being built, a new transcript is started and the previous one
+        (None for the first one) is returned.  Otherwise None is
+        returned, after the line has been added as an exon if it is one.
+        Blank and comment lines return None.  Raises an Exception if the
+        line is not in GTF format.
+        """
+        # blank lines are skipped rather than indexed
+        if line.strip() == "" or line[0] == "#":
+            return None
+        # the hyphen is escaped: "[+-.]" is a range that would also accept ","
+        m = re.search(r"^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+\-.])\s+(\S+)\s+(\S.*)$", line)
+        if m is None:
+            raise Exception("\nLine %d '%s' does not have a GTF format\n" % (self.currentLineNb, line))
+        interval = Interval()
+        interval.setChromosome(m.group(1))
+        interval.setName("unnamed transcript")
+        interval.setStart(min(int(m.group(4)), int(m.group(5))))
+        interval.setEnd(max(int(m.group(4)), int(m.group(5))))
+        if m.group(7) == ".":
+            interval.setDirection("+")
+        else:
+            interval.setDirection(m.group(7))
+        if m.group(6).isdigit():
+            interval.setTagValue("score", m.group(6))
+        featureType = m.group(3)
+
+        if featureType not in ("transcript", "exon"):
+            return None
+
+        # attributes: 'field "value"' pairs separated by semicolons
+        for remaining in m.group(9).split(";"):
+            remaining = remaining.strip()
+            if remaining == "":
+                continue
+            parts = remaining.split(" ", 1)
+            field = parts[0].strip()
+            value = " ".join(parts[1:]).strip(" \"")
+            if field == "transcript_id":
+                interval.setTagValue("ID", value)
+            elif field in ("gene_name", "transcript_name"):
+                interval.setName(value)
+            elif field == "exon_number":
+                continue
+            else:
+                try:
+                    interval.setTagValue(field, int(value))
+                except ValueError:
+                    interval.setTagValue(field, value)
+
+        self.currentTranscriptAddress = self.previousTranscriptAddress
+        if self.currentTranscript is None or interval.getTagValue("ID") != self.currentTranscript.getTagValue("ID"):
+            # new transcript: return the previous one, which is now complete
+            transcript = self.currentTranscript
+            self.currentTranscript = Transcript()
+            self.currentTranscript.copy(interval)
+            self.currentTranscript.setTagValue("feature", "transcript")
+            self.previousTranscriptAddress = self.currentAddress
+            return transcript
+        if featureType == "exon":
+            self.currentTranscript.addExon(interval)
+        return None

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/GtfParser.pyc

Binary file commons/core/parsing/GtfParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/MapParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,67 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+
+
+class MapParser(TranscriptListParser):
+    """A class that parses the repet .map files"""
+
+    def __init__(self, fileName, verbosity = 0):
+        # sequence name, chromosome name, start and end, one whitespace apart
+        self._lineParseRe = re.compile(r"(?P<seqName>\w+)\s(?P<chrName>\w+)\s(?P<sStart>\d+)\s(?P<sEnd>\d+)")
+        TranscriptListParser.__init__(self, fileName, verbosity)
+
+    def getFileFormats():
+        """
+        Get the names of the file formats handled by this parser
+        """
+        return ["map"]
+    getFileFormats = staticmethod(getFileFormats)
+
+    def skipFirstLines(self):
+        return
+
+    def parseLine(self, line):
+        """
+        Build a transcript from one line of the file
+
+        The transcript goes from the smaller to the larger coordinate and
+        its direction is always 1.  Exits the program if the line does
+        not match the expected format.
+        """
+        match = self._lineParseRe.search(line)
+        if match is None:
+            sys.exit("\nLine %d '%s' does not have a map format" % (self.currentLineNb, line))
+
+        lower, upper = sorted((int(match.group("sStart")), int(match.group("sEnd"))))
+
+        transcript = Transcript()
+        transcript.setChromosome(match.group("chrName"))
+        transcript.setStart(lower)
+        transcript.setEnd(upper)
+        transcript.setName(match.group("seqName"))
+        transcript.setDirection(1)
+
+        return transcript

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MapParser.pyc

Binary file commons/core/parsing/MapParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MapperParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/MapperParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,129 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+
+
+class MapperParser(object):
+    """An interface that parses the output of a generic mapper.
+
+    Subclasses are expected to provide skipFirstLines(), called once when
+    the file is opened, and parseLine(line), which returns a mapping or
+    None when the line produces no mapping.
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        """Open fileName and remember the position where the data start.
+
+        fileName:  path of the mapper output file
+        verbosity: progress messages are printed when this is >= 10
+        """
+        super(MapperParser, self).__init__()
+        self.verbosity = verbosity
+        # Set before skipFirstLines() so that subclass hooks can rely on it.
+        self.fileName = fileName
+        self.nbMappings = None
+        self.chromosomes = None
+        self.size = None
+        self.currentMapping = Mapping()
+        self.currentLineNb = 0
+        self.handle = open(fileName)
+        self.skipFirstLines()
+        self.startingPoint = self.handle.tell()
+
+
+    def __del__(self):
+        # __del__ also runs when __init__ failed before the file was opened;
+        # in that case there is no handle to close.
+        handle = getattr(self, "handle", None)
+        if handle is not None:
+            handle.close()
+
+
+    def reset(self):
+        """Go back to the first data line and restart the line counter."""
+        self.handle.seek(self.startingPoint)
+        self.currentLineNb = 0
+
+
+    def getNextMapping(self):
+        """Return the next mapping of the file, or False at end of file.
+
+        Lines for which parseLine() returns None are skipped.
+        """
+        for line in self.handle:
+            mapping = self.parseLine(line)
+            self.currentLineNb += 1
+            if mapping is not None:
+                return mapping
+        return False
+
+
+    def getIterator(self):
+        """Yield every mapping of the file, starting from the first data line."""
+        self.reset()
+        mapping = self.getNextMapping()
+        while mapping:
+            yield mapping
+            mapping = self.getNextMapping()
+
+
+    def getInfos(self):
+        """Read the whole file once to fill nbMappings, chromosomes and size."""
+        self.chromosomes = set()
+        self.nbMappings = 0
+        self.size = 0
+        self.reset()
+        if self.verbosity >= 10:
+            print("Getting information.")
+        for mapping in self.getIterator():
+            transcript = mapping.getTranscript()
+            self.chromosomes.add(transcript.getChromosome())
+            self.nbMappings += 1
+            self.size += transcript.getSize()
+            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:
+                sys.stdout.write("    %d mappings read\r" % (self.nbMappings))
+                sys.stdout.flush()
+        self.reset()
+        if self.verbosity >= 10:
+            print("    %d mappings read" % (self.nbMappings))
+            print("Done.")
+
+
+    def getNbMappings(self):
+        """Return the number of mappings, scanning the file on first use."""
+        if self.nbMappings is None:
+            self.getInfos()
+        return self.nbMappings
+
+
+    def getNbItems(self):
+        """Alias of getNbMappings()."""
+        return self.getNbMappings()
+
+
+    def getChromosomes(self):
+        """Return the set of chromosome names, scanning the file on first use."""
+        if self.chromosomes is None:
+            self.getInfos()
+        return self.chromosomes
+
+
+    def getSize(self):
+        """Return the summed transcript sizes, scanning the file on first use."""
+        if self.size is None:
+            self.getInfos()
+        return self.size
+
+
+    def getNbNucleotides(self):
+        """Alias of getSize()."""
+        return self.getSize()
+
+
+    def setDefaultTagValue(self, name, value):
+        """Call setTagValue(name, value) on every mapping of the file.
+
+        NOTE(review): the mappings are produced afresh by the iterator and
+        are not stored here, so the tag presumably does not persist beyond
+        this loop -- confirm against callers.
+        """
+        for mapping in self.getIterator():
+            mapping.setTagValue(name, value)

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MapperParser.pyc

Binary file commons/core/parsing/MapperParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MaqParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/MaqParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,77 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from commons.core.parsing.MapperParser import MapperParser
+
+
+class MaqParser(MapperParser):
+    """Parser for the text output of the Maq mapper"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(MaqParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(MaqParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["maq"]
+
+
+    def skipFirstLines(self):
+        """Nothing is skipped at the top of a Maq file."""
+        return
+
+
+    def parseLine(self, line):
+        """Build a Mapping from one 16-column Maq line.
+
+        Columns used: 1 = query name, 2 = target chromosome, 3 = target
+        start, 4 = strand, 10 = number of mismatches, 12 + 13 = number of
+        occurrences, 14 = size (applied to target, query and mapping).
+        The process exits with a message when the line does not match.
+        """
+        fields = re.search(r"^\s*(\S+)\s+(\S+)\s+(\d+)\s+([+-])\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s*$", line)
+        if fields is None:
+            sys.exit("\nLine %d '%s' does not have a MAQ format" % (self.currentLineNb, line))
+
+        # The same size is shared by the target, the query and the mapping.
+        size = int(fields.group(14))
+
+        mapping = Mapping()
+
+        target = mapping.targetInterval
+        target.setStart(int(fields.group(3)))
+        target.setSize(size)
+        target.setChromosome(fields.group(2))
+
+        query = mapping.queryInterval
+        query.setStart(1)
+        query.setSize(size)
+        query.setName(fields.group(1))
+
+        mapping.setDirection(fields.group(4))
+        mapping.setSize(size)
+        mapping.setNbMismatches(int(fields.group(10)))
+        mapping.setRank(1)
+        mapping.setNbOccurrences(int(fields.group(12)) + int(fields.group(13)))
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MaqParser.pyc

Binary file commons/core/parsing/MaqParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MrepsToSet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/MrepsToSet.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,31 @@
+from commons.core.parsing.FindRep import FindRep
+from xml.sax import make_parser
+from xml.sax.handler import feature_namespaces
+import os
+
+
+class MrepsToSet(object):
+    """Turn the XML output of Mreps into a 'set' file by means of FindRep."""
+
+    def __init__(self, mrepsInputFileName="", mrepsOuputFileName="", outputFileName=None, errorFilter=0):
+        self._mrepsInputFileName = mrepsInputFileName
+        self._mrepsOuputFileName = mrepsOuputFileName
+        # Without an explicit name, the result goes next to the Mreps output.
+        if outputFileName:
+            self._outputFileName = outputFileName
+        else:
+            self._outputFileName = "%s.Mreps.set" % mrepsOuputFileName
+        self._errorFilter = errorFilter
+
+    def run(self):
+        """Parse the Mreps XML file with a SAX parser driven by FindRep."""
+        handler = FindRep( self._outputFileName, self._errorFilter, 0 )
+        saxParser = make_parser()
+        saxParser.setFeature( feature_namespaces, 0 )
+        saxParser.setContentHandler( handler )
+        saxParser.parse( self._mrepsOuputFileName )
+
+    def clean( self ):
+        """
+        Remove the output file (xml) from Mreps to keep only the 'set' file.
+        """
+        if not os.path.exists(self._mrepsOuputFileName):
+            return
+        os.remove(self._mrepsOuputFileName)
+
+
+
+
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/Multifasta2SNPFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/Multifasta2SNPFile.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,846 @@\n+import re\n+import os\n+import logging\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.LoggerFactory import LoggerFactory\n+\n+DNA_ALPHABET_WITH_N_AND_DELS = set ([\'A\',\'T\',\'G\',\'C\',\'N\',\'-\'])\n+IUPAC = set([\'A\',\'T\',\'G\',\'C\',\'U\',\'R\',\'Y\',\'M\',\'K\',\'W\',\'S\',\'B\',\'D\',\'H\',\'V\',\'N\', \'-\', \'a\',\'t\',\'g\',\'c\',\'u\',\'r\',\'y\',\'m\',\'k\',\'w\',\'s\',\'b\',\'d\',\'h\',\'v\',\'n\'])\n+\n+class Multifasta2SNPFile( object ):\n+\n+ POLYM_TYPE_4_SNP = "SNP"\n+ POLYM_TYPE_4_INSERTION = "INSERTION"\n+ POLYM_TYPE_4_DELETION = "DELETION"\n+ POLYM_DEFAULT_CONFIDENCE_VALUE = "A"\n+ SNP_LENGTH = 1\n+ FLANK_LENGTH = 250\n+ \n+ def __init__(self, taxon, batchName="", geneName=""):\n+ \n+ if(batchName):\n+ self._batchName = batchName\n+ \n+ if(geneName):\n+ self._geneName = geneName\n+\n+ self._taxon = taxon\n+ self._outSubSNPFileName = "SubSNP.csv"\n+ self._outAlleleFileName = "Allele.csv"\n+ self._outIndividualFileName = "Individual.csv"\n+ self._outSequenceFSAFileName = "Sequences.fsa"\n+ self._outSequenceCSVFileName = "Sequences.csv"\n+ self._outBatchFileName = "Batch.txt"\n+ self._outBatchLineFileName = "BatchLine.csv"\n+ self._logFileName = "multifasta2SNP.log"\n+ \n+ self._lBatchFileResults = []\n+ self._lSubSNPFileResults = []\n+ self._lRefSequences = []\n+ self._lIndividualFileResults = []\n+ self._lBatchLineFileResults = []\n+ self._dIndividualNumbers4SubSNPResults = {}\n+ self._dAlleleFileResults = {}\n+ \n+ \n+ self.dcurrentIndel = {}\n+ self.lIndelsOfTheCurrentLine = []\n+ self.lIndelsOverAllLines = []\n+ self.dSNPsPositions = {}\n+ \n+ self._iCurrentLineNumber = 0\n+ self._currentBatchNumber = 1\n+ self.currentLineName = ""\n+ self.currentNucleotide = ""\n+ self.currentPosition = 0\n+ self._sPolymConfidenceValue = Multifasta2SNPFile.POLYM_DEFAULT_CONFIDENCE_VALUE \n+ self._sPolymType = 
Multifasta2SNPFile.POLYM_TYPE_4_SNP\n+ self._iPolymLength = Multifasta2SNPFile.SNP_LENGTH\n+ self._fileUtils = FileUtils()\n+ \n+ if self._fileUtils.isRessourceExists(self._logFileName):\n+ os.remove(self._logFileName)\n+ self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")\n+ \n+ def runOneBatch( self, inFileName):\n+ self._currentFileName = inFileName\n+ #TODO: methode a virer; n\'utiliser au final que runOneBatchWithoutWriting\n+ self._wrapper = self.createWrapperFromFile(inFileName)\n+ self._lBatchFileResults = self.completeBatchList()\n+ self.detectSNPsAndIndels(self._wrapper) \n+ self._writeAllOutputFiles()\n+ self._currentBatchNumber += 1\n+ \n+ def runOneBatchWithoutWriting( self, inFileName):\n+ self.lIndelsOverAllLines = []\n+ self._currentFileName = inFileName\n+ self._wrapper = self.createWrapperFromFile(inFileName)\n+ self._lBatchFileResults = self.completeBatchList()\n+ self.detectSNPsAndIndels(self._wrapper) \n+ self._currentBatchNumber += 1\n+ \n+\n+ def _cleanOutputsInTheCurrentDir(self):\n+ #TODO: create a list of files to be deleted\n+ FileUtils.removeFilesByPattern("*.csv")\n+ if (FileUtils.isRessourceExists(self._outBatchFileName)):\n+ os.remove(self._outBatchFileName)\n+ if (FileUtils.isRessourceExists(self._outSequenceFSAFileName)):\n+ os.remove(self._outSequenceFSAFileName)\n+\n+\n+ def _createOutputObjectsIteratingOnCurrentDir(self):\n+ #TODO: gerer les extensions multiples\n+ extList = [".fasta", ".fsa"]\n+ for dirname, dirnames, filenames in os.walk("."):\n+ filenames.sort()\n+ for filename in filenames:\n+ '..b'elf, batchLineFileName, lBatchLineResults):\n+ outF = open(batchLineFileName, "w")\n+ self._writeBatchLineFileHeader(outF)\n+ for dResult in lBatchLineResults:\n+ self._writeBatchLineFileLine(outF, dResult)\n+ outF.close()\n+ \n+ def _writeSNPFileHeader(self, outF):\n+ for head in Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[:-1]:\n+ outF.write(head + 
self._csvFieldSeparator)\n+ outF.write(Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[-1] + self._csvLineSeparator)\n+ \n+ def _writeAlleleFileHeader(self, outF):\n+ for head in Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[:-1]:\n+ outF.write(head + self._csvFieldSeparator)\n+ outF.write(Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[-1] + self._csvLineSeparator)\n+ \n+ def _writeIndividualFileHeader(self, outF):\n+ for head in Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[:-1]:\n+ outF.write(head + self._csvFieldSeparator)\n+ outF.write(Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[-1] + self._csvLineSeparator)\n+ \n+ def _writeSequenceCSVHeader(self, outF):\n+ for head in Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[:-1]:\n+ outF.write(head + self._csvFieldSeparator)\n+ outF.write(Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[-1] + self._csvLineSeparator)\n+ \n+ def _writeBatchLineFileHeader(self, outF):\n+ for head in Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[:-1]:\n+ outF.write(head + self._csvFieldSeparator)\n+ outF.write(Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[-1] + self._csvLineSeparator) \n+ \n+ def _writeSNPFileLine(self, outF, dSNP):\n+ outF.write(dSNP[\'subSNPName\'] + self._csvFieldSeparator)\n+ outF.write(dSNP[\'confidenceValue\'] + self._csvFieldSeparator + dSNP[\'type\'] + self._csvFieldSeparator)\n+ outF.write(str(dSNP[\'position\']) + self._csvFieldSeparator + dSNP[\'5flank\'] + self._csvFieldSeparator + dSNP[\'3flank\'] + self._csvFieldSeparator)\n+ outF.write(str(dSNP[\'length\']) + self._csvFieldSeparator + str(dSNP[\'batchNumber\']) + self._csvFieldSeparator)\n+ outF.write(str(dSNP[\'lineName\']) + self._csvFieldSeparator)\n+ outF.write(self._primerType + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + str(dSNP[\'allele\']) + self._csvLineSeparator)\n+\n+ def _writeAlleleFileLine(self, outF, sAllele2Write, iAlleleNumber):\n+ outF.write(str(iAlleleNumber) + self._csvFieldSeparator)\n+ 
outF.write(sAllele2Write + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator)\n+ \n+ def _writeIndividualFileLine(self, outF, dIndividual):\n+ outF.write(str(dIndividual[\'individualNumber\']) + self._csvFieldSeparator)\n+ outF.write(dIndividual[\'individualName\'] + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator)\n+ outF.write(dIndividual[\'scientificName\'] + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator)\n+ \n+ def _writeSequenceCSVLine(self, outF, refSeq, taxon):\n+ outF.write(refSeq.header + self._csvFieldSeparator)\n+ outF.write("Reference" + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator)\n+ outF.write(taxon + self._csvLineSeparator) \n+ \n+ def _writeBatchLineFileLine(self, outF, dResult):\n+ outF.write(str(dResult[\'IndividualNumber\']) + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator)\n+ outF.write(str(dResult[\'BatchNumber\']) + self._csvFieldSeparator + self._csvLineSeparator)\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/MummerParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/MummerParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,93 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+
+class MummerParser(MapperParser):
+    """Parser for the Mummer output format"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(MummerParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(MummerParser, self).__del__()
+
+
+    @staticmethod
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["mummer"]
+
+
+    def skipFirstLines(self):
+        """Nothing is skipped at the top of a Mummer file."""
+        return
+
+
+    def parseLine(self, line):
+        """Build a Mapping from a header line and the line that follows it.
+
+        'line' must be a '> name [Reverse] Len = N' header; the next line is
+        then read directly from the file handle and must hold
+        'target targetStart queryStart size'.  The process exits with a
+        message when either line does not match.
+        """
+        mapping = Mapping()
+
+        subMapping = SubMapping()
+
+        # handle header: the reverse form is tried first, then the plain one
+        headerForms = (
+            (r"^>\s+(\S+)\s+Reverse\s+Len\s+=\s+(\d+)$", -1),
+            (r"^>\s+(\S+)\s+Len\s+=\s+(\d+)$", 1),
+        )
+        header = None
+        for pattern, direction in headerForms:
+            header = re.search(pattern, line)
+            if header is not None:
+                break
+        if header is None:
+            sys.exit("Header line %d '%s' is strange in Mummer file" % (self.currentLineNb, line))
+        subMapping.queryInterval.setName(header.group(1))
+        subMapping.queryInterval.setSize(int(header.group(2)))
+        subMapping.queryInterval.setDirection(direction)
+
+        # pull exactly one more line from the handle; at end of file the
+        # header line is kept and the counter is left untouched
+        for line in self.handle:
+            self.currentLineNb += 1
+            break
+        line = line.strip()
+
+        # handle line
+        hit = re.search(r"^(\w+)\s+(\d+)\s+(\d+)\s+(\d+)$", line)
+        if hit is None:
+            sys.exit("Line %d '%s' is strange in Mummer file" % (self.currentLineNb, line))
+        subMapping.targetInterval.setName(hit.group(1))
+        subMapping.targetInterval.setStart(int(hit.group(2)))
+        subMapping.queryInterval.setStart(int(hit.group(3)))
+        subMapping.targetInterval.setSize(int(hit.group(4)))
+
+        mapping.addSubMapping(subMapping)
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/NCListParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/NCListParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,125 @@
+#
+# Copyright INRA-URGI 2009-2012
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+try:
+   import cPickle as pickle
+except:
+   import pickle
+
+
+class NCListParser(TranscriptListParser):
+    """A parser that reads transcripts from a pickled NC-list index file.
+
+    The index file holds, in this order, four pickled objects: the sorted
+    file names, the number of elements, the number of elements per
+    chromosome and the NC-lists (one per chromosome).  Transcripts are then
+    read chromosome by chromosome, in sorted chromosome-name order, through
+    NCListFileUnpickle readers.
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        self.title = None
+        TranscriptListParser.__init__(self, fileName, verbosity)
+        self.parse()
+
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["nclist"]
+    getFileFormats = staticmethod(getFileFormats)
+
+    def skipFirstLines(self):
+        """Nothing is skipped at the top of an NC-list file."""
+        return
+
+    def parse(self):
+        """Load the index file and prepare the per-chromosome file names.
+
+        NOTE(review): pickle.load can execute arbitrary code; only open
+        index files that come from a trusted source.
+        """
+        handle = open(self.fileName)
+        try:
+            self.sortedFileNames         = pickle.load(handle)
+            self.nbElements              = pickle.load(handle)
+            self.nbElementsPerChromosome = pickle.load(handle)
+            self.ncLists                 = pickle.load(handle)
+            for ncList in self.ncLists.values():
+                ncList._reopenFiles()
+        finally:
+            # Close the index file even when unpickling fails.
+            handle.close()
+        self.chromosomes     = sorted(self.nbElementsPerChromosome.keys())
+        self.fileNames       = dict([chromosome, self.ncLists[chromosome]._transcriptFileName] for chromosome in self.chromosomes)
+        self.currentReader   = None
+        self.currentChrIndex = 0
+
+    # The getters below return the attributes filled in by parse().
+
+    def getSortedFileNames(self):
+        return self.sortedFileNames
+
+    def getNbElements(self):
+        return self.nbElements
+
+    def getNbElementsPerChromosome(self):
+        return self.nbElementsPerChromosome
+
+    def getNCLists(self):
+        return self.ncLists
+
+    def reset(self):
+        """Restart the reading from the first chromosome."""
+        self.currentChrIndex = 0
+        self.currentReader   = None
+
+    def gotoAddress(self, address):
+        """Move the current reader to the given address.
+
+        NOTE(review): a reader must already be open; right after reset()
+        there is none -- confirm that callers read a transcript first.
+        """
+        self.currentReader.gotoAddress(address)
+
+    def getCurrentAddress(self):
+        """Alias of getCurrentTranscriptAddress()."""
+        return self.getCurrentTranscriptAddress()
+
+    def getCurrentTranscriptAddress(self):
+        """Return the address given by the current reader, or 0 if none is open."""
+        if self.currentReader is None:
+            return 0
+        return self.currentReader.getCurrentTranscriptAddress()
+
+    def _openCurrentReader(self):
+        """Open the reader of the current chromosome; return False when no chromosome is left."""
+        if self.currentChrIndex >= len(self.chromosomes):
+            return False
+        self.currentReader = NCListFileUnpickle(self.fileNames[self.chromosomes[self.currentChrIndex]])
+        return True
+
+    def getNextTranscript(self):
+        """Return the next transcript, or None once every chromosome is read."""
+        if self.currentReader is None:
+            # Also covers an index without any chromosome.
+            if not self._openCurrentReader():
+                return None
+        transcript = self.currentReader.getNextTranscript()
+        # The readers signal their end with False; move on to the following
+        # chromosome until a transcript is found, so that a chromosome
+        # without transcript never leaks the False marker to the caller.
+        while transcript == False:
+            self.currentChrIndex += 1
+            if not self._openCurrentReader():
+                return None
+            transcript = self.currentReader.getNextTranscript()
+        return transcript
+
+    def getInfos(self):
+        """Sum the sizes of all the transcripts into self.size."""
+        self.size = 0
+        self.reset()
+        progress = UnlimitedProgress(100000, "Getting information on %s." % (self.fileName), self.verbosity-9)
+        for transcript in self.getIterator():
+            self.size += transcript.getSize()
+            progress.inc()
+        progress.done()
+        self.reset()
+
+    def getNbTranscripts(self):
+        """Return the number of elements stored in the index file."""
+        return self.nbElements

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/NCListParser.pyc

Binary file commons/core/parsing/NCListParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/NucmerParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/NucmerParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,88 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.Interval import Interval
+from commons.core.parsing.MapperParser import MapperParser
+
+
+class NucmerParser(MapperParser):
+    """A class that parses the output of Nucmer.
+
+    A line starting with '>' names the current target sequence; each
+    following line of 8 whitespace-separated columns describes one match
+    on that target.
+    """
+
+    def __init__(self, fileName, verbosity = 0):
+        super(NucmerParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(NucmerParser, self).__del__()
+
+
+    def getFileFormats():
+        """Return the list of format names handled by this parser."""
+        return ["nucmer"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        """Nothing is skipped at the top of a Nucmer file."""
+        pass
+
+
+    def parseLine(self, line):
+        """Parse one line; return a Mapping, or None for header and blank lines.
+
+        Columns used (0-based): 0 and 1 = target bounds (in any order),
+        3 = size (also used as the query end), 5 = identity, 6 = direction,
+        7 = query name.
+
+        Raises Exception when a match line does not have 8 columns, or
+        when it comes before any '>' header.
+        """
+        # Lines read from a file keep their end-of-line character, so a
+        # blank line is never the empty string: test the stripped form.
+        if not line.strip():
+            return None
+        if line[0] == ">":
+            self.currentChromosome = line[1:].split()[0]
+            return None
+        splittedLine = line.strip().split()
+        if len(splittedLine) != 8:
+            raise Exception("Line %d '%s' does not have a NucMer format" % (self.currentLineNb, line))
+
+        # currentChromosome only exists once a header line has been read.
+        chromosome = getattr(self, "currentChromosome", None)
+        if chromosome is None:
+            raise Exception("Line %d '%s' comes before any '>' header in NucMer file" % (self.currentLineNb, line))
+
+        firstBound  = int(splittedLine[0])
+        secondBound = int(splittedLine[1])
+        size        = int(splittedLine[3])
+
+        subMapping = SubMapping()
+
+        subMapping.targetInterval.setChromosome(chromosome)
+        subMapping.targetInterval.setName(chromosome)
+        subMapping.targetInterval.setStart(min(firstBound, secondBound))
+        subMapping.targetInterval.setEnd(max(firstBound, secondBound))
+        subMapping.targetInterval.setDirection(splittedLine[6])
+
+        subMapping.queryInterval.setChromosome(splittedLine[7])
+        subMapping.queryInterval.setName(splittedLine[7])
+        subMapping.queryInterval.setStart(1)
+        subMapping.queryInterval.setEnd(size)
+        subMapping.queryInterval.setDirection("+")
+
+        mapping = Mapping()
+        mapping.addSubMapping(subMapping)
+        mapping.setDirection(splittedLine[6])
+        mapping.setIdentity(float(splittedLine[5]))
+        mapping.setSize(size)
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PalsToAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PalsToAlign.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,66 @@
+import time
+import os
+
+class PalsToAlign(object):
+    """
+    Convert the output from PALS (GFF2 format) into the 'align' format.
+
+    Each input line gives one output line with the tab-separated fields:
+    query name, query start, query end, subject name, subject start,
+    subject end, "0.0", score, percentage of identity.  The result is
+    sorted with the system 'sort' command.
+    """
+    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
+        self._removeSameSequences = removeSameSequences
+        self._inputPalsFileName = inputPalsFileName
+        self._outputAlignFileName = outputAlignFileName
+
+    def _convertLine(self, line):
+        """Return the 'align' line for one PALS line, or None when the hit is filtered out."""
+        data = line.rstrip("\r\n").split("\t")
+
+        qryName = data[0]
+        qryStart = data[3]
+        qryEnd = data[4]
+        score = data[5]
+        strand = data[6]
+        attributes = data[8].split()
+
+        sbjName = attributes[1]
+        sbjStart = attributes[2]
+        # the last character of this field is dropped (presumably the ';'
+        # that ends the target description)
+        sbjEnd = attributes[3][:-1]
+        percId = (1 - float(attributes[-1])) * 100.0
+
+        if strand != "+":
+            sbjStart, sbjEnd = sbjEnd, sbjStart
+
+        # optionally drop the perfect hits between chunks that both start
+        # at position 1
+        if self._removeSameSequences \
+        and "chunk" in qryName and "chunk" in sbjName \
+        and min(int(qryStart), int(qryEnd)) == 1 \
+        and min(int(sbjStart), int(sbjEnd)) == 1 \
+        and percId == 100.0:
+            return None
+
+        # coordinates are compared as numbers: as strings, "9" < "10" is false
+        if int(qryStart) < int(qryEnd):
+            return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
+        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
+
+    def run (self):
+        """Convert the input file and write the sorted result to the output file."""
+        import subprocess
+
+        palsFile = open(self._inputPalsFileName, "r")
+        tmpFileName = "PalsToAlign%s"%str(os.getpid() )
+        tmpFile = open(tmpFileName, "w")
+        try:
+            try:
+                for line in palsFile:
+                    # blank lines carry no hit
+                    if not line.strip():
+                        continue
+                    alignLine = self._convertLine(line)
+                    if alignLine is not None:
+                        tmpFile.write(alignLine)
+            finally:
+                palsFile.close()
+                tmpFile.close()
+
+            # The file names are passed as arguments, without any shell, so
+            # that spaces or special characters in them are harmless.
+            outFile = open(self._outputAlignFileName, "w")
+            try:
+                subprocess.call(["sort", "-k", "1,1", "-k", "4,4", "-k", "2,2n", "-k", "3,3n", "-k", "5,5n", "-k", "6,6n", "-k", "8,8n", tmpFileName], stdout=outFile)
+            finally:
+                outFile.close()
+        finally:
+            # the temporary file is removed even when the conversion fails
+            os.remove(tmpFileName)
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ParserChooser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/ParserChooser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,128 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+from commons.core.parsing.MapperParser import MapperParser
+from commons.core.parsing.SequenceListParser import SequenceListParser
+from commons.core.parsing.BedParser import BedParser
+from commons.core.parsing.GffParser import GffParser
+from commons.core.parsing.MapperParser import MapperParser
+from commons.core.parsing.CoordsParser import CoordsParser
+from commons.core.parsing.SeqmapParser import SeqmapParser
+from commons.core.parsing.SoapParser import SoapParser
+from commons.core.parsing.Soap2Parser import Soap2Parser
+from commons.core.parsing.BlastParser import BlastParser
+from commons.core.parsing.PslParser import PslParser
+from commons.core.parsing.RmapParser import RmapParser
+from commons.core.parsing.ShrimpParser import ShrimpParser
+from commons.core.parsing.AxtParser import AxtParser
+from commons.core.parsing.ExoParser import ExoParser
+from commons.core.parsing.MaqParser import MaqParser
+from commons.core.parsing.SamParser import SamParser
+from commons.core.parsing.BamParser import BamParser
+from commons.core.parsing.BowtieParser import BowtieParser
+from commons.core.parsing.ElandParser import ElandParser
+from commons.core.parsing.GtfParser import GtfParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.parsing.FastqParser import FastqParser
+from commons.core.parsing.MapParser import MapParser
+from commons.core.parsing.NCListParser import NCListParser
+from commons.core.parsing.PklParser import PklParser
+
+#Attention!! Do not delete the imports!! They are used to know the type of file format!!!
+
+class ParserChooser(object):
+    """
+    A class that finds the correct parser for a file format.
+    Candidate parsers are the direct subclasses of TranscriptListParser,
+    MapperParser and SequenceListParser, as reported by __subclasses__().
+    @ivar type: transcript / mapping / sequence parser (None until a format is found)
+    @type type: string
+    @ivar parserClass: the parser class found by findFormat (None until then)
+    @type parserClass: class
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.type = None
+        self.parserClass = None
+        self.verbosity = verbosity
+
+
+    def findFormat(self, format, type = None):
+        """
+        Find the parser class that handles a format, and remember it together
+        with its type; nothing is returned, use getParser and getType afterwards
+        @param format: the format, as listed by the getFileFormats() of a parser
+        @type format: string
+        @param type: transcript / mapping / sequence parser (None is all)
+        @type type: string
+        @raise Exception: if the type is unknown, or if no parser handles the format
+        """
+        classes = {}
+        if type == "transcript":
+            classes = {TranscriptListParser: "transcript"}
+        elif type == "mapping":
+            classes = {MapperParser: "mapping"}
+        elif type == "sequence":
+            classes = {SequenceListParser: "sequence"}
+        elif type is None:
+            classes = {TranscriptListParser: "transcript", MapperParser: "mapping", SequenceListParser: "sequence"}
+        else:
+            raise Exception("Do not understand format type '%s'" % (type))
+
+        for classType in classes:
+            # only direct subclasses are inspected
+            for parserClass in classType.__subclasses__():
+                if format in parserClass.getFileFormats():
+                    self.parserClass = parserClass
+                    self.type = classes[classType]
+                    return
+        raise Exception("Cannot get parser for format '%s'" % (format))
+
+
+    def getParser(self, fileName):
+        """
+        Instantiate the parser class previously found by findFormat
+        @param fileName: name of the file to parse
+        @type fileName: string
+        @return: the parser
+        @raise Exception: if findFormat has not found a parser yet
+        """
+        if self.parserClass is None:
+            # fail with an explicit message instead of "'NoneType' object is not callable"
+            raise Exception("No parser selected: call findFormat() before getParser()")
+        return self.parserClass(fileName, self.verbosity)
+
+
+    def getType(self):
+        """
+        Get the type of parser previously found
+        @return: the type of parser (None if no format was found yet)
+        """
+        return self.type

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ParserChooser.pyc

Binary file commons/core/parsing/ParserChooser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PathNum2Id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PathNum2Id.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,47 @@
+class PathNum2Id( object ):
+    """
+    Renumber the path identifiers of a 'path' file.
+
+    The input is expected to be the concatenation of several 'path' files, so
+    the same identifier may be used by unrelated paths. Each distinct
+    (path, query name, subject name) triple gets a new identifier, numbered
+    from 1 in order of first appearance.
+    """
+
+    def __init__(self):
+        self._inFileName = None
+        self._outFileName = None
+
+    def setInFileName(self, fileName):
+        """Set the name of the 'path' file to read."""
+        self._inFileName = fileName
+
+    def setOutFileName(self, fileName):
+        """Set the name of the 'path' file to write."""
+        self._outFileName = fileName
+
+    def run( self ):
+        """
+        Adapt the path IDs as the input file is the concatenation of several 'path' files.
+
+        Each input line holds at least 10 tab-separated fields: path, query name,
+        query start, query end, subject name, subject start, subject end,
+        E-value, score and identity. Only the path field is renumbered; the
+        other fields are written back in the same order. Blank lines are skipped.
+        Raises IndexError if a line has fewer than 10 fields and ValueError if
+        a coordinate is not an integer.
+        """
+        dID2count = {}
+        count = 1
+        # 'with' closes both files even if a malformed line raises
+        with open( self._inFileName, "r" ) as inFile, open( self._outFileName, "w" ) as outFile:
+            # kept as attributes, as before, for code that inspects them
+            self._inFile = inFile
+            self._outFile = outFile
+            # stream the file instead of loading it with readlines()
+            for line in inFile:
+                if line.strip() == "":
+                    continue
+                strippedLine = line.strip('\n')
+                data = strippedLine.split("\t")
+                path = data[0]
+                qryName = data[1]
+                qryStart = int(data[2])
+                qryEnd = int(data[3])
+                sbjName = data[4]
+                sbjStart = int(data[5])
+                sbjEnd = int(data[6])
+                BLAST_Eval = data[7]
+                BLAST_score = data[8]
+                percId = data[9]
+                key_id = path + "-" + qryName + "-" + sbjName
+                # direct dict lookup: 'in d.keys()' scans a list in Python 2
+                if key_id not in dID2count:
+                    dID2count[ key_id ] = count
+                    count += 1
+                newPath = dID2count[ key_id ]
+                cmd = "%i\t%s\t%i\t%i\t%s\t%i\t%i\t%s\t%s\t%s\n" % ( newPath, qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, BLAST_Eval, BLAST_score, percId )
+                outFile.write( cmd )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PilerTAToGrouperMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PilerTAToGrouperMap.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,85 @@
+import time
+import os
+
+class PilerTAToGrouperMap(object):
+    """
+    Convert the output file from Piler into grouper format.
+
+    Three files are written: the pile/pyramid info file (outputFileName),
+    the motif file with grouper member IDs (<motif file>.grp) and the same
+    members in map format (<motif file>.grp.map).
+    """
+    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
+        self._inputGffFileName = inputGffFileName
+        self._inputPYRFileName = inputPYRFileName
+        self._inputMOTIFFileName = inputMOTIFFileName
+        self._outFileName = outputFileName
+
+    def run(self):
+        """
+        Run the two conversion steps.
+
+        Step 0 writes one "<pile>\t<pyramid>" line per line of the pyramid file.
+        Step 1 reads that info file back and uses it to label every motif.
+        Blank lines of the pyramid and motif files are skipped.
+        Raises IndexError if a motif refers to a pyramid for which no pile was found.
+        """
+        #step 0 : get pile Info and write out an info file
+        # The GFF file is read once: calling readlines() inside the pyramid loop
+        # exhausted the file on the first pyramid and left the others without pile.
+        with open( self._inputGffFileName, "r" ) as inFileGff: #-tan.gff
+            gffLines = inFileGff.readlines()
+
+        with open( self._inputPYRFileName, "r" ) as inFilePyr, open(self._outFileName,"w") as outFile:
+            for pyrLine in inFilePyr: #-tan_pyr.gff
+                if pyrLine.strip() == "":
+                    continue
+                pileIndex = ""
+                # the trailing newline is kept on purpose: it ends the written
+                # line and is part of the text searched in the GFF lines
+                pyrIndex = pyrLine.split('\t')[8].replace ('PyramidIndex', 'Pyramid')
+                for gffLine in gffLines:
+                    if pyrIndex in gffLine:
+                        pileIndex = gffLine.split(';')[1].strip()
+                        break
+                line = "%s\t%s" % (pileIndex, pyrIndex)
+                outFile.write(line)
+
+        #Step 1 : Add pile info to motif file and write out two files one with grouperID and one in map format
+        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
+        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"
+
+        with open(self._outFileName,"r") as inFileInfo:
+            inFileInfos = inFileInfo.readlines()
+        # the info lines are scanned forward only: the index is not reset between motifs
+        lineInfoIndex = 0
+        countMotif = 0
+
+        with open(self._inputMOTIFFileName, "r" ) as inFileMotif, \
+             open(outFileMotifGrpFileName, "w" ) as outFileMotifGrp, \
+             open(outFileMotifGrpMapFileName, "w" ) as outFileMotifGrpMap:
+            for lineMotif in inFileMotif:
+                if lineMotif.strip() == "":
+                    continue
+                dataMotif = lineMotif.split(';')
+                motif, pyrNameMotif  = dataMotif[:2]
+                pyrNameMotif = pyrNameMotif.strip()
+                pileNameMotif = ""
+
+                while lineInfoIndex < len(inFileInfos):
+                    lineInfo = inFileInfos[lineInfoIndex]
+                    if pyrNameMotif in lineInfo:
+                        pileNameMotif = lineInfo.split('\t')[0]
+                        break
+                    lineInfoIndex +=1
+
+                #translate to Grouper IdFormat
+                pyrID = pyrNameMotif.split(' ')[1]
+                pileID = pileNameMotif.split(' ')[1]
+                dataMotif = motif.split ('\t')
+                chrm = dataMotif [0]
+                start,end = dataMotif [3:5]
+                countMotif += 1
+                memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID
+
+                stringMotif = "%s\t%s\t%s\t%s\n" % ( memberID, motif, pileNameMotif, pyrNameMotif)
+                outFileMotifGrp.write( stringMotif)
+
+                stringGrpMap = "%s\t%s\t%s\t%s\n" % ( memberID, chrm, start, end )
+                outFileMotifGrpMap.write( stringGrpMap )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PklParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PklParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,112 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+try:
+ import cPickle as pickle
+except:
+ import pickle
+from SMART.Java.Python.structure.Interval import Interval
+from commons.core.parsing.TranscriptListParser import TranscriptListParser
+from SMART.Java.Python.structure.Transcript import Transcript
+
+
+class PklParser(TranscriptListParser):
+    """
+    A class that parses the internal PKL file and creates a transcript list.
+    The file is read as a succession of pickled objects, one per transcript.
+    """
+
+    def __init__(self, fileName, verbosity = 1):
+        """
+        Constructor
+        @param fileName: name of the PKL file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.title = None
+        super(PklParser, self).__init__(fileName, verbosity)
+        # binary mode is needed to un-pickle
+        self.handle = open(fileName, "rb")
+        self.verbosity = verbosity
+        self.initAddress = 0
+        self.address = self.initAddress
+        self.over = False
+        self.chromosome = None
+
+    def __del__(self):
+        super(PklParser, self).__del__()
+
+    def getFileFormats():
+        return ["pkl"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        """Nothing to skip: the file has no header."""
+        return
+
+
+    def reset(self):
+        """Go back to the beginning of the file, and make it the initial address."""
+        self.handle.seek(0)
+        self.initAddress = 0
+
+
+    def setChromosome(self, chromosome):
+        """
+        Restrict the reading to one chromosome: reading stops at the first
+        transcript located on another chromosome
+        @param chromosome: the chromosome name (None for no restriction)
+        """
+        self.chromosome = chromosome
+
+
+    def gotoAddress(self, address):
+        """
+        Move to a position in the file
+        @param address: offset in the file, as given by getCurrentTranscriptAddress
+        @type address: int
+        """
+        self.handle.seek(address)
+        self.address = address
+
+
+    def getNextTranscript(self):
+        """
+        Read the next pickled transcript
+        @return: the transcript, or False when the end of the file is reached
+                 or when the transcript is not on the selected chromosome
+        """
+        # remember where this transcript starts, so that it can be reached again
+        self.address = self.handle.tell()
+        try:
+            # NOTE(review): pickle.load can execute arbitrary code, so PKL files
+            # must only come from a trusted source
+            transcript = pickle.load(self.handle)
+            if self.chromosome is not None and transcript.getChromosome() != self.chromosome:
+                self.over = True
+                return False
+            return transcript
+        except EOFError:
+            self.over = True
+            return False
+
+
+    def getIterator(self):
+        """
+        Iterate over the transcripts, starting from the initial address
+        @return: a generator of transcripts
+        """
+        self.gotoAddress(self.initAddress)
+        while True:
+            transcript = self.getNextTranscript()
+            if not transcript:
+                self.over = True
+                return
+            yield transcript
+
+
+    def setInitAddress(self, address):
+        """
+        Set the address where getIterator starts
+        @param address: offset in the file
+        @type address: int
+        """
+        self.initAddress = address
+
+
+    def getCurrentTranscriptAddress(self):
+        """
+        Get the address stored by the last getNextTranscript or gotoAddress call
+        @return: offset in the file
+        """
+        return self.address
+
+
+    def isOver(self):
+        """
+        Tell whether the reading has stopped
+        @return: True after the end of the file or a chromosome change was met
+        """
+        return self.over

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PklParser.pyc

Binary file commons/core/parsing/PklParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PslParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PslParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,155 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+class PslParser(MapperParser):
+    """A class that parses the output of PSL format (of SSAHA and BLAT)"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(PslParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(PslParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["psl"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def getInfos(self):
+        """
+        Read the whole file to collect the chromosome names (14th column),
+        the number of mappings and the size, then rewind the file
+        """
+        self.chromosomes = set()
+        self.nbMappings  = 0
+        self.size        = 0
+        self.reset()
+        progress = UnlimitedProgress(100000, "Getting info on PSL file, # mappings read:", self.verbosity)
+        for line in self.handle:
+            progress.inc()
+            line = line.strip()
+            if line == "":
+                continue
+            parts      = line.split("\t")
+            chromosome = parts[13]
+            self.chromosomes.add(chromosome)
+            self.nbMappings += 1
+            # NOTE(review): this adds the number of characters of the first
+            # column, not its value -- confirm that this is the intended size
+            self.size += len(parts[0])
+        self.reset()
+        progress.done()
+
+
+    def skipFirstLines(self):
+        """
+        Skip the header, which ends with a line of dashes.
+        If the file has no such line (header-less PSL), go back to where the
+        reading started, instead of looping forever at the end of the file.
+        """
+        startAddress = self.handle.tell()
+        startLineNb  = self.currentLineNb
+        while True:
+            line = self.handle.readline()
+            if line == "":
+                # end of file reached without header: the first line is data
+                self.handle.seek(startAddress)
+                self.currentLineNb = startLineNb
+                return
+            if "------" in line:
+                return
+            self.currentLineNb += 1
+
+    def _computeStarts(self,seqSize,blockSize,start,strand):
+        """
+        Convert the 0-based start of a block to forward strand coordinates
+        @param seqSize:   size of the whole sequence
+        @param blockSize: size of the block
+        @param start:     0-based start of the block, on the given strand
+        @param strand:    "+" or "-"
+        @return: the 0-based start of the block on the forward strand
+        """
+        if strand == "+":
+            return start
+        return seqSize-blockSize-start
+
+
+
+    def parseLine(self, line):
+        """
+        Parse one PSL line
+        @param line: a line of the file
+        @type line: string
+        @return: a mapping, with one sub-mapping per block
+        @raise Exception: if the line does not have a PSL format
+        """
+        m = re.search(r"^\s*(psl:\s+)?(\d+)\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+(\d+)\s+\d+\s+(\d+)\s+([+-]{1,2})\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s*$", line)
+        if m is None:
+            raise Exception("\nLine %d '%s' does not have a PSL format" % (self.currentLineNb, line))
+
+        mapping = Mapping()
+
+        # the strand field is either the query strand alone, or query strand
+        # followed by target strand
+        strand      = m.group(7)
+        queryStrand = strand[0]
+        if len(strand) == 1:
+            targetStrand = "+"
+        else:
+            targetStrand = strand[1]
+
+        queryName  = m.group(8)
+        querySize  = int(m.group(9))
+        targetName = m.group(12)
+        targetSize = int(m.group(13))
+        # the three lists are comma-separated, split them once for all the blocks
+        blockSizes   = m.group(17).split(",")
+        queryStarts  = m.group(18).split(",")
+        targetStarts = m.group(19).split(",")
+
+        for i in range(0, int(m.group(16))):
+            size        = int(blockSizes[i])
+            queryStart  = int(queryStarts[i])
+            targetStart = int(targetStarts[i])
+
+            subMapping = SubMapping()
+            subMapping.setSize(size)
+            subMapping.setDirection(queryStrand)
+
+            queryInterval  = Interval()
+            targetInterval = Interval()
+
+            queryInterval.setName(queryName)
+            # block starts on the query follow the query strand (not the target
+            # strand): they are reverse-complement coordinates when it is "-"
+            queryStart = self._computeStarts(querySize,size,queryStart,queryStrand)
+            queryInterval.setStart(queryStart + 1)
+            queryInterval.setEnd(queryStart + size)
+            queryInterval.setDirection(queryStrand)
+
+            targetInterval.setChromosome(targetName)
+            targetStart = self._computeStarts(targetSize,size,targetStart,targetStrand)
+            targetInterval.setStart(targetStart + 1)
+            targetInterval.setEnd(targetStart + size)
+            targetInterval.setDirection(targetStrand)
+
+            subMapping.setQueryInterval(queryInterval)
+            subMapping.setTargetInterval(targetInterval)
+            mapping.addSubMapping(subMapping)
+
+        mapping.setSize(int(m.group(2)) + int(m.group(3)) + int(m.group(4)))
+        mapping.setNbMismatches(int(m.group(3)) + int(m.group(4)))
+        mapping.setNbGaps(int(m.group(5)))
+        mapping.setDirection(queryStrand)
+
+        queryInterval  = Interval()
+        targetInterval = Interval()
+
+        queryInterval.setName(queryName)
+        queryInterval.setStart(min(int(m.group(10)), int(m.group(11))))
+        queryInterval.setEnd(  max(int(m.group(10)), int(m.group(11))))
+        queryInterval.setDirection(queryStrand)
+
+        targetInterval.setChromosome(targetName)
+        targetInterval.setStart(min(int(m.group(14))+1, int(m.group(15))))
+        targetInterval.setEnd(  max(int(m.group(14))+1, int(m.group(15))))
+        targetInterval.setDirection(targetStrand)
+
+        mapping.setQueryInterval(queryInterval)
+        mapping.setTargetInterval(targetInterval)
+
+        return mapping
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/PslParser.pyc

Binary file commons/core/parsing/PslParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/README_MultiFasta2SNPFile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/README_MultiFasta2SNPFile Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,66 @@
+*** DESCRIPTION: ***
+This program takes as input a multifasta file (with sequences already aligned together and formatted in fasta in the same file), considers the first sequence as the reference sequence, infers polymorphisms and generates output files in GnpSNP exchange format.
+
+
+*** INSTALLATION: ***
+Dependencies:
+- First you need Python installed in your system.
+- Repet libraries are also required.
+
+*** OPTIONS OF THE LAUNCHER: ***
+
+    -h: this help
+
+Mandatory options:
+         -b: Name of the batch of submitted sequences
+         -g: Name of the gene
+         -t: Scientific name of the taxon concerned
+
+Exclusive options (use either the first or the second)
+         -f: Name of the multifasta input file (for one input file)
+         -d: Name of the directory containing multifasta input file(s) (for several input files)
+
+
+
+*** COMMAND LINE EXAMPLE (for package use): ***
+- First, you need to set up the environment variable PYTHONPATH (to link with the dependencies).
+
+- Then for one input file (here our example), run:
+
+python multifastaParserLauncher.py -b Batch_test -g GeneX -t "Arabidopsis thaliana" -f Exemple_multifasta_input.fasta
+
+
+- For several input files, create a directory in the root of the uncompressed package and put your input files in it. Then use this type of command line:
+
+python multifastaParserLauncher.py -b Batch_test -g GeneX -t "Arabidopsis thaliana" -d <Name_of_the_directory>
+
+Each one of the input files will generate a directory with its own set of output files.
+
+
+*** SIMPLE USE (for package use): ***
+Two executables (one for windows, the other for linux/unix) are in the package.
+They show the command lines to use in order to set up environment variables and then to run the parser on our sample input file (Example_multifasta_input.fasta).
+You can edit the executable and customize the command line to use it with your own input file.
+
+
+*** BACKLOG (next version) ***
+When the launcher is called for several input files (with -d option), the parser should be able to generate only one set of files describing all the batches (one batch per input file).
+So below are listed the tasks of the backlog dedicated to this feature:
+
+- in Multifasta2SNPFile class:
+  # CONSTRUCTOR: Modify the constructor to add a "several batches" mode called without BatchName and GeneName
+  # RUNNING METHOD: Add the run_several_batches(directory) method that will browse the input files and iterate over them to run each of them successively (see runSeveralInputFile() method of the launcher)
+  => 2 days
+
+  # BATCH MANAGEMENT: Modify createBatchDict() to create one batch per file in the dictionary and add a class variable to point toward the current batch (ex: self._iCurrentLineNumber)
+  # BATCH-LINE MANAGEMENT: Modify _completeBatchLineListWithCurrentIndividual method to allow several batch and link lines to batches (for the moment hard coded batch no1)
+  # SUBSNP MANAGEMENT: check that all elements (dSUbSNP) added to the SubSNP list (lSubSNPFileResults) are linked to the current batch (for the moment hard coded batch no1)
+    Impacted methods: manageSNPs(), createSubSNPFromAMissingPolym(), addMissingAllelesAndSubSNPsForOnePolym(), mergeAllelesAndSubSNPsFromOverlappingIndels()
+  => + 2 days
+
+- in Multifasta2SNPFileWriter class:
+  # Modify all the method _write<X>File (ex: _writeSubSNPFile) to write in append mode and externalize all open and close file
+  # Create one method to open all the output files and call it in Multifasta2SNPFile run_several_batches method
+  # Create one method to close all the output files and call it in Multifasta2SNPFile run_several_batches method
+
+  => + 2 days

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/RmapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/RmapParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,76 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+
+class RmapParser(MapperParser):
+    """Parser for files in the Rmap output format"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(RmapParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(RmapParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["rmap"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        # no header to skip
+        pass
+
+
+    def parseLine(self, line):
+        """
+        Build a mapping from one line made of 6 fields: chromosome, two
+        positions, read name, number of mismatches and strand.
+        Exits the program if the line does not have this format.
+        @param line: a line of the file
+        @return: the mapping
+        """
+        m = re.search(r"^\s*(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+([+-])\s*$", line)
+        if m is None:
+            sys.exit("\nLine %d '%s' does not have a RMAP format" % (self.currentLineNb, line))
+
+        chromosome, firstPos, secondPos, readName, nbMismatches, strand = m.groups()
+        firstPos  = int(firstPos)
+        secondPos = int(secondPos)
+
+        mapping = Mapping()
+        target  = mapping.targetInterval
+        query   = mapping.queryInterval
+
+        # the two positions may come in any order
+        target.setChromosome(chromosome)
+        target.setStart(min(firstPos, secondPos))
+        target.setEnd(max(firstPos, secondPos))
+
+        # the size is read back from the target interval, as end - start
+        span = target.getEnd() - target.getStart()
+
+        query.setName(readName)
+        query.setStart(1)
+        query.setSize(span)
+
+        mapping.setSize(span)
+        mapping.setNbMismatches(int(nbMismatches))
+        mapping.setDirection(strand)
+
+        return mapping
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/RmapParser.pyc

Binary file commons/core/parsing/RmapParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SamParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/SamParser.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,234 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re\n+import sys\n+from commons.core.parsing.MapperParser import MapperParser\n+from SMART.Java.Python.structure.Mapping import Mapping\n+from SMART.Java.Python.structure.SubMapping import SubMapping\n+from SMART.Java.Python.structure.Interval import Interval\n+\n+class SamParser(MapperParser):\n+ """A class that parses SAM format (as given by BWA)"""\n+\n+ def __init__(self, fileName, verbosity = 0):\n+ super(SamParser, self).__init__(fileName, verbosity)\n+\n+\n+ def __del__(self):\n+ super(SamParser, self).__del__()\n+\n+\n+ def getFileFormats():\n+ return ["sam"]\n+ getFileFormats = staticmethod(getFileFormats)\n+\n+\n+ def skipFirstLines(self):\n+ pass\n+\n+\n+ def getInfos(self):\n+ self.chromosomes = set()\n+ self.nbMappings = 0\n+ self.size = 0\n+ self.reset()\n+ if self.verbosity >= 10:\n+ print "Getting information on SAM file"\n+ self.reset()\n+ for line in self.handle:\n+ line = line.strip()\n+ if line == "" or line[0] == "@":\n+ continue\n+ parts = line.split("\\t")\n+ chromosome = parts[2]\n+ if chromosome != "*":\n+ self.chromosomes.add(chromosome)\n+ self.nbMappings += 1\n+ self.size += len(parts[8])\n+ if self.verbosity >= 10 and self.nbMappings % 100000 == 0:\n+ sys.stdout.write(" %d mappings read\\r" % (self.nbMappings))\n+ sys.stdout.flush()\n+ self.reset()\n+ if self.verbosity >= 10:\n+ print " %d mappings read" % (self.nbMappings)\n+ print "Done."\n+\n+\n+ def parseLine(self, line):\n+\n+ line = line.strip()\n+ if line[0] == "@":\n+ return\n+\n+ fields = line.split("\\t")\n+ if len(fields) < 11:\n+ raise 
Exception("Line %d \'%s\' does not look like a SAM line (number of fields is %d instead of 11)" % (self.currentLineNb, line, len(fields)))\n+\n+ name = fields[0]\n+ flag = int(fields[1])\n+\n+ if (flag & 0x4) == 0x4:\n+ return None\n+\n+ direction = 1 if (flag & 0x10) == 0x0 else -1\n+ chromosome = fields[2]\n+ genomeStart = int(fields[3])\n+ quality = fields[4]\n+ cigar = fields[5]\n+ mate = fields[6]\n+ mateGenomeStart = fields[7]\n+ gapSize = fields[8]\n+ sequence = fields[9]'..b'e:\n+ currentNumber = currentNumber * 10 + (ord(char) - ord("0"))\n+ continue\n+ # match\n+ m = re.match(r"[M]", char)\n+ if m != None:\n+ if readStart == None:\n+ readStart = queryOffset\n+ if subMapping == None:\n+ subMapping = SubMapping()\n+ subMapping.setSize(currentNumber)\n+ subMapping.setDirection(direction)\n+ subMapping.queryInterval.setName(name)\n+ subMapping.queryInterval.setStart(queryOffset)\n+ subMapping.queryInterval.setDirection(direction)\n+ subMapping.targetInterval.setChromosome(chromosome)\n+ subMapping.targetInterval.setStart(genomeStart + targetOffset)\n+ subMapping.targetInterval.setDirection(1)\n+ nbMatches += currentNumber\n+ targetOffset += currentNumber\n+ queryOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # insertion on the read\n+ m = re.match(r"[I]", char)\n+ if m != None:\n+ nbGaps += 1\n+ queryOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # insertion on the genome\n+ m = re.match(r"[D]", char)\n+ if m != None:\n+ if subMapping != None:\n+ subMapping.queryInterval.setEnd(queryOffset - 1)\n+ subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+ mapping.addSubMapping(subMapping)\n+ subMapping = None\n+ nbGaps += 1\n+ targetOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # intron\n+ m = re.match(r"[N]", char)\n+ if m != None:\n+ if subMapping != None:\n+ subMapping.queryInterval.setEnd(queryOffset - 1)\n+ subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+ 
mapping.addSubMapping(subMapping)\n+ subMapping = None\n+ targetOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # soft clipping (substitution)\n+ m = re.match(r"[S]", char)\n+ if m != None:\n+ nbMismatches += currentNumber\n+ targetOffset += currentNumber\n+ queryOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # hard clipping\n+ m = re.match(r"[H]", char)\n+ if m != None:\n+ targetOffset += currentNumber\n+ queryOffset += currentNumber\n+ currentNumber = 0\n+ continue\n+ # padding\n+ m = re.match(r"[P]", char)\n+ if m != None:\n+ continue\n+ raise Exception("Do not understand paramer \'%s\' in line %s" % (char, line))\n+\n+ if subMapping != None:\n+ subMapping.queryInterval.setEnd(queryOffset - 1)\n+ subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+ mapping.addSubMapping(subMapping)\n+\n+ mapping.queryInterval.setStart(readStart)\n+ mapping.queryInterval.setEnd(queryOffset - 1)\n+ mapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n+ mapping.setNbMismatches(nbMismatches)\n+ mapping.setNbGaps(nbGaps)\n+\n+ mapping.queryInterval.setName(name)\n+ mapping.queryInterval.setDirection(direction)\n+ mapping.targetInterval.setChromosome(chromosome)\n+ mapping.targetInterval.setStart(genomeStart)\n+ mapping.targetInterval.setDirection(direction)\n+ mapping.setSize(len(sequence))\n+ mapping.setDirection(direction)\n+\n+ return mapping\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SamParser.pyc

Binary file commons/core/parsing/SamParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SeqmapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/SeqmapParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,81 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from commons.core.parsing.MapperParser import MapperParser
+from SMART.Java.Python.structure.Mapping import Mapping
+
+
+class SeqmapParser(MapperParser):
+    """A class that parses the output of SeqMap"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(SeqmapParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(SeqmapParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["seqmap"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        self.startingPoint = self.handle.tell()
+        self.currentLineNb += 1
+        if "trans_id" not in self.handle.readline():
+            self.currentLineNb -= 1
+            self.handle.seek(self.startingPoint)
+        self.startingPoint = self.handle.tell()
+
+
+    def parseLine(self, line):
+        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\w+)\t+([^\t]+)\t+(\w+)\t+(\d+)\t+([+-])\s*$", line)
+        if m == None:
+            sys.exit("\nLine %d '%s' does not have a SeqMap format" % (self.currentLineNb, line))
+
+        mapping = Mapping()
+
+        mapping.targetInterval.setChromosome(m.group(1))
+        mapping.targetInterval.setStart(int(m.group(2)))
+        mapping.targetInterval.setSize(len(m.group(3)))
+
+        mapping.queryInterval.setName(m.group(4))
+        mapping.queryInterval.setStart(1)
+        mapping.queryInterval.setSize(len(m.group(3)))
+
+        mapping.setSize(len(m.group(3)))
+        mapping.setNbMismatches(int(m.group(6)))
+        mapping.setDirection(m.group(7))
+
+        return mapping
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SeqmapParser.pyc

Binary file commons/core/parsing/SeqmapParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SequenceListParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/SequenceListParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,228 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.structure.SequenceList import SequenceList
+from SMART.Java.Python.misc.Progress import Progress
+
+class SequenceListParser(object):
+ """
+ A virtual class that reads a list of sequences
+ @ivar verbosity:    verbosity
+ @type verbosity:    int
+ @ivar fileName:    name of the file to parse
+ @type fileName:    string
+ @ivar handle:    file to parse
+ @type handle:    file
+ @ivar nbSequences:    number of sequences in the file
+ @type nbSequences:    int
+ @ivar nbReadSequences: number of sequences read
+ @type nbReadSequences: int
+ @ivar currentLine:    line currently read
+ @type currentLine:    string
+ @ivar size:    total number of nucleotides in the sequences
+ @type size:    int
+ @ivar sizes:    number of nucleotides per sequences
+ @type sizes:    dict of string to int
+ """
+
+ def __init__(self, fileName, verbosity = 0):
+ """
+ Constructor
+ @param verbosity:  verbosity
+ @type verbosity: int
+ @param fileName:   name of the file to parse
+ @type fileName:  string
+ """
+ self.verbosity = verbosity
+ self.fileName = fileName
+ self.nbSequences = None
+ self.nbReadSequences = 0
+ self.currentLine = None
+ self.size = None
+ self.sizes = None
+ try:
+ self.handle = open(self.fileName, "rb")
+ except IOError:
+ raise Exception("Error! Sequence file '%s' does not exist! Exiting..." % (self.fileName))
+
+
+ def __del__(self):
+ """
+ Destructor
+ """
+ if not self.handle.closed:
+ self.handle.close()
+
+
+ def close(self):
+ """
+ Close file handle
+ """
+ self.handle.close()
+
+
+ def reset(self):
+ """
+ Prepare the file to be read again from start
+ """
+ self.handle.seek(0)
+ self.currentLine = None
+ self.nbReadSequences = 0
+
+
+ def getFileFormats(self):
+ pass
+ getFileFormats = staticmethod(getFileFormats)
+
+
+ def parse(self):
+ """
+ Parse the whole file in one shot
+ @return: a list of sequence
+ """
+ sequenceList = SequenceList()
+ progress = Progress(self.getNbSequences(), "Reading %s" % (self.fileName), self.verbosity)
+ for sequence in self.getIterator():
+ sequenceList.addSequence(sequence)
+ progress.inc()
+ progress.done()
+ return sequenceList
+
+
+ def getIterator(self):
+ """
+ Iterate on the file, sequence by sequence
+ @return: an iterator to sequences
+ """
+ self.reset()
+ sequence = self.parseOne()
+ while sequence != None:
+ self.nbReadSequences += 1
+ yield sequence
+ sequence = self.parseOne()
+
+
+ def getInfos(self):
+ """
+ Get some generic information about the sequences
+ """
+ self.nbSequences = 0
+ self.size = 0
+ self.reset()
+ if self.verbosity >= 10:
+ print "Getting information on %s." % (self.fileName)
+ for sequence in self.getIterator():
+ self.nbSequences += 1
+ self.size += sequence.getSize()
+ if self.verbosity >= 10 and self.nbSequences % 100000 == 0:
+ sys.stdout.write(" %d sequences read\r" % (self.nbSequences))
+ sys.stdout.flush()
+ self.reset()
+ if self.verbosity >= 10:
+ print " %d sequences read" % (self.nbSequences)
+ print "Done."
+
+
+ def getNbSequences(self):
+ """
+ Get the number of sequences in the file
+ @return: the number of sequences
+ """
+ if self.nbSequences != None:
+ return self.nbSequences
+ self.getInfos()
+ return self.nbSequences
+
+
+ def getNbItems(self):
+ """
+ Get the number of sequences in the file
+ @return: the number of sequences
+ """
+ return self.getNbSequences()
+
+
+ def getSize(self):
+ """
+ Get the size of all the sequences
+ @return: the size
+ """
+ if self.size != None:
+ return self.size
+ self.getInfos()
+ return self.size
+
+
+ def getRegions(self):
+ """
+ Get the names of the sequences
+ @return: the names
+ """
+ if self.sizes != None:
+ return self.sizes.keys()
+
+ self.sizes = {}
+ self.reset()
+ if self.verbosity >= 10:
+ print "Getting information on %s." % (self.fileName)
+ self.nbSequences = 0
+ for sequence in self.getIterator():
+ self.sizes[sequence.name] = sequence.getSize()
+ self.nbSequences += 1
+ if self.verbosity >= 10 and self.nbSequences % 100000 == 0:
+ sys.stdout.write(" %d sequences read\r" % (self.nbSequences))
+ sys.stdout.flush()
+ self.reset()
+ if self.verbosity >= 10:
+ print " %d sequences read" % (self.nbSequences)
+ print "Done."
+ return self.sizes.keys()
+
+
+ def getSizeOfRegion(self, region):
+ """
+ Get the size of a sequence
+ @param region: the name of the sequence
+ @type region: string
+ @return: the size of the sequence
+ """
+ if self.sizes != None:
+ if region not in self.sizes:
+ raise Exception("Region %s is not found" % region)
+ return self.sizes[region]
+
+ self.getRegions()
+ if region not in self.sizes:
+ raise Exception("Region %s is not found" % region)
+
+ def __eq__(self, o):
+ if o == None:
+ return False
+ return self.fileName == o.fileName and self.nbSequences == o.nbSequences

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SequenceListParser.pyc

Binary file commons/core/parsing/SequenceListParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ShrimpParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/ShrimpParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,107 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from commons.core.parsing.MapperParser import MapperParser
+
+
+class ShrimpParser(MapperParser):
+    """A class that parses the output of Shrimp"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(ShrimpParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(ShrimpParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["shrimp"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        self.handle.readline()
+        self.currentLineNb += 1
+
+
+    def parseLine(self, line):
+        m = re.search(r"^\s*>([^\t]+)\t+(\S+)\s+([+-])\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s*$", line)
+        if m == None:
+            sys.exit("\nLine %d '%s' does not have a Shrimp format" % (self.currentLineNb, line))
+
+        mapping = Mapping()
+
+        mapping.queryInterval.setName(m.group(1))
+        mapping.queryInterval.setStart(min(int(m.group(6)), int(m.group(7))))
+        mapping.queryInterval.setEnd(max(int(m.group(6)), int(m.group(7))))
+
+        mapping.targetInterval.setChromosome(m.group(2))
+        mapping.targetInterval.setStart(min(int(m.group(4)), int(m.group(5))))
+        mapping.targetInterval.setEnd(max(int(m.group(4)), int(m.group(5))))
+
+        mapping.setSize(int(m.group(8)))
+        mapping.setDirection(m.group(3))
+
+        editString = m.group(10)
+        nbMismatches = 0
+        nbGaps = 0
+        while editString != "":
+            m = re.search(r"^(\d+)(\D.*)$", editString)
+            if m != None:
+                editString = m.group(2)
+            else:
+                m = re.search(r"^(\d+)$", editString)
+                if m != None:
+                    editString = ""
+                else:
+                    m = re.search(r"^([A-Z])(.*)$", editString)
+                    if m != None:
+                        nbMismatches += 1
+                        editString = m.group(2)
+                    else:
+                        m = re.search(r"^$(\w+)$(.*)$", editString)
+                        if m != None:
+                            nbGaps += len(m.group(1))
+                            editString = m.group(2)
+                        else:
+                            m = re.search(r"^-(.*)$", editString)
+                            if m != None:
+                                nbGaps += 1
+                                editString = m.group(1)
+                            else:
+                                sys.exit("Cannot understand edit string %s from line %s" % (editString, line))
+
+        mapping.setNbMismatches(nbMismatches)
+        mapping.setNbGaps(nbGaps)
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/ShrimpParser.pyc

Binary file commons/core/parsing/ShrimpParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/Soap2Parser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/Soap2Parser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,148 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.structure.SubMapping import SubMapping
+from commons.core.parsing.MapperParser import MapperParser
+
+
+def mappingToSubMapping(mapping):
+    subMapping = SubMapping()
+    subMapping.targetInterval.copy(mapping.targetInterval)
+    subMapping.queryInterval.copy(mapping.queryInterval)
+    subMapping.setDirection(mapping.getDirection())
+    subMapping.size = mapping.size
+    subMapping.tags = mapping.tags
+    return subMapping
+
+
+
+class Soap2Parser(MapperParser):
+    """A class that parses the output of SOAP2"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(Soap2Parser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(Soap2Parser, self).__del__()
+
+
+    def getFileFormats():
+        return ["soap2"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        pass
+
+
+    def getIterator(self):
+        self.reset()
+        currentName = None
+        currentMappings = []
+        for line in self.handle:
+            mapping = self.parseLine(line)
+            name = mapping.queryInterval.name
+            if name == currentName:
+                if mapping.getTagValue("end") == "a":
+                    currentMappings.append(mapping)
+                else:
+                    otherEndMapping = currentMappings.pop(0)
+
+                    newMapping = Mapping()
+                    subMappingA = mappingToSubMapping(otherEndMapping)
+                    subMappingB = mappingToSubMapping(mapping)
+                    subMappingB.queryInterval.setDirection(subMappingA.queryInterval.getDirection())
+
+                    newMapping.addSubMapping(subMappingA)
+                    newMapping.addSubMapping(subMappingB)
+
+                    newMapping.tags = otherEndMapping.tags
+                    newMapping.setSize(otherEndMapping.size + mapping.size)
+                    newMapping.setNbMismatches(otherEndMapping.getTagValue("nbMismatches") + mapping.getTagValue("nbMismatches"))
+                    print otherEndMapping.getTagValue("nbMismatches")
+                    print mapping.getTagValue("nbMismatches")
+                    print newMapping.getTagValue("nbMismatches")
+                    sys.exit()
+                    newMapping.setTagValue("qualityString", otherEndMapping.getTagValue("qualityString") + mapping.getTagValue("qualityString"))
+                    newMapping.setTagValue("occurrence", "%d" % (newMapping.getTagValue("nbOccurrences") - len(currentMappings)))
+                    newMapping.setTagValue("ID", "%s-%s" % (name, newMapping.getTagValue("occurrence")))
+                    del newMapping.tags["end"]
+                    yield newMapping
+            else:
+                currentName = mapping.queryInterval.name
+                for currentMapping in currentMappings:
+                    yield currentMapping
+                currentMappings = [mapping]
+            self.currentLineNb += 1
+
+
+    def parseLine(self, line):
+        m = re.search(r"^\s*(\S+)\s+(\w+)\s+(\S+)\s+(\d+)\s+([ab])\s+(\d+)\s+([+-])\s+(\w+)\s+(\d+)\s+(\d+)\s+", line)
+        if m == None:
+            sys.exit("\nLine %d '%s' does not have a SOAP2 format" % (self.currentLineNb, line))
+
+        name          = m.group(1)
+        read          = m.group(2)
+        qualityString = m.group(3)
+        nbOccurrences = int(m.group(4))
+        end           = m.group(5)
+        size          = int(m.group(6))
+        direction     = m.group(7)
+        chromosome    = m.group(8)
+        genomeStart   = int(m.group(9))
+        nbMismatches  = int(m.group(10))
+
+        mapping = Mapping()
+        if name.endswith("/1") or name.endswith("/2"):
+            name = name[:-2]
+
+        mapping.queryInterval.name = name
+        mapping.queryInterval.setDirection(direction)
+        mapping.queryInterval.setStart(1)
+        mapping.queryInterval.setEnd(size)
+
+        mapping.targetInterval.setChromosome(chromosome)
+        mapping.targetInterval.setStart(genomeStart)
+        mapping.targetInterval.setSize(size)
+
+        mapping.setDirection(direction)
+        mapping.setSize(size)
+
+        mapping.setNbMismatches(nbMismatches)
+        mapping.setNbGaps(0)
+        mapping.setTagValue("qualityString", qualityString)
+        mapping.setTagValue("nbOccurrences", nbOccurrences)
+        mapping.setTagValue("end", end)
+
+        return mapping
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/Soap2Parser.pyc

Binary file commons/core/parsing/Soap2Parser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SoapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/SoapParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,75 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+import sys
+from SMART.Java.Python.structure.Mapping import Mapping
+from commons.core.parsing.MapperParser import MapperParser
+
+
+class SoapParser(MapperParser):
+    """A class that parses the output of SOAP"""
+
+    def __init__(self, fileName, verbosity = 0):
+        super(SoapParser, self).__init__(fileName, verbosity)
+
+
+    def __del__(self):
+        super(SoapParser, self).__del__()
+
+
+    def getFileFormats():
+        return ["soap"]
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def skipFirstLines(self):
+        pass
+
+
+    def parseLine(self, line):
+        m = re.search(r"^\s*(\S+)\s+(\w+)\s+(\w+)\s+(\d+)\s+(a)\s+(\d+)\s+([+-])\s+(\w+)\s+(\d+)\s+(\d+)", line)
+        if m == None:
+            sys.exit("\nLine %d '%s' does not have a SOAP format" % (self.currentLineNb, line))
+
+        mapping = Mapping()
+
+        mapping.queryInterval.setName(m.group(1))
+        mapping.queryInterval.setStart(1)
+        mapping.queryInterval.setSize(len(m.group(2)))
+
+        mapping.targetInterval.setChromosome(m.group(8))
+        mapping.targetInterval.setStart(int(m.group(9)))
+        mapping.targetInterval.setSize(len(m.group(2)))
+
+        mapping.setDirection(m.group(7))
+        mapping.setSize(len(m.group(2)))
+        mapping.setNbMismatches(int(m.group(10)))
+
+        return mapping

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SoapParser.pyc

Binary file commons/core/parsing/SoapParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/SsrParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/SsrParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,170 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import sys
+
+## This class parses an SSR results output file. SSR.pl was developed by S. Cartinhour (5/2000).
+#
+class SsrParser(object):
+
+
+    def __init__(self, BES_name='', BES_redundancy='', SSR_nbNucleotides='', SSR_Motif='', SSR_Motif_number='', SSR_start='', SSR_end='', BES_size=''):
+        self._BesName = BES_name
+        self._BesRedundancy = BES_redundancy
+        self._SsrNbNucleotides = SSR_nbNucleotides
+        self._SsrMotif = SSR_Motif
+        self._SsrMotifNumber = SSR_Motif_number
+        self._SsrStart = SSR_start
+        self._SsrEnd = SSR_end
+        self._BesSize = BES_size
+
+    def __eq__(self, o):
+        return self._BesName == o._BesName and self._BesRedundancy == o._BesRedundancy and self._SsrNbNucleotides == o._SsrNbNucleotides and self._SsrMotif == o._SsrMotif and self._SsrMotifNumber == o._SsrMotifNumber and self._SsrStart == o._SsrStart and self._SsrEnd == o._SsrEnd and self._BesSize == o._BesSize
+
+    def setBesName(self, BES_Name):
+        self._BesName = BES_Name
+
+    def setBesRedundancy(self, BES_redundancy):
+        self._BesRedundancy = BES_redundancy
+
+    def setSsrNbNucleotides(self, SSR_nbNucleotides):
+        self._SsrNbNucleotides = SSR_nbNucleotides
+
+    def setSsrMotif(self, SSR_Motif):
+        self._SsrMotif = SSR_Motif
+
+    def setSsrMotifNumber(self, SSR_Motif_number):
+        self._SsrMotifNumber = SSR_Motif_number
+
+    def setSsrStart(self, SSR_start):
+        self._SsrStart = SSR_start
+
+    def setSsrEnd(self, SSR_end):
+        self._SsrEnd = SSR_end
+
+    def setBesSize(self, BES_size):
+        self._BesSize = BES_size
+
+    def getBesName(self):
+        return self._BesName
+
+    def getBesRedundancy(self):
+        return self._BesRedundancy
+
+    def getSsrNbNucleotides(self):
+        return self._SsrNbNucleotides
+
+    def getSsrMotif(self):
+        return self._SsrMotif
+
+    def getSsrMotifNumber(self):
+        return self._SsrMotifNumber
+
+    def getSsrStart(self):
+        return self._SsrStart
+
+    def getSsrEnd(self):
+        return self._SsrEnd
+
+    def getBesSize(self):
+        return self._BesSize
+
+    def setAttributes(self, lResults, iCurrentLineNumber):
+        error = False
+
+        if lResults[0] != '':
+            self.setBesName(lResults[0])
+        else:
+            sys.stderr.write("WARNING: The field BES Name is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[1] != '':
+            self.setBesRedundancy(lResults[1])
+        else:
+            sys.stderr.write("WARNING: The field BES Redundancy is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[2] != '':
+            self.setSsrNbNucleotides(lResults[2])
+        else:
+            sys.stderr.write("WARNING: The field SSR Number Nucleotides is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[3] != '':
+            self.setSsrMotif(lResults[3])
+        else:
+            sys.stderr.write("WARNING: The field SSR Motif is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[4] != '':
+            self.setSsrMotifNumber(lResults[4])
+        else:
+            sys.stderr.write("WARNING: The field SSR Motif Number is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[5] != '':
+            self.setSsrStart(lResults[5])
+        else:
+            sys.stderr.write("WARNING: The field SSR Start is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[6] != '':
+            self.setSsrEnd(lResults[6])
+        else:
+            sys.stderr.write("WARNING: The field SSR End is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if lResults[7] != '':
+            self.setBesSize(lResults[7])
+        else:
+            sys.stderr.write("WARNING: The field BES Size is empty in SSR results file in line %s\n" % iCurrentLineNumber)
+            error = True
+
+        if error == True:
+            self._setAllToNull()
+
+    def setAttributesFromString(self, ssrLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
+        ssrLine = ssrLine.rstrip()
+        lSsrLineItem = ssrLine.split(fieldSeparator)
+        if len(lSsrLineItem) < 8:
+            sys.stderr.write("WARNING: The line %s is not a valid SSR Result line\n" % iCurrentLineNumber)
+        else:
+            self.setAttributes(lSsrLineItem, iCurrentLineNumber)
+
+    def _setAllToNull(self):
+        self._BesName = ''
+        self._BesRedundancy = ''
+        self._SsrNbNucleotides = ''
+        self._SsrMotif = ''
+        self._SsrMotifNumber = ''
+        self._SsrStart = ''
+        self._SsrEnd = ''
+        self._BesSize = ''
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/TranscriptListParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/TranscriptListParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,182 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.structure.TranscriptList import TranscriptList
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+class TranscriptListParser(object):
+    """A (quite generic) class that reads a list of transcripts"""
+
+    def __init__(self, fileName, verbosity = 0):
+        self.verbosity         = verbosity
+        self.fileName          = fileName
+        self.nbTranscripts     = None
+        self.size              = None
+        self.chromosomes       = None
+        self.currentTranscript = None
+        self.currentLineNb     = 0
+        self.previousTranscriptAddress = None
+        try:
+            self.handle = open(self.fileName)
+        except IOError:
+            raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
+        self.skipFirstLines()
+
+
+    def __del__(self):
+        self.close()
+
+
+    def getFileFormats():
+        pass
+    getFileFormats = staticmethod(getFileFormats)
+
+
+    def close(self):
+        if self.handle != None and not self.handle.close:
+            self.handle.close()
+        self.handle = None
+
+
+    def reset(self):
+        self.handle.seek(0)
+        self.skipFirstLines()
+        self.currentTranscript = None
+        self.currentLineNb     = 0
+        self.currentTranscriptAddress  = self.handle.tell()
+        self.currentAddress            = self.handle.tell()
+
+
+    def gotoAddress(self, address):
+        self.reset()
+        self.handle.seek(address)
+        self.currentTranscriptAddress = address
+        self.currentAddress           = address
+
+
+    def parse(self):
+        transcriptList = TranscriptList()
+        progress = Progress(self.getNbTranscripts(), "Reading %s" % (self.fileName), self.verbosity)
+        for line in self.handle:
+            self.currentLineNb += 1
+            transcript = self.parseLine(line)
+            transcriptList.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+        return transcriptList
+
+
+    def getIterator(self):
+        self.reset()
+        transcript = self.getNextTranscript()
+        while transcript != None:
+            yield transcript
+            transcript = self.getNextTranscript()
+
+
+    def getCurrentAddress(self):
+        return self.currentAddress
+
+
+    def getCurrentTranscriptAddress(self):
+        return self.currentTranscriptAddress
+
+
+    def getNextTranscript(self):
+        self.currentAddress = self.handle.tell()
+        line = self.handle.readline()
+        while line != "":
+            line = line.strip()
+            self.currentLineNb += 1
+            transcript = self.parseLine(line)
+            if transcript != None:
+                return transcript
+            self.currentAddress = self.handle.tell()
+            line = self.handle.readline()
+        transcript = self.currentTranscript
+        self.currentTranscriptAddress = self.previousTranscriptAddress
+        self.currentTranscript = None
+        return transcript
+
+
+    def getInfos(self):
+        self.chromosomes = set()
+        self.nbTranscripts = 0
+        self.size = 0
+        self.reset()
+        progress = UnlimitedProgress(100000, "Getting information on %s." % (self.fileName), self.verbosity-9)
+        transcript = self.getNextTranscript()
+        for transcript in self.getIterator():
+            self.chromosomes.add(transcript.getChromosome())
+            self.nbTranscripts += 1
+            self.size += transcript.getSize()
+            progress.inc()
+        progress.done()
+        self.reset()
+
+
+    def getNbTranscripts(self):
+        if self.nbTranscripts != None:
+            return self.nbTranscripts
+        self.getInfos()
+        return self.nbTranscripts
+
+
+    def getNbItems(self):
+        return self.getNbTranscripts()
+
+
+    def getChromosomes(self):
+        if self.chromosomes != None:
+            return self.chromosomes
+        self.getInfos()
+        return self.chromosomes
+
+
+    def getSize(self):
+        if self.size != None:
+            return self.size
+        self.getInfos()
+        return self.size
+
+
+    def getNbNucleotides(self):
+        return self.getSize()
+
+
+    def setDefaultTagValue(self, name, value):
+        for transcript in self.getIterator():
+            transcript.setTag(name, value)
+
+    def __eq__(self, o):
+        if o == None:
+            return False
+        return self.fileName == o.fileName and self.nbTranscripts == o.nbTranscripts and self.size == o.size and self.chromosomes == o.chromosomes

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/TranscriptListParser.pyc

Binary file commons/core/parsing/TranscriptListParser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanFile.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,145 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.parsing.VarscanHit import VarscanHit
+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag
+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8
+from commons.core.checker.CheckerException import CheckerException
+from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag
+
+class VarscanFile(object):
+
+    def __init__(self, varscanFileName = ""):
+        self._varscanFileName = varscanFileName
+        self._varscanFieldSeparator = "\t"
+        self._lVarscanHits = []
+        self._typeOfVarscanFile = ""
+
+    def __eq__(self, o):
+        return self._varscanFieldSeparator == o._varscanFieldSeparator and self._lVarscanHits == o._lVarscanHits and self._varscanFileName == o._varscanFileName
+
+    def setVarscanHitsList(self, lVarscanHits):
+        self._lVarscanHits = lVarscanHits
+
+    def setHeaderVarcanFile(self, headerVarcanFile):
+        self._headerVarcanFile = headerVarcanFile
+
+    def setTypeOfVarscanFile(self, type):
+        if type == "Varscan_2_2" or type == "Varscan_2_2_WithTag" or type == "Varscan_2_2_8" or type == "Varscan_2_2_8_WithTag":
+            self._typeOfVarscanFile = type
+        else:
+            self._typeOfVarscanFile = ""
+
+    def getVarscanHitsList(self):
+        return self._lVarscanHits
+
+    def getHeaderVarcanFile(self):
+        return self._headerVarcanFile
+
+    def getListOfVarscanHits(self):
+        return self._lVarscanHits
+
+    def getTypeOfVarscanFile(self):
+        return self._typeOfVarscanFile
+
+    def parse(self):
+        varscanFile = open(self._varscanFileName, "r")
+        currentLineNumber = 0
+        line = varscanFile.readline()
+        if "Chrom\tPosition" in line:
+            self.setHeaderVarcanFile(line)
+            line = varscanFile.readline()
+        while line != "":
+            if not "Chrom\tPosition" in line:
+                currentLineNumber += 1
+                line = line.strip()
+                lResults = line.split(self._varscanFieldSeparator)
+                if len(lResults) == 12:
+                    currentVarscanLine = self.createVarscanHit(line, currentLineNumber)
+                    self._typeOfVarscanFile = "Varscan_2_2"
+                elif len(lResults) == 13:
+                    currentVarscanLine = self.createVarscanHitWithTag(line, currentLineNumber)
+                    self._typeOfVarscanFile = "Varscan_2_2_WithTag"
+                elif len(lResults) == 19:
+                    currentVarscanLine = self.createVarscanHit_v2_2_8(line, currentLineNumber)
+                    self._typeOfVarscanFile = "Varscan_2_2_8"
+                elif len(lResults) == 20:
+                    currentVarscanLine = self.createVarscanHit_v2_2_8_WithTag(line, currentLineNumber)
+                    self._typeOfVarscanFile = "Varscan_2_2_8_WithTag"
+                else:
+                    raise CheckerException ("Warning: this line (l.%s) is not a valid varscan line !" % currentLineNumber)
+                self._lVarscanHits.append(currentVarscanLine)
+                line = varscanFile.readline()
+        varscanFile.close()
+
+    def createVarscanObjectFromLine(self, line, currentLineNumber):
+        if self._typeOfVarscanFile == "Varscan_2_2":
+            VarscanHit =  self.createVarscanHit(line, currentLineNumber)
+            return VarscanHit
+        elif self._typeOfVarscanFile == "Varscan_2_2_WithTag":
+            return self.createVarscanHitWithTag(line, currentLineNumber)
+        elif self._typeOfVarscanFile == "Varscan_2_2_8":
+            return self.createVarscanHit_v2_2_8(line, currentLineNumber)
+        elif self._typeOfVarscanFile == "Varscan_2_2_8_WithTag":
+            return self.createVarscanHit_v2_2_8_WithTag(line, currentLineNumber)
+
+    def createVarscanHit(self, line, currentLineNumber):
+        iVarscanHit =  VarscanHit()
+        iVarscanHit.setAttributesFromString(line, currentLineNumber)
+        return iVarscanHit
+
+    def createVarscanHitWithTag(self, line, currentLineNumber):
+        iVarscanHitWithTag =  VarscanHit_WithTag()
+        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
+        return iVarscanHitWithTag
+
+    def createVarscanHit_v2_2_8(self, line, currentLineNumber):
+        iVarscanHit =  VarscanHit_v2_2_8()
+        iVarscanHit.setAttributesFromString(line, currentLineNumber)
+        return iVarscanHit
+
+    def createVarscanHit_v2_2_8_WithTag(self, line, currentLineNumber):
+        iVarscanHitWithTag =  VarscanHit_v2_2_8_WithTag()
+        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
+        return iVarscanHitWithTag
+
+    def selectTypeOfVarscanHitObject(self):
+        if self._typeOfVarscanFile == "":
+            raise CheckerException ("Error: no varscan object found !")
+        elif self._typeOfVarscanFile == "Varscan_2_2":
+            return VarscanHit()
+        elif self._typeOfVarscanFile == "Varscan_2_2_WithTag":
+            return VarscanHit_WithTag()
+        elif self._typeOfVarscanFile == "Varscan_2_2_8":
+            return VarscanHit_v2_2_8()
+        elif self._typeOfVarscanFile == "Varscan_2_2_8_WithTag":
+            return VarscanHit_v2_2_8_WithTag()
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanFileForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanFileForGnpSNP.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,72 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP
+from commons.core.parsing.VarscanFile import VarscanFile
+
+class VarscanFileForGnpSNP(VarscanFile):
+
+    def __init__(self, varscanFileName, fastqFileName="", refFastaFileName="", taxonName=""):
+        VarscanFile.__init__(self, varscanFileName)
+        self._fastqFileName = fastqFileName
+        self._refFastaFileName = refFastaFileName
+        self._taxonName = taxonName
+        self._previousVarscanHit = None
+
+    ## Equal operator
+    #
+    # @param o a VarscanFileAnalysis instance
+    #
+    def __eq__(self, o):
+        return VarscanFile.__eq__(self, o) and self._fastqFileName == o._fastqFileName \
+            and self._refFastaFileName == o._refFastaFileName and self._taxonName == o._taxonName
+
+    def getVarscanFieldSeparator(self):
+        return self._varscanFieldSeparator
+
+    def getFastqFileName(self):
+        return self._fastqFileName
+
+    def getRefFastaFileName(self):
+        return self._refFastaFileName
+
+    def getTaxonName(self):
+        return self._taxonName
+
+    def createVarscanHit(self, line, currentLineNumber):
+        line = line.strip()
+        lResults = line.split(self._varscanFieldSeparator)
+        iVarscanHit = VarscanHitForGnpSNP()
+        iVarscanHit.setAttributes(lResults, currentLineNumber)
+        iVarscanHit.formatAlleles2GnpSnp()
+        iVarscanHit.manageOccurrence(self._previousVarscanHit)
+        self._previousVarscanHit = iVarscanHit
+        return iVarscanHit

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanHit.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanHit.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,175 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.checker.CheckerException import CheckerException
+
+class VarscanHit(object):
+
+    def __init__(self, chrom = "", position = "", ref = "", var = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = ""):
+        self._chrom = chrom
+        self._position = position
+        self._ref = ref
+        self._var = var
+        self._readsRef = readsRef
+        self._readsVar = readsVar
+        self._varFreq = varFreq
+        self._strandsRef = strandsRef
+        self._strandsVar = strandsVar
+        self._qualRef = qualRef
+        self._qualVar = qualVar
+        self._pValue = pValue
+
+    ## Equal operator
+    #
+    # @param o a VarscanFileAnalysis instance
+    #
+    def __eq__(self, o):
+        return self._chrom == o._chrom and self._position == o._position and self._ref == o._ref and self._var == o._var
+
+    def setChrom(self, chromosome):
+        self._chrom = chromosome
+
+    def setPosition(self, position):
+        self._position = position
+
+    def setRef(self, referenceAllele):
+        self._ref = referenceAllele
+
+    def setVar(self, variantAllele):
+        self._var = variantAllele
+
+    def setReadsRef(self, readsRef):
+        self._readsRef = readsRef
+
+    def setReadsVar(self, readsVar):
+        self._readsVar = readsVar
+
+    def setVarFreq(self, varFreq):
+        self._varFreq = varFreq
+
+    def setStrandsRef(self, strandsRef):
+        self._strandsRef = strandsRef
+
+    def setStrandsVar(self, strandsVar):
+        self._strandsVar = strandsVar
+
+    def setQualRef(self, qualRef):
+        self._qualRef = qualRef
+
+    def setQualVar(self, qualVar):
+        self._qualVar = qualVar
+
+    def setPValue(self, pValue):
+        self._pValue = pValue
+
+    def getChrom(self):
+        return self._chrom
+
+    def getPosition(self):
+        return self._position
+
+    def getRef(self):
+        return self._ref
+
+    def getVar(self):
+        return self._var
+
+    def getReadsRef(self):
+        return self._readsRef
+
+    def getReadsVar(self):
+        return self._readsVar
+
+    def getVarFreq(self):
+        return self._varFreq
+
+    def getStrandsRef(self):
+        return self._strandsRef
+
+    def getStrandsVar(self):
+        return self._strandsVar
+
+    def getQualRef(self):
+        return self._qualRef
+
+    def getQualVar(self):
+        return self._qualVar
+
+    def getPValue(self):
+        return self._pValue
+
+    def getHeader(self):
+        return "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"
+
+    def getVarscanLine(self):
+        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getVar(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue())
+
+    def setAttributes(self, lResults, iCurrentLineNumber):
+        if lResults[0] != '':
+            self.setChrom(lResults[0])
+        else:
+            raise CheckerException ("The field Chrom is empty in varscan file in line %s" % iCurrentLineNumber)
+        if lResults[1] != '':
+            self.setPosition(lResults[1])
+        else:
+            raise CheckerException ("The field Position is empty in varscan file in line %s" % iCurrentLineNumber)
+        if lResults[2] != '':
+            self.setRef(lResults[2])
+        else:
+            raise CheckerException ("The field Ref is empty in varscan file in line %s" % iCurrentLineNumber)
+        if lResults[3] != '':
+            self.setVar(lResults[3])
+        else:
+            raise CheckerException ("The field Var is empty in varscan file in line %s" % iCurrentLineNumber)
+        if lResults[4] != '':
+            self.setReadsRef(lResults[4])
+        if lResults[5] != '':
+            self.setReadsVar(lResults[5])
+        if lResults[6] != '':
+            self.setVarFreq(lResults[6])
+        if lResults[7] != '':
+            self.setStrandsRef(lResults[7])
+        if lResults[8] != '':
+            self.setStrandsVar(lResults[8])
+        if lResults[9] != '':
+            self.setQualRef(lResults[9])
+        if lResults[10] != '':
+            self.setQualVar(lResults[10])
+        if lResults[11] != '':
+            self.setPValue(lResults[11])
+
+    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
+        varscanString = varscanString.rstrip()
+        lvarscanStringItem = varscanString.split(fieldSeparator)
+        if len(lvarscanStringItem)<12:
+            for i in range(len(lvarscanStringItem), 12):
+                lvarscanStringItem.append ("")
+        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanHitForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanHitForGnpSNP.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,232 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit import VarscanHit\n+import re\n+\n+class VarscanHitForGnpSNP(VarscanHit):\n+ \n+ def __init__(self):\n+ VarscanHit.__init__(self)\n+ self._reads1 = \'\'\n+ self._reads2 = \'\'\n+ self._varFreq = \'\'\n+ self._strands1 = \'\'\n+ self._strands2 = \'\'\n+ self._qual1 = \'\'\n+ self._qual2 = \'\'\n+ self._pvalue = \'\'\n+ self._5flank = \'\'\n+ self._3flank = \'\'\n+ self._gnpSnp_ref = \'\'\n+ self._gnpSnp_var = \'\'\n+ self._gnpSnp_position = 0\n+ self._polymType = \'\'\n+ self._polymLength = 0\n+ self._occurrence = 1\n+ \n+ ## Equal operator\n+ #\n+ # @param o a VarscanFileAnalysis instance\n+ # \n+ def __eq__(self, o):\n+ return VarscanHit.__eq__(self, o) \\\n+ and self._reads1 == o._reads1 and self._reads2 == o._reads2 \\\n+ and self._varFreq == o._varFreq and self._strands1 == o._strands1 \\\n+ and self._strands2 == o._strands2 and self._qual1 == o._qual1 \\\n+ and self._qual2 == o._qual2 and self._pvalue == o._pvalue \\\n+ and self._3flank == o._3flank and self._5flank == o._5flank \\\n+ and self._gnpSnp_position == o._gnpSnp_position and self._gnpSnp_ref == o._gnpSnp_ref \\\n+ and self._gnpSnp_var == o._gnpSnp_var and self._polymLength == o._polymLength \\\n+ and self._polymType == o._polymType and self._occurrence == o._occurrence\n+ \n+ def isPolymTypeAlreadyFoundAtThisChromAndThisPosition(self, iVarscanHitForGnpSNP):\n+ return self._chrom == iVarscanHitForGnpSNP.getChrom() \\\n+ and self._position == iVarscanHitForGnpSNP.getPosition() \\\n+ and self._polymType == iVarscanHitForGnpSNP.getPolymType()\n+ \n+ def manageOccurrence(self, iVarscanHitForGnpSNP=None):\n+ if iVarscanHitForGnpSNP != None and self.isPolymTypeAlreadyFoundAtThisChromAndThisPosition(iVarscanHitForGnpSNP):\n+ self._occurrence = 
iVarscanHitForGnpSNP.getOccurrence() + 1\n+ \n+ def formatAlleles2GnpSnp(self):\n+ if self.getVar().find("-") != -1:\n+ self._polymType = "DELETION"\n+ self._gnpSnp_position = int(self._position) + 1\n+ self._gnpSnp_ref = self._var[1:]\n+ self._g'..b'randsOfReferenceAllele):\n+ self._strands1 = strandsOfReferenceAllele\n+ \n+ def setStrands2(self, strandsOfVariantAllele):\n+ self._strands2 = strandsOfVariantAllele\n+ \n+ def setQual1(self, averageQualityOfRef):\n+ self._qual1 = averageQualityOfRef\n+ \n+ def setQual2(self, averageQualityOfVar):\n+ self._qual2 = averageQualityOfVar\n+ \n+ def setPvalue(self, pvalue):\n+ self._pvalue = pvalue\n+ \n+ def set5flank(self, s5flank):\n+ self._5flank = s5flank\n+ \n+ def set3flank(self, s3flank):\n+ self._3flank = s3flank\n+ \n+ def setGnpSNPRef(self, ref):\n+ self._gnpSnp_ref = ref\n+ \n+ def setGnpSNPVar(self, var):\n+ self._gnpSnp_var = var\n+ \n+ def setGnpSNPPosition(self, position):\n+ self._gnpSnp_position = position\n+ \n+ def setOccurrence(self, occurrence):\n+ self._occurrence = occurrence\n+ \n+ def setPolymType(self, polymType):\n+ self._polymType = polymType\n+ \n+ def setPolymLength(self, polymLength):\n+ self._polymLength = polymLength\n+ \n+ def getReads1(self):\n+ return self._reads1\n+ \n+ def getReads2(self):\n+ return self._reads2\n+ \n+ def getVarFreq(self):\n+ return self._varFreq\n+ \n+ def getStrands1(self):\n+ return self._strands1\n+ \n+ def getStrands2(self):\n+ return self._strands2\n+ \n+ def getQual1(self):\n+ return self._qual1\n+ \n+ def getQual2(self):\n+ return self._qual2\n+ \n+ def getPvalue(self):\n+ return self._pvalue\n+ \n+ def get5flank(self):\n+ return self._5flank\n+ \n+ def get3flank(self):\n+ return self._3flank\n+ \n+ def getPolymType(self):\n+ return self._polymType\n+ \n+ def getGnpSnpVar(self):\n+ return self._gnpSnp_var\n+ \n+ def getGnpSnpRef(self):\n+ return self._gnpSnp_ref\n+ \n+ def getGnpSnpPosition(self):\n+ return self._gnpSnp_position\n+ \n+ def 
getPolymLength(self):\n+ return self._polymLength\n+ \n+ def getOccurrence(self):\n+ return self._occurrence\n+ \n+ def setAttributes(self, lResults, iCurrentLineNumber):\n+ VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)\n+ if lResults[4] != \'\':\n+ self.setReads1(lResults[4])\n+ else:\n+ raise CheckerException ("The field Reads1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[5] != \'\':\n+ self.setReads2(lResults[5])\n+ else:\n+ raise CheckerException ("The field Reads2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[6] != \'\' and re.match("[0-9\\,\\%]+", lResults[6]):\n+ self.setVarFreq(lResults[6])\n+ else:\n+ raise CheckerException ("The field VarFreq is empty or in bad format in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[7] != \'\':\n+ self.setStrands1(lResults[7])\n+ else:\n+ raise CheckerException ("The field Strands1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[8] != \'\':\n+ self.setStrands2(lResults[8])\n+ else:\n+ raise CheckerException ("The field Strands2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[9] != \'\':\n+ self.setQual1(lResults[9])\n+ else:\n+ raise CheckerException ("The field Qual1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[10] != \'\':\n+ self.setQual2(lResults[10])\n+ else:\n+ raise CheckerException ("The field Qual2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n+ if lResults[11] != \'\':\n+ self.setPvalue(lResults[11])\n+ else:\n+ raise CheckerException ("The field Pvalue is empty in varscan file in line %s" % (iCurrentLineNumber))\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanHit_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanHit_WithTag.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,70 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+
+from commons.core.parsing.VarscanHit import VarscanHit
+
+class VarscanHit_WithTag(VarscanHit):
+
+    def __init__(self, tag = "", chrom = "", position = "", ref = "", var = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = ""):
+        self._tag = tag
+        VarscanHit.__init__(self, chrom, position, ref, var, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue)
+
+    def __eq__(self, o):
+        if self._tag == o._tag:
+            return VarscanHit.__eq__(self, o)
+        return False
+
+    def setTag(self, tag):
+        self._tag = tag
+
+    def getTag(self):
+        return self._tag
+
+    def getHeader(self):
+        return "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tTag\n"
+
+    def getVarscanLine(self):
+        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getVar(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue(), self.getTag())
+
+    def setAttributes(self, lResults, iCurrentLineNumber):
+        VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)
+        if lResults[12] != '':
+            self.setTag(lResults[12])
+
+    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
+        varscanString = varscanString.rstrip()
+        lvarscanStringItem = varscanString.split(fieldSeparator)
+        if len(lvarscanStringItem)<13:
+            for i in range(len(lvarscanStringItem), 13):
+                lvarscanStringItem.append ("")
+        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanHit_v2_2_8.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanHit_v2_2_8.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,176 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit import VarscanHit\n+\n+class VarscanHit_v2_2_8(VarscanHit):\n+ \n+ def __init__(self, chrom = "", position = "", ref = "", cns = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = "", mapQualRef = "", mapQualVar = "", readsRefPlus = "", readsRefMinus = "", readsVarPlus = "", readsVarMinus = "", var = ""):\n+ self._cns = cns\n+ self._mapQualRef = mapQualRef\n+ self._mapQualVar = mapQualVar\n+ self._readsRefPlus = readsRefPlus\n+ self._readsRefMinus = readsRefMinus\n+ self._readsVarPlus = readsVarPlus\n+ self._readsVarMinus = readsVarMinus\n+ VarscanHit.__init__(self, chrom, position, ref, var, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue)\n+ \n+ ## Equal operator\n+ #\n+ # @param o a VarscanFileAnalysis instance\n+ # \n+ def __eq__(self, o):\n+ if self._cns == o._cns:\n+ return VarscanHit.__eq__(self, o)\n+ return False\n+ \n+ def setCns(self, consensus):\n+ self._cns = consensus\n+ \n+ def setMapQualRef(self, mapQualRef):\n+ self._mapQualRef = mapQualRef\n+ \n+ def setMapQualVar(self, mapQualVar):\n+ self._mapQualVar = mapQualVar\n+ \n+ def setReadsRefPlus(self, readsRefPlus):\n+ self._readsRefPlus = readsRefPlus\n+ \n+ def setReadsRefMinus(self, readsRefMinus):\n+ self._readsRefMinus = readsRefMinus\n+ \n+ def setReadsVarPlus(self, readsVarPlus):\n+ self._readsVarPlus = readsVarPlus\n+ \n+ def setReadsVarMinus(self, readsVarMinus):\n+ self._readsVarMinus = readsVarMinus\n+ \n+ def getCns(self):\n+ return self._cns\n+ \n+ def getMapQualRef(self):\n+ return self._mapQualRef\n+ \n+ def getMapQualVar(self):\n+ return self._mapQualVar\n+ \n+ def getReadsRefPlus(self):\n+ return self._readsRefPlus\n+ \n+ def 
getReadsRefMinus(self):\n+ return self._readsRefMinus\n+ \n+ def getReadsVarPlus(self):\n+ return self._readsVarPlus\n+ \n+ def getReadsVarMinus(self):\n+ return self._readsVarMinus\n+ \n+ def getHeader(self):\n+ return "Chrom\\tPosition\\tRef\\tC'..b'\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n"\n+ \n+ def getVarscanLine(self):\n+ return "%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getCns(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(), self.getQualRef(), self.getQualVar(), self.getPValue(), self.getMapQualRef(), self.getMapQualVar(), self.getReadsRefPlus(), self.getReadsRefMinus(), self.getReadsVarPlus(), self.getReadsVarMinus(), self.getVar())\n+ \n+ def setAttributes(self, lResults, iCurrentLineNumber):\n+ if lResults[0] != \'\':\n+ self.setChrom(lResults[0])\n+ else:\n+ raise CheckerException ("The field Chrom is empty in varscan file in line %s" % iCurrentLineNumber)\n+ if lResults[1] != \'\':\n+ self.setPosition(lResults[1])\n+ else:\n+ raise CheckerException ("The field Position is empty in varscan file in line %s" % iCurrentLineNumber)\n+ if lResults[2] != \'\':\n+ self.setRef(lResults[2])\n+ else:\n+ raise CheckerException ("The field Ref is empty in varscan file in line %s" % iCurrentLineNumber)\n+ if lResults[3] != \'\':\n+ self.setCns(lResults[3])\n+ else:\n+ raise CheckerException ("The field Cons is empty in varscan file in line %s" % iCurrentLineNumber)\n+ if lResults[4] != \'\':\n+ self.setReadsRef(lResults[4])\n+ if lResults[5] != \'\':\n+ self.setReadsVar(lResults[5])\n+ if lResults[6] != \'\':\n+ self.setVarFreq(lResults[6])\n+ if lResults[7] != \'\':\n+ self.setStrandsRef(lResults[7])\n+ if lResults[8] != \'\':\n+ self.setStrandsVar(lResults[8])\n+ if lResults[9] != \'\':\n+ 
self.setQualRef(lResults[9])\n+ if lResults[10] != \'\':\n+ self.setQualVar(lResults[10])\n+ if lResults[11] != \'\':\n+ self.setPValue(lResults[11])\n+ if lResults[12] != \'\':\n+ self.setMapQualRef(lResults[12])\n+ if lResults[13] != \'\':\n+ self.setMapQualVar(lResults[13])\n+ if lResults[14] != \'\':\n+ self.setReadsRefPlus(lResults[14])\n+ if lResults[15] != \'\':\n+ self.setReadsRefMinus(lResults[15])\n+ if lResults[16] != \'\':\n+ self.setReadsVarPlus(lResults[16])\n+ if lResults[17] != \'\':\n+ self.setReadsVarMinus(lResults[17])\n+ if lResults[18] != \'\':\n+ self.setVar(lResults[18])\n+ else:\n+ raise CheckerException ("The field varAllele is empty in varscan file in line %s" % iCurrentLineNumber)\n+ \n+ def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\\t"):\n+ varscanString = varscanString.rstrip()\n+ lvarscanStringItem = varscanString.split(fieldSeparator)\n+ if len(lvarscanStringItem) < 19:\n+ raise CheckerException ("This varscan line (l.%s) is not complete" % iCurrentLineNumber)\n+ self.setAttributes(lvarscanStringItem, iCurrentLineNumber)\n+ \n+ def convertVarscanHit_v2_2_8_To_VarscanHit(self):\n+ iVarscanHit = VarscanHit()\n+ iVarscanHit.setChrom(self.getChrom())\n+ iVarscanHit.setPosition(self.getPosition())\n+ iVarscanHit.setRef(self.getRef())\n+ iVarscanHit.setVar(self.getVar())\n+ iVarscanHit.setReadsRef(self.getReadsRef())\n+ iVarscanHit.setReadsVar(self.getReadsVar())\n+ iVarscanHit.setVarFreq(self.getVarFreq())\n+ iVarscanHit.setStrandsRef(self.getStrandsRef())\n+ iVarscanHit.setStrandsVar(self.getStrandsVar())\n+ iVarscanHit.setQualRef(self.getQualRef())\n+ iVarscanHit.setQualVar(self.getQualVar())\n+ iVarscanHit.setPValue(self.getPValue())\n+ return iVarscanHit\n+ \n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/VarscanHit_v2_2_8_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/VarscanHit_v2_2_8_WithTag.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,88 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.checker.CheckerException import CheckerException
+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8
+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag
+
+class VarscanHit_v2_2_8_WithTag(VarscanHit_v2_2_8):
+
+    def __init__(self, chrom = "", position = "", ref = "", cns = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = "", mapQualRef = "", mapQualVar = "", readsRefPlus = "", readsRefMinus = "", readsVarPlus = "", readsVarMinus = "", var = "", tag = ""):
+        self._tag = tag
+        VarscanHit_v2_2_8.__init__(self, chrom, position, ref, var, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue, mapQualRef, mapQualVar, readsRefPlus, readsRefMinus, readsVarPlus, readsVarMinus, var)
+
+    def __eq__(self, o):
+        if self._tag == o._tag:
+            return VarscanHit_v2_2_8.__eq__(self, o)
+        return False
+
+    def setTag(self, tag):
+        self._tag = tag
+
+    def getTag(self):
+        return self._tag
+
+    def getHeader(self):
+        return "Chrom\tPosition\tRef\tCons\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tMapQual1\tMapQual2\tReads1Plus\tReads1Minus\tReads2Plus\tReads2Minus\tVarAllele\tTag\n"
+
+    def getVarscanLine(self):
+        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getCns(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue(), self.getMapQualRef(), self.getMapQualVar(), self.getReadsRefPlus(), self.getReadsRefMinus(), self.getReadsVarPlus(), self.getReadsVarMinus(), self.getVar(), self.getTag())
+
+    def setAttributes(self, lResults, iCurrentLineNumber):
+        VarscanHit_v2_2_8.setAttributes(self, lResults, iCurrentLineNumber)
+        if lResults[19] != '':
+            self.setTag(lResults[19])
+        else:
+            raise CheckerException ("The field tag is empty in varscan file in line %s" % iCurrentLineNumber)
+
+    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
+        varscanString = varscanString.rstrip()
+        lvarscanStringItem = varscanString.split(fieldSeparator)
+        if len(lvarscanStringItem) < 20:
+            raise CheckerException ("This varscan line (l.%s) is not complete" % iCurrentLineNumber)
+        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)
+
+    def convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag(self):
+        iVarscanHit = VarscanHit_WithTag()
+        iVarscanHit.setChrom(self.getChrom())
+        iVarscanHit.setPosition(self.getPosition())
+        iVarscanHit.setRef(self.getRef())
+        iVarscanHit.setVar(self.getVar())
+        iVarscanHit.setReadsRef(self.getReadsRef())
+        iVarscanHit.setReadsVar(self.getReadsVar())
+        iVarscanHit.setVarFreq(self.getVarFreq())
+        iVarscanHit.setStrandsRef(self.getStrandsRef())
+        iVarscanHit.setStrandsVar(self.getStrandsVar())
+        iVarscanHit.setQualRef(self.getQualRef())
+        iVarscanHit.setQualVar(self.getQualVar())
+        iVarscanHit.setPValue(self.getPValue())
+        iVarscanHit.setTag(self.getTag())
+        return iVarscanHit
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/WigParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/WigParser.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,324 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import re\n+import sys\n+import os.path\n+import struct\n+from commons.core.parsing.TranscriptListParser import TranscriptListParser\n+from SMART.Java.Python.structure.Transcript import Transcript\n+\n+STRANDTOSTR = {1: "(+)", 0: "(=)", None: "(=)", -1: "(-)"}\n+\n+nbOpenHandles = 30\n+\n+\n+class WigParser(TranscriptListParser):\n+\t"""A class that parses a big WIG file, creates an index and make it possible to quickly retrieve some data"""\n+\n+\tdef __init__(self, fileName, verbosity = 1):\n+\t\tself.fileName\t\t = fileName\n+\t\tself.filler\t\t\t = "\\xFF" * struct.calcsize(\'Q\')\n+\t\tself.strands\t\t = False\n+\t\tself.indexFiles\t \t = {}\n+\t\tself.indexBuilt\t\t = False\n+\t\tself.defaultValue\t = 0.0\n+\t\tself.currentChromosome = None\n+\t\tself.currentStrand\t = 1\n+\t\tself.verbosity = verbosity\n+\t\tsuper(WigParser, self).__init__(fileName, verbosity)\n+\n+\n+\tdef __def__(self):\n+\t\tfor file in self.indexFiles.values():\n+\t\t\tfile.close()\n+\n+\n+\tdef setStrands(self, strands):\n+\t\tself.strands = strands\n+\n+\n+\tdef setDefaultValue(self, value):\n+\t\tself.defaultValue = value\n+\n+\n+\tdef getFileFormats():\n+\t\treturn ["wig"]\n+\tgetFileFormats = staticmethod(getFileFormats)\n+\n+\n+\tdef setStrands(self, strands):\n+\t\t"""\n+\t\tConsider both strands separately\n+\t\t"""\n+\t\tself.strands = strands\n+\n+\n+\tdef makeIndexName(self, chromosome, strand = None):\n+\t\t"""\n+\t\tCreate an index name for a file\n+\t\t"""\n+\t\tdirectoryName = os.path.dirname(self.fileName)\n+\t\tif strand == 
None:\n+\t\t\tstrandName = ""\n+\t\telse:\n+\t\t\tstrandName = "+" if strand == 1 else "-"\n+\t\tindexName = os.path.join(directoryName, ".%s%s.index" % (chromosome, strandName))\n+\t\treturn indexName\n+\t\n+\t\n+\tdef findIndexFile(self, chromosome, strand = None):\n+\t\t"""\n+\t\tCheck if the index of a file exists\n+\t\t""" \n+\t\tindexName = self.makeIndexName(chromosome, strand)\n+\t\tif os.path.exists(indexName):\n+\t\t\treturn indexName\n+\t\treturn False\n+\t\n+\t\n+\tdef makeIndexFile(self):\n+\t\t"""\n+\t\tCreate the index for a file\n+\t\t"""\n+\t\tif self.indexBuilt:\n+\t\t\treturn\n+\n+\t\tinputFile = open(self.fileName)\n+\t\toutputFile = None\n+\t\tindex\t = 0\n+\t\tmark\t = inputFile.tell()\n+\t\tline\t = inputFile.readline().strip()\n+\t\tchromosome = None\n+\n+\t\twhile line != "":\n+\t\t\tm1 = re.search(r"^\\s*-?\\d+\\.?\\d*\\s*$", line)\n+\t\t\tm2 = re.search(r"^\\s*(\\d+)\\s+-?\\d+\\.?\\d*\\s*$", line)\n+\t\t\tm3 = re.search(r"^\\s*fixedStep\\s+chrom=(\\S+)\\s+start=(\\d+)\\s+step=1\\s*$", line)\n+\t\t\tm4 = re.search(r"^\\s*fixedStep\\s+chrom=\\S+\\s+start=\\d+\\s+step=\\d+\\s+span=\\d+\\s*$", line)\n+\t\t\tm5 = re.search(r"^\\s*variable'..b'ndex for chromosome %s, strand %s does not exist." 
% (chromosome, STRANDTOSTR[strand])\n+\t\t\treturn False\n+\t\tindexFile = open(indexFileName, "rb")\n+\n+\t\tif len(self.indexFiles.keys()) > nbOpenHandles:\n+\t\t\tremovedKey = set(self.indexFiles.keys()).pop()\n+\t\t\tself.indexFiles[removedKey].close()\n+\t\t\tdel self.indexFiles[removedKey]\n+\t\tself.indexFiles[indexFileKey] = indexFile\n+\t\treturn indexFile\n+\t\t\n+\n+\t\n+\tdef findIndex(self, chromosome, start, strand = None):\n+\t\t"""\n+\t\tFind the point where to start reading file\n+\t\t"""\n+\n+\t\tsizeOfLong = struct.calcsize("Q")\n+\t\tempty\t = int(struct.unpack("Q", self.filler)[0])\n+\t\toffset\t = empty\n+\t\tindexFile = self.getIndexFileHandle(chromosome, strand)\n+\t\n+\t\tif not indexFile:\n+\t\t\treturn (None, None)\n+\t\t\n+\t\twhile offset == empty:\n+\t\t\taddress = start * sizeOfLong\n+\t\t\tindexFile.seek(address, os.SEEK_SET)\n+\t\t\t\n+\t\t\tbuffer = indexFile.read(sizeOfLong)\n+\t\t\tif len(buffer) != sizeOfLong:\n+\t\t\t\tif buffer == "":\n+\t\t\t\t\tprint "Warning! Index position %d of chromosome %s on strand %s seems out of range!" % (start, chromosome, STRANDTOSTR[strand])\n+\t\t\t\t\treturn (None, None)\n+\t\t\t\telse:\n+\t\t\t\t\traise Exception("Problem fetching position %d of chromosome %s on strand %s seems out of range!" 
% (start, chromosome, STRANDTOSTR[strand]))\n+\t\t\t\n+\t\t\toffset = int(struct.unpack("Q", buffer)[0])\n+\t\t\tstart += 1\n+\t\t\t\n+\t\tstart -= 1\n+\t\treturn (offset, start)\n+\t\n+\t\n+\n+\tdef getRange(self, chromosome, start, end):\n+\t\t"""\n+\t\tParse a wig file and output a range\n+\t\t"""\n+\t\tarrays = {}\n+\t\tstrands = {1: "+", -1: "-"} if self.strands else {0: ""}\n+\n+\t\tfor strand in strands:\n+\n+\t\t\tarray = [self.defaultValue] * (end - start + 1)\n+\t\t\tfile = open(self.fileName)\n+\t\t\toffset, index = self.findIndex(chromosome, start, strand if self.strands else None)\n+\t\t\tif offset == None:\n+\t\t\t\tarrays[strand] = array\n+\t\t\t\tcontinue\n+\t\t\tfile.seek(offset, os.SEEK_SET)\n+\n+\t\t\tfor line in file:\n+\t\t\t\tline = line.strip()\n+\n+\t\t\t\tm1 = re.search(r"^\\s*(-?\\d+\\.?\\d*)\\s*$", line)\n+\t\t\t\tm2 = re.search(r"^\\s*(\\d+)\\s+(-?\\d+\\.?\\d*)\\s*$", line)\n+\t\t\t\tm3 = re.search(r"^\\s*fixedStep\\s+chrom=(\\S+)\\s+start=(\\d+)\\s+step=\\d+\\s*$", line)\n+\t\t\t\tm4 = re.search(r"^\\s*variableStep\\s+chrom=(\\S+)\\s*$", line)\n+\n+\t\t\t\tif m1 != None:\n+\t\t\t\t\tif index > end:\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tif index >= start:\n+\t\t\t\t\t\tarray[index - start] = float(m1.group(1))\n+\t\t\t\t\tindex += 1\n+\t\t\t\telif m2 != None:\n+\t\t\t\t\tindex = int(m2.group(1))\n+\t\t\t\t\tif index > end:\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tif index >= start:\n+\t\t\t\t\t\tarray[index - start] = float(m2.group(2))\n+\t\t\t\t\tindex += 1\n+\t\t\t\telif m3 != None:\n+\t\t\t\t\tif m3.group(1) != "%s%s" % (chromosome, strands[strand]):\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tindex = int(m3.group(2))\n+\t\t\t\telif m4 != None:\n+\t\t\t\t\tif m4.group(1) != "%s%s" % (chromosome, strands[strand]):\n+\t\t\t\t\t\tbreak\n+\t\t\t\telif (len(line) == 0) or (line[0] == "#") or line.startswith("track"):\n+\t\t\t\t\tpass\n+\t\t\t\telse:\n+\t\t\t\t\traise Exception("Error! 
Cannot read line \'%s\' of wig file" % (line))\n+\n+\t\t\tfile.close()\n+\t\n+\t\t\tarrays[strand] = array\n+\t\t\t\n+\t\tif self.strands:\n+\t\t\treturn arrays\n+\t\treturn array\n+\t\n+\n+\tdef skipFirstLines(self):\n+\t\treturn\n+\n+\t\n+\tdef parseLine(self, line):\n+\t\tif line.startswith("track"):\n+\t\t\treturn None\n+\t\tm = re.search(r"^\\s*variableStep\\s+chrom=(\\S+)", line)\n+\t\tif m != None:\n+\t\t\tchromosome = m.group(1)\n+\t\t\tif chromosome.endswith("+"):\n+\t\t\t\tself.currentStrand = 1\n+\t\t\t\tself.currentChromosome = chromosome[:-1]\n+\t\t\telif chromosome.endswith("-"):\n+\t\t\t\tself.currentStrand = -1\n+\t\t\t\tself.currentChromosome = chromosome[:-1]\n+\t\t\telse:\n+\t\t\t\tself.currentStrand = 1\n+\t\t\t\tself.currentChromosome = chromosome\n+\t\t\treturn None\n+\t\tposition, value = line.split()\n+\t\tposition = int(position)\n+\t\tvalue\t= float(value)\n+\t\ttranscript = Transcript()\n+\t\ttranscript.setChromosome(self.currentChromosome)\n+\t\ttranscript.setStart(position)\n+\t\ttranscript.setEnd(position)\n+\t\ttranscript.setDirection(self.currentStrand)\n+\t\ttranscript.setTagValue("ID", "wig_%s_%d_%d" % (self.currentChromosome, self.currentStrand, position))\n+\t\ttranscript.setTagValue("nbElements", value)\n+\t\treturn transcript\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/__init__.pyc

Binary file commons/core/parsing/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/multifastaParserLauncher.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/multifastaParserLauncher.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+"""
+Launcher for the multifasta parser.
+@param b: Name of the batch of sequences
+@param g: Name of the gene
+@param t: Scientific name of the taxon concerned
+@param f: Name of the multifasta input file
+"""
+
+
+import os
+import sys
+import getopt
+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile
+
+# Working directory captured when the module is loaded; not referenced
+# anywhere else in this script.
+CURRENT_DIR = os.getcwd()
+
+def help():
+
+    """
+    Give the list of the command-line options.
+    """
+
+    print "Usage: ",sys.argv[0],"[ options ]"
+    print "     -h: this help"
+    print "Mandatory option:"
+    print "     -t: Scientific name of the taxon concerned"
+    print "Exclusive options (use either the first or the second, one should be used)"
+    print "     -f: Name of the multifasta input file in one batch mode"
+    print "     -d: Name of the directory containing multifasta input file(s) in multi-batch mode"
+    print "Only in one batch mode: mandatory options (when -f is used):"
+    print "     -b: Name of the batch of submitted sequences"
+    print "     -g: Name of the gene"
+    print ""
+
+
+def runOneInputFile(batchName, geneName, taxon, inputFileName):
+    print "Multifasta parseur launched:!\n"
+    print "-- Input File: " + inputFileName + "\n"
+    print "-- Batch name: " + batchName + "\n"
+    print "-- Gene name: " + geneName + "\n"
+    print "-- Taxon: " + taxon + "\n"
+    #TODO: gerer le delete des fichiers(mode append)
+    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, geneName)
+    multifasta2SNPFile.runOneBatch(inputFileName)
+    print "OK: Files generated!"
+
+
+def runSeveralInputFile(taxon, rootDirectoryName):
+    multifasta2SNPFile = Multifasta2SNPFile(taxon)
+    multifasta2SNPFile.runSeveralBatches(rootDirectoryName)
+
+def main():
+    batchName = ""
+    geneName = ""
+    taxon = ""
+    inputFileName = ""
+    rootDirectoryName = ""
+
+
+    try:
+        opts,args = getopt.getopt(sys.argv[1:],"hb:g:t:f:d:")
+    except getopt.GetoptError:
+        print "Invalid options\n"
+        help()
+        sys.exit(2)
+
+    for o, a in opts:
+        if o == "-h":
+            help()
+            exit(0)
+        elif o == "-b":
+            batchName = a
+        elif o == "-g":
+            geneName = a
+        elif o == "-t":
+            taxon = a
+        elif o == "-f":
+            inputFileName = a
+        elif o == "-d":
+            rootDirectoryName = os.path.abspath(a)
+
+    if taxon == "":
+        print "*** Error: The mandatory option -t is missing"
+        help()
+        sys.exit(1)
+
+    if (inputFileName == "" and  rootDirectoryName == "") or (inputFileName != "" and  rootDirectoryName != ""):
+        print "*** Error: You have to specify the input mode: choose either -f (for one file) or -d (for one directory of several files)"
+        help()
+        sys.exit(1)
+
+    if(inputFileName != ""):
+        if batchName == "" or geneName == "":
+            print "*** Error: A mandatory option is missing in one batch mode (-b or -g)"
+            help()
+            sys.exit(1)
+
+    if(inputFileName != ""):
+        runOneInputFile(batchName, geneName, taxon, inputFileName)
+    else:
+        runSeveralInputFile(taxon, rootDirectoryName)
+
+
+    return 0
+
+#------------------------------------------------------------------------------
+# Run the launcher only when this file is executed as a script; the value
+# returned by main() is not used as the exit status.
+if __name__ == "__main__":
+    main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BedParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BedParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,58 @@
+import unittest, os
+from commons.core.parsing.BedParser import BedParser
+
+
+class Test_BedParser(unittest.TestCase):
+
+    def test_Parser(self):
+        parser = BedParser("data/testBedParser1.bed")
+
+        self.assertEqual(parser.getNbTranscripts(), 1)
+
+        for transcript in parser.getIterator():
+            self.assertEqual(transcript.getChromosome(), "arm_X")
+            self.assertEqual(transcript.getName(), "test1.1")
+            self.assertEqual(transcript.getStart(), 1000)
+            self.assertEqual(transcript.getEnd(), 2999)
+            self.assertEqual(transcript.getDirection(), 1)
+            self.assertEqual(transcript.getNbExons(), 2)
+            exons = transcript.getExons()
+            self.assertEqual(exons[0].getChromosome(), "arm_X")
+            self.assertEqual(exons[0].getStart(), 1000)
+            self.assertEqual(exons[0].getEnd(), 1099)
+            self.assertEqual(exons[0].getDirection(), 1)
+            self.assertEqual(exons[1].getChromosome(), "arm_X")
+            self.assertEqual(exons[1].getStart(), 2000)
+            self.assertEqual(exons[1].getEnd(), 2999)
+            self.assertEqual(exons[1].getDirection(), 1)
+
+    def test_Parser_short(self):
+        tmpFileName = "tmpFile.bed"
+        tmpHandle   = open(tmpFileName, "w")
+        tmpHandle.write("""X\t554748\t554904\texon
+X\t554748\t554904\tCDS
+X\t554748\t554750\tstart_codon
+""")
+        tmpHandle.close()
+        parser = BedParser(tmpFileName)
+        self.assertEqual(parser.getNbTranscripts(), 3)
+        for cpt, transcript in enumerate(parser.getIterator()):
+            self.assertEqual(transcript.getNbExons(), 1)
+            self.assertEqual(transcript.getChromosome(), "X")
+            self.assertEqual(transcript.getStart(), 554748)
+            if cpt == 0:
+                self.assertEqual(transcript.getEnd(), 554903)
+                self.assertEqual(transcript.getName(), "exon")
+            elif cpt == 1:
+                self.assertEqual(transcript.getEnd(), 554903)
+                self.assertEqual(transcript.getName(), "CDS")
+            elif cpt == 2:
+                self.assertEqual(transcript.getEnd(), 554749)
+                self.assertEqual(transcript.getName(), "start_codon")
+        os.remove(tmpFileName)
+
+
+
+# Run the test case with the unittest runner when executed as a script.
+if __name__ == '__main__':
+        unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BlatFileParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatFileParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,61 @@
+import unittest
+from commons.core.parsing.BlatFileParser import BlatFileParser
+
+
+class Test_BlatFileParser(unittest.TestCase):
+    """Tests for BlatFileParser run against a small hand-written PSL file.
+
+    The fixture written by _writeBlatInputFile() holds 10 hit lines for 6
+    distinct query names; the query "3:574_2:574_433_5:1:G/C" has 5 hits and
+    every other query has exactly one.
+    """
+
+    # Expected content of getDictOfQueries(): every query name of the fixture
+    # mapped to 1.
+    _EXP_QUERIES = {
+        "5:574_1:574_539_5:1:G/C": 1,
+        "3:574_1:574_539_5:1:G/C": 1,
+        "5:574_2:574_433_5:1:G/C": 1,
+        "3:574_2:574_433_5:1:G/C": 1,
+        "5:574_5:574_607_5:1:G/C": 1,
+        "3:574_5:574_607_5:1:G/C": 1,
+    }
+
+    def test_parseBlatFile(self):
+        """parseBlatFile() must collect the 10 hits and the 6 query names."""
+        fileName = "dummayBlat.psl"
+        self._writeBlatInputFile(fileName)
+        blatFileParser = BlatFileParser(fileName)
+        blatFileParser.parseBlatFile()
+        # assertEqual (rather than assertTrue(a == b)) reports the observed
+        # value when the check fails.
+        self.assertEqual(len(blatFileParser.getListsOfHits()), 10)
+        self.assertEqual(self._EXP_QUERIES, blatFileParser.getDictOfQueries())
+
+    def test_parseBlatFileByQueries(self):
+        """parseBlatFileByQueries() must group the hits under their query name."""
+        fileName = "dummayBlat.psl"
+        self._writeBlatInputFile(fileName)
+        blatFileParser = BlatFileParser(fileName)
+        blatFileParser.parseBlatFileByQueries()
+        obsDict = blatFileParser.getDictOfBlatHitsByQueries()
+        # Expected number of hits stored for each query name.
+        expNbHitsByQuery = {
+            "5:574_1:574_539_5:1:G/C": 1,
+            "3:574_1:574_539_5:1:G/C": 1,
+            "5:574_2:574_433_5:1:G/C": 1,
+            "3:574_2:574_433_5:1:G/C": 5,
+            "5:574_5:574_607_5:1:G/C": 1,
+            "3:574_5:574_607_5:1:G/C": 1,
+        }
+        for queryName in sorted(expNbHitsByQuery):
+            self.assertEqual(len(obsDict[queryName]), expNbHitsByQuery[queryName])
+        self.assertEqual(self._EXP_QUERIES, blatFileParser.getDictOfQueries())
+
+    def _removeFile(self, fileName):
+        """Delete fileName if it exists (cleanup hook for the PSL fixture)."""
+        # Imported locally because this module only imports unittest at file level.
+        import os
+        if os.path.exists(fileName):
+            os.remove(fileName)
+
+    def _writeBlatInputFile(self, fileName):
+        """Write the PSL fixture to fileName and schedule its removal.
+
+        The file is made of the 5 PSL header lines followed by 10 hit lines.
+        """
+        # Registered before writing so that the file is removed after the test
+        # whether it passes or fails, instead of being left in the working directory.
+        self.addCleanup(self._removeFile, fileName)
+        with open(fileName, "w") as handle:
+            handle.write("psLayout version 3\n")
+            handle.write("\n")
+            handle.write("match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n")
+            handle.write("     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n")
+            handle.write("---------------------------------------------------------------------------------------------------------------------------------------------------------------\n")
+            handle.write("246\t0\t0\t4\t0\t0\t0\t0\t-\t5:574_1:574_539_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065213\t1065463\t1\t250,\t0,\t1065213,\n")
+            handle.write("247\t0\t0\t2\t0\t0\t0\t0\t-\t3:574_1:574_539_5:1:G/C\t250\t1\t250\ttaecs3B_RPH7\t3109948\t1064962\t1065211\t1\t249,\t0,\t1064962,\n")
+            handle.write("249\t0\t0\t1\t0\t0\t0\t0\t-\t5:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065319\t1065569\t1\t250,\t0,\t1065319,\n")
+            handle.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065068\t1065318\t1\t250,\t0,\t1065068,\n")
+            handle.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065310\t1065560\t1\t250,\t0,\t1065310,\n")
+            handle.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065059\t1065309\t1\t250,\t0,\t1065059,\n")
+            handle.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064805\t1065055\t1\t250,\t0,\t1064805,\n")
+            handle.write("68\t0\t0\t1\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t69\t0\t69\ttaecs3B_RPH9\t3109948\t1064733\t1064802\t1\t69,\t0,\t1064733,\n")
+            handle.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t5:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065145\t1065395\t1\t250,\t0,\t1065145,\n")
+            handle.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064894\t1065144\t1\t250,\t0,\t1064894,\n")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BlatParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatParser.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,445 @@\n+from commons.core.parsing.BlatParser import BlatParser\n+import unittest\n+\n+\n+class Test_BlatParser(unittest.TestCase):\n+\n+\n+ def test_setAttributesFromString(self):\n+ blatLine = "315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,"\n+ \n+ iBlatParser = BlatParser()\n+ iBlatParser.setAttributesFromString(blatLine)\n+ \n+ obsmatch = iBlatParser.getMatch()\n+ obsmismatch = iBlatParser.getMismatch()\n+ obsrepMatch = iBlatParser.getRepMatch()\n+ obsN = iBlatParser.getN()\n+ obsQGapCount = iBlatParser.getQGapCount()\n+ obsQGapBases = iBlatParser.getQGapBases()\n+ obsTGapCount = iBlatParser.getTGapCount()\n+ obsTGapBases = iBlatParser.getTGapBases()\n+ obsstrand = iBlatParser.getStrand()\n+ obsQName = iBlatParser.getQName()\n+ obsQSize = iBlatParser.getQSize()\n+ obsQStart = iBlatParser.getQStart()\n+ obsQEnd = iBlatParser.getQEnd()\n+ obsTName = iBlatParser.getTName()\n+ obsTSize = iBlatParser.getTSize()\n+ obsTStart = iBlatParser.getTStart()\n+ obsTEnd = iBlatParser.getTEnd()\n+ obsblockCount = iBlatParser.getBlockCount()\n+ obsblockSizes = iBlatParser.getBlockSizes()\n+ obsqStarts = iBlatParser.getQStarts()\n+ obstStarts = iBlatParser.getTStarts()\n+ \n+ expmatch = "315"\n+ expmismatch = "20"\n+ exprepMatch = "0"\n+ expN = "0"\n+ expQGapCount = "3"\n+ expQGapBases = "10"\n+ expTGapCount = "2"\n+ expTGapBases = "9"\n+ expstrand = "+"\n+ expQName = "MRRE1H001H13FM1"\n+ expQSize = "378"\n+ expQStart = "0"\n+ expQEnd = "345"\n+ expTName = "chr16"\n+ expTSize = "22053297"\n+ expTStart = "21686950"\n+ expTEnd = "21687294"\n+ expblockCount = "4"\n+ expblockSizes = "76,185,7,67,"\n+ expqStarts = "0,77,263,278,"\n+ exptStarts = "21686950,21687026,21687213,21687227,"\n+ \n+ self.assertEquals(expmatch, obsmatch)\n+ self.assertEquals(expmismatch, obsmismatch)\n+ self.assertEquals(exprepMatch, obsrepMatch)\n+ 
self.assertEquals(expN, obsN)\n+ self.assertEquals(expQGapCount, obsQGapCount)\n+ self.assertEquals(expQGapBases, obsQGapBases)\n+ self.assertEquals(expTGapCount, obsTGapCount)\n+ self.assertEquals(expTGapBases, obsTGapBases)\n+ self.assertEquals(expstrand, obsstrand)\n+ self.assertEquals(expQName, obsQName)\n+ self.assertEquals(expQSize, obsQSize)\n+ self.assertEquals(expQStart, obsQStart)\n+ self.assertEquals(expQEnd, obsQEnd)\n+ self.assertEquals(expTName, obsTName)\n+ self.assertEquals(expTSize, obsTSize)\n+ self.assertEquals(expTStart, obsTStart)\n+ self.assertEquals(expTEnd, obsTEnd)\n+ self.assertEquals(expblockCount, obsblockCount)\n+ self.assertEquals(expblockSizes, obsblockSizes)\n+ self.assertEquals(expqStarts, obsqStarts)\n+ self.assertEquals(exptStarts, obstStarts)\n+ \n+ def test_setAttributesFromString_empty_QName(self):\n+ blatLine = "315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\t\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,"\n+ \n+ iBlatParser = BlatParser()\n+ iBlatParser.setAttributesFromString(blatLine)\n+ \n+ obsmatch = iBlatParser.getMatch()\n+ obsmismatch = iBlatParser.getMismatch()\n+ obsrepMatch = iBlatParser.getRepMatch()\n+ obsN = iBlatParser.getN()\n+ obsQGapCount = iBlatParser.getQGapCount()\n+ obsQGapBases = iBlatParser.getQGapBases()\n+ obsTGapCount = iBlatParser.getTGapCount()\n+ obsTGapBases = iBlatParser.getTGapBases()\n+ obsstrand = iBlatParser.getStrand()\n+ obsQName = iBlatParser.getQName()\n+ '..b'87227,")\n+ \n+ self.assertTrue(BlatParser1 == BlatParser2) \n+ \n+ def test_eq_Equals_case2(self):\n+ BlatParser1 = BlatParser()\n+ BlatParser1.setMatch("315")\n+ BlatParser1.setMismatch("20")\n+ BlatParser1.setRepMatch("0")\n+ BlatParser1.setN("0")\n+ BlatParser1.setQGapCount("3")\n+ BlatParser1.setQGapBases("10")\n+ BlatParser1.setTGapCount("2")\n+ BlatParser1.setTGapBases("9")\n+ BlatParser1.setStrand("+")\n+ BlatParser1.setQName("MRRE1H001H13FM1")\n+ 
BlatParser1.setQSize("378")\n+ BlatParser1.setQStart("0")\n+ BlatParser1.setQEnd("345")\n+ BlatParser1.setTName("chr16")\n+ BlatParser1.setTSize("22053297")\n+ BlatParser1.setTStart("21686950")\n+ BlatParser1.setTEnd("21687294")\n+ BlatParser1.setBlockCount("4")\n+ BlatParser1.setBlockSizes("76,185,7,67,")\n+ BlatParser1.setQStarts("0,77,263,278,")\n+ BlatParser1.setTStarts("21686950,21687026,21687213,21687227,")\n+ \n+ BlatParser2 = BlatParser()\n+ BlatParser2.setMatch("315")\n+ BlatParser2.setMismatch("20")\n+ BlatParser2.setRepMatch("0")\n+ BlatParser2.setN("0")\n+ BlatParser2.setQGapCount("3")\n+ BlatParser2.setQGapBases("10")\n+ BlatParser2.setTGapCount("2")\n+ BlatParser2.setTGapBases("9")\n+ BlatParser2.setStrand("+")\n+ BlatParser2.setQName("TotoFM2")\n+ BlatParser2.setQSize("378")\n+ BlatParser2.setQStart("0")\n+ BlatParser2.setQEnd("345")\n+ BlatParser2.setTName("chr16")\n+ BlatParser2.setTSize("22053297")\n+ BlatParser2.setTStart("21686950")\n+ BlatParser2.setTEnd("21687294")\n+ BlatParser2.setBlockCount("4")\n+ BlatParser2.setBlockSizes("76,185,7,67,")\n+ BlatParser2.setQStarts("0,77,263,278,")\n+ BlatParser2.setTStarts("21686950,21687026,21687213,21687227,")\n+ \n+ self.assertTrue(BlatParser1 == BlatParser2) \n+ \n+ def test_eq_notEquals(self):\n+ BlatParser1 = BlatParser()\n+ BlatParser1.setMatch("315")\n+ BlatParser1.setMismatch("20")\n+ BlatParser1.setRepMatch("0")\n+ BlatParser1.setN("0")\n+ BlatParser1.setQGapCount("3")\n+ BlatParser1.setQGapBases("10")\n+ BlatParser1.setTGapCount("2")\n+ BlatParser1.setTGapBases("9")\n+ BlatParser1.setStrand("+")\n+ BlatParser1.setQName("MRRE1H001H13FM1")\n+ BlatParser1.setQSize("378")\n+ BlatParser1.setQStart("0")\n+ BlatParser1.setQEnd("345")\n+ BlatParser1.setTName("chr16")\n+ BlatParser1.setTSize("22053297")\n+ BlatParser1.setTStart("21686950")\n+ BlatParser1.setTEnd("21687294")\n+ BlatParser1.setBlockCount("4")\n+ BlatParser1.setBlockSizes("76,185,7,67,")\n+ BlatParser1.setQStarts("0,77,263,278,")\n+ 
BlatParser1.setTStarts("21686950,21687026,21687213,21687227,")\n+ \n+ BlatParser2 = BlatParser()\n+ BlatParser2.setMatch("315")\n+ BlatParser2.setMismatch("20")\n+ BlatParser2.setRepMatch("0")\n+ BlatParser2.setN("0")\n+ BlatParser2.setQGapCount("3")\n+ BlatParser2.setQGapBases("10")\n+ BlatParser2.setTGapCount("2")\n+ BlatParser2.setTGapBases("9")\n+ BlatParser2.setStrand("+")\n+ BlatParser2.setQName("TotoFM2")\n+ BlatParser2.setQSize("378")\n+ BlatParser2.setQStart("0")\n+ BlatParser2.setQEnd("345")\n+ BlatParser2.setTName("chr8")\n+ BlatParser2.setTSize("2205")\n+ BlatParser2.setTStart("2124")\n+ BlatParser2.setTEnd("2168")\n+ BlatParser2.setBlockCount("4")\n+ BlatParser2.setBlockSizes("76,185,7,67,")\n+ BlatParser2.setQStarts("0,77,263,278,")\n+ BlatParser2.setTStarts("21686950,21687026,21687213,21687227,")\n+ \n+ self.assertFalse(BlatParser1 == BlatParser2) \n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatToGff.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,28 @@
+from commons.core.parsing.BlatToGff import BlatToGff
+import unittest
+
+
+class Test_BlatToGff(unittest.TestCase):
+    """Check the GFF line that BlatToGff builds from a single BLAT (PSL) line."""
+
+    # BLAT hit shared by both tests.
+    _BLAT_LINE = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
+
+    def _convert(self, methodName):
+        """Return the GFF line built from _BLAT_LINE with the given method name."""
+        nbLine = 15
+        iBlatToGff = BlatToGff()
+        # Set on the instance, not on the BlatToGff class: a class-level
+        # assignment would stay in place after the test and leak into every
+        # test that runs later in the same process.
+        iBlatToGff._methodName = methodName
+        return iBlatToGff.convertBlatObjectToGffLine(self._BLAT_LINE, nbLine)
+
+    def test_convertBlatObjectToGffLine(self):
+        """With an empty method name the feature type (3rd column) is 'BES'."""
+        obsGffLine = self._convert('')
+        expGffLine = 'chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
+        self.assertEqual(expGffLine, obsGffLine)
+
+    def test_convertBlatObjectToGffLine_with_methodName(self):
+        """With method name 'Test' the feature type (3rd column) is 'Test:BES'."""
+        obsGffLine = self._convert('Test')
+        expGffLine = 'chr16\tBlatToGff\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
+        self.assertEqual(expGffLine, obsGffLine)
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BlatToGffForBesPaired.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BlatToGffForBesPaired.py Fri Jan 18 04:54:14 2013 -0500

[

b"@@ -0,0 +1,292 @@\n+import unittest, os\n+from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired\n+\n+\n+class Test_BlatToGffForBesPaired(unittest.TestCase):\n+\n+\n+ def test_convertBlatObjectToGffLine(self):\n+ blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n+ nbLine = 15\n+ besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n+ self._writeBesSequences(besFastaFileName)\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ iBlatToGffForBesPaired._methodName = ''\n+ iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n+ obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n+ expGffLine = 'chr16\\tBlatToGffForBesPaired\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n+ expBesName = 'MRRE1H001H13FM1'\n+ expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n+ expBesType = 'FM'\n+ self.assertEquals(expGffLine, obsGffLine)\n+ self.assertEquals(expBesName, obsBesName)\n+ self.assertEquals(expBesSeq, obsBesSeq)\n+ self.assertEquals(expBesType, obsBesType)\n+ os.remove(besFastaFileName)\n+\n+ def test_convertBlatObjectToGffLine_with_methodName(self):\n+ blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n+ nbLine = 15\n+ besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n+ self._writeBesSequences(besFastaFileName)\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ 
iBlatToGffForBesPaired._methodName = 'Test'\n+ iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n+ obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n+ expGffLine = 'chr16\\tBlatToGffForBesPaired\\tTest:BES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n+ expBesName = 'MRRE1H001H13FM1'\n+ expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n+ expBesType = 'FM'\n+ self.assertEquals(expGffLine, obsGffLine)\n+ self.assertEquals(expBesName, obsBesName)\n+ self.assertEquals(expBesSeq, obsBesSeq)\n+ self.assertEquals(expBesType, obsBesType)\n+ os.remove(besFastaFileName)\n+ \n+ def test_getBesName(self):\n+ col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n'\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ obsBesName = iBlatToGffForBesPaired.getBesName(col9)\n+ expBesName = 'machin1'\n+ self.assertEquals(expBesName, obsBesName)\n+ \n+ def test_checkBesNames_OK(self):\n+ besName1 = 'MRRE1H001H13FM8'\n+ besName2 = 'MRRE1H001H13RM2'\n+ line = 10\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ self.assertTrue(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n+ \n+ def test_checkBesNames_NOK(self):\n+ besName1 = 'MRRE1H001H13FM1'\n+ besName2 = 'TOTORM2'\n+ line = 10\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ self.assertFalse(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n+ \n+ def test_checkBesPositions_OK1(self):\n+ tBes1 = ('chr16', 25, 150)\n+ tBes2 "..b'ommons/core/parsing/test/sequence.fasta\' % os.environ[\'REPET_PATH\']\n+ fastaFile = open(fastaFileName, \'w\')\n+ fastaFile.write(\'>seq1\\n\')\n+ fastaFile.write(\'ATCGATCGATCGATCGATACGTCAGCGATCGAT\\n\')\n+ 
fastaFile.write(\'TACGTACGTACGATCGATCGATCGATCGATCGG\\n\')\n+ fastaFile.write(\'TACGTACGTACGATCGACGATCGATGCCGATCG\\n\')\n+ fastaFile.write(\'ATCGAC\\n\')\n+ fastaFile.write(\'>seq2\\n\')\n+ fastaFile.write(\'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\\n\')\n+ fastaFile.write(\'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\\n\')\n+ fastaFile.write(\'ACTGACACTGTACGTAC\\n\')\n+ fastaFile.write(\'>seq3\\n\')\n+ fastaFile.write(\'ACTCGATCGATCG\\n\')\n+ fastaFile.close()\n+ \n+ seqName = \'seq4\'\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ iBlatToGffForBesPaired._inputFileFasta = fastaFileName\n+ obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)\n+ expSeq = \'NA\'\n+ self.assertEquals(expSeq, obsSeq)\n+ os.remove(fastaFileName)\n+ \n+ def test_getBesFmAndRmNamesAndSequences_case1(self):\n+ nameBes1 = \'MRRE1H0072T1FM1\'\n+ seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+ typeBes1 = \'FM\'\n+ nameBes2 = \'MRRE1H0072T1RM3\'\n+ seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+ typeBes2 = \'RM\'\n+ iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n+ expNameBesFM = \'MRRE1H0072T1FM1\'\n+ expNameBesRM = \'MRRE1H0072T1RM3\'\n+ expSeqBesFM = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+ expSeqBesRM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+ self.assertEquals(expNameBesFM, obsNameBesFM)\n+ self.assertEquals(expNameBesRM, obsNameBesRM)\n+ self.assertEquals(expSeqBesFM, obsSeqBesFM)\n+ self.assertEquals(expSeqBesRM, obsSeqBesRM)\n+ \n+ def test_getBesFmAndRmNamesAndSequences_case2(self):\n+ nameBes1 = \'MRRE1H0072T1RM1\'\n+ seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+ typeBes1 = \'RM\'\n+ nameBes2 = \'MRRE1H0072T1FM3\'\n+ seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+ typeBes2 = \'FM\'\n+ 
iBlatToGffForBesPaired = BlatToGffForBesPaired()\n+ obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n+ expNameBesFM = \'MRRE1H0072T1FM3\'\n+ expNameBesRM = \'MRRE1H0072T1RM1\'\n+ expSeqBesFM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n+ expSeqBesRM = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n+ self.assertEquals(expNameBesFM, obsNameBesFM)\n+ self.assertEquals(expNameBesRM, obsNameBesRM)\n+ self.assertEquals(expSeqBesFM, obsSeqBesFM)\n+ self.assertEquals(expSeqBesRM, obsSeqBesRM)\n+ \n+ def _writeBesSequences(self, fileName):\n+ file = open(fileName, \'w\')\n+ file.write(\'>MRRE1H001H13RM1\\n\')\n+ file.write(\'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\\n\')\n+ file.write(\'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\\n\')\n+ file.write(\'ATCGATCGATCGATCGACATCGTACG\\n\')\n+ file.write(\'>MRRE1H001H13FM1\\n\')\n+ file.write(\'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\\n\')\n+ file.write(\'CTAGCTAGCTAGCTAGCTAGCTAGC\\n\')\n+ file.write(\'>MRRE2H007A13FM3\\n\')\n+ file.write(\'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\\n\')\n+ file.write(\'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\\n\')\n+ file.write(\'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\\n\')\n+ file.write(\'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\\n\')\n+ file.close()\n+ \n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_BowtieParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_BowtieParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,57 @@
+from commons.core.parsing.BowtieParser import BowtieParser
+import unittest, os
+
+
+class Test_BlatParser(unittest.TestCase):
+    """Tests for BowtieParser on a four-read Bowtie output file."""
+    # NOTE(review): this class lives in Test_BowtieParser.py and exercises
+    # BowtieParser; the name Test_BlatParser looks copy-pasted from the Blat
+    # tests.  It is kept unchanged here so that anything selecting the test by
+    # name keeps working -- consider renaming it to Test_BowtieParser.
+
+
+    def test_simple(self):
+        """Parse four reads and check name, chromosome, strand, span and mismatches.
+
+        For every read the assertions expect getStart() to be one more than
+        the offset written in the 4th column of the file.
+        """
+        fileName = "tmpFile.bowtie"
+        # Expected (name, chromosome, direction, start, end, nbMismatches)
+        # of each mapping, in file order.
+        expMappings = [
+            ("HWI-EAS179_0053:2:1:1365:7879#0/2", "chrXHet", 1, 191699, 191774, 2),
+            ("HWI-EAS179_0053:2:1:1365:7879#0/1", "chrXHet", -1, 191804, 191879, 2),
+            ("HWI-EAS179_0053:2:1:1371:11420#0/2", "chr3L", 1, 16569207, 16569282, 1),
+            ("HWI-EAS179_0053:2:1:1371:11420#0/1", "chr3L", -1, 16569299, 16569374, 0),
+        ]
+        with open(fileName, "w") as handle:
+            handle.write("HWI-EAS179_0053:2:1:1365:7879#0/2\t+\tchrXHet\t191698\tACCGCTGAACCACTTTCATNCNTGGGATTGTGAACTGAAACTGTTCACATGAACTTGGAATTCCCAGTAAGTGTGA\tLcaYcacLaTdd`dacacYBaBTa^^TL^M`]`^aa`Tca`LaLTUa]a_bcLcTMMMMa^a^`bT`ccT_UbM_B\t0\t19:G>N,21:T>N\n")
+            handle.write("HWI-EAS179_0053:2:1:1365:7879#0/1\t-\tchrXHet\t191803\tCCCCTTGTACACACCGCCCGTCGCTACTACCGATTGAATTATGTAGTGAGGTCTCCGGACGTGATCACTGTGACGC\tBBBBBBBBB`O`DS]]aYabaaa[ULYLY]^b`^a^aZZZ_LLLca_a_b^^aYdbd``d^ccaY`_caccc^acc\t0\t33:T>G,72:T>C\n")
+            handle.write("HWI-EAS179_0053:2:1:1371:11420#0/2\t+\tchr3L\t16569206\tTATGAGCGCCAATTTTGCANTTTTATTTTTGTACAAGCCAAGGGTTTTGCAACATTCACAGCGCTTGCCACTTGTC\tcY^bcYLcaL]`]]`aaTaBaab^_ZZ__R[`[cYccc^Ybb^_L`L`Y`aM_a_TcTcc`LL]]MYaYabbTY`^\t0\t19:G>N\n")
+            handle.write("HWI-EAS179_0053:2:1:1371:11420#0/1\t-\tchr3L\t16569298\tAATGAACCATTGTAATTACCCACAACACATACAGTCACACACGAGATGCACACAAGTCGGAAACGGAAGCGAGACG\tBBBBBBBBBBBBBBBBBBBBBB^T`]Y^`KZY__LY_a]^T^ccYaYY__YT]VZbL]`b^cLT^a^caccYbT^b\t0\n")
+
+        try:
+            parser = BowtieParser(fileName, 0)
+            nbMappings = 0
+            for cpt, mapping in enumerate(parser.getIterator()):
+                transcript = mapping.getTranscript()
+                if cpt >= len(expMappings):
+                    # More mappings than reads written to the file.
+                    self.fail()
+                expName, expChromosome, expDirection, expStart, expEnd, expNbMismatches = expMappings[cpt]
+                self.assertEqual(transcript.getName(), expName)
+                self.assertEqual(transcript.getChromosome(), expChromosome)
+                self.assertEqual(transcript.getDirection(), expDirection)
+                self.assertEqual(transcript.getStart(), expStart)
+                self.assertEqual(transcript.getEnd(), expEnd)
+                self.assertEqual(transcript.getTagValue("nbMismatches"), expNbMismatches)
+                nbMappings += 1
+            # Without this check the test would pass silently if the parser
+            # yielded fewer mappings than the four reads written above.
+            self.assertEqual(nbMappings, len(expMappings))
+        finally:
+            # Remove the fixture even when an assertion fails.
+            os.remove(fileName)
+
+
+
+if __name__ == "__main__":
+    unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_CoordsParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_CoordsParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,105 @@
+import unittest
+from commons.core.parsing.CoordsParser import CoordsParser
+from SMART.Java.Python.mappingToCoordinates import MappingToCoordinates
+
+
+class Test_CoordsParser(unittest.TestCase):
+    """Tests for CoordsParser run against the .coords fixtures under data/."""
+
+
+    def test_Parser(self):
+        """Check the first three mappings read from data/testCoordsParser.coords."""
+        # Expected (start, end, identity, size) keyed by 1-based record rank.
+        # identity is None where the record carries no identity expectation.
+        expRecords = {
+            1: (1, 6251, 89.030000000000001, 6251),
+            2: (9127, 11947, 90.450000000000003, 2821),
+            3: (12201, 12953, None, 753),
+        }
+        parser = CoordsParser("data/testCoordsParser.coords")
+
+        # NOTE(review): nothing here fails if the iterator yields fewer than
+        # three mappings (or none at all).  Once the number of records in the
+        # fixture is confirmed, assert it as the two showcoord tests do.
+        for cpt, mapping in enumerate(parser.getIterator(), 1):
+            transcript = mapping.getTranscript()
+            if cpt not in expRecords:
+                continue
+            expStart, expEnd, expIdentity, expSize = expRecords[cpt]
+            self.assertEqual(transcript.getChromosome(), "scaffold_1")
+            self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
+            self.assertEqual(transcript.getStart(), expStart)
+            self.assertEqual(transcript.getEnd(), expEnd)
+            self.assertEqual(transcript.getDirection(), -1)
+            self.assertEqual(transcript.getNbExons(), 1)
+            if expIdentity is not None:
+                self.assertEqual(transcript.getTagValue("identity"), expIdentity)
+            # The single exon is expected to span the whole transcript.
+            exons = transcript.getExons()
+            self.assertEqual(exons[0].getChromosome(), "scaffold_1")
+            self.assertEqual(exons[0].getStart(), expStart)
+            self.assertEqual(exons[0].getEnd(), expEnd)
+            self.assertEqual(exons[0].getDirection(), -1)
+            self.assertEqual(transcript.getSize(), expSize)
+
+    def _checkSingleMapping(self, fileName, expStart, expEnd, expDirection, expTags, expSize):
+        """Parse fileName and check that it yields exactly one expected mapping.
+
+        expTags is a list of (tag name, expected value) pairs, checked in order.
+        """
+        parser = CoordsParser(fileName)
+        expTranscriptCount = 1
+        obsTranscriptCount = 0
+
+        for mapping in parser.getIterator():
+            transcript = mapping.getTranscript()
+            obsTranscriptCount += 1
+            self.assertEqual(transcript.getChromosome(), "mivi_sl_A1_scaffold00001")
+            self.assertEqual(transcript.getName(), "mivi_sl_A2_scaffold00003")
+            self.assertEqual(transcript.getStart(), expStart)
+            self.assertEqual(transcript.getEnd(), expEnd)
+            self.assertEqual(transcript.getDirection(), expDirection)
+            for tagName, expValue in expTags:
+                self.assertEqual(transcript.getTagValue(tagName), expValue)
+            self.assertEqual(transcript.getSize(), expSize)
+
+        self.assertEqual(expTranscriptCount, obsTranscriptCount)
+
+    def test_Parser_showcoord(self):
+        """The showcoord fixture holds one forward mapping."""
+        expTags = [
+            ("identity", 98.30),
+            ("target_pident", 98.30),
+            ("target_pcover", 3.32),
+            ("target_length", 60273),
+            ("Target", "mivi_sl_A2_scaffold00003 1 2001"),
+        ]
+        self._checkSingleMapping("data/testCoordsParser_showcoord.coords", 296, 2292, 1, expTags, 1997)
+
+    def test_Parser_showcoord_promer(self):
+        """The showcoord promer fixture holds one reverse mapping."""
+        expTags = [
+            ("identity", 94.25),
+            ("target_pident", 94.25),
+            ("target_pcover", 1.56),
+            ("target_length", 60273),
+            ("Target", "mivi_sl_A2_scaffold00003 939 1"),
+        ]
+        self._checkSingleMapping("data/testCoordsParser_showcoord_promer.coords", 291, 1229, -1, expTags, 939)
+
+
+if __name__ == '__main__':
+        unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,70 @@
+from commons.core.parsing.CrossSsrAndBesMappedByBlatToGff import CrossSsrAndBesMappedByBlatToGff
+from commons.core.parsing.SsrParser import SsrParser
+
+import unittest
+import os
+
+
+class Test_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
+
+
+    def test_createDictOfSsrParser(self):
+        obsDictSsrParser = {}
+
+        ssrFileName = 'input_SSR_Resuts.tab'
+        SSRFile = open(ssrFileName, 'w')
+        SSRFile.write('BES_name\tBES_redundancy\tSSR_di/tri/tetranucleotide\tSSR_Motif\tSSR_Motif_number\tSSR_start\tSSR_end\tBES_size\n')
+        SSRFile.write('MRRE1H001A12RM1\t1\t4\tttta\t6\t272\t295\t724\n')
+        SSRFile.write('MRRE1H001B01RM1\t1\t3\taat\t8\t264\t287\t683\n')
+        SSRFile.write('MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734\n')
+        SSRFile.write('MRRE1H001B07RM1\t2\t2\taata\t25\t83\t90\t734\n')
+        SSRFile.close()
+
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        iCrossSsrAndBesMappedByBlatToGff._inputFileSSR = ssrFileName
+        obsDictSsrParser = iCrossSsrAndBesMappedByBlatToGff.createDictOfSsrParser(obsDictSsrParser)
+
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001B01RM1', '1', '3', 'aat', '8', '264', '287', '683')
+        SsrParser3 = SsrParser('MRRE1H001B07RM1', '1', '2', 'ta', '19', '153', '190', '734')
+        SsrParser4 = SsrParser('MRRE1H001B07RM1', '2', '2', 'aata', '25', '83', '90', '734')
+
+        expDictSsrParser = {
+                         'MRRE1H001A12RM1': [SsrParser1],
+                         'MRRE1H001B01RM1': [SsrParser2],
+                         'MRRE1H001B07RM1': [SsrParser3, SsrParser4]
+                        }
+
+        self.assertEquals(expDictSsrParser, obsDictSsrParser)
+        os.remove(ssrFileName)
+
+    def test_convertSSRPositionsToBlatPositions_strand_FW(self):
+        ssrPos = 75
+        blatPosStart = 10501475
+        blatPosEnd = 10501985
+        strand = '+'
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
+        expNewPos = 10501549
+        self.assertEquals(expNewPos, obsNewPos)
+
+    def test_convertSSRPositionsToBlatPositions_strand_RV(self):
+        ssrPos = 75
+        blatPosStart = 10501475
+        blatPosEnd = 10501985
+        strand = '-'
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
+        expNewPos = 10501911
+        self.assertEquals(expNewPos, obsNewPos)
+
+    def test_getSsrMotif(self):
+        ssrMotif = 'atg'
+        ssrNbMotif = 4
+        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
+        obsSsrSeq = iCrossSsrAndBesMappedByBlatToGff.getSsrSeq(ssrMotif, ssrNbMotif)
+        expSsrSeq = 'atgatgatgatg'
+        self.assertEquals(expSsrSeq, obsSsrSeq)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_F_BlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_BlatToGff.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,77 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_F_BlatToGff(unittest.TestCase):
+
+
+    def test_run(self):
+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile(expOutputFileName)
+
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+
+    def test_run_with_methodName(self):
+        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s -n Test_F' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile_with_methodName(expOutputFileName)
+
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+
+    def _writeBlatInputFile(self, blatInputFileName):
+        file = open(blatInputFileName, 'w')
+        file.write('psLayout version 3\n')
+        file.write('\n')
+        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
+        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
+        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
+        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
+        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
+        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
+        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
+        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
+        file.close()
+
+    def _writeExpOutputFile(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
+        file.write('chr16\tBlatToGff\tBES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
+        file.write('chr11\tBlatToGff\tBES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
+        file.write('chr11\tBlatToGff\tBES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
+        file.write('chr18\tBlatToGff\tBES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
+        file.close()
+
+    def _writeExpOutputFile_with_methodName(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tBlatToGff\tTest_F:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
+        file.write('chr16\tBlatToGff\tTest_F:BES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
+        file.write('chr11\tBlatToGff\tTest_F:BES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
+        file.write('chr11\tBlatToGff\tTest_F:BES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
+        file.write('chr18\tBlatToGff\tTest_F:BES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
+        file.close()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py Fri Jan 18 04:54:14 2013 -0500

[

b"@@ -0,0 +1,117 @@\n+import unittest\n+import os\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_F_BlatToGffForBesPaired(unittest.TestCase):\n+\n+\n+ def test_run(self):\n+ blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n+ self._writeBlatInputFileName(blatInputFileName)\n+ fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n+ self._writeFastaInputFile(fastaInputFileName)\n+ \n+ obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n+ cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n+ os.system(cmd)\n+ \n+ expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n+ self._writeExpOutputFileName(expOutputFileName)\n+ self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n+ os.remove(blatInputFileName)\n+ os.remove(fastaInputFileName)\n+ os.remove(expOutputFileName)\n+ os.remove(obsOutputFileName)\n+ \n+ def test_run_with_methodName(self):\n+ blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n+ self._writeBlatInputFileName(blatInputFileName)\n+ fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n+ self._writeFastaInputFile(fastaInputFileName)\n+ \n+ obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n+ cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s -n TestF' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n+ os.system(cmd)\n+ \n+ expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n+ self._writeExpOutputFileName_with_methodName(expOutputFileName)\n+ 
self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n+ os.remove(blatInputFileName)\n+ os.remove(fastaInputFileName)\n+ os.remove(expOutputFileName)\n+ os.remove(obsOutputFileName)\n+\n+ def _writeBlatInputFileName(self, blatInputFileName):\n+ file = open(blatInputFileName, 'w')\n+ file.write('psLayout version 3\\n')\n+ file.write('\\n')\n+ file.write('match mis- rep. N\\'s Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts\\n')\n+ file.write(' match match count bases count bases name size start end name size start end count\\n')\n+ file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n')\n+ file.write('315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n')\n+ file.write('690\\t11\\t0\\t0\\t1\\t3\\t2\\t4\\t-\\tMRRE1H001H13RM1\\t704\\t0\\t704\\tchr16\\t22053297\\t21736364\\t21737069\\t3\\t40,647,14,\\t0,43,690,\\t21736364,21736406,21737055,\\n')\n+ file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMACHINFM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n+ file.write('620\\t23\\t0\\t0\\t0\\t0\\t0\\t0\\t-\\tBIDULERM1\\t643\\t0\\t643\\tchr11\\t19818926\\t3794984\\t3795627\\t1\\t643,\\t0,\\t3794984,\\n')\n+ file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMRRE1H032F08FM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n+ file.write('620\\t"..b'ATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\\n\')\n+ 
file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n+ file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BAC\\t21686950\\t21737069\\t.\\t.\\t.\\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n+ file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\\n\')\n+ file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n+ file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BAC\\t3725876\\t3795627\\t.\\t.\\t.\\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n+ 
file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n+ file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t11978635\\t11979338\\t.\\t+\\t.\\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+ file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BAC\\t11978635\\t12067719\\t.\\t.\\t.\\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+ file.close()\n+ \n+ def _writeFastaInputFile(self, fileName):\n+ file = open(fileName, \'w\')\n+ file.write(\'>MRRE1H001H13FM1\\n\')\n+ file.write(\'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\\n\')\n+ file.write(\'CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\\n\')\n+ file.write(\'ACTGCTAGCTACG\\n\')\n+ file.write(\'>MRRE1H001H13RM1\\n\')\n+ file.write(\'ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\\n\')\n+ file.write(\'ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\\n\')\n+ file.write(\'ACTGATCGACTGATCGACTGC\\n\')\n+ file.write(\'>MRRE1H032F08FM1\\n\')\n+ file.write(\'TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\\n\')\n+ file.write(\'ATCGATCG\\n\')\n+ file.write(\'>MRRE1H032F08RM1\\n\')\n+ file.write(\'ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\\n\')\n+ file.write(\'TACGTACGTAC\\n\')\n+ file.write(\'>MRRE1B072N12FM1\\n\')\n+ file.write(\'ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n+ file.write(\'>MRRE1B072N12RM1\\n\')\n+ file.write(\'TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n+ file.write(\'>MACHINFM1\\n\')\n+ file.write(\'ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\\n\')\n+ file.write(\'>BIDULERM1\\n\')\n+ 
file.write(\'ACTCGATCGACTACGTACGTAGACTG\\n\')\n+ file.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,66 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_F_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
+
+
+    def test_run(self):
+        ssrInputFileName = '%s/commons/core/parsing/test/ssrInputFile.tab' % os.environ['REPET_PATH']
+        self._writeSsrInputFile(ssrInputFileName)
+        blatInputFileName = '%s/commons/core/parsing/test/blatInputFile.tab' % os.environ['REPET_PATH']
+        self._writeBlatInputFile(blatInputFileName)
+
+        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
+        cmd = 'python %s/commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py -s %s -b %s -o %s' % (os.environ['REPET_PATH'], ssrInputFileName, blatInputFileName, obsOutputFileName)
+        os.system(cmd)
+
+        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
+        self._writeExpOutputFile(expOutputFileName)
+
+        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
+        os.remove(ssrInputFileName)
+        os.remove(blatInputFileName)
+        os.remove(obsOutputFileName)
+        os.remove(expOutputFileName)
+
+    def _writeBlatInputFile(self, blatInputFileName):
+        file = open(blatInputFileName, 'w')
+        file.write('psLayout version 3\n')
+        file.write('\n')
+        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
+        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
+        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
+        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
+        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
+        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
+        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
+        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
+        file.close()
+
+    def _writeSsrInputFile(self, ssrInputFileName):
+        file = open(ssrInputFileName, 'w')
+        file.write('BES_name    BES_redundancy    SSR_di/tri/tetranucleotide    SSR_Motif    SSR_Motif_number    SSR_start    SSR_end    BES_size\n')
+        file.write('truc1\t1\t4\tttta\t6\t272\t295\t724\n')
+        file.write('truc2\t1\t3\taat\t8\t264\t287\t683\n')
+        file.write('MRRE1H001H13FM1\t1\t2\tta\t19\t153\t190\t378\n')
+        file.write('truc3\t2\t4\taaag\t8\t518\t549\t734\n')
+        file.write('MRRE1H032F08FM1\t1\t4\taaat\t7\t544\t571\t606\n')
+        file.write('MRRE1H032F08FM1\t2\t2\tag\t10\t587\t606\t606\n')
+        file.write('truc4\t1\t2\tat\t16\t519\t550\t672\n')
+        file.write('truc5\t1\t3\ttct\t8\t205\t228\t752\n')
+        file.write('truc6\t1\t2\tat\t33\t287\t352\t569\n')
+        file.close()
+
+    def _writeExpOutputFile(self, expOutputFileName):
+        file = open(expOutputFileName, 'w')
+        file.write('##gff-version 3\n')
+        file.write('chr16\tCrossSsrAndBesAlignedByBlat\tSSR\t21687102\t21687139\t.\t+\t.\tID=SSR_MRRE1H001H13FM1_1;Name=SSR_MRRE1H001H13FM1_1;bes_name=MRRE1H001H13FM1;bes_size=378;bes_matchstart=0;bes_matchend=345;bes_redundancy=1;ssr_type=2;ssr_motif=ta;ssr_motif_number=19;ssr_start=153;ssr_end=190;muscadine_seq=tatatatatatatatatatatatatatatatatatata\n')
+        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725930\t3725903\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_1;Name=SSR_MRRE1H032F08FM1_1;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=1;ssr_type=4;ssr_motif=aaat;ssr_motif_number=7;ssr_start=544;ssr_end=571;muscadine_seq=aaataaataaataaataaataaataaat\n')
+        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725887\t3725868\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_2;Name=SSR_MRRE1H032F08FM1_2;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=2;ssr_type=2;ssr_motif=ag;ssr_motif_number=10;ssr_start=587;ssr_end=606;muscadine_seq=agagagagagagagagagag\n')
+        file.close()
+
+if __name__ == "__main__":
+    unittest.main()
+
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_FastaParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_FastaParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,75 @@
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.Sequence import Sequence
+import unittest
+
+class Test_FastaParser(unittest.TestCase):
+
+    def test_getSubsequence(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "1"
+        expSeq = Sequence("1:1-20 (1)", "CCTAAGCCATTGCTTGGTGA")
+        obsSeq = parser.getSubSequence(chromosome, 1, 20, 1)
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "TGAAGA")
+        obsSeq = parser.getSubSequence(chromosome, 55, 60, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_inside_and_outside(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "TTA")
+        obsSeq = parser.getSubSequence(chromosome, 137, 151, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_last_letter(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        expSeq = Sequence("subsequence", "A")
+        obsSeq = parser.getSubSequence(chromosome, 139, 151, 1, "subsequence")
+        self.assertEquals(expSeq, obsSeq)
+
+    def test_getSubsequence_long_sequence_totally_outside(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        chromosome = "2"
+        isSysExit = False
+        try:
+            parser.getSubSequence(chromosome, 140, 151, 1, "subsequence")
+        except:
+            isSysExit = True
+        self.assertTrue(isSysExit)
+
+    def test_setTags(self):
+        fastaFile = "myFastaInput.fasta"
+        self._writeInputFastaFile(fastaFile)
+        parser = FastaParser(fastaFile)
+        parser.setTags()
+        expTags = {"1" : 0,
+                   "2" : 54}
+        obsTags = parser.getTags()
+        self.assertEquals(expTags, obsTags)
+
+    def _writeInputFastaFile(self, fastaFile):
+        myHandler = open(fastaFile, 'w')
+        myHandler.write(">1\n")
+        myHandler.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAAT\n")
+        myHandler.write(">2\n")
+        myHandler.write("TATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCG\n")
+        myHandler.write("GACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGC\n")
+        myHandler.write("TGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTA\n")
+
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_FindRep.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_FindRep.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,108 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from xml.sax import make_parser
+from xml.sax.handler import feature_namespaces
+from commons.core.parsing.FindRep import FindRep
+
+
+class Test_FindRep(unittest.TestCase):
+    def setUp(self):
+        self._mrepsOuputFileName = "output.xml"
+        self._obsSetFileName = "obsOuput.set"
+        self._expSetFileName = "expOuput.set"
+        self._writeExpSet(self._expSetFileName)
+        self._writeMrepsOutput(self._mrepsOuputFileName)
+
+    def tearDown(self):
+        os.remove(self._expSetFileName)
+        os.remove(self._obsSetFileName)
+        os.remove(self._mrepsOuputFileName)
+
+    def test_parse(self):
+        xmlParser = make_parser()
+        xmlParser.setFeature( feature_namespaces, 0 )
+        xmlParser.setContentHandler( FindRep( self._obsSetFileName,0,  0 ) )
+        xmlParser.parse( self._mrepsOuputFileName )
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))
+
+    def _writeExpSet(self, fileName):
+        f = open(fileName, "w")
+        f.write("1\t(tatt)3\tseq1\t4\t16\n")
+        f.write("2\t(tatt)3\tseq1\t23\t35\n")
+        f.write("3\t(tatt)3\tseq1\t42\t54\n")
+        f.close()
+
+    def _writeMrepsOutput(self, fileName):
+        f = open(fileName, "w")
+        f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
+        f.write("<mreps>\n")
+        f.write("<time>Thu Dec  1 17:25:54 2011\n")
+        f.write("</time>\n")
+        f.write("<parameters>\n")
+        f.write("    <type-of-input>file in fasta format</type-of-input>\n")
+        f.write("    <err>3</err>\n")
+        f.write("    <from>1</from>\n")
+        f.write("    <to>-1</to>\n")
+        f.write("    <win>-1</win>\n")
+        f.write("    <minsize>1</minsize>\n")
+        f.write("    <maxsize>-1</maxsize>\n")
+        f.write("    <minperiod>1</minperiod>\n")
+        f.write("   <maxperiod>-1</maxperiod>\n")
+        f.write("   <minexponent>3.00</minexponent>\n")
+        f.write("</parameters>\n")
+        f.write("<results>\n")
+        f.write("<sequence-name>seq1</sequence-name>\n")
+        f.write("<repetitions>\n")
+        f.write("<window>\n")
+        f.write("<windowstart>1</windowstart>\n")
+        f.write("<windowend>60</windowend>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>4</start>\n")
+        f.write("        <end>16</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("       <period>4</period>\n")
+        f.write("       <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>23</start>\n")
+        f.write("        <end>35</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("    <repeat>\n")
+        f.write("        <start>42</start>\n")
+        f.write("       <end>54</end>\n")
+        f.write("        <length>13</length>\n")
+        f.write("        <period>4</period>\n")
+        f.write("        <exponent>3.25</exponent>\n")
+        f.write("        <score>0.000</score>\n")
+        f.write("        <sequence>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>tatt</unit>\n")
+        f.write("            <unit>t</unit>\n")
+        f.write("        </sequence>\n")
+        f.write("    </repeat>\n")
+        f.write("<nbofreps>3</nbofreps>\n")
+        f.write("</window>\n")
+        f.write("</repetitions>\n")
+        f.write("</results>\n")
+        f.write("<errorcode>0</errorcode>\n")
+        f.write("</mreps>\n")
+        f.close()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_GffParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_GffParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,62 @@
+import unittest
+from commons.core.parsing.GffParser import GffParser
+
+
+class Test_GffParser(unittest.TestCase):
+
+
+    def test_Parser(self):
+        parser = GffParser("data/testGffParser1.gff3")
+
+        self.assertEqual(parser.getNbTranscripts(), 3)
+
+        cpt = 0
+        for transcript in parser.getIterator():
+            cpt += 1
+            if cpt == 1:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test1")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("field"), "value1")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(exons[0].getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 1001)
+            elif cpt == 2:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test2")
+                self.assertEqual(transcript.getStart(), 10000)
+                self.assertEqual(transcript.getEnd(), 20000)
+                self.assertEqual(transcript.getDirection(), -1)
+                self.assertEqual(transcript.getNbExons(), 2)
+                self.assertEqual(transcript.getTagValue("field"), "value2")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 10000)
+                self.assertEqual(exons[0].getEnd(), 10100)
+                self.assertEqual(exons[0].getDirection(), -1)
+                self.assertEqual(transcript.getSize(), 9602)
+            if cpt == 3:
+                self.assertEqual(transcript.getChromosome(), "arm_X")
+                self.assertEqual(transcript.getName(), "test1.1")
+                self.assertEqual(transcript.getStart(), 1000)
+                self.assertEqual(transcript.getEnd(), 2000)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getNbExons(), 1)
+                self.assertEqual(transcript.getTagValue("ID"), "test1.1-1")
+                exons = transcript.getExons()
+                self.assertEqual(exons[0].getChromosome(), "arm_X")
+                self.assertEqual(exons[0].getStart(), 1000)
+                self.assertEqual(exons[0].getEnd(), 2000)
+                self.assertEqual(exons[0].getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 1001)
+
+
+if __name__ == '__main__':
+        unittest.main()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_MapParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_MapParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,59 @@
+import unittest
+import os
+from commons.core.parsing.MapParser import MapParser
+
+
+class Test_MapParser(unittest.TestCase):
+    """Check that MapParser yields one transcript per line of a map file."""
+
+    def setUp(self):
+        # The fixture is written to the current working directory.
+        self.inputMapFileName = "testMapParser.map"
+        self._writeInputMapFile()
+
+    def tearDown(self):
+        if os.path.exists(self.inputMapFileName):
+            os.remove(self.inputMapFileName)
+
+    def test_Parser(self):
+        """Parse the ten-record fixture and check its first and last records."""
+        parser = MapParser(self.inputMapFileName)
+
+        cpt = 0
+        for transcript in parser.getIterator():
+            cpt += 1
+            if cpt == 1:
+                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
+                self.assertEqual(transcript.getName(), "aagatgcgtaacggccatac_17")
+                self.assertEqual(transcript.getStart(), 4380)
+                self.assertEqual(transcript.getEnd(), 4400)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 21)
+            elif cpt == 10:
+                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
+                self.assertEqual(transcript.getName(), "aacggccatacattggtttg_12")
+                self.assertEqual(transcript.getStart(), 4389)
+                self.assertEqual(transcript.getEnd(), 4409)
+                self.assertEqual(transcript.getDirection(), 1)
+                self.assertEqual(transcript.getSize(), 21)
+        # Without this check the test passes vacuously when the parser
+        # yields fewer than ten transcripts (or none at all).
+        self.assertEqual(cpt, 10)
+
+    def _writeInputMapFile(self):
+        """Write ten tab-separated records (name, chromosome, start, end)."""
+        with open(self.inputMapFileName, 'w') as inputFile:
+            inputFile.write("aagatgcgtaacggccatac_17\tdmel_chr4\t4380\t4400\n")
+            inputFile.write("agatgcgtaacggccataca_16\tdmel_chr4\t4381\t4401\n")
+            inputFile.write("gatgcgtaacggccatacat_16\tdmel_chr4\t4382\t4402\n")
+            inputFile.write("atgcgtaacggccatacatt_15\tdmel_chr4\t4383\t4403\n")
+            inputFile.write("tgcgtaacggccatacattg_15\tdmel_chr4\t4384\t4404\n")
+            inputFile.write("gcgtaacggccatacattgg_15\tdmel_chr4\t4385\t4405\n")
+            inputFile.write("cgtaacggccatacattggt_14\tdmel_chr4\t4386\t4406\n")
+            inputFile.write("gtaacggccatacattggtt_14\tdmel_chr4\t4387\t4407\n")
+            inputFile.write("taacggccatacattggttt_14\tdmel_chr4\t4388\t4408\n")
+            inputFile.write("aacggccatacattggtttg_12\tdmel_chr4\t4389\t4409\n")
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_MrepsToSet.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_MrepsToSet.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,114 @@
+import unittest
+import os
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.parsing.MrepsToSet import MrepsToSet
+
+
+class Test_MrepsToSet(unittest.TestCase):
+    """Check the conversion of an mreps XML report into a "set" file."""
+
+    def setUp(self):
+        # NOTE(review): this file is never written by the test; it is only
+        # passed to MrepsToSet -- confirm the converter does not read it.
+        self._mrepsInputFileName = "mrepsInput.fa"
+        self._mrepsOuputFileName = "mrepsOutput.xml"
+        self._obsSetFileName = "obsOuput.set"
+        self._expSetFileName = "expOuput.set"
+
+        self._writeExpSet(self._expSetFileName)
+        self._writeMrepsOutput(self._mrepsOuputFileName)
+
+    def tearDown(self):
+        # Remove only what exists: the observed file is missing when run()
+        # fails, and an unconditional os.remove() would then raise and leave
+        # the remaining fixtures behind.
+        for fileName in (self._expSetFileName, self._obsSetFileName, self._mrepsOuputFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def test_convert(self):
+        """Convert the mreps fixture and compare it with the expected set file."""
+        iMrepsToSet = MrepsToSet(self._mrepsInputFileName, self._mrepsOuputFileName, self._obsSetFileName)
+        iMrepsToSet.run()
+        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))
+
+    def _writeExpSet(self, fileName):
+        """Write the expected set file: one line per repeat of the fixture."""
+        with open(fileName, "w") as f:
+            f.write("1\t(tatt)3\tseq1\t4\t16\n")
+            f.write("2\t(tatt)3\tseq1\t23\t35\n")
+            f.write("3\t(tatt)3\tseq1\t42\t54\n")
+
+    def _writeMrepsOutput(self, fileName):
+        """Write an mreps XML report containing three repeats on seq1."""
+        with open(fileName, "w") as f:
+            f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
+            f.write("<mreps>\n")
+            f.write("<time>Thu Dec  1 17:25:54 2011\n")
+            f.write("</time>\n")
+            f.write("<parameters>\n")
+            f.write("    <type-of-input>file in fasta format</type-of-input>\n")
+            f.write("    <err>3</err>\n")
+            f.write("    <from>1</from>\n")
+            f.write("    <to>-1</to>\n")
+            f.write("    <win>-1</win>\n")
+            f.write("    <minsize>1</minsize>\n")
+            f.write("    <maxsize>-1</maxsize>\n")
+            f.write("    <minperiod>1</minperiod>\n")
+            f.write("   <maxperiod>-1</maxperiod>\n")
+            f.write("   <minexponent>3.00</minexponent>\n")
+            f.write("</parameters>\n")
+            f.write("<results>\n")
+            f.write("<sequence-name>seq1</sequence-name>\n")
+            f.write("<repetitions>\n")
+            f.write("<window>\n")
+            f.write("<windowstart>1</windowstart>\n")
+            f.write("<windowend>60</windowend>\n")
+            f.write("    <repeat>\n")
+            f.write("        <start>4</start>\n")
+            f.write("        <end>16</end>\n")
+            f.write("        <length>13</length>\n")
+            f.write("       <period>4</period>\n")
+            f.write("       <exponent>3.25</exponent>\n")
+            f.write("        <score>0.000</score>\n")
+            f.write("        <sequence>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>t</unit>\n")
+            f.write("        </sequence>\n")
+            f.write("    </repeat>\n")
+            f.write("    <repeat>\n")
+            f.write("        <start>23</start>\n")
+            f.write("        <end>35</end>\n")
+            f.write("        <length>13</length>\n")
+            f.write("        <period>4</period>\n")
+            f.write("        <exponent>3.25</exponent>\n")
+            f.write("        <score>0.000</score>\n")
+            f.write("        <sequence>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>t</unit>\n")
+            f.write("        </sequence>\n")
+            f.write("    </repeat>\n")
+            f.write("    <repeat>\n")
+            f.write("        <start>42</start>\n")
+            f.write("       <end>54</end>\n")
+            f.write("        <length>13</length>\n")
+            f.write("        <period>4</period>\n")
+            f.write("        <exponent>3.25</exponent>\n")
+            f.write("        <score>0.000</score>\n")
+            f.write("        <sequence>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>tatt</unit>\n")
+            f.write("            <unit>t</unit>\n")
+            f.write("        </sequence>\n")
+            f.write("    </repeat>\n")
+            f.write("<nbofreps>3</nbofreps>\n")
+            f.write("</window>\n")
+            f.write("</repetitions>\n")
+            f.write("</results>\n")
+            f.write("<errorcode>0</errorcode>\n")
+            f.write("</mreps>\n")

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_Multifasta2SNPFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_Multifasta2SNPFile.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1786 @@\n+import os\n+import shutil\n+import unittest\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n+from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.seq.BioseqDB import BioseqDB\n+from smac_pipe.tests.Utils4Test import Utils4Test\n+\n+\n+class Test_Multifasta2SNPFile(unittest.TestCase):\n+# TODO TEST LOGFILE\n+ def setUp(self):\n+ os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n+ self._inFileName = "multifasta_input.fasta"\n+ \n+ self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % os.environ["REPET_PATH"]\n+ self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % os.environ["REPET_PATH"]\n+ \n+ self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % os.environ["REPET_PATH"]\n+ self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % os.environ["REPET_PATH"]\n+ self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % os.environ["REPET_PATH"]\n+ self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % os.environ["REPET_PATH"]\n+ self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % os.environ["REPET_PATH"]\n+ \n+ self._realInputFileName = "data/real_multifasta_input.fasta"\n+ self._realExpSubSNPFileName = "data/realExpSubSNP.csv"\n+ self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"\n+ self._realExpBatchLineFileName = "data/realExpBatchLine.csv"\n+ self._realExpIndividualFileName = "data/realExpIndividual.csv"\n+ \n+ self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % os.environ["REPET_PATH"]\n+ \n+ self._obsSubSNPFileName = "SubSNP.csv"\n+ self._obsAlleleFileName = "Allele.csv"\n+ self._obsIndividualFileName = "Individual.csv"\n+ self._obsSequenceFSAFileName = 
"Sequences.fsa"\n+ self._obsSequenceCSVFileName = "Sequences.csv"\n+ self._obsBatchFileName = "Batch.txt"\n+ self._obsBatchLineFileName = "BatchLine.csv"\n+ \n+ self._fileUtils = FileUtils()\n+\n+ def tearDown(self):\n+ os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n+ logFileName = "multifasta2SNP.log"\n+ if self._fileUtils.isRessourceExists(self._inFileName):\n+ os.remove(self._inFileName)\n+ if self._fileUtils.isRessourceExists(self._obsSubSNPFileName):\n+ os.remove(self._obsSubSNPFileName)\n+ if self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered"):\n+ os.remove(self._obsSubSNPFileName + "_filtered")\n+ if self._fileUtils.isRessourceExists(self._obsAlleleFileName):\n+ os.remove(self._obsAlleleFileName)\n+ if self._fileUtils.isRessourceExists(self._obsIndividualFileName):\n+ os.remove(self._obsIndividualFileName)\n+ if self._fileUtils.isRessourceExists(self._obsSequenceFSAFileName):\n+ os.remove(self._obsSequenceFSAFileName)\n+ if self._fileUtils.isRessourceExists(self._obsSequenceCSVFileName):\n+ os.remove(self._obsSequenceCSVFileName)\n+ if self._fileUtils.isRessourceExists(self._obsBatchFileName):\n+ os.remove(self._obsBatchFileName)\n+ if self._fileUtils.isRessourceExists(self._obsBatchLineFileName):\n+ os.remove(self._obsBatchLineFileName)\n+\n+ if self._fileUtils.isRessourceExists(self._expSubSNPFileName): \n+ os.remove(self._expSubSNPFileName)\n+ if self._fileUtils.isRessourceExists(self._realExpSubSNPFileName + "_filtered"): \n+ os.remove(self._realExpSubSNPFileName + "_filtered")\n+ if self._fileUtils.isRessourceExists(self._expAlleleFileName):\n+ '..b' \n+ def _writeInputFileSeveralBatches(self):\n+ if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+ os.mkdir(self._inputDirSeveralBatches)\n+ \n+ inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+ inFileHandle.write(">Sequence_de_Reference1\\n")\n+ 
inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line1\\n")\n+ inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line2\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle.close()\n+ \n+ inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+ inFileHandle2.write(">Sequence_de_Reference2\\n")\n+ inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line1\\n")\n+ inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line2\\n")\n+ inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle2.close()\n+ \n+ def _writeInputFileSeveralBatches_different_lines_between_files(self):\n+ if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+ os.mkdir(self._inputDirSeveralBatches)\n+ \n+ inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+ inFileHandle.write(">Sequence_de_Reference1\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line1\\n")\n+ inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line2\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle.close()\n+ \n+ inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+ inFileHandle2.write(">Sequence_de_Reference2\\n")\n+ inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line3\\n")\n+ inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line4\\n")\n+ inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle2.close()\n+ \n+ def 
_writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self): \n+ if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n+ os.mkdir(self._inputDirSeveralBatches)\n+ \n+ inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n+ inFileHandle.write(">Sequence_de_Reference1\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line1\\n")\n+ inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line2\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle.close()\n+ \n+ inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n+ inFileHandle2.write(">Sequence_de_Reference1\\n")\n+ inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line3\\n")\n+ inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle2.write(">Line4\\n")\n+ inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle2.close()\n+ \n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,292 @@\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFileWriter\n+from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n+from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n+from commons.core.LoggerFactory import LoggerFactory\n+import os\n+import logging\n+import unittest\n+\n+class Test_Multifasta2SNPFileWriter(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._obsSubSNPFile = "SubSNP.csv"\n+ self._expSubSNPFile = "ExpSubSNP.csv"\n+ \n+ self._obsAlleleFile = "Allele.csv"\n+ self._expAlleleFile = "ExpAllele.csv"\n+ \n+ self._obsIndividualFile = "Individual.csv"\n+ self._expIndividualFile = "ExpIndividual.csv"\n+ \n+ self._obsSequenceFSAFile = "Sequences.fsa"\n+ self._expSequenceFSAFile = "ExpSequences.fsa"\n+ \n+ self._obsSequenceCSVFile = "Sequences.csv"\n+ self._expSequenceCSVFile = "ExpSequences.csv"\n+ \n+ self._obsBatchFile = "Batch.txt"\n+ self._expBatchFile = "ExpBatch.txt"\n+ \n+ self._obsBatchLineFile = "BatchLine.csv"\n+ self._expBatchLineFile = "ExpBatchLine.csv"\n+ \n+ self._logFileName = "Test_Multifasta2SNPWriter.log"\n+ \n+ self._inputFileName = "multifasta.fsa"\n+ \n+ self._lSNPResult = []\n+ self._dAlleleResult = {}\n+ self._lIndividualResult = []\n+ self._refSeq = Bioseq()\n+ self._seqDb= BioseqDB()\n+ \n+ self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")\n+ self._lSequenceWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(self._refSeq, self._seqDb, self._logFile, self._inputFileName)\n+ self._lBatchLineResults = []\n+ \n+ self._Multifasta2SNPFileWriter = Multifasta2SNPFileWriter()\n+ \n+ self._inFileName = "multifasta.txt"\n+ self._taxon = "Arabidopsis thaliana"\n+\n+ def tearDown(self):\n+ if FileUtils.isRessourceExists(self._inFileName):\n+ 
os.remove(self._inFileName)\n+ if FileUtils.isRessourceExists("multifasta2SNP.log"):\n+ os.remove("multifasta2SNP.log")\n+ if FileUtils.isRessourceExists("Test_Multifasta2SNPWriter.log"):\n+ os.remove("Test_Multifasta2SNPWriter.log")\n+ \n+ if FileUtils.isRessourceExists(self._obsSubSNPFile):\n+ os.remove(self._obsSubSNPFile)\n+ if FileUtils.isRessourceExists(self._expSubSNPFile):\n+ os.remove(self._expSubSNPFile)\n+ \n+ if FileUtils.isRessourceExists(self._obsAlleleFile):\n+ os.remove(self._obsAlleleFile)\n+ if FileUtils.isRessourceExists(self._expAlleleFile):\n+ os.remove(self._expAlleleFile)\n+ \n+ if FileUtils.isRessourceExists(self._obsIndividualFile):\n+ os.remove(self._obsIndividualFile)\n+ if FileUtils.isRessourceExists(self._expIndividualFile):\n+ os.remove(self._expIndividualFile)\n+ \n+ if FileUtils.isRessourceExists(self._obsSequenceFSAFile):\n+ os.remove(self._obsSequenceFSAFile)\n+ if FileUtils.isRessourceExists(self._expSequenceFSAFile):\n+ os.remove(self._expSequenceFSAFile)\n+ \n+ if FileUtils.isRessourceExists(self._obsSequenceCSVFile):\n+ os.remove(self._obsSequenceCSVFile)\n+ if FileUtils.isRessourceExists(self._expSequenceCSVFile):\n+ os.remove(self._expSequenceCSVFile)\n+\n+ if FileUtils.isRessourceExists(self._obsBatchFile):\n+ FileUtils.removeFilesByPattern(self._obsBatchFile)\n+ if FileUtils.isRessourceExists(self._expBatchFile):\n+ FileUtils.removeFilesByPattern(self._expBatchFile)\n+ \n+ if FileUtils.isRessourceExists(self._ob'..b'File))\n+ self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFile))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFile, self._obsBatchFile))\n+ self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFile))\n+ self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFile, self._obsBatchLineFile)) \n+ \n+ def _writeExpSubSNPFile(self):\n+ expFile = open(self._expSubSNPFile, "w")\n+ 
expFile.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\\n")\n+ expFile.write("SubSNP1;A;SNP;1;A;T;1;1;1;Sequence;;;1\\n")\n+ expFile.write("SubSNP2;A;SNP;10;T;A;1;1;1;Sequence;;;2\\n")\n+ expFile.write("SubSNP3;A;SNP;20;T;A;1;1;2;Sequence;;;3\\n")\n+ expFile.close()\n+ \n+ def _writeExpAlleleFile(self):\n+ expFile = open(self._expAlleleFile, "w")\n+ expFile.write("AlleleNumber;Value;Motif;NbCopy;Comment\\n")\n+ expFile.write("1;A;;;\\n")\n+ expFile.write("2;C;;;\\n")\n+ expFile.write("3;T;;;\\n")\n+ expFile.close() \n+ \n+ \n+ def _writeExpIndividualFile(self):\n+ expFile = open(self._expIndividualFile, "w")\n+ expFile.write("IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\\n")\n+ expFile.write("1;Individual1;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n+ expFile.write("2;Individual2;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n+ expFile.close() \n+\n+ def _writeInputFile(self):\n+ inFileHandle = open(self._inFileName, "w")\n+ inFileHandle.write(">Sequence_de_Reference\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line1\\n")\n+ inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n+ inFileHandle.write(">Line2\\n")\n+ inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n+ inFileHandle.close()\n+ \n+ def _writeExpSequenceFiles(self):\n+ SequenceFSAFileHandle = open(self._expSequenceFSAFile, "w")\n+ SequenceFSAFileHandle.write(">Sequence_de_Reference\\n")\n+ SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n+ SequenceFSAFileHandle.close()\n+ SequenceCSVFileHandle = open(self._expSequenceCSVFile, "w")\n+ 
SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\\n")\n+ SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\\n")\n+ SequenceCSVFileHandle.close()\n+ \n+ def _writeExpBatchFile(self):\n+ BatchFileHandle = open(self._expBatchFile, "w")\n+ BatchFileHandle.write("BatchNumber: 1\\n")\n+ BatchFileHandle.write("BatchName: batch1\\n")\n+ BatchFileHandle.write("GeneName: gene1\\n")\n+ BatchFileHandle.write("Description: \\n")\n+ BatchFileHandle.write("ContactNumber: \\n")\n+ BatchFileHandle.write("ProtocolNumber: \\n")\n+ BatchFileHandle.write("ThematicNumber: \\n")\n+ BatchFileHandle.write("RefSeqName: Sequence de Reference\\n")\n+ BatchFileHandle.write("AlignmentFileName: \\n")\n+ BatchFileHandle.write("SeqName: \\n")\n+ BatchFileHandle.write("//\\n")\n+ BatchFileHandle.close()\n+ \n+ def _writeExpBatchLineFile(self):\n+ BatchLineFileHandle = open(self._expBatchLineFile, "w")\n+ BatchLineFileHandle.write("IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\\n")\n+ BatchLineFileHandle.write("1;;;1;\\n")\n+ BatchLineFileHandle.write("2;;;1;\\n")\n+ BatchLineFileHandle.close()\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_PalsToAlign.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PalsToAlign.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,49 @@
+import unittest
+from commons.core.utils.FileUtils import FileUtils
+import os
+from commons.core.parsing.PalsToAlign import PalsToAlign
+
+
+class Test_PalsToAlign(unittest.TestCase):
+    """Check the conversion of a pals hit file (GFF) into an "align" file."""
+
+    def setUp(self):
+        self._palsFileName = "input.gff"
+        self._expAlignFileName = "file.align"
+        self._obsAlignFileName = "output.align"
+
+    def tearDown(self):
+        # Remove only what exists: the observed file is missing when run()
+        # fails, and an unconditional os.remove() would then raise on top of
+        # the real failure.
+        for fileName in (self._palsFileName, self._expAlignFileName, self._obsAlignFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def testRun(self):
+        """Convert the three-hit fixture and compare with the expected file."""
+        self._writePalsFile(self._palsFileName)
+        self._writeExpAlignFile(self._expAlignFileName)
+
+        iPalsToAlign = PalsToAlign(self._palsFileName, self._obsAlignFileName)
+        iPalsToAlign.run()
+
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expAlignFileName, self._obsAlignFileName))
+
+    def _writePalsFile(self, fileName):
+        """Write three pals hits between chunk01 and chunk02 in GFF layout."""
+        with open(fileName, "w") as f:
+            f.write("chunk01\tpals\thit\t32290\t32583\t252\t+\t.\tTarget chunk02 28975 29268; maxe 0.035\n")
+            f.write("chunk01\tpals\thit\t28975\t29268\t252\t+\t.\tTarget chunk02 32290 32583; maxe 0.035\n")
+            f.write("chunk01\tpals\thit\t65932\t66032\t68\t+\t.\tTarget chunk02 59293 59395; maxe 0.085\n")
+
+    def _writeExpAlignFile(self, fileName):
+        """Write the expected align lines; they are ordered by start on chunk01."""
+        with open(fileName, "w") as f:
+            f.write("chunk01\t28975\t29268\tchunk02\t32290\t32583\t0.0\t252\t96.5\n")
+            f.write("chunk01\t32290\t32583\tchunk02\t28975\t29268\t0.0\t252\t96.5\n")
+            f.write("chunk01\t65932\t66032\tchunk02\t59293\t59395\t0.0\t68\t91.5\n")
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_PathNum2Id.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PathNum2Id.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,74 @@
+import unittest
+import os
+from commons.core.parsing.PathNum2Id import PathNum2Id
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_PathNum2Id(unittest.TestCase):
+    """Check PathNum2Id on a path file, with and without a final newline."""
+
+    # Input records without line terminators; the first column restarts at 1
+    # on the fourth record, whereas the expected output counts from 1 to 6.
+    _INPUT_RECORDS = (
+        "1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0",
+        "2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0",
+        "3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0",
+        "1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0",
+        "2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0",
+        "3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0",
+    )
+
+    def setUp(self):
+        self._inputFileName = "dummyInputPathFile.path"
+        self._outputFileName = "dummyOutputPathFile.path"
+        self._expectedFileName = "expectedpathFile.path"
+        self._pathNum2Id = PathNum2Id()
+
+    def tearDown(self):
+        # Remove only what exists: the output file is missing when run()
+        # fails, and an unconditional os.remove() would then raise and leave
+        # the expected file behind.
+        for fileName in (self._inputFileName, self._outputFileName, self._expectedFileName):
+            if os.path.exists(fileName):
+                os.remove(fileName)
+
+    def test_RunWhithoutReturnAtEndOfFile(self):
+        self._createAndFillInputFileWhithoutReturnAtTheEnd()
+        self._runAndCheckOutput()
+
+    def test_RunWhithReturnAtEndOfFile(self):
+        self._createAndFillInputFileWhithReturnAtTheEnd()
+        self._runAndCheckOutput()
+
+    def _runAndCheckOutput(self):
+        """Run PathNum2Id on the input file and compare with the expected file."""
+        self._createExpectedFile()
+        self._pathNum2Id.setInFileName(self._inputFileName)
+        self._pathNum2Id.setOutFileName(self._outputFileName)
+        self._pathNum2Id.run()
+        fileutils = FileUtils()
+        self.assertTrue(fileutils.are2FilesIdentical(self._outputFileName, self._expectedFileName))
+
+    def _createExpectedFile(self):
+        """Write the expected output: same records, first column set to 1..6."""
+        with open(self._expectedFileName, "w") as f:
+            f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
+            f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
+            f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
+            f.write("4\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
+            f.write("5\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
+            f.write("6\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
+
+    def _createAndFillInputFileWhithoutReturnAtTheEnd(self):
+        """Write the input records with no newline after the last one."""
+        with open(self._inputFileName, "w") as f:
+            f.write("\n".join(self._INPUT_RECORDS))
+
+    def _createAndFillInputFileWhithReturnAtTheEnd(self):
+        """Write the input records, each one terminated by a newline."""
+        with open(self._inputFileName, "w") as f:
+            f.write("\n".join(self._INPUT_RECORDS) + "\n")
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_PslParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_PslParser.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,165 @@\n+from commons.core.parsing.PslParser import PslParser\n+import unittest, os\n+\n+\n+class Test_PslParser(unittest.TestCase):\n+\n+ def test_forward(self):\n+ fileName = "tmpFile.psl"\n+ handle = open(fileName, "w")\n+ handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\T\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t+\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n+ handle.close()\n+\n+ parser = PslParser(fileName, 0)\n+ self.assertEquals(parser.getNbMappings(), 1)\n+ for mapping in parser.getIterator():\n+ transcript = mapping.getTranscript()\n+ self.assertEquals(transcript.getName(), "test")\n+ self.assertEquals(transcript.getChromosome(), "chr1")\n+ self.assertEquals(transcript.getDirection(), 1)\n+ self.assertEquals(transcript.getStart(), 238)\n+ self.assertEquals(transcript.getEnd(), 553)\n+ self.assertEquals(transcript.getNbExons(), 2)\n+ for i, exon in enumerate(transcript.getExons()):\n+ if i == 0:\n+ self.assertEquals(exon.getStart(), 238)\n+ self.assertEquals(exon.getEnd(), 316)\n+ elif i == 1:\n+ self.assertEquals(exon.getStart(), 475)\n+ self.assertEquals(exon.getEnd(), 553)\n+ os.remove(fileName)\n+ \n+ \n+ def test_backward(self):\n+ fileName = "tmpFile.psl"\n+ handle = open(fileName, "w")\n+ handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT 
gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t-\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n+\n+ handle.close()\n+\n+ parser = PslParser(fileName, 0)\n+ self.assertEquals(parser.getNbMappings(), 1)\n+ for mapping in parser.getIterator():\n+ transcript = mapping.getTranscript()\n+ self.assertEquals(transcript.getName(), "test")\n+ self.assertEquals(transcript.getChromosome(), "chr1")\n+ self.assertEquals(transcript.getDirection(), -1)\n+ self.assertEquals(transcript.getStart(), 238)\n+ self.assertEquals(transcript.getEnd(), 553)\n+ self.assertEquals(transcript.getNbExons(), 2)\n+ for i, exon in enumerate(transcript.getExons()):\n+ if i == 1:\n+ self.assertEquals(exon.getStart(), 238)\n+ self.assertEquals(exon.getEnd(), 316)\n+ elif i == 0:\n+ self.assertEquals(exon.getStart(), 475)\n+ self.assertEquals(exon.getEnd(), 553)\n+ os.remove(fileName)\n+\n+\n+ def test_query_forward_target_forward(self):\n+ fileName = "tmpFile.psl"\n+ handle = open(fileName, "w")\n+ handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t0\\t0\\t0\\t0\\t0\\t1\\t60\\t++\\tseq1\\t255\\t9\\t250\\tref\\t2262\\t59\\t360\\t2'..b'assertEquals(transcript.getChromosome(), "ref")\n+ 
self.assertEquals(transcript.getDirection(), -1)\n+ self.assertEquals(transcript.getStart(), 60)\n+ self.assertEquals(transcript.getEnd(), 360)\n+ self.assertEquals(transcript.getNbExons(), 2)\n+ for i, exon in enumerate(transcript.getExons()):\n+ if i == 1:\n+ self.assertEquals(exon.getStart(), 60)\n+ self.assertEquals(exon.getEnd(), 180)\n+ elif i == 0:\n+ self.assertEquals(exon.getStart(), 241)\n+ self.assertEquals(exon.getEnd(), 360)\n+ os.remove(fileName)\n+\n+ def test_query_backward_target_backward(self):\n+ fileName = "tmpFile.psl"\n+ handle = open(fileName, "w")\n+ handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t--\\tseq1\\t255\\t8\\t250\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n+ handle.close()\n+\n+ parser = PslParser(fileName, 0)\n+ self.assertEquals(parser.getNbMappings(), 1)\n+ for mapping in parser.getIterator():\n+ transcript = mapping.getTranscript()\n+ self.assertEquals(transcript.getName(), "seq1")\n+ self.assertEquals(transcript.getChromosome(), "ref")\n+ self.assertEquals(transcript.getDirection(), 1)\n+ self.assertEquals(transcript.getStart(), 59)\n+ self.assertEquals(transcript.getEnd(), 360)\n+ self.assertEquals(transcript.getNbExons(), 2)\n+ for i, exon in enumerate(transcript.getExons()):\n+ if i == 0:\n+ self.assertEquals(exon.getStart(), 59)\n+ self.assertEquals(exon.getEnd(), 180)\n+ elif i == 1:\n+ self.assertEquals(exon.getStart(), 241)\n+ self.assertEquals(exon.getEnd(), 360)\n+ os.remove(fileName)\n+\n+\n+ def test_query_forward_target_backward(self):\n+ fileName = 
"tmpFile.psl"\n+ handle = open(fileName, "w")\n+ handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t+-\\tseq2\\t255\\t5\\t247\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n+ handle.close()\n+\n+ parser = PslParser(fileName, 0)\n+ self.assertEquals(parser.getNbMappings(), 1)\n+ for mapping in parser.getIterator():\n+ transcript = mapping.getTranscript()\n+ self.assertEquals(transcript.getName(), "seq2")\n+ self.assertEquals(transcript.getChromosome(), "ref")\n+ self.assertEquals(transcript.getDirection(), -1)\n+ self.assertEquals(transcript.getStart(), 59)\n+ self.assertEquals(transcript.getEnd(), 360)\n+ self.assertEquals(transcript.getNbExons(), 2)\n+ for i, exon in enumerate(transcript.getExons()):\n+ if i == 1:\n+ self.assertEquals(exon.getStart(), 59)\n+ self.assertEquals(exon.getEnd(), 180)\n+ elif i == 0:\n+ self.assertEquals(exon.getStart(), 241)\n+ self.assertEquals(exon.getEnd(), 360)\n+ os.remove(fileName)\n+\n+\n+if __name__ == "__main__":\n+ unittest.main()\n+\n+\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_SsrParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_SsrParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,154 @@
+from commons.core.parsing.SsrParser import SsrParser
+import unittest
+
+
+class Test_SsrParser(unittest.TestCase):
+
+
+    def test_setAttributesFromString(self):
+        ssrLine = "MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734"
+
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+
+        expBES_name = 'MRRE1H001B07RM1'
+        expBES_redundancy = '1'
+        expSSR_nbNucleotides = '2'
+        expSSR_Motif = 'ta'
+        expSSR_Motif_number = '19'
+        expSSR_start = '153'
+        expSSR_end = '190'
+        expBES_size = '734'
+
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+
+    def test_setAttributesFromString_empty_BESName(self):
+        ssrLine = "\t1\t2\tta\t19\t153\t190\t734"
+
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+
+        expBES_name = ''
+        expBES_redundancy = ''
+        expSSR_nbNucleotides = ''
+        expSSR_Motif = ''
+        expSSR_Motif_number = ''
+        expSSR_start = ''
+        expSSR_end = ''
+        expBES_size = ''
+
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+
+    def test_setAttributesFromString_less_than_8_fields(self):
+        ssrLine = "1\t2\tta\t19\t153\t190\t734"
+
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributesFromString(ssrLine)
+
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+
+        expBES_name = ''
+        expBES_redundancy = ''
+        expSSR_nbNucleotides = ''
+        expSSR_Motif = ''
+        expSSR_Motif_number = ''
+        expSSR_start = ''
+        expSSR_end = ''
+        expBES_size = ''
+
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+
+    def test_setAttributes(self):
+        lResults = ['MRRE1H001B07RM1','1','2','ta','19','153','190','734']
+        lineNumber = 1
+
+        iSsrParser = SsrParser()
+        iSsrParser.setAttributes(lResults, lineNumber)
+
+        obsBES_name = iSsrParser.getBesName()
+        obsBES_redundancy = iSsrParser.getBesRedundancy()
+        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
+        obsSSR_Motif = iSsrParser.getSsrMotif()
+        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
+        obsSSR_start = iSsrParser.getSsrStart()
+        obsSSR_end = iSsrParser.getSsrEnd()
+        obsBES_size = iSsrParser.getBesSize()
+
+        expBES_name = 'MRRE1H001B07RM1'
+        expBES_redundancy = '1'
+        expSSR_nbNucleotides = '2'
+        expSSR_Motif = 'ta'
+        expSSR_Motif_number = '19'
+        expSSR_start = '153'
+        expSSR_end = '190'
+        expBES_size = '734'
+
+        self.assertEquals(expBES_name, obsBES_name)
+        self.assertEquals(expBES_redundancy, obsBES_redundancy)
+        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
+        self.assertEquals(expSSR_Motif, obsSSR_Motif)
+        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
+        self.assertEquals(expSSR_start, obsSSR_start)
+        self.assertEquals(expSSR_end, obsSSR_end)
+        self.assertEquals(expBES_size, obsBES_size)
+
+    def test_eq_Equals(self):
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+
+        self.assertTrue(SsrParser1 == SsrParser2)
+
+    def test_eq_NotEquals(self):
+        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
+        SsrParser2 = SsrParser('MRRE1H001A12RM3', '1', '5', 'ttta', '6', '272', '295', '852')
+
+        self.assertFalse(SsrParser1 == SsrParser2)
+
+# Run every test of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanFile.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,287 @@\n+from commons.core.parsing.VarscanFile import VarscanFile\n+from commons.core.parsing.VarscanHit import VarscanHit\n+import unittest\n+import os\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n+from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n+from commons.core.checker.CheckerException import CheckerException\n+\n+class Test_VarscanFile(unittest.TestCase):\n+\n+ def test_parse_fileWithHeader(self):\n+ varscanFileName = "file.varscan"\n+ self._writeVarscanFile(varscanFileName)\n+ \n+ varscanHit1 = VarscanHit()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'T\')\n+ \n+ varscanHit2 = VarscanHit()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ expVarscanHitsList = [varscanHit1, varscanHit2]\n+ \n+ iVarscanFile = VarscanFile(varscanFileName)\n+ iVarscanFile.parse()\n+ obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n+ os.remove(varscanFileName)\n+ \n+ self.assertEquals(expVarscanHitsList, obsVarscanHitsList) \n+\n+ def test_parse_FileWithoutHeader(self):\n+ varscanFileName = "file.varscan"\n+ self._writeVarscanFileWithoutHeader(varscanFileName)\n+ \n+ varscanHit1 = VarscanHit()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'T\')\n+ \n+ varscanHit2 = VarscanHit()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ expVarscanHitsList = [varscanHit1, varscanHit2]\n+ \n+ iVarscanFile = VarscanFile(varscanFileName)\n+ iVarscanFile.parse()\n+ obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n+ obsTypeOfVarscanFile = 
iVarscanFile.getTypeOfVarscanFile()\n+ expTypeOfVarscanFile = "Varscan_2_2"\n+ \n+ self.assertEquals(expVarscanHitsList, obsVarscanHitsList) \n+ self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n+ os.remove(varscanFileName)\n+ \n+ def test_parse_VarscanFileWithTag(self):\n+ inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n+ self._writeVarscanFileWithTag(inputFileName)\n+ launcher = VarscanFile(inputFileName)\n+ launcher.parse()\n+ obsListOfVarscanHits = launcher.getListOfVarscanHits() \n+ \n+ varscanHit1 = VarscanHit_WithTag()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setTag(\'EspeceA\')\n+ \n+ varscanHit2 = VarscanHit_WithTag()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setTag(\'EspeceA\')\n+ expVarscanHitsList = [varscanHit1, varscanHit2] \n+ \n+ obsTypeOfVarscanFile = launcher.getTypeOfVarscanFile()\n+ expTypeOfVarscanFile = "Varscan_2_2_WithTag"\n+ \n+ self.assertEquals(expVarscanHitsList, obsListOfVarscanHits) \n+ self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n+ os.remove(inputFileName)\n+ \n+ def test_parse_VarscanFile_v2_2_8(self):\n+ inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n+ self._writeVarscanFile_v2_2_8(inputFileName)\n+ launcher = VarscanFile(inputFileName)\n+ launcher.parse()\n+ obsListOfVarscanHits = launcher.getListOfVarscanHits('..b'expVarscanHit = VarscanHit_v2_2_8()\n+ expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n+ expVarscanHit.setPosition(\'3227\')\n+ expVarscanHit.setRef(\'G\')\n+ expVarscanHit.setVar(\'A\')\n+ expVarscanHit.setCns(\'A\')\n+ self.assertEquals(expVarscanHit, obsVarscanHit)\n+ \n+ def test_createVarscanObjectFromLine_VarscanHit_v2_2_8_WithTag(self):\n+ line = 
"C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\tEspeceA\\n"\n+ nbLine = 1\n+ launcher = VarscanFile()\n+ launcher.setTypeOfVarscanFile("Varscan_2_2_8_WithTag")\n+ obsVarscanHit = launcher.createVarscanObjectFromLine(line, nbLine)\n+ expVarscanHit = VarscanHit_v2_2_8_WithTag()\n+ expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n+ expVarscanHit.setPosition(\'3227\')\n+ expVarscanHit.setRef(\'G\')\n+ expVarscanHit.setVar(\'A\')\n+ expVarscanHit.setCns(\'A\')\n+ expVarscanHit.setTag(\'EspeceA\')\n+ self.assertEquals(expVarscanHit, obsVarscanHit)\n+ \n+ def _writeVarscanFile(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+\n+ def _writeVarscanFileWithoutHeader(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+ \n+ def _writeVarscanFileWithTag(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\tEspeceA\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\tEspeceA\\n")\n+ varscanFile.close()\n+ \n+ def 
_writeVarscanFile_v2_2_8(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tCons\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n")\n+ varscanFile.write("C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\n")\n+ varscanFile.write("C11HBa0064J13_LR285\\t3230\\tG\\tT\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tT\\n")\n+ varscanFile.close()\n+ \n+ def _writeOther(self, fileName):\n+ file = open(fileName, \'w\')\n+ file.write(\'##gff-version 3\\n\')\n+ file.write(\'chr16\\tBlatToGff\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\\n\')\n+ file.write(\'chr16\\tBlatToGff\\tBES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n\')\n+ file.write(\'chr11\\tBlatToGff\\tBES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\\n\')\n+ file.write(\'chr11\\tBlatToGff\\tBES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\\n\')\n+ file.write(\'chr18\\tBlatToGff\\tBES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\\n\')\n+ file.close()\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanFileForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,305 @@\n+import unittest\n+import os\n+\n+from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP\n+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n+\n+class Test_VarscanFileForGnpSNP(unittest.TestCase):\n+\n+ def test__init__(self):\n+ expFastqFileName = "SR.fastq"\n+ expRefFastaFileName = "ref.fasta"\n+ expTaxonName = "Arabidopsis thaliana"\n+ expVarscanFieldSeparator = "\\t"\n+ expVarscanHitsList = []\n+ \n+ iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)\n+ \n+ obsFastaqFileName = iVarscanFileForGnpSNP.getFastqFileName()\n+ obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()\n+ obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()\n+ obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()\n+ obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n+ \n+ self.assertEquals(expFastqFileName, obsFastaqFileName)\n+ self.assertEquals(expRefFastaFileName, obsRefFastaFileName)\n+ self.assertEquals(expTaxonName, obsTaxonName)\n+ self.assertEquals(expVarscanFieldSeparator, obsVarscanFieldSeparator)\n+ self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n+\n+ def test_parse(self):\n+ varscanFileName = "varscan.tab"\n+ self._writeVarscanFile(varscanFileName)\n+ \n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'37\')\n+ varscanHit1.setQual2(\'35\')\n+ varscanHit1.setPvalue(\'0.3999999999999999\')\n+ varscanHit1.setGnpSNPRef("C")\n+ varscanHit1.setGnpSNPVar("T")\n+ varscanHit1.setGnpSNPPosition(32)\n+ varscanHit1.setOccurrence(1)\n+ varscanHit1.setPolymType("SNP")\n+ varscanHit1.setPolymLength(1)\n+ 
\n+ varscanHit2 = VarscanHitForGnpSNP()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setReads1(\'1\')\n+ varscanHit2.setReads2(\'2\')\n+ varscanHit2.setVarFreq(\'66,67%\')\n+ varscanHit2.setStrands1(\'1\')\n+ varscanHit2.setStrands2(\'1\')\n+ varscanHit2.setQual1(\'40\')\n+ varscanHit2.setQual2(\'34\')\n+ varscanHit2.setPvalue(\'0.3999999999999999\')\n+ varscanHit2.setGnpSNPRef("A")\n+ varscanHit2.setGnpSNPVar("T")\n+ varscanHit2.setGnpSNPPosition(34)\n+ varscanHit2.setOccurrence(1)\n+ varscanHit2.setPolymType("SNP")\n+ varscanHit2.setPolymLength(1)\n+ expVarscanHitsList = [varscanHit1, varscanHit2]\n+ \n+ iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, \'\', \'\', \'\')\n+ iVarscanFileForGnpSNP.parse()\n+ obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n+ os.remove(varscanFileName)\n+ \n+ self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n+ \n+ def test_parse_with_same_position_and_chr_and_type(self):\n+ varscanFileName = "varscan.tab"\n+ self._writeVarscanFile_2(varscanFileName)\n+ \n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'37\')\n+ varscanHit1.setQual2(\'35\')\n+ var'..b' refFastaFileName = "ref.fasta"\n+ taxonName = "Arabidopsis thaliana"\n+ \n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ 
varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'40\')\n+ varscanHit1.setQual2(\'34\')\n+ varscanHit1.setPvalue(\'0.3999999999999999\')\n+ lVarscanHits1 = [varscanHit1]\n+ \n+ iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n+ iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)\n+ \n+ varscanHit2 = VarscanHitForGnpSNP()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setReads1(\'1\')\n+ varscanHit2.setReads2(\'2\')\n+ varscanHit2.setVarFreq(\'66,67%\')\n+ varscanHit2.setStrands1(\'1\')\n+ varscanHit2.setStrands2(\'1\')\n+ varscanHit2.setQual1(\'40\')\n+ varscanHit2.setQual2(\'34\')\n+ varscanHit2.setPvalue(\'0.3999999999999999\')\n+ lVarscanHits2 = [varscanHit2]\n+\n+ iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n+ iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)\n+\n+ self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)\n+ \n+ def _writeVarscanFile(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+ \n+ def _writeVarscanFile_2(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ 
varscanFile.write("C02HBa0291P19_LR48\\t32\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+ \n+ def _writeVarscanFile_3(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\t+A\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+\n+ def _writeVarscanFile_4(self, varscanFileName):\n+ varscanFile = open(varscanFileName, \'w\')\n+ varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n+ varscanFile.write("seqname\\t2\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("seqname\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("seqname\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("seqname\\t8\\tT\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("chrom\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.write("chrom\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n+ varscanFile.close()\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanHit.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,165 @@
+import unittest
+from commons.core.parsing.VarscanHit import VarscanHit
+from commons.core.checker.CheckerException import CheckerException
+
+class Test_VarscanHit(unittest.TestCase):
+
+    def test_setAttributesFromString(self):
+        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
+
+        expChrom = "C02HBa0291P19_LR48"
+        expPosition = "32"
+        expRef = "C"
+        expVar = "T"
+
+        varscanHit = VarscanHit()
+        varscanHit.setAttributesFromString(line)
+
+        obsChrom = varscanHit.getChrom()
+        obsPosition = varscanHit.getPosition()
+        obsRef = varscanHit.getRef()
+        obsVar = varscanHit.getVar()
+
+        self.assertEquals(expChrom, obsChrom)
+        self.assertEquals(expPosition, obsPosition)
+        self.assertEquals(expRef, obsRef)
+        self.assertEquals(expVar, obsVar)
+
+    def test_setAttributesFromString_empty_chrom(self):
+        line = "\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
+        iVarscanHit = VarscanHit()
+        try :
+            iVarscanHit.setAttributesFromString(line)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+
+        expMessage = "The field Chrom is empty in varscan file in line "
+        obsMessage = checkerExceptionInstance.msg
+
+        self.assertEquals(expMessage, obsMessage)
+
+    def  test_setAttributesFromString_less_than_12_fields(self):
+        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\n"
+        iVarscanHit = VarscanHit()
+        iVarscanHit.setAttributesFromString(line)
+        self.assertEquals("", iVarscanHit.getQualVar())
+        self.assertEquals("", iVarscanHit.getPValue())
+
+    def test_setAttributes(self):
+        lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]
+        lineNumber = 1
+
+        expChrom = "C02HBa0291P19_LR48"
+        expPosition = "32"
+        expRef = "C"
+        expVar = "T"
+
+        varscanHit = VarscanHit()
+        varscanHit.setAttributes(lResults, lineNumber)
+
+        obsChrom = varscanHit.getChrom()
+        obsPosition = varscanHit.getPosition()
+        obsRef = varscanHit.getRef()
+        obsVar = varscanHit.getVar()
+
+        self.assertEquals(expChrom, obsChrom)
+        self.assertEquals(expPosition, obsPosition)
+        self.assertEquals(expRef, obsRef)
+        self.assertEquals(expVar, obsVar)
+
+    def test_setAttributes_empty_chrom(self):
+        lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 1
+
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+
+        expMessage = "The field Chrom is empty in varscan file in line 1"
+        obsMessage = checkerExceptionInstance.msg
+
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_position(self):
+        lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+
+        expMessage = "The field Position is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_ref(self):
+        lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+
+        expMessage = "The field Ref is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+
+        self.assertEquals(expMessage, obsMessage)
+
+    def test_setAttributes_empty_var(self):
+        lResults = ["chrom", "position", "ref", "", "", "", "10", "", "", "", "", ""]
+        lineNumber = 5
+
+        varscanHit = VarscanHit()
+        checkerExceptionInstance = None
+        try:
+            varscanHit.setAttributes(lResults, lineNumber)
+        except CheckerException, e:
+            checkerExceptionInstance = e
+
+        expMessage = "The field Var is empty in varscan file in line 5"
+        obsMessage = checkerExceptionInstance.msg
+
+        self.assertEquals(expMessage, obsMessage)
+
+    def test__eq__notEquals(self):
+        varscanHit1 = VarscanHit()
+        varscanHit1.setChrom('C02HBa0291P19_LR48')
+        varscanHit1.setPosition('32')
+        varscanHit1.setRef('C')
+        varscanHit1.setVar('T')
+
+        varscanHit2 = VarscanHit()
+        varscanHit2.setChrom('C02HBa0291P19_LR48')
+        varscanHit2.setPosition('34')
+        varscanHit2.setRef('A')
+        varscanHit2.setVar('T')
+
+        self.assertFalse(varscanHit1 == varscanHit2)
+
+    def test__eq__Equals(self):
+        varscanHit1 = VarscanHit()
+        varscanHit1.setChrom('C02HBa0291P19_LR48')
+        varscanHit1.setPosition('32')
+        varscanHit1.setRef('C')
+        varscanHit1.setVar('T')
+
+        varscanHit2 = VarscanHit()
+        varscanHit2.setChrom('C02HBa0291P19_LR48')
+        varscanHit2.setPosition('32')
+        varscanHit2.setRef('C')
+        varscanHit2.setVar('T')
+
+        self.assertTrue(varscanHit1 == varscanHit2)
+
+# Run every test of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanHitForGnpSNP.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHitForGnpSNP.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,412 @@\n+import unittest\n+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n+from commons.core.checker.CheckerException import CheckerException\n+\n+class Test_VarscanHitForGnpSNP(unittest.TestCase):\n+\n+ def test_setAttributes(self):\n+ lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]\n+ lineNumber = 1\n+ \n+ expChrom = "C02HBa0291P19_LR48"\n+ expPosition = "32"\n+ expRef = "C"\n+ expVar = "T"\n+ expReads1 = "1"\n+ expReads2 = "2"\n+ expVarFreq = 66.67\n+ expStrands1 = "1"\n+ expStrands2 = "1"\n+ expQual1 = "37"\n+ expQual2 = "35"\n+ expPvalue = "0.3999999999999999"\n+ \n+ varscanHit = VarscanHitForGnpSNP()\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ \n+ obsChrom = varscanHit.getChrom()\n+ obsPosition = varscanHit.getPosition()\n+ obsRef = varscanHit.getRef()\n+ obsVar = varscanHit.getVar()\n+ obsReads1 = varscanHit.getReads1()\n+ obsReads2 = varscanHit.getReads2()\n+ obsVarFreq = varscanHit.getVarFreq()\n+ obsStrands1 = varscanHit.getStrands1()\n+ obsStrands2 = varscanHit.getStrands2()\n+ obsQual1 = varscanHit.getQual1()\n+ obsQual2 = varscanHit.getQual2()\n+ obsPvalue = varscanHit.getPvalue()\n+ \n+ self.assertEquals(expChrom, obsChrom)\n+ self.assertEquals(expPosition, obsPosition)\n+ self.assertEquals(expRef, obsRef)\n+ self.assertEquals(expVar, obsVar)\n+ self.assertEquals(expReads1, obsReads1)\n+ self.assertEquals(expReads2, obsReads2)\n+ self.assertEquals(expVarFreq, obsVarFreq)\n+ self.assertEquals(expStrands1, obsStrands1)\n+ self.assertEquals(expStrands2, obsStrands2)\n+ self.assertEquals(expQual1, obsQual1)\n+ self.assertEquals(expQual2, obsQual2)\n+ self.assertEquals(expPvalue, obsPvalue)\n+\n+ def test_setAttributes_empty_chrom(self):\n+ lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHitForGnpSNP()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, 
lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Chrom is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_position(self):\n+ lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]\n+ lineNumber = 5\n+ \n+ varscanHit = VarscanHitForGnpSNP()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Position is empty in varscan file in line 5"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_ref(self):\n+ lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]\n+ lineNumber = 5\n+ \n+ varscanHit = VarscanHitForGnpSNP()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Ref is empty in varscan file in line 5"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_var(self):\n+ lResults = ["chrom", "position", "ref", "", "", "", "10", "", "", "", "", ""]\n+ lineNumber = 5\n+ \n+ varscanHit = VarscanHitForGnpS'..b'atAlleles2GnpSnp_for_Deletion(self):\n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'-ATT\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'37\')\n+ varscanHit1.setQual2(\'35\')\n+ varscanHit1.setPvalue(\'0.3999999999999999\')\n+ \n+ expPolymType = "DELETION"\n+ expGnpSnpRef = 
"ATT"\n+ expGnpSnpVar = "---"\n+ expGnpSnpPosition = 33\n+ \n+ varscanHit1.formatAlleles2GnpSnp()\n+ \n+ obsPolymType = varscanHit1.getPolymType()\n+ obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+ obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+ obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+ \n+ self.assertEquals(expPolymType,obsPolymType)\n+ self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+ self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+ self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+ \n+ def test_setVarFreq(self):\n+ varscanHit1 = VarscanHitForGnpSNP() \n+ exp = 66.67 \n+ varscanHit1.setVarFreq(\'66,67%\')\n+ obs = varscanHit1.getVarFreq()\n+ self.assertEquals(exp, obs)\n+ \n+ def test_formatAlleles2GnpSnp_for_Insertion(self):\n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setVar(\'+TG\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'37\')\n+ varscanHit1.setQual2(\'35\')\n+ varscanHit1.setPvalue(\'0.3999999999999999\')\n+ \n+ expPolymType = "INSERTION"\n+ expGnpSnpRef = "--"\n+ expGnpSnpVar = "TG"\n+ expGnpSnpPosition = 32\n+ \n+ varscanHit1.formatAlleles2GnpSnp()\n+ \n+ obsPolymType = varscanHit1.getPolymType()\n+ obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+ obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+ obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+ \n+ self.assertEquals(expPolymType,obsPolymType)\n+ self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+ self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+ self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+ \n+ def test_formatAlleles2GnpSnp_for_SNP(self):\n+ varscanHit1 = VarscanHitForGnpSNP()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'12\')\n+ varscanHit1.setRef(\'G\')\n+ 
varscanHit1.setVar(\'T\')\n+ varscanHit1.setReads1(\'1\')\n+ varscanHit1.setReads2(\'2\')\n+ varscanHit1.setVarFreq(\'66,67%\')\n+ varscanHit1.setStrands1(\'1\')\n+ varscanHit1.setStrands2(\'1\')\n+ varscanHit1.setQual1(\'37\')\n+ varscanHit1.setQual2(\'35\')\n+ varscanHit1.setPvalue(\'0.3999999999999999\')\n+ \n+ expPolymType = "SNP"\n+ expGnpSnpRef = "G"\n+ expGnpSnpVar = "T"\n+ expGnpSnpPosition = 12\n+ \n+ varscanHit1.formatAlleles2GnpSnp()\n+ \n+ obsPolymType = varscanHit1.getPolymType()\n+ obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n+ obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n+ obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n+ \n+ self.assertEquals(expPolymType,obsPolymType)\n+ self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n+ self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n+ self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n+ \n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanHit_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_WithTag.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,212 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+\n+class Test_VarscanHit_WithTag(unittest.TestCase):\n+\n+ def test_setAttributesFromString(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+ \n+ expChrom = "chr1"\n+ expPosition = "1804"\n+ expRef = "T"\n+ expVar = "C"\n+ expReadsRef = "0"\n+ expReadsVar = "1"\n+ expVarFreq = "100%"\n+ expStrandsRef = "0"\n+ expStrandsVar = "1"\n+ expQualRef = "0"\n+ expQualVar = "53"\n+ expPValue = "0.98"\n+ expTag = "EspeceA"\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ varscanHit.setAttributesFromString(line)\n+ \n+ obsChrom = varscanHit.getChrom()\n+ obsPosition = varscanHit.getPosition()\n+ obsRef = varscanHit.getRef()\n+ obsVar = varscanHit.getVar()\n+ obsReadsRef = varscanHit.getReadsRef()\n+ obsReadsVar = varscanHit.getReadsVar()\n+ obsVarFreq = varscanHit.getVarFreq()\n+ obsStrandsRef = varscanHit.getStrandsRef()\n+ obsStrandsVar = varscanHit.getStrandsVar()\n+ obsQualRef = varscanHit.getQualRef()\n+ obsQualVar = varscanHit.getQualVar()\n+ obsPValue = varscanHit.getPValue()\n+ obsTag = varscanHit.getTag()\n+ \n+ self.assertEquals(expChrom, obsChrom)\n+ self.assertEquals(expPosition, obsPosition)\n+ self.assertEquals(expRef, obsRef)\n+ self.assertEquals(expVar, obsVar)\n+ self.assertEquals(expReadsRef, obsReadsRef)\n+ self.assertEquals(expReadsVar, obsReadsVar)\n+ self.assertEquals(expVarFreq, obsVarFreq)\n+ self.assertEquals(expStrandsRef, obsStrandsRef)\n+ self.assertEquals(expStrandsVar, obsStrandsVar)\n+ self.assertEquals(expQualRef, obsQualRef)\n+ self.assertEquals(expQualVar, obsQualVar)\n+ self.assertEquals(expPValue, obsPValue)\n+ self.assertEquals(expTag, obsTag)\n+ \n+ def test_setAttributesFromString_empty_chrom(self):\n+ line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+ varscanHit = VarscanHit_WithTag()\n+ try 
:\n+ varscanHit.setAttributesFromString(line)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ expMessage = "The field Chrom is empty in varscan file in line "\n+ obsMessage = checkerExceptionInstance.msg\n+ self.assertEquals(expMessage, obsMessage)\n+ \n+ def test_setAttributes(self):\n+ lResults = [\'chr1\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ expChrom = "chr1"\n+ expPosition = "1804"\n+ expRef = "T"\n+ expVar = "C"\n+ expReadsRef = "0"\n+ expReadsVar = "1"\n+ expVarFreq = "100%"\n+ expStrandsRef = "0"\n+ expStrandsVar = "1"\n+ expQualRef = "0"\n+ expQualVar = "53"\n+ expPValue = "0.98"\n+ expTag = "EspeceA"\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ \n+ obsChrom = varscanHit.getChrom()\n+ obsPosition = varscanHit.getPosition()\n+ obsRef = varscanHit.getRef()\n+ obsVar = varscanHit.getVar()\n+ obsReadsRef = varscanHit.getReadsRef()\n+ obsReadsVar = varscanHit.getReadsVar()\n+ obsVarFreq = varscanHit.getVarFreq()\n+ obsStrandsRef = varscanHit.getStrandsRef()\n+ obsStrandsVar = varscanHit.getStrandsVar()\n+ obsQualRef = varscanHit.getQualRef()\n+ obsQualVar = varscanHit.getQualVar()\n+ obsPValue = varscanHit.getPValue()\n+ obsTag = varscanHit.getTag()\n+ \n+ self.assertEquals(expChrom, obsChrom)\n+ self.assertEquals(expPosition, obsPosition)\n+ self.assertEquals(expRef, obsRef)\n+ self.assertEquals'..b'q)\n+ self.assertEquals(expStrandsRef, obsStrandsRef)\n+ self.assertEquals(expStrandsVar, obsStrandsVar)\n+ self.assertEquals(expQualRef, obsQualRef)\n+ self.assertEquals(expQualVar, obsQualVar)\n+ self.assertEquals(expPValue, obsPValue)\n+ self.assertEquals(expTag, obsTag)\n+\n+ def test_setAttributes_empty_chrom(self):\n+ lResults = [\'\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ 
checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Chrom is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_position(self):\n+ lResults = [\'chr1\', \'\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Position is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_ref(self):\n+ lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Ref is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_cns(self):\n+ lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Var is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def 
test__eq__notEquals(self):\n+ varscanHit1 = VarscanHit_WithTag()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setVar(\'EspeceA\')\n+ \n+ varscanHit2 = VarscanHit_WithTag()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setVar(\'EspeceB\')\n+ \n+ self.assertFalse(varscanHit1 == varscanHit2)\n+\n+ def test__eq__Equals(self):\n+ varscanHit1 = VarscanHit_WithTag()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setVar(\'EspeceA\')\n+ \n+ varscanHit2 = VarscanHit_WithTag()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setVar(\'EspeceA\')\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanHit_v2_2_8.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_v2_2_8.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,290 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n+from commons.core.parsing.VarscanHit import VarscanHit\n+\n+class Test_VarscanHit_v2_2_8(unittest.TestCase):\n+\n+ def test_setAttributesFromString(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+ \n+ expChrom = "chr1"\n+ expPosition = "1804"\n+ expRef = "T"\n+ expCns = "C"\n+ expReadsRef = "0"\n+ expReadsVar = "1"\n+ expVarFreq = "100%"\n+ expStrandsRef = "0"\n+ expStrandsVar = "1"\n+ expQualRef = "0"\n+ expQualVar = "53"\n+ expPValue = "0.98"\n+ expMapQualRef = "0"\n+ expMapQualVar = "1"\n+ expReadsRefPlus = "0"\n+ expReadsRefMinus = "0"\n+ expReadsVarPlus = "1"\n+ expReadsVarMinus = "0"\n+ expVar = "C"\n+ \n+ varscanHit = VarscanHit_v2_2_8()\n+ varscanHit.setAttributesFromString(line)\n+ \n+ obsChrom = varscanHit.getChrom()\n+ obsPosition = varscanHit.getPosition()\n+ obsRef = varscanHit.getRef()\n+ obsCns = varscanHit.getCns()\n+ obsReadsRef = varscanHit.getReadsRef()\n+ obsReadsVar = varscanHit.getReadsVar()\n+ obsVarFreq = varscanHit.getVarFreq()\n+ obsStrandsRef = varscanHit.getStrandsRef()\n+ obsStrandsVar = varscanHit.getStrandsVar()\n+ obsQualRef = varscanHit.getQualRef()\n+ obsQualVar = varscanHit.getQualVar()\n+ obsPValue = varscanHit.getPValue()\n+ obsMapQualRef = varscanHit.getMapQualRef()\n+ obsMapQualVar = varscanHit.getMapQualVar()\n+ obsReadsRefPlus = varscanHit.getReadsRefPlus()\n+ obsReadsRefMinus = varscanHit.getReadsRefMinus()\n+ obsReadsVarPlus = varscanHit.getReadsVarPlus()\n+ obsReadsVarMinus = varscanHit.getReadsVarMinus()\n+ obsVar = varscanHit.getVar()\n+ \n+ self.assertEquals(expChrom, obsChrom)\n+ self.assertEquals(expPosition, obsPosition)\n+ self.assertEquals(expRef, obsRef)\n+ self.assertEquals(expCns, obsCns)\n+ self.assertEquals(expReadsRef, obsReadsRef)\n+ self.assertEquals(expReadsVar, 
obsReadsVar)\n+ self.assertEquals(expVarFreq, obsVarFreq)\n+ self.assertEquals(expStrandsRef, obsStrandsRef)\n+ self.assertEquals(expStrandsVar, obsStrandsVar)\n+ self.assertEquals(expQualRef, obsQualRef)\n+ self.assertEquals(expQualVar, obsQualVar)\n+ self.assertEquals(expPValue, obsPValue)\n+ self.assertEquals(expMapQualRef, obsMapQualRef)\n+ self.assertEquals(expMapQualVar, obsMapQualVar)\n+ self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n+ self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n+ self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n+ self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n+ self.assertEquals(expVar, obsVar)\n+ \n+ def test_setAttributesFromString_empty_chrom(self):\n+ line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+ varscanHit = VarscanHit_v2_2_8()\n+ try :\n+ varscanHit.setAttributesFromString(line)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ expMessage = "The field Chrom is empty in varscan file in line "\n+ obsMessage = checkerExceptionInstance.msg\n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributesFromString_less_than_19_fields(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+ varscanHit = VarscanHit_v2_2_8()\n+ try :\n+ varscanHit.setAttributesFromString(line)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ expMessage = "This varscan line (l.) 
is not complete"\n+ obsMessage = checkerExceptionInstance.msg\n+ self.a'..b' expMessage = "The field Position is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_ref(self):\n+ lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Ref is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_cns(self):\n+ lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Cons is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_var(self):\n+ lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field varAllele is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, 
obsMessage)\n+\n+ def test__eq__notEquals(self):\n+ varscanHit1 = VarscanHit_v2_2_8()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'32\')\n+ varscanHit1.setRef(\'C\')\n+ varscanHit1.setCns(\'T\')\n+ varscanHit1.setVar(\'T\')\n+ \n+ varscanHit2 = VarscanHit_v2_2_8()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setCns(\'T\')\n+ varscanHit2.setVar(\'T\')\n+ \n+ self.assertFalse(varscanHit1 == varscanHit2)\n+\n+ def test__eq__Equals(self):\n+ varscanHit1 = VarscanHit_v2_2_8()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setCns(\'T\')\n+ varscanHit1.setVar(\'T\')\n+ \n+ varscanHit2 = VarscanHit_v2_2_8()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setCns(\'T\')\n+ varscanHit2.setVar(\'T\')\n+ \n+ self.assertTrue(varscanHit1 == varscanHit2)\n+ \n+ def test_convertVarscanHit_v2_2_8_To_VarscanHit(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n+ iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8()\n+ iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n+ obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_To_VarscanHit()\n+ \n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\n"\n+ expVarcanHit_WithTag = VarscanHit()\n+ expVarcanHit_WithTag.setAttributesFromString(line)\n+ \n+ self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,300 @@\n+import unittest\n+from commons.core.checker.CheckerException import CheckerException\n+from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n+from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n+\n+class Test_VarscanHit_v2_2_8_WithTag(unittest.TestCase):\n+\n+ def test_setAttributesFromString(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+ \n+ expChrom = "chr1"\n+ expPosition = "1804"\n+ expRef = "T"\n+ expCns = "C"\n+ expReadsRef = "0"\n+ expReadsVar = "1"\n+ expVarFreq = "100%"\n+ expStrandsRef = "0"\n+ expStrandsVar = "1"\n+ expQualRef = "0"\n+ expQualVar = "53"\n+ expPValue = "0.98"\n+ expMapQualRef = "0"\n+ expMapQualVar = "1"\n+ expReadsRefPlus = "0"\n+ expReadsRefMinus = "0"\n+ expReadsVarPlus = "1"\n+ expReadsVarMinus = "0"\n+ expVar = "C"\n+ expTag = "EspeceA"\n+ \n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ varscanHit.setAttributesFromString(line)\n+ \n+ obsChrom = varscanHit.getChrom()\n+ obsPosition = varscanHit.getPosition()\n+ obsRef = varscanHit.getRef()\n+ obsCns = varscanHit.getCns()\n+ obsReadsRef = varscanHit.getReadsRef()\n+ obsReadsVar = varscanHit.getReadsVar()\n+ obsVarFreq = varscanHit.getVarFreq()\n+ obsStrandsRef = varscanHit.getStrandsRef()\n+ obsStrandsVar = varscanHit.getStrandsVar()\n+ obsQualRef = varscanHit.getQualRef()\n+ obsQualVar = varscanHit.getQualVar()\n+ obsPValue = varscanHit.getPValue()\n+ obsMapQualRef = varscanHit.getMapQualRef()\n+ obsMapQualVar = varscanHit.getMapQualVar()\n+ obsReadsRefPlus = varscanHit.getReadsRefPlus()\n+ obsReadsRefMinus = varscanHit.getReadsRefMinus()\n+ obsReadsVarPlus = varscanHit.getReadsVarPlus()\n+ obsReadsVarMinus = varscanHit.getReadsVarMinus()\n+ obsVar = varscanHit.getVar()\n+ obsTag = varscanHit.getTag()\n+ \n+ self.assertEquals(expChrom, obsChrom)\n+ self.assertEquals(expPosition, obsPosition)\n+ self.assertEquals(expRef, obsRef)\n+ 
self.assertEquals(expCns, obsCns)\n+ self.assertEquals(expReadsRef, obsReadsRef)\n+ self.assertEquals(expReadsVar, obsReadsVar)\n+ self.assertEquals(expVarFreq, obsVarFreq)\n+ self.assertEquals(expStrandsRef, obsStrandsRef)\n+ self.assertEquals(expStrandsVar, obsStrandsVar)\n+ self.assertEquals(expQualRef, obsQualRef)\n+ self.assertEquals(expQualVar, obsQualVar)\n+ self.assertEquals(expPValue, obsPValue)\n+ self.assertEquals(expMapQualRef, obsMapQualRef)\n+ self.assertEquals(expMapQualVar, obsMapQualVar)\n+ self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n+ self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n+ self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n+ self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n+ self.assertEquals(expVar, obsVar)\n+ self.assertEquals(expTag, obsTag)\n+ \n+ def test_setAttributesFromString_empty_chrom(self):\n+ line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ try :\n+ varscanHit.setAttributesFromString(line)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ expMessage = "The field Chrom is empty in varscan file in line "\n+ obsMessage = checkerExceptionInstance.msg\n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributesFromString_less_than_20_fields(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ try :\n+ varscanHit.setAttributesFromString(line)\n+ '..b'\'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Ref is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ 
self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_cns(self):\n+ lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field Cons is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test_setAttributes_empty_var(self):\n+ lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\', \'EspeceA\']\n+ lineNumber = 1\n+ \n+ varscanHit = VarscanHit_v2_2_8_WithTag()\n+ checkerExceptionInstance = None\n+ try:\n+ varscanHit.setAttributes(lResults, lineNumber)\n+ except CheckerException, e:\n+ checkerExceptionInstance = e\n+ \n+ expMessage = "The field varAllele is empty in varscan file in line 1"\n+ obsMessage = checkerExceptionInstance.msg\n+ \n+ self.assertEquals(expMessage, obsMessage)\n+\n+ def test__eq__notEquals(self):\n+ varscanHit1 = VarscanHit_v2_2_8_WithTag()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setCns(\'T\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setVar(\'EspeceA\')\n+ \n+ varscanHit2 = VarscanHit_v2_2_8_WithTag()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setCns(\'T\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setVar(\'EspeceB\')\n+ \n+ self.assertFalse(varscanHit1 == varscanHit2)\n+\n+ def test__eq__Equals(self):\n+ varscanHit1 = VarscanHit_v2_2_8_WithTag()\n+ varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n+ 
varscanHit1.setPosition(\'34\')\n+ varscanHit1.setRef(\'A\')\n+ varscanHit1.setCns(\'T\')\n+ varscanHit1.setVar(\'T\')\n+ varscanHit1.setVar(\'EspeceA\')\n+ \n+ varscanHit2 = VarscanHit_v2_2_8_WithTag()\n+ varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n+ varscanHit2.setPosition(\'34\')\n+ varscanHit2.setRef(\'A\')\n+ varscanHit2.setCns(\'T\')\n+ varscanHit2.setVar(\'T\')\n+ varscanHit2.setVar(\'EspeceA\')\n+ \n+ self.assertTrue(varscanHit1 == varscanHit2)\n+ \n+ def test_convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag(self):\n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n+ iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8_WithTag()\n+ iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n+ obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag()\n+ \n+ line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n+ expVarcanHit_WithTag = VarscanHit_WithTag()\n+ expVarcanHit_WithTag.setAttributesFromString(line)\n+ \n+ self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_WigParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_WigParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,31 @@
+from commons.core.utils.FileUtils import FileUtils
+from commons.core.parsing.WigParser import WigParser
+import glob
+import unittest
+import os
+
+class Test_WigParser(unittest.TestCase):
+
+    def tearDown(self):
+        for file in glob.glob("data/.chr*.index"):
+              os.remove(file)
+
+    def test_GetRange1(self):
+        self.parser = WigParser("data/test.wig")
+        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
+        self.assertEqual(self.parser.getRange("chr1", 10, 20), outputRange)
+        outputRange = [0.0, 9.5, 9.6, 0.0]
+        self.assertEqual(self.parser.getRange("chrX", 4, 7), outputRange)
+
+    def test_GetRange2(self):
+        self.parser = WigParser("data/test1.wig")
+        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
+        self.assertEqual(self.parser.getRange("chr2", 10, 20), outputRange)
+
+    def test_GetRange3(self):
+        self.parser = WigParser("data/test2.wig")
+        outputRange = [1.4, 1.5]
+        self.assertEqual(self.parser.getRange("chr3", 14, 15), outputRange)
+
+if __name__ == '__main__':
+        unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/Test_pilerTAToGrouperMap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/Test_pilerTAToGrouperMap.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,108 @@
+import unittest
+import os
+from commons.core.parsing.PilerTAToGrouperMap import PilerTAToGrouperMap
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_pilerTAToGrouperMap(unittest.TestCase):
+
+    def setUp(self):
+        self._inputGffFileName = "input.gff"
+        self._inputPYRFileName = "input_pyr.gff"
+        self._inputMOTIFFileName = "input_motif.gff"
+
+        self._obsOutFileName = "output.info"
+        self._obsGrouperFileName = "input_motif.gff.grp"
+        self._obsGrpMapFileName = "input_motif.gff.grp.map"
+
+        self._expOutFileName = "exp_output.info"
+        self._expGrouperFileName = "exp_motif.gff.grp"
+        self._expGrpMapFileName = "exp_motif.gff.grp.map"
+
+    def tearDown(self):
+        os.remove(self._inputGffFileName)
+        os.remove(self._inputPYRFileName)
+        os.remove(self._inputMOTIFFileName)
+
+        os.remove(self._obsOutFileName)
+        os.remove(self._obsGrouperFileName)
+        os.remove(self._obsGrpMapFileName)
+
+        os.remove(self._expOutFileName)
+        os.remove(self._expGrouperFileName)
+        os.remove(self._expGrpMapFileName)
+
+    def testRun(self):
+        self._writePilerTAFilePYR(self._inputPYRFileName)
+        self._writePilerTAFileMOTIF(self._inputMOTIFFileName)
+        self._writePilerTAGff(self._inputGffFileName)
+
+        self._writeExpOutputFile(self._expOutFileName)
+        self._writeExpGrouperFile(self._expGrouperFileName)
+        self._writeExpGrouperMapFile(self._expGrpMapFileName)
+
+        iPilerTAToGrouperMap = PilerTAToGrouperMap(self._inputGffFileName, self._inputPYRFileName,self._inputMOTIFFileName, self._obsOutFileName)
+        iPilerTAToGrouperMap.run()
+
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutFileName, self._obsOutFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrouperFileName, self._obsGrouperFileName))
+        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrpMapFileName, self._obsGrpMapFileName))
+
+
+    def _writePilerTAGff(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\thit\t155146\t156020\t0\t+\t.\tTarget chunk21 150519 151392 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154790\t156023\t0\t+\t.\tTarget chunk21 150519 151751 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154078\t156023\t0\t+\t.\tTarget chunk21 150519 152463 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154256\t156023\t0\t+\t.\tTarget chunk21 150519 152285 ; Pile 510 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\thit\t154434\t156023\t0\t+\t.\tTarget chunk21 150519 152107 ; Pile 510 ; Pyramid 0\n")
+        f.close()
+
+    def _writePilerTAFilePYR(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 0\n")
+        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 1\n")
+        f.write("chunk21\tpiler\tpyramid\t165574\t174424\t0\t.\t.\tPyramidIndex 2\n")
+        f.write("chunk21\tpiler\tpyramid\t166301\t174424\t0\t.\t.\tPyramidIndex 3\n")
+        f.write("chunk21\tpiler\tpyramid\t168967\t174424\t0\t.\t.\tPyramidIndex 4\n")
+        f.write("chunk21\tpiler\tpyramid\t170215\t174424\t0\t.\t.\tPyramidIndex 5\n")
+        f.close()
+
+    def _writePilerTAFileMOTIF(self, fileName):
+        f = open(fileName, "w")
+        f.write("chunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.write("chunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
+        f.close()
+
+    def _writeExpOutputFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("Pile 510\tPyramid 0\n")
+        f.write("\tPyramid 1\n")
+        f.write("\tPyramid 2\n")
+        f.write("\tPyramid 3\n")
+        f.write("\tPyramid 4\n")
+        f.write("\tPyramid 5\n")
+        f.close()
+
+    def _writeExpGrouperFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("MbS1Gr0Cl510\tchunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 \tPile 510\tPyramid 0\n")
+        f.write("MbS2Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS3Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS4Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.write("MbS5Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
+        f.close()
+
+    def _writeExpGrouperMapFile(self, fileName):
+        f = open(fileName, "w")
+        f.write("MbS1Gr0Cl510\tchunk21\t155843\t156020\n")
+        f.write("MbS2Gr0Cl510\tchunk21\t151215\t151392\n")
+        f.write("MbS3Gr0Cl510\tchunk21\t151574\t151751\n")
+        f.write("MbS4Gr0Cl510\tchunk21\t152286\t152463\n")
+        f.write("MbS5Gr0Cl510\tchunk21\t152108\t152285\n")
+        f.close()
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,832 @@\n+SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n+Batch_AU247387_SNP_30_10102;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n+Batch_AU247387_SNP_30_IRELAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGT;1;12;15;Sequence;;;7\n+Batch_AU247387_SNP_30_POLAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;20;Sequence;;;9\n+Batch_AU247387_SNP_30_VIGOR;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;23;Sequence;;;9\n+Batch_AU247387_SNP_34_10102;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;7\n+Batch_AU247387_SNP_34_IRELAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGACGAT;CCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGA
TCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCT;1;12;15;Sequence;;;10\n+Batch_AU247387_SNP_34_POLAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;20;Sequence;;;7\n+Batch_AU247387_SNP_34_VIGOR;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_35_10102;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n+Batch_AU247387_SNP_35_IRELAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGACGATT;CAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTG;1;12;15;Sequence;;;7\n+Batch_AU247387_SNP_35_POLAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCAT;1;12;20;Sequence;;;9\n+Batch_AU247387_SNP_35_VIGOR;A;SNP;35;NNN
TATAGCTCCTAACA'..b'CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCC;GTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;6\n+Batch_AU247387_SNP_601_CARILLON;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCRAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;9;Sequence;;;10\n+Batch_AU247387_SNP_601_SPAIN;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GAGAAGTACGACGACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_601_VIGOR;A;SNP;601;TCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTA;GAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_613_SPAIN;A;SNP;613;-----------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGA;GACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;7\n+Batch_AU247387_SNP_613_VIGOR;A;SNP;613;CAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGA;GACAAGATCGATGCTTTTGGAG
AGAAG;1;12;23;Sequence;;;6\n+Batch_AU247387_SNP_620_SPAIN;A;SNP;620;----------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAG;TTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;6\n+Batch_AU247387_SNP_620_VIGOR;A;SNP;620;TTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAG;TCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;9\n+Batch_AU247387_SNP_622_SPAIN;A;SNP;622;--------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGT;GATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_622_VIGOR;A;SNP;622;GCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGAT;GATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n+Batch_AU247387_SNP_634_SPAIN;A;SNP;634;--CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGTTGATGCTTTTGG;GAGAAG;1;12;21;Sequence;;;10\n+Batch_AU247387_SNP_634_VIGOR;A;SNP;634;ATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGG;GAGAAG;1;12;23;Sequence;;;9
\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/Wig/chr1.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/Wig/chr1.wig Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,9 @@
+fixedStep  chrom=chr1  start=11  step=1
+1.1
+1.2
+fixedStep  chrom=chr1  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr1
+17  1.7
+19  1.9

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/realExpBatchLine.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpBatchLine.csv Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,41 @@
+IndividualNumber;Pos5;Pos3;BatchNumber;Sequence
+1;;;1;
+2;;;1;
+3;;;1;
+4;;;1;
+5;;;1;
+6;;;1;
+7;;;1;
+8;;;1;
+9;;;1;
+10;;;1;
+11;;;1;
+12;;;1;
+13;;;1;
+14;;;1;
+15;;;1;
+16;;;1;
+17;;;1;
+18;;;1;
+19;;;1;
+20;;;1;
+21;;;1;
+22;;;1;
+23;;;1;
+24;;;1;
+25;;;1;
+26;;;1;
+27;;;1;
+28;;;1;
+29;;;1;
+30;;;1;
+31;;;1;
+32;;;1;
+33;;;1;
+34;;;1;
+35;;;1;
+36;;;1;
+37;;;1;
+38;;;1;
+39;;;1;
+40;;;1;

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/realExpIndividual.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpIndividual.csv Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,41 @@
+IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id
+1;Treesnips_40-4-3;;;;;;;;;;Pinus pinaster;;;;;
+2;Treesnips_40-8-3;;;;;;;;;;Pinus pinaster;;;;;
+3;Treesnips_40-1-2;;;;;;;;;;Pinus pinaster;;;;;
+4;Treesnips_40-14-1;;;;;;;;;;Pinus pinaster;;;;;
+5;Treesnips_40-15-2;;;;;;;;;;Pinus pinaster;;;;;
+6;Treesnips_40-20-2;;;;;;;;;;Pinus pinaster;;;;;
+7;Treesnips_40-25-1;;;;;;;;;;Pinus pinaster;;;;;
+8;Treesnips_41-3-3;;;;;;;;;;Pinus pinaster;;;;;
+9;Treesnips_41-8-1;;;;;;;;;;Pinus pinaster;;;;;
+10;Treesnips_41-1-3;;;;;;;;;;Pinus pinaster;;;;;
+11;Treesnips_41-2-1;;;;;;;;;;Pinus pinaster;;;;;
+12;Treesnips_41-3-2;;;;;;;;;;Pinus pinaster;;;;;
+13;Treesnips_41-6-2;;;;;;;;;;Pinus pinaster;;;;;
+14;Treesnips_41-9-1;;;;;;;;;;Pinus pinaster;;;;;
+15;Treesnips_42-1-3;;;;;;;;;;Pinus pinaster;;;;;
+16;Treesnips_42-8-2;;;;;;;;;;Pinus pinaster;;;;;
+17;Treesnips_42-1-2;;;;;;;;;;Pinus pinaster;;;;;
+18;Treesnips_42-2-1;;;;;;;;;;Pinus pinaster;;;;;
+19;Treesnips_42-2-2;;;;;;;;;;Pinus pinaster;;;;;
+20;Treesnips_42-8-1;;;;;;;;;;Pinus pinaster;;;;;
+21;Treesnips_42-9-2;;;;;;;;;;Pinus pinaster;;;;;
+22;Treesnips_43-4-3;;;;;;;;;;Pinus pinaster;;;;;
+23;Treesnips_43-5-3;;;;;;;;;;Pinus pinaster;;;;;
+24;Treesnips_43-1-1;;;;;;;;;;Pinus pinaster;;;;;
+25;Treesnips_43-2-1;;;;;;;;;;Pinus pinaster;;;;;
+26;Treesnips_43-7-2;;;;;;;;;;Pinus pinaster;;;;;
+27;Treesnips_43-9-3;;;;;;;;;;Pinus pinaster;;;;;
+28;Treesnips_43-10-2;;;;;;;;;;Pinus pinaster;;;;;
+29;Treesnips_44-3-3;;;;;;;;;;Pinus pinaster;;;;;
+30;Treesnips_44-6-2;;;;;;;;;;Pinus pinaster;;;;;
+31;Treesnips_44-3-1;;;;;;;;;;Pinus pinaster;;;;;
+32;Treesnips_44-5-2;;;;;;;;;;Pinus pinaster;;;;;
+33;Treesnips_44-7-1;;;;;;;;;;Pinus pinaster;;;;;
+34;Treesnips_44-10-2;;;;;;;;;;Pinus pinaster;;;;;
+35;Treesnips_45-5-3;;;;;;;;;;Pinus pinaster;;;;;
+36;Treesnips_45-8-3;;;;;;;;;;Pinus pinaster;;;;;
+37;Treesnips_45-1-1;;;;;;;;;;Pinus pinaster;;;;;
+38;Treesnips_45-4-1;;;;;;;;;;Pinus pinaster;;;;;
+39;Treesnips_45-7-1;;;;;;;;;;Pinus pinaster;;;;;
+40;Treesnips_45-9-1;;;;;;;;;;Pinus pinaster;;;;;

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/realExpSequences.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpSequences.fsa Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,2 @@
+>PpHDZ31_ref
+GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTCAGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTGCAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGACTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATTCTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATTTATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTTAGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCTTGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGCACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCTATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATACCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCTTGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCTAGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTATTCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCCTGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGCAGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCACTGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCACAGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAGGTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATGATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGATTTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTATCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAACTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAGTTATTTAAAAAAAATGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGGGAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTGTAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGATCAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTTATAGTGTTCCAGGATCAAGA
GCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGGCACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTTTGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTATATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATGTAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCTGTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTATACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAACAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGAACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGGGTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGCATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGCTGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGATCTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAACTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCATTCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATGTGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGAATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTTGGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/realExpSubSNP.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/realExpSubSNP.csv Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,799 @@\n+SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-4-3;A;SNP;136;NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;1;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-8-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;2;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-1-2;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;3;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-14-1;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCA
CGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;4;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-15-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;5;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-20-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;6;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-25-1;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;7;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-3-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;8;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-8-1;A;SN
P;136;GCTAG'..b'NRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-5-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGNNNNNNNNNNN;1;1;32;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;33;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-10-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTNNNNNNNNNN;1;1;34;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-5-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAANNNNNNNNNNNNNN;1;
1;35;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-8-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGNNNNNNNNNNNN;1;1;36;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-4-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;38;Sequence;;;3\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA;1;1;39;Sequence;;;1\n+INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-9-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
N;1;1;40;Sequence;;;1\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/real_multifasta_input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/real_multifasta_input.fasta Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,2419 @@\n+>PpHDZ31_ref\n+GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n+TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n+AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n+CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n+CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n+CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n+TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n+AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n+TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n+ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n+ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n+CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n+TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n+AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n+TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n+TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n+AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n+TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n+AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAG\n+GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n+ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n+TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n+TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n+CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n+TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n+GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n+TAATGTAAA
TACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n+CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n+ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n+CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n+TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n+TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n+TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n+GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n+ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n+CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n+ACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n+GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n+ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n+TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n+CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n+CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n+TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n+TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n+ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n+GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n+GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n+>Treesnips_40-4-3\n+NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n+TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n
+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGT'..b'A\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n+GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n+>Treesnips_45-9-1\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n+AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n+GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n+GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n+GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n+CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n+AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n+CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n+CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n+CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n+TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n+AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n+TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n+ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n+ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n+CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n+TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n+AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n+TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n+TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n+AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n+TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n+AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGT
TCTGCGAACATTTAGCCAGAG\n+GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n+ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n+TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n+TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n+CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n+TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n+GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n+TAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n+CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n+ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n+CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n+TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n+TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n+TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n+GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n+ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n+CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n+ACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n+GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n+ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n+TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n+CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n+CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n+TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n+TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n+ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n+GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n+TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n+GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n+AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n+ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n+GTTACTAAATCTGGCCTATTGCT
TGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n+AATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/test.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test.wig Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,12 @@
+fixedStep  chrom=chr1  start=11  step=1
+1.1
+1.2
+fixedStep  chrom=chr1  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr1
+17  1.7
+19  1.9
+variableStep chrom=chrX
+5  9.5
+6  9.6

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/test1.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test1.wig Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,11 @@
+fixedStep  chrom=chr2  start=9  step=1
+0
+0
+1.1
+1.2
+fixedStep  chrom=chr2  start=14  step=1
+1.4
+1.5
+variableStep chrom=chr2
+17  1.7
+19  1.9

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/test2.wig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/test2.wig Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,3 @@
+fixedStep chrom=chr3 start=14 step=1
+1.4
+1.5

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/testBedParser1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testBedParser1.bed Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,2 @@
+track name=reads description="Reads" useScore=0 visibility=full offset=0
+arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/testCoordsParser.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser.coords Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,19 @@
+/home/urgi/genome_alignment/data/banks/arabidopsis/lyrata/ara_lyra_sca1.fa /home/urgi/genome_alignment/data/banks/arabidopsis/thaliana/ara_thal_chr1.fa
+NUCMER
+
+    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  | [TAGS]
+=====================================================================================
+       1     6251  |   421251   415029  |     6251     6223  |    89.03  | scaffold_1 gi|240254421:1-30427671
+    9127    11947  |   414945   412123  |     2821     2823  |    90.45  | scaffold_1 gi|240254421:1-30427671
+   12201    12953  |   411933   411173  |      753      761  |    82.56  | scaffold_1 gi|240254421:1-30427671
+   13086    20401  |   411034   403760  |     7316     7275  |    88.56  | scaffold_1 gi|240254421:1-30427671
+   20482    20686  |   403573   403369  |      205      205  |    94.66  | scaffold_1 gi|240254421:1-30427671
+   32288    32623  |   402639   402280  |      336      360  |    76.52  | scaffold_1 gi|240254421:1-30427671
+   32936    33572  |   401974   401308  |      637      667  |    79.80  | scaffold_1 gi|240254421:1-30427671
+   33748    35013  |   401256   400080  |     1266     1177  |    82.77  | scaffold_1 gi|240254421:1-30427671
+   35456    44084  |   399895   391566  |     8629     8330  |    86.23  | scaffold_1 gi|240254421:1-30427671
+   44401    45265  |   391569   390737  |      865      833  |    90.40  | scaffold_1 gi|240254421:1-30427671
+   45374    46243  |   390633   389755  |      870      879  |    71.70  | scaffold_1 gi|240254421:1-30427671
+   46366    48958  |   389607   387128  |     2593     2480  |    82.32  | scaffold_1 gi|240254421:1-30427671
+   55079    55160  |   369603   369683  |       82       81  |    93.90  | scaffold_1 gi|240254421:1-30427671
+   55407    56537  |   369910   371016  |     1131     1107  |    81.69  | scaffold_1 gi|240254421:1-30427671
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/testCoordsParser_showcoord.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser_showcoord.coords Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,5 @@
+/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
+NUCMER
+
+[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
+296 2292 1 2001 1997 2001 98.30 175930 60273 1.14 3.32 1 1 mivi_sl_A1_scaffold00001 mivi_sl_A2_scaffold00003

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,5 @@
+/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
+PROMER
+
+[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [% SIM] [% STP] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
+1229 291 939 1 939 939 94.25 97.12 3.04 175930 60273 0.53 1.56 -3 -1 mivi_sl_A1_scaffold00001 mivi_sl_A2_scaffold00003

diff -r ea3082881bf8 -r 769e306b7933 commons/core/parsing/test/data/testGffParser1.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/test/data/testGffParser1.gff3 Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,7 @@
+arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1
+arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
+arm_X test test_transcript 10000 20000 1 - . ID=id2-1;Name=test2;field=value2
+arm_X test test_exon 10000 10100 1 - . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
+arm_X test test_exon 10500 20000 1 - . ID=id2-1-exon2;Name=test2-exon2;Parent=id2-1
+arm_X test test_transcript 1000 2000 1 + . ID=test1.1-1;Name=test1.1
+arm_X test test_exon 1000 2000 1 + . ID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/AlignedBioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/AlignedBioseqDB.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,396 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import sys\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Range import Range\n+from commons.core.stat.Stat import Stat\n+from math import log\n+\n+\n+## Multiple Sequence Alignment Representation \n+# \n+#\n+class AlignedBioseqDB( BioseqDB ):\n+ \n+ def __init__( self, name="" ):\n+ BioseqDB.__init__( self, name )\n+ seqLength = self.getLength()\n+ if self.getSize() > 1:\n+ for bs in self.db[1:]:\n+ if bs.getLength() != seqLength:\n+ print "ERROR: aligned sequences have different length"\n+ \n+ \n+ ## Get length of the alignment\n+ # \n+ # @return length\n+ # @warning name before migration was \'length\'\n+ #\n+ def getLength( self ):\n+ length = 0\n+ if self.db != []:\n+ length = self.db[0].getLength()\n+ return length\n+ \n+ \n+ ## Get the true length of a given sequence (without gaps)\n+ #\n+ # @param header string header of the sequence to analyze\n+ # @return length integer\n+ # @warning name before migration was \'true_length\'\n+ #\n+ def getSeqLengthWithoutGaps( self, header ):\n+ bs = self.fetch( header )\n+ count = 0\n+ for pos in xrange(0,len(bs.sequence)):\n+ if bs.sequence[pos] != "-":\n+ count += 1\n+ return count\n+ \n+ \n+ ## Record the occurrences of symbols (A, T, G, C, N, -, ...) 
at each site\n+ #\n+ # @return: list of dico whose keys are symbols and values are their occurrences\n+ #\n+ def getListOccPerSite( self ):\n+ lOccPerSite = [] # list of dictionaries, one per position on the sequence\n+ n = 0 # nb of sequences parsed from the input file\n+ firstSeq = True\n+\n+ # for each sequence in the bank\n+ for bs in self.db:\n+ if bs.sequence == None:\n+ break\n+ n += 1\n+\n+ # if it is the first to be parsed, create a dico at each site\n+ if firstSeq:\n+ for i in xrange(0,len(bs.sequence)):\n+ lOccPerSite.append( {} )\n+ firstSeq = False\n+\n+ # for each site, add its nucleotide\n+ for i in xrange(0,len(bs.sequence)):\n+ nuc = '..b'urn 0.0\n+ else:\n+ freq = nbOcc / float(nbNt)\n+ return - freq * log(freq) / log(2) \n+ \n+ \n+ ## Save the multiple alignment as a matrix with \'0\' if gap, \'1\' otherwise\n+ #\n+ def saveAsBinaryMatrix( self, outFile ):\n+ outFileHandler = open( outFile, "w" )\n+ for bs in self.db:\n+ string = "%s" % ( bs.header )\n+ for nt in bs.sequence:\n+ if nt != "-":\n+ string += "\\t%i" % ( 1 )\n+ else:\n+ string += "\\t%i" % ( 0 )\n+ outFileHandler.write( "%s\\n" % ( string ) )\n+ outFileHandler.close()\n+ \n+ \n+ ## Return a list of Align instances corresponding to the aligned regions (without gaps)\n+ #\n+ # @param query string header of the sequence considered as query\n+ # @param subject string header of the sequence considered as subject\n+ #\n+ def getAlignList( self, query, subject ):\n+ lAligns = []\n+ alignQ = self.fetch( query ).sequence\n+ alignS = self.fetch( subject ).sequence\n+ createNewAlign = True\n+ indexAlign = 0\n+ indexQ = 0\n+ indexS = 0\n+ while indexAlign < len(alignQ):\n+ if alignQ[ indexAlign ] != "-" and alignS[ indexAlign ] != "-":\n+ indexQ += 1\n+ indexS += 1\n+ if createNewAlign:\n+ iAlign = Align( Range( query, indexQ, indexQ ),\n+ Range( subject, indexS, indexS ),\n+ 0,\n+ int( alignQ[ indexAlign ] == alignS[ indexAlign ] ),\n+ int( alignQ[ indexAlign ] == alignS[ indexAlign ] ) )\n+ 
lAligns.append( iAlign )\n+ createNewAlign = False\n+ else:\n+ lAligns[-1].range_query.end += 1\n+ lAligns[-1].range_subject.end += 1\n+ lAligns[-1].score += int( alignQ[ indexAlign ] == alignS[ indexAlign ] )\n+ lAligns[-1].identity += int( alignQ[ indexAlign ] == alignS[ indexAlign ] )\n+ else:\n+ if not createNewAlign:\n+ lAligns[-1].identity = 100 * lAligns[-1].identity / lAligns[-1].getLengthOnQuery()\n+ createNewAlign = True\n+ if alignQ[ indexAlign ] != "-":\n+ indexQ += 1\n+ elif alignS[ indexAlign ] != "-":\n+ indexS += 1\n+ indexAlign += 1\n+ if not createNewAlign:\n+ lAligns[-1].identity = 100 * lAligns[-1].identity / lAligns[-1].getLengthOnQuery()\n+ return lAligns\n+ \n+ \n+ def removeGaps(self):\n+ for iBs in self.db:\n+ iBs.removeSymbol( "-" )\n+ \n+ ## Compute mean per cent identity for MSA. \n+ # First sequence in MSA is considered as reference sequence. \n+ #\n+ # \n+ def computeMeanPcentIdentity(self):\n+ seqRef = self.db[0]\n+ sumPcentIdentity = 0\n+\n+ for seq in self.db[1:]:\n+ pcentIdentity = self._computePcentIdentityBetweenSeqRefAndCurrentSeq(seqRef, seq) \n+ sumPcentIdentity = sumPcentIdentity + pcentIdentity\n+ \n+ nbSeq = len(self.db[1:])\n+ meanPcentIdentity = round (sumPcentIdentity/nbSeq)\n+ \n+ return meanPcentIdentity\n+\n+ def _computePcentIdentityBetweenSeqRefAndCurrentSeq(self, seqRef, seq):\n+ indexOnSeqRef = 0\n+ sumIdentity = 0\n+ for nuclSeq in seq.sequence:\n+ nuclRef = seqRef.sequence[indexOnSeqRef]\n+ \n+ if nuclRef != "-" and nuclRef == nuclSeq:\n+ sumIdentity = sumIdentity + 1\n+ indexOnSeqRef = indexOnSeqRef + 1 \n+ \n+ return float(sumIdentity) / float(seqRef.getLength()) * 100 \n+\n+ \n+\n+\n+ \n+ \n+ \n+ \n+ \n+ \n+ \n+ \n+ \n+ \n+\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/Bioseq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/Bioseq.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,686 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import sys\n+import string\n+import re\n+import random\n+import cStringIO\n+from commons.core.coord.Map import Map\n+\n+DNA_ALPHABET_WITH_N = set( [\'A\',\'T\',\'G\',\'C\',\'N\'] )\n+IUPAC = set([\'A\',\'T\',\'G\',\'C\',\'U\',\'R\',\'Y\',\'M\',\'K\',\'W\',\'S\',\'B\',\'D\',\'H\',\'V\',\'N\'])\n+\n+\n+## Record a sequence with its header\n+#\n+class Bioseq( object ):\n+ \n+ header = ""\n+ sequence = ""\n+ \n+ ## constructor\n+ #\n+ # @param name the header of sequence\n+ # @param seq sequence (DNA, RNA, protein)\n+ #\n+ def __init__( self, name="", seq="" ):\n+ self.header = name\n+ self.sequence = seq\n+ \n+ \n+ ## Equal operator\n+ # \n+ def __eq__( self, o ):\n+ if self.header==o.header and self.sequence==o.sequence:\n+ return True\n+ return False\n+ \n+ \n+ ## overload __repr__\n+ #\n+ def __repr__( self ):\n+ return "%s;%s" % ( self.header, self.sequence )\n+ \n+ \n+ ## set attribute header\n+ #\n+ # @param header a string\n+ #\n+ def setHeader( self, header ):\n+ self.header = header\n+ \n+ \n+ ## get attribute header\n+ #\n+ # @return header\n+ def getHeader(self):\n+ return self.header\n+ \n+ \n+ ## set attribute sequence\n+ #\n+ # @param sequence a string\n+ #\n+ def setSequence( self, sequence ):\n+ self.sequence = sequence\n+ \n+ \n+ def getSequence(self):\n+ return self.sequence\n+ \n+ ## reset\n+ #\n+ def reset( self ):\n+ self.setHeader( "" )\n+ self.setSequence( "" )\n+ \n+ \n+ ## Test if bioseq is empty\n+ #\n+ def isEmpty( self ):\n+ return self.header == "" and self.sequence == ""\n+ \n+ \n+ ## Reverse the sequence\n+ #\n+ def reverse( self ):\n+ tmp = self.sequence\n+ self.sequence = tmp[::-1]\n+ \n+ \n+ ## Turn the sequence into its complement\n+ # Force upper case letters\n+ # @warning: old name in pyRepet.Bioseq realComplement\n+ #\n+ def complement( self ):\n+ complement = ""\n+ 
self.upCase()\n+ for i in xrange(0,len(self.sequence),1):\n+ if self.sequence[i] == "A":\n+ complement += "T"\n+ elif self.sequence[i] == "T":\n+ complement += "A"\n+ elif self.sequence[i] == "C":\n+ complement += "G"\n+ '..b'etLMapWhithoutGap( self ):\n+ lMaps = []\n+ countSite = 1\n+ countSubseq = 1\n+ inGap = False\n+ startMap = -1\n+ endMap = -1\n+\n+ # initialize with the first site\n+ if self.sequence[0] == "-":\n+ inGap = True\n+ else:\n+ startMap = countSite\n+\n+ # for each remaining site\n+ for site in self.sequence[1:]:\n+ countSite += 1\n+\n+ # if it is a gap\n+ if site == "-":\n+\n+ # if this is the beginning of a gap, record the previous subsequence\n+ if inGap == False:\n+ inGap = True\n+ endMap = countSite - 1\n+ lMaps.append( Map( "%s_subSeq%i" % (self.header,countSubseq), self.header, startMap, endMap ) )\n+ countSubseq += 1\n+\n+ # if it is NOT a gap\n+ if site != "-":\n+\n+ # if it is the end of a gap, begin the next subsequence\n+ if inGap == True:\n+ inGap = False\n+ startMap = countSite\n+\n+ # if it is the last site\n+ if countSite == self.getLength():\n+ endMap = countSite\n+ lMaps.append( Map( "%s_subSeq%i" % (self.header,countSubseq), self.header, startMap, endMap ) )\n+\n+ return lMaps\n+ \n+ \n+ ## get the percentage of GC\n+ #\n+ # @return a percentage\n+ # \n+ def getGCpercentage( self ):\n+ tmpSeq = self.getSeqWithOnlyATGCN()\n+ nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )\n+ return 100 * nbGC / float( self.getLength() )\n+ \n+ ## get the percentage of GC of a sequence without counting N in sequence length\n+ #\n+ # @return a percentage\n+ # \n+ def getGCpercentageInSequenceWithoutCountNInLength(self):\n+ tmpSeq = self.getSeqWithOnlyATGCN()\n+ nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )\n+ return 100 * nbGC / float( self.getLength() - self.countNt("N") )\n+ \n+ ## get the 5 prime subsequence of a given length at the given position \n+ #\n+ # @param position integer\n+ # @param flankLength integer subsequence length\n+ # 
@return a sequence string\n+ # \n+ def get5PrimeFlank(self, position, flankLength):\n+ if(position == 1):\n+ return ""\n+ else:\n+ startOfFlank = 1\n+ endOfFlank = position -1\n+ \n+ if((position - flankLength) > 0):\n+ startOfFlank = position - flankLength\n+ else:\n+ startOfFlank = 1\n+ \n+ return self.subseq(startOfFlank, endOfFlank).sequence\n+ \n+ \n+ ## get the 3 prime subsequence of a given length at the given position \n+ # In the case of indels, the polymorphism length can be specified\n+ #\n+ # @param position integer\n+ # @param flankLength integer subsequence length\n+ # @param polymLength integer polymorphism length\n+ # @return a sequence string\n+ # \n+ def get3PrimeFlank(self, position, flankLength, polymLength = 1):\n+ if((position + polymLength) > len( self.sequence )):\n+ return ""\n+ else:\n+ startOfFlank = position + polymLength\n+ \n+ if((position+polymLength+flankLength) > len( self.sequence )):\n+ endOfFlank = len( self.sequence )\n+ else:\n+ endOfFlank = position+polymLength+flankLength-1\n+ \n+ return self.subseq(startOfFlank, endOfFlank).sequence\n+ \n+ \n+ def _createWordList(self,size,l=[\'A\',\'T\',\'G\',\'C\']):\n+ if size == 1 :\n+ return l\n+ else:\n+ l2 = []\n+ for i in l:\n+ for j in [\'A\',\'T\',\'G\',\'C\']:\n+ l2.append( i + j )\n+ return self._createWordList(size-1,l2)\n+ \n+ \n+ def removeSymbol( self, symbol ):\n+ tmp = self.sequence.replace( symbol, "" )\n+ self.sequence = tmp\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/Bioseq.pyc

Binary file commons/core/seq/Bioseq.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/BioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/BioseqDB.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,461 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import sys\n+import re\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.stat.Stat import Stat\n+\n+\n+## Handle a collection of a Bioseq (header-sequence) \n+#\n+class BioseqDB( object ):\n+ \n+ def __init__( self, name="" ):\n+ self.idx = {}\n+ self.idx_renamed = {}\n+ self.db = []\n+ self.name = name\n+ if name != "":\n+ faFile = open( name )\n+ self.read( faFile )\n+ faFile.close()\n+ self.mean_seq_lgth = None\n+ self.stat = Stat()\n+ \n+ \n+ ## Equal operator\n+ #\n+ def __eq__( self, o ):\n+ selfSize = self.getSize()\n+ if selfSize != o.getSize():\n+ return False\n+ nbEqualInstances = 0\n+ for i in self.db:\n+ atLeastOneIsEqual = False\n+ for j in o.db:\n+ if i == j:\n+ atLeastOneIsEqual = True\n+ continue\n+ if atLeastOneIsEqual:\n+ nbEqualInstances += 1\n+ if nbEqualInstances == selfSize:\n+ return True\n+ return False\n+ \n+ \n+ ## Change the name of the BioseqDB\n+ #\n+ # @param name the BioseqDB name\n+ # \n+ def setName(self, name):\n+ self.name = name\n+ \n+ \n+ ## Record each sequence of the input file as a list of Bioseq instances\n+ #\n+ # @param faFileHandler handler of a fasta file\n+ #\n+ def read( self, faFileHandler ):\n+ while True:\n+ seq = Bioseq()\n+ seq.read( faFileHandler )\n+ if seq.sequence == None:\n+ break\n+ self.add( seq )\n+ \n+ \n+ ## Write all Bioseq of BioseqDB in a formatted fasta file (60 character long)\n+ #\n+ # @param faFileHandler file handler of a fasta file\n+ #\n+ def write( self, faFileHandler ):\n+ for bs in self.db:\n+ bs.writeABioseqInAFastaFile( faFileHandler )\n+ \n+ \n+ ## Write all Bioseq of BioseqDB in a formatted fasta file (60 character long)\n+ #\n+ # @param outFaFileName file name of fasta file\n+ # @param mode \'write\' or \'append\'\n+ #\n+ def save( self, outFaFileName, mode="w" ):\n+ outFaFile = open( outFaFileName, mode )\n+ self.write( 
outFaFile )\n+ outFaFile.close()\n+ \n+ \n+ ## Read a formatted fasta file and l'..b'on of wished Bioseq header\n+ # @param inFileName name of fasta file in which we want extract the BioseqDB\n+ #\n+ def extractPatternOfFile(self, pattern, inFileName):\n+ if pattern=="" :\n+ return\n+ srch=re.compile(pattern)\n+ file_db=open(inFileName)\n+ numseq=0\n+ nbsave=0\n+ while 1:\n+ seq=Bioseq()\n+ seq.read(file_db)\n+ if seq.sequence==None:\n+ break\n+ numseq+=1\n+ m=srch.search(seq.header)\n+ if m:\n+ self.add(seq)\n+ nbsave+=1\n+ file_db.close()\n+ \n+ \n+ ## Extract a sub BioseqDB from the instance with all Bioseq header containing the specified pattern\n+ #\n+ # @param pattern regular expression of wished Bioseq header\n+ #\n+ # @return a BioseqDB\n+ #\n+ def getByPattern(self,pattern):\n+ if pattern=="" :\n+ return\n+ iBioseqDB=BioseqDB()\n+ srch=re.compile(pattern)\n+ for iBioseq in self.db:\n+ if srch.search(iBioseq.header):\n+ iBioseqDB.add(iBioseq)\n+ return iBioseqDB\n+ \n+ \n+ ## Extract a sub BioseqDB from the instance with all Bioseq header not containing the specified pattern\n+ #\n+ # @param pattern regular expression of not wished Bioseq header\n+ #\n+ # @return a BioseqDB\n+ #\n+ def getDiffFromPattern(self,pattern):\n+ if pattern=="" :\n+ return\n+ iBioseqDB=BioseqDB()\n+ srch=re.compile(pattern)\n+ for iBioseq in self.db:\n+ if not srch.search(iBioseq.header):\n+ iBioseqDB.add(iBioseq)\n+ return iBioseqDB\n+ \n+ #TODO: to run several times to remove all concerned sequences when big data. 
How to fix it ?\n+ ## Remove from the instance all Bioseq which header contains the specified pattern\n+ #\n+ # @param pattern regular expression of not wished Bioseq header\n+ #\n+ def rmByPattern(self,pattern):\n+ if pattern=="" :\n+ return\n+ srch=re.compile(pattern)\n+ for seq in self.db:\n+ if srch.search(seq.header):\n+ self.db.remove(seq) \n+ \n+ \n+ ## Copy a part from another BioseqDB in the BioseqDB if Bioseq have got header containing the specified pattern\n+ # \n+ # @warning this method is called extractPattern in pyRepet.seq.BioseqDB\n+ #\n+ # @param pattern regular expression of wished Bioseq header\n+ # @param sourceBioseqDB the BioseqDB from which we want extract Bioseq\n+ #\n+ def addBioseqFromABioseqDBIfHeaderContainPattern(self, pattern, sourceBioseqDB):\n+ if pattern=="" :\n+ return\n+ srch=re.compile(pattern)\n+ for seq in sourceBioseqDB.db:\n+ m=srch.search(seq.header)\n+ if m:\n+ self.add(seq) \n+ \n+ \n+ ## Up-case the sequence characters in all sequences\n+ # \n+ def upCase( self ):\n+ for bs in self.db:\n+ bs.upCase()\n+ \n+ \n+ ## Split each gapped Bioseq in a list and store all in a dictionary\n+ #\n+ # @return a dict, keys are bioseq headers, values are list of Map instances \n+ #\n+ def getDictOfLMapsWithoutGaps( self ):\n+ dSeq2Maps = {}\n+\n+ for bs in self.db:\n+ dSeq2Maps[ bs.header ] = bs.getLMapWhithoutGap()\n+\n+ return dSeq2Maps\n+\n+ ## Give the list of the sequence length in the bank\n+ #\n+ # @return an list\n+ #\n+ def getListOfSequencesLength( self ):\n+ lLength = []\n+ for iBioseq in self.db:\n+ lLength.append(iBioseq.getLength())\n+\n+ return lLength\n+ \n+ ## Return sequence length for a list of sequence header\n+ #\n+ def getSeqLengthByListOfName( self, lHeaderName ):\n+ lseqLength=[]\n+ for headerName in lHeaderName: \n+ lseqLength.append(self.getSeqLength( headerName ))\n+ return lseqLength\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/BioseqUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/BioseqUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,296 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import math\n+import re\n+from commons.core.seq.Bioseq import Bioseq\n+\n+## Static methods for sequences manipulation\n+#\n+class BioseqUtils(object):\n+ \n+ ## Translate a nucleotide sequence\n+ #\n+ # @param bioSeqInstanceToTranslate a bioseq instance to translate\n+ # @param phase a integer : 1 (default), 2 or 3\n+ # \n+ def translateSequence(bioSeqInstanceToTranslate, phase=1):\n+ pep = ""\n+ #length = math.floor((len(self.sequence)-phase-1)/3)*3\n+ length = int( math.floor( ( len(bioSeqInstanceToTranslate.sequence )-( phase-1 ) )/3 )*3 )\n+ #We need capital letters !\n+ bioSeqInstanceToTranslate.upCase() \n+ sequence = bioSeqInstanceToTranslate.sequence \n+ for i in xrange(phase-1,length,3):\n+ if (sequence[i:i+3] == "TTT" or sequence[i:i+3] == "TTC"):\n+ pep = pep + "F"\n+ elif ( sequence[i:i+3] == "TTA" or sequence[i:i+3] == "TTG" ):\n+ pep = pep + "L"\n+ elif ( sequence[i:i+2] == "CT" ):\n+ pep = pep + "L"\n+ elif ( sequence[i:i+3] == "ATT" or sequence[i:i+3] == "ATC" or sequence[i:i+3] == "ATA" ):\n+ pep = pep + "I"\n+ elif ( sequence[i:i+3] == "ATG" ):\n+ pep = pep + "M"\n+ elif ( sequence[i:i+2] == "GT" ):\n+ pep = pep + "V"\n+ elif ( sequence[i:i+2] == "TC" ) :\n+ pep = pep + "S"\n+ elif ( sequence[i:i+2] == "CC" ) :\n+ pep = pep + "P"\n+ elif ( sequence[i:i+2] == "AC" ) :\n+ pep = pep + "T"\n+ elif ( sequence[i:i+2] == "GC" ) :\n+ pep = pep + "A"\n+ elif ( sequence[i:i+3] == "TAT" or sequence[i:i+3] == "TAC" ) :\n+ pep = pep + "Y"\n+ elif ( sequence[i:i+3] == "TAA" or sequence[i:i+3] == "TAG" ) :\n+ pep = pep + "*"\n+ elif ( sequence[i:i+3] == "CAT" or sequence[i:i+3] == "CAC" ) :\n+ pep = pep + "H"\n+ elif ( sequence[i:i+3] == "CAA" or sequence[i:i+3] == "CAG" ) :\n+ pep = pep + "Q"\n+ elif ( sequence[i:i+3] == "AAT" or sequence[i:i+3] == "AAC" ) :\n+ pep = pep + "N"\n+ elif ( sequence[i:i+3] == 
"AAA" or sequence[i:i+3] == "AAG" ) :\n+ pep = pep + "K"\n+ elif ( se'..b'\n+ writeBioseqListIntoFastaFile = staticmethod( writeBioseqListIntoFastaFile )\n+ \n+ ## read in a fasta file and create a list of bioseq instances\n+ #\n+ # @param fileName string\n+ # @return a list of bioseq\n+ #\n+ def extractBioseqListFromFastaFile( fileName ):\n+ file = open( fileName )\n+ lBioseq = []\n+ currentHeader = ""\n+ while currentHeader != None:\n+ bioseq = Bioseq()\n+ bioseq.read(file)\n+ currentHeader = bioseq.header\n+ if currentHeader != None:\n+ lBioseq.append(bioseq)\n+ return lBioseq\n+ \n+ extractBioseqListFromFastaFile = staticmethod( extractBioseqListFromFastaFile )\n+ \n+ ## Give the length of a sequence search by name\n+ #\n+ # @param lBioseq a list of bioseq instances\n+ # @param seqName string\n+ # @return an integer\n+ #\n+ def getSeqLengthWithSeqName( lBioseq, seqName ):\n+ length = 0\n+ for bioseq in lBioseq:\n+ if bioseq.header == seqName:\n+ length = bioseq.getLength()\n+ break \n+ return length\n+\n+ getSeqLengthWithSeqName = staticmethod( getSeqLengthWithSeqName )\n+\n+ def _translateInPositiveFrames( bioSeqInstanceToTranslate ):\n+ seq1 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ BioseqUtils.setFrameInfoOnHeader(seq1, 1)\n+ BioseqUtils.translateSequence(seq1, 1)\n+ seq2 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ BioseqUtils.setFrameInfoOnHeader(seq2, 2)\n+ BioseqUtils.translateSequence(seq2, 2)\n+ seq3 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ BioseqUtils.setFrameInfoOnHeader(seq3, 3)\n+ BioseqUtils.translateSequence(seq3, 3)\n+ return [seq1, seq2, seq3]\n+ \n+ _translateInPositiveFrames = staticmethod( _translateInPositiveFrames )\n+ \n+ def _translateInNegativeFrames(bioSeqInstanceToTranslate):\n+ seq4 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ seq4.reverseComplement()\n+ BioseqUtils.setFrameInfoOnHeader(seq4, 4)\n+ BioseqUtils.translateSequence(seq4, 1)\n+ seq5 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ 
seq5.reverseComplement()\n+ BioseqUtils.setFrameInfoOnHeader(seq5, 5)\n+ BioseqUtils.translateSequence(seq5, 2)\n+ seq6 = bioSeqInstanceToTranslate.copyBioseqInstance()\n+ seq6.reverseComplement()\n+ BioseqUtils.setFrameInfoOnHeader(seq6, 6)\n+ BioseqUtils.translateSequence(seq6, 3)\n+ return [seq4, seq5, seq6]\n+ \n+ _translateInNegativeFrames = staticmethod( _translateInNegativeFrames )\n+ \n+ \n+ ## Return a dictionary which keys are sequence headers and values sequence lengths.\n+ #\n+ def getLengthPerSeqFromFile( inFile ):\n+ dHeader2Length = {}\n+ inFileHandler = open( inFile, "r" )\n+ while True:\n+ iBs = Bioseq()\n+ iBs.read( inFileHandler )\n+ if iBs.sequence == None:\n+ break\n+ dHeader2Length[ iBs.header ] = iBs.getLength()\n+ inFileHandler.close()\n+ return dHeader2Length\n+ \n+ getLengthPerSeqFromFile = staticmethod( getLengthPerSeqFromFile )\n+ \n+ \n+ ## Return the list of Bioseq instances, these being sorted in decreasing length\n+ #\n+ def getBioseqListSortedByDecreasingLength( lBioseqs ):\n+ return sorted( lBioseqs, key=lambda iBs: ( iBs.getLength() ), reverse=True )\n+ \n+ getBioseqListSortedByDecreasingLength = staticmethod( getBioseqListSortedByDecreasingLength )\n+ \n+ \n+ ## Return the list of Bioseq instances, these being sorted in decreasing length (without gaps)\n+ #\n+ def getBioseqListSortedByDecreasingLengthWithoutGaps( lBioseqs ):\n+ return sorted( lBioseqs, key=lambda iBs: ( len(iBs.sequence.replace("-","")) ), reverse=True )\n+ \n+ getBioseqListSortedByDecreasingLengthWithoutGaps = staticmethod( getBioseqListSortedByDecreasingLengthWithoutGaps )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/ClusterConsensusCollection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/ClusterConsensusCollection.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,66 @@
+import re
+from commons.core.seq.BioseqDB import BioseqDB
+
+## Record a collection of BioseqDB instances, one per cluster of consensus
+#
+# The clusters are read from a fasta file whose headers are expected to look
+# like "BlastclustCluster1Mb2 seq2": a cluster name ending with the cluster
+# number, "Mb" followed by a member number, a space, then the sequence header.
+#
+class ClusterConsensusCollection(object):
+
+    # Expected header layout: non-digit prefix, cluster number, "Mb", member
+    # number, one whitespace character, then anything.
+    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")
+
+    ## constructor
+    #
+    # @param clusterFileName string name of the fasta file containing the clusters of consensus
+    #
+    def __init__(self, clusterFileName):
+        self._clusterFileName = clusterFileName
+        self._lClusterConsensus = []
+
+    ## Two collections are equal when their file names and their lists of clusters are equal
+    #
+    # @param o object to compare with
+    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
+    #
+    def __eq__(self, o):
+        if not isinstance(o, ClusterConsensusCollection):
+            return NotImplemented
+        return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
+
+    ## Negation of __eq__
+    #
+    # Defined explicitly because Python 2 does not derive "!=" from "==".
+    #
+    def __ne__(self, o):
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return NotImplemented
+        return not isEqual
+
+    ## Give the list of clusters (the internal list itself, not a copy)
+    #
+    # @return list of BioseqDB instances, in the order in which the clusters were met in the file
+    #
+    def getLClusterConsensus(self):
+        return self._lClusterConsensus
+
+    ## Read the cluster file and group its sequences by cluster
+    #
+    # Consecutive headers with the same cluster name are stored in the same
+    # BioseqDB; a new BioseqDB is started each time the cluster name changes.
+    # The previous content of the collection is replaced, so calling this
+    # method twice does not duplicate the clusters. When no header is read,
+    # the collection is left empty.
+    #
+    # @throw ValueError if a header does not follow the expected layout
+    #
+    def fillCollection(self):
+        iBioseqDBAllCluster = BioseqDB()
+        # "with" closes the file even when the parsing fails
+        with open(self._clusterFileName, "r") as fClusterFile:
+            iBioseqDBAllCluster.read(fClusterFile)
+        lClusterConsensus = []
+        clusterConsensus = None
+        previousClusterName = None
+        for header in iBioseqDBAllCluster.getHeaderList():
+            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
+            if clusterConsensus is None or clusterName != previousClusterName:
+                clusterConsensus = BioseqDB()
+                clusterConsensus.setName(clusterName)
+                lClusterConsensus.append(clusterConsensus)
+                previousClusterName = clusterName
+            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)
+        # in-place update: references obtained through getLClusterConsensus() stay valid
+        self._lClusterConsensus[:] = lClusterConsensus
+
+    ## Split a header of the cluster file into the cluster name and the sequence header
+    #
+    # @param header string, e.g. "BlastclustCluster1Mb2 seq2"
+    # @return tuple (clusterName, seqHeader), e.g. ("BlastclustCluster1", "seq2")
+    # @throw ValueError if the header does not follow the expected layout
+    #
+    def _getClusterNameAndSeqHeader(self, header):
+        m = self._HEADER_PATTERN.match(header)
+        lPartsHeader = header.split(" ")
+        if m is None or len(lPartsHeader) < 2:
+            raise ValueError("unexpected cluster header format: '%s'" % (header,))
+        clusterName = m.group(1) + m.group(2)
+        seqHeader = lPartsHeader[1]
+        return clusterName, seqHeader
+
+    ## Move one sequence from the whole-file BioseqDB into the BioseqDB of its cluster
+    #
+    # The fetched bioseq is renamed in place with seqHeader before being added.
+    #
+    # @param iBioseqDBAllCluster BioseqDB containing all the sequences of the cluster file
+    # @param firstHeader string full header of the sequence to fetch (any sequence, not only the first one)
+    # @param seqHeader string header to give to the sequence inside its cluster
+    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
+    #
+    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
+        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
+        ibioseq.setHeader(seqHeader)
+        clusterConsensus.add(ibioseq)
+
+    ## Give the rank of the first cluster containing a given sequence
+    #
+    # @param seqName string sequence header as stored in the clusters
+    # @return integer rank of the cluster (1 for the first cluster), or None if no cluster contains seqName
+    #
+    def getNumClusterForAConsensus(self, seqName):
+        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
+            if seqName in bioseqDB.getHeaderList():
+                return nbCluster
+        return None
+
+    ## Give the number of sequences recorded in a cluster
+    #
+    # @param numCluster integer rank of the cluster (1 for the first cluster)
+    # @return the value returned by getSize() on the BioseqDB of this cluster
+    # @throw IndexError if numCluster is not between 1 and the number of clusters
+    #
+    def getNumConsensusInCluster(self, numCluster):
+        nbClusters = len(self._lClusterConsensus)
+        # without this check, 0 or a negative rank would silently address a cluster from the end of the list
+        if not 1 <= numCluster <= nbClusters:
+            raise IndexError("cluster number %s out of range (1..%i)" % (numCluster, nbClusters))
+        return self._lClusterConsensus[numCluster - 1].getSize()
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/FastaUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/FastaUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1143 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import string\n+import math\n+import shutil\n+import re\n+import glob\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.coord.MapUtils import MapUtils\n+from commons.core.coord.Range import Range\n+from commons.core.checker.CheckerUtils import CheckerUtils\n+from commons.core.launcher.LauncherUtils import LauncherUtils\n+from commons.core.coord.ConvCoord import ConvCoord\n+from commons.core.parsing.FastaParser import FastaParser\n+\n+\n+## Static methods for fasta file manipulation\n+#\n+class FastaUtils( object ):\n+ \n+ ## Count the number of sequences in the input fasta file\n+ #\n+ # @param inFile name of the input fasta file\n+ #\n+ # @return integer number of sequences in the input fasta file\n+ #\n+ @staticmethod\n+ def dbSize( inFile ):\n+ nbSeq = 0\n+ inFileHandler = open( inFile, "r" )\n+ line = inFileHandler.readline()\n+ while line:\n+ if line[0] == ">":\n+ nbSeq = nbSeq + 1\n+ line = inFileHandler.readline()\n+ inFileHandler.close()\n+ \n+ return nbSeq\n+ \n+ \n+ ## Compute the cumulative sequence length in the input fasta file\n+ #\n+ # @param inFile handler of the input fasta file\n+ #\n+ @staticmethod\n+ def dbCumLength( inFile ):\n+ cumLength = 0\n+ line = inFile.readline()\n+ while line:\n+ if line[0] != ">":\n+ cumLength += len(string.rstrip(line))\n+ line = inFile.readline()\n+ \n+ return cumLength\n+ \n+ \n+ ## Return a list with the length of each sequence in the input fasta file\n+ #\n+ # @param inFile string name of the input fasta file\n+ #\n+ @staticmethod\n+ def dbLengths( inFile ):\n+ lLengths = []\n+ inFileHandler = open( inFile, "r" )\n+ currentLength = 0\n+ line = inFileHandler.readline()\n+ while line:\n+ if line[0] == ">":\n+ if currentLength != 0:\n+ lLengths.append( currentLength )\n+ 
currentLength = 0\n+ else:\n+ currentLength += len(line[:-1])\n+ line = inFileHandler.readline()\n+ lLengths.append( currentLength )\n+ inFileHandler.close()\n+ return lLengths\n+ \n+ \n+ ## Retrieve the sequence headers'..b'inputFile )\n+ prefix, extension = os.path.splitext( fastaBaseName )\n+ cmd = genericCmd.replace("INPUT",inputFile).replace("OUTPUT","%s/%s_shuffle.fa"%(outData,prefix))\n+ returnStatus = os.system( cmd )\n+ if returnStatus != 0:\n+ sys.stderr.write( "ERROR: \'shuffle\' returned \'%i\'\\n" % returnStatus )\n+ sys.exit(1)\n+ \n+ \n+ ## Convert a cluster file (one line = one cluster = one headers list) into a fasta file with cluster info in headers\n+ #\n+ # @param inClusterFileName string input cluster file name\n+ # @param inFastaFileName string input fasta file name\n+ # @param outFileName string output file name\n+ # @param verbosity integer verbosity\n+ #\n+ @staticmethod\n+ def convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, outFileName, clusteringTool = "", verbosity = 0):\n+ dHeader2ClusterClusterMember, clusterIdForSingletonCluster = FastaUtils._createHeader2ClusterMemberDict(inClusterFileName, verbosity)\n+ iFastaParser = FastaParser(inFastaFileName)\n+ with open(outFileName, "w") as f:\n+ for iSequence in iFastaParser.getIterator():\n+ \n+ header = iSequence.getName()\n+ if dHeader2ClusterClusterMember.get(header):\n+ cluster = dHeader2ClusterClusterMember[header][0]\n+ member = dHeader2ClusterClusterMember[header][1]\n+ else:\n+ clusterIdForSingletonCluster += 1\n+ cluster = clusterIdForSingletonCluster\n+ member = 1\n+ \n+ newHeader = "%sCluster%sMb%s_%s" % (clusteringTool, cluster, member, header)\n+ iSequence.setName(newHeader)\n+ f.write(iSequence.printFasta())\n+ \n+ @staticmethod \n+ def _createHeader2ClusterMemberDict(inClusterFileName, verbosity = 0):\n+ dHeader2ClusterClusterMember = {}\n+ clusterId = 0\n+ with open(inClusterFileName) as f:\n+ line = f.readline()\n+ while line:\n+ lineWithoutLastChar = 
line.rstrip()\n+ lHeaders = lineWithoutLastChar.split("\\t")\n+ clusterId += 1\n+ if verbosity > 0:\n+ print "%i sequences in cluster %i" % (len(lHeaders), clusterId)\n+ memberId = 0\n+ for header in lHeaders:\n+ memberId += 1\n+ dHeader2ClusterClusterMember[header] = (clusterId, memberId)\n+ line = f.readline()\n+ if verbosity > 0:\n+ print "%i clusters" % clusterId\n+ return dHeader2ClusterClusterMember, clusterId\n+ \n+ @staticmethod \n+ def convertClusteredFastaFileToMapFile(fastaFileNameFromClustering, outMapFileName = ""):\n+ """\n+ Write a map file from fasta output of clustering tool.\n+ Warning: only works if input fasta headers are formated like LTRharvest fasta output.\n+ """\n+ if not outMapFileName:\n+ outMapFileName = "%s.map" % (os.path.splitext(fastaFileNameFromClustering)[0])\n+ \n+ fileDb = open(fastaFileNameFromClustering , "r")\n+ fileMap = open(outMapFileName, "w")\n+ seq = Bioseq()\n+ numseq = 0\n+ while 1:\n+ seq.read(fileDb)\n+ if seq.sequence == None:\n+ break\n+ numseq = numseq + 1\n+ ID = seq.header.split(\' \')[0].split(\'_\')[0]\n+ chunk = seq.header.split(\' \')[0].split(\'_\')[1]\n+ start = seq.header.split(\' \')[-1].split(\',\')[0][1:]\n+ end = seq.header.split(\' \')[-1].split(\',\')[1][:-1]\n+ line = \'%s\\t%s\\t%s\\t%s\' % (ID, chunk, start, end)\n+ fileMap.write(line + "\\n")\n+ \n+ fileDb.close()\n+ fileMap.close()\n+ print "saved in %s" % outMapFileName\n+ \n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/__init__.pyc

Binary file commons/core/seq/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/TestClusterConsensusCollection.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/TestClusterConsensusCollection.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,67 @@
+import unittest
+import os
+from commons.core.seq.ClusterConsensusCollection import ClusterConsensusCollection
+from commons.core.seq.Bioseq import Bioseq
+from commons.core.seq.BioseqDB import BioseqDB
+
+## Unit tests of ClusterConsensusCollection
+#
+# Each test works on a small fasta file of three sequences, written in setUp
+# and deleted in tearDown.
+#
+class TestClusterConsensusCollection(unittest.TestCase):
+
+    # Sequence given to every Bioseq expected by test_fillCollection.
+    _SEQUENCE = "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT"
+
+    ## Bind a new collection to the fixture file name, then write the fixture file
+    #
+    def setUp(self):
+        fileName = "clusterSequences.fa"
+        self._clusterSequencesFileName = fileName
+        self._ClusterConsensusCollection = ClusterConsensusCollection(fileName)
+        self._createClusterConsensusFile()
+
+    ## Delete the fixture file
+    #
+    def tearDown(self):
+        os.remove(self._clusterSequencesFileName)
+
+    ## The fixture file is expected to give two clusters: (seq1, seq2) and (seq3)
+    #
+    def test_fillCollection(self):
+        lExpClusters = []
+        for lHeaders in (["seq1", "seq2"], ["seq3"]):
+            iBioseqDB = BioseqDB()
+            iBioseqDB.setData([Bioseq(header, self._SEQUENCE) for header in lHeaders])
+            lExpClusters.append(iBioseqDB)
+        expClusterConsensusCollection = ClusterConsensusCollection(self._clusterSequencesFileName)
+        expClusterConsensusCollection._lClusterConsensus = lExpClusters
+        self._ClusterConsensusCollection.fillCollection()
+        self.assertEqual(expClusterConsensusCollection, self._ClusterConsensusCollection)
+
+    ## seq2 is expected in the first cluster
+    #
+    def test_getNumClusterForAConsensus_for_seq2(self):
+        iCollection = self._ClusterConsensusCollection
+        iCollection.fillCollection()
+        self.assertEqual(1, iCollection.getNumClusterForAConsensus("seq2"))
+
+    ## seq3 is expected in the second cluster
+    #
+    def test_getNumClusterForAConsensus_for_seq3(self):
+        iCollection = self._ClusterConsensusCollection
+        iCollection.fillCollection()
+        self.assertEqual(2, iCollection.getNumClusterForAConsensus("seq3"))
+
+    ## The first cluster is expected to hold two sequences
+    #
+    def test_getNumConsensusInCluster_1(self):
+        iCollection = self._ClusterConsensusCollection
+        iCollection.fillCollection()
+        self.assertEqual(2, iCollection.getNumConsensusInCluster(1))
+
+    ## The second cluster is expected to hold one sequence
+    #
+    def test_getNumConsensusInCluster_2(self):
+        iCollection = self._ClusterConsensusCollection
+        iCollection.fillCollection()
+        self.assertEqual(1, iCollection.getNumConsensusInCluster(2))
+
+    ## Write the fixture fasta file: three records whose headers name two different clusters
+    #
+    def _createClusterConsensusFile(self):
+        lLines = [
+            ">BlastclustCluster1Mb1 seq1\n",
+            "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n",
+            ">BlastclustCluster1Mb2 seq2\n",
+            "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n",
+            ">BlasterGrouperCluster3Mb1 seq3\n",
+            "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n",
+        ]
+        fCluster = open(self._clusterSequencesFileName, "w")
+        fCluster.writelines(lLines)
+        fCluster.close()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/TestSuite_seq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/TestSuite_seq.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_AlignedBioseqDB
+import Test_Bioseq
+import Test_BioseqDB
+import Test_BioseqUtils
+import Test_FastaUtils
+
+
+## Build the suite gathering the test cases imported by this module and run it
+#
+# The report is written on stderr with verbosity 2.
+#
+# @return the result object given back by the runner, so that a caller can
+#         check wasSuccessful() and turn a failure into an exit status
+#
+def main():
+    lTestCases = [
+        Test_AlignedBioseqDB.Test_AlignedBioseqDB,
+        Test_Bioseq.Test_Bioseq,
+        Test_BioseqDB.Test_BioseqDB,
+        Test_BioseqUtils.Test_BioseqUtils,
+        Test_FastaUtils.Test_FastaUtils,
+    ]
+
+    # loadTestsFromTestCase collects the methods whose name starts with "test",
+    # like the deprecated unittest.makeSuite( testCase, "test" ) did
+    loader = unittest.TestLoader()
+    TestSuite_seq = unittest.TestSuite()
+    for testCase in lTestCases:
+        TestSuite_seq.addTest( loader.loadTestsFromTestCase( testCase ) )
+
+    runner = unittest.TextTestRunner( stream=sys.stderr, descriptions=True, verbosity=2 )
+    return runner.run( TestSuite_seq )
+
+
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Test_AlignedBioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_AlignedBioseqDB.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,773 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import sys\n+import os\n+import time\n+from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Align import Align\n+from commons.core.coord.Range import Range\n+from commons.core.stat.Stat import Stat\n+\n+\n+class Test_AlignedBioseqDB( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._i = AlignedBioseqDB()\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ \n+ \n+ def tearDown( self ):\n+ self._i = None\n+ self._uniqId = ""\n+ \n+ \n+ def test_getLength(self):\n+ iAlignedBioseqDB = AlignedBioseqDB()\n+\n+ iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iAlignedBioseqDB.setData([iBioseq1])\n+ \n+ expLenght = 29\n+ obsLength = iAlignedBioseqDB.getLength() \n+\n+ self.assertEquals(expLenght, obsLength)\n+ \n+ \n+ def test_getSeqLengthWithoutGaps( self ):\n+ iAlignedBioseqDB = AlignedBioseqDB()\n+ iAlignedBioseqDB.add( Bioseq( "seq3",\n+ "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT" ) )\n+ expLenght = 29\n+ obsLength = iAlignedBioseqDB.getSeqLengthWithoutGaps( "seq3" )\n+ \n+ self.assertEquals(expLenght, obsLength)\n+ \n+ \n+ def test_getListOccPerSite(self):\n+ iBioseq1 = Bioseq( "seq1", "AGAAA")\n+ iBioseq2 = Bioseq( "seq2", "TCAAG")\n+ iBioseq3 = Bioseq( "seq3", "GGTAC")\n+ iBioseq4 = Bioseq( "seq4", "CCTTA")\n+ \n+ iAlignedBioseqDB = AlignedBioseqDB()\n+ iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])\n+\n+ expList = [\n+ \n+ {"A":1, "T":1, "G":1, "C":1},\n+\n+ {"G":2, "C":2},\n+ \n+ {"A":2, "T":2 },\n+ \n+ {"A":3, "T":1 }, \n+ \n+ {"A":2, "G":1, "C":1}\n+ ]\n+ \n+ obsList = iAlignedBioseqDB.getListOccPerSite()\n+ \n+ self.assertEquals(expList, obsList)\n+ \n+ \n+ def 
test_getListOccPerSite_with_none_sequence(self):\n+ iBioseq1 = Bioseq( "seq1", "AGAAA")\n+ iBioseq2 = Bioseq( "seq2", "TCAAG")\n+ iBi'..b'\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------\n+#>BlastclustCluster2Mb2_chunk7 (dbseq-nr 1) [99136,100579]\n+#GTAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n+#ATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATA\n+#ATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATC\n+#ATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n+#ATCATAATAATCATAATAATCATAATAATCATAATAATAATAATAATCATAATCATAATC\n+#ATAATAAGCGATAAAAAAATTAAAAAATAAAAATTAAAACCCACTGCAATCACGTTGGAC\n+#GGCGAGTCACAGACGTCAGAATAGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCA\n+#AGAAGGTTTTTATTGAACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATC\n+#ATAAT---AATCATAATAATCATAATAATCATAATAATCATAATAAT-------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#-----------------------------------------------CATA-ATAATCAT\n+#AATAAT--CATAATAATCATA-ATAATCATAATAATCATAATAATCATAATAATCATAAT\n+#AATCATAATAATCATAATAATCATAA----TAATCATAATAATCATAATAATCATAATAA\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------TCATAA-TAATCATAATAATCGTAA---TAATCATAA----TAATCATAATAAT\n+#CATAATAATCATAA-TAAT----CAT-----AATAATCAT-----AATAATCATAATAAT\n+#CATAATAATCATAATAATCATAATAATCATAATAATCATAAT-AA-TCAT--A
A--TAAT\n+#-----CATAATAATCATAATAA--TCA----TAATAATC---AT---AATAATCATAATA\n+#-AT---CATAATAATCATAATAATC-----------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#-----------------------------------ATAATAATCATAAT-AATCA-----\n+#TAATAA------TCATAAT----AATCATAAT-AATCATAATAA-TCA-TAATAATCATA\n+#ATAATCATAATAATCATAATAATAATAATAATCATAATCATAATCATAATAAGCATAAAA\n+#AAAT--------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#------------------------------------------------------------\n+#TAAAAAATAAAAATTAAAACCCACTGCAA---TCACGTTGGACGGCGAGTCACAGACGTC\n+#A-GAAT-AGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCAAGAAGGTTTTTATTG\n+#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATCATAAT---AATCATA\n+#ATAA---TCA-TAATAATCAT-AATAATCATAATAATCATAA-----TAA-TCATA-ATA\n+#ATCATAATAATCATAATAA--TCATAATA-ATCA-TAATAATCATAATAATCATAATCAT\n+#CATAATAATCATAATAAT--CATAA-T-------AATC--ATAATAATCATAATAATCAT\n+#AATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAAT\n+#CATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAAT\n+#AATCA
TAATAAT\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_AlignedBioseqDB ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Test_Bioseq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_Bioseq.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1017 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import sys\n+from commons.core.seq.Bioseq import Bioseq \n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Map import Map\n+\n+\n+class Test_Bioseq( unittest.TestCase ):\n+ \n+ def setUp(self):\n+ self._bs = Bioseq()\n+\n+\n+ def test_isEmpty_True(self):\n+ self._bs.setHeader( "" )\n+ self._bs.setSequence( "" )\n+ exp = True\n+ obs = self._bs.isEmpty()\n+ self.assertEquals( exp, obs )\n+\n+ \n+ def test_isEmpty_False(self):\n+ self._bs.setHeader( "seq1" )\n+ self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ exp = False\n+ obs = self._bs.isEmpty()\n+ self.assertEquals( exp, obs )\n+ \n+ \n+ def test___eq__(self):\n+ self._bs.setHeader( "seq1" )\n+ self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ self.assertEquals( self._bs, obs )\n+ \n+ \n+ def test___ne__Header(self):\n+ self._bs.setHeader( "seq2" )\n+ self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ self.assertNotEquals( self._bs, obs )\n+ \n+ \n+ def test___ne__Sequence(self):\n+ self._bs.setHeader( "seq1" )\n+ self._bs.setSequence( "GGACGATGCAGCATGCGAATGACGAT" )\n+ obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ self.assertNotEquals( self._bs, obs )\n+ \n+ \n+ def test_reverse(self):\n+ self._bs.setHeader( "seq1" )\n+ self._bs.setSequence( "TGCGGA" )\n+ exp = "AGGCGT"\n+ self._bs.reverse()\n+ obs = self._bs.sequence\n+ self.assertEqual( obs, exp )\n+ \n+ \n+ def test_complement(self):\n+ self._bs.setHeader( "seq1" )\n+ self._bs.setSequence( "TGCGGA" )\n+ exp = "ACGCCT"\n+ self._bs.complement()\n+ obs = self._bs.sequence\n+ self.assertEqual( obs, exp )\n+ \n+ \n+ def test_complement_with_unknown_symbol(self):\n+ self._bs.setHeader( "seq1" )\n+ 
self._bs.setSequence( "TGCGGAFMRWTYSKVHDBN" )\n+ exp = "ACGCCTNKYWARSMBDHVN"\n+ self._bs.complement()\n+ obs = self._bs.sequence\n+ self.assertEqual( obs, exp )\n+ \n+ \n+ def test_r'..b' bioseq = Bioseq()\n+ bioseq.sequence = "ATGCNRATGCN\\rATGCAAT\\rTATA\\r"\n+ bioseq.checkEOF()\n+ obsSequence = bioseq.sequence\n+ expSequence = "ATGCNRATGCNATGCAATTATA"\n+ \n+ self.assertEquals(expSequence, obsSequence)\n+ \n+ \n+ def test_getLMapWhithoutGap(self):\n+ iBioseq = Bioseq()\n+ iBioseq.header = "header"\n+ iBioseq.sequence = "ATGC-RA-GCT"\n+ obsLMap = iBioseq.getLMapWhithoutGap()\n+ expLMap = [Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n+ \n+ self.assertEquals(expLMap, obsLMap)\n+ \n+ \n+ def test_getLMapWhithoutGap_seqStartsWithGap(self):\n+ iBioseq = Bioseq()\n+ iBioseq.header = "header"\n+ iBioseq.sequence = "-TGC-RA-GCT"\n+ obsLMap = iBioseq.getLMapWhithoutGap()\n+ expLMap = [Map( "header_subSeq1", "header", 2, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n+ \n+ self.assertEquals(expLMap, obsLMap)\n+ \n+ \n+ def test_getLMapWhithoutGap_seqEndsWithGap(self):\n+ iBioseq = Bioseq()\n+ iBioseq.header = "header"\n+ iBioseq.sequence = "ATGC-RA-GC-"\n+ obsLMap = iBioseq.getLMapWhithoutGap()\n+ expLMap = [Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 10 )]\n+ \n+ self.assertEquals(expLMap, obsLMap)\n+ \n+ def test_getGCpercentage_onlyATGC( self ):\n+ iBs = Bioseq( "seq", "TGCAGCT" )\n+ exp = 100 * 4 / 7.0\n+ obs = iBs.getGCpercentage()\n+ self.assertEqual( exp, obs )\n+ \n+ def test_getGCpercentageInSequenceWithoutCountNInLength( self ):\n+ iBs = Bioseq( "seq", "TGCAGCTNNNNN" )\n+ exp = 100 * 4 / 7.0\n+ obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()\n+ self.assertEqual( exp, obs ) \n+ \n+ def test_get5PrimeFlank(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ 
position = 7\n+ obsFlank = bs.get5PrimeFlank(position, 3)\n+ expFlank = "TTT"\n+ self.assertEquals(expFlank, obsFlank)\n+ \n+ def test_get5PrimeFlank_flank_length_truncated(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 7\n+ obsFlank = bs.get5PrimeFlank(position, 15)\n+ expFlank = "AACTTT"\n+ self.assertEquals(expFlank, obsFlank)\n+ \n+ def test_get5PrimeFlank_flank_of_first_base(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 1\n+ obsFlank = bs.get5PrimeFlank(position, 15)\n+ expFlank = ""\n+ self.assertEquals(expFlank, obsFlank) \n+ \n+ def test_get3PrimeFlank(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 7\n+ obsFlank = bs.get3PrimeFlank(position, 3)\n+ expFlank = "CAG"\n+ self.assertEquals(expFlank, obsFlank)\n+ \n+ def test_get3PrimeFlank_flank_length_truncated(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 7\n+ obsFlank = bs.get3PrimeFlank(position, 15)\n+ expFlank = "CAGAA"\n+ self.assertEquals(expFlank, obsFlank)\n+ \n+ def test_get3PrimeFlank_flank_of_last_base(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 12\n+ obsFlank = bs.get3PrimeFlank(position, 15)\n+ expFlank = ""\n+ self.assertEquals(expFlank, obsFlank)\n+ \n+ def test_get3PrimeFlank_polymLength_different_of_1(self):\n+ bs = Bioseq( "line1", "AACTTTCCAGAA" )\n+ position = 7\n+ obsFlank = bs.get3PrimeFlank(position, 3, 2)\n+ expFlank = "AGA"\n+ self.assertEquals(expFlank, obsFlank) \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_Bioseq ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Test_BioseqDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_BioseqDB.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,974 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.seq.BioseqDB import BioseqDB\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Map import Map\n+\n+\n+class Test_BioseqDB( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ \n+ \n+ def tearDown( self ):\n+ if os._exists("dummyBioseqDB.fa"):\n+ os.remove("dummyBioseqDB.fa")\n+ \n+ \n+ def test__eq__(self):\n+ iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+ expBioseqDB = BioseqDB()\n+ expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+ \n+ iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+ obsBioseqDB = BioseqDB()\n+ obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+ \n+ self.assertEquals( expBioseqDB, obsBioseqDB )\n+ \n+ \n+ def test__eq__instances_with_different_header(self):\n+ iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+ expBioseqDB = BioseqDB()\n+ expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+ \n+ iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq4 = Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCA" )\n+ obsBioseqDB = BioseqDB()\n+ obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+ \n+ self.assertNotEquals( expBioseqDB, obsBioseqDB )\n+ \n+ \n+ def test__eq__instances_with_different_sequences(self):\n+ iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n+ expBioseqDB = BioseqDB()\n+ expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n+ \n+ iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n+ iBioseq4 = 
Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" )\n+ obsBioseqDB = BioseqDB()\n+ obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+ \n+ self.assertNotEquals( expBioseqDB, obsBioseqDB )\n+ \n+ \n+ def test__eq__instance'..b'9, iBioseq10, iBioseq11] )\n+ \n+ obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)\n+ self.assertEquals(expBioseqDB, obsBioseqDB)\n+ \n+ \n+ def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):\n+ iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+ iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n+ iBioseq3 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+ iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n+ obsBioseqDB = BioseqDB()\n+ obsBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )\n+ \n+ iBioseq5 = Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+ iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+ inBioseqDB = BioseqDB()\n+ inBioseqDB.setData( [ iBioseq5, iBioseq6 ])\n+\n+ iBioseq7 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n+ iBioseq8 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n+ iBioseq9 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n+ iBioseq10 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n+ \n+ expBioseqDB = BioseqDB()\n+ expBioseqDB.setData( [ iBioseq7, iBioseq8, iBioseq9, iBioseq10] )\n+ \n+ obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)\n+ self.assertEquals(expBioseqDB, obsBioseqDB)\n+ \n+ \n+ def test_upCase (self):\n+ iBioseq1 = Bioseq("consensus4","atgacGatgca")\n+ iBioseq2 = Bioseq("consensus1","atgcgaT")\n+ obsBioseqDB = BioseqDB()\n+ obsBioseqDB.setData( [ 
iBioseq1, iBioseq2 ] )\n+ iBioseq3 = Bioseq("consensus4","ATGACGATGCA")\n+ iBioseq4 = Bioseq("consensus1","ATGCGAT")\n+ expBioseqDB = BioseqDB()\n+ expBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n+ obsBioseqDB.upCase()\n+ self.assertEquals(expBioseqDB, obsBioseqDB)\n+ \n+ \n+ def test_getMap(self):\n+ iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n+ iBioseq2 = Bioseq("header2","-TGC-RA-GCT")\n+ iBioseq3 = Bioseq("header3","ATGC-RA-GC-")\n+\n+ iAlignedBioseqDB = BioseqDB()\n+ iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n+ \n+ obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()\n+ \n+ expLMap1 = [Map( "header1_subSeq1", "header1", 1, 4 ), Map( "header1_subSeq2", "header1", 6, 7 ), Map( "header1_subSeq3", "header1", 9, 11 )]\n+ expLMap2 = [Map( "header2_subSeq1", "header2", 2, 4 ), Map( "header2_subSeq2", "header2", 6, 7 ), Map( "header2_subSeq3", "header2", 9, 11 )]\n+ expLMap3 = [Map( "header3_subSeq1", "header3", 1, 4 ), Map( "header3_subSeq2", "header3", 6, 7 ), Map( "header3_subSeq3", "header3", 9, 10 )] \n+ \n+ expDict = {\n+ "header1": expLMap1,\n+ "header2": expLMap2,\n+ "header3": expLMap3\n+ } \n+ \n+ self.assertEquals(expDict, obsDict)\n+\n+ def test_getSeqLengthByListOfName(self):\n+ iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n+ iBioseq2 = Bioseq("header2","-TGC-RAR")\n+ iBioseq3 = Bioseq("header3","ATGC")\n+\n+ iBioseqDB = BioseqDB()\n+ iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n+ \n+ expList = [11, 4]\n+ obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])\n+ \n+ self.assertEquals( expList, obsList ) \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_BioseqDB ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Test_BioseqUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_BioseqUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,498 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.seq.BioseqUtils import BioseqUtils\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_BioseqUtils( unittest.TestCase ):\n+ \n+ def test_translateSequence_one_nt( self ):\n+ bioseq = Bioseq()\n+ bioseq.sequence = "G"\n+ BioseqUtils.translateSequence(bioseq, 1)\n+ expSequence = ""\n+ obsSequence = bioseq.sequence\n+ self.assertEqual(expSequence, obsSequence)\n+ \n+ \n+ def test_translateSequence_frame1( self ):\n+ bioseq = Bioseq()\n+ bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+ BioseqUtils.translateSequence(bioseq, 1)\n+ expSequence = "XGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+ obsSequence = bioseq.sequence\n+ self.assertEqual(expSequence, obsSequence)\n+ \n+ \n+ def test_translateSequence_frame2( self ):\n+ bioseq = Bioseq()\n+ bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+ BioseqUtils.translateSequence(bioseq, 2)\n+ expSequence = "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"\n+ obsSequence = bioseq.sequence\n+ self.assertEqual(expSequence, obsSequence)\n+ \n+ \n+ def test_translateSequence_frame3( self ):\n+ bioseq = Bioseq()\n+ bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n+ BioseqUtils.translateSequence(bioseq, 3)\n+ expSequence = "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"\n+ obsSequence = bioseq.sequence\n+ self.assertEqual(expSequence, obsSequence)\n+ \n+ \n+ def test_setFrameInfoOnHeader(self):\n+ bioseq = Bioseq()\n+ bioseq.header = "header1 description1 description2"\n+ BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n+ expHeader = "header1_1 description1 description2"\n+ obsHeader = bioseq.header\n+ 
self.assertEquals(expHeader,obsHeader)\n+ \n+ \n+ def test_setFrameInfoOnHeader_header_without_space(self):\n+ bioseq = Bioseq()\n+ bioseq.header = "header"\n+ BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n+ expHeader = "header_1"\n+ obsHeader = bioseq.header\n+ '..b' bioseq2.header = "header2"\n+ bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+ \n+ lBioseq = [bioseq1, bioseq2]\n+ \n+ obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header1 description")\n+ expLength = 31\n+ \n+ self.assertEquals( expLength, obsLength)\n+ \n+ \n+ def test_getSeqLengthWithSeqName_second_item ( self ):\n+ bioseq1 = Bioseq()\n+ bioseq1.header = "header1 description"\n+ bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+ \n+ bioseq2 = Bioseq()\n+ bioseq2.header = "header2"\n+ bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+ \n+ lBioseq = [bioseq1, bioseq2]\n+ \n+ obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+ expLength = 44\n+ \n+ self.assertEquals( expLength, obsLength)\n+ \n+ \n+ def test_getSeqLengthWithSeqName_empty_list ( self ):\n+ lBioseq = []\n+ \n+ obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+ expLength = 0\n+ \n+ self.assertEquals( expLength, obsLength)\n+ \n+ \n+ def test_getSeqLengthWithSeqName_empty_sequence ( self ):\n+ bioseq1 = Bioseq()\n+ bioseq1.header = "header1 description"\n+ bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+ \n+ bioseq2 = Bioseq()\n+ bioseq2.header = "header2"\n+ bioseq2.sequence = ""\n+ \n+ lBioseq = [bioseq1, bioseq2]\n+ \n+ obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n+ expLength = 0\n+ \n+ self.assertEquals( expLength, obsLength)\n+ \n+ \n+ def test_getSeqLengthWithSeqName_sequence_unknown ( self ):\n+ bioseq1 = Bioseq()\n+ bioseq1.header = "header1 description"\n+ bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n+ \n+ bioseq2 = Bioseq()\n+ bioseq2.header = "header2"\n+ bioseq2.sequence = 
"ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n+ \n+ lBioseq = [bioseq1, bioseq2]\n+ \n+ obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header3")\n+ expLength = 0\n+ \n+ self.assertEquals( expLength, obsLength)\n+ \n+ \n+ def test_getLengthPerSeqFromFile( self ):\n+ inFile = "dummyInFile"\n+ inFileHandler = open( inFile, "w" )\n+ inFileHandler.write( ">seq1\\nAGCGATGCAGCTA\\n" )\n+ inFileHandler.write( ">seq2\\nGCGATGCGCATCGACGCGA\\n" )\n+ inFileHandler.close()\n+ \n+ dExp = { "seq1": 13, "seq2": 19 }\n+ \n+ dObs = BioseqUtils.getLengthPerSeqFromFile( inFile )\n+ \n+ self.assertEqual( dExp, dObs )\n+ \n+ os.remove( inFile )\n+ \n+ \n+ def test_getBioseqListSortedByDecreasingLength( self ):\n+ lBioseqs = [ Bioseq( "TE2", "ACC" ),\n+ Bioseq( "TE3", "TA" ),\n+ Bioseq( "TE1", "AGCG" ) ]\n+ lExp = [ Bioseq( "TE1", "AGCG" ),\n+ Bioseq( "TE2", "ACC" ),\n+ Bioseq( "TE3", "TA" ) ]\n+ lObs = BioseqUtils.getBioseqListSortedByDecreasingLength( lBioseqs )\n+ self.assertEquals( lExp, lObs )\n+ \n+ \n+ def test_getBioseqListSortedByDecreasingLengthWithoutGaps( self ):\n+ lBioseqs = [ Bioseq( "TE2", "-ACC-" ),\n+ Bioseq( "TE3", "TA---" ),\n+ Bioseq( "TE1", "-AGCG" ) ]\n+ lExp = [ Bioseq( "TE1", "-AGCG" ),\n+ Bioseq( "TE2", "-ACC-" ),\n+ Bioseq( "TE3", "TA---" ) ]\n+ lObs = BioseqUtils.getBioseqListSortedByDecreasingLengthWithoutGaps( lBioseqs )\n+ self.assertEquals( lExp, lObs )\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_BioseqUtils ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Test_FastaUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Test_FastaUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1506 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.seq.FastaUtils import FastaUtils\n+from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils\n+from commons.core.utils.FileUtils import FileUtils\n+import glob\n+import os\n+import shutil\n+import unittest\n+\n+\n+class Test_FastaUtils( unittest.TestCase ):\n+ \n+ \n+ def test_dbSize_for_empty_file(self):\n+ fileName = "dummyFastaFile.fa"\n+ Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)\n+ \n+ obsNb = FastaUtils.dbSize( fileName )\n+ \n+ expNb = 0\n+ os.remove(fileName)\n+ self.assertEquals(expNb, obsNb)\n+ \n+ \n+ def test_dbSize_one_sequence(self):\n+ fileName = "dummyFastaFile.fa"\n+ Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)\n+ \n+ obsNb = FastaUtils.dbSize( fileName )\n+ \n+ expNb = 1\n+ os.remove(fileName)\n+ self.assertEquals(expNb, obsNb)\n+ \n+ \n+ def test_dbSize_four_sequences(self):\n+ fileName = "dummyFastaFile.fa"\n+ Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)\n+ \n+ obsNb = FastaUtils.dbSize( fileName )\n+ \n+ expNb = 4\n+ os.remove(fileName)\n+ self.assertEquals(expNb, obsNb)\n+ \n+ \n+ def test_dbChunks(self):\n+ inFileName = "dummyBigSeqFastaFile.fa"\n+ expChunksFileName = \'exp\' + inFileName +\'_chunks.fa\'\n+ expChunksMapFileName = \'exp\' + inFileName +\'_chunks.map\'\n+ expCutFileName = \'exp\' + inFileName +\'_cut\'\n+ expNStretchFileName = \'exp\' + inFileName +\'.Nstretch.map\'\n+ Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)\n+ Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)\n+ Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)\n+ Utils_for_T_FastaUtils._createFastaFile_of_cut(expCutFileName)\n+ Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(expNStretchFileName)\n+ \n+ FastaUtils.dbChunks(inFileName, \'60\', \'10\', \'11\', 
\'\', False, 0)\n+ \n+ obsChunksFileName = inFileName +\'_chunks.fa\'\n+ obsChunksMapFileName = inFileName +\'_chunks.map\'\n+ obsCutFileName = inFileName +\'_cut\'\n+ obsNStretchFileName = inFileName +\'.Nstretch.map\'\n+ \n+ self.assertTrue(FileUtils.are2'..b'uences(self):\n+ inClusterFileName = "in.tab"\n+ with open(inClusterFileName, "w") as f:\n+ f.write("DTX-incomp_DmelChr4-B-R10-Map3_reversed\\tDTX-incomp_DmelChr4-B-R9-Map3_reversed\\tDTX-incomp_DmelChr4-B-G9-Map3\\n")\n+ f.write("PotentialHostGene-chim_DmelChr4-B-R5-Map5\\tPotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\\n")\n+ inFastaFileName = "in.fa"\n+ with open(inFastaFileName, "w") as f:\n+ f.write(">DTX-incomp_DmelChr4-B-R10-Map3_reversed\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ f.write(">DTX-incomp_DmelChr4-B-R9-Map3_reversed\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ f.write(">RLX-incomp_DmelChr4-B-G220-Map3\\n")\n+ f.write("ATCGCC\\n")\n+ f.write(">PotentialHostGene-chim_DmelChr4-B-R5-Map5\\n")\n+ f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\\n")\n+ f.write(">PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\\n")\n+ f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\\n")\n+ f.write(">DTX-incomp_DmelChr4-B-G9-Map3\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ expFileName = "exp.fa"\n+ with open(expFileName, "w") as f:\n+ f.write(">BlastclustCluster1Mb1_DTX-incomp_DmelChr4-B-R10-Map3_reversed\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ f.write(">BlastclustCluster1Mb2_DTX-incomp_DmelChr4-B-R9-Map3_reversed\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ f.write(">BlastclustCluster3Mb1_RLX-incomp_DmelChr4-B-G220-Map3\\n")\n+ f.write("ATCGCC\\n")\n+ f.write(">BlastclustCluster2Mb1_PotentialHostGene-chim_DmelChr4-B-R5-Map5\\n")\n+ f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\\n")\n+ f.write(">BlastclustCluster2Mb2_PotentialHostGene-chim_DmelChr4-B-R4-Map5_reversed\\n")\n+ f.write("ATCGCATCGATCGATCATCGCATCGATCGATC\\n")\n+ f.write(">BlastclustCluster1Mb3_DTX-incomp_DmelChr4-B-G9-Map3\\n")\n+ f.write("ATCGCATCGATCGATC\\n")\n+ 
obsFileName = "obs.fa"\n+ \n+ FastaUtils.convertClusterFileToFastaFile(inClusterFileName, inFastaFileName, obsFileName, "Blastclust")\n+ \n+ self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n+ os.remove(inClusterFileName)\n+ os.remove(inFastaFileName)\n+ os.remove(expFileName)\n+ os.remove(obsFileName)\n+ \n+ def test_convertClusterFileToMapFile(self):\n+ for clustAlgo in ["Blastclust", "MCL"]:\n+ inFileName = "dummy%sOut.fa" % clustAlgo\n+ inF = open(inFileName, "w")\n+ inF.write(">%sCluster1Mb1_chunk1 (dbseq-nr 1) [1,14]\\n" % clustAlgo)\n+ inF.write("gaattgtttactta\\n")\n+ inF.write(">%sCluster3Mb1_chunk5 (dbseq-nr 8) [1000,1014]\\n" % clustAlgo)\n+ inF.write("gaattgtttactta\\n")\n+ inF.write(">%sCluster1Mb2_chunk1 (dbseq-nr 1) [30,44]\\n" % clustAlgo)\n+ inF.write("gaattgtttactta\\n")\n+ inF.write(">%sCluster2Mb1_chunk2 (dbseq-nr 1) [100,114]\\n" % clustAlgo)\n+ inF.write("gaattgtttactta")\n+ inF.close()\n+ \n+ fileExp = "%sToMapExpected.map" % clustAlgo\n+ outF = open(fileExp, "w")\n+ outF.write("%sCluster1Mb1\\tchunk1\\t1\\t14\\n" % clustAlgo)\n+ outF.write("%sCluster3Mb1\\tchunk5\\t1000\\t1014\\n" % clustAlgo)\n+ outF.write("%sCluster1Mb2\\tchunk1\\t30\\t44\\n" % clustAlgo)\n+ outF.write("%sCluster2Mb1\\tchunk2\\t100\\t114\\n" % clustAlgo)\n+ outF.close()\n+ \n+ fileObs = "%s.map" % os.path.splitext(inFileName)[0]\n+ FastaUtils.convertClusteredFastaFileToMapFile(inFileName, fileObs)\n+ \n+ self.assertTrue(FileUtils.are2FilesIdentical(fileObs, fileExp))\n+ \n+ os.remove(inFileName)\n+ os.remove(fileObs)\n+ os.remove(fileExp)\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/seq/test/Utils_for_T_FastaUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/seq/test/Utils_for_T_FastaUtils.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,857 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+class Utils_for_T_FastaUtils( object ):\n+ \n+ def _createFastaFile_for_empty_file(fileName):\n+ f = open(fileName, \'w\')\n+ f.write("")\n+ f.close()\n+ \n+ _createFastaFile_for_empty_file = staticmethod ( _createFastaFile_for_empty_file )\n+ \n+ \n+ def _createFastaFile_one_sequence(fileName):\n+ f = open(fileName, \'w\')\n+ f.write(">seq 1\\n")\n+ f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+ f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+ f.write("ATATTCG\\n")\n+ f.close()\n+ \n+ _createFastaFile_one_sequence = staticmethod ( _createFastaFile_one_sequence )\n+ \n+ \n+ def createFastaFile_twoSequences( fileName ):\n+ f = open( fileName, "w" )\n+ f.write( ">seq 1\\n" )\n+ f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n+ f.write( "ATATTCG\\n" )\n+ f.write( ">seq 2\\n" )\n+ f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n+ f.write( "ATATTCG\\n" )\n+ f.close()\n+ \n+ createFastaFile_twoSequences = staticmethod ( createFastaFile_twoSequences )\n+ \n+ \n+ def createFastaFile_seq_1( fileName ):\n+ f = open( fileName, "w" )\n+ f.write( ">seq 1\\n" )\n+ f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n+ f.write( "ATATTCG\\n" )\n+ f.close()\n+ \n+ createFastaFile_seq_1 = staticmethod( createFastaFile_seq_1 )\n+ \n+ \n+ def createFastaFile_seq_2( fileName ):\n+ f = open( fileName, "w" )\n+ f.write( ">seq 2\\n" )\n+ f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n+ f.write( "ATATTCG\\n" )\n+ f.close()\n+ \n+ createFastaFile_seq_2 = staticmethod( createFastaFile_seq_2 )\n+ \n+ \n+ def _createFastaFile_sequence_without_header(fileName):\n+ f = open(fileName, \'w\')\n+ f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+ 
f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n+ f.write("ATATTCG\\n")\n+ f.close()\n+ \n+ _createFastaFile_sequence_without_header = staticmethod ( _createFastaFile_sequence_without_header )\n+ \n+ \n+ def _createFastaFile_four_sequences'..b'CCTCGATGAAATGGTCGCG\\n")\n+ f.write("CGCGTACGATAATGCGGGCCTGGCTCACGGATGCGCGCCTTTCCCTATCGTCAGTCACGC\\n")\n+ f.write("AAATGTAGGCTTCCATCTGGAACGCTGCTTGATGGCCTAAGAATGGGCCGTCACGGAACA\\n")\n+ f.write("GCTCACCGCCTGCAGACACGAACGGCCGTGGCGGTCATGGAAGGATCTGAACGTGTCGCC\\n")\n+ f.write("CCATACGATTGACGAAGAGATGTAAGCTCCCTTGGTA\\n")\n+ f.close()\n+ \n+ _createFastaFile_three_sequences_with_ORFs = staticmethod ( _createFastaFile_three_sequences_with_ORFs )\n+ \n+ \n+ def _createFastaFile_three_sequences_with_ORFs_expected(fileName): \n+ f = open(fileName, \'w\') \n+ f.write("ORF|1|662\\tMivi_sl_Blaster_Grouper_1_Map_3\\t307\\t969\\n")\n+ f.write("ORF|-3|254\\tMivi_sl_Blaster_Grouper_1_Map_3\\t793\\t539\\n")\n+ f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_1_Map_3\\t356\\t553\\n")\n+ f.write("ORF|3|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t288\\t464\\n")\n+ f.write("ORF|-1|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t786\\t610\\n")\n+ f.write("ORF|3|143\\tMivi_sl_Blaster_Grouper_1_Map_3\\t672\\t815\\n")\n+ f.write("ORF|1|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t175\\t306\\n")\n+ f.write("ORF|-2|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t797\\t666\\n")\n+ f.write("ORF|2|128\\tMivi_sl_Blaster_Grouper_1_Map_3\\t167\\t295\\n")\n+ f.write("ORF|-2|119\\tMivi_sl_Blaster_Grouper_1_Map_3\\t242\\t123\\n")\n+ f.write("ORF|1|464\\tMivi_sl_Blaster_Grouper_2_Map_3\\t304\\t768\\n")\n+ f.write("ORF|3|305\\tMivi_sl_Blaster_Grouper_2_Map_3\\t669\\t974\\n")\n+ f.write("ORF|-3|251\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1094\\t843\\n")\n+ f.write("ORF|-2|245\\tMivi_sl_Blaster_Grouper_2_Map_3\\t531\\t286\\n")\n+ f.write("ORF|-3|224\\tMivi_sl_Blaster_Grouper_2_Map_3\\t791\\t567\\n")\n+ 
f.write("ORF|-2|215\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1098\\t883\\n")\n+ f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_2_Map_3\\t353\\t550\\n")\n+ f.write("ORF|3|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t288\\t461\\n")\n+ f.write("ORF|-1|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1087\\t914\\n")\n+ f.write("ORF|-1|143\\tMivi_sl_Blaster_Grouper_2_Map_3\\t310\\t167\\n")\n+ f.write("ORF|3|626\\tMivi_sl_Blaster_Grouper_3_Map_3\\t141\\t767\\n")\n+ f.write("ORF|2|434\\tMivi_sl_Blaster_Grouper_3_Map_3\\t164\\t598\\n")\n+ f.write("ORF|3|365\\tMivi_sl_Blaster_Grouper_3_Map_3\\t768\\t1133\\n")\n+ f.write("ORF|-3|359\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1514\\t1155\\n")\n+ f.write("ORF|-1|320\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1879\\t1559\\n")\n+ f.write("ORF|3|272\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1299\\t1571\\n")\n+ f.write("ORF|-2|248\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1503\\t1255\\n")\n+ f.write("ORF|1|236\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1576\\t1812\\n")\n+ f.write("ORF|-1|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1423\\t1196\\n")\n+ f.write("ORF|-3|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t368\\t141\\n")\n+ f.close()\n+\n+ _createFastaFile_three_sequences_with_ORFs_expected = staticmethod ( _createFastaFile_three_sequences_with_ORFs_expected )\n+ \n+ \n+ def _createLinkFile_four_sequences_with_new_headers(fileName):\n+ f = open(fileName, \'w\')\n+ f.write("seq 1\\tReconCluster1Mb155 chunk183 {Fragment} 1..5506\\t1\\t127\\n")\n+ f.write("seq 2\\tMbQ3Gr2Cl0 chunk440 {Fragment} 2678..3645\\t1\\t307\\n")\n+ f.write("seq 3\\tMbS2Gr2Cl0 chunk622 {Fragment} 104..1078\\t1\\t427\\n")\n+ f.write("seq 4\\tPilerCluster3.574Mb796 chunk0117 {Fragment} 51582..50819\\t1\\t307\\n")\n+ \n+ _createLinkFile_four_sequences_with_new_headers = staticmethod ( _createLinkFile_four_sequences_with_new_headers )\n+ \n+ \n+ def _createLinkFile_four_sequences_same_headers(fileName):\n+ f = open(fileName, \'w\')\n+ f.write("seq 1\\tseq 1\\t1\\t127\\n")\n+ f.write("seq 2\\tseq 2\\t1\\t307\\n")\n+ 
f.write("seq 3\\tseq 3\\t1\\t427\\n")\n+ f.write("seq 4\\tseq 4\\t1\\t307\\n")\n+ \n+ _createLinkFile_four_sequences_same_headers = staticmethod ( _createLinkFile_four_sequences_same_headers )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/DbFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbFactory.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,38 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.sql.DbMySql import DbMySql
+
+class DbFactory (object):
+    """Factory whose createInstance() hands back a DbMySql object."""
+    @staticmethod
+    def createInstance(configFileName = "", verbosity = 1):
+        """Return DbMySql(cfgFileName = configFileName, verbosity = verbosity)."""
+        return DbMySql(cfgFileName = configFileName, verbosity = verbosity)
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/DbMySql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbMySql.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,851 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+# Exception hierarchy:\n+#\n+# StandardError\n+# |__Warning\n+# |__Error\n+# |__InterfaceError\n+# |__DatabaseError\n+# |__DataError\n+# |__OperationalError\n+# |__IntegrityError\n+# |__InternalError\n+# |__ProgrammingError\n+# |__NotSupportedError\n+\n+import os\n+import sys\n+import time\n+import ConfigParser\n+import MySQLdb\n+from MySQLdb import InterfaceError\n+from MySQLdb import OperationalError\n+from MySQLdb import InternalError\n+from MySQLdb import DatabaseError\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.LoggerFactory import LoggerFactory\n+from commons.core.checker.RepetException import RepetException\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+\n+LOG_DEPTH = "repet.commons"\n+\n+TABLE_SCHEMA_DESCRIPTOR = {"map": [("name", "varchar(255)"), ("chr", "varchar(255)"), ("start", "int"), ("end", "int")],\n+ "set": [("path", "int unsigned"), ("name", "varchar(255)"), ("chr", "varchar(255)"), ("start", "int"), ("end", "int")],\n+ "match": [("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("query_length", "int unsigned"), ("query_length_perc", "float"),\n+ ("match_length_perc", "float"), ("subject_name", "varchar(255)"), ("subject_start", "int unsigned"), ("subject_end", "int unsigned"),\n+ ("subject_length", "int unsigned"), ("subject_length_perc", "float"), ("E_value", "double"), ("score", "int unsigned"), ("identity", "float"),\n+ ("path", "int unsigned")],\n+ "path": [("path", "int unsigned"), ("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("subject_name", "varchar(255)"),\n+ ("subject_start", "int unsigned"), ("subject_end", "int unsigned"), ("E_value", "double"), ("score", "int unsigned"), ("identity", "float")],\n+ "align": 
[("query_name", "varchar(255)"), ("query_start", "int"), ("query_end", "int"), ("subject_name", "varchar(255)"), ("subject_start", "int unsigned"),\n+ '..b' # @param setTableName string new set table name\n+ #\n+ def convertMapTableIntoSetTable( self, mapTableName, setTableName ):\n+ sqlCmd = "CREATE TABLE %s (path int(10) unsigned auto_increment primary key) select name, chr, start, end from %s;" % (setTableName, mapTableName)\n+ self.execute(sqlCmd)\n+ self.createIndex(setTableName, "set")\n+ \n+ \n+ ## Convert an Align table into a Path table\n+ #\n+ # @param inAlignTable string name of the input Align table\n+ # @param outPathTable string name of the output Path table\n+ #\n+ def convertAlignTableIntoPathTable( self, inAlignTable, outPathTable ):\n+ self.createTable( outPathTable, "path", "", True )\n+ sqlCmd = "SELECT * FROM %s" % ( inAlignTable )\n+ self.execute( sqlCmd )\n+ lResults = self.fetchall()\n+ rowIndex = 0\n+ for res in lResults:\n+ rowIndex += 1\n+ sqlCmd = "INSERT INTO %s" % ( outPathTable )\n+ sqlCmd += " (path,query_name,query_start,query_end,subject_name,subject_start,subject_end,E_value,score,identity)"\n+ sqlCmd += " VALUES ( \'%i\'" % ( rowIndex )\n+ for i in res:\n+ sqlCmd += \', "%s"\' % ( i )\n+ sqlCmd += " )"\n+ self.execute( sqlCmd )\n+ self.updateInfoTable( outPathTable, "" )\n+ \n+ \n+ ## Give a list of instances according to the SQL command\n+ #\n+ # @param SQLCmd string is a SQL command\n+ # @param methodGetInstance2Adapt a getter method name. With this method you choose the type of intances contained in lObjs. 
See example in Test_DbMySql.py.\n+ # @return lObjs list of instances\n+ #\n+ def getObjectListWithSQLCmd( self, SQLCmd, methodGetInstance2Adapt):\n+ self.execute( SQLCmd )\n+ res = self.fetchall()\n+ lObjs = []\n+ for t in res:\n+ iObj = methodGetInstance2Adapt()\n+ iObj.setFromTuple( t )\n+ lObjs.append( iObj )\n+ return lObjs\n+ \n+ \n+ ## Give a list of integer according to the SQL command\n+ #\n+ # @param sqlCmd string is a SQL command\n+ # @return lInteger integer list\n+ #\n+ def getIntegerListWithSQLCmd( self, sqlCmd ):\n+ self.execute(sqlCmd)\n+ res = self.fetchall()\n+ lInteger = []\n+ for t in res:\n+ if t[0] != None:\n+ lInteger.append(int(t[0]))\n+ return lInteger\n+ \n+ \n+ ## Give a int according to the SQL command\n+ #\n+ # @param sqlCmd string is a SQL command\n+ # @return nb integer \n+ #\n+ def getIntegerWithSQLCmd( self, sqlCmd ):\n+ self.execute(sqlCmd)\n+ res = self.fetchall()\n+ nb = res[0][0]\n+ if nb == None:\n+ nb = 0\n+ return nb\n+ \n+ \n+ ## Give a list of str according to the SQL command\n+ #\n+ # @param sqlCmd string is a SQL command\n+ # @return lString str list\n+ #\n+ def getStringListWithSQLCmd( self, sqlCmd ):\n+ self.execute(sqlCmd)\n+ res = self.fetchall()\n+ lString = []\n+ for i in res:\n+ lString.append(i[0])\n+ return lString\n+ \n+#TODO: use API to add indexes\n+ ## Remove doublons in a given table\n+ #\n+ # @param table string name of a MySQL table\n+ #\n+ def removeDoublons( self, table ):\n+ tmpTable = "%s_%s" % ( table, time.strftime("%Y%m%d%H%M%S") )\n+ sqlCmd = "CREATE TABLE %s SELECT DISTINCT * FROM %s" % ( tmpTable, table )\n+ self.execute( sqlCmd )\n+ self.dropTable( table )\n+ self.renameTable(tmpTable, table)\n+ \n+ \n+ ## Get a list of table names from a pattern\n+ #\n+ # @note for instance pattern = \'MyProject_%\'\n+ #\n+ def getTableListFromPattern( self, pattern ):\n+ if pattern == "*" or pattern == "%":\n+ sqlCmd = "SHOW TABLES"\n+ else:\n+ sqlCmd = "SHOW TABLES like \'%s\'" % ( pattern )\n+ lTables = 
self.getStringListWithSQLCmd( sqlCmd )\n+ return lTables\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/DbSQLite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/DbSQLite.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,173 @@
+import sqlite3
+import os
+import sys
+
+#TODO: update...compare with DbMySql.py
+class DbSQLite(object):
+
+    ## Constructor
+    #
+    # @param host string path of the SQLite database file
+    #
+    # @note the connection and its cursor are opened immediately; the
+    #   program exits with status 1 when no host is given or when the
+    #   connection cannot be opened
+    #
+    def __init__(self, host = ""):
+        if host == "":
+            msg = "ERROR: no host specified"
+            sys.stderr.write( "%s\n" % msg )
+            sys.exit(1)
+        self.host = host
+        #TODO: remove open() and cursor from init(), use them directly outside this class
+        # fail here with a clear message rather than later with an AttributeError on self.db
+        if not self.open():
+            sys.stderr.write( "ERROR: can't open database '%s'\n" % self.host )
+            sys.exit(1)
+        self.cursor = self.db.cursor()
+
+    ## Connect to the SQLite database stored in self.host
+    #
+    # @param verbose integer when > 0 the connection error is printed (default = 0)
+    # @param nb integer not used by this implementation
+    # @return boolean True if the connection is opened, False otherwise
+    #
+    def open( self, verbose = 0, nb = 0 ):
+        try:
+            # isolation_level=None: autocommit mode, no explicit commit is needed
+            self.db = sqlite3.connect(self.host, check_same_thread= False, isolation_level=None, detect_types=sqlite3.PARSE_DECLTYPES)
+        except sqlite3.Error as e:
+            if verbose > 0:
+                sys.stdout.write( "ERROR %s\n" % e )
+                sys.stdout.flush()
+            return False
+        return True
+
+    ## Execute a SQL query, retrying once when the first attempt fails
+    #
+    # @param qry string SQL query to execute
+    # @param params parameters of the SQL query (None when the query has no placeholder)
+    #
+    # @note a second failure is only reported on stdout, no exception is raised
+    #
+    def execute( self, qry, params=None ):
+        args = (qry,) if params is None else (qry, params)
+        try:
+            self.cursor.execute( *args )
+        except Exception:
+            #TODO Must be test
+            try:
+                self.cursor.execute( *args )
+            except Exception as e:
+                sys.stdout.write( "Erreur : %s\n" % e )
+
+    ## Retrieve the results of the last executed SQL query
+    #
+    # @return list of the rows fetched from the cursor
+    #
+    def fetchall(self):
+        return self.cursor.fetchall()
+
+    ## Record a new table in the 'info_tables' table
+    #
+    # @param tableName string table name
+    # @param info string information on the table origin
+    #
+    # @note 'info_tables' is created when it does not exist yet
+    #
+    def updateInfoTable( self, tableName, info ):
+        if not self.doesTableExist( "info_tables" ):
+            sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+            self.execute( sqlCmd )
+        # bound parameters: values containing quotes are stored verbatim
+        self.execute( "INSERT INTO info_tables VALUES (?,?)", (tableName, info) )
+
+    ## Create a table of a given type
+    #
+    # @param tableName string name of the table to create
+    # @param dataType string table type, only "job" or "jobs" (any case) is supported
+    # @param overwrite boolean if True, an existing table with this name is dropped first
+    # @param verbose integer (default = 0)
+    #
+    # @note on an unknown type the connection is closed and the program exits with status 1
+    #
+    def createTable(self, tableName, dataType, overwrite=False, verbose=0):
+        if verbose > 0:
+            sys.stdout.write( "creating table '%s' of type '%s'...\n" % (tableName, dataType) )
+            sys.stdout.flush()
+        # check the type before dropping anything, so that a wrong call cannot destroy data
+        if dataType.lower() not in ["job", "jobs"]:
+            sys.stdout.write( "ERROR: unknown type %s\n" % (dataType) )
+            self.close()
+            sys.exit(1)
+        if overwrite:
+            self.dropTable(tableName)
+        self.createJobTable(tableName)
+        if verbose > 0:
+            sys.stdout.write( "done!\n" )
+            sys.stdout.flush()
+
+    ## Create a job table, record it in 'info_tables' and index its 'groupid' column
+    #
+    # @param tablename new table name
+    #
+    # @note NOTE(review): the index name 'igroupid' is the same for every job table,
+    #   so a second job table in the same database presumably gets no index
+    #   (the failure would only be reported by execute()) - to be confirmed
+    #
+    def createJobTable( self, tablename ):
+        lColumns = [ "jobid INT UNSIGNED", "jobname VARCHAR(255)", "groupid VARCHAR(255)",
+                     "command TEXT", "launcher VARCHAR(1024)", "queue VARCHAR(255)",
+                     "status VARCHAR(255)", "time timestamp", "node VARCHAR(255)" ]
+        sqlCmd = "CREATE TABLE %s ( %s )" % ( tablename, ", ".join( lColumns ) )
+        self.execute( sqlCmd )
+        self.updateInfoTable( tablename, "job table" )
+        self.execute( "CREATE INDEX igroupid ON " + tablename + " ( groupid )" )
+
+    ## Test if a table exists
+    #
+    # @param table string table name
+    # @return boolean True if the table exists, False otherwise
+    #
+    def doesTableExist( self, table ):
+        # PRAGMA table_info gives one row per column, hence no row for a missing table
+        self.execute( "PRAGMA table_info(%s)" % (table) )
+        return bool( self.cursor.fetchall() )
+
+    ## Test if a table has no row
+    #
+    # @param tableName string table name
+    # @return boolean True if the table is empty, False otherwise
+    #
+    def isEmpty( self, tableName ):
+        return self.getSize( tableName ) == 0
+
+    ## Give the rows number of the table
+    #
+    # @param tableName string table name
+    # @return integer number of rows
+    #
+    def getSize( self, tableName ):
+        self.execute( "SELECT count(*) FROM %s;" % ( tableName ) )
+        return int( self.fetchall()[0][0] )
+
+    ## Remove a table if it exists, as well as its record in 'info_tables'
+    #
+    # @param table string table name
+    # @param verbose integer not used by this implementation (default = 0)
+    #
+    def dropTable( self, table, verbose = 0 ):
+        if not self.doesTableExist( table ):
+            return
+        self.execute( "DROP TABLE %s" % ( table ) )
+        # 'info_tables' may be missing (never created, or it is the table just dropped)
+        if self.doesTableExist( "info_tables" ):
+            # bound parameter: a double-quoted value can be read by SQLite as a column name
+            self.execute( "DELETE FROM info_tables WHERE name = ?", (table,) )
+
+    ## Get the list of the column names of a table
+    #
+    # @param table string table name
+    # @return lFields list of column names
+    #
+    def getFieldList( self, table ):
+        self.execute( "PRAGMA table_info(%s)" % ( table ) )
+        # the column name is the second field of each row
+        return [ res[1] for res in self.fetchall() ]
+
+    ## Delete the file of this SQLite database
+    #
+    # @note the connection is not closed by this method
+    #
+    def delete(self):
+        os.remove(self.host)
+
+    ## Close the connection
+    #
+    def close( self ):
+        self.db.close()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/ITableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableMapAdaptator.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,113 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableMapAdaptator
+#
+class ITableMapAdaptator(object):
+
+    ## Add one Map (or Set) instance to the table
+    #
+    # @param obj Map or Set instance to insert
+    # @param delayed boolean, True when the insert must be delayed
+    #
+    # @warning formerly named insAMap
+    #
+    def insert(self, obj, delayed=False):
+        return None
+
+
+    ## Add every Map, Set or Match instance of a list to the table
+    #
+    # @param l list of object instances
+    # @param delayed boolean
+    #
+    # @warning formerly named insMapList
+    #
+    def insertList(self, l, delayed=False):
+        return None
+
+    ## List the distinct sequence (chr) names found in the table
+    #
+    # @return lDistinctContigNames list of strings
+    #
+    # @warning formerly named getContig_name
+    #
+    def getSeqNameList(self):
+        return None
+
+
+    ## List the Map instances attached to a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lMap list of Map instances
+    #
+    # @warning formerly named get_MapList_from_contig
+    #
+    def getMapListFromSeqName(self, seqName):
+        return None
+
+
+    ## List the Set instances attached to a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lSets list of Set instances
+    #
+    # @warning formerly named getSetList_from_contig
+    #
+    def getSetListFromSeqName(self, seqName):
+        return None
+
+
+    ## List the Map instances that overlap a given region
+    #
+    # @param seqName string sequence name
+    # @param start integer first coordinate of the region
+    # @param end integer last coordinate of the region
+    # @return lMap list of Map instances
+    #
+    # @warning formerly named getMapList_from_qcoord
+    #
+    def getMapListOverlappingCoord(self, seqName, start, end):
+        return None
+
+
+    ## List the Set instances that overlap a given region
+    #
+    # @param seqName string sequence name
+    # @param start integer first coordinate of the region
+    # @param end integer last coordinate of the region
+    # @return lSet list of Set instances
+    #
+    # @warning formerly named getSetList_from_qcoord
+    #
+    def getSetListOverlappingCoord(self, seqName, start, end):
+        return None
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/ITableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableMatchAdaptator.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,68 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableMatchAdaptator
+#
+class ITableMatchAdaptator(object):
+
+    ## List the Match instances recorded for a given query name
+    #
+    # @param query string name of the query sequence
+    # @return lMatches list of Match instances
+    #
+    def getMatchListFromQuery(self, query):
+        return None
+
+    ## List the Match instances sharing a given identifier
+    #
+    # @param id integer identifier number
+    # @return lMatch list of Match instances
+    #
+    def getMatchListFromId(self, id):
+        return None
+
+    ## Add one Match instance to the table
+    #
+    # @param iMatch Match instance to insert
+    # @param delayed boolean
+    #
+    def insert(self, iMatch, delayed=False):
+        return None
+
+    ## Add every Map, Set or Match instance of a list to the table
+    #
+    # @param l list of object instances
+    # @param delayed boolean
+    #
+    # @warning formerly named insMapList
+    #
+    def insertList(self, l, delayed=False):
+        return None
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/ITablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITablePathAdaptator.py Fri Jan 18 04:54:14 2013 -0500

b'@@ -0,0 +1,429 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+## Interface for TablePathAdaptator\n+#\n+class ITablePathAdaptator (object):\n+\n+ ## Give the data contained in the table as a list of Path instances\n+ #\n+ # @return lPaths list of path instances\n+ #\n+ def getListOfAllPaths( self ):\n+ pass\n+ \n+ ## Give a list of Path instances having the same identifier\n+ #\n+ # @param id integer identifier number\n+ # @return lPath a list of Path instances\n+ #\n+ # @warning old name was getPathList_from_num\n+ #\n+ def getPathListFromId( self, id ):\n+ pass\n+\n+ ## Give a list of Path instances according to the given list of identifier numbers\n+ #\n+ # @param lId integer list \n+ # @return lPath a list of Path instances\n+ #\n+ # @warning old name was getPathList_from_numlist\n+ #\n+ def getPathListFromIdList( self, lId ):\n+ pass\n+ \n+ ## Give a list of Path instances having the same given query name\n+ #\n+ # @param query string name of the query \n+ # @return lPath a list of Path instances\n+ #\n+ # @warning old name was getPathList_from_query\n+ #\n+ def getPathListFromQuery( self, query ):\n+ pass\n+ \n+ ## Give a list with all the distinct identifiers corresponding to the query\n+ #\n+ # @param query string name of the query \n+ # @return lId a list of integer\n+ #\n+ # @warning old name was getPathList_from_query\n+ #\n+ def getIdListFromQuery( self, query ):\n+ pass\n+ \n+ ## Give a list with all the distinct identifiers corresponding to the subject\n+ #\n+ # @param subject string name of the subject \n+ # @return lId a list of integer\n+ #\n+ # @warning old name was getPathList_from_subject\n+ #\n+ def getIdListFromSubject( self, subject ):\n+ pass\n+ \n+ ## Insert a path instance\n+ #\n+ # @param obj a path instance\n+ # @param delayed boolean indicating if the insert must be delayed\n+ #\n+ # @note data are inserted such that the query is always on the 
direct strand\n+ #\n+ # @warning old name was insAPath\n+ #\n+ def insert(self, obj, delayed = False):\n+ pass\n+ \n+ ## Insert a list of Path instances\n+ #\n+ # @param l a list of Path instances\n+ # @param delayed boolean\n+ #\n+ # @warning old name was insPathList\n+ #\n+ def insertList(self, l, delayed = False):\n+ pass\n+ \n+ ## '..b'th_from_subject\n+ # \n+ def getCumulLengthFromSubject( self, subjectName ):\n+ pass\n+ \n+ ## Give a list of the length of all chains of paths for a given subject name\n+ #\n+ # @param subjectName string name of the subject\n+ # @return lChainLengths list of lengths per chain of paths\n+ # @warning doesn\'t take into account the overlaps !!\n+ # @warning old name was getListChainLength_from_subject\n+ #\n+ def getChainLengthListFromSubject( self, subjectName ):\n+ pass\n+\n+ ## Give a list of identity of all chains of paths for a given subject name\n+ #\n+ # @param subjectName string name of the subject\n+ # @return lChainIdentities list of identities per chain of paths\n+ # @warning doesn\'t take into account the overlaps !!\n+ # @warning old name was getListChainIdentity_from_subject\n+ # \n+ def getChainIdentityListFromSubject( self, subjectName ):\n+ pass\n+ \n+ ## Give a list of Path lists sorted by weighted identity.\n+ #\n+ # @param qry query name\n+ # @return lChains list of chains\n+ #\n+ def getListOfChainsSortedByAscIdentityFromQuery( self, qry ):\n+ pass\n+ \n+ ## Give a list of the length of all paths for a given subject name\n+ #\n+ # @param subjectName string name of the subject\n+ # @return lPathLengths list of lengths per path\n+ # @warning doesn\'t take into account the overlaps !!\n+ # @warning old name was getListPathLength_from_subject\n+ #\n+ def getPathLengthListFromSubject( self, subjectName ):\n+ pass\n+ \n+ ## Give a a list with all distinct identifiers for a given subject sorted in decreasing order according to the length of the chains\n+ # \n+ # @return lPathNums a list of paths Id\n+ #\n+ # @warning 
old name was getPathNumListSortedByDecreasingChainLengthFromSubject\n+ #\n+ def getIdListSortedByDecreasingChainLengthFromSubject( self, subjectName ):\n+ pass\n+ \n+ ## Give a list of Set instance list from the path contained on a query name\n+ #\n+ # @param query string query name\n+ # @return lSet list of set instance \n+ #\n+ # @warning old name was getSetList_from_contig\n+ #\n+ def getSetListFromQuery(self, query):\n+ pass\n+ \n+ ## Delete path corresponding to a given identifier number\n+ #\n+ # @param id integer identifier number\n+ #\n+ # @warning old name was delPath_from_num\n+ #\n+ def deleteFromId(self,id):\n+ pass\n+ \n+ ## Delete path corresponding to a given list of identifier number\n+ #\n+ # @param lId list of identifier number\n+ #\n+ # @warning old name was delPath_from_numlist\n+ #\n+ def deleteFromIdList(self,lId):\n+ pass\n+\n+ ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2\n+ #\n+ # @param id1 integer path number\n+ # @param id2 integer path number\n+ # @return newId integer id used to join\n+ #\n+ # @warning old name was joinPath\n+ #\n+ def joinTwoPaths(self,id1,id2):\n+ pass\n+ \n+ ## Get a new id number\n+ #\n+ # @return newId integer new id\n+ #\n+ def getNewId(self):\n+ pass\n+ \n+ ## Test if table is empty\n+ # \n+ def isEmpty( self ):\n+ pass\n+ \n+ ## Create a \'pathRange\' table from a \'path\' table. \n+ # The output table summarizes the information per identifier. \n+ # The min and max value are taken. \n+ # The identity is averaged over the fragments. 
\n+ # It may overwrite an existing table.\n+ #\n+ # @param outTable string name of the output table\n+ # @return outTable string Table which summarizes the information per identifier\n+ #\n+ def path2PathRange( self, outTable="" ):\n+ pass\n+ \n+ ## Return the number of times a given instance is present in the table\n+ # The identifier is not considered,\n+ # only coordinates, score, E-value and identity.\n+ #\n+ # @return nbOcc integer\n+ #\n+ def getNbOccurrences( self, iPath ):\n+ pass\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/ITableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableSeqAdaptator.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,63 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Interface for TableSeqAdaptator
+#
+class ITableSeqAdaptator(object):
+
+    ## List all the distinct accession names
+    #
+    # @return lAccessions list of accessions
+    #
+    # @warning formerly named getListAccession
+    #
+    def getAccessionsList(self):
+        return None
+
+    ## Write into a fasta file the sequences named in a list of accessions
+    #
+    # @param lAccessions list of accessions
+    # @param outFileName string name of the output fasta file
+    #
+    # @warning formerly named saveListAccessionInFastaFile
+    #
+    def saveAccessionsListInFastaFile(self, lAccessions, outFileName):
+        return None
+
+    ## Add one bioseq instance to the table
+    #
+    # @param seq bioseq instance to insert
+    # @param delayed boolean, True when the insert must be delayed
+    #
+    # @warning formerly named insASeq
+    #
+    def insert(self, seq, delayed=False):
+        return None
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/ITableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/ITableSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,146 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+## Interface for TableSetAdaptator
+#
+class ITableSetAdaptator(object):
+
+    ## Add one Set instance to the table
+    #
+    # @param obj Set instance to insert
+    # @param delayed boolean, True when the insert must be delayed
+    #
+    # @warning formerly named insASet
+    #
+    def insert(self, obj, delayed=False):
+        return None
+
+    ## Add every Set instance of a list to the table
+    #
+    # @param l list of object instances
+    # @param delayed boolean
+    #
+    # @warning formerly named insSetList
+    #
+    def insertList(self, l, delayed=False):
+        return None
+
+    ## List the identifier numbers present in the table
+    #
+    # @return l list of integers
+    #
+    # @warning formerly named getSet_num
+    #
+    def getIdList(self):
+        return None
+
+    ## List the Set instances attached to a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lSets list of Set instances
+    #
+    # @warning formerly named get_SetList_from_contig
+    #
+    def getSetListFromSeqName(self, seqName):
+        return None
+
+    ## List the Set instances carrying a given identifier number
+    #
+    # @param id integer identifier number
+    # @return lSet list of Set instances
+    #
+    # @warning formerly named getSetList_from_num
+    #
+    def getSetListFromId(self, id):
+        return None
+
+    ## List the Set instances carrying one of the given identifier numbers
+    #
+    # @param lId list of integer identifier numbers
+    # @return lSet list of Set instances
+    #
+    # @warning formerly named getSetList_from_numlist
+    #
+    def getSetListFromIdList(self, lId):
+        return None
+
+    ## List the Set instances that overlap a given region of a sequence
+    #
+    # @param seqName string sequence name
+    # @param start integer first coordinate of the region
+    # @param end integer last coordinate of the region
+    # @return lSet list of Set instances
+    #
+    # @warning formerly named getSetList_from_qcoord
+    #
+    def getSetListOverlappingCoord(self, seqName, start, end):
+        return None
+
+    ## Remove the set carrying a given identifier number
+    #
+    # @param id integer identifier number
+    #
+    # @warning formerly named delSet_from_num
+    #
+    def deleteFromId(self, id):
+        return None
+
+    ## Remove the sets carrying one of the given identifier numbers
+    #
+    # @param lId list of integer identifier numbers
+    #
+    # @warning formerly named delSet_from_listnum
+    #
+    def deleteFromIdList(self, lId):
+        return None
+
+    ## Merge two sets: the sets id1 and id2 both take the smaller of the two identifiers
+    #
+    # @param id1 integer identifier number of the first set
+    # @param id2 integer identifier number of the second set
+    #
+    # @warning formerly named joinSet
+    #
+    def joinTwoSets(self, id1, id2):
+        return None
+
+    ## Give an identifier number not used yet
+    #
+    # @return new_id integer max_id + 1
+    #
+    def getNewId(self):
+        return None
+
+    ## Give the whole content of the table as a list of Set instances
+    #
+    # @return lSets list of Set instances
+    #
+    def getListOfAllSets(self):
+        return None
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/Job.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/Job.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,74 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+## Information needed to launch a command as a job on a cluster.
+#
+class Job(object):
+
+    ## Constructor
+    #
+    #   @param jobid the job identifier (integer or string of digits); any other value is taken
+    #          as the job name, and the identifier is then set to 0
+    #   @param jobname the job name (replaced by 'jobid' when 'jobid' is not numeric)
+    #   @param groupid the group identifier to record related job series
+    #   @param queue queue name of the job manager ("none" is recorded as an empty name)
+    #   @param command command launched
+    #   @param launcherFile file name launched as job (stored in the 'launcher' attribute)
+    #   @param node cluster node name where the execution takes place
+    #   @param lResources resources (memory, time...) but need to conform to SGE/Torque syntax !
+    #          (default = None, meaning ["mem_free=1G"])
+    #   @param parallelEnvironment parallel environment; JobAdaptatorSGE appends it to the
+    #          qsub command after '-pe' (default = "")
+    #
+    def __init__(self, jobid=0, jobname="", groupid="", queue="", command="", launcherFile="",\
+                  node="", lResources=None, parallelEnvironment="" ):
+        if str(jobid).isdigit():
+            self.jobid = int(jobid)
+            self.jobname = jobname
+        else:
+            # a non-numeric first argument is a job name, not an identifier
+            self.jobname = jobid
+            self.jobid = 0
+        # NB: the identifier is not re-assigned from the raw argument afterwards,
+        # otherwise the normalisation above would be lost
+        self.groupid = groupid
+        self.setQueue(queue)
+        self.command = command
+        self.launcher = launcherFile
+        self.node = node
+        # build the default list here: a list literal used as default value in the
+        # signature would be one single object shared by all the jobs relying on it
+        if lResources is None:
+            lResources = ["mem_free=1G"]
+        self.lResources = lResources
+        self.parallelEnvironment = parallelEnvironment
+
+    ## Set the queue name
+    #
+    #   @param queue string queue name; the string "none" is recorded as an empty name
+    #
+    def setQueue(self, queue):
+        self.queue = ""
+        if queue != "none":
+            self.queue = queue
+
+    ## Tuple of all the attributes taking part in the comparison of two jobs
+    #
+    def _getComparisonKey(self):
+        return ( self.jobid, self.jobname, self.groupid, self.queue, self.command, \
+                 self.launcher, self.node, self.lResources, self.parallelEnvironment )
+
+    ## Two jobs are equal when all their attributes are equal
+    #
+    #   @param o the object to compare with
+    #   @return True or False for a Job instance, NotImplemented for any other object
+    #
+    def __eq__(self, o):
+        if not isinstance(o, Job):
+            return NotImplemented
+        return self._getComparisonKey() == o._getComparisonKey()
+
+    ## Opposite of __eq__ (Python 2 does not derive '!=' from '__eq__')
+    #
+    #   @param o the object to compare with
+    #
+    def __ne__(self, o):
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return NotImplemented
+        return not isEqual

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/JobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/JobAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,271 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import sys\n+import tempfile\n+import subprocess\n+from commons.core.sql.Job import Job\n+\n+## Methods for Job persistence \n+#\n+class JobAdaptator(object):\n+ \n+ def __init__(self, lJob = [], table = "" ):\n+ self._lJobID = lJob\n+ self._table = table\n+ self._acronym = ""\n+ ## Record a job\n+ #\n+ # @param job Job instance with the job informations\n+ #\n+ def recordJob(self, job):\n+ self._lJobID.append(job)\n+ \n+ ## Remove a job from the job table\n+ #\n+ # @param job: job instance to remove\n+ #\n+ def removeJob(self, job):\n+ pass \n+ \n+ ## Set the jobid of a job with the id of SGE\n+ #\n+ # @param job job instance\n+ # @param jobid integer\n+ #\n+ def updateJobIdInDB(self, job, jobid):\n+ pass\n+ \n+ ## Get a job status\n+ #\n+ # @param job: a Job instance with the job informations\n+ #\n+ def getJobStatus(self, job):\n+ pass\n+ \n+ \n+ ## Change a job status\n+ #\n+ # @param job: a Job instance with the job informations\n+ # @param status: the new status (waiting,finished,error)\n+ #\n+ def changeJobStatus(self, job, status):\n+ pass\n+ \n+ ## Get the number of jobs belonging to the desired groupid with the desired status.\n+ #\n+ # @param groupid string a group identifier to record related job series \n+ # @param status string job status (waiting, running, finished, error)\n+ # @return int\n+ #\n+ def getCountStatus(self, groupid, status):\n+ pass\n+ \n+ ## Clean all job from a job group\n+ #\n+ # @param groupid: a group identifier to record related job series\n+ #\n+ def cleanJobGroup(self, groupid):\n+ pass \n+ \n+ ## Check if there is unfinished job from a job group.\n+ #\n+ # @param groupid string a group identifier to record related job series \n+ # \n+ def hasUnfinishedJob(self, groupid):\n+ pass\n+\n+ def _getJobIDListFromQstat(self):\n+ lJobIDFromQstat = []\n+ tmp = 
tempfile.NamedTemporaryFile(delete=False)\n+ cmd ="qstat | grep %s" % self._acronym\n+ process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+ process.communicate()\n+ tmp.close()\n+ if process.returncode == 0:\n+ fileName = tmp.name\n+ jo'..b'ault = 0)\n+ # \n+ def submitJob(self, job, verbose=0, maxNbWaitingJobs=10000, checkInterval=30):\n+ cmd = self._getQsubCommand(job)\n+ tmp = tempfile.NamedTemporaryFile(delete=False)\n+ process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+ process.communicate()\n+ tmp.close()\n+ if process.returncode == 0:\n+ fileName = tmp.name\n+ jobidFileHandler = open(fileName, "r")\n+ jobid = self._getJobidFromJobManager(jobidFileHandler)\n+ if verbose > 0:\n+ print "job \'%i %s\' submitted" % (jobid, job.jobname)\n+ sys.stdout.flush()\n+ job.jobid = jobid\n+ #newJob= Job(job.jobid, job.jobname, job.groupid, job.queue, job.command, job.launcher, job.node, job.lResources, job.parallelEnvironment)\n+ self._acronym = job.jobname.split("_")[0][:10]\n+ self.recordJob(job.jobid)\n+ jobidFileHandler.close()\n+ os.remove(fileName)\n+ return process.returncode\n+\n+\n+ ## Get the list of nodes where jobs of one group were executed\n+ #\n+ # @param groupid string a group identifier of job series \n+ # @return lNodes list of nodes names without redundancy\n+ #\n+ def getNodesListByGroupId(self, groupId):\n+ pass\n+ \n+ def checkJobTable(self):\n+ pass\n+ \n+ def close(self):\n+ pass\n+ \n+ def _getJobidAndNbJob(self, jobid) :\n+ tab = jobid.split(".")\n+ jobid = tab[0]\n+ tab = tab[1].split(":")\n+ nbJob = tab[0]\n+ return jobid, nbJob\n+ \n+class JobAdaptatorSGE(JobAdaptator):\n+\n+ ## Check if a job is still handled by SGE\n+ #\n+ # @param jobid string job identifier\n+ # @param jobname string job name\n+ # \n+ def isJobStillHandledBySge(self, jobid, jobname):\n+ isJobInQstat = False\n+ tmp = tempfile.NamedTemporaryFile(delete=False)\n+ cmd = "qstat"\n+ process = subprocess.Popen(cmd, shell=True,stdout=tmp)\n+ process.communicate()\n+ 
tmp.close()\n+ qstatFile = tmp.name\n+ if process.returncode != 0:\n+ msg = "ERROR while launching \'qstat\'"\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit(1)\n+ qstatFileHandler = open(qstatFile, "r")\n+ lLines = qstatFileHandler.readlines()\n+ for line in lLines:\n+ tokens = line.split()\n+ if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+ isJobInQstat = True\n+ break\n+ qstatFileHandler.close()\n+ os.remove(qstatFile)\n+ return isJobInQstat\n+ \n+ def _getQsubCommand(self, job): \n+ cmd = "echo \'%s\' | " % job.launcher\n+ prg = "qsub"\n+ cmd += prg\n+ cmd += " -V"\n+ cmd += " -N %s" % job.jobname\n+ if job.queue != "":\n+ cmd += " -q %s" % job.queue\n+ cmd += " -cwd"\n+ if job.lResources != []:\n+ cmd += " -l \\""\n+ cmd += " ".join(job.lResources)\n+ cmd += "\\""\n+ if job.parallelEnvironment != "":\n+ cmd += " -pe " + job.parallelEnvironment\n+ return cmd\n+ \n+ def _getJobidFromJobManager(self, jobidFileHandler):\n+ return int(jobidFileHandler.readline().split(" ")[2])\n+ \n+\n+class JobAdaptatorTorque(JobAdaptator): \n+ \n+ def _getQsubCommand(self, job): \n+ cmd = "echo \'%s\' | " % job.launcher\n+ prg = "qsub"\n+ cmd += prg\n+ cmd += " -V"\n+ cmd += " -d %s" % os.getcwd()\n+ cmd += " -N %s" % job.jobname\n+ if job.queue != "":\n+ cmd += " -q %s" % job.queue\n+ if job.lResources != []:\n+ cmd += " -l \\""\n+ cmd += " ".join(job.lResources).replace("mem_free","mem")\n+ cmd += "\\""\n+ return cmd\n+\n+ def _getJobidFromJobManager(self, jobidFileHandler):\n+ return int(jobidFileHandler.readline().split(".")[0])\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/OldRepetDB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/OldRepetDB.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,27 @@
+import pyRepet.sql.RepetDBMySQL
+
+
+class RepetDB ( pyRepet.sql.RepetDBMySQL.RepetDB ):
+
+    ## Execute a SQL query through the cursor
+    #
+    # @param qry string SQL query, possibly with placeholders
+    # @param params values bound to the placeholders of 'qry' (default = None: 'qry' is executed alone)
+    #
+    #TODO: try
+    def execute( self, qry, params=None ):
+        # identity test: None is a singleton and must not be compared with '=='
+        if params is None:
+            self.cursor.execute( qry )
+        else:
+            self.cursor.execute( qry, params )
+
+
+    ## Record a new table in the 'info_tables' table
+    #
+    # The 'info_tables' table is created first when 'SHOW TABLES' does not list it.
+    #
+    # @param tablename table name
+    # @param info information on the origin of the table
+    #
+    def updateInfoTable( self, tablename, info ):
+        self.execute( """SHOW TABLES""" )
+        results = self.fetchall()
+        # each row of the result is compared as a 1-tuple holding a table name
+        if ("info_tables",) not in results:
+            sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+            self.execute( sqlCmd )
+        # the values are handed over as parameters, not formatted into the query text
+        qryParams = "INSERT INTO info_tables VALUES (%s, %s)"
+        params = ( tablename, info )
+        self.execute( qryParams,params )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/RepetJob.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,252 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. \n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import sys\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+\n+#TODO: to remove... 
=> replace all RepetJob() by TableJobAdaptator()...\n+## Methods for Job persistence \n+#\n+class RepetJob( DbMySql ):\n+ \n+ \n+ ## Record a job\n+ #\n+ # @param job Job instance with the job informations\n+ #\n+ def recordJob( self, job ):\n+ self.removeJob( job )\n+ sqlCmd = "INSERT INTO %s" % ( job.tablename )\n+ sqlCmd += " VALUES ("\n+ sqlCmd += " \\"%s\\"," % ( job.jobid )\n+ sqlCmd += " \\"%s\\"," % ( job.jobname )\n+ sqlCmd += " \\"%s\\"," % ( job.groupid )\n+ sqlCmd += " \\"%s\\"," % ( job.command.replace("\\"","\\\'") )\n+ sqlCmd += " \\"%s\\"," % ( job.launcher )\n+ sqlCmd += " \\"%s\\"," % ( job.queue )\n+ sqlCmd += " \\"waiting\\","\n+ sqlCmd += " \\"%s\\"," % ( time.strftime( "%Y-%m-%d %H:%M:%S" ) )\n+ sqlCmd += " \\"?\\" );"\n+ self.execute( sqlCmd )\n+ \n+ \n+ ## Remove a job from the job table\n+ #\n+ # @param job: job instance to remove\n+ #\n+ def removeJob( self, job ):\n+ qry = "DELETE FROM %s" % ( job.tablename )\n+ qry += " WHERE groupid=\'%s\'" % ( job.groupid )\n+ qry += " AND jobname=\'%s\'" % ( job.jobname )\n+ qry += " AND queue=\'%s\';" % ( job.queue )\n+ self.execute( qry )\n+ \n+ \n+ ## Set the jobid of a job with the id of SGE\n+ #\n+ # @param job job instance\n+ # @param jobid integer\n+ #\n+ def setJobIdFromSge( self, job, jobid ):\n+ qry = "UPDATE %s" % ( job.tablename )\n+ qry += " SET jobid=\'%i\'" % ( int(jobid) )\n+ qry += " WHERE jobname=\'%s\'" % ( job.jobname )\n+ qry += " AND groupid=\'%s\'" % ( job.groupid )\n+ qry += " AND queue=\'%s\';" % ( job.queue )\n+ self.execute( qry )\n+ \n+ \n+ ## Get a job status\n+ #\n+ # @param job: a Job instance with the job informations\n+ #\n+ def getJobStatus( self, job ):\n+ if job.jobid != 0 and job.jobname == "":\n+ job.jobname = job.jobid\n+ job.jobid = 0\n+ qry = "SELECT status FROM %s" % ( job.tablename )\n+ qry += " WHERE groupid=\'%s\'" % ( job.groupid )\n+ qry += " AND jobname=\'%s\'" % ( job.jobname )\n+ qry += " '..b' table name to record the jobs\n+ # @param groupid string a 
group identifier to record related job series \n+ # \n+ def hasUnfinishedJob( self, tablename, groupid ):\n+ if not self.doesTableExist( tablename ):\n+ return False\n+ qry = "SELECT * FROM %s" % ( tablename )\n+ qry += " WHERE groupid=\'%s\'" % ( groupid )\n+ qry += " and status!=\'finished\';" \n+ self.execute( qry )\n+ res = self.fetchall()\n+ if len(res) == 0:\n+ return False\n+ return True\n+ \n+ \n+ ## Check if a job is still handled by SGE\n+ #\n+ # @param jobid string job identifier\n+ # @param jobname string job name\n+ # \n+ def isJobStillHandledBySge( self, jobid, jobname ):\n+ isJobInQstat = False\n+ qstatFile = "qstat_stdout"\n+ cmd = "qstat > %s" % ( qstatFile )\n+ returnStatus = os.system( cmd )\n+ if returnStatus != 0:\n+ msg = "ERROR while launching \'qstat\'"\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit(1)\n+ qstatFileHandler = open( qstatFile, "r" )\n+ lLines = qstatFileHandler.readlines()\n+ for line in lLines:\n+ tokens = line.split()\n+ if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+ isJobInQstat = True\n+ break\n+ qstatFileHandler.close()\n+ os.remove( qstatFile )\n+ return isJobInQstat\n+ \n+ \n+ ## Wait job finished status from a job group.\n+ # Job are re-launched if error (max. 
3 times)\n+ #\n+ # @param tableName string table name to record the jobs\n+ # @param groupid string a group identifier to record related job series\n+ # @param checkInterval integer time laps in seconds between two checks (default = 5)\n+ # @param maxRelaunch integer max nb of times a job in error is relaunch before exiting (default = 3)\n+ # @param exitIfTooManyErrors boolean exit if a job is still in error above maxRelaunch (default = True)\n+ # @param timeOutPerJob integer max nb of seconds after which one tests if a job is still in SGE or not (default = 60*60=1h)\n+ #\n+ def waitJobGroup(self, tableName, groupid, checkInterval=5, maxRelaunch=3, exitIfTooManyErrors=True, timeOutPerJob=60*60):\n+ iTJA = TableJobAdaptatorFactory.createInstance(self, tableName)\n+ iTJA.waitJobGroup(groupid, checkInterval, maxRelaunch, exitIfTooManyErrors, timeOutPerJob)\n+ \n+ ## Submit a job to a queue and record it in job table.\n+ #\n+ # @param job a job instance\n+ # @param maxNbWaitingJobs integer max nb of waiting jobs before submitting a new one (default = 10000)\n+ # @param checkInterval integer time laps in seconds between two checks (default = 30)\n+ # @param verbose integer (default = 0)\n+ # \n+ def submitJob( self, job, verbose=0, maxNbWaitingJobs=10000, checkInterval=30 ):\n+ iTJA = TableJobAdaptatorFactory.createInstance(self, job.tablename)\n+ return iTJA.submitJob(job, verbose, maxNbWaitingJobs, checkInterval)\n+ \n+ \n+ ## Get the list of nodes where jobs of one group were executed\n+ #\n+ # @param tablename string table name where jobs are recored \n+ # @param groupid string a group identifier of job series \n+ # @return lNodes list of nodes names\n+ #\n+ def getNodesListByGroupId( self, tableName, groupId ):\n+ qry = "SELECT node FROM %s" % tableName\n+ qry += " WHERE groupid=\'%s\'" % groupId\n+ self.execute( qry )\n+ res = self.fetchall()\n+ lNodes = []\n+ for resTuple in res:\n+ lNodes.append(resTuple[0])\n+ return lNodes\n+ \n+ def getDbName(self):\n+ return 
"DbMySql"\n+ \n+ def _getJobidAndNbJob(self, jobid) :\n+ tab = []\n+ tab = jobid.split(".")\n+ jobid = tab[0]\n+ tab = tab[1].split(":")\n+ nbJob = tab[0]\n+ return jobid, nbJob\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableAdaptator.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,128 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+## Abstract class, Ancestor of Table*Adaptator
+#
+class TableAdaptator( object ):
+
+    ## Constructor
+    #
+    # @param iDb DbMySql instance (default = None)
+    # @param table string table name (default = "")
+    #
+    def __init__( self, iDb = None, table = "" ):
+        self._table = table
+        self._iDb = iDb
+
+    ## Replace the database connector
+    #
+    # @param iDb database instance
+    #
+    def setDbConnector( self, iDb ):
+        self._iDb = iDb
+
+    ## Replace the name of the adapted table
+    #
+    # @param table string table name
+    #
+    def setTable( self, table ):
+        self._table = table
+
+    ## Give the name of the adapted table
+    #
+    # @return string table name
+    #
+    def getTable( self ):
+        tableName = self._table
+        return tableName
+
+    ## Give the result of the connector's getSize() for the adapted table
+    #
+    def getSize( self ):
+        size = self._iDb.getSize( self._table )
+        return size
+
+    ## Give the result of the connector's isEmpty() for the adapted table
+    #
+    def isEmpty( self ):
+        isTableEmpty = self._iDb.isEmpty( self._table )
+        return isTableEmpty
+
+    ## Insert one object (Map, Set, Match, Path or Seq instance) in the table
+    #
+    # Nothing is executed when the object reports itself as empty.
+    #
+    # @param obj a Map or Set or Match or Path or Seq instance
+    # @param delayed boolean, True to build an 'INSERT DELAYED' command (default = False)
+    #
+    def insert(self, obj, delayed = False):
+        if not obj.isEmpty():
+            # escaping comes first: the command is built from the object attributes
+            self._escapeAntislash(obj)
+            self._iDb.execute(self._genSqlCmdForInsert(obj, delayed))
+
+    ## Insert every object of a list, one insert() call per object
+    #
+    # @param l a list of object instances
+    # @param delayed boolean forwarded to each insert() call (default = False)
+    #
+    def insertList(self, l, delayed = False):
+        for iObj in l:
+            self.insert(iObj, delayed)
+
+    ## Give the whole content of the table as a list of coord object instances
+    #
+    # @return lObject list built by the connector from a 'SELECT *' on the table
+    #
+    def getListOfAllCoordObject( self ):
+        return self._iDb.getObjectListWithSQLCmd( "SELECT * FROM %s" % ( self._table ), self._getInstanceToAdapt )
+
+    ## Generate the sql command used by the insert method
+    #
+    # @param obj Map, Set or Match instance
+    # @param delayed boolean, True to add the DELAYED keyword
+    # @return sqlCmd string generated sql command
+    #
+    def _genSqlCmdForInsert(self, obj, delayed):
+        # the spacing of both prefixes is kept as is, the command text must not change
+        if delayed :
+            prefix = 'INSERT  DELAYED '
+        else:
+            prefix = 'INSERT '
+        lTypes, attributes = self._getTypeAndAttr2Insert(obj)
+        # first formatting: table name; second formatting (below): attribute values
+        template = prefix + 'INTO %s VALUES (' % (self._table) + ",".join(lTypes) + ")"
+        return template % attributes
+
+    ## Hook read by _genSqlCmdForInsert as a (list of format strings, attribute values) pair
+    #
+    # Placeholder: returns None here.
+    #
+    def _getTypeAndAttr2Insert(self, obj):
+        return None
+
+    ## Hook handed over to the connector by getListOfAllCoordObject
+    #
+    # Placeholder: returns None here.
+    #
+    def _getInstanceToAdapt(self):
+        return None
+
+    ## Hook called by insert() before the sql command is generated
+    #
+    # Placeholder: leaves the object untouched here.
+    #
+    def _escapeAntislash(self, obj):
+        return None

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableBinPathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinPathAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,257 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.coord.Range import getIdx\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.PathUtils import PathUtils\n+\n+## Bin Adaptator for a path table.\n+#\n+class TableBinPathAdaptator(TablePathAdaptator):\n+\n+ \n+ ## Constructor\n+ #\n+ # @param db db instance\n+ # @param tableName string table name (default = "")\n+ #\n+ def __init__(self, db, tableName = ""):\n+ TablePathAdaptator.__init__(self, db, tableName)\n+ self._table_idx = "%s_idx" % (self._table)\n+ \n+ ## Insert a path instance\n+ #\n+ # @param path a path instance\n+ # @param delayed boolean indicating if the insert must be delayed (default = false) \n+ # \n+ def insert( self, path, delayed = False ):\n+ TablePathAdaptator.insert(self, path, delayed)\n+ self._escapeAntislash(path)\n+ idx = path.range_query.findIdx()\n+ max = path.range_query.getMax()\n+ min = path.range_query.getMin()\n+ strand = path.range_query.isOnDirectStrand()\n+ if delayed:\n+ sql_cmd = \'INSERT DELAYED INTO %s VALUES (%d,%d,"%s",%d,%d,%d)\'\\\n+ % (self._table_idx,\\\n+ path.id,\\\n+ idx,\\\n+ path.range_query.seqname,\\\n+ min,\\\n+ max,\\\n+ strand)\n+ else:\n+ sql_cmd = \'INSERT INTO %s VALUES (%d,%d,"%s",%d,%d,%d)\'\\\n+ % (self._table_idx,\\\n+ path.id,\\\n+ idx,\\\n+ path.range_query.seqname,\\\n+ min,\\\n+ max,\\\n+ strand)\n+ \n+ self._iDb.execute(sql_cmd)\n+ \n+ ## Return a path instances list included in a given region using the bin scheme\n+ #\n+ # @param contig string contig name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lOutPath a path instances list\n+ #\n+ def getPathListIncludedInQueryCoord(self, contig, start, end):\n+ min_coord = min(start, end)\n+ max_coord = max(start, end)\n+ lpath = self.getChainListOverlappingQueryCoord(contig, start, end)\n+ lOutPath 
= []\n+ for i in lpath:\n+ if i.range_query.getMin() > min_coord and \\\n+ i.range_query.getMax() < max_'..b' \n+ sql_cmd += ") and min<=%d and max>=%d;" % (max_coord, min_coord)\n+\n+ \n+ self._iDb.execute(sql_cmd)\n+ res = self._iDb.fetchall()\n+ lnum = []\n+ for i in res:\n+ lnum.append( int(i[0]) )\n+ lpath = self.getPathListFromIdList(lnum)\n+ return lpath\n+\n+ ## Delete path corresponding to a given identifier number\n+ #\n+ # @param num integer identifier number\n+ #\n+ def deleteFromId(self, num):\n+ TablePathAdaptator.deleteFromId(self, num)\n+ sqlCmd=\'delete from %s where path=%d;\' % (self._table_idx, num)\n+ self._iDb.execute(sqlCmd)\n+ \n+ ## Delete path corresponding to a given list of identifier number\n+ #\n+ # @param lNum list list of integer identifier number\n+ #\n+ def deleteFromIdList(self, lNum):\n+ if lNum == []:\n+ return\n+ TablePathAdaptator.deleteFromIdList(self, lNum)\n+ sqlCmd = \'delete from %s where path=%d\' % (self._table_idx, lNum[0])\n+ for i in lNum[1:]:\n+ sqlCmd += " or path=%d" % (i)\n+ sqlCmd += ";"\n+ self._iDb.execute(sqlCmd)\n+ \n+ ## Join two path by changing id number of id1 and id2 path to the least of id1 and id2\n+ #\n+ # @param id1 integer id path number\n+ # @param id2 integer id path number\n+ # @return newId integer minimum of id1 id2\n+ # @note this method modify the ID even if this one not existing in the path table \n+ # \n+ def joinTwoPaths(self, id1, id2):\n+ TablePathAdaptator.joinTwoPaths(self, id1, id2)\n+ if id1 < id2:\n+ newId = id1\n+ oldId = id2\n+ else:\n+ newId = id2\n+ oldId = id1\n+ sqlCmd = \'UPDATE %s SET path=%d WHERE path=%d\' % (self._table_idx, newId, oldId)\n+ self._iDb.execute(sqlCmd)\n+ return newId\n+ \n+ ## Get a new id number\n+ #\n+ # @return newId integer max Id in path table + 1\n+ #\n+ def getNewId(self):\n+ sqlCmd = \'select max(path) from %s;\' % (self._table_idx)\n+ self._iDb.execute(sqlCmd)\n+ maxId = self._iDb.fetchall()[0][0]\n+ if maxId == None:\n+ maxId = 0\n+ newId = 
int(maxId) + 1\n+ return newId\n+ \n+ ## Give a list of Set instances included in a given region\n+ #\n+ # @param query string query name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lSet list of Set instances\n+ #\n+ def getSetListIncludedInQueryCoord(self, query, start, end):\n+ lPath=self.getPathListIncludedInQueryCoord(query, start, end)\n+ lSet = PathUtils.getSetListFromQueries(lPath) \n+ return lSet\n+ \n+ ## Give a list of Set instances overlapping a given region\n+ #\n+ # @param query string query name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lSet list of Set instances\n+ #\n+ def getSetListOverlappingQueryCoord(self, query, start, end):\n+ lPath = self.getPathListOverlappingQueryCoord(query, start, end)\n+ lSet = PathUtils.getSetListFromQueries(lPath)\n+ return lSet\n+ \n+ ## Give a list of identifiers contained in the table\n+ #\n+ # @return lId integer list\n+ #\n+ def getIdList(self):\n+ sqlCmd = "SELECT DISTINCT path from %s;" % (self._table_idx)\n+ lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )\n+ return lId\n+ \n+ ## Give a list of the distinct query names present in the table\n+ #\n+ # @return lDistinctQueryNames string list\n+ #\n+ def getQueryList(self):\n+ lDistinctQueryNames = self._getDistinctTypeNamesList("query")\n+ return lDistinctQueryNames\n+ \n+ def _getDistinctTypeNamesList( self, type ):\n+ sqlCmd = "SELECT DISTINCT contig FROM %s" % ( self._table_idx )\n+ lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)\n+ return lDistinctTypeNames\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableBinSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,265 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+from commons.core.coord.SetUtils import SetUtils\n+\n+## Adaptator for Set tables with bin indexes\n+#\n+class TableBinSetAdaptator(TableSetAdaptator):\n+ \n+ ## constructor\n+ #\n+ # @param iDb DbMySql instance instance of DbMySql\n+ # @param tableName string table name (default = "")\n+ #\n+ def __init__(self, iDb, tableName = ""):\n+ TableSetAdaptator.__init__(self, iDb, tableName)\n+ self._table_idx = "%s_idx" % (self._table)\n+ \n+ ## Insert a set instance in a set bin table\n+ # \n+ # @param iSet set instance an instance of set object\n+ # @param delayed boolean an insert delayed or not\n+ #\n+ def insASetInSetAndBinTable(self, iSet, delayed = False):\n+ self.insert(iSet, delayed)\n+ iSet.seqname = iSet.seqname.replace("\\\\", "\\\\\\\\")\n+ iSet.name = iSet.name.replace("\\\\", "\\\\\\\\")\n+ bin = iSet.getBin()\n+ max = iSet.getMax()\n+ min = iSet.getMin()\n+ strand = iSet.isOnDirectStrand()\n+ sql_prefix = \'\'\n+ if delayed:\n+ sql_prefix = \'INSERT DELAYED INTO \'\n+ else:\n+ sql_prefix = \'INSERT INTO \'\n+ sql_cmd = sql_prefix + \'%s VALUES (%d,%f,"%s",%d,%d,%d)\'\\\n+ %(self._table_idx,\\\n+ iSet.id,\\\n+ bin,\\\n+ iSet.seqname,\\\n+ min,\\\n+ max,\\\n+ strand)\n+ self._iDb.execute(sql_cmd)\n+\n+ ## Delete set corresponding to a given identifier number in set and bin set table\n+ # @param id integer identifier number\n+ # @note old name was delSet_from_num\n+ #\n+ def deleteFromIdFromSetAndBinTable(self, id):\n+ self.deleteFromId(id)\n+ sql_cmd = \'delete from %s where path=%d\' % (self._table_idx, id)\n+ self._iDb.execute(sql_cmd)\n+\n+ ## Delete path corresponding to a given list of identifier number\n+ #\n+ # @param lId integer list list of identifier number\n+ # @note old name was delSet_from_listnum\n+ #\n+ def 
deleteFromListIdFromSetAndBinTable(self, lId):\n+ if lId != []:\n+ self.deleteFromIdList(lId)\n+ sql_cmd = \'delete from %s where path=%d\' % (self._table_idx, lId[0])\n+ for i in lId[1:]:\n+ sql_cmd += " or path=%d" % (i)\n+ self.'..b"has been changed : I added the two first lines\n+ #\n+ def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end):\n+ min_coord = min(start,end)\n+ max_coord = max(start,end)\n+ lSet = self.getSetListFromQueryCoord(contig, start, end) \n+ lSetStrictlyIncluded = []\n+ for iSet in lSet:\n+ if iSet.getMin() > min_coord and \\\n+ iSet.getMax() < max_coord:\n+ lSetStrictlyIncluded.append(iSet)\n+ \n+ return lSetStrictlyIncluded\n+ \n+ ## Get a list of the identifier Id contained in the table bin\n+ #\n+ # @return lId list of int list of identifier\n+ # @note old name was getSet_num\n+ #\n+ def getIdList(self):\n+ sql_cmd = 'select distinct path from %s;' % (self._table_idx)\n+ self._iDb.execute(sql_cmd)\n+ res = self._iDb.fetchall()\n+ lId = []\n+ for t in res:\n+ lId.append(int(t[0]))\n+ return lId\n+ \n+ ## Get a list of the query sequence name contained in the table bin\n+ #\n+ # @return lSeqName list of string list of query sequence name\n+ # @note old name was getContig_name\n+ #\n+ def getSeqNameList(self):\n+ sql_cmd = 'select distinct contig from %s;' % (self._table_idx)\n+ self._iDb.execute(sql_cmd)\n+ res = self._iDb.fetchall()\n+ lSeqName = []\n+ for t in res:\n+ lSeqName.append(t[0])\n+ return lSeqName\n+ \n+ ## Insert a Set list with the same new identifier in the table bin and set\n+ #\n+ # @note old name was insAddSetList\n+ #\n+ def insertListInSetAndBinTable(self, lSets, delayed = False):\n+ id = self.getNewId()\n+ SetUtils.changeIdInList( lSets, id )\n+ for iSet in lSets:\n+ self.insASetInSetAndBinTable(iSet, delayed)\n+ \n+ ## Insert a set list instances In table Bin and Set and merge all overlapping sets\n+ #\n+ # @param lSets reference seq name\n+ # @note old name was insMergeSetList\n+ # \n+ def 
insertListInSetAndBinTableAndMergeAllSets(self, lSets):\n+ min, max = SetUtils.getListBoundaries(lSets)\n+ oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)\n+ oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)\n+ qhash = SetUtils.getDictOfListsWithIdAsKey(lSets)\n+ for lNewSetById in qhash.values():\n+ found = False\n+ for currentId, oldLsetById in oldQueryhash.items():\n+ if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById):\n+ oldLsetById.extend(lNewSetById)\n+ oldLsetById = SetUtils.mergeSetsInList(oldLsetById)\n+ self.deleteFromIdFromSetAndBinTable(currentId)\n+ found = True\n+ if not found:\n+ self.insertListInSetAndBinTable(lNewSetById)\n+ else:\n+ id = self.getNewId()\n+ SetUtils.changeIdInList(oldLsetById, id)\n+ self.insertListInSetAndBinTable(oldLsetById)\n+ \n+ ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets\n+ #\n+ # @param lSets reference seq name\n+ # @note old name was insDiffSetList\n+ # \n+ def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets):\n+ min, max = SetUtils.getListBoundaries(lSets)\n+ oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)\n+ oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)\n+ newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets)\n+ for lNewSetById in newQueryHash.values():\n+ for lOldSetById in oldQueryHash.values():\n+ if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById):\n+ lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById)\n+ self.insertListInSetAndBinTable(lNewSetById)\n"

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableJobAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,405 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import time\n+import datetime\n+import sys\n+from commons.core.sql.Job import Job \n+from commons.core.sql.TableAdaptator import TableAdaptator\n+\n+## Methods for Job persistence \n+#\n+class TableJobAdaptator(TableAdaptator):\n+ \n+ ## Record a job\n+ #\n+ # @param job Job instance with the job informations\n+ #\n+ def recordJob(self, job):\n+ self.removeJob(job)\n+ sqlCmd = "INSERT INTO %s" % self._table\n+ sqlCmd += " VALUES ("\n+ sqlCmd += " \\"%s\\"," % job.jobid\n+ sqlCmd += " \\"%s\\"," % job.jobname\n+ sqlCmd += " \\"%s\\"," % job.groupid\n+ sqlCmd += " \\"%s\\"," % job.launcher\n+ sqlCmd += " \\"%s\\"," % job.queue\n+ sqlCmd += " \\"%s\\"," % job.lResources\n+ sqlCmd += " \\"waiting\\","\n+ sqlCmd += " \\"%s\\"," % time.strftime("%Y-%m-%d %H:%M:%S")\n+ sqlCmd += " \\"?\\" );"\n+ self._iDb.execute(sqlCmd)\n+ \n+ \n+ ## Remove a job from the job table\n+ #\n+ # @param job: job instance to remove\n+ #\n+ def removeJob(self, job):\n+ qry = "DELETE FROM %s" % self._table\n+ qry += " WHERE groupid=\'%s\'" % job.groupid\n+ qry += " AND jobname=\'%s\'" % job.jobname\n+ qry += " AND launcher=\'%s\';" % job.launcher\n+ self._iDb.execute(qry)\n+ \n+ \n+ ## Set the jobid of a job with the id of SGE\n+ #\n+ # @param job job instance\n+ # @param jobid integer\n+ #\n+ def updateJobIdInDB(self, job, jobid):\n+ #TODO: check if only one job will be updated\n+ qry = "UPDATE %s" % self._table\n+ qry += " SET jobid=\'%i\'" % int(jobid)\n+ qry += " WHERE jobname=\'%s\'" % job.jobname\n+ qry += " AND groupid=\'%s\'" % job.groupid\n+ qry += " AND launcher=\'%s\';" % job.launcher\n+ self._iDb.execute(qry)\n+ \n+ \n+ ## Get a job status\n+ #\n+ # @param job: a Job instance with the job informations\n+ #\n+ def getJobStatus(self, job):\n+ if job.jobid != 0 and job.jobname == "":\n+ job.jobname = job.jobid\n+ job.jobid = 
0\n+ qry = "SELECT status FROM %s" % self._table\n+ qry += " WHERE groupid=\'%s\'" % job.groupid\n+ qry += " AND jobname=\'%s\'" % job.jobname\n+ qry += " AND launcher=\'%s\';" % job.launcher\n+ self._iDb.execute(qry)\n+ res = self._iDb.fetchall()\n+ if len(re'..b'outside the interval: go to next interval (time out) \n+ if delta.seconds >= (nbTimeOuts+1) * timeOutPerJob:\n+ nbTimeOuts += 1\n+ # Job with \'running\' status should be in qstat. Because status in DB is set at \'running\' by the job launched.\n+ if not self.isJobStillHandledBySge(jobid, jobname):\n+ # But if not, let time for the status update (in DB), if the job finished between the query execution and now.\n+ time.sleep( 5 )\n+ # If no update at \'finished\', exit\n+ #TODO: check status in DB\n+ if not self.isJobStillHandledBySge(jobid, jobname):\n+ msg = "ERROR: job \'%s\', supposedly still running, is not handled by SGE anymore" % ( jobid )\n+ msg += "\\nit was launched the %s (> %.2f hours ago)" % ( dateTimeOldestJob, timeOutPerJob/3600.0 )\n+ msg += "\\nthis problem can be due to:"\n+ msg += "\\n* memory shortage, in that case, decrease the size of your jobs;"\n+ msg += "\\n* timeout, in that case, decrease the size of your jobs;"\n+ msg += "\\n* node failure or database error, in that case, launch the program again or ask your system administrator."\n+ sys.stderr.write("%s\\n" % msg)\n+ sys.stderr.flush()\n+ self.cleanJobGroup(groupid)\n+ sys.exit(1)\n+ return nbTimeOuts\n+ \n+ ## Check if a job is still handled by SGE\n+ #\n+ # @param jobid string job identifier\n+ # @param jobname string job name\n+ # \n+ def isJobStillHandledBySge(self, jobid, jobname):\n+ isJobInQstat = False\n+ qstatFile = "qstat_stdout"\n+ cmd = "qstat > %s" % qstatFile\n+ returnStatus = os.system(cmd)\n+ if returnStatus != 0:\n+ msg = "ERROR while launching \'qstat\'"\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit(1)\n+ qstatFileHandler = open(qstatFile, "r")\n+ lLines = qstatFileHandler.readlines()\n+ for line in 
lLines:\n+ tokens = line.split()\n+ if len(tokens) > 3 and tokens[0] == str(jobid) and tokens[2] == jobname[0:len(tokens[2])]:\n+ isJobInQstat = True\n+ break\n+ qstatFileHandler.close()\n+ os.remove(qstatFile)\n+ return isJobInQstat\n+ \n+ def _getQsubCommand(self, job): \n+ cmd = "echo \'%s\' | " % job.launcher\n+ prg = "qsub"\n+ cmd += prg\n+ cmd += " -V"\n+ cmd += " -N %s" % job.jobname\n+ if job.queue != "":\n+ cmd += " -q %s" % job.queue\n+ cmd += " -cwd"\n+ if job.lResources != []:\n+ cmd += " -l \\""\n+ cmd += " ".join(job.lResources)\n+ cmd += "\\""\n+ if job.parallelEnvironment != "":\n+ cmd += " -pe " + job.parallelEnvironment\n+ cmd += " > jobid.stdout"\n+ return cmd\n+ \n+ def _getJobidFromJobManager(self, jobidFileHandler):\n+ return int(jobidFileHandler.readline().split(" ")[2])\n+ \n+\n+class TableJobAdaptatorTorque(TableJobAdaptator): \n+ \n+ def _checkIfJobsTableAndJobsManagerInfoAreConsistent(self, nbTimeOuts, timeOutPerJob, groupid):\n+ return nbTimeOuts\n+ \n+ def _getQsubCommand(self, job): \n+ cmd = "echo \'%s\' | " % job.launcher\n+ prg = "qsub"\n+ cmd += prg\n+ cmd += " -V"\n+ cmd += " -d %s" % os.getcwd()\n+ cmd += " -N %s" % job.jobname\n+ if job.queue != "":\n+ cmd += " -q %s" % job.queue\n+ if job.lResources != []:\n+ cmd += " -l \\""\n+ cmd += " ".join(job.lResources).replace("mem_free","mem")\n+ cmd += "\\""\n+ cmd += " > jobid.stdout"\n+ return cmd\n+\n+ def _getJobidFromJobManager(self, jobidFileHandler):\n+ return int(jobidFileHandler.readline().split(".")[0])\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableJobAdaptatorFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableJobAdaptatorFactory.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,66 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import sys
+from commons.core.sql.TableJobAdaptator import TableJobAdaptatorSGE
+from commons.core.sql.TableJobAdaptator import TableJobAdaptatorTorque
+from commons.core.sql.JobAdaptator import JobAdaptatorSGE
+from commons.core.sql.JobAdaptator import JobAdaptatorTorque
+
+class TableJobAdaptatorFactory(object):
+
+    def createInstance(iDb, jobTableName):
+        if os.environ["REPET_JOB_MANAGER"].lower() == "sge":
+            iTJA = TableJobAdaptatorSGE(iDb, jobTableName)
+        elif os.environ["REPET_JOB_MANAGER"].lower() == "torque":
+            iTJA = TableJobAdaptatorTorque(iDb, jobTableName)
+        else:
+            print "ERROR: unknown jobs manager : $REPET_JOB_MANAGER = %s." % os.environ["REPET_JOB_MANAGER"]
+            sys.exit(1)
+
+        return iTJA
+
+    createInstance = staticmethod(createInstance)
+
+    def createJobInstance():
+        if os.environ["REPET_JOB_MANAGER"].lower() == "sge":
+            iJA = JobAdaptatorSGE()
+        elif os.environ["REPET_JOB_MANAGER"].lower() == "torque":
+            iJA = JobAdaptatorTorque()
+        else:
+            print "ERROR: unknown jobs manager : $REPET_JOB_MANAGER = %s." % os.environ["REPET_JOB_MANAGER"]
+            sys.exit(1)
+
+        return iJA
+
+
+    createJobInstance = staticmethod(createJobInstance)
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableMapAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,193 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableMapAdaptator import ITableMapAdaptator
+from commons.core.coord.Map import Map
+from commons.core.coord.MapUtils import MapUtils
+
+
+## Adaptator for Map table
+#
+class TableMapAdaptator( TableAdaptator, ITableMapAdaptator ):
+
+    ## Give a list of Map instances having a given seq name
+    #
+    # @param seqName string seq name
+    # @return lMap list of instances
+    #
+    def getListFromSeqName( self, seqName ):
+        sqlCmd = "SELECT * FROM %s" % (self._table)
+        colum2Get, type2Get, attr2Get = self._getTypeColumAttr2Get(seqName)
+        sqlCmd += " WHERE " + colum2Get
+        sqlCmd += " = "
+        sqlCmd = sqlCmd + type2Get
+        sqlCmd = sqlCmd % "'" + attr2Get + "'"
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+
+    ## Give a list of Map instances overlapping a given region
+    #
+    # @param query string query name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return list map instances
+    #
+    def getListOverlappingCoord(self, query, start, end):
+        sqlCmd = 'select * from %s where chr="%s" and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d)))  ;' % (self._table, query, start, end, start, end, start, end, start, end, start, end, start, end)
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+
+    ## Give a list of Map instances having a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lMap list of instances
+    #
+    def getMapListFromSeqName(self, seqName):
+        lMap = self.getListFromSeqName( seqName )
+        return lMap
+
+#TODO: Check getListFromSeqName method: uses name instead of seqname
+#    ## Give a list of Map instances having a given sequence name from list
+#    #
+#    # @param lSeqName string sequence name list
+#    # @return lMap list of instances
+#    #
+#    def getMapListFromSeqNameList(self, lSeqName):
+#        lMap = []
+#        [lMap.extend(self.getListFromSeqName(seqName)) for seqName in lSeqName]
+#        return lMap
+
+    ## Give a list of Map instances having a given chromosome
+    #
+    # @param chr string chromosome
+    # @return lMap list of instances
+    #
+    def getMapListFromChr(self, chr):
+        sqlCmd = "SELECT * FROM %s WHERE chr='%s'" % (self._table, chr)
+        lMap = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMap
+
+    ## Give a list of the distinct seqName/chr present in the table
+    #
+    # @return lDistinctContigNames string list
+    #
+    def getSeqNameList(self):
+        sqlCmd = "SELECT DISTINCT chr FROM %s" % ( self._table )
+        lDistinctContigNames = self._iDb.getStringListWithSQLCmd(sqlCmd)
+        return lDistinctContigNames
+
+    ## Return a list of Set instances from a given sequence name
+    #
+    # @param seqName string sequence name
+    # @return lSets list of Set instances
+    #
+    def getSetListFromSeqName( self, seqName ):
+        lMaps = self.getListFromSeqName( seqName )
+        lSets = MapUtils.mapList2SetList( lMaps )
+        return lSets
+
+    ## Give a map instances list overlapping a given region
+    #
+    # @param seqName string seq name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lMap list of map instances
+    #
+    def getMapListOverlappingCoord(self, seqName, start, end):
+        lMap = self.getListOverlappingCoord(seqName, start, end)
+        return lMap
+
+    ## Return a list of Set instances overlapping a given sequence
+    #
+    # @param seqName string sequence name
+    # @param start integer start coordinate
+    # @param end integer end coordinate
+    # @return lSet list of Set instances
+    #
+    def getSetListOverlappingCoord( self, seqName, start, end ):
+        lMaps = self.getListOverlappingCoord( seqName, start, end )
+        lSets = MapUtils.mapList2SetList( lMaps )
+        return lSets
+
+    ## Give a dictionary which keys are Map names and values the corresponding Map instances
+    #
+    # @return dName2Maps dict which keys are Map names and values the corresponding Map instances
+    #
+    def getDictPerName( self ):
+        dName2Maps = {}
+        lMaps = self.getListOfAllMaps()
+        for iMap in lMaps:
+            if dName2Maps.has_key( iMap.name ):
+                if iMap == dName2Maps[ iMap.name ]:
+                    continue
+                else:
+                    msg = "ERROR: in table '%s' two different Map instances have the same name '%s'" % ( self._table, iMap.name )
+                    sys.stderr.write( "%s\n" % ( msg ) )
+                    sys.exit(1)
+            dName2Maps[ iMap.name ] = iMap
+        return dName2Maps
+
+    ## Return a list of Map instances with all the data contained in the table
+    #
+    # @return lMaps list of Map instances
+    #
+    def getListOfAllMaps( self ):
+        sqlCmd = "SELECT * FROM %s" % ( self._table )
+        lMaps = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMaps
+
+    ## Give the end of map as integer
+    #
+    # @return end integer the end of map
+    #
+    def getEndFromSeqName(self, seqName):
+        sqlCmd = "SELECT end FROM %s WHERE chr = '%s'" % (self._table, seqName)
+        end = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return end
+
+    def _getInstanceToAdapt(self):
+        iMap = Map()
+        return iMap
+
+    def _getTypeColumAttr2Get(self, name):
+        colum2Get = 'name'
+        type2Get = '%s'
+        attr2Get = name
+        return colum2Get, type2Get, attr2Get
+
+    def _getTypeAndAttr2Insert(self, map):
+        type2Insert = ("'%s'","'%s'","'%d'","'%d'")
+        attr2Insert = (map.name, map.seqname, map.start, map.end)
+        return type2Insert, attr2Insert
+
+    def _escapeAntislash(self, obj):
+        obj.name = obj.name.replace("\\", "\\\\")
+        obj.seqname = obj.seqname.replace("\\", "\\\\")

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableMatchAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,100 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableMatchAdaptator import ITableMatchAdaptator
+from commons.core.coord.Match import Match
+
+## Adaptator for Match table
+#
+class TableMatchAdaptator( TableAdaptator, ITableMatchAdaptator ):
+
+    ## Give a list of Match instances given a query name
+    #
+    # @param query string sequence name
+    # @return lMatches list of Match instances
+    #
+    def getMatchListFromQuery( self, query ):
+        sqlCmd = "SELECT * FROM %s WHERE query_name='%s';" % ( self._table, query )
+        return self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+
+    ## Give a list of Match instances having the same identifier
+    #
+    # @param id integer identifier number
+    # @return lMatch a list of Match instances
+    #
+    def getMatchListFromId( self, id ):
+        sqlCmd = "SELECT * FROM %s WHERE path='%d';" % ( self._table, id )
+        lMatch = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatch
+
+    ## Give a list of Match instances according to the given list of identifier numbers
+    #
+    # @param lId integer list
+    # @return lMatch a list of Match instances
+    #
+    def getMatchListFromIdList( self, lId ):
+        lMatch=[]
+        if lId == []:
+            return lMatch
+        sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])
+        for i in lId[1:]:
+            sqlCmd += " or path=%d" % (i)
+        sqlCmd += ";"
+        lMatch = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatch
+
+    ## Give the data contained in the table as a list of Match instances
+    #
+    # @return lMatchs list of match instances
+    #
+    def getListOfAllMatches( self ):
+        sqlCmd = "SELECT * FROM %s" % ( self._table )
+        lMatches = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )
+        return lMatches
+
+    def _getInstanceToAdapt(self):
+        iMatch = Match()
+        return iMatch
+
+    def _getTypeAndAttr2Insert(self, match):
+        type2Insert = ("'%s'","'%d'","'%d'","'%d'","'%f'","'%f'","'%s'","'%d'","'%d'","'%d'","'%f'","'%g'","'%d'","'%f'","'%d'")
+        attr2Insert = ( match.range_query.seqname, match.range_query.start, \
+                        match.range_query.end, match.query_length, match.query_length_perc, \
+                        match.match_length_perc, match.range_subject.seqname, match.range_subject.start,\
+                        match.range_subject.end, match.subject_length, match.subject_length_perc, \
+                        match.e_value, match.score, match.identity, \
+                        match.id)
+        return type2Insert, attr2Insert
+
+    def _escapeAntislash(self, obj):
+        obj.range_query.seqname = obj.range_query.seqname.replace("\\", "\\\\")
+        obj.range_subject.seqname = obj.range_subject.seqname.replace("\\", "\\\\")

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TablePathAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,673 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.coord.Path import Path\n+from commons.core.coord.PathUtils import PathUtils\n+from commons.core.sql.TableAdaptator import TableAdaptator\n+from commons.core.sql.ITablePathAdaptator import ITablePathAdaptator\n+\n+\n+## Adaptator for a Path table\n+#\n+class TablePathAdaptator( TableAdaptator, ITablePathAdaptator ):\n+\n+ ## Give a list of Path instances having the same identifier\n+ #\n+ # @param id integer identifier number\n+ # @return lPath a list of Path instances\n+ #\n+ def getPathListFromId( self, id ):\n+ sqlCmd = "SELECT * FROM %s WHERE path=\'%d\';" % ( self._table, id )\n+ lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+ return lPath\n+ \n+ ## Give a list of Path instances according to the given list of identifier numbers\n+ #\n+ # @param lId integer list \n+ # @return lPath a list of Path instances\n+ #\n+ def getPathListFromIdList( self, lId ):\n+ lPath=[]\n+ if lId == []:\n+ return lPath\n+ sqlCmd = "select * from %s where path=%d" % (self._table, lId[0])\n+ for i in lId[1:]:\n+ sqlCmd += " or path=%d" % (i)\n+ sqlCmd += ";"\n+ lPath = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+ return lPath\n+ \n+ ## Give a list of Path instances having the same given query name\n+ #\n+ # @param query string name of the query \n+ # @return lPath a list of Path instances\n+ #\n+ def getPathListFromQuery( self, query ):\n+ lPath = self._getPathListFromTypeName("query", query)\n+ return lPath\n+ \n+ ## Give a list of Path instances having the same given subject name\n+ #\n+ # @param subject string name of the subject \n+ # @return lPath a list of Path instances\n+ #\n+ def getPathListFromSubject( self, subject ):\n+ lPath = self._getPathListFromTypeName("subject", subject)\n+ return lPath\n+ \n+ ## Give a list of the distinct subject names 
present in the table\n+ #\n+ # @return lDistinctSubjectNames string list\n+ #\n+ def getSubjectList(self):\n+ lDistinctSubjectNames = self._getDistinctTypeNamesList("subject")\n+ return lDistinctSubjectNames\n+ \n+ ## Give a list of the distinct query names present in the table\n+ #\n+ # @return lDistinctQueryNames string list\n+ #\n+ def ge'..b'TypeNamesList( self, type ):\n+ sqlCmd = "SELECT DISTINCT %s_name FROM %s" % ( type, self._table )\n+ lDistinctTypeNames = self._iDb.getStringListWithSQLCmd(sqlCmd)\n+ return lDistinctTypeNames\n+ \n+ def _getPathsNbFromTypeName( self, type, typeName ):\n+ sqlCmd = "SELECT COUNT(*) FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+ pathNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )\n+ return pathNb\n+ \n+ def _getIdListFromTypeName( self, type, typeName ):\n+ sqlCmd = "SELECT DISTINCT path FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+ lId = self._iDb.getIntegerListWithSQLCmd( sqlCmd )\n+ return lId\n+ \n+ def _getIdNbFromTypeName( self, type, typeName ):\n+ sqlCmd = "SELECT COUNT( DISTINCT path ) FROM %s WHERE %s_name=\'%s\'" % ( self._table, type, typeName )\n+ idNb = self._iDb.getIntegerWithSQLCmd( sqlCmd )\n+ return idNb\n+ \n+ def _getTypeAndAttr2Insert(self, path):\n+ type2Insert = ("\'%d\'", "\'%s\'", "\'%d\'", "\'%d\'", "\'%s\'", "\'%d\'", "\'%d\'", "\'%g\'", "\'%d\'", "\'%f\'")\n+ if path.range_query.isOnDirectStrand():\n+ queryStart = path.range_query.start\n+ queryEnd = path.range_query.end\n+ subjectStart = path.range_subject.start\n+ subjectEnd = path.range_subject.end\n+ else:\n+ queryStart = path.range_query.end\n+ queryEnd = path.range_query.start\n+ subjectStart = path.range_subject.end\n+ subjectEnd = path.range_subject.start\n+ attr2Insert = ( path.id,\\\n+ path.range_query.seqname,\\\n+ queryStart,\\\n+ queryEnd,\\\n+ path.range_subject.seqname,\\\n+ subjectStart,\\\n+ subjectEnd,\\\n+ path.e_value,\\\n+ path.score,\\\n+ path.identity\\\n+ )\n+ return type2Insert, 
attr2Insert\n+ \n+ def _getInstanceToAdapt(self):\n+ iPath = Path()\n+ return iPath\n+ \n+ def _escapeAntislash(self, obj):\n+ obj.range_query.seqname = obj.range_query.seqname.replace("\\\\", "\\\\\\\\")\n+ obj.range_subject.seqname = obj.range_subject.seqname.replace("\\\\", "\\\\\\\\")\n+ \n+ def _genSqlCmdForTmpTableAccordingToQueryName(self, queryName, tmpTable):\n+ sqlCmd = ""\n+ if queryName == "":\n+ sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s" % (tmpTable, self._table)\n+ else:\n+ sqlCmd = "CREATE TABLE %s SELECT path, query_name, query_start, query_end, subject_name, subject_start, subject_end, e_value, score, (ABS(query_end-query_start)+1)*identity AS identity FROM %s WHERE query_name=\'%s\'" % (tmpTable, self._table, queryName)\n+ return sqlCmd\n+ \n+ ## return a filtered list with only one unique occurrence of path of a given list\n+ #\n+ # @param lPath a list of Path instances\n+ # @return lUniquePath a list of Path instances\n+ #\n+ def getListOfUniqueOccPath(self, lPath):\n+ if len(lPath) < 2 :\n+ return lPath\n+ \n+ sortedListPath = sorted(lPath, key=lambda iPath: ( iPath.range_query.getSeqname(), iPath.range_query.getStart(), iPath.range_query.getEnd(), iPath.range_subject.getSeqname(), iPath.range_subject.getStart(), iPath.range_subject.getEnd()))\n+ lUniquePath = [] \n+ for i in xrange(1, len(sortedListPath)):\n+ previousPath = sortedListPath [i-1]\n+ currentPath = sortedListPath [i]\n+ if previousPath != currentPath:\n+ lUniquePath.append(previousPath)\n+ \n+ if previousPath != currentPath:\n+ lUniquePath.append(currentPath) \n+ \n+ return lUniquePath \n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableSeqAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,185 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import sys
+from commons.core.sql.TableAdaptator import TableAdaptator
+from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator
+from commons.core.coord.SetUtils import SetUtils
+from commons.core.seq.Bioseq import Bioseq
+
+
+## Adaptator for a Seq table
+#
+class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ):
+
+    ## Retrieve all the distinct accession names in a list.
+    #
+    # @return lAccessions list of accessions
+    #
+    def getAccessionsList( self ):
+        sqlCmd = "SELECT DISTINCT accession FROM %s;" % ( self._table )
+        lAccessions = self._getStringListWithSQLCmd(sqlCmd)
+        return lAccessions
+
+    ## Save sequences in a fasta file from a list of accession names.
+    #
+    # @param lAccessions list of accessions
+    # @param outFileName string Fasta file
+    #
+    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
+        outFile = open( outFileName, "w" )
+        for ac in lAccessions:
+            bs = self.getBioseqFromHeader( ac )
+            bs.write(outFile)
+        outFile.close()
+
+    ## Get a bioseq instance given its header
+    #
+    # @param header string name of the sequence ('accession' field in the 'seq' table)
+    # @return bioseq instance
+    #
+    def getBioseqFromHeader( self, header ):
+        sqlCmd = "SELECT * FROM %s WHERE accession='%s';" % ( self._table, header )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        return Bioseq( res[0][0], res[0][1] )
+
+    ## Retrieve the length of a sequence given its name.
+    #
+    # @param accession name of the sequence
+    # @return seqLength integer length of the sequence
+    #
+    def getSeqLengthFromAccession( self, accession ):
+        sqlCmd = 'SELECT length FROM %s WHERE accession="%s"' % ( self._table, accession )
+        seqLength = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return seqLength
+
+    ## Retrieve the length of a sequence given its description.
+    #
+    # @param description of the sequence
+    # @return seqLength integer length of the sequence
+    #
+    def getSeqLengthFromDescription( self, description ):
+        sqlCmd = 'SELECT length FROM %s WHERE description="%s"' % ( self._table, description )
+        seqLength = self._iDb.getIntegerWithSQLCmd(sqlCmd)
+        return seqLength
+
+    ## Retrieve all the accessions with length in a list of tuples
+    #
+    # @return lAccessionLengthTuples list of tuples
+    #
+    def getAccessionAndLengthList(self):
+        sqlCmd = 'SELECT accession, length FROM %s' % self._table
+        self._iDb.execute(sqlCmd)
+        res = self._iDb.fetchall()
+        lAccessionLengthTuples = []
+        for i in res:
+            lAccessionLengthTuples.append(i)
+        return lAccessionLengthTuples
+
+    ## get subsequence according to given parameters
+    #
+    # @param accession
+    # @param start integer
+    # @param end integer
+    # @return bioseq.sequence string
+    #
+    def getSubSequence( self, accession, start, end ):
+        bs = Bioseq()
+        if start <= 0 or end <= 0:
+            print "ERROR with coordinates start=%i or end=%i" % ( start, end )
+            sys.exit(1)
+
+        if accession not in self.getAccessionsList():
+            print "ERROR: accession '%s' absent from table '%s'" % ( accession, self._table )
+            sys.exit(1)
+
+        lengthAccession = self.getSeqLengthFromAccession( accession )
+        if start > lengthAccession or end > lengthAccession:
+            print "ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % ( start, end, accession, lengthAccession )
+            sys.exit(1)
+
+        sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % ( min(start,end), abs(end-start)+ 1, self._table, accession )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        bs.setSequence( res[0][0] )
+        if start > end:
+            bs.reverseComplement()
+        return bs.sequence
+
+    ## get bioseq from given set list
+    #
+    # @param lSets set list of sets
+    # @return bioseq instance
+    #
+    def getBioseqFromSetList( self, lSets ):
+        header = "%s::%i %s " % ( lSets[0].name, lSets[0].id, lSets[0].seqname )
+        sequence = ""
+        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
+        if not lSets[0].isOnDirectStrand():
+            lSortedSets.reverse()
+        for iSet in lSortedSets:
+            header += "%i..%i," % ( iSet.getStart(), iSet.getEnd() )
+            sequence += self.getSubSequence( iSet.seqname, iSet.getStart(), iSet.getEnd() )
+        return Bioseq( header[:-1], sequence )
+
+    ## Return True if the given accession is present in the table
+    #
+    def isAccessionInTable( self, name ):
+        sqlCmd = "SELECT accession FROM %s WHERE accession='%s'" % ( self._table, name )
+        self._iDb.execute( sqlCmd )
+        res = self._iDb.fetchall()
+        return bool(res)
+
+    ## Retrieve all the distinct accession names in a fasta file.
+    #
+    # @param outFileName string Fasta file
+    #
+    def exportInFastaFile(self, outFileName ):
+        lAccessions = self.getAccessionsList()
+        self.saveAccessionsListInFastaFile( lAccessions, outFileName )
+
+    def _getStringListWithSQLCmd( self, sqlCmd ):
+        self._iDb.execute(sqlCmd)
+        res = self._iDb.fetchall()
+        lString = []
+        for i in res:
+            lString.append(i[0])
+        return lString
+
+    def _getTypeAndAttr2Insert(self, bs):
+        type2Insert =  ( "'%s'", "'%s'", "'%s'", "'%i'" )
+        attr2Insert =  (bs.header.split()[0], bs.sequence, bs.header, bs.getLength())
+        return type2Insert, attr2Insert
+
+    def _escapeAntislash(self, obj):
+        pass
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/TableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,215 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+from commons.core.sql.ITableSetAdaptator import ITableSetAdaptator\n+from commons.core.sql.TableAdaptator import TableAdaptator\n+from commons.core.coord.Set import Set\n+\n+\n+## Adaptator for a Set table\n+#\n+class TableSetAdaptator( TableAdaptator, ITableSetAdaptator ):\n+ \n+ ## Give a list of Set instances having a given seq name\n+ #\n+ # @param seqName string seq name\n+ # @return lSet list of instances\n+ #\n+ def getListFromSeqName( self, seqName ):\n+ sqlCmd = "SELECT * FROM %s" % (self._table)\n+ colum2Get, type2Get, attr2Get = self._getTypeColumAttr2Get(seqName)\n+ sqlCmd += " WHERE " + colum2Get\n+ sqlCmd += " = "\n+ sqlCmd = sqlCmd + type2Get\n+ sqlCmd = sqlCmd % "\'" + attr2Get + "\'"\n+ lSet = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+ return lSet\n+ \n+ ## Give a list of set instances overlapping a given region\n+ #\n+ # @param query string query name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lSet list of set instances\n+ #\n+ def getListOverlappingCoord(self, query, start, end):\n+ sqlCmd = \'select * from %s where chr="%s" and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d))) ;\' % (self._table, query, start, end, start, end, start, end, start, end, start, end, start, end)\n+ lSet = self._iDb.getObjectListWithSQLCmd( sqlCmd, self._getInstanceToAdapt )\n+ return lSet\n+\n+ #TODO: to test !!!\n+ ## Give a list of Set instances overlapping a given region\n+ #\n+ # @note whole chains are returned, even if only a fragment overlap with the given region\n+ # @param query string query name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lSets list of 
Path instances\n+ #\n+ def getChainListOverlappingCoord(self, query, start, end):\n+ sqlCmd = "select distinct path from %s where chr=\'%s\' and ((start between least(%d,%d) and greatest(%d,%d) or end between least(%d,%d) and greatest(%d,%d)) or (least(start,end)<=least(%d,%d) and greatest(start,end)>=greatest(%d,%d)));" % (self._table, query,start,end,start,end,start,end,start,end,start,end,start,e'..b'lCmd)\n+ return lDistinctContigNames\n+ \n+ ## Give a list of Set instances having a given seq name\n+ #\n+ # @param seqName string seq name\n+ # @return lSet list of instances\n+ #\n+ def getSetListFromSeqName( self, seqName):\n+ lSets = self.getListFromSeqName(seqName)\n+ return lSets\n+ \n+ ## Give a set instances list with a given identifier number\n+ #\n+ # @param id integer identifier number\n+ # @return lSet list of set instances\n+ #\n+ def getSetListFromId(self, id):\n+ SQLCmd = "select * from %s where path=%d;" % (self._table, id)\n+ return self._iDb.getObjectListWithSQLCmd( SQLCmd, self._getInstanceToAdapt )\n+ \n+ ## Give a set instances list with a list of identifier numbers\n+ #\n+ # @param lId integers list identifiers list numbers\n+ # @return lSet list of set instances\n+ # \n+ def getSetListFromIdList(self,lId):\n+ lSet = []\n+ if lId == []:\n+ return lSet\n+ SQLCmd = "select * from %s where path=%d" % (self._table, lId[0])\n+ for i in lId[1:]:\n+ SQLCmd += " or path=%d" % (i)\n+ SQLCmd += ";"\n+ return self._iDb.getObjectListWithSQLCmd( SQLCmd, self._getInstanceToAdapt )\n+ \n+ ## Return a list of Set instances overlapping a given sequence\n+ # \n+ # @param seqName string sequence name\n+ # @param start integer start coordinate\n+ # @param end integer end coordinate\n+ # @return lSet list of Set instances\n+ #\n+ def getSetListOverlappingCoord( self, seqName, start, end ):\n+ lSet = self.getListOverlappingCoord( seqName, start, end )\n+ return lSet\n+ \n+ ## Delete set corresponding to a given identifier number\n+ #\n+ # @param id integer 
identifier number\n+ # \n+ def deleteFromId(self, id):\n+ sqlCmd = "delete from %s where path=%d;" % (self._table, id)\n+ self._iDb.execute(sqlCmd)\n+ \n+ ## Delete set corresponding to a given list of identifier number\n+ #\n+ # @param lId integers list list of identifier number\n+ # \n+ def deleteFromIdList(self, lId):\n+ if lId == []:\n+ return\n+ sqlCmd = "delete from %s where path=%d" % ( self._table, lId[0] )\n+ for i in lId[1:]:\n+ sqlCmd += " or path=%d"%(i)\n+ sqlCmd += ";"\n+ self._iDb.execute(sqlCmd)\n+ \n+ ## Join two set by changing id number of id1 and id2 set to the least of id1 and id2\n+ #\n+ # @param id1 integer id path number\n+ # @param id2 integer id path number\n+ # \n+ def joinTwoSets(self, id1, id2):\n+ if id1 < id2:\n+ newId = id1\n+ oldId = id2\n+ else:\n+ newId = id2\n+ oldId = id1\n+ sqlCmd = "UPDATE %s SET path=%d WHERE path=%d" % (self._table, newId, oldId)\n+ self._iDb.execute(sqlCmd)\n+ \n+ ## Get a new id number\n+ #\n+ # @return new_id integer max_id + 1 \n+ #\n+ def getNewId(self):\n+ sqlCmd = "select max(path) from %s;" % (self._table)\n+ maxId = self._iDb.getIntegerWithSQLCmd(sqlCmd)\n+ newId = int(maxId) + 1\n+ return newId\n+ \n+ ## Give the data contained in the table as a list of Sets instances\n+ #\n+ # @return lSets list of set instances\n+ #\n+ def getListOfAllSets( self ):\n+ return self.getListOfAllCoordObject()\n+ \n+ def _getInstanceToAdapt(self):\n+ iSet = Set()\n+ return iSet\n+ \n+ def _getTypeColumAttr2Get(self, contig):\n+ colum2Get = \'chr\'\n+ type2Get = \'%s\'\n+ attr2Get = contig\n+ return colum2Get, type2Get, attr2Get\n+ \n+ def _getTypeAndAttr2Insert(self, set):\n+ type2Insert = ("\'%d\'","\'%s\'","\'%s\'","\'%d\'","\'%d\'")\n+ attr2Insert = (set.id, set.name, set.seqname, set.start, set.end)\n+ return type2Insert, attr2Insert\n+\n+ def _escapeAntislash(self, obj):\n+ obj.name = obj.name.replace("\\\\", "\\\\\\\\")\n+ obj.seqname = obj.seqname.replace("\\\\", "\\\\\\\\")\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/TestSuite_sql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/TestSuite_sql.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_DbMySql
+import Test_TableBinPathAdaptator
+import Test_TableMapAdaptator
+import Test_TableMatchAdaptator
+import Test_TablePathAdaptator
+import Test_TableSeqAdaptator
+import Test_TableSetAdaptator
+import Test_F_RepetJob
+import Test_RepetJob
+import Test_TableBinSetAdaptator
+
+def main():
+
+        TestSuite_sql = unittest.TestSuite()
+
+        TestSuite_sql.addTest( unittest.makeSuite( Test_DbMySql.Test_DbMySql, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableBinPathAdaptator.Test_TableBinPathAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMapAdaptator.Test_TableMapAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMatchAdaptator.Test_TableMatchAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableSetAdaptator.Test_TableSetAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableSeqAdaptator.Test_TableSeqAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableMatchAdaptator.Test_TableMatchAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TablePathAdaptator.Test_TablePathAdaptator, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_F_RepetJob.Test_F_RepetJob, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_RepetJob.Test_RepetJob, "test" ) )
+        TestSuite_sql.addTest( unittest.makeSuite( Test_TableBinSetAdaptator.Test_TableBinSetAdaptator, "test" ) )
+
+        runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
+        runner.run( TestSuite_sql )
+
+
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_DbFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbFactory.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,63 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import os
+import unittest
+from commons.core.sql.DbFactory import DbFactory
+
+class Test_DbFactory( unittest.TestCase ):
+
+    def test_createInstance (self):
+        dbInstance = DbFactory.createInstance()
+        expValue = None
+        obsValue = dbInstance
+        self.assertNotEquals(expValue, obsValue)
+
+    def test_createInstance_with_config (self):
+        configFileName = "dummyConfigFileName.cfg"
+        configF = open(configFileName,"w")
+        configF.write("[repet_env]\n")
+        configF.write( "repet_host: %s\n" % ( os.environ["REPET_HOST"] ) )
+        configF.write( "repet_user: %s\n" % ( os.environ["REPET_USER"] ) )
+        configF.write( "repet_pw: %s\n" % ( os.environ["REPET_PW"] ) )
+        configF.write( "repet_db: %s\n" % ( os.environ["REPET_DB"] ) )
+        configF.write( "repet_port: %s\n" % ( os.environ["REPET_PORT"] ) )
+        configF.close()
+
+        dbInstance = DbFactory.createInstance(configFileName)
+        expValue = None
+        obsValue = dbInstance
+        self.assertNotEquals(expValue, obsValue)
+        os.remove(configFileName)
+
+test_suite = unittest.TestSuite()
+test_suite.addTest( unittest.makeSuite( Test_DbFactory ) )
+if __name__ == "__main__":
+    unittest.TextTestRunner(verbosity=2).run( test_suite )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_DbMySql.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbMySql.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1554 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import unittest\n+import time\n+import os\n+from MySQLdb import ProgrammingError\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.DbMySql import TABLE_SCHEMA_DESCRIPTOR\n+from commons.core.sql.DbMySql import TABLE_TYPE_SYNONYMS\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.coord.Path import Path\n+\n+class Test_DbMySql( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._iDb = DbMySql( )\n+ self._uniqId = "%s" % time.strftime("%Y%m%d%H%M%S")\n+\n+ def tearDown( self ):\n+ if self._iDb.db.open:\n+ self._iDb.close()\n+ self._iDb = None\n+ \n+ def test_execute_syntax_error(self):\n+ expErrorMsg = "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'CHAUD TABLES\' at line 1"\n+ obsErrorMsg = ""\n+ sqlCmd = "CHAUD TABLES"\n+ try:\n+ self._iDb.execute(sqlCmd)\n+ except ProgrammingError as excep:\n+ obsErrorMsg = excep.args[1]\n+ \n+ self.assertEquals(expErrorMsg, obsErrorMsg)\n+\n+ def test_execute_with_1_retry(self):\n+ tableName = "dummyTable%s" % self._uniqId\n+ sqlCmd = "CREATE TABLE %s (dummyColumn varchar(255))" % tableName\n+ self._iDb.close()\n+ self._iDb.execute(sqlCmd)\n+ self.assertTrue(self._iDb.doesTableExist(tableName))\n+ self._iDb.dropTable(tableName)\n+\n+ def test_setAttributesFromConfigFile(self):\n+ expHost = "dummyHost"\n+ expUser = "dummyUser"\n+ expPw = "dummyPw"\n+ expDb = "dummyDb"\n+ expPort = 1000\n+ \n+ configFileName = "dummyConfigFileName.cfg"\n+ f = open( configFileName, "w" )\n+ f.write("[repet_env]\\n")\n+ f.write("repet_host: " + expHost + "\\n")\n+ f.write("repet_user: " + expUser + "\\n")\n+ f.write("repet_pw: " + expPw + "\\n")\n+ f.write("repet_db: " + expDb + "\\n")\n+ f.write("repet_port: " + str(expPort) + "\\n")\n+ f.close()\n+ \n+ 
self._iDb.setAttributesFromConfigFile(configFileName)\n+ \n+ obsHost = self._iDb.host\n+ obsUser = self._iDb.user\n+ obsPw = self._iDb.passwd\n+ obsDb = self._iDb.dbname\n+ obsPort = self._iDb.port\n+ \n+ os.remove(configFileName)\n+ \n+ self.asse'..b'l_r4.3: 3.73%; TermRepeats: non-termLTR: 1701; SSRCoverage=0.14<0.75)\\n")\n+ \n+ self._iDb.createTable(tableName, "classif", fileName)\n+ self.assertTrue(self._iDb.doesTableExist(tableName))\n+ \n+ expColumnNb = 8\n+ sqlCmd = "DESC %s;" % tableName\n+ self._iDb.execute(sqlCmd)\n+ res = self._iDb.fetchall()\n+ obsColumnNb = len(res)\n+ self.assertEquals(expColumnNb, obsColumnNb)\n+ \n+ expSize = 3\n+ obsSize = self._iDb.getSize(tableName)\n+ self.assertEquals(expSize, obsSize)\n+ \n+ expLIndex = ["iseq_name", "istatus", "iclass", "iorder", "icomp"]\n+ sqlCmd = "SHOW INDEX FROM %s" % tableName\n+ self._iDb.execute(sqlCmd)\n+ res = self._iDb.cursor.fetchall()\n+ obsLIndex = []\n+ for tuple in res:\n+ obsLIndex.append(tuple[2])\n+ self.assertEquals(expLIndex, obsLIndex)\n+ \n+ self._iDb.dropTable(tableName)\n+ os.remove(fileName)\n+ \n+ def test_createClassifIndex(self):\n+ tableName = "dummyclassifTable%s" % self._uniqId\n+ sqlCmd = "CREATE TABLE %s (seq_name varchar(255), length int unsigned, strand char, status varchar(255), class_classif varchar(255), order_classif varchar(255), completeness varchar(255), evidences text);" % tableName\n+ self._iDb.execute(sqlCmd)\n+ expLIndex = ["iseq_name", "istatus", "iclass", "iorder", "icomp"]\n+ \n+ self._iDb.createIndex(tableName, "classif")\n+ \n+ sqlCmd = "SHOW INDEX FROM %s" % tableName\n+ self._iDb.execute(sqlCmd)\n+ res = self._iDb.cursor.fetchall()\n+ \n+ obsLIndex = []\n+ for tuple in res:\n+ obsLIndex.append(tuple[2])\n+ self.assertEquals(expLIndex, obsLIndex)\n+ self._iDb.dropTable(tableName)\n+\n+ def test_createBinPathTable(self):\n+ pathFileName = "dummy.path"\n+ with open(pathFileName, "w") as pathF:\n+ 
pathF.write("1\\tqry\\t1\\t100\\tsbj\\t1\\t100\\t1e-123\\t136\\t98.4\\n")\n+ pathF.write("2\\tqry\\t500\\t401\\tsbj\\t1\\t100\\t1e-152\\t161\\t98.7\\n")\n+ \n+ expPathTuple1 = (1, 1000000, "qry", 1, 100, 1)\n+ expPathTuple2 = (2, 1000000, "qry", 401, 500, 1) # change coordinates\n+ expTPathTuples = (expPathTuple1, expPathTuple2)\n+ \n+ pathTableName = "dummy_path"\n+ idxTableName = "dummy_path_idx"\n+ self._iDb.createTable(pathTableName, "path", pathFileName)\n+ self._iDb.createBinPathTable(pathTableName, True)\n+ \n+ sqlCmd = "SELECT * FROM %s" % idxTableName\n+ self._iDb.execute(sqlCmd)\n+ obsTPathTuples = self._iDb.fetchall()\n+ \n+ self._iDb.dropTable(pathTableName)\n+ self._iDb.dropTable(idxTableName)\n+ os.remove(pathFileName)\n+ \n+ self.assertEquals(expTPathTuples, obsTPathTuples)\n+\n+ def test_createBinSetTable(self):\n+ setFileName = "dummy.set"\n+ with open(setFileName, "w") as setF:\n+ setF.write("1\\tseq1\\tchr1\\t1900\\t3900\\n")\n+ setF.write("2\\tseq2\\tchr1\\t2\\t9\\n")\n+ setF.write("3\\tseq3\\tchr1\\t8\\t13\\n")\n+ \n+ expTuple = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+ \n+ setTableName = "dummy_set"\n+ idxTableName = "dummy_set_idx"\n+ self._iDb.createTable(setTableName, "set", setFileName)\n+ self._iDb.createBinSetTable(setTableName, True)\n+ \n+ sqlCmd = "SELECT * FROM %s" % idxTableName\n+ self._iDb.execute(sqlCmd)\n+ obsTuple = self._iDb.fetchall()\n+ \n+ self._iDb.dropTable(setTableName)\n+ self._iDb.dropTable(idxTableName)\n+ os.remove(setFileName)\n+ \n+ self.assertEquals(expTuple, obsTuple)\n+\n+ def _getInstanceToAdapt(self):\n+ iPath = Path()\n+ return iPath\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_DbSQLite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_DbSQLite.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,162 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import time
+from commons.core.sql.DbSQLite import DbSQLite
+
+class Test_DbSQLite(unittest.TestCase):
+    """Unit tests of DbSQLite, run on an instance built with "test.db"."""
+
+    # Field list expected by the tests for a table created with the "job"/"jobs" type.
+    _EXP_JOB_FIELDS = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]
+
+    def setUp( self ):
+        """Create the DbSQLite instance and a timestamp used to suffix table names."""
+        self._iDb = DbSQLite("test.db")
+        self._uniqId = "%s" % time.strftime("%Y%m%d%H%M%S")
+
+    def tearDown( self ):
+        """Close the connection when open() succeeds, then delete the database."""
+        if self._iDb.open():
+            self._iDb.close()
+        self._iDb.delete()
+        self._iDb = None
+
+    def test_open_True(self):
+        self._iDb.close()
+        self.assertTrue( self._iDb.open(1) )
+
+    def test_open_False(self):
+        self._iDb.close()
+        self._iDb.host = "/toto/toto.db"
+        try:
+            self.assertFalse( self._iDb.open(1) )
+        finally:
+            # Always point back to "test.db", even when the assertion fails,
+            # so that tearDown() never works on the bogus path.
+            self._iDb.host = "test.db"
+
+    def test_updateInfoTable(self):
+        tableName = "dummyTable" + self._uniqId
+        info = "Table_for_test"
+
+        self._iDb.updateInfoTable(tableName, info)
+
+        sqlCmd = 'SELECT file FROM info_tables WHERE name = "%s"' % ( tableName )
+        self._iDb.execute( sqlCmd )
+        results = self._iDb.fetchall()
+        obsResult = (info,) in results
+        if obsResult:
+            # Remove the row recorded by this test.
+            sqlCmd = 'DELETE FROM info_tables WHERE name = "%s"' % ( tableName )
+            self._iDb.execute( sqlCmd )
+
+        self.assertTrue( obsResult )
+
+    def test_doesTableExist_True(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        self.assertTrue( self._iDb.doesTableExist(tableName) )
+
+    def test_dropTable(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % tableName
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+        self._iDb.execute( sqlCmd )
+        sqlCmd = 'INSERT INTO info_tables VALUES ("%s","")' % tableName
+        self._iDb.execute( sqlCmd )
+
+        self._iDb.dropTable(tableName)
+        self.assertFalse( self._iDb.doesTableExist(tableName) )
+
+    def test_doesTableExist_False(self):
+        tableName = "dummyTable" + self._uniqId
+        self.assertFalse( self._iDb.doesTableExist(tableName) )
+
+    def test_createJobTable_is_table_created(self):
+        self._iDb.createTable("dummyJobTable", "jobs")
+        isTableCreated = self._iDb.doesTableExist("dummyJobTable")
+        self.assertTrue(isTableCreated)
+
+    def test_createJobTable_field_list(self):
+        self._iDb.createTable("dummyJobTable", "jobs")
+        obsLField = self._iDb.getFieldList("dummyJobTable")
+        self.assertEqual(self._EXP_JOB_FIELDS, obsLField)
+
+    def test_createTable(self):
+        tableName = "dummyJobTable" + self._uniqId
+        self._iDb.createTable(tableName, "job")
+        obsLField = self._iDb.getFieldList(tableName)
+        self.assertEqual(self._EXP_JOB_FIELDS, obsLField)
+
+    def test_createTable_with_overwrite_Job(self):
+        # A table with the same name but another schema already exists:
+        # createTable() is called with its third argument set to True.
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % tableName
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "CREATE TABLE info_tables ( name varchar(255), file varchar(255) )"
+        self._iDb.execute( sqlCmd )
+        sqlCmd = 'INSERT INTO info_tables VALUES ("%s","")' % tableName
+        self._iDb.execute( sqlCmd )
+
+        self._iDb.createTable(tableName, "job", True)
+        obsLField = self._iDb.getFieldList(tableName)
+        self.assertEqual(self._EXP_JOB_FIELDS, obsLField)
+
+    def test_getSize_empty_table(self):
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        expSize = 0
+        obsSize = self._iDb.getSize(tableName)
+        self.assertEqual( expSize, obsSize )
+
+    def test_getSize_one_rows(self):
+        tableName = "dummyJobTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        sqlCmd = "INSERT INTO %s (dummyColumn) VALUES ('toto')" % tableName
+        self._iDb.execute( sqlCmd )
+        expSize = 1
+        obsSize = self._iDb.getSize(tableName)
+        self.assertEqual( expSize, obsSize )
+
+    def test_isEmpty_True(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % ( tableName )
+        self._iDb.execute( sqlCmd )
+        self.assertTrue(self._iDb.isEmpty(tableName))
+
+    def test_isEmpty_False(self):
+        tableName = "dummyTable" + self._uniqId
+        sqlCmd = "CREATE TABLE %s ( dummyColumn varchar(255) )" % (tableName)
+        self._iDb.execute(sqlCmd)
+        sqlCmd = "INSERT INTO %s (dummyColumn) VALUES ('toto')" % tableName
+        self._iDb.execute(sqlCmd)
+        self.assertFalse(self._iDb.isEmpty(tableName))
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_F_JobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_F_JobAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,91 @@
+from commons.core.launcher.WriteScript import WriteScript
+from commons.core.sql.Job import Job
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+import sys
+import stat
+import os
+import time
+import unittest
+import glob
+
+class Test_F_TableJobAdaptator(unittest.TestCase):
+    """Functional tests submitting jobs through the adaptator returned by
+    TableJobAdaptatorFactory.createJobInstance()."""
+
+    def setUp(self):
+        self._jobTableName = "dummyJobTable"
+        self._iJA = TableJobAdaptatorFactory.createJobInstance()
+
+    def tearDown(self):
+        pass
+
+    def test_submitJob(self):
+        """Submit three jobs and check each one left an error and an output file."""
+        lJobs = []
+        for jobName in ("job1", "job2", "job3"):
+            iJob = self._createJobInstance(jobName)
+            self._createLauncherFile(iJob, self._iJA)
+            lJobs.append(iJob)
+
+        for iJob in lJobs:
+            self._iJA.submitJob( iJob, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+
+        # Fixed delay left for the submitted jobs to complete.
+        time.sleep(120)
+
+        lHasBothFiles = [self._hasErrorAndOutputFiles(iJob) for iJob in lJobs]
+
+        # Clean up before asserting, so that a failure leaves no file behind.
+        self._removeTestFiles(lJobs)
+        for hasBothFiles in lHasBothFiles:
+            self.assertTrue(hasBothFiles)
+
+    def test_submit_and_waitJobGroup(self):
+        """Submit one job, wait for its group, then check its error and output files."""
+        iJob = self._createJobInstance("test")
+        self._createLauncherFile(iJob, self._iJA)
+
+        self._iJA.submitJob( iJob, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+        self._iJA.waitJobGroup(iJob.groupid, 0, 2)
+
+        hasBothFiles = self._hasErrorAndOutputFiles(iJob)
+        self._removeTestFiles([iJob])
+        self.assertTrue(hasBothFiles)
+
+    def _hasErrorAndOutputFiles(self, iJob):
+        """Return True when files named "<jobname>.e*" and "<jobname>.o*" both exist."""
+        lErrorFiles = glob.glob(iJob.jobname + ".e*")
+        lOutputFiles = glob.glob(iJob.jobname + ".o*")
+        return len(lErrorFiles) != 0 and len(lOutputFiles) != 0
+
+    def _removeTestFiles(self, lJobs):
+        """Remove the launcher scripts and the error/output files of the given jobs.
+
+        Only files belonging to the tests are removed: a blanket "*.e* *.o*"
+        pattern would also delete unrelated files of the working directory.
+        """
+        lPatterns = ["launcherFileTest*.py"]
+        for iJob in lJobs:
+            lPatterns.append(iJob.jobname + ".e*")
+            lPatterns.append(iJob.jobname + ".o*")
+        for pattern in lPatterns:
+            for fileName in glob.glob(pattern):
+                if os.path.isfile(fileName):
+                    os.remove(fileName)
+
+    def _createJobInstance(self, name):
+        """Build the Job used by the tests; its launcher file is located in the working directory."""
+        lResources = []
+        # Extra resource requested only on this specific host.
+        if os.environ.get("HOSTNAME") == "compute-2-46.local":
+            lResources.append("test=TRUE")
+        return Job(0, name, "test", "", "log = os.system(\"date;sleep 5;date\")", "%s/launcherFileTest_%s.py" % (os.getcwd(), name), lResources=lResources)
+
+    def _createLauncherFile(self, iJob, iJA):
+        """Write the launcher script of iJob with WriteScript and make it executable by everyone."""
+        iWriteScript = WriteScript(iJob, iJA, os.getcwd(), os.getcwd(), False, True)
+        iWriteScript.run(iJob.command, "", iJob.launcher)
+        os.chmod(iJob.launcher, stat.S_IRWXU+stat.S_IRWXG+stat.S_IRWXO)
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_F_TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_F_TableJobAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,185 @@
+from commons.core.launcher.WriteScript import WriteScript
+from commons.core.sql.Job import Job
+from commons.core.sql.DbFactory import DbFactory
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+import sys
+import stat
+import os
+import time
+import unittest
+import glob
+
+class Test_F_TableJobAdaptator(unittest.TestCase):
+    """Functional tests of the table job adaptator built by
+    TableJobAdaptatorFactory.createInstance() on the table "dummyJobTable"."""
+
+    def setUp(self):
+        self._jobTableName = "dummyJobTable"
+        self._db = DbFactory.createInstance()
+        self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName)
+
+    def tearDown(self):
+        self._db.dropTable(self._jobTableName)
+        self._db.close()
+
+    def test_submitJob_with_multiple_jobs(self):
+        """Submit three jobs; each must end "finished" with error and output files."""
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        lJobs = []
+        for jobName in ("job1", "job2", "job3"):
+            iJob = _createJobInstance(jobName)
+            _createLauncherFile(iJob, self._iTJA)
+            lJobs.append(iJob)
+
+        for iJob in lJobs:
+            self._iTJA.submitJob( iJob, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )
+
+        # Fixed delay left for the submitted jobs to complete.
+        time.sleep(120)
+
+        lObsJobStatus = [self._iTJA.getJobStatus(iJob) for iJob in lJobs]
+        lHasBothFiles = [self._hasErrorAndOutputFiles(iJob) for iJob in lJobs]
+
+        # Clean up before any assertion, so that a failure leaves no file behind.
+        self._removeTestFiles(lJobs)
+
+        expJobStatus = "finished"
+        for obsJobStatus in lObsJobStatus:
+            self.assertEqual(expJobStatus, obsJobStatus)
+        for hasBothFiles in lHasBothFiles:
+            self.assertTrue(hasBothFiles)
+
+    def test_submitJob_job_already_submitted(self):
+        """Submitting a job that is already recorded must raise SystemExit."""
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        iJob = _createJobInstance("job")
+        self._iTJA.recordJob(iJob)
+
+        self.assertRaises(SystemExit, self._iTJA.submitJob, iJob)
+
+    def test_waitJobGroup_with_error_job_maxRelaunch_two(self):
+        """A job recorded with status "error" must end "finished" after waitJobGroup()."""
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        iJob = _createJobInstance("job")
+        _createLauncherFile(iJob, self._iTJA)
+
+        self._iTJA.recordJob(iJob)
+        self._iTJA.changeJobStatus(iJob, "error")
+
+        self._iTJA.waitJobGroup(iJob.groupid, 0, 2)
+
+        time.sleep(120)
+
+        obsJobStatus = self._iTJA.getJobStatus(iJob)
+        hasBothFiles = self._hasErrorAndOutputFiles(iJob)
+
+        # Clean up before any assertion, so that a failure leaves neither the
+        # job record nor the generated files behind.
+        self._iTJA.removeJob(iJob)
+        self._removeTestFiles([iJob])
+
+        expJobStatus = "finished"
+        self.assertEqual(expJobStatus, obsJobStatus)
+        self.assertTrue(hasBothFiles)
+
+    def _hasErrorAndOutputFiles(self, iJob):
+        """Return True when files named "<jobname>.e*" and "<jobname>.o*" both exist."""
+        lErrorFiles = glob.glob(iJob.jobname + ".e*")
+        lOutputFiles = glob.glob(iJob.jobname + ".o*")
+        return len(lErrorFiles) != 0 and len(lOutputFiles) != 0
+
+    def _removeTestFiles(self, lJobs):
+        """Remove the launcher scripts and the error/output files of the given jobs.
+
+        Only files belonging to the tests are removed: a blanket "*.e* *.o*"
+        pattern would also delete unrelated files of the working directory.
+        """
+        lPatterns = ["launcherFileTest*.py"]
+        for iJob in lJobs:
+            lPatterns.append(iJob.jobname + ".e*")
+            lPatterns.append(iJob.jobname + ".o*")
+        for pattern in lPatterns:
+            for fileName in glob.glob(pattern):
+                if os.path.isfile(fileName):
+                    os.remove(fileName)
+
+class Test_F_TableJobAdaptator_SGE(unittest.TestCase):
+    """Functional tests that only apply when REPET_JOB_MANAGER is set to SGE."""
+
+    def setUp(self):
+        # Skip instead of exiting: an unset variable no longer raises KeyError,
+        # and the test is reported as skipped rather than ending in SystemExit.
+        jobManager = os.environ.get("REPET_JOB_MANAGER", "")
+        if jobManager.lower() != "sge":
+            self.skipTest("jobs manager is not SGE: REPET_JOB_MANAGER = %s." % jobManager)
+        self._jobTableName = "dummyJobTable"
+        self._db = DbFactory.createInstance()
+        self._db.createTable(self._jobTableName, "jobs", overwrite = True)
+        self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName)
+        self._iJob = _createJobInstance("job")
+        _createLauncherFile(self._iJob, self._iTJA)
+
+    def tearDown(self):
+        self._db.dropTable(self._jobTableName)
+        self._db.close()
+
+    def test_waitJobGroup_with_several_nbTimeOut_waiting(self):
+        """A job recorded as "running" but never submitted must make waitJobGroup() exit with an error message."""
+        self._iTJA.recordJob(self._iJob)
+        self._iTJA.changeJobStatus(self._iJob, "running")
+
+        expMsg = "ERROR: job '%s', supposedly still running, is not handled by SGE anymore\n" % self._iJob.jobid
+
+        obsError = "obsError.txt"
+        stderrRef = sys.stderr
+        isSysExitRaised = False
+        obsErrorHandler = open(obsError, "w")
+        try:
+            # Capture what is written on stderr during the call.
+            sys.stderr = obsErrorHandler
+            try:
+                self._iTJA.waitJobGroup(self._iJob.groupid, timeOutPerJob = 3)
+            except SystemExit:
+                isSysExitRaised = True
+        finally:
+            # Restore stderr and close the capture file whatever happened.
+            sys.stderr = stderrRef
+            obsErrorHandler.close()
+
+        with open(obsError, "r") as obsErrorHandler:
+            obsMsg = obsErrorHandler.readline()
+
+        os.remove(obsError)
+        os.system("rm launcherFileTest*.py")
+        self.assertTrue(isSysExitRaised)
+        self.assertEqual(expMsg, obsMsg)
+
+    def test_isJobStillHandledBySge_True(self):
+        self._iTJA.submitJob(self._iJob)
+        isJobHandledBySge = self._iTJA.isJobStillHandledBySge(self._iJob.jobid, self._iJob.jobname)
+        os.system("rm launcherFileTest*.py")
+        self.assertTrue(isJobHandledBySge)
+
+    def test_isJobStillHandledBySge_False(self):
+        # The job is only recorded in the table, never submitted.
+        self._iTJA.recordJob(self._iJob)
+        isJobHandledBySge = self._iTJA.isJobStillHandledBySge(self._iJob.jobid, self._iJob.jobname)
+        os.system("rm launcherFileTest*.py")
+        self.assertFalse(isJobHandledBySge)
+
+def _createJobInstance(name):
+    """Build the Job used by the tests; its launcher file is located in the working directory."""
+    # Extra resource requested only on this specific host.
+    isOnTestNode = os.environ.get("HOSTNAME") == "compute-2-46.local"
+    lResources = ["test=TRUE"] if isOnTestNode else []
+    command = "log = os.system(\"date;sleep 5;date\")"
+    launcherFile = "%s/launcherFileTest_%s.py" % (os.getcwd(), name)
+    return Job(0, name, "test", "", command, launcherFile, lResources=lResources)
+
+def _createLauncherFile(iJob, iTJA):
+    """Write the launcher script of iJob with WriteScript and give everyone read/write/execute rights on it."""
+    currentDir = os.getcwd()
+    iWriteScript = WriteScript(iJob, iTJA, currentDir, currentDir)
+    iWriteScript.run(iJob.command, "", iJob.launcher)
+    # The three masks use distinct bits, so OR-ing them equals their sum.
+    allRights = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
+    os.chmod(iJob.launcher, allRights)
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_Job.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_Job.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,30 @@
+import unittest
+from commons.core.sql.Job import Job
+
+class Test_Job(unittest.TestCase):
+    """Unit tests of the equality comparison of Job instances."""
+
+    def test__eq__(self):
+        # Keyword arguments of the reference job.
+        dRefArgs = {"jobid": 0, "jobname": "test", "groupid": "test", "queue": "test",
+                    "command": "test", "launcherFile": "test", "node": "test",
+                    "lResources": "mem_free=1G"}
+        self._job = Job(**dRefArgs)
+        self.assertEqual( self._job, Job(**dRefArgs) ) # same data
+
+        # Each entry changes (or adds) exactly one argument with respect to
+        # the reference job; the resulting job must compare as different.
+        lChanges = [("jobid", 1),
+                    ("jobname", "test1"),
+                    ("groupid", "test1"),
+                    ("queue", "test1"),
+                    ("command", "test1"),
+                    ("launcherFile", "test1"),
+                    ("node", "test1"),
+                    ("lResources", "mem_free=2G"),
+                    ("parallelEnvironment", "multithread 6")]
+        for argName, newValue in lChanges:
+            dArgs = dict(dRefArgs)
+            dArgs[argName] = newValue
+            self.assertNotEqual( self._job, Job(**dArgs) )
+
+# Run the tests of this module when the file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableBinPathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableBinPathAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1244 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TableBinPathAdaptator import TableBinPathAdaptator\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbFactory import DbFactory\n+\n+class Test_TableBinPathAdaptator( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+ self._db = DbFactory.createInstance()\n+ self._table = "dummyPathTable_%s" % self._uniqId\n+ self._table_idx = "dummyPathTable_%s_idx" % self._uniqId\n+ \n+ def tearDown( self ):\n+ self._db.dropTable(self._table)\n+ self._db.dropTable(self._table_idx)\n+ self._db.close()\n+ \n+ #TODO: strand ?!? How does it work ?\n+ def test_insert_QryRevSbjDir( self ):\n+ tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p1 = Path()\n+ p1.setFromTuple(tuple)\n+\n+ tuple = ("1", "chr1", "250", "100", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p2 = Path()\n+ p2.setFromTuple(tuple)\n+ \n+ tuple = ("2", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+ p3 = Path()\n+ p3.setFromTuple(tuple)\n+ \n+ tuple = ("4", "chr5", "140", "251", "TE5", "140", "251", "2e-14", "14", "73.1")\n+ p4 = Path()\n+ p4.setFromTuple(tuple)\n+ \n+ self._db.createTable( self._table, "path" )\n+ self._db.createBinPathTable(self._table, True)\n+ self._tpA = TableBinPathAdaptator( self._db, self._table )\n+ self._tpA.insert(p1)\n+ self._tpA.insert(p2)\n+ self._tpA.insert(p3)\n+ self._tpA.insert(p4)\n+ \n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._db.execute( sqlCmd )\n+ obsPathTuple = self._db.cursor.fetchall()\n+ expPathTuple = ((1, "chr1", 10, 25, "TE1", 11, 17, 1e-18, 20, 87.4),\n+ (1, "chr1", 100, 250, "TE1", 17, 11, 1e-18, 20, 87.4),\n+ (2, "chr1", 15, 30, "TE2", 10, 13, 5e-24, 34, 93.1),\n+ (4, 
"chr5", 140, 251, "TE5", 140, 251, 2e-14, 14, 73.1),)\n+ self.assertEquals(expPathTuple, obsPathTuple)\n+\n+ sqlCmd = "SELECT * FROM %s_idx" % ( self._table )\n+ self._db.execute( sqlCmd )\n+ obsPathTuple = self._db.cursor'..b'uple(tuple)\n+ \n+ tuple = ("3", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+ p3 = Path()\n+ p3.setFromTuple(tuple)\n+ \n+ self._db.createTable( self._table, "path" )\n+ self._db.createBinPathTable(self._table, True)\n+ self._tpA = TableBinPathAdaptator( self._db, self._table )\n+ self._tpA.insert(p1)\n+ self._tpA.insert(p2)\n+ self._tpA.insert(p3)\n+ \n+ expLSet = []\n+ obsLSet = self._tpA.getSetListOverlappingQueryCoord(\'chr1\', 5000, 6000)\n+ \n+ self.assertEquals(expLSet, obsLSet)\n+ \n+ def test_getSetListOverlappingQueryCoord_one_included_and_two_chain(self):\n+ tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p1 = Path()\n+ p1.setFromTuple(tuple)\n+\n+ tuple = ("2", "chr1", "100", "250", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p2 = Path()\n+ p2.setFromTuple(tuple)\n+\n+ tuple = ("2", "chr1", "1000", "2500", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p3 = Path()\n+ p3.setFromTuple(tuple)\n+\n+ tuple = ("3", "chr1", "50", "150", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p4 = Path()\n+ p4.setFromTuple(tuple)\n+ \n+ tuple = ("4", "chr1", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+ p5 = Path()\n+ p5.setFromTuple(tuple)\n+ \n+ self._db.createTable( self._table, "path" )\n+ self._db.createBinPathTable(self._table, True)\n+ self._tpA = TableBinPathAdaptator( self._db, self._table )\n+ self._tpA.insert(p1)\n+ self._tpA.insert(p2)\n+ self._tpA.insert(p3)\n+ self._tpA.insert(p4)\n+ self._tpA.insert(p5)\n+ \n+ s2 = Set()\n+ s2.setFromTuple(("2","TE1","chr1","100","250"))\n+ s4 = Set()\n+ s4.setFromTuple(("3","TE1","chr1","50","150"))\n+ expLSet = [s2, s4]\n+ obsLSet = self._tpA.getSetListOverlappingQueryCoord(\'chr1\', 95, 300)\n+ \n+ self.assertEquals(expLSet, obsLSet)\n+ 
\n+ def test_getIdList( self ):\n+ p1 = Path()\n+ p1.setFromString( "1\\tchr1\\t1\\t10\\tTE1\\t11\\t17\\t1e-20\\t30\\t90.2\\n" )\n+ p2 = Path()\n+ p2.setFromString( "2\\tchr1\\t2\\t9\\tTE2\\t10\\t13\\t1e-20\\t30\\t90.2\\n" )\n+ p3 = Path()\n+ p3.setFromString( "2\\tchr1\\t12\\t19\\tTE2\\t15\\t22\\t1e-10\\t40\\t94.2\\n" )\n+ p4 = Path()\n+ p4.setFromString( "3\\tchr2\\t8\\t13\\tTE1\\t11\\t17\\t1e-20\\t30\\t90.2\\n" )\n+ \n+ self._db.createTable( self._table, "path" )\n+ self._db.createBinPathTable(self._table, True)\n+ self._tpA = TableBinPathAdaptator( self._db, self._table )\n+ \n+ lPath = [ p1, p2, p3, p4]\n+ self._tpA.insertList(lPath)\n+ \n+ expList = [ 1, 2, 3 ]\n+ obsList = self._tpA.getIdList()\n+ \n+ self.assertEqual( expList, obsList )\n+ \n+ def test_getQueryList(self):\n+ tuple = ("1", "chr1", "10", "25", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p1 = Path()\n+ p1.setFromTuple(tuple)\n+\n+ tuple = ("2", "chr1", "100", "250", "TE1", "11", "17", "1e-18", "20", "87.4")\n+ p2 = Path()\n+ p2.setFromTuple(tuple)\n+ \n+ tuple = ("3", "chr2", "15", "30", "TE2", "10", "13", "5e-24", "34", "93.1")\n+ p3 = Path()\n+ p3.setFromTuple(tuple)\n+ \n+ self._db.createTable( self._table, "path" )\n+ self._db.createBinPathTable(self._table, True)\n+ self._tpA = TableBinPathAdaptator( self._db, self._table )\n+ self._tpA.insert(p1)\n+ self._tpA.insert(p2)\n+ self._tpA.insert(p3)\n+ \n+ expList = [ "chr1", "chr2" ]\n+ obsList = self._tpA.getQueryList()\n+ self.assertEqual( expList, obsList )\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableBinPathAdaptator ) )\n+if __name__ == \'__main__\':\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableBinSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableBinSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,290 @@\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TableBinSetAdaptator import TableBinSetAdaptator\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbFactory import DbFactory\n+\n+class Test_TableBinSetAdaptator(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+ self._iDb = DbFactory.createInstance()\n+ radicalTableName = "dummySetTable"\n+ self._tableName = "%s_%s" % (radicalTableName, self._uniqId)\n+ self._tableName_bin = "%s_idx" % self._tableName\n+ self._setFileName = "dummySetFile_%s" % self._uniqId\n+ setF = open( self._setFileName, "w" )\n+ setF.write("1\\tseq1\\tchr1\\t1900\\t3900\\n")\n+ setF.write("2\\tseq2\\tchr1\\t2\\t9\\n")\n+ setF.write("3\\tseq3\\tchr1\\t8\\t13\\n")\n+ setF.close()\n+ self._iDb.createTable(self._tableName, "set", self._setFileName)\n+ self._iTableBinSetAdaptator = TableBinSetAdaptator(self._iDb, self._tableName)\n+ \n+ def tearDown(self):\n+ self._iDb.dropTable( self._tableName )\n+ self._iDb.dropTable( self._tableName_bin )\n+ self._iDb.close()\n+ if os.path.exists(self._setFileName):\n+ os.remove(self._setFileName)\n+ \n+ def test_insASetInSetAndBinTable(self):\n+ iSet = Set(1, "set1", "seq1", 2, 1)\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.insASetInSetAndBinTable(iSet)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (1L, 1000.0, \'seq1\', 1L, 2L, 0L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (1L, \'set1\', \'seq1\', 2L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ 
self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+ \n+ def test_insASetInSetAndBinTable_delayedCase(self):\n+ iSet = Set(1, "set1", "seq1", 2, 1)\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.insASetInSetAndBinTable(iSet, True)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (1L, 1000.0, \'seq1\', 1L, 2L, 0L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (1L, \'set1\', \'seq1\', 2L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+ \n+ def test_deleteFromIdFromSetAndBinTable(self):\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.deleteFromIdFromSetAndBinTable(2)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (3L, \'seq3\', \'chr1\', 8L, 13L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ '..b' obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (5L, \'seq5\', \'chr1\', 1L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L) )\n+ 
sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+\n+ def test_insertListInSetAndBinTableAndRemoveOverlaps(self):\n+ iSet1 = Set(1, "seq4", "chr1", 100, 390)\n+ iSet2 = Set(2, "seq5", "chr1", 1, 13)\n+ lSet = [iSet1, iSet2]\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (4L, 1000.0, \'chr1\', 100L, 390L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+\n+ def test_insertListInSetAndBinTableAndRemoveOverlaps_Without_Overlaps(self):\n+ iSet1 = Set(1, "seq4", "chr1", 100, 390)\n+ iSet2 = Set(2, "seq5", "chr1", 50, 65)\n+ lSet = [iSet1, iSet2]\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L), (4L, 1000.0, \'chr1\', 100L, 390L, 1L), (5L, 1000.0, \'chr1\', 50L, 65L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, 
obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L), (4L, \'seq4\', \'chr1\', 100L, 390L), (5L, \'seq5\', \'chr1\', 50L, 65L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+\n+ def test_insertListInSetAndBinTableAndRemoveOverlaps_With_Only_Overlaps(self):\n+ iSet1 = Set(1, "seq4", "chr1", 1, 5)\n+ iSet2 = Set(2, "seq5", "chr1", 8, 13)\n+ lSet = [iSet1, iSet2]\n+ self._iDb.createBinSetTable(self._tableName, True)\n+ self._iTableBinSetAdaptator.insertListInSetAndBinTableAndRemoveOverlaps(lSet)\n+ expTupleInBinTable = ((1L, 10000.0, \'chr1\', 1900L, 3900L, 1L), (2L, 1000.0, \'chr1\', 2L, 9L, 1L), (3L, 1000.0, \'chr1\', 8L, 13L, 1L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName_bin )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInBinTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInBinTable, obsTupleInBinTable)\n+ expTupleInSetTable = ((1L, \'seq1\', \'chr1\', 1900L, 3900L), (2L, \'seq2\', \'chr1\', 2L, 9L), (3L, \'seq3\', \'chr1\', 8L, 13L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._tableName )\n+ self._iDb.execute( sqlCmd )\n+ obsTupleInSetTable = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTupleInSetTable, obsTupleInSetTable)\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableJobAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableJobAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,640 @@\n+import unittest\n+import sys\n+import os\n+import time\n+#import stat\n+#import threading\n+from commons.core.sql.DbMySql import DbMySql\n+#from commons.core.sql.DbSQLite import DbSQLite\n+from commons.core.sql.Job import Job\n+from commons.core.utils.FileUtils import FileUtils\n+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n+\n+#class Test_TableJobAdaptator_SQLite( unittest.TestCase ):\n+# \n+# def setUp(self):\n+# self._jobTableName = "dummyJobTable"\n+# self._dbName = "test.db"\n+# self._db = DbSQLite(self._dbName)\n+# self._iTJA = TableJobAdaptator(self._db, self._jobTableName)\n+# if not self._db.doesTableExist(self._jobTableName):\n+# self._db.createJobTable(self._jobTableName)\n+# self._iJob = self._createJobInstance()\n+# \n+# def tearDown(self):\n+# self._iTJA = None\n+# self._db.close()\n+## self._db.delete()\n+# \n+## def test_recordJob(self):\n+## self._iTJA.recordJob(self._iJob)\n+## qryParams = "SELECT jobid, groupid, command, launcher, queue, status, node FROM " + self._jobTableName + " WHERE jobid = ?" 
\n+## params = (self._iJob.jobid,)\n+## self._db.execute(qryParams, params)\n+## tObs = self._db.fetchall()[0]\n+## tExp =(self._iJob.jobid, self._iJob.groupid, self._iJob.command, self._iJob.launcher, self._iJob.queue, "waiting", "?")\n+## self.assertEquals(tExp,tObs)\n+## \n+## def test_removeJob(self):\n+## self._iTJA.recordJob(self._iJob)\n+## self._iTJA.removeJob(self._iJob)\n+## self.assertTrue(self._db.isEmpty(self._jobTableName))\n+## \n+## def test_getJobStatus(self):\n+## self._iTJA.recordJob(self._iJob)\n+## expStatus = "waiting"\n+## obsStatus = self._iTJA.getJobStatus(self._iJob)\n+## self.assertEquals(expStatus, obsStatus)\n+## \n+## def test_getJobStatus_no_job(self):\n+## expStatus = "unknown"\n+## obsStatus = self._iTJA.getJobStatus(self._iJob)\n+## self.assertEquals(expStatus, obsStatus)\n+##\n+## def test_getJobStatus_no_name(self):\n+## iJob = Job( self._jobTableName, 20, "", "groupid", "queue", "command", "launcherFile", "node", "lResources" ) \n+## expStatus = "unknown"\n+## obsStatus = self._iTJA.getJobStatus(iJob)\n+## self.assertEquals(expStatus, obsStatus)\n+## \n+## def test_getJobStatus_two_jobs(self):\n+## # Warning : this case will not append, because recordJob() begin by removeJob()\n+## sqlCmd = "INSERT INTO %s" % self._iJob.tablename\n+## sqlCmd += " VALUES ("\n+## sqlCmd += " \\"%s\\"," % self._iJob.jobid\n+## sqlCmd += " \\"%s\\"," % self._iJob.jobname\n+## sqlCmd += " \\"%s\\"," % self._iJob.groupid\n+## sqlCmd += " \\"%s\\"," % self._iJob.command.replace("\\"","\\\'")\n+## sqlCmd += " \\"%s\\"," % self._iJob.launcher\n+## sqlCmd += " \\"%s\\"," % self._iJob.queue\n+## sqlCmd += " \\"waiting\\","\n+## sqlCmd += " \\"%s\\"," % time.strftime( "%Y-%m-%d %H:%M:%S" )\n+## sqlCmd += " \\"?\\" );"\n+## self._db.execute(sqlCmd)\n+## self._db.execute(sqlCmd)\n+## \n+## expError = "expError.txt"\n+## expErrorHandler = open(expError, "w")\n+## expErrorHandler.write("ERROR while getting job status: non-unique jobs\\n")\n+## 
expErrorHandler.close()\n+## obsError = "obsError.txt"\n+## obsErrorHandler = open(obsError, "w")\n+## stderrRef = sys.stderr\n+## sys.stderr = obsErrorHandler\n+## \n+## isSysExitRaised = False\n+## try:\n+## self._iTJA.getJobStatus(self._iJob)\n+## except SystemExit:\n+## isSysExitRaised = True\n+## \n+## obsErrorHandler.close()\n+## \n+## self.assertTrue(isSysExitRaised)\n+## self.assertTrue(FileUtils.are2FilesIdentical(expError, obsError))\n+## sys.stderr = stderrRef\n+## os.remove(obs'..b':\n+ obs = False\n+ self._iTJA.recordJob(self._iJob)\n+ self._iTJA.changeJobStatus(self._iJob, "error")\n+ try:\n+ self._iTJA.waitJobGroup(self._iJob.groupid, 0, 0)\n+ except SystemExit:\n+ obs = True\n+ self.assertTrue(obs)\n+ \n+ #TODO: how to test ?!?\n+# def test_waitJobGroup_with_error_relaunch(self):\n+# iJob = Job(0, "job1", "groupid", "queue.q", "command", "launcherFile", "node", ["mem_free=10M", "test=TRUE"])\n+# obs = False\n+# self._iTJA.recordJob(iJob)\n+# self._iTJA.changeJobStatus(iJob, "error")\n+# try:\n+# self._iTJA.waitJobGroup(iJob.groupid)\n+# except SystemExit:\n+# obs = True\n+# self.assertTrue(obs)\n+ \n+ def test_updateJobIdInDB(self):\n+ self._iTJA.recordJob(self._iJob)\n+ self._iTJA.updateJobIdInDB(self._iJob, 1000)\n+ qryParams = "SELECT jobid FROM " + self._jobTableName + " WHERE jobname = %s AND queue = %s AND groupid = %s" \n+ params = (self._iJob.jobname, self._iJob.queue, self._iJob.groupid)\n+ self._db.execute(qryParams, params)\n+ tObs = self._db.fetchall()[0]\n+ tExp =(1000,)\n+ self.assertEquals(tExp,tObs)\n+\n+ def test_getNodesListByGroupId(self):\n+ iJob1 = Job(0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources")\n+ iJob2 = Job(1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+ iJob3 = Job(2, "job3", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+ iJob4 = Job(3, "job4", "groupid2", "queue", "command", "launcherFile", "node3", "lResources")\n+ 
self._insertJob(iJob1)\n+ self._insertJob(iJob2)\n+ self._insertJob(iJob3)\n+ self._insertJob(iJob4)\n+ expNodeList = ["node1", "node2"]\n+ obsNodeList = self._iTJA.getNodesListByGroupId("groupid")\n+ self.assertEquals(expNodeList, obsNodeList)\n+\n+ def test_getNodesListByGroupId_empty_list(self):\n+ iJob1 = Job(0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources")\n+ iJob2 = Job(1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources")\n+ iJob3 = Job(2, "job3", "groupid32", "queue", "command", "launcherFile", "node3", "lResources")\n+ self._insertJob(iJob1)\n+ self._insertJob(iJob2)\n+ self._insertJob(iJob3)\n+ expNodeList = []\n+ obsNodeList = self._iTJA.getNodesListByGroupId("groupid3")\n+ self.assertEquals(expNodeList, obsNodeList)\n+ \n+# TODO test TableJobAdaptator._createJobInstance TableJobAdaptator._createLauncherFile\n+ def _insertJob(self, iJob):\n+ self._iTJA = TableJobAdaptatorFactory.createInstance(self._db, self._jobTableName) \n+ self._iTJA.removeJob(iJob)\n+ sqlCmd = "INSERT INTO %s" % self._jobTableName\n+ sqlCmd += " VALUES ("\n+ sqlCmd += " \\"%s\\"," % iJob.jobid\n+ sqlCmd += " \\"%s\\"," % iJob.jobname\n+ sqlCmd += " \\"%s\\"," % iJob.groupid\n+ sqlCmd += " \\"%s\\"," % iJob.launcher\n+ sqlCmd += " \\"%s\\"," % iJob.queue\n+ sqlCmd += " \\"%s\\"," % iJob.lResources\n+ sqlCmd += " \\"waiting\\","\n+ sqlCmd += " \\"%s\\"," % time.strftime("%Y-%m-%d %H:%M:%S")\n+ sqlCmd += " \\"%s\\" );" % iJob.node\n+ self._db.execute(sqlCmd)\n+\n+ def _createJobInstance(self):\n+ return Job(0, "job1", "groupid", "", "command", "launcherFile", "node", ["mem_free=10M"])\n+\n+#class RecordJobThread(threading.Thread):\n+#\n+# def __init__(self, iTableJobAdaptator, iJob):\n+# threading.Thread.__init__(self)\n+# self._iTableJobAdaptator = iTableJobAdaptator\n+# self._iJob = iJob\n+# \n+# def run(self):\n+# self._iTableJobAdaptator.recordJob(self._iJob)\n+# #self._iTableJobAdaptator.submitJob(self._iJob)\n+ \n+if 
__name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableJobAdaptatorFactory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableJobAdaptatorFactory.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,27 @@
+import os
+import unittest
+from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
+from commons.core.sql.DbFactory import DbFactory
+
+class Test_TableJobAdaptatorFactory(unittest.TestCase):
+
+    def test_createInstance_SGE(self):
+        REPET_JOB_MANAGER_Initial_Value = os.environ["REPET_JOB_MANAGER"]
+        os.environ["REPET_JOB_MANAGER"] = "SGE"
+        instance = TableJobAdaptatorFactory.createInstance(DbFactory.createInstance(), "dummyJobTable")
+        obsClassName = instance.__class__.__name__
+        expClassName = "TableJobAdaptatorSGE"
+        os.environ["REPET_JOB_MANAGER"] = REPET_JOB_MANAGER_Initial_Value
+        self.assertEquals(expClassName, obsClassName)
+
+    def test_createInstance_Torque(self):
+        REPET_JOB_MANAGER_Initial_Value = os.environ["REPET_JOB_MANAGER"]
+        os.environ["REPET_JOB_MANAGER"] = "Torque"
+        instance = TableJobAdaptatorFactory.createInstance(DbFactory.createInstance(), "dummyJobTable")
+        obsClassName = instance.__class__.__name__
+        expClassName = "TableJobAdaptatorTorque"
+        os.environ["REPET_JOB_MANAGER"] = REPET_JOB_MANAGER_Initial_Value
+        self.assertEquals(expClassName, obsClassName)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableMapAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableMapAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,250 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.TableMapAdaptator import TableMapAdaptator\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Map import Map\n+from commons.core.coord.Set import Set\n+\n+\n+class Test_TableMapAdaptator( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+ configF = open(self._configFileName, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+ self._iDb = DbMySql( cfgFileName=self._configFileName )\n+ self._table = "dummyMapTable_%s" % ( self._uniqId )\n+ self._tMapA = TableMapAdaptator( self._iDb, self._table )\n+ \n+ \n+ def tearDown( self ):\n+ self._uniqId = None\n+ self._iDb.dropTable( self._table )\n+ self._iDb.close()\n+ self._table = None\n+ self._tMapA = None\n+ os.remove( self._configFileName )\n+ self._configFileName = ""\n+ \n+##################################################################################\n+################## Tests for methods in ITableMapAdaptator #######################\n+################################################################################## \n+\n+ def test_getEndFromSeqName(self):\n+ self._iDb.createTable( self._table, "map", "" )\n+ map1 = Map()\n+ map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+ map2 = Map()\n+ map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+ for m in [ map1, map2]:\n+ 
self._tMapA.insert(m)\n+ expEnd = 20\n+ obsEnd = self._tMapA.getEndFromSeqName("desc2")\n+ self.assertEqual(expEnd, obsEnd) \n+ \n+\n+ def test_getMapListFromSeqName( self ):\n+ self._iDb.createTable( self._table, "map", "" )\n+ map1 = Map()\n+ map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+ map2 = Map()\n+ '..b' map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+ map2 = Map()\n+ map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+ map3 = Map()\n+ map3.setFromString( "name2\\tdesc2\\t1\\t50\\n" )\n+ for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+ explMap = [Set( 1,"name2", "desc2", 1, 20), Set( 2,"name2", "desc2", 1, 50)]\n+ obslMap = self._tMapA.getSetListFromSeqName("name2")\n+ self.assertEqual( explMap, obslMap )\n+ \n+ def test_getMapListOverlappingCoord( self ):\n+ self._iDb.createTable( self._table, "map", "" )\n+ map1 = Map()\n+ map1.setFromString( "name1\\tdesc1\\t70\\t120\\n" )\n+ map2 = Map()\n+ map2.setFromString( "name2\\tdesc1\\t1\\t20\\n" )\n+ map3 = Map()\n+ map3.setFromString( "name3\\tdesc1\\t1\\t50\\n" ) \n+ for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+ explMap = [Map("name2", "desc1", 1, 20), Map("name3", "desc1", 1, 50)]\n+ obslMap = self._tMapA.getMapListOverlappingCoord("desc1", 1, 60)\n+ self.assertEqual( explMap, obslMap )\n+ \n+ def test_getSetListOverlappingCoord( self ):\n+ self._iDb.createTable( self._table, "map", "" )\n+ map1 = Map()\n+ map1.setFromString( "name1\\tdesc1\\t70\\t120\\n" )\n+ map2 = Map()\n+ map2.setFromString( "name2\\tdesc1\\t1\\t20\\n" )\n+ map3 = Map()\n+ map3.setFromString( "name3\\tdesc1\\t1\\t50\\n" ) \n+ for m in [ map1, map2, map3 ]: self._tMapA.insert( m )\n+ explSet = [Set(1, "name2", "desc1", 1, 20), Set(2, "name3", "desc1", 1, 50)]\n+ obslSet = self._tMapA.getSetListOverlappingCoord("desc1", 1, 60)\n+ self.assertEqual( explSet, obslSet )\n+ \n+##################################################################################\n+########################### Tests for other methods 
##############################\n+##################################################################################\n+ \n+ def test_getListOfAllMaps( self ):\n+ self._iDb.createTable( self._table, "map", "" )\n+ map1 = Map()\n+ map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+ map2 = Map()\n+ map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+ for m in [ map1, map2 ]: self._tMapA.insert( m )\n+ lExp = [ map1, map2 ]\n+ lObs = self._tMapA.getListOfAllMaps()\n+ self.assertEqual( lObs, lExp )\n+ \n+ def test_getDictPerNameFromMapFile( self ):\n+ self._iDb.createTable( self._table, "map", "" )\n+ iMap1 = Map( "chunk1", "chromosome1", 1, 100 )\n+ iMap2 = Map( "chunk2", "chromosome1", 91, 190 )\n+ iMap3 = Map( "chunk3", "chromosome2", 1, 100 )\n+ iMap4 = Map( "chunk1", "chromosome1", 1, 100 ) # redundant with iMap1\n+ for iMap in [ iMap1, iMap2, iMap3, iMap4 ]:\n+ self._tMapA.insert( iMap )\n+ dExp = { "chunk1": iMap1, "chunk2": iMap2, "chunk3": iMap3 }\n+ dObs = self._tMapA.getDictPerName()\n+ self.assertEquals( dExp, dObs )\n+ \n+#TODO: Check getListFromSeqName method: uses name instead of seqname\n+# def test_getMapListFromSeqNameList( self ):\n+# self._iDb.createTable( self._table, "map", "" )\n+# map1 = Map()\n+# map1.setFromString( "name1\\tdesc1\\t1\\t120\\n" )\n+# map2 = Map()\n+# map2.setFromString( "name2\\tdesc2\\t1\\t20\\n" )\n+# map3 = Map()\n+# map3.setFromString( "name3\\tdesc2\\t1\\t10\\n" )\n+# map4 = Map()\n+# map4.setFromString( "name4\\tdesc3\\t10\\t200\\n" )\n+# for m in [map1, map2, map3, map4]: self._tMapA.insert( m )\n+# \n+# lMapToRetrieve = ["name1", "desc2"]\n+# lExp = [map1, map2, map3]\n+# lObs = self._tMapA.getMapListFromSeqNameList(lMapToRetrieve)\n+# self.assertEqual( lObs, lExp )\n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableMapAdaptator ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableMatchAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableMatchAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,264 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Match import Match\n+from commons.core.sql.TableMatchAdaptator import TableMatchAdaptator\n+\n+\n+class Test_TableMatchAdaptator( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % (time.strftime("%Y%m%d%H%M%S") , os.getpid())\n+ self._configFileName = "dummyConfigFile_%s" % self._uniqId\n+ self._iDb = DbMySql()\n+ self._table = "dummyMatchTable_%s" % self._uniqId\n+ self._tMatchA = TableMatchAdaptator( self._iDb, self._table )\n+ \n+ def tearDown( self ):\n+ self._uniqId = None\n+ self._iDb.dropTable( self._table )\n+ self._iDb.close()\n+ self._table = None\n+ self._tMatchA = None\n+ \n+##################################################################################\n+################## Tests for methods in ITableMatchAdaptator #####################\n+################################################################################## \n+ def test_insert(self):\n+ match = Match() \n+\n+ tuple = ("QName1", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ \n+ match.setFromTuple(tuple)\n+ \n+ self._iDb.createTable( self._table, "match", "" ) \n+ self._tMatchA.insert( match, False )\n+ \n+ expTMatchTuple = ((\'QName1\', 1L, 5L, 5L, 0.1, 0.2, \'SName1\', 5L, 25L, 20L, 0.15, 1e-20, 15L, 87.2, 1L),)\n+ \n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ obsTmatchTuple = self._iDb.cursor.fetchall()\n+ \n+ self.assertEquals( expTMatchTuple, obsTmatchTuple )\n+ \n+\n+ def test_insert_empty_match(self):\n+ match = Match() \n+\n+ tuple = ("", -1, -1, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ \n+ match.setFromTuple(tuple)\n+ \n+ self._iDb.createTable( self._table, "match", "" ) \n+ self._tMatchA.insert( match, False 
)\n+ \n+ expTMatchTuple = ()\n+ \n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ obsTmatchTuple = self._iDb.cursor.fetchall()\n+ \n+ self.assertEquals( expTMatchTuple, obsTmatchTuple'..b' = Match()\n+ match1.setFromTuple( tuple1 )\n+ match2 = Match()\n+ match2.setFromTuple( tuple2 )\n+ match3 = Match()\n+ match3.setFromTuple( tuple3 )\n+ match4 = Match()\n+ match4.setFromTuple( tuple4 )\n+ lMatch = [ match1, match2, match3, match4 ]\n+ expListMatch = [ match1 ]\n+ self._tMatchA.insertList(lMatch)\n+ \n+ obsListMatch = self._tMatchA.getMatchListFromId(1)\n+ \n+ self.assertEquals(expListMatch, obsListMatch)\n+ \n+ \n+ def test_getMatchListFromIdList_empty_id_list( self ):\n+ self._iDb.createTable( self._table, "match", "" )\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+ tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+ tuple4 = ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+ match1 = Match()\n+ match1.setFromTuple( tuple1 )\n+ match2 = Match()\n+ match2.setFromTuple( tuple2 )\n+ match3 = Match()\n+ match3.setFromTuple( tuple3 )\n+ match4 = Match()\n+ match4.setFromTuple( tuple4 )\n+ lMatch = [ match1, match2, match3, match4 ]\n+ self._tMatchA.insertList(lMatch)\n+ \n+ expList = []\n+ obsList = self._tMatchA.getMatchListFromIdList([])\n+ self.assertEquals(expList, obsList)\n+ \n+ \n+ def test_getMatchListFromIdList( self ):\n+ self._iDb.createTable( self._table, "match", "" )\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+ tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+ tuple4 = ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+ match1 = Match()\n+ 
match1.setFromTuple( tuple1 )\n+ match2 = Match()\n+ match2.setFromTuple( tuple2 )\n+ match3 = Match()\n+ match3.setFromTuple( tuple3 )\n+ match4 = Match()\n+ match4.setFromTuple( tuple4 )\n+ lMatch = [ match1, match2, match3, match4 ]\n+ self._tMatchA.insertList(lMatch)\n+ \n+ lObs = self._tMatchA.getMatchListFromIdList((1, 2, 3))\n+ \n+ lExp = [match1, match2, match3]\n+ self.assertEquals(lExp, lObs)\n+ \n+ def test_getListOfAllMatches( self ):\n+ self._iDb.createTable( self._table, "match", "" )\n+ tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n+ tuple2 = ("QName", 1, 6, 6, 0.2, 0.1, "SName", 6, 26, 10, 0.18, 1e-30, 18, 85.2, 2)\n+ tuple3 = ("QName", 1, 7, 8, 0.1, 0.2, "SName", 5, 20, 15, 0.20, 1e-25, 20, 89.0, 3)\n+ tuple4 = ("QName", 1, 8, 8, 0.1, 0.1, "SName", 5, 15, 10, 0.17, 1e-23, 14, 89.5, 4)\n+ match1 = Match()\n+ match1.setFromTuple( tuple1 )\n+ match2 = Match()\n+ match2.setFromTuple( tuple2 )\n+ match3 = Match()\n+ match3.setFromTuple( tuple3 )\n+ match4 = Match()\n+ match4.setFromTuple( tuple4 )\n+ lMatch = [ match1, match2, match3, match4 ]\n+ expList = [ match1, match2, match3, match4 ]\n+ self._tMatchA.insertList(lMatch)\n+\n+ obsList = self._tMatchA.getListOfAllMatches()\n+ self.assertEqual( expList, obsList )\n+ \n+ \n+ def test_getListOfAllMatches_empty_table( self ):\n+ self._iDb.createTable( self._table, "match", "" )\n+ expList = []\n+ obsList = self._tMatchA.getListOfAllMatches()\n+ self.assertEqual( expList, obsList )\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableMatchAdaptator ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TablePathAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TablePathAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,1376 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n+from commons.core.coord.Path import Path\n+from commons.core.coord.Set import Set\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Range import Range\n+from commons.core.coord.PathUtils import PathUtils\n+from copy import deepcopy\n+\n+class Test_TablePathAdaptator( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+ configF = open(self._configFileName, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+ self._db = DbMySql( cfgFileName = self._configFileName )\n+ self._table = "dummyPathTable_%s" % ( self._uniqId )\n+ self._tpA = TablePathAdaptator( self._db, self._table )\n+ \n+ \n+ def tearDown( self ):\n+ self._uniqId = None\n+ self._db.dropTable( self._table )\n+ self._db.close()\n+ self._table = None\n+ self._tMatchA = None\n+ os.remove( self._configFileName )\n+ self._configFileName = "" \n+ \n+ \n+##################################################################################\n+################## Tests for methods in ITableMapAdaptator #######################\n+################################################################################## \n+ \n+ def test_getPathListFromId( self ):\n+ pathFileName = "dummyPathFile_%s" % ( self._uniqId )\n+ pathF = open( pathFileName, "w" )\n+ 
pathF.write( "1\\tchr1\\t1\\t6\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+ pathF.write( "2\\tchr1\\t1001\\t1006\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+ pathF.write( "2\\tchr1\\t1201\\t1226\\tTE2\\t10\\t26\\t1e-40\\t70\\t87.2\\n" )\n+ pathF.close()\n+ p1 = Path()\n+ p1.setFromString( "2\\tchr1\\t1001\\t1006\\tTE2\\t11\\t16\\t1e-20\\t30\\t90.2\\n" )\n+ p2 = Pat'..b'()\n+ self.assertEqual( expList, obsList )\n+ self._db.dropTable( obsTable )\n+ \n+ \n+ def test_path2PathRangeFromQuery_QryDirSbjRev( self ):\n+ self._db.createTable( self._table, "path" )\n+ p1 = Path()\n+ p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p2a = Path()\n+ p2a.setFromTuple( ( "2", "chr2", "1", "100", "TE2", "109", "10", "1e-20", "163", "92.1" ) )\n+ p2b = Path()\n+ p2b.setFromTuple( ( "2", "chr2", "201", "250", "TE2", "200", "151", "1e-10", "75", "88.7" ) )\n+ for p in [ p1, p2a, p2b ]: self._tpA.insert( p )\n+ p2 = Path()\n+ p2.setFromTuple( ( "2", "chr2", "1", "250", "TE2", "200", "10", "1e-20", "238", "90.96" ) ) # \'merge\' p2a and p2b\n+ expList = [ p2 ]\n+ obsTable = self._tpA._path2PathRangeFromQuery( "chr2" )\n+ self._tpA._table = obsTable\n+ obsList = self._tpA.getListOfAllPaths()\n+ self.assertEqual( obsList, expList )\n+ self._db.dropTable( obsTable )\n+ \n+ \n+ def test_getNbOccurrences( self ):\n+ self._db.createTable( self._table, "path" )\n+ p1 = Path()\n+ p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ \n+ exp = 0\n+ obs = self._tpA.getNbOccurrences( p1 )\n+ self.assertEquals( exp, obs )\n+ \n+ self._tpA.insert( p1 )\n+ exp = 1\n+ obs = self._tpA.getNbOccurrences( p1 )\n+ self.assertEquals( exp, obs )\n+ \n+ self._tpA.insert( p1 )\n+ exp = 2\n+ obs = self._tpA.getNbOccurrences( p1 )\n+ self.assertEquals( exp, obs )\n+ \n+ def test_getListOfUniqueOccPath(self):\n+ \n+ p1 = Path()\n+ p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p2 = Path()\n+ 
p2.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p3 = Path()\n+ p3.setFromTuple( ( "1", "chr1", "2", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p4 = Path()\n+ p4.setFromTuple( ( "2", "chr2", "2", "11", "TE4", "10", "18", "1e-30", "40", "95.0" ) )\n+ lPath = [p1,p2,p3,p4]\n+ \n+ expListPath = deepcopy([p1,p3,p4]) \n+ obsListUniquePath = self._tpA.getListOfUniqueOccPath(lPath)\n+ self.assertEquals( expListPath, obsListUniquePath )\n+\n+ def test_getListOfUniqueOccPath_empty_list(self):\n+ expListPath = [] \n+ obsListUniquePath = self._tpA.getListOfUniqueOccPath([])\n+ self.assertEquals( expListPath, obsListUniquePath )\n+ \n+ def test_getListOfUniqueOccPath_one_item(self):\n+ p1 = Path()\n+ p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ expListPath = deepcopy([p1]) \n+ obsListUniquePath = self._tpA.getListOfUniqueOccPath([p1])\n+ self.assertEquals( expListPath, obsListUniquePath )\n+\n+ def test_getListOfUniqueOccPath_unsorted_list(self):\n+ \n+ p1 = Path()\n+ p1.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p3 = Path()\n+ p3.setFromTuple( ( "1", "chr1", "3", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ p4 = Path()\n+ p4.setFromTuple( ( "2", "chr2", "2", "11", "TE4", "10", "18", "1e-30", "40", "95.0" ) )\n+ p2 = Path()\n+ p2.setFromTuple( ( "1", "chr1", "1", "10", "TE3", "11", "17", "1e-20", "30", "85.0" ) )\n+ \n+ lPath = [p1,p3,p4,p2]\n+ \n+ expListPath = deepcopy([p1,p3,p4]) \n+ obsListUniquePath = self._tpA.getListOfUniqueOccPath(lPath)\n+ self.assertEquals( expListPath, obsListUniquePath )\n+\n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TablePathAdaptator ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableSeqAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableSeqAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,321 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import os\n+import time\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.TableSeqAdaptator import TableSeqAdaptator\n+from commons.core.seq.Bioseq import Bioseq\n+from commons.core.coord.Set import Set\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_TableSeqAdaptator( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self.fileUtils = FileUtils()\n+ self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+ configF = open(self._configFileName, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+ self._db = DbMySql( cfgFileName=self._configFileName )\n+ self._table = "dummySeqTable_%s" % ( self._uniqId )\n+ self._tsA = TableSeqAdaptator( self._db, self._table )\n+ \n+ \n+ def tearDown( self ):\n+ self._db.dropTable( self._table )\n+ self._db.close()\n+ os.remove( self._configFileName )\n+ self._configFileName = ""\n+ \n+ \n+##################################################################################\n+################## Tests for methods in ITableSeqAdaptator #######################\n+##################################################################################\n+ \n+ def test_insert( self ):\n+ bs = Bioseq( "seq1", "AGCGATGACGATGCGAGT" )\n+ self._db.createTable( self._table, "fasta" )\n+ self._tsA.insert( bs )\n+ \n+ expBioseqTuple = (("seq1", "AGCGATGACGATGCGAGT", "seq1", 18L), )\n+ \n+ sqlCmd = "SELECT * 
FROM %s" % ( self._table )\n+ self._db.execute( sqlCmd )\n+ obsBioseqTuple = self._db.cursor.fetchall()\n+ \n+ self.assertEqual( expBioseqTuple, obsBioseqTuple )\n+ \n+ \n+ def test_insertList( self ):\n+ bs1 = Bioseq( "seq1 desc", "AGCGATGACGATGCGAGT" )\n+ bs2 = Bioseq( "seq2", "AGCGATGACGATGCGAGT")\n+ '..b'")\n+ inF.write(">seq2\\n")\n+ inF.write("GCGATGCAGATGACGGCGGATGC\\n")\n+ inF.close()\n+ self._db.createTable( self._table, "fasta", inFileName )\n+ lSeq1 = ("seq1", 18)\n+ lSeq2 = ("seq2", 23)\n+ lExp = [lSeq1,lSeq2]\n+ lObs = self._tsA.getAccessionAndLengthList()\n+ self.assertEqual( lObs, lExp )\n+ os.remove( inFileName )\n+ \n+ \n+ def test_getSeqLengthFromAccessionWithSingleQuote( self ):\n+ inFileName = "dummyFaFile_%s" % ( self._uniqId )\n+ inF = open( inFileName, "w" )\n+ inF.write(">seq1\'\\n")\n+ inF.write("AGCGATGACGATGCGAGT\\n")\n+ inF.write(">seq2\\n")\n+ inF.write("GCGATGCAGATGACGGCGGATGC\\n")\n+ inF.close()\n+ self._db.createTable( self._table, "fasta", inFileName )\n+ exp = 18\n+ obs = self._tsA.getSeqLengthFromAccession( "seq1\'" )\n+ self.assertEqual( obs, exp )\n+ os.remove( inFileName )\n+ \n+ \n+ def test_getSubSequence_directStrand( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ self._tsA.insert( chr )\n+ exp = "TTTGGG"\n+ obs = self._tsA.getSubSequence( "chr2", 13, 18 )\n+ self.assertEqual( exp, obs )\n+ \n+ \n+ def test_getSubSequence_reverseStrand( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ self._tsA.insert( chr )\n+ exp = "CCCAAA"\n+ obs = self._tsA.getSubSequence( "chr2", 18, 13 )\n+ self.assertEqual( exp, obs )\n+ \n+ \n+ def test_getBioseqFromSetList_directStrand( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ 
self._tsA.insert( chr )\n+ lSets = []\n+ lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 1, 10 ) )\n+ lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 16, 25 ) )\n+ exp = Bioseq( "Dm-B-G600-Map3_classI-LTR-incomp::3 chr2 1..10,16..25", "AAAAAAAAAAGGGGGGGGGG" )\n+ obs = self._tsA.getBioseqFromSetList( lSets )\n+ self.assertEqual( exp, obs )\n+ \n+ \n+ def test_getBioseqFromSetList_reverseStrand( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ self._tsA.insert( chr )\n+ lSets = []\n+ lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 10, 1 ) )\n+ lSets.append( Set( 3, "Dm-B-G600-Map3_classI-LTR-incomp", "chr2", 25, 16 ) )\n+ exp = Bioseq( "Dm-B-G600-Map3_classI-LTR-incomp::3 chr2 25..16,10..1", "CCCCCCCCCCTTTTTTTTTT" )\n+ obs = self._tsA.getBioseqFromSetList( lSets )\n+ self.assertEqual( exp, obs )\n+ \n+ \n+ def test_isAccessionInTable_true( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ self._tsA.insert( chr )\n+ \n+ obs = self._tsA.isAccessionInTable( "chr2" )\n+ self.assertTrue( obs )\n+ \n+ \n+ def test_isAccessionInTable_false( self ):\n+ self._db.createTable( self._table, "seq" )\n+ chr = Bioseq()\n+ chr.setHeader( "chr2" )\n+ chr.setSequence( "AAAAAAAAAATTTTTGGGGGGGGGG" )\n+ self._tsA.insert( chr )\n+ \n+ obs = self._tsA.isAccessionInTable( "chr1" )\n+ self.assertFalse( obs )\n+ \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableSeqAdaptator ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Test_TableSetAdaptator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Test_TableSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,330 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import unittest\n+import time\n+import os\n+from commons.core.sql.TableSetAdaptator import TableSetAdaptator\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.coord.Set import Set\n+\n+\n+class Test_TableSetAdaptator( unittest.TestCase ):\n+\n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n+ self._configFileName = "dummyConfigFile_%s" % ( self._uniqId )\n+ configF = open(self._configFileName, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+ self._iDb = DbMySql( cfgFileName=self._configFileName )\n+ self._table = "dummySetTable_%s" % ( self._uniqId )\n+ self._tSetA = TableSetAdaptator( self._iDb, self._table )\n+ \n+ def tearDown( self ):\n+ self._uniqId = None\n+ self._iDb.dropTable( self._table )\n+ self._iDb.close()\n+ self._table = None\n+ self._tSetA = None\n+ os.remove( self._configFileName )\n+ self._configFileName = ""\n+\n+ def test_insert(self):\n+ set2Insert = Set()\n+ set2Insert.id = 1\n+ set2Insert.name = "name1"\n+ set2Insert.seqname = "name2"\n+ set2Insert.start = 1L\n+ set2Insert.end = 50L\n+ self._iDb.createTable( self._table, "set", "" )\n+ self._tSetA.insert( set2Insert, False )\n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ expTsetTuple = ((1, "name1", "name2", 1L, 50L),)\n+ obsTsetTuples = self._iDb.cursor.fetchall()\n+ self.assertEquals(expTsetTuple, obsTsetTuples )\n+ \n+ def test_insertList ( self ):\n+ self._iDb.createTable( 
self._table, "set", "" )\n+ set1 = Set()\n+ set1.setFromString( "1\\tname1\\tdesc1\\t1\\t120\\n" )\n+ set2 = Set()\n+ set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+ lset = [ set1, set2 ]\n+ self._tSetA.insertList( lset )\n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ '..b'3 ]: self._tSetA.insert( m )\n+ lId2del = []\n+ self._tSetA.deleteFromIdList(lId2del)\n+ expTSetTuples = ((1L, \'name1\', \'desc1\', 1L, 120L), (2L, \'name2\', \'desc2\', 1L, 20L), (3L, \'name2\', \'desc3\', 1L, 50L))\n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ obsTsetTuples = self._iDb.cursor.fetchall()\n+ \n+ self.assertEqual( expTSetTuples, obsTsetTuples )\n+ \n+ def test_joinTwoSets(self):\n+ self._iDb.createTable( self._table, "set", "" )\n+ idSet1 = 5\n+ set1 = Set()\n+ set1.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+ idSet2 = 2\n+ set2 = Set()\n+ set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+ lset = [ set1, set2 ]\n+ self._tSetA.insertList( lset )\n+ self._tSetA.joinTwoSets(idSet1, idSet2)\n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ \n+ expTSetTuples = ((2L, "name1", "desc1", 1L, 120L ), (2L, "name2", "desc2", 1L, 20L ))\n+ obsTSetTuples = self._iDb.cursor.fetchall()\n+ \n+ self.assertEqual( expTSetTuples, obsTSetTuples)\n+ self._iDb.dropTable(self._table)\n+ \n+ def test_joinTwoSetsWhereId1InfId2(self):\n+ self._iDb.createTable( self._table, "set", "" )\n+ idSet1 = 2\n+ set1 = Set()\n+ set1.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+ \n+ idSet2 = 5\n+ set2 = Set()\n+ set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+ \n+ lset = [ set1, set2 ]\n+ self._tSetA.insertList( lset )\n+\n+ self._tSetA.joinTwoSets(idSet1, idSet2)\n+ \n+ sqlCmd = "SELECT * FROM %s" % ( self._table )\n+ self._iDb.execute( sqlCmd )\n+ \n+ expTSetTuples = ((2L, "name1", "desc1", 1L, 120L ), (2L, "name2", "desc2", 1L, 20L ))\n+ obsTSetTuples = self._iDb.cursor.fetchall()\n+ \n+ 
self.assertEqual( expTSetTuples, obsTSetTuples)\n+ self._iDb.dropTable(self._table)\n+ \n+ def test_getNewId(self):\n+ self._iDb.createTable( self._table, "set", "" )\n+ set1 = Set()\n+ set1.setFromString( "1\\tname1\\tdesc1\\t1\\t120\\n" ) \n+ set2 = Set()\n+ set2.setFromString( "2\\tname2\\tdesc2\\t1\\t20\\n" )\n+ set3 = Set()\n+ set3.setFromString( "5\\tname1\\tdesc1\\t1\\t120\\n" ) \n+ set4 = Set()\n+ set4.setFromString( "8\\tname2\\tdesc2\\t1\\t20\\n" )\n+ lset = [ set1, set2, set3, set4 ]\n+ self._tSetA.insertList( lset )\n+ expId = 9\n+ obsId = self._tSetA.getNewId()\n+ self.assertEqual( expId, obsId)\n+ self._iDb.dropTable(self._table)\n+ \n+ def test_getNewId_set_null(self):\n+ self._iDb.createTable( self._table, "set", "" )\n+ set1 = Set()\n+ lset = [ set1 ]\n+ self._tSetA.insertList( lset )\n+ expId = 1\n+ obsId = self._tSetA.getNewId()\n+ self.assertEqual( expId, obsId)\n+ self._iDb.dropTable(self._table) \n+ \n+ def test_getListOfAllSets( self ):\n+ self._iDb.createTable( self._table, "set" )\n+ s1 = Set()\n+ s1.setFromString( "1\\tchr1\\tTE3\\t1\\t10\\n" )\n+ s2a = Set()\n+ s2a.setFromString( "2\\tchr1\\tTE2\\t2\\t9\\n" )\n+ s2b = Set()\n+ s2b.setFromString( "2\\tchr1\\tTE2\\t12\\t19\\n" )\n+ lSets = [ s1, s2a, s2b ]\n+ self._tSetA.insertList( lSets )\n+ expLSets = [ s1, s2a, s2b ]\n+ obsLSets = self._tSetA.getListOfAllSets()\n+ self.assertEqual( expLSets, obsLSets )\n+ \n+ def test_getListOfAllSets_empty_table( self ):\n+ self._iDb.createTable( self._table, "set" )\n+ expList = []\n+ obsList = self._tSetA.getListOfAllSets()\n+ self.assertEqual( expList, obsList ) \n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_TableSetAdaptator ) ) \n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Tst_F_RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Tst_F_RepetJob.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,236 @@\n+import os\n+import time\n+import sys\n+import stat\n+import unittest\n+import glob\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.RepetJob import RepetJob\n+from commons.core.sql.Job import Job\n+\n+class Test_F_RepetJob(unittest.TestCase):\n+\n+ def setUp(self):\n+ self._jobTableName = "dummyJobTable"\n+ self._db = DbMySql()\n+ self._iRepetJob = RepetJob()\n+ self._configFileName = "dummyConfigFile"\n+ configF = open(self._configFileName, "w" )\n+ configF.write( "[repet_env]\\n" )\n+ configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n+ configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n+ configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n+ configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n+ configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n+ configF.close()\n+\n+ def tearDown(self):\n+ self._iRepetJob = None\n+ self._db.dropTable( self._jobTableName )\n+ self._db.close()\n+ os.remove(self._configFileName)\n+ \n+ def test_submitJob_with_multiple_jobs(self):\n+ job1 = self._createJobInstance("job1")\n+ self._createLauncherFile(job1)\n+\n+ job2 = self._createJobInstance("job2")\n+ self._createLauncherFile(job2)\n+\n+ job3 = self._createJobInstance("job3")\n+ self._createLauncherFile(job3)\n+ \n+ self._iRepetJob.submitJob( job1, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+ self._iRepetJob.submitJob( job2, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+ self._iRepetJob.submitJob( job3, maxNbWaitingJobs=3, checkInterval=5, verbose=0 )\n+\n+ time.sleep(70)\n+ \n+ expJobStatus = "finished"\n+ obsJobStatus1 = self._iRepetJob.getJobStatus(job1)\n+ obsJobStatus2 = self._iRepetJob.getJobStatus(job2)\n+ obsJobStatus3 = self._iRepetJob.getJobStatus(job3)\n+ \n+ self.assertEquals(expJobStatus, obsJobStatus1)\n+ self.assertEquals(expJobStatus, obsJobStatus2)\n+ self.assertEquals(expJobStatus, obsJobStatus3)\n+ \n+ jobName1 = job1.jobname\n+ 
jobName2 = job2.jobname\n+ jobName3 = job3.jobname\n+ \n+ expErrorFilePrefix1 = jobName1+ ".e" \n+ expOutputFilePrefix1 = jobName1 + ".o"\n+ expErrorFilePrefix2 = jobName2 + ".e" \n+ expOutputFilePrefix2 = jobName2 + ".o"\n+ expErrorFilePrefix3 = jobName3 + ".e" \n+ expOutputFilePrefix3 = jobName3 + ".o"\n+ \n+ lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")\n+ lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")\n+ lErrorFiles2 = glob.glob(expErrorFilePrefix2 + "*")\n+ lOutputFiles2 = glob.glob(expOutputFilePrefix2 + "*")\n+ lErrorFiles3 = glob.glob(expErrorFilePrefix3 + "*")\n+ lOutputFiles3 = glob.glob(expOutputFilePrefix3 + "*")\n+ \n+ isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) \n+ isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)\n+ isLErrorFileNotEmpty2 = (len(lErrorFiles2) != 0) \n+ isLOutputFileNotEmpty2 = (len(lOutputFiles2) != 0)\n+ isLErrorFileNotEmpty3 = (len(lErrorFiles3) != 0) \n+ isLOutputFileNotEmpty3 = (len(lOutputFiles3) != 0)\n+ \n+ os.system("rm launcherFileTest*.py *.e* *.o*")\n+ self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)\n+ self.assertTrue(isLErrorFileNotEmpty2 and isLOutputFileNotEmpty2)\n+ self.assertTrue(isLErrorFileNotEmpty3 and isLOutputFileNotEmpty3)\n+\n+ def test_submitJob_job_already_submitted(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance("job")\n+ self._iRepetJob.recordJob(iJob)\n+ \n+ isSysExitRaised = False\n+ try:\n+ self._iRepetJob.submitJob(iJob)\n+ except SystemExit:\n+ isSysExitRaised = True\n+ self.'..b'ordJob(iJob)\n+ self._iRepetJob.changeJobStatus(iJob, "error", "method")\n+ \n+ self._iRepetJob.waitJobGroup(self._jobTableName ,iJob.groupid, 0, 2)\n+ \n+ time.sleep(10)\n+ \n+ expJobStatus = "finished"\n+ obsJobStatus1 = self._iRepetJob.getJobStatus(iJob)\n+ \n+ self.assertEquals(expJobStatus, obsJobStatus1)\n+ \n+ jobName = iJob.jobname\n+ \n+ expErrorFilePrefix1 = jobName + ".e" \n+ expOutputFilePrefix1 = jobName + ".o"\n+ \n+ 
lErrorFiles1 = glob.glob(expErrorFilePrefix1 + "*")\n+ lOutputFiles1 = glob.glob(expOutputFilePrefix1 + "*")\n+ \n+ isLErrorFileNotEmpty1 = (len(lErrorFiles1) != 0) \n+ isLOutputFileNotEmpty1 = (len(lOutputFiles1) != 0)\n+ \n+ self._iRepetJob.removeJob(iJob) \n+ os.system("rm launcherFileTest*.py *.e* *.o*")\n+ self.assertTrue(isLErrorFileNotEmpty1 and isLOutputFileNotEmpty1)\n+ \n+\n+ def test_isJobStillHandledBySge_True(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance("job")\n+ self._createLauncherFile(iJob)\n+ self._iRepetJob.submitJob(iJob)\n+ \n+ isJobHandledBySge = self._iRepetJob.isJobStillHandledBySge(iJob.jobid, iJob.jobname)\n+ os.system("rm launcherFileTest*.py")\n+ \n+ self.assertTrue(isJobHandledBySge)\n+\n+ def test_isJobStillHandledBySge_False(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance("job")\n+ self._createLauncherFile(iJob)\n+ self._iRepetJob.recordJob(iJob)\n+ \n+ isJobHandledBySge = self._iRepetJob.isJobStillHandledBySge(iJob.jobid, iJob.jobname)\n+ os.system("rm launcherFileTest*.py")\n+ \n+ self.assertFalse(isJobHandledBySge)\n+ \n+ def _createJobInstance(self, name):\n+ return Job(self._jobTableName, 0, name, "test", "", "date;sleep 5;date", "./launcherFileTest_"+ name +".py")\n+ \n+ def _createLauncherFile(self, iJob):\n+ jobFileHandler = open( iJob.launcher , "w" )\n+\n+ launcher = "#!/usr/bin/python\\n"\n+ launcher += "import os\\n"\n+ launcher += "import sys\\n"\n+ \n+ launcher += "print \\"system:\\", os.uname()\\n"\n+ launcher += "sys.stdout.flush()\\n"\n+ newStatus = "running"\n+ prg = "%s/bin/srptChangeJobStatus.py" % (os.environ["REPET_PATH"])\n+ cmd = prg\n+ cmd += " -t %s" % ( iJob.tablename )\n+ cmd += " -n %s" % ( iJob.jobname )\n+ cmd += " -g %s" % ( iJob.groupid )\n+ if iJob.queue != "":\n+ cmd += " -q %s" % ( iJob.queue )\n+ cmd += " -s %s" % ( newStatus )\n+ cmd += " -c %s" %( self._configFileName )\n+ cmd += " -v 1"\n+ 
launcher +="os.system( \\"" + cmd + "\\" )\\n"\n+ \n+ launcher += "print \\"LAUNCH: "+ iJob.command + "\\"\\n"\n+ launcher += "sys.stdout.flush()\\n"\n+ launcher += "exitStatus = os.system (\\"" + iJob.command + "\\")\\n"\n+ launcher += "if exitStatus != 0:\\n"\n+ launcher += "\\tprint \\"ERROR: "+ iJob.command + " returned exit status \'%i\'\\" % ( exitStatus )\\n"\n+ \n+ newStatus = "finished"\n+ prg = os.environ["REPET_PATH"] + "/bin/srptChangeJobStatus.py"\n+ cmd = prg\n+ cmd += " -t %s" % ( iJob.tablename )\n+ cmd += " -n %s" % ( iJob.jobname )\n+ cmd += " -g %s" % ( iJob.groupid )\n+ if iJob.queue != "":\n+ cmd += " -q %s" % ( iJob.queue )\n+ cmd += " -s %s" % ( newStatus )\n+ cmd += " -c %s" %( self._configFileName )\n+ cmd += " -v 1"\n+ launcher +="os.system( \\"" + cmd + "\\" )\\n"\n+ launcher += "sys.exit(0)\\n"\n+ jobFileHandler.write(launcher)\n+ jobFileHandler.close()\n+ os.chmod( iJob.launcher, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC )\n+\n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/sql/test/Tst_RepetJob.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/test/Tst_RepetJob.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,395 @@\n+import unittest\n+import sys\n+import os\n+import time\n+from commons.core.sql.DbMySql import DbMySql\n+from commons.core.sql.Job import Job\n+from commons.core.sql.RepetJob import RepetJob\n+from commons.core.utils.FileUtils import FileUtils\n+\n+#TODO: to remove... => replace all RepetJob() by TableJobAdaptator()...\n+class Test_RepetJob( unittest.TestCase ):\n+ \n+ def setUp(self):\n+ self._jobTableName = "dummyJobTable"\n+ self._db = DbMySql()\n+ self._iRepetJob = RepetJob()\n+ \n+ def tearDown(self):\n+ self._iRepetJob = None\n+ self._db.close()\n+ \n+ def _createJobInstance(self):\n+ return Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node", "lResources" )\n+ \n+ def test_createJobTable_is_table_created(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ \n+ isTableCreated = self._db.doesTableExist(self._jobTableName)\n+ self.assertTrue(isTableCreated)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_createJobTable_field_list(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+\n+ obsLFiled = self._db.getFieldList(self._jobTableName)\n+ expLField = ["jobid", "jobname", "groupid", "command", "launcher", "queue", "status", "time", "node"]\n+ \n+ self.assertEquals(expLField, obsLFiled)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_recordJob(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance()\n+ self._iRepetJob.recordJob(iJob)\n+ \n+ qryParams = "SELECT jobid, groupid, command, launcher, queue, status, node FROM " + self._jobTableName + " WHERE jobid = %s" \n+ params = (iJob.jobid)\n+ \n+ self._db.execute(qryParams, params)\n+ \n+ tObs = self._db.fetchall()[0]\n+ tExp =(iJob.jobid, iJob.groupid, iJob.command, iJob.launcher, iJob.queue, "waiting", "?")\n+ \n+ self.assertEquals(tExp,tObs)\n+\n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_removeJob(self):\n+ 
self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance()\n+ self._iRepetJob.recordJob(iJob)\n+\n+ self._iRepetJob.removeJob(iJob)\n+ \n+ isTableEmpty = self._db.isEmpty(self._jobTableName)\n+ \n+ self.assertTrue(isTableEmpty)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getJobStatus(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance()\n+ self._iRepetJob.recordJob(iJob)\n+\n+ expStatus = "waiting"\n+ obsStatus = self._iRepetJob.getJobStatus(iJob)\n+ \n+ self.assertEquals(expStatus, obsStatus)\n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getJobStatus_unknown(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance() \n+\n+ expStatus = "unknown"\n+ obsStatus = self._iRepetJob.getJobStatus(iJob)\n+ \n+ self.assertEquals(expStatus, obsStatus)\n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getJobStatus_no_name(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = Job( self._jobTableName, 20, "", "groupid", "queue", "command", "launcherFile", "node", "lResources" ) \n+ \n+ expStatus = "unknown"\n+ obsStatus = self._iRepetJob.getJobStatus(iJob)\n+ \n+ self.assertEquals(expStatus, obsStatus)\n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getJobStatus_non_unique_job(self):\n+ # Warning : this case will not append, because recordJob() begin by removeJob()\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance()\n+ sqlCmd = "INSERT I'..b'RepetJob.removeJob(iJob)\n+ \n+ def test_setJobIdFromSge(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob = self._createJobInstance()\n+ self._iRepetJob.recordJob(iJob)\n+ self._iRepetJob.setJobIdFromSge(iJob, 1000)\n+ \n+ qryParams = "SELECT jobid FROM " + self._jobTableName + " WHERE jobname = %s AND queue = %s AND groupid = %s" \n+ params = (iJob.jobname, iJob.queue, 
iJob.groupid)\n+ \n+ self._db.execute(qryParams, params)\n+ \n+ tObs = self._db.fetchall()[0]\n+ tExp =(1000,)\n+ \n+ self.assertEquals(tExp,tObs)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_submitJob_8_fields_for_job_table(self):\n+ iJob = self._createJobInstance()\n+ self._db.dropTable(self._jobTableName)\n+ sqlCmd = "CREATE TABLE " + self._jobTableName \n+ sqlCmd += " ( jobid INT UNSIGNED"\n+ sqlCmd += ", groupid VARCHAR(255)"\n+ sqlCmd += ", command TEXT"\n+ sqlCmd += ", launcher VARCHAR(1024)"\n+ sqlCmd += ", queue VARCHAR(255)"\n+ sqlCmd += ", status VARCHAR(255)"\n+ sqlCmd += ", time DATETIME"\n+ sqlCmd += ", node VARCHAR(255) )"\n+ self._db.execute(sqlCmd)\n+ \n+ self._iRepetJob.submitJob(iJob)\n+ \n+ expFieldsNb = 9\n+ obsFieldsNb = len(self._iRepetJob.getFieldList(self._jobTableName))\n+ \n+ self.assertEquals(expFieldsNb, obsFieldsNb)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getNodesListByGroupId(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob1 = Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources" )\n+ iJob2 = Job( self._jobTableName, 1, "job2", "groupid", "queue", "command", "launcherFile", "node2", "lResources" )\n+ iJob3 = Job( self._jobTableName, 2, "job3", "groupid2", "queue", "command", "launcherFile", "node3", "lResources" )\n+ \n+ self._insertJob(iJob1)\n+ self._insertJob(iJob2)\n+ self._insertJob(iJob3)\n+ \n+ expNodeList = ["node1", "node2"]\n+ obsNodeList = self._iRepetJob.getNodesListByGroupId(self._jobTableName, "groupid")\n+ self.assertEquals(expNodeList, obsNodeList)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def test_getNodesListByGroupId_empty_list(self):\n+ self._iRepetJob.createTable(self._jobTableName, "jobs")\n+ iJob1 = Job( self._jobTableName, 0, "job1", "groupid", "queue", "command", "launcherFile", "node1", "lResources" )\n+ iJob2 = Job( self._jobTableName, 1, "job2", "groupid", "queue", "command", 
"launcherFile", "node2", "lResources" )\n+ iJob3 = Job( self._jobTableName, 2, "job3", "groupid32", "queue", "command", "launcherFile", "node3", "lResources" )\n+ \n+ self._insertJob(iJob1)\n+ self._insertJob(iJob2)\n+ self._insertJob(iJob3)\n+ \n+ expNodeList = []\n+ obsNodeList = self._iRepetJob.getNodesListByGroupId(self._jobTableName, "groupid3")\n+ self.assertEquals(expNodeList, obsNodeList)\n+ \n+ self._db.dropTable(self._jobTableName)\n+ \n+ def _insertJob(self, iJob):\n+ self._iRepetJob.removeJob( iJob )\n+ sqlCmd = "INSERT INTO %s" % ( iJob.tablename )\n+ sqlCmd += " VALUES ("\n+ sqlCmd += " \\"%s\\"," % ( iJob.jobid )\n+ sqlCmd += " \\"%s\\"," % ( iJob.jobname )\n+ sqlCmd += " \\"%s\\"," % ( iJob.groupid )\n+ sqlCmd += " \\"%s\\"," % ( iJob.command.replace("\\"","\\\'") )\n+ sqlCmd += " \\"%s\\"," % ( iJob.launcher )\n+ sqlCmd += " \\"%s\\"," % ( iJob.queue )\n+ sqlCmd += " \\"waiting\\","\n+ sqlCmd += " \\"%s\\"," % ( time.strftime( "%Y-%m-%d %H:%M:%S" ) )\n+ sqlCmd += " \\"%s\\" );" % ( iJob.node )\n+ self._iRepetJob.execute( sqlCmd )\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/stat/Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/Stat.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,209 @@
+import math
+
+class Stat(object):
+
+    ## Accumulate numeric values and compute descriptive statistics on them
+    #
+    # Values are stored as floats, in insertion order, together with their
+    # running sum, sum of squares, count, minimum and maximum.
+    #
+    # @param lValues list of numeric values used to initialise the instance (optional)
+    #
+    def __init__(self, lValues = None):
+        self.reset()
+        if lValues is not None:
+            self.fill(lValues)
+
+    ## Compare two Stat instances on their values (whatever their order) and on their summary attributes
+    #
+    # @param o Stat instance to compare with
+    # @return boolean, or NotImplemented if 'o' is not a Stat instance
+    # @note floats are compared after rounding to 6 decimals; the operands are not modified
+    #
+    def __eq__(self, o):
+        if not isinstance(o, Stat):
+            return NotImplemented
+        return sorted(self._lValues) == sorted(o._lValues) \
+            and round(self._sum, 6) == round(o._sum, 6) \
+            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
+            and self._n == o._n \
+            and round(self._min, 6) == round(o._min, 6) \
+            and round(self._max, 6) == round(o._max, 6)
+
+    ## Opposite of __eq__ (Python 2 does not derive '!=' from '==')
+    #
+    # @param o Stat instance to compare with
+    # @return boolean, or NotImplemented if 'o' is not a Stat instance
+    #
+    def __ne__(self, o):
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return isEqual
+        return not isEqual
+
+    ## @return list of the values (floats) added so far
+    #
+    def getValuesList(self):
+        return self._lValues
+
+    ## @return float sum of the values
+    #
+    def getSum(self):
+        return self._sum
+
+    ## @return float sum of the squared values
+    #
+    def getSumOfSquares(self):
+        return self._sumOfSquares
+
+    ## @return int number of values
+    #
+    def getValuesNumber(self):
+        return self._n
+
+    ## @return float smallest value (0.0 if there is no value)
+    #
+    def getMin(self):
+        return self._min
+
+    ## @return float largest value (0.0 if there is no value)
+    #
+    def getMax(self):
+        return self._max
+
+    ## Reset all attributes
+    #
+    def reset(self):
+        self._lValues = []
+        self._sum = 0.0
+        self._sumOfSquares = 0.0
+        self._n = 0
+        self._max = 0.0
+        self._min = 0.0
+
+    ## Add a value to Stat instance list and update attributes
+    #
+    # @param v value to add (anything accepted by float())
+    #
+    def add(self, v):
+        value = float(v)
+        self._lValues.append( value )
+        self._sum += value
+        self._sumOfSquares += value * value
+        self._n += 1
+        if self._n == 1:
+            # the first value is both the minimum and the maximum, whatever its sign
+            self._min = value
+            self._max = value
+        elif value > self._max:
+            self._max = value
+        elif value < self._min:
+            self._min = value
+
+    ## Add a list of values to Stat instance list and update attributes
+    #
+    # @param lValues list of values to add
+    #
+    def fill(self, lValues):
+        for v in lValues:
+            self.add(v)
+
+    ## Get the arithmetic mean of the Stat instance list
+    #
+    # @return float (0 if there is no value)
+    #
+    def mean(self):
+        if self._n == 0:
+            return 0
+        return self._sum / float(self._n)
+
+    ## Get the variance of the sample
+    # @note we consider a sample, not a population. So for calculation, we use n-1
+    #
+    # @return float (0 if there are less than 2 values)
+    #
+    def var(self):
+        if self._n < 2:
+            return 0
+        mean = self.mean()
+        variance = self._sumOfSquares/float(self._n - 1) - self._n/float(self._n - 1) * mean*mean
+        # rounding errors can leave a tiny residue, possibly negative, which would break sd()
+        if variance < 0 or round(variance, 10) == 0:
+            variance = 0
+        return variance
+
+    ## Get the standard deviation of the sample
+    #
+    # @return float
+    #
+    def sd(self):
+        return math.sqrt( self.var() )
+
+    ## Get the coefficient of variation of the sample
+    #
+    # @return float (0 if there are less than 2 values or if the mean is null)
+    #
+    def cv(self):
+        mean = self.mean()
+        if self._n < 2 or mean == 0.0:
+            return 0
+        return self.sd() / mean
+
+    ## Get the median of the sample
+    #
+    # @return number or "NA" (Not available)
+    # @note the stored list of values is sorted in place (ascending)
+    #
+    def median( self ):
+        if len(self._lValues) == 0:
+            return "NA"
+        if len(self._lValues) == 1:
+            return self._lValues[0]
+        self._lValues.sort()
+        m = int( math.ceil( len(self._lValues) / 2.0 ) )
+        if len(self._lValues) % 2:
+            return self._lValues[m-1]
+        else:
+            return ( self._lValues[m-1] + self._lValues[m] ) / 2.0
+
+    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coef d'aplatissement' in French).
+    #  k = 0 -> completely flat
+    #  k = 3 -> same as normal distribution
+    #  k >> 3 -> peak
+    #
+    # @return float (0 if there are less than 2 values)
+    # @note NOTE(review): the value computed is sum((x - mean)^4) / (n - 1) * sd, which is not the
+    #       usual definition (division by sd^4); the formula is kept as is because the unit tests
+    #       of this class expect its results -- to be confirmed before any change
+    #
+    def kurtosis(self):
+        if self._n < 2:
+            return 0
+        mean = self.mean()
+        numerator = 0
+        for i in self._lValues:
+            numerator += math.pow( i - mean, 4 )
+        return numerator / float(self._n - 1) * self.sd()
+
+    ## Prepare a string with calculations on your values
+    #
+    # @return string
+    #
+    def string(self):
+        msg = ""
+        msg += "n=%d" % ( self._n )
+        msg += " mean=%5.3f" % ( self.mean() )
+        msg += " var=%5.3f" % ( self.var() )
+        msg += " sd=%5.3f" % ( self.sd() )
+        msg += " min=%5.3f" % ( self.getMin() )
+        median = self.median()
+        if median == "NA":
+            msg += " med=%s" % (median)
+        else:
+            msg += " med=%5.3f" % (median)
+        msg += " max=%5.3f" % ( self.getMax() )
+        return msg
+
+    ## Print descriptive statistics
+    #
+    def view(self):
+        print(self.string())
+
+    ## Return sorted list of values, ascending (default) or descending
+    #
+    # @param isReverse boolean True to sort in descending order
+    # @return list
+    # @note the stored list is sorted in place and returned (no copy)
+    #
+    def sort( self, isReverse = False ):
+        self._lValues.sort(reverse = isReverse)
+        return self._lValues
+
+    ## Give the quantile corresponding to the chosen percentage
+    #
+    # @param percentage float fraction of the sorted values, expected between 0 and 1
+    # @return number (0 if there is no value)
+    # @note the value returned is the one at index int(n * percentage) of the ascending list
+    #
+    def quantile( self, percentage ):
+        if self._n == 0:
+            return 0
+        elif percentage == 1:
+            return self.getMax()
+        else:
+            return self.sort()[int(self._n * percentage)]
+
+    ## Prepare a string with quantile values
+    #
+    # @return string
+    #
+    def stringQuantiles( self ):
+        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
+               (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))
+
+    ## Print quantiles string
+    #
+    def viewQuantiles( self ):
+        print(self.stringQuantiles())
+
+    ## Compute N50
+    #
+    # The values are sorted in descending order and cumulated until half of
+    # their sum is reached; the last value cumulated is returned.
+    #
+    # @return number (0 if there is no value)
+    # @note the stored list of values is left sorted in descending order
+    #
+    def N50(self ):
+        lSorted = self.sort(True)
+        if not lSorted:
+            return 0
+        midlValues = self.getSum() / 2
+        cumul = 0
+        index = 0
+        while cumul < midlValues and index < len(lSorted):
+            cumul = cumul + lSorted[index]
+            index += 1
+        if index == 0:
+            return lSorted[index]
+        return lSorted[index - 1]
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/stat/test/Test_F_Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/test/Test_F_Stat.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,22 @@
+import unittest
+from commons.core.stat.Stat import Stat
+
+
+class Test_F_Stat(unittest.TestCase):
+
+    ## Check the descriptive statistics string built from a sample of 8 values
+    #
+    def test_output(self):
+        iStat = Stat([0, -1, -5, 112, 10.2, 0.5, 4, -0.5])
+        obsString = iStat.string()
+        self.assertEqual("n=8 mean=15.025 var=1554.934 sd=39.433 min=-5.000 med=0.250 max=112.000", obsString)
+
+    ## Check the quantiles string built from the same sample
+    #
+    def test_outputQuantile(self):
+        iStat = Stat([0, -1, -5, 112, 10.2, 0.5, 4, -0.5])
+        obsString = iStat.stringQuantiles()
+        self.assertEqual("n=8 min=-5.000 Q1=-0.500 median=0.500 Q3=10.200 max=112.000", obsString)
+
+
+# run the functional tests when the module is launched as a script
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/stat/test/Test_Stat.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/stat/test/Test_Stat.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,356 @@\n+from commons.core.stat.Stat import Stat\n+import unittest\n+\n+class Test_Stat(unittest.TestCase):\n+ \n+ def test__eq__true(self):\n+ iStat1 = Stat([1, 2, 3, 46])\n+ iStat2 = Stat([1, 2, 3, 46])\n+ self.assertTrue(iStat1 == iStat2)\n+\n+ def test__eq__false(self):\n+ iStat1 = Stat([1, 2, 3, 4])\n+ iStat2 = Stat([1, 2, 3, 46])\n+ self.assertFalse(iStat1 == iStat2)\n+\n+ def test__eq__disordered_list(self):\n+ iStat1 = Stat([3, 2, 1, 46])\n+ iStat2 = Stat([1, 2, 3, 46])\n+ self.assertTrue(iStat1 == iStat2)\n+\n+ def test_reset(self):\n+ lValues = [1, 2, 5, 9, 12, 46]\n+ iStat = Stat(lValues)\n+ iStat.reset()\n+ expValuesList = []\n+ expSum = 0\n+ expSum2 = 0\n+ expN = 0\n+ expMin = 0\n+ expMax = 0\n+ obsValuesList = iStat.getValuesList()\n+ obsSum = iStat.getSum()\n+ obsSum2 = iStat.getSumOfSquares()\n+ obsN = iStat.getValuesNumber()\n+ obsMin = iStat.getMin()\n+ obsMax = iStat.getMax()\n+ self.assertEquals(expValuesList, obsValuesList)\n+ self.assertEquals(expSum, obsSum)\n+ self.assertEquals(expSum2, obsSum2)\n+ self.assertEquals(expN, obsN)\n+ self.assertEquals(expMin, obsMin)\n+ self.assertEquals(expMax, obsMax)\n+\n+ def test_add_EmptyList(self):\n+ lValues = []\n+ iStat = Stat(lValues)\n+ iStat.add(5)\n+ expValuesList = [5]\n+ expSum = 5\n+ expSum2 = 25\n+ expN = 1\n+ expMin = 5\n+ expMax = 5\n+ obsValuesList = iStat.getValuesList()\n+ obsSum = iStat.getSum()\n+ obsSum2 = iStat.getSumOfSquares()\n+ obsN = iStat.getValuesNumber()\n+ obsMin = iStat.getMin()\n+ obsMax = iStat.getMax()\n+ self.assertEquals(expValuesList, obsValuesList)\n+ self.assertEquals(expSum, obsSum)\n+ self.assertEquals(expSum2, obsSum2)\n+ self.assertEquals(expN, obsN)\n+ self.assertEquals(expMin, obsMin)\n+ self.assertEquals(expMax, obsMax)\n+ \n+ def test_add_Max(self):\n+ lValues = [0,1,1]\n+ iStat = Stat(lValues)\n+ iStat.add(2)\n+ expValuesList = [0,1,1,2]\n+ expSum = 4\n+ expSum2 = 6\n+ expN = 4\n+ expMin = 0\n+ expMax = 2\n+ obsValuesList = 
iStat.getValuesList()\n+ obsSum = iStat.getSum()\n+ obsSum2 = iStat.getSumOfSquares()\n+ obsN = iStat.getValuesNumber()\n+ obsMin = iStat.getMin()\n+ obsMax = iStat.getMax()\n+ self.assertEquals(expValuesList, obsValuesList)\n+ self.assertEquals(expSum, obsSum)\n+ self.assertEquals(expSum2, obsSum2)\n+ self.assertEquals(expN, obsN)\n+ self.assertEquals(expMin, obsMin)\n+ self.assertEquals(expMax, obsMax)\n+ \n+ def test_add_Min(self):\n+ lValues = [2,1,1]\n+ iStat = Stat(lValues)\n+ iStat.add(0)\n+ expValuesList = [2,1,1,0]\n+ expSum = 4\n+ expSum2 = 6\n+ expN = 4\n+ expMin = 0\n+ expMax = 2\n+ obsValuesList = iStat.getValuesList()\n+ obsSum = iStat.getSum()\n+ obsSum2 = iStat.getSumOfSquares()\n+ obsN = iStat.getValuesNumber()\n+ obsMin = iStat.getMin()\n+ obsMax = iStat.getMax()\n+ self.assertEquals(expValuesList, obsValuesList)\n+ self.assertEquals(expSum, obsSum)\n+ self.assertEquals(expSum2, obsSum2)\n+ self.assertEquals(expN, obsN)\n+ self.assertEquals(expMin, obsMin)\n+ self.assertEquals(expMax, obsMax)\n+ \n+ def test_fill_emptyList(self):\n+ lValues = [2,1,1]\n+ iStat = Stat(lValues)\n+ iStat.fill([])\n+ expValuesList = [2,1,1]\n+ expSum = 4\n+ expSum2 = 6\n+ expN = 3\n+ expMin = 1\n+ expMax = 2\n+ obsValuesList = iStat.getValuesList()\n+ obsSum = iStat.getSum()\n+ obsSum2 = iStat.getSumOfSquares()\n+ obsN = iStat.getValuesNumber()'..b'\n+ lValues = [1, 2, 3, 4, 1, 2, 54, 6, 7]\n+ iStat = Stat(lValues)\n+ expMedian = 3\n+ obsMedian = iStat.median()\n+ self.assertEquals(expMedian, obsMedian)\n+ \n+ def test_median_odd(self):\n+ lValues = [1, 2, 3, 4, 2, 54, 6, 7]\n+ iStat = Stat(lValues)\n+ expMedian = 3.5\n+ obsMedian = iStat.median()\n+ self.assertEquals(expMedian, obsMedian)\n+ \n+ def test_kurtosis_flat(self):\n+ lValues = [1, 1, 1]\n+ iStat = Stat(lValues)\n+ expKurtosis = 0\n+ obsKurtosis = iStat.kurtosis()\n+ self.assertEquals(expKurtosis, obsKurtosis)\n+ \n+ def test_kurtosis_peak(self):\n+ lValues = [1, 100, -5]\n+ iStat = Stat(lValues)\n+ 
expKurtosis = round(712872278.6609683, 2)\n+ obsKurtosis = round(iStat.kurtosis(), 2)\n+ self.assertEquals(expKurtosis, obsKurtosis)\n+ \n+ def test_kurtosis_normal(self):\n+ lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+ iStat = Stat(lValues)\n+ expKurtosis = 3.0\n+ obsKurtosis = round(iStat.kurtosis(), 1)\n+ self.assertEquals(expKurtosis, obsKurtosis)\n+ \n+ def test_sort(self):\n+ lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+ iStat = Stat(lValues)\n+ expSort = [-1, -1, 0, 0, 1.64, 1.64]\n+ obsSort = iStat.sort()\n+ self.assertEquals(expSort, obsSort)\n+ \n+ def test_sort_reverse(self):\n+ lValues = [-1, 0, 1.64, 1.64, 0, -1]\n+ iStat = Stat(lValues)\n+ expSort = [1.64, 1.64, 0, 0, -1, -1]\n+ obsSort = iStat.sort(True)\n+ self.assertEquals(expSort, obsSort)\n+ \n+ def test_sort_emptyList(self):\n+ lValues = []\n+ iStat = Stat(lValues)\n+ expSort = []\n+ obsSort = iStat.sort()\n+ self.assertEquals(expSort, obsSort)\n+ \n+ def test_quantile_emptyList(self):\n+ lValues = []\n+ iStat = Stat(lValues)\n+ expQuantile = 0\n+ obsQuantile = iStat.quantile(0.25)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_0perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 5]\n+ iStat = Stat(lValues)\n+ expQuantile = -1\n+ obsQuantile = iStat.quantile(0)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_25perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 5]\n+ iStat = Stat(lValues)\n+ expQuantile = 0\n+ obsQuantile = iStat.quantile(0.25)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_41perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 5]\n+ iStat = Stat(lValues)\n+ expQuantile = 1.64\n+ obsQuantile = iStat.quantile(0.41)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_75perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 5]\n+ iStat = Stat(lValues)\n+ expQuantile = 2.64\n+ obsQuantile = iStat.quantile(0.75)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_81perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 
5]\n+ iStat = Stat(lValues)\n+ expQuantile = 5\n+ obsQuantile = iStat.quantile(0.81)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_quantile_100perc(self):\n+ lValues = [0, 2.64, 1.64, -1, 5]\n+ iStat = Stat(lValues)\n+ expQuantile = 5\n+ obsQuantile = iStat.quantile(1)\n+ self.assertEquals(expQuantile, obsQuantile)\n+ \n+ def test_N50(self):\n+ lValues = [10, 10, 2, 16, 3, 4, 5]\n+ iStat = Stat(lValues)\n+ expN50 = 10\n+ obsN50 = iStat.N50()\n+ self.assertEquals(expN50, obsN50)\n+\n+ def test_N50SpecialValues(self):\n+ lValues = [1, 100, 2, 3]\n+ iStat = Stat(lValues)\n+ expN50 = 100\n+ obsN50 = iStat.N50()\n+ self.assertEquals(expN50, obsN50)\n+ \n+if __name__ == "__main__":\n+ unittest.main()\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/tree/Tree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/Tree.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,122 @@
+import os, re, sys
+
+class Tree:
+
+    ## Binary tree read from / written to a Newick-formatted string
+    #
+    # Each node is a dictionary with the keys "name", "left", "right" and
+    # "length". Leaves have no child ("left" and "right" are None); the name
+    # of every other node is "internal".
+
+    ## Pattern of an internal node followed by its branch length, e.g. "(a:1,b:2):0.5"
+    _distPattern = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[e\-\d\.]+)$')
+
+    ## Constructor
+    #
+    # @param inFileName string name of a Newick file to load (optional)
+    #
+    def __init__( self, inFileName="" ):
+        self.tree = None
+        self.inFileName = inFileName
+        if self.inFileName != "":
+            self.loadTree()
+
+    ## Load the tree saved in the input file
+    #
+    # @param verbose int if > 0, print the number of leaves
+    #
+    def loadTree( self, verbose=0 ):
+        with open( self.inFileName, "r" ) as inF:
+            lines = inF.readlines()
+        line = "".join(lines).replace("\n","")
+        self.tree = self.parseTree( line )
+        if verbose > 0:
+            print("nb of leaves: %i" % ( self.getNbOfLeaves() ))
+
+    ## Parse a Newick-formatted string recursively
+    #
+    # @param sTree string either a leaf 'name:length' or a node '(lhs,rhs)' optionally followed by ':length'
+    # @return dictionary the node (see the description of the class)
+    #
+    def parseTree( self, sTree ):
+        if "," not in sTree:
+            name, length = sTree.split(":")
+            return self.makeLeaf( name, float(length) )
+
+        # detach the branch length of the node, if any, before splitting its content
+        length = 0
+        m = self._distPattern.search( sTree )
+        if m:
+            length = float( m.group('length') )
+            sTree = m.group('tree')
+
+        lhs, rhs = self.parseSubTree( sTree )
+
+        return { "name": "internal",
+                 "left": self.parseTree( lhs ),
+                 "right": self.parseTree( rhs ),
+                 "length": length }
+
+    ## Build a leaf
+    #
+    # @param name string name of the leaf
+    # @param length float length of the branch
+    # @return dictionary
+    #
+    def makeLeaf( self, name, length ):
+        return { "left":None, "right":None, "name":name, "length":length }
+
+    def parseSubTree( self, sTree ):
+        """
+        Parse a newick-formatted string of type '(a,b)' into [a,b]
+
+        The first and last characters (the enclosing parentheses) are dropped,
+        then the string is split on the first comma which is not nested.
+        """
+        count = 0
+        isLhs = True
+        lLeft = []
+        lRight = []
+        for c in sTree[1:-1]:
+            if c == "(":
+                count += 1
+            elif c == ")":
+                count -= 1
+            elif (c == ",") and (count == 0) and (isLhs) :
+                isLhs = False
+                continue
+            if isLhs:
+                lLeft.append( c )
+            else:
+                lRight.append( c )
+        return [ "".join( lLeft ), "".join( lRight ) ]
+
+    ## Convert a node, and recursively its children, into a Newick-formatted string
+    #
+    # @param tree dictionary the node
+    # @return string
+    # @note a null length is not written
+    #
+    def toNewick( self, tree ):
+        newString = ""
+        if tree["name"] != "internal":
+            newString += tree["name"]
+        else:
+            newString += "("
+            newString += self.toNewick( tree["left"] )
+            newString += ","
+            newString += self.toNewick( tree["right"] )
+            newString += ")"
+        if tree["length"]:
+            newString += ":"
+            newString += "%f" % ( tree["length"] )
+        return newString
+
+    ## Save the tree into a file, in Newick format
+    #
+    # @param outFileName string name of the output file
+    #
+    def saveTree( self, outFileName ):
+        with open( outFileName, "w" ) as outF:
+            outF.write( self.toNewick( self.tree ) )
+
+    ## Replace, in place, the name of each node found among the keys of the dictionary
+    #
+    # @param tree dictionary the node from which to start
+    # @param dNew2Init dictionary current name -> initial header
+    # @note in the initial headers, ' ' is replaced by '_', '::' and ',' by '-'
+    #
+    def replaceHeaderViaPrefixSearch( self, tree, dNew2Init ):
+        if tree["name"] in dNew2Init:
+            tree["name"] = dNew2Init[ tree["name"] ].replace(" ","_").replace("::","-").replace(",","-")
+        if tree["left"] is not None:
+            self.replaceHeaderViaPrefixSearch( tree["left"], dNew2Init )
+        if tree["right"] is not None:
+            self.replaceHeaderViaPrefixSearch( tree["right"], dNew2Init )
+
+    ## Put back the initial headers in the tree, then save it
+    #
+    # @param dNew2Init dictionary current name -> initial header
+    # @param outFileName string name of the output file
+    #
+    def retrieveInitialSequenceHeaders( self, dNew2Init, outFileName  ):
+        self.replaceHeaderViaPrefixSearch( self.tree, dNew2Init )
+        self.saveTree( outFileName )
+
+    ## Count recursively the nodes below the given one
+    #
+    # @param tree dictionary the node
+    # @param nbNodes int number of nodes already counted
+    # @return int
+    #
+    def getNbOfChildNodes( self, tree, nbNodes ):
+        if tree["left"] is not None:
+            nbNodes += 1
+            nbNodes = self.getNbOfChildNodes( tree["left"], nbNodes )
+        if tree["right"] is not None:
+            nbNodes += 1
+            nbNodes = self.getNbOfChildNodes( tree["right"], nbNodes )
+        return nbNodes
+
+    ## @return int number of nodes in the tree, the root being excluded
+    #
+    def getNbOfNodes( self ):
+        nbNodes = 0
+        return self.getNbOfChildNodes( self.tree, nbNodes )
+
+    ## Count recursively the leaves (nodes not named "internal") from the given node
+    #
+    # @param tree dictionary the node
+    # @param nbLeaves int number of leaves already counted
+    # @return int
+    #
+    def getNbOfChildLeaves( self, tree, nbLeaves ):
+        if tree["name"] != "internal":
+            nbLeaves += 1
+        if tree["left"] is not None:
+            nbLeaves = self.getNbOfChildLeaves( tree["left"], nbLeaves )
+        if tree["right"] is not None:
+            nbLeaves = self.getNbOfChildLeaves( tree["right"], nbLeaves )
+        return nbLeaves
+
+    ## @return int number of leaves in the tree
+    #
+    def getNbOfLeaves( self ):
+        nbLeaves = 0
+        return self.getNbOfChildLeaves( self.tree, nbLeaves )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/tree/test/Test_Tree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/test/Test_Tree.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,90 @@
+import unittest
+import os
+import time
+from commons.core.tree.Tree import Tree
+from commons.core.utils.FileUtils import FileUtils
+
+
+class Test_Tree( unittest.TestCase ):
+
+    ## Build an empty Tree and a suffix (time stamp and process id) used to name the dummy files
+    #
+    def setUp( self ):
+        timeStamp = time.strftime("%Y%m%d%H%M%S")
+        self._uniqId = "%s_%s" % ( timeStamp , os.getpid() )
+        self._tree = Tree()
+
+
+    ## A string without comma is parsed as a single leaf
+    #
+    def test_parseTree_oneLeaf( self ):
+        expLeaf = { "left":None, "right":None, "name":"seq1", "length":0.0023 }
+        obsLeaf = self._tree.parseTree( "seq1:0.0023" )
+        self.assertEqual( obsLeaf, expLeaf )
+
+
+    ## Two leaves between parentheses are parsed as an internal node, of null length, having two children
+    #
+    def test_parseTree_twoLeaves( self ):
+        expLeft = {'length':0.0023, 'right':None, 'name':'seq1', 'left':None}
+        expRight = {'length':0.0016999999999999999, 'right':None, 'name':'seq2', 'left':None}
+        expNode = {'length':0, 'right':expRight, 'name':'internal', 'left':expLeft}
+        obsNode = self._tree.parseTree( "(seq1:0.0023,seq2:0.0017)" )
+        self.assertEqual( obsNode, expNode )
+
+##     def test_parseTree_threeLeaves( self ):
+##         inString = "(seq1:0.0023,(seq2:0.0017,seq3:0.0009))"
+##         obs = self._tree.parseTree( inString )
+##         print obs
+##         exp = {'length':0, 'right':{'length':0.0016999999999999999, 'right':None, 'name':'seq2', 'left':None}, 'name':'internal', 'left':{'length':0.0023, 'right':None, 'name':'seq1', 'left':None}}
+##         self.assertEqual( obs, exp )
+
+
+    ## The content of the parentheses is split on its top-level comma
+    #
+    def test_parseSubTree( self ):
+        lObs = self._tree.parseSubTree( "(seq1:0.0023,seq2:0.0017)" )
+        self.assertEqual( lObs, [ "seq1:0.0023", "seq2:0.0017" ] )
+
+
+    ## A tree loaded from a file then saved gives an identical file
+    #
+    def test_saveTree( self ):
+        inFileName = "dummyInFile_%s" % ( self._uniqId )
+        obsFileName = "dummyObsFile_%s" % ( self._uniqId )
+        with open( inFileName, "w" ) as inF:
+            inF.write( "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))" )
+        self._tree = Tree( inFileName )
+        self._tree.saveTree( obsFileName )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, inFileName ) )
+        os.remove( inFileName )
+        os.remove( obsFileName )
+
+
+    ## The names of the leaves are replaced by the initial headers, cleaned, in the saved file
+    #
+    def test_retrieveInitialSequenceHeaders( self ):
+        self._tree.tree = self._tree.parseTree( "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))" )
+        dNew2Init = { "seq2":"consensus524::215 dmel_chr4 142..765", "seq3":"DmelChr4-B-G387-MAP16", "seq4":"1360|1cl-3gr" }
+        expFileName = "dummyExpFile_%s"  % ( self._uniqId )
+        obsFileName = "dummyObsFile_%s"  % ( self._uniqId )
+        with open( expFileName, "w" ) as expF:
+            expF.write( "(1360|1cl-3gr:0.012511,(DmelChr4-B-G387-MAP16:0.005340,consensus524-215_dmel_chr4_142..765:0.002201))" )
+        self._tree.retrieveInitialSequenceHeaders( dNew2Init, obsFileName )
+        self.assertTrue( FileUtils.are2FilesIdentical( obsFileName, expFileName ) )
+        os.remove( expFileName )
+        os.remove( obsFileName )
+
+
+    ## A tree of three sequences has three leaves
+    #
+    def test_getNbOfLeaves( self ):
+        self._tree.tree = self._tree.parseTree( "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))" )
+        obsNbLeaves = self._tree.getNbOfLeaves()
+        self.assertEqual( obsNbLeaves, 3 )
+
+
+    ## A tree of three sequences has four nodes below its root
+    #
+    def test_getNbOfNodes( self ):
+        self._tree.tree = self._tree.parseTree( "(seq4:0.012511,(seq3:0.005340,seq2:0.002201))" )
+        obsNbNodes = self._tree.getNbOfNodes()
+        self.assertEqual( obsNbNodes, 4 )
+
+
+# suite defined at module level so that it is available when the module is imported
+test_suite = unittest.TestSuite( [ unittest.makeSuite( Test_Tree ) ] )
+if __name__ == "__main__":
+    runner = unittest.TextTestRunner( verbosity=2 )
+    runner.run( test_suite )

diff -r ea3082881bf8 -r 769e306b7933 commons/core/tree/test/treeTestSuite.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/tree/test/treeTestSuite.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,16 @@
+import unittest
+import sys
+import Test_Tree
+
+
+
+## Run all the 'test*' methods of Test_Tree, reporting on the standard error
+#
+def main():
+    lTestCases = unittest.makeSuite(Test_Tree.Test_Tree, 'test')
+    commonsTestSuite = unittest.TestSuite()
+    commonsTestSuite.addTest(lTestCases)
+    unittest.TextTestRunner(stream=sys.stderr, descriptions=2, verbosity=2).run(commonsTestSuite)
+
+
+if __name__ == '__main__':
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/FileUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/FileUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,447 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import glob\n+import shutil\n+import sys\n+import re\n+import math\n+try:\n+ import hashlib\n+except:\n+ pass\n+\n+\n+class FileUtils( object ):\n+ \n+ ## Return the number of lines in the given file\n+ #\n+ def getNbLinesInSingleFile( fileName ):\n+ fileHandler = open( fileName, "r" )\n+ lines = fileHandler.readlines()\n+ fileHandler.close()\n+ if (len(lines)>0 and lines[-1]== "\\n"):\n+ return (len(lines)-1)\n+ else :\n+ return len(lines)\n+ \n+ getNbLinesInSingleFile = staticmethod( getNbLinesInSingleFile )\n+ \n+ ## Return the number of lines in the files in the given list\n+ #\n+ def getNbLinesInFileList( lFileNames ):\n+ count = 0\n+ for fileName in lFileNames:\n+ count += FileUtils.getNbLinesInSingleFile( fileName )\n+ return count\n+ \n+ getNbLinesInFileList = staticmethod( getNbLinesInFileList )\n+ \n+ ## Return True if the given file exists, False otherwise\n+ #\n+ def isRessourceExists( fileName ):\n+ return os.path.exists( fileName )\n+ \n+ isRessourceExists = staticmethod( isRessourceExists )\n+ \n+ ## Return True if the given file is empty, False otherwise\n+ #\n+ def isEmpty( fileName ):\n+ return 0 == FileUtils.getNbLinesInSingleFile( fileName )\n+ \n+ isEmpty = staticmethod( isEmpty )\n+ \n+ ## Return True if both files are identical, False otherwise\n+ #\n+ def are2FilesIdentical( file1, file2 ):\n+ tmpFile = "diff_%s_%s" % ( os.path.basename(file1), os.path.basename(file2) )\n+ cmd = "diff %s %s >> %s" % ( file1, file2, tmpFile )\n+ returnStatus = os.system( cmd )\n+ if returnStatus != 0:\n+ msg = "ERROR: \'diff\' returned \'%i\'" % ( returnStatus )\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.stderr.flush()\n+ os.remove( tmpFile )\n+ return False\n+ if FileUtils.isEmpty( tmpFile ):\n+ os.remove( tmpFile )\n+ return True\n+ else:\n+ os.remove( tmpFile )\n+ return False\n+ \n+ 
are2FilesIdentical = staticmethod( are2FilesIdentical )\n+ \n+ ## Return a string with all the content of the files in the given list\n+ #\n+ def getFileContent( lFiles ):\n+ content = ""\n+ lFiles.sort()\n+ for fileName in lFiles:\n+ '..b' \n+ ## Give the list of file names found in the given directory\n+ #\n+ # @param dirPath string absolute path of the given directory\n+ #\n+ # @return lFilesInDir list of file names\n+ #\n+ def getFileNamesList( dirPath, patternFileFilter = ".*" ):\n+ lFilesInDir = []\n+ lPaths = glob.glob( dirPath + "/*" )\n+ for ressource in lPaths:\n+ if os.path.isfile( ressource ):\n+ fileName = os.path.basename( ressource )\n+ if re.match(patternFileFilter, fileName):\n+ lFilesInDir.append( fileName )\n+ return lFilesInDir\n+ \n+ getFileNamesList = staticmethod( getFileNamesList )\n+ \n+ ## Return the MD5 sum of a file\n+ #\n+ def getMd5SecureHash( inFile ):\n+ if "hashlib" in sys.modules:\n+ md5 = hashlib.md5()\n+ inFileHandler = open( inFile, "r" )\n+ while True:\n+ line = inFileHandler.readline()\n+ if line == "":\n+ break\n+ md5.update( line )\n+ inFileHandler.close()\n+ return md5.hexdigest()\n+ else:\n+ return ""\n+ \n+ getMd5SecureHash = staticmethod( getMd5SecureHash )\n+ \n+ ## Cat all files of a given directory\n+ #\n+ # @param dir string directory name\n+ # @param outFileName string output file name\n+ #\n+ def catFilesOfDir(dir, outFileName):\n+ lFiles = FileUtils.getFileNamesList(dir)\n+ lFile2 = []\n+ for file in lFiles:\n+ lFile2.append(dir + "/" + file)\n+ FileUtils.catFilesFromList(lFile2, outFileName)\n+ \n+ catFilesOfDir = staticmethod(catFilesOfDir)\n+ \n+ ## Return True if size file > 0 octet\n+ #\n+ # @param fileName string file name\n+ #\n+ def isSizeNotNull(fileName):\n+ size = os.path.getsize(fileName)\n+ if size > 0:\n+ return True\n+ return False\n+ \n+ isSizeNotNull = staticmethod(isSizeNotNull)\n+ \n+ ## Split one file into N Files by lines\n+ #\n+ # @param fileName string file name\n+ # @param N int number of 
files to create\n+ # \n+ @staticmethod\n+ def splitFileIntoNFiles(fileName, N):\n+ nbLine = FileUtils.getNbLinesInSingleFile(fileName)\n+ nbLinesInEachFile = nbLine\n+ if N > nbLine:\n+ N = nbLine\n+ if N != 0:\n+ nbLinesInEachFile = math.ceil(float(nbLine) / N)\n+ else:\n+ N = 1\n+ filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n+ fileHandler = open(fileName, "r")\n+ for i in range(1,N+1):\n+ with open("%s-%s%s" %(filePrefix, i, fileExt), "w") as f:\n+ j = 0\n+ while j < nbLinesInEachFile:\n+ j += 1\n+ f.write(fileHandler.readline())\n+ fileHandler.close() \n+ \n+ ## Split one file into files of N lines\n+ #\n+ # @param fileName string input file name\n+ # @param N int lines number per files\n+ # \n+ @staticmethod\n+ def splitFileAccordingToLineNumber(fileName, N):\n+ filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n+ with open(fileName) as inF:\n+ fileNb = 1\n+ line = inF.readline()\n+ if not line or N == 0:\n+ outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)\n+ f = open(outFileName, "wb")\n+ shutil.copyfileobj(open(fileName, "rb"), f)\n+ f.close()\n+ else:\n+ while line:\n+ outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)\n+ with open(outFileName, "w") as outF:\n+ lineNb = 1\n+ while lineNb <= N and line:\n+ outF.write(line)\n+ line = inF.readline()\n+ lineNb += 1\n+ fileNb += 1\n\\ No newline at end of file\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/PipelineStepFTests.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/PipelineStepFTests.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import sys
+import os
+import shutil
+from commons.core.utils.FileUtils import FileUtils
+
+class PipelineStepFTests(object):
+
+    ## Base of the functional test of a pipeline step
+    #
+    # run() calls launchStep() then assertStep(), which are not defined in this class.
+    #
+    # @param pipelineName string name of the pipeline, used in the messages
+    # @param packageDir string stored as is
+    # @param workingDir string directory removed by _printMessageAndClean() when 'clean' is True
+    # @param projectName string stored as is
+    # @param config string name of the configuration file
+    # @param clean boolean True to remove the working directory once the message is printed
+    #
+    def __init__(self, pipelineName, packageDir, workingDir, projectName, config = "", clean = True):
+        self._configFileName = config
+        self._clean = clean
+        self._projectName = projectName
+        self._workingDir = workingDir
+        self._packageDir = packageDir
+        self._pipelineName = pipelineName
+
+    ## Launch the step then check its results
+    #
+    def run(self):
+        self.launchStep()
+        self.assertStep()
+
+#    def replaceInFile(self, fileName, oldPattern, newPattern, newFileName = ""):
+#        if newFileName == "":
+#            newFileName = "%s.new" % fileName
+#        f = open(newFileName, "w")
+#        for line in fileinput.input(fileName, inplace=1):
+#            newLine = line.replace(oldPattern, newPattern)
+#            f.write(newLine)
+#        f.close()
+#        fileinput.close()
+
+    ## Tell if the given file exists, printing a message when it does not
+    #
+    # @param fileName string name of the file
+    # @return boolean
+    #
+    def _checkIfFileExist(self, fileName):
+        if FileUtils.isRessourceExists(fileName):
+            return True
+        print("%s do not exists\n" % fileName)
+        return False
+
+    ## Print the message, move to the parent directory and, if asked, remove the working directory
+    #
+    # @param msg string beginning of the message to print
+    #
+    def _printMessageAndClean(self, msg):
+        print("%s in %s functional test\n" % (msg, self._pipelineName))
+        sys.stdout.flush()
+        os.chdir("../")
+        if not self._clean:
+            return
+        shutil.rmtree(self._workingDir)
+
+    ## Compare two files via an external script, launched through the shell
+    #
+    # @param expFileName string name of the file given to the option '-r' of the script
+    # @param obsFileName string name of the file given to the option '-t' of the script
+    # @param scriptName string script to launch
+    # @return boolean True if the exit status of the command is 0
+    #
+    def _areTwoFilesIdenticalByScript( self, expFileName, obsFileName, scriptName):
+        exitStatus = os.system("%s -v 1 -r %s -t %s 2>/dev/null" % (scriptName, expFileName, obsFileName))
+        print("")
+        sys.stdout.flush()
+        return exitStatus == 0
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/RepetConfigParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/RepetConfigParser.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,38 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+from ConfigParser import ConfigParser
+
+
+class RepetConfigParser(ConfigParser):
+
+    def optionxform(self, optionstr):
+        return optionstr
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/RepetOptionParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/RepetOptionParser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+"""
+Class overriding optparse.OptionParser default epilog formatter.
+The resulting epilog display format is the same as if the corresponding string was printed.
+"""
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from optparse import OptionParser
+from optparse import BadOptionError
+from optparse import OptionValueError
+SUPPRESS_USAGE = "SUPPRESS"+"USAGE"
+
+class RepetOptionParser(OptionParser):
+
+    def parse_args(self, args=None, values=None):
+        rargs = self._get_args(args)
+        if not rargs:
+            rargs = ["-h"]
+        if values is None:
+            values = self.get_default_values()
+        self.rargs = rargs
+        self.largs = largs = []
+        self.values = values
+        try:
+            self._process_args(largs, rargs, values)
+        except (BadOptionError, OptionValueError), err:
+            self.error(str(err))
+        args = largs + rargs
+        return self.check_values(values, args)
+
+    def set_usage(self, usage):
+        if not usage or usage is SUPPRESS_USAGE:
+            self.usage = None
+        elif usage.lower().startswith("usage: "):
+            self.usage = usage[7:]
+        else:
+            self.usage = usage
+
+    def format_epilog(self, formatter):
+        if self.epilog != None:
+            return self.epilog
+        else :
+            return ""
+
+    def format_description(self, formatter):
+        if self.description != None:
+            return self.description
+        else :
+            return ""

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/test/TestSuite_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/test/TestSuite_utils.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+
+import unittest
+import sys
+import Test_FileUtils
+
+
+def main():
+
+    TestSuite_utils = unittest.TestSuite()
+
+    TestSuite_utils.addTest( unittest.makeSuite( Test_FileUtils.Test_FileUtils, "test" ) )
+
+    runner = unittest.TextTestRunner(sys.stderr, 2, 2)
+    runner.run( TestSuite_utils )
+
+
+if __name__ == "__main__":
+    main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/utils/test/Test_FileUtils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/utils/test/Test_FileUtils.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,885 @@\n+# Copyright INRA (Institut National de la Recherche Agronomique)\n+# http://www.inra.fr\n+# http://urgi.versailles.inra.fr\n+#\n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use, \n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info". \n+#\n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability. \n+#\n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or \n+# data to be ensured and, more generally, to use and operate it in the \n+# same conditions as regards security. 
\n+#\n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+\n+\n+import os\n+import sys\n+import unittest\n+import time\n+import shutil\n+from commons.core.utils.FileUtils import FileUtils\n+\n+\n+class Test_FileUtils( unittest.TestCase ):\n+ \n+ def setUp( self ):\n+ self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n+ \n+ def tearDown( self ):\n+ self._uniqId = ""\n+ \n+ def test_getNbLinesInSingleFile_non_empty( self ):\n+ file = "dummyFile_%s" % ( self._uniqId )\n+ f = open( file, "w" )\n+ f.write( "line1\\n" )\n+ f.write( "line2\\n" )\n+ f.write( "line3" )\n+ f.close()\n+ exp = 3\n+ obs = FileUtils.getNbLinesInSingleFile( file )\n+ self.assertEquals( exp, obs )\n+ os.remove( file )\n+ \n+ def test_getNbLinesInSingleFile_non_empty_endEmptyLine( self ):\n+ file = "dummyFile_%s" % ( self._uniqId )\n+ f = open( file, "w" )\n+ f.write( "line1\\n" )\n+ f.write( "line2\\n" )\n+ f.write( "line3\\n" )\n+ f.write( "\\n" )\n+ f.close()\n+ exp = 3\n+ obs = FileUtils.getNbLinesInSingleFile( file )\n+ self.assertEquals( exp, obs )\n+ os.remove( file )\n+ \n+ def test_getNbLinesInSingleFile_empty( self ):\n+ file = "dummyFile_%s" % ( self._uniqId )\n+ os.system( "touch %s" % ( file ) )\n+ exp = 0\n+ obs = FileUtils.getNbLinesInSingleFile( file )\n+ self.assertEquals( exp, obs )\n+ os.remove( file )\n+ \n+ def test_getNbLinesInFileList_non_empty( self ):\n+ f = open("dummy1.txt", "w")\n+ f.write("line1\\n")\n+ f.write("line2\\n")\n+ f.write("line3")\n+ f.close()\n+ f = open("dummy2.txt", "w")\n+ f.write("line1\\n")\n+ f.write("line2\\n")\n+ f.write("line3")\n+ f.close()\n+ f = open("dummy3.txt", "w")\n+ f.write("line1\\n")\n+ f.write("line2\\n")\n+ f.write("line3")\n+ f.close()\n+ lFiles = [ "dummy1.txt", "dummy2.txt", "dummy3.txt" ]\n+ exp = 9\n+ obs = FileUtils.getNbLinesInFileList( lFiles )\n+ self.assertEqual( exp, obs )\n+ for f in lFiles:\n+ os.remove( f )\n+ 
\n+ def test_catFilesByPattern( self ):\n+ f = open("dummy1.txt", "w")\n+ f.write("line11\\n")\n+ f.write("line12\\n")\n+ f.write("line13")\n+ f.close()\n+ '..b's("dummy-2.txt"))\n+ FileUtils.removeFilesByPattern("dummy*")\n+ \n+ def test_splitFileAccordingToLineNumber_more_maxLines_than_lines(self):\n+ inputFile = "dummy.txt"\n+ obsFile1 = "dummy-1.txt"\n+ \n+ f = open(inputFile, "w")\n+ f.write("line1\\n")\n+ f.write("line2\\n")\n+ f.write("line3\\n")\n+ f.close()\n+\n+ exp1 = "line1\\nline2\\nline3\\n"\n+ \n+ FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n+ \n+ obs1 = open(obsFile1).read()\n+\n+ self.assertEqual(exp1, obs1)\n+ self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+ FileUtils.removeFilesByPattern("dummy*")\n+ \n+ def test_splitFileAccordingToLineNumber_empty_file(self):\n+ inputFile = "dummy.txt"\n+ obsFile1 = "dummy-1.txt"\n+\n+ os.system( "touch %s" % ( inputFile ) )\n+\n+ exp1 = ""\n+ \n+ FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n+ \n+ obs1 = open(obsFile1).read()\n+ \n+ self.assertEqual(exp1, obs1)\n+ self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+ FileUtils.removeFilesByPattern("dummy*")\n+ \n+ def test_splitFileAccordingToLineNumber_0_lines(self):\n+ inputFile = "dummy.txt"\n+ obsFile1 = "dummy-1.txt"\n+ \n+ f = open(inputFile, "w")\n+ f.write("line1\\n")\n+ f.write("line2\\n")\n+ f.write("line3\\n")\n+ f.close()\n+\n+ exp1 = "line1\\nline2\\nline3\\n"\n+ \n+ FileUtils.splitFileAccordingToLineNumber(inputFile, 0)\n+ \n+ obs1 = open(obsFile1).read()\n+ \n+ self.assertEqual(exp1, obs1)\n+ self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n+ FileUtils.removeFilesByPattern("dummy*")\n+ \n+ def _writeFile( self, fileName ):\n+ inFile = open(fileName, \'w\')\n+ inFile.write(">Sequence_de_reference\\n")\n+ inFile.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+ inFile.write(">Lignee1_mismatch\\n")\n+ inFile.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n+ 
inFile.write(">Lignee2_insertion\\n")\n+ inFile.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n+ inFile.write(">Lignee3_deletion\\n")\n+ inFile.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+ inFile.close() \n+ \n+ def _writeFileWithEmptyLine( self, fileName ):\n+ fileWithEmptyLine = open(fileName, \'w\')\n+ fileWithEmptyLine.write(">Sequence_de_reference\\n")\n+ fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+ fileWithEmptyLine.write("\\n\\n")\n+ fileWithEmptyLine.write(">Lignee1_mismatch\\n")\n+ fileWithEmptyLine.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n+ fileWithEmptyLine.write("\\n\\n")\n+ fileWithEmptyLine.write(">Lignee2_insertion\\n")\n+ fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n+ fileWithEmptyLine.write("\\n")\n+ fileWithEmptyLine.write(">Lignee3_deletion\\n")\n+ fileWithEmptyLine.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n+ fileWithEmptyLine.close() \n+ \n+ def _writeFileWithRepeatedBlanks( self, fileName ):\n+ fileWithRepeatedBlanks = open(fileName, \'w\')\n+ fileWithRepeatedBlanks.write(">Sequ ence_de _reference\\n")\n+ fileWithRepeatedBlanks.write("ATTTT GCAGTCTT TTCGAG- ----GCCATT GCT\\n")\n+ fileWithRepeatedBlanks.close() \n+ \n+ def _writeFileWithoutRepeatedBlanks( self, fileName ):\n+ fileWithoutRepeatedBlanks = open(fileName, \'w\')\n+ fileWithoutRepeatedBlanks.write(">Sequ ence_de _reference\\n")\n+ fileWithoutRepeatedBlanks.write("ATTTT GCAGTCTT TTCGAG- ----GCCATT GCT\\n")\n+ fileWithoutRepeatedBlanks.close()\n+ \n+test_suite = unittest.TestSuite()\n+test_suite.addTest( unittest.makeSuite( Test_FileUtils ) )\n+if __name__ == "__main__":\n+ unittest.TextTestRunner(verbosity=2).run( test_suite )\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/BedWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/BedWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,100 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class BedWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with BED format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle:     handle to the file
+    @type handle:     file handle
+    @ivar header:     first lines of the file
+    @type header:     string
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName:    name of the file
+        @type    fileName:    string
+        @param verbosity: verbosity
+        @type    verbosity: int
+        """
+        self.header = "track name=reads description=\"Reads\" useScore=0 visibility=full offset=0\n"
+        super(BedWriter, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["bed"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "bed"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the track
+        @param title: the title of the track
+        @type    title: string
+        """
+        if title != None:
+            self.header = "track name=%s description=\"%s\" useScore=0 visibility=full offset=0\n" % (title, title)
+
+
+    def copyProperties(self, bedParser):
+        """
+        Copy the properties collected by a parser, to produce a similar output
+        @param bedParser: a BED Parser parser
+        @type    bedParser: class L{BedParser<BedParser>}
+        """
+        self.setTitle(bedParser.title)
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GBrowse format
+        @param transcript: transcript to be printed
+        @type    transcript: class L{Transcript<Transcript>}
+        @return:                     a string
+        """
+        return transcript.printBed()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/BedWriter.pyc

Binary file commons/core/writer/BedWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/CsvWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/CsvWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,153 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import random
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+class CsvWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with CSV (Excel) format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        super(CsvWriter, self).__init__(fileName, verbosity)
+        self.header   = ""
+        self.title    = "chromosome,start,end,strand,exons,tags\n"
+        self.modified = False
+
+
+    def __del__(self):
+        """
+        Destructor
+        (Trick to write 1 tag per column)
+        """
+        if self.handle != None:
+            self.modifyCsv()
+        super(CsvWriter, self).__del__()
+
+
+    def close(self):
+        if self.handle != None:
+            self.modifyCsv()
+        super(CsvWriter, self).close()
+
+
+    def modifyCsv(self):
+        """
+        Clean CSV file so that there is one column per tag
+        """
+        if self.modified:
+            return
+
+        # read all the tags
+        self.handle.close()
+        self.handle = open(self.fileName)
+        nbFirstFields = 5
+        tags = set()
+        if self.verbosity >= 10:
+            print "Modifying CSV file..."
+        number = -1
+        for number, line in enumerate(self.handle):
+            if number != 0:
+                theseTags = line.strip().split(",")[nbFirstFields:]
+                for tag in theseTags:
+                    if tag.find("=") != -1:
+                        (key, value) = tag.split("=", 1)
+                        if value != None:
+                            tags.add(key)
+        if self.verbosity >= 10:
+            print " ...done"
+
+        # re-write the file
+        tmpFileName = "tmpFile%d.csv" % (random.randint(0, 100000))
+        tmpFile = open(tmpFileName, "w")
+        self.handle.seek(0)
+        progress = Progress(number + 1, "Re-writting CSV file", self.verbosity)
+        tmpFile.write(self.title.replace("tags", ",".join(sorted(tags))))
+        for line in self.handle:
+            tagValues = dict([(key, None) for key in tags])
+            tmpFile.write(",".join(line.strip().split(",")[:nbFirstFields]))
+            for tag in line.strip().split(",")[nbFirstFields:]:
+                if tag.find("=") != -1:
+                    key = tag.split("=", 1)[0]
+                    tagValues[key] = tag.split("=", 1)[1]
+                else:
+                    tagValues[key] += ";%s" % (tag)
+            for key in sorted(tagValues.keys()):
+                tmpFile.write(",%s" % (tagValues[key]))
+            tmpFile.write("\n")
+            progress.inc()
+        tmpFile.close()
+
+        # replace former file
+        import shutil
+        shutil.move(tmpFile.name, self.fileName)
+        progress.done()
+        self.modified = True
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["csv", "xls", "excel"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "csv"
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GFF2 format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        return transcript.printCsv()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/CsvWriter.pyc

Binary file commons/core/writer/CsvWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/EmblWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/EmblWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,116 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class EmblWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into several files with EMBL format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.fileName = fileName
+        self.verbosity = verbosity
+        self.handles = {}
+        self.handle = None
+
+
+    def __del__(self):
+        """
+        Destructor
+        Trick to append the sequences at the end of the EMBL files
+        """
+        handle                = open(self.sequenceFileName)
+        currentHandle = None
+        for line in handle:
+            if line[0] == ">":
+                chromosome = line[1:].strip()
+                if chromosome in self.handles:
+                    currentHandle = self.handles[chromosome]
+                else:
+                    currentHandle = None
+            else:
+                if currentHandle != None:
+                    currentHandle.write(line)
+        handle.close()
+        for handle in self.handles.values():
+            handle.close()
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["embl"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "embl"
+
+
+    def addTranscript(self, transcript):
+        """
+        Add a transcript to the list of transcripts to be written
+        @param transcript: transcript to be written
+        @type    transcript: class L{Transcript<Transcript>}
+        """
+        chromosome = transcript.getChromosome()
+        if chromosome not in self.handles:
+            self.handles[chromosome] = open("%s%s.embl" % (self.fileName[:-len(".embl")], chromosome.title()), "w")
+        self.handles[chromosome].write(self.printTranscript(transcript))
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GFF2 format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        return transcript.printEmbl()
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/EmblWriter.pyc

Binary file commons/core/writer/EmblWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/FastaWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/FastaWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,77 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.SequenceListWriter import SequenceListWriter
+
+
+class FastaWriter(SequenceListWriter):
+    """
+    A class that writes a sequence list into a file with FASTA format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    @ivar header: first lines of the file
+    @type header: string
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        super(FastaWriter, self).__init__(fileName, verbosity)
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["fasta", "mfa"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "fasta"
+
+
+    def getLine(self, sequence):
+        """
+        Convert a sequence
+        @param sequence: sequence to be written
+        @type    sequence: class L{Sequence<Sequence>}
+        """
+        return sequence.printFasta()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/FastaWriter.pyc

Binary file commons/core/writer/FastaWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/FastqWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/FastqWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,78 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.SequenceListWriter import SequenceListWriter
+
+
+class FastqWriter(SequenceListWriter):
+    """
+    A class that writes a sequence list into a file with FASTQ format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    @ivar header: first lines of the file
+    @type header: string
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        super(FastqWriter, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["fastq", "mfq"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "fastq"
+
+
+    def getLine(self, sequence):
+        """
+        Convert a sequence
+        @param sequence: sequence to be written
+        @type    sequence: class L{Sequence<Sequence>}
+        """
+        return sequence.printFastq()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/FastqWriter.pyc

Binary file commons/core/writer/FastqWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/GbWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/GbWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,102 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class GbWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with GBrowse format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    @ivar header: first lines of the file
+    @type header: string
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.header = "[READS]\nbgcolor = red\nstrand_arrow = 1\n\n"
+        super(GbWriter, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["gb", "gbrowse"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "gb"
+
+
+    def setColor(self, color):
+        """
+        Set the color of the track
+        @param color: the color of the track
+        @type    color: string
+        """
+        if color != None:
+            self.header = "[READS]\nbgcolor= %s\nstrand_arrow = 1\n\n" % (color)
+
+
+    def copyProperties(self, gbParser):
+        """
+        Copy the properties collected by a parser, to produce a similar output
+        @param gbParser: a GBrowse parser
+        @type    gbParser: class L{GbParser<GbParser>}
+        """
+        self.setColor(gbParser.color)
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GBrowse format
+        Possibly skip the reference if already put
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        if self.lastChromosome != None and self.lastChromosome == transcript.getChromosome():
+            return transcript.printGBrowseLine()
+        return transcript.printGBrowse()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/GbWriter.pyc

Binary file commons/core/writer/GbWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/Gff2Writer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/Gff2Writer.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,89 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class Gff2Writer(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with GFF2 format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.header = ""
+        self.title = ""
+        super(Gff2Writer, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["gff2"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "gff2"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the transcripts
+        @param title: the title of the transcripts
+        @type    title: string
+        """
+        self.title = title
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GFF2 format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        return transcript.printGff2(self.title)
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/Gff2Writer.pyc

Binary file commons/core/writer/Gff2Writer.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/Gff3Writer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/Gff3Writer.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,130 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class Gff3Writer(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with GFF3 format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0, title="S-MART", feature="transcript", featurePart="exon"):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.header = ""
+        self.title    = title
+        self.feature = feature
+        self.featurePart = featurePart
+        super(Gff3Writer, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["gff3", "gff"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "gff3"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the transcripts
+        @param title: the title of the transcripts
+        @type    title: string
+        """
+        self.title = title
+
+    def setFeature(self, feature):
+        """
+        Set the name of the feature
+        @param title: the title of the feature
+        @type    feature: string
+        """
+        self.feature = feature
+
+    def setFeaturePart(self, featurePart):
+        """
+        Set the name of the feature part
+        @param title: the title of the feature part
+        @type    featurePart: string
+        """
+        self.featurePart = featurePart
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GFF2 format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        direction = "+"
+        if transcript.getDirection() == -1:
+            direction = "-"
+        transcript.sortExonsIncreasing()
+        if "ID" not in transcript.getTagValues():
+            transcript.setTagValue("ID", transcript.getUniqueName())
+        feature = self.feature
+        tags = transcript.tags
+        if "feature" in transcript.getTagNames():
+            feature = transcript.getTagValue("feature")
+            del transcript.tags["feature"]
+        score = "."
+        if "score" in transcript.getTagNames():
+            score = "%d" % (int(transcript.getTagValue("score")))
+            del transcript.tags["score"]
+        comment = transcript.getTagValues(";", "=")
+        string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (transcript.getChromosome(), self.title, feature, transcript.getStart(), transcript.getEnd(), score, direction, comment)
+        if len(transcript.exons) > 1:
+            for i, exon in enumerate(transcript.getExons()):
+                if "score" in exon.getTagNames():
+                    score = "%d" % (int(exon.getTagValue("score")))
+                string += "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tID=%s-%s%d;Name=%s-%s%d;Parent=%s\n" % (transcript.getChromosome(), self.title,self.featurePart, exon.getStart(), exon.getEnd(), score, direction, transcript.getTagValue("ID"),self.featurePart, i+1, transcript.name,self.featurePart, i+1, transcript.getTagValue("ID"))
+        self.tags = tags
+        return string
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/Gff3Writer.pyc

Binary file commons/core/writer/Gff3Writer.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/GtfWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/GtfWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,89 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class GtfWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with GTF format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.header = ""
+        self.title    = "S-MART"
+        super(GtfWriter, self).__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["gtf", "gtf2"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "gtf"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the transcripts
+        @param title: the title of the transcripts
+        @type    title: string
+        """
+        self.title = title
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with GTF format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        return transcript.printGtf(self.title)
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/GtfWriter.pyc

Binary file commons/core/writer/GtfWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/MapWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/MapWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,100 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class MapWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with GFF3 format
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0, title="S-MART"):
+        """
+        Constructor
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.header = ""
+        self.title    = title
+        TranscriptListWriter.__init__(self, fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["map"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "map"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the transcripts
+        @param title: the title of the transcripts
+        @type    title: string
+        """
+        self.title = title
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript to map format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        name = transcript.name
+        if "nbOccurrences" in transcript.getTagNames() and transcript.getTagValue("nbOccurrences") != 1 and transcript.getTagValue("occurrences"):
+            name = "%s-%d" % (name, transcript.getTagValue("occurrence"))
+        sizes   = []
+        starts  = []
+        transcript.sortExonsIncreasing()
+        for exon in transcript.getExons():
+            sizes.append("%d" % (exon.getSize()))
+            starts.append("%d" % (exon.getStart() - transcript.getStart()))
+        return "%s\t%s\t%d\t%d\n" % (name, transcript.getChromosome(), transcript.getStart(), transcript.getEnd()+1)
+
+
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/MapWriter.pyc

Binary file commons/core/writer/MapWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/MySqlTranscriptWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/MySqlTranscriptWriter.py Fri Jan 18 04:54:14 2013 -0500

[

b'@@ -0,0 +1,214 @@\n+#\n+# Copyright INRA-URGI 2009-2010\n+# \n+# This software is governed by the CeCILL license under French law and\n+# abiding by the rules of distribution of free software. You can use,\n+# modify and/ or redistribute the software under the terms of the CeCILL\n+# license as circulated by CEA, CNRS and INRIA at the following URL\n+# "http://www.cecill.info".\n+# \n+# As a counterpart to the access to the source code and rights to copy,\n+# modify and redistribute granted by the license, users are provided only\n+# with a limited warranty and the software\'s author, the holder of the\n+# economic rights, and the successive licensors have only limited\n+# liability.\n+# \n+# In this respect, the user\'s attention is drawn to the risks associated\n+# with loading, using, modifying and/or developing or reproducing the\n+# software by the user in light of its specific status of free software,\n+# that may mean that it is complicated to manipulate, and that also\n+# therefore means that it is reserved for developers and experienced\n+# professionals having in-depth computer knowledge. 
Users are therefore\n+# encouraged to load and test the software\'s suitability as regards their\n+# requirements in conditions enabling the security of their systems and/or\n+# data to be ensured and, more generally, to use and operate it in the\n+# same conditions as regards security.\n+# \n+# The fact that you are presently reading this means that you have had\n+# knowledge of the CeCILL license and that you accept its terms.\n+#\n+import os\n+import random\n+from SMART.Java.Python.mySql.MySqlTable import MySqlTable\n+from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n+from SMART.Java.Python.misc.Progress import Progress\n+\n+class MySqlTranscriptWriter(object):\n+ """\n+ A class that writes a transcript list into a mySQL table\n+ @ivar name: name of the tables \n+ @type name: string\n+ @ivar tables: the tables\n+ @type tables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}\n+ @ivar mySqlConnection: connection to a MySQL database\n+ @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n+ @ivar tmpTranscriptFileHandles: files where transcripts are temporary stored, before copy into database\n+ @type tmpTranscriptFileHandles: dict of file handles\n+ @ivar nbTranscriptsByChromosome: number of transcripts written\n+ @type nbTranscriptsByChromosome: dict of int (one for each chromosome)\n+ @ivar randomNumber: a random number, used for having a unique name for the tables\n+ @type randomNumber: int\n+ @ivar toBeWritten: there exists transcripts to be copied into database\n+ @type toBeWritten: bool \n+ @ivar verbosity: verbosity\n+ @type verbosity: int \n+ """\n+\n+\n+ def __init__(self, connection, name = None, verbosity = 0):\n+ """\n+ Constructor\n+ @param name: name of the file \n+ @type name: string\n+ @param verbosity: verbosity\n+ @type verbosity: int\n+ """\n+ self.name = name\n+ self.verbosity = verbosity\n+ self.tables = {}\n+ self.indices = {}\n+ self.tmpTranscriptFileHandles = {}\n+ 
self.nbTranscriptsByChromosome = {}\n+ self.toBeWritten = False\n+ self.randomNumber = random.randint(0, 100000)\n+ self.mySqlConnection = connection\n+ self.nbTmpFiles = 100\n+ self.transcriptValues = {}\n+ self.nbTranscriptValues = 1000\n+ if self.name != None:\n+ pos = self.name.rfind(os.sep)\n+ if pos != -1:\n+ self.name = self.name[pos+1:]\n+ \n+\n+ def __del__(self):\n+ '..b' @param transcript: transcript to be written\n+ @type transcript: class L{Transcript<Transcript>}\n+ """\n+ chromosome = transcript.getChromosome()\n+ if chromosome not in self.tables:\n+ self.createTable(chromosome)\n+ self.nbTranscriptsByChromosome[chromosome] = 1\n+ if chromosome not in self.transcriptValues:\n+ self.transcriptValues[chromosome] = []\n+ \n+ self.transcriptValues[chromosome].append(transcript.getSqlValues())\n+\n+ self.nbTranscriptsByChromosome[chromosome] += 1\n+ self.toBeWritten = True\n+ if sum([len(transcripts) for transcripts in self.transcriptValues.values()]) > self.nbTranscriptValues:\n+ self.write() \n+\n+\n+ def addElement(self, element):\n+ """\n+ Same as "addTranscript"\n+ @param element: transcript to be written\n+ @type element: class L{Transcript<Transcript>}\n+ """\n+ self.addTranscript(element)\n+\n+\n+# def addTranscriptList(self, transcriptListParser):\n+# """\n+# Add a list of transcripts to the transcripts to be written\n+# @param transcriptListParser: transcripts to be written\n+# @type transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}\n+# """\n+# progress = Progress(transcriptListParser.getNbTranscripts(), "Storing %s into database" % (transcriptListParser.fileName), self.verbosity)\n+# for transcript in transcriptListParser.getIterator():\n+# self.addTranscript(transcript)\n+# progress.inc()\n+# progress.done()\n+ \n+ \n+ def addTranscriptList(self, transcriptListParser):\n+ """\n+ Add a list of transcripts to the transcripts to be written\n+ @param transcriptListParser: transcripts to be written\n+ @type 
transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}\n+ """\n+ self.transcriptListParser = transcriptListParser\n+ self.mySqlConnection.executeManyQueriesIterator(self)\n+ \n+ \n+ def getIterator(self):\n+ """\n+ Iterator to the SQL commands to insert the list\n+ """\n+ progress = Progress(self.transcriptListParser.getNbTranscripts(), "Storing %s into database" % (self.transcriptListParser.fileName), self.verbosity)\n+ for transcript in self.transcriptListParser.getIterator():\n+ chromosome = transcript.getChromosome()\n+ if chromosome not in self.tables:\n+ self.createTable(chromosome)\n+ self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1\n+ values = transcript.getSqlValues()\n+ yield "INSERT INTO \'%s\' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))\n+ progress.inc()\n+ progress.done()\n+ \n+ \n+ def write(self):\n+ """\n+ Copy the content of the files into the database\n+ (May add transcripts to already created databases)\n+ """\n+ for chromosome in self.transcriptValues:\n+ if chromosome in self.transcriptValues:\n+ self.tables[chromosome].insertMany(self.transcriptValues[chromosome])\n+ self.transcriptValues = {}\n+ self.toBeWritten = False\n+ \n+ \n+ def getTables(self):\n+ """\n+ Get the tables\n+ @return: the mySQL tables\n+ """\n+ if self.toBeWritten:\n+ self.write()\n+ return self.tables\n+\n+ \n+ \n+ def removeTables(self):\n+ """\n+ Drop the tables\n+ """\n+ for chromosome in self.tables:\n+ self.tables[chromosome].remove()\n'

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/MySqlTranscriptWriter.pyc

Binary file commons/core/writer/MySqlTranscriptWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/SamWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/SamWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,101 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import random
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class SamWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with SAM format
+    The "@SQ" header lines need the chromosome sizes, which are only known once
+    all the transcripts have been seen: the header is thus added when closing.
+    @ivar sizes: estimated sizes of the chromosomes (greatest end seen so far)
+    @type sizes: dict of string to int
+    @ivar headerWritten: whether the header has already been added to the file
+    @type headerWritten: boolean
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor
+        @param fileName:  name of the file
+        @type  fileName:  string
+        @param verbosity: verbosity
+        @type  verbosity: int
+        """
+        super(SamWriter, self).__init__(fileName, verbosity)
+        self.sizes         = {}
+        self.headerWritten = False
+
+
+    def close(self):
+        """
+        Close file, and add the "@SQ" header lines on top of it (once only)
+        """
+        super(SamWriter, self).close()
+        # "headerWritten" does not exist if the constructor failed early:
+        # there is nothing to rewrite in that case
+        if getattr(self, "headerWritten", True):
+            return
+        # Build the temporary file next to the output file (not in the
+        # current directory), so that "os.rename" stays on one file system
+        tmpFileName = "%s.tmp%d" % (self.fileName, random.randint(0, 100000))
+        with open(tmpFileName, "w") as tmpHandle:
+            # Sort the chromosomes to get a reproducible header
+            for chromosome in sorted(self.sizes):
+                tmpHandle.write("@SQ\tSN:%s\tLN:%d\n" % (chromosome, self.sizes[chromosome]))
+            with open(self.fileName) as bodyHandle:
+                for line in bodyHandle:
+                    tmpHandle.write(line)
+        os.rename(tmpFileName, self.fileName)
+        self.headerWritten = True
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        @return: the list of the format names handled by this writer
+        """
+        return ["sam"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        @return: the extension, without dot
+        """
+        return "sam"
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with SAM format
+        Also record the end of the transcript, used to estimate the size of its chromosome
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        chromosome = transcript.getChromosome()
+        self.sizes[chromosome] = max(transcript.getEnd(), self.sizes.get(chromosome, 0))
+        return transcript.printSam()
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/SamWriter.pyc

Binary file commons/core/writer/SamWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/SequenceListWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/SequenceListWriter.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,94 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+class SequenceListWriter(object):
+    """
+    An interface that writes a list of sequences into a file
+    The formatting of a sequence is delegated to a "getLine" method, which is
+    not defined here and has to be provided by the sub-classes.
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    @ivar handle: handle to the file
+    @type handle: file handle
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor (open the file for writing)
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.fileName = fileName
+        self.verbosity = verbosity
+        self.handle = open(self.fileName, "w")
+
+
+    def __del__(self):
+        """
+        Destructor
+        """
+        self.close()
+
+
+    def write(self):
+        """
+        No-op
+        """
+        pass
+
+
+    def close(self):
+        """
+        Close writer (may safely be called several times)
+        """
+        # The destructor also runs when "open" failed in the constructor:
+        # the "handle" attribute does not exist in that case
+        handle = getattr(self, "handle", None)
+        if handle is not None and not handle.closed:
+            handle.close()
+
+
+    def addSequence(self, sequence):
+        """
+        Write a sequence into the file, formatted by "getLine"
+        @param sequence: sequence to be written
+        @type    sequence: class L{Sequence<Sequence>}
+        """
+        self.handle.write(self.getLine(sequence))
+
+
+    def addElement(self, element):
+        """
+        Same as "addSequence"
+        @param element: sequence to be written
+        @type    element: class L{Sequence<Sequence>}
+        """
+        self.addSequence(element)
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/SequenceListWriter.pyc

Binary file commons/core/writer/SequenceListWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/TranscriptListWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/TranscriptListWriter.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,163 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from SMART.Java.Python.misc.Progress import Progress
+
+class TranscriptListWriter(object):
+    """
+    An interface that writes a transcript list into a file
+    The formatting of a transcript is delegated to a "printTranscript" method,
+    which is not defined here and has to be provided by the sub-classes.
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    @ivar handle: handle to the file
+    @type handle: file handle
+    @ivar header: first lines of the file
+    @type header: string
+    @ivar started: whether some transcripts have already been written
+    @type started: boolean
+    @ivar lastChromosome: the chromosome of the transcript which was inserted last
+    @type lastChromosome: string
+    @ivar sequenceFileName: name of the multi-fasta file given to "addSequenceFile"
+    @type sequenceFileName: string
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor (open the file for writing)
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.fileName = fileName
+        self.verbosity = verbosity
+        self.handle = open(self.fileName, "w")
+        self.started = False
+        self.lastChromosome = None
+        self.header = ""
+        self.sequenceFileName = None
+
+
+    def __del__(self):
+        """
+        Destructor
+        """
+        self.close()
+
+
+    def close(self):
+        """
+        Close writer (may safely be called several times)
+        """
+        # The destructor also runs when "open" failed in the constructor:
+        # the "handle" attribute does not exist in that case
+        handle = getattr(self, "handle", None)
+        if handle is not None and not handle.closed:
+            handle.close()
+        self.handle = None
+
+
+    def addTranscript(self, transcript):
+        """
+        Write a transcript into the file (preceded by the header, for the first transcript)
+        @param transcript: transcript to be written
+        @type    transcript: class L{Transcript<Transcript>}
+        """
+        if not self.started:
+            self.handle.write(self.header)
+            self.started = True
+
+        self.handle.write(self.printTranscript(transcript))
+        self.lastChromosome = transcript.getChromosome()
+
+
+    def addElement(self, element):
+        """
+        Same as "addTranscript"
+        @param element: transcript to be written
+        @type    element: class L{Transcript<Transcript>}
+        """
+        self.addTranscript(element)
+
+
+    def addTranscriptList(self, transcriptList):
+        """
+        Write a list of transcripts into the file, and show the progression
+        @param transcriptList: transcripts to be written
+        @type    transcriptList: class L{TranscriptList<TranscriptList>}
+        """
+        progress = Progress(transcriptList.getNbTranscripts(), "Writing transcripts", self.verbosity)
+        for transcript in transcriptList.getIterator():
+            self.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+
+
+    def addTranscriptTable(self, transcriptTable):
+        """
+        Write the transcripts of a mySQL table into the file
+        @param transcriptTable: transcripts to be written
+        @type    transcriptTable: class L{MySqlTranscriptTable<MySqlTranscriptTable>}
+        """
+        for transcript in transcriptTable.getIterator():
+            self.addTranscript(transcript)
+
+
+    def setTitle(self, title):
+        """
+        Possibly write a title for the list (by default, do nothing)
+        @param title: a title for the list
+        @type title:    string
+        """
+        pass
+
+    def setFeature(self, feature):
+        """
+        Set the name of the feature (by default, do nothing)
+        @param feature: the name of the feature
+        @type    feature: string
+        """
+        pass
+
+    def setFeaturePart(self, featurePart):
+        """
+        Set the name of the feature part (by default, do nothing)
+        @param featurePart: the name of the feature part
+        @type    featurePart: string
+        """
+        pass
+
+
+    def addSequenceFile(self, fileName):
+        """
+        Set the multi-fasta file of the sequences
+        @param fileName: name of the multi-fasta file
+        @type    fileName: string
+        """
+        self.sequenceFileName = fileName
+
+
+    def write(self):
+        """
+        No-op
+        """
+        pass

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/TranscriptListWriter.pyc

Binary file commons/core/writer/TranscriptListWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/TranscriptWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/TranscriptWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,189 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import sys
+from commons.core.writer.WriterChooser import WriterChooser
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+
+class TranscriptWriter(object):
+    """
+    An interface class that writes a list of transcripts, handle different formats
+    @ivar container: container of the data
+    @type container: L{TranscriptContainer<TranscriptContainer>}
+    @ivar format: format of the data to be printed
+    @type format: string
+    @ivar file: the file where to print
+    @type file: string
+    @ivar type: type of the data (transcript, sequence or sql)
+    @type type: string
+    @ivar writer: the underlying writer, which depends on the format
+    @type writer: L{TranscriptListWriter<TranscriptListWriter>} or None
+    @ivar mode: use a container ("container") or enter transcripts one by one ("transcript")
+    @type mode: string
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    """
+
+    def __init__(self, file, format, verbosity = 0):
+        """
+        Constructor
+        Exit the program if no format is given
+        @param file: file where to print
+        @type file: string
+        @param format: format of the data
+        @type format: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.container = None
+        self.format = format
+        self.file = file
+
+        self.verbosity = verbosity
+        self.type = None
+        self.writer = None
+        self.mode = None
+        if self.format is None:
+            sys.exit("Error! Writer input format is empty!")
+
+        if self.format == "sql":
+            self.type = "sql"
+            # Only the base name of the file is given to the mySQL writer
+            pos = self.file.rfind(os.sep)
+            if pos > -1:
+                self.file = self.file[pos+1:]
+            self.writer = MySqlTranscriptWriter(self.file, self.verbosity)
+        else:
+            writerChooser = WriterChooser(self.verbosity)
+            writerChooser.findFormat(self.format)
+            self.writer = writerChooser.getWriter(self.file)
+            self.type = writerChooser.getType()
+
+
+    def close(self):
+        """
+        Close writer
+        """
+        if self.writer is not None:
+            self.writer.close()
+
+
+    def setContainer(self, container):
+        """
+        Set a container for the data
+        Raise an exception if transcripts have already been added one by one
+        @param container: container of the data
+        @type container: class L{TranscriptContainer<TranscriptContainer>}
+        """
+        self.container = container
+        if self.mode == "transcript":
+            raise Exception("Error! TranscriptWriter '%s' on 'transcript' mode is currently used on 'container' mode." % (self.file))
+        self.mode = "container"
+
+
+    def addTranscript(self, transcript):
+        """
+        Add a transcript to write
+        Exit the program if a container has already been set
+        @param transcript: a transcript
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        # Check the mode first: nothing should be written in case of conflict
+        if self.mode == "container":
+            sys.exit("Error! TranscriptWriter '%s' on 'container' mode is currently used on 'transcript' mode." % (self.file))
+        self.writer.addTranscript(transcript)
+        self.mode = "transcript"
+
+
+    def addElement(self, transcript):
+        """
+        Same as addTranscript
+        @param transcript: a transcript
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        self.addTranscript(transcript)
+
+
+    def setTitle(self, title):
+        """
+        Possibly write a title for the list
+        @param title: a title for the list
+        @type title: string
+        """
+        if self.writer is not None:
+            self.writer.setTitle(title)
+
+    def setFeature(self, feature):
+        """
+        Possibly set the name of the feature
+        @param feature: the name of the feature
+        @type    feature: string
+        """
+        if self.writer is not None:
+            self.writer.setFeature(feature)
+
+    def setFeaturePart(self, featurePart):
+        """
+        Possibly set the name of the feature part
+        @param featurePart: the name of the feature part
+        @type    featurePart: string
+        """
+        if self.writer is not None:
+            self.writer.setFeaturePart(featurePart)
+
+    def setStrands(self, strands):
+        """
+        Possibly consider both strands separately
+        (do nothing if the underlying writer does not handle strands)
+        @param strands: whether both strands should be considered separately
+        @type  strands: boolean
+        """
+        # Not every writer provides "setStrands": skip those which do not
+        if self.writer is not None and hasattr(self.writer, "setStrands"):
+            self.writer.setStrands(strands)
+
+
+    def write(self):
+        """
+        Write the content and possibly convert data
+        """
+        # File writers: transcripts added one by one are already written,
+        # only a container still has to be sent to the writer
+        if self.type == "transcript" or self.type == "sequence":
+            if self.mode == "container":
+                self.writer.addTranscriptList(self.container)
+            return
+
+        # Other type of writer, filled one transcript at a time
+        if self.mode == "transcript":
+            self.writer.write()
+            return
+
+        # Other type of writer, filled with a container: store the data if
+        # needed, then rename the tables after the output name
+        if self.container.format != "sql":
+            self.container.storeIntoDatabase()
+        tables = self.container.getTables()
+        for chromosome in tables:
+            tables[chromosome].rename("%s_%s" % (self.file, chromosome))
+        return
+
+
+    def addSequenceFile(self, fileName):
+        """
+        Give the multi-fasta file of the sequences to the underlying writer
+        @param fileName: name of the multi-fasta file
+        @type    fileName: string
+        """
+        self.writer.addSequenceFile(fileName)
+
\ No newline at end of file

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/TranscriptWriter.pyc

Binary file commons/core/writer/TranscriptWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/UcscWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/UcscWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,73 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.BedWriter import BedWriter
+
+class UcscWriter(BedWriter):
+    """
+    A BED writer which prints each transcript with UCSC BED format (minor differences with BED format)
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor (everything is delegated to the parent class)
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        parent = super(UcscWriter, self)
+        parent.__init__(fileName, verbosity)
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the names of the formats handled by this writer
+        """
+        formats = ["ucsc"]
+        return formats
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file (same as BED)
+        """
+        extension = "bed"
+        return extension
+
+
+    def printTranscript(self, transcript):
+        """
+        Export the given transcript with UCSC format
+        @param transcript: transcript to be printed
+        @type transcript: class L{Transcript<Transcript>}
+        @return: a string
+        """
+        line = transcript.printUcsc()
+        return line
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/UcscWriter.pyc

Binary file commons/core/writer/UcscWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/WigWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/WigWriter.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,139 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+
+
+class WigWriter(TranscriptListWriter):
+    """
+    A class that writes a transcript list into a file with WIGGLE format
+    The coverage is accumulated in memory, and the file is printed when the
+    writer is closed (or destroyed).
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    @ivar data: number of exons covering each position, per strand (-1, 0 or 1) and chromosome
+    @type data: dict of int to dict of string to dict of int to int
+    @ivar title: name of the track
+    @type title: string
+    @ivar strands: whether each strand is considered separately
+    @type strands: boolean
+    @ivar handle: not used to print the file (always None)
+    @type handle: None
+    @ivar toBeWritten: whether the file should be (re-)printed when closing
+    @type toBeWritten: boolean
+    """
+
+
+    def __init__(self, fileName, verbosity = 0):
+        """
+        Constructor (the file is not opened here)
+        @param fileName: name of the file
+        @type fileName: string
+        @param verbosity: verbosity
+        @type verbosity: int
+        """
+        self.fileName    = fileName
+        self.verbosity   = verbosity
+        self.data        = {-1: {}, 0: {}, 1: {}}
+        self.title       = "Reads"
+        self.strands     = False
+        self.handle      = None
+        # The file is printed even if no transcript is added
+        self.toBeWritten = True
+
+
+    def __del__(self):
+        """
+        Destructor
+        Print the file if this has not been done yet
+        """
+        self.close()
+
+
+    def close(self):
+        """
+        Close writer
+        Actually print the file, unless it is already up to date
+        """
+        # "toBeWritten" does not exist if the constructor did not complete
+        if not getattr(self, "toBeWritten", False):
+            return
+        strand2string = {-1: "-", 1: "+", 0: ""}
+        with open(self.fileName, "w") as handle:
+            handle.write("track type=wiggle_0 name=\"%s\"\n" % (self.title))
+            for strand in self.data:
+                for chromosome in sorted(self.data[strand]):
+                    counts = self.data[strand][chromosome]
+                    handle.write("variableStep chrom=%s%s\n" % (chromosome, strand2string[strand]))
+                    for pos in sorted(counts):
+                        handle.write("%d\t%d\n" % (pos, counts[pos]))
+        self.toBeWritten = False
+
+
+    @staticmethod
+    def getFileFormats():
+        """
+        Get the format of the file
+        """
+        return ["wig", "wiggle"]
+
+
+    @staticmethod
+    def getExtension():
+        """
+        Get the usual extension for the file
+        """
+        return "wig"
+
+
+    def setTitle(self, title):
+        """
+        Set the title of the track (a None title is ignored)
+        @param title: the title of the track
+        @type    title: string
+        """
+        if title is not None:
+            self.title = title
+            self.toBeWritten = True
+
+
+    def setStrands(self, strands):
+        """
+        Consider each strand separately
+        @param strands: whether each strand should be considered separately
+        @type  strands: boolean
+        """
+        self.strands = strands
+
+
+    def copyProperties(self, parser):
+        """
+        Copy the title collected by a parser, to produce a similar output
+        @param parser: a parser, with a "title" attribute
+        @type    parser: a parser class
+        """
+        self.setTitle(parser.title)
+
+
+    def addTranscript(self, transcript):
+        """
+        Add the exons of the given transcript to the coverage
+        (nothing is printed before the writer is closed)
+        @param transcript: transcript to be counted
+        @type transcript: class L{Transcript<Transcript>}
+        """
+        chromosome = transcript.getChromosome()
+        direction  = transcript.getDirection()
+        if not self.strands:
+            direction = 0
+        counts = self.data[direction].setdefault(chromosome, {})
+        for exon in transcript.getExons():
+            for pos in range(exon.getStart(), exon.getEnd()+1):
+                counts[pos] = counts.get(pos, 0) + 1
+        self.toBeWritten = True

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/WigWriter.pyc

Binary file commons/core/writer/WigWriter.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/WriterChooser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/WriterChooser.py Fri Jan 18 04:54:14 2013 -0500

[

@@ -0,0 +1,127 @@
+#
+# Copyright INRA-URGI 2009-2010
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from commons.core.writer.TranscriptListWriter import TranscriptListWriter
+from commons.core.writer.SequenceListWriter import SequenceListWriter
+from commons.core.writer.BedWriter import BedWriter
+from commons.core.writer.CsvWriter import CsvWriter
+from commons.core.writer.EmblWriter import EmblWriter
+from commons.core.writer.FastaWriter import FastaWriter
+from commons.core.writer.FastqWriter import FastqWriter
+from commons.core.writer.GbWriter import GbWriter
+from commons.core.writer.Gff2Writer import Gff2Writer
+from commons.core.writer.SamWriter import SamWriter
+from commons.core.writer.UcscWriter import UcscWriter
+from commons.core.writer.WigWriter import WigWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.writer.GtfWriter import GtfWriter
+from commons.core.writer.MapWriter import  MapWriter
+
+
+class WriterChooser(object):
+    """
+    A class that finds the correct writer
+    @ivar type: transcript / sequence writer
+    @type type: string
+    @ivar format: the format of the writer
+    @type format: string
+    @ivar writerClass: the class of the writer
+    @type writerClass: class
+    @ivar extension: default extension of the file
+    @type extension: string
+    @ivar verbosity: verbosity
+    @type verbosity: int
+    """
+
+    def __init__(self, verbosity = 0):
+        """
+        Constructor
+        @param verbosity: verbosity
+        @type    verbosity: int
+        """
+        self.type = None
+        self.format = None
+        self.writerClass = None
+        self.extension = None
+        self.verbosity = verbosity
+
+
+    @staticmethod
+    def _getDescendants(baseClass):
+        """
+        List the direct and indirect sub-classes of a class, the closest ones first
+        @param baseClass: a class
+        @type    baseClass: class
+        @return: the list of the sub-classes
+        """
+        descendants = []
+        queue = list(baseClass.__subclasses__())
+        while queue:
+            current = queue.pop(0)
+            if current in descendants:
+                continue
+            descendants.append(current)
+            queue.extend(current.__subclasses__())
+        return descendants
+
+
+    def findFormat(self, format, type = None):
+        """
+        Find the correct writer, and store its class, extension and type
+        Exit the program if the type is unknown, or if no writer handles the format
+        @param format: the format
+        @type    format: string
+        @param type: transcript / sequence writer (None is all)
+        @type    type: string
+        """
+        classes = {}
+        if type == "transcript":
+            classes = {TranscriptListWriter: "transcript"}
+        elif type == "sequence":
+            classes = {SequenceListWriter: "sequence"}
+        elif type is None:
+            classes = {TranscriptListWriter: "transcript", SequenceListWriter: "sequence"}
+        else:
+            sys.exit("Do not understand format type '%s'" % (type))
+
+        for classType in classes:
+            # Also look at the indirect sub-classes: a writer may derive from
+            # another writer (e.g. UcscWriter derives from BedWriter)
+            for writerClass in self._getDescendants(classType):
+                if format in writerClass.getFileFormats():
+                    self.writerClass = writerClass
+                    self.extension = writerClass.getExtension()
+                    self.type = classes[classType]
+                    return
+        sys.exit("Cannot get writer for format '%s'" % (format))
+
+
+    def getWriter(self, fileName):
+        """
+        Get the writer previously found
+        @param fileName: name of the output file
+        @type    fileName: string
+        @return: the writer
+        """
+        return self.writerClass(fileName, self.verbosity)
+
+
+    def getType(self):
+        """
+        Get the type of writer previously found
+        @return: the type of writer
+        """
+        return self.type
+
+
+    def getExtension(self):
+        """
+        Get the default extension of writer previously found
+        @return: the extension
+        """
+        return self.extension
+

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/WriterChooser.pyc

Binary file commons/core/writer/WriterChooser.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/__init__.pyc

Binary file commons/core/writer/__init__.pyc has changed

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/test/Test_Gff3Writer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/test/Test_Gff3Writer.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,101 @@
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+
+class Test_Gff3Writer(unittest.TestCase):
+    # Each test writes one two-exon transcript with Gff3Writer and compares it to a hand-written file.
+    def test_writer(self):
+        obsFileName = "testGffWriter1.gff3"  # observed output, relative to the current directory
+        writer = Gff3Writer(obsFileName)
+
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+        # Hand-written expectation: one transcript line, then one line per exon.
+        expFileName = "expFile.gff3"
+        f = open(expFileName, "w")
+        f.write("arm_X\tS-MART\ttranscript\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
+        f.write("arm_X\tS-MART\texon\t1000\t2000\t.\t+\t.\tID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1\n")
+        f.write("arm_X\tS-MART\texon\t3000\t4000\t.\t+\t.\tID=test1.1-1-exon2;Name=test1.1-exon2;Parent=test1.1-1\n")
+        f.close()
+        # NOTE(review): tags are expected as nbOccurrences;ID;occurrence;Name, not in the order set above -- presumably the writer's own ordering; confirm it is stable.
+        self.assertTrue(Utils.diff(expFileName, obsFileName))
+        # Cleanup is skipped when the assertion above fails, leaving both files on disk.
+        os.remove(expFileName)
+        os.remove(obsFileName)
+
+    def test_writerAltNames(self):
+        obsFileName = "testGffWriter1.gff3"  # same file name as in test_writer
+        writer = Gff3Writer(obsFileName,title="ALTSOURCE", feature="Match", featurePart="Match-Part")
+        # Same transcript as in test_writer; only the writer's title/feature/featurePart arguments differ.
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+        # Expectation: "ALTSOURCE" in column 2, "Match"/"Match-Part" in column 3 and in the exon IDs and names.
+        expFileName = "expFile.gff3"
+        f = open(expFileName, "w")
+        f.write("arm_X\tALTSOURCE\tMatch\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
+        f.write("arm_X\tALTSOURCE\tMatch-Part\t1000\t2000\t.\t+\t.\tID=test1.1-1-Match-Part1;Name=test1.1-Match-Part1;Parent=test1.1-1\n")
+        f.write("arm_X\tALTSOURCE\tMatch-Part\t3000\t4000\t.\t+\t.\tID=test1.1-1-Match-Part2;Name=test1.1-Match-Part2;Parent=test1.1-1\n")
+        f.close()
+
+        self.assertTrue(Utils.diff(expFileName, obsFileName))
+
+        os.remove(expFileName)
+        os.remove(obsFileName)
+
+if __name__ == '__main__':  # run the tests above when this file is executed directly
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 commons/core/writer/test/Test_MapWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/writer/test/Test_MapWriter.py Fri Jan 18 04:54:14 2013 -0500

@@ -0,0 +1,61 @@
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+import unittest
+import os
+from SMART.Java.Python.misc import Utils
+from commons.core.writer.MapWriter import MapWriter
+from commons.core.utils.FileUtils import FileUtils
+
+class Test_MapWriter(unittest.TestCase):
+    # Writes one two-exon transcript with MapWriter and compares the result to a hand-written .map file.
+    def setUp(self):
+        self.expFileName = "expMapWriter.map"    # expected content, written by write_ExpMapFileName
+        self.obsFileName = "testMapWriter1.map"  # output of the MapWriter under test
+
+    def tearDown(self):
+        os.remove(self.expFileName)  # os.remove raises OSError when the file does not exist
+        os.remove(self.obsFileName)
+
+    def test_writer(self):
+        self.write_ExpMapFileName()
+        writer = MapWriter(self.obsFileName)
+
+        transcript = Transcript()
+        transcript.setName("test1.1")
+        transcript.setChromosome("arm_X")
+        transcript.setStart(1000)
+        transcript.setEnd(4000)
+        transcript.setDirection("+")
+        transcript.setTagValue("ID", "test1.1-1")
+        transcript.setTagValue("occurrence", 1)
+        transcript.setTagValue("nbOccurrences", 2)
+
+        exon1 = Interval()
+        exon1.setChromosome("arm_X")
+        exon1.setStart(1000)
+        exon1.setEnd(2000)
+        exon1.setDirection("+")
+
+        exon2 = Interval()
+        exon2.setChromosome("arm_X")
+        exon2.setStart(3000)
+        exon2.setEnd(4000)
+        exon2.setDirection("+")
+
+        transcript.addExon(exon1)
+        transcript.addExon(exon2)
+
+        writer.addTranscript(transcript)
+        writer.write()
+        writer.close()
+
+        self.assertTrue(FileUtils.are2FilesIdentical(self.expFileName, self.obsFileName))
+
+    # Expected file: a single tab-separated line holding "test1.1", "arm_X", 1000 and 4001.
+    def write_ExpMapFileName(self):
+        f = open(self.expFileName, "w")
+        f.write("test1.1\tarm_X\t1000\t4001\n")  # NOTE(review): 4001 here vs. transcript end 4000 -- presumably intended by MapWriter; confirm
+        f.close()
+
+if __name__ == '__main__':  # run the tests above when this file is executed directly
+    unittest.main()

diff -r ea3082881bf8 -r 769e306b7933 documentation.pdf

Binary file documentation.pdf has changed