Mercurial > repos > yufei-luo > s_mart
changeset 0:e0f8dcca02ed
Uploaded S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/File.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,55 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
/* End of the CeCILL license header. */

/**
 * Simple holder that groups a file name with its format type and its format.
 */
public class File {
  /** Name of the file. */
  String name;
  /** Format type given at construction time. */
  String formatType;
  /** Format given at construction time. */
  String format;

  /**
   * Stores the three values as given; no validation is performed.
   *
   * @param name   the file name
   * @param type   the format type of the file
   * @param format the format of the file
   */
  public File(String name, String type, String format) {
    this.format     = format;
    this.formatType = type;
    this.name       = name;
  }

  /** @return the file name given at construction time */
  public String getName() {
    return name;
  }

  /** @return the format type given at construction time */
  public String getFormatType() {
    return formatType;
  }

  /** @return the format given at construction time */
  public String getFormat() {
    return format;
  }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Files.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,75 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; + +public class Files { + HashMap <String, File> files; + + public Files () { + files = new HashMap < String, File> (); + } + + public void addFile(String fileName, String type, String format) { + this.addFile(new File(fileName, type, format)); + } + + public void addFile(File file) { + files.put(file.name, file); + } + + public void clear() { + files.clear(); + } + + public String getType(String fileName) { + if (fileName == null) { + System.out.println("Error! Looking for format of empty file name!"); + } + if (! files.containsKey(fileName)) { + System.out.println("Oops! Format type of file " + fileName + " is not found!"); + return null; + } + return files.get(fileName).formatType; + } + + public String getFormat(String fileName) { + if (fileName == null) { + System.out.println("Error! Looking for format of empty file name!"); + } + if (! files.containsKey(fileName)) { + System.out.println("Oops! Format of file " + fileName + " is not found!"); + return null; + } + return files.get(fileName).format; + } +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/FormatType.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,64 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
/* End of the CeCILL license header. */
import java.util.*;

/**
 * A named format type together with the list of formats registered for it.
 */
public class FormatType {
  /** Name of this format type. */
  String type;
  /** Formats registered for this type, in insertion order. */
  Vector < String > formats;

  /**
   * Creates a format type with no formats.
   *
   * @param type the name of the format type
   */
  public FormatType (String type) {
    this.type    = type;
    this.formats = new Vector < String > ();
  }

  /** @return the name of this format type */
  public String getType () {
    return this.type;
  }

  /**
   * Registers one more format for this type.
   *
   * @param format the format to add
   */
  public void addFormat (String format) {
    formats.add(format);
  }

  /**
   * Tells whether a format is registered, ignoring case.
   *
   * @param format the format to look for; {@code null} never matches
   * @return {@code true} if a registered format equals {@code format}
   *         case-insensitively
   */
  public boolean containsFormat (String format) {
    for (String known : formats) {
      // equalsIgnoreCase accepts a null argument, unlike compareToIgnoreCase
      // which throws NullPointerException.
      if (known != null && known.equalsIgnoreCase(format)) {
        return true;
      }
    }
    return false;
  }

  /** @return the live list of registered formats (not a copy) */
  public Vector < String > getFormats () {
    return formats;
  }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/FormatsContainer.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,90 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; + +public class FormatsContainer { + + HashMap < String, FormatType > formatTypes; + + + public FormatsContainer() { + this.formatTypes = new HashMap < String, FormatType > (); + } + + + public void addFormat(String type, String format) { + FormatType formatType; + if (formatTypes.containsKey(type)) { + formatType = this.formatTypes.get(type); + } + else { + formatType = new FormatType(type); + this.formatTypes.put(type, formatType); + } + formatType.addFormat(format); + } + + + public Vector < String > getFormatTypes () { + Vector < String > v = new Vector < String > (); + v.addAll(this.formatTypes.keySet()); + return v; + } + + + public FormatType getFormats (String type) { + if (! formatTypes.containsKey(type)) { + System.out.print("Format type " + type + " is unavailable. Got: "); + Iterator it = formatTypes.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry pairs = (Map.Entry) it.next(); + System.out.print(pairs.getKey() + " "); + } + System.out.println(); + } + return formatTypes.get(type); + } + + + public String getFormatType (String format) { + for (Iterator it = formatTypes.keySet().iterator(); it.hasNext(); ) { + Object type = it.next(); + Object formatType = formatTypes.get(type); + if (((FormatType) formatType).containsFormat(format)) { + return (String) type; + } + } + return null; + } +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/FormatsReader.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,83 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; +import java.io.File; +import java.io.*; + + +public class FormatsReader { + + String fileName; + Vector < FormatType > formatTypes; + Vector < String > typeNames; + + + public FormatsReader(String fileName) { + this.fileName = fileName; + this.formatTypes = new Vector < FormatType > (); + } + + + public boolean read() { + File file = new File(this.fileName); + + try { + BufferedReader reader = new BufferedReader(new FileReader(file)); + String line = null; + String[] lineElements; + String[] formats; + String typeName; + + while ((line = reader.readLine()) != null) { + if (line.length() > 0) { + lineElements = line.split(":"); + typeName = lineElements[0].trim(); + formats = lineElements[1].split(","); + for (int i = 0; i < formats.length; i++) { + Global.formats.addFormat(typeName, formats[i].trim()); + } + } + } + + reader.close(); + } + catch (FileNotFoundException e) { + return false; + } + catch (IOException e) { + return false; + } + + return true; + } +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Global.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,70 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.Vector; +import java.util.HashMap; +import javax.swing.DefaultListModel; +import javax.swing.JButton; +import javax.swing.JTextField; + +public class Global { + + public static int logAreaSize = 100; + + public static String smartConfFileName = "smart.conf"; + + public static String smartProgramsFileName = "programs.txt"; + + public static String smartFormatsFileName = "formats.txt"; + + public static String pythonPath = new String(); + + public static String pythonCommand = "python"; + + public static String mysqlCommand = "mysql"; + + public static String rCommand = "R"; + + public static Files files = new Files(); + + public static Vector < String > fileNames = new Vector < String >(); + + public static FormatsContainer formats = new FormatsContainer(); + + public static boolean programRunning = false; + + public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >(); + + public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >(); + + public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >(); + +}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Installer/Old/PasswordAsker.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,87 @@
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import java.util.concurrent.CountDownLatch;

/**
 * Small window that asks for a password and lets another thread block until
 * the OK button has been pressed.
 *
 * <p>All state is static, so only one asker should be alive at a time.
 */
public class PasswordAsker {

  /** Text typed by the user; {@code null} until OK is pressed. */
  static String password;
  static JFrame frame;
  /** Released once, when the OK button is pressed. */
  static CountDownLatch latch;


  /**
   * Resets the stored password and schedules the window on the event
   * dispatch thread.
   */
  public PasswordAsker() {
    password = null;
    // The latch must exist before the GUI is scheduled: the OK listener calls
    // latch.countDown() and must never observe a null latch.
    latch = new CountDownLatch(1);
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
      public void run() {
        createAndShowGUI();
      }
    });
  }


  /** Builds the frame, installs the main pane and shows the window. */
  private static void createAndShowGUI() {
    // Create and set up the window.
    frame = new JFrame("Password");
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    frame.setContentPane(setMainPane());

    // Display the window.
    frame.pack();
    frame.setVisible(true);
  }


  /**
   * Builds the content pane: an information label, the password field and
   * the OK button stacked in a single column.
   *
   * @return the fully populated root panel
   */
  private static JPanel setMainPane() {
    JPanel rootPanel = new JPanel(false);
    rootPanel.setLayout(new GridLayout(0, 1));

    JPanel infoPanel = new JPanel(false);
    JLabel infoLabel = new JLabel("Please write here the password that you entered for the mySQL root account.\r\nNo information is stored nor sent. I promise.");
    infoPanel.add(infoLabel);

    JPanel passPanel = new JPanel(false);
    passPanel.setLayout(new GridLayout(1, 0));
    JLabel passLabel = new JLabel("password");
    // JPasswordField masks the typed characters instead of echoing them.
    final JPasswordField passText = new JPasswordField(20);
    passLabel.setLabelFor(passText);
    passPanel.add(passLabel);
    passPanel.add(passText);

    JPanel okPanel = new JPanel(false);
    JButton okButton = new JButton("OK");
    okPanel.add(okButton);

    okButton.addActionListener(new ActionListener() {
      public void actionPerformed(ActionEvent e) {
        password = new String(passText.getPassword());
        frame.setVisible(false);
        frame.dispose();
        latch.countDown();
      }
    });

    rootPanel.add(infoPanel);
    rootPanel.add(passPanel);
    rootPanel.add(okPanel);

    return rootPanel;
  }


  /**
   * Blocks the calling thread until the OK button has been pressed.
   *
   * @return {@code true} once the password is available, {@code false} if the
   *         wait was interrupted (the interrupt flag is then restored)
   */
  public boolean waitForPassword() {
    try {
      latch.await();
    }
    catch (InterruptedException e) {
      // Keep the interruption visible to the caller's thread.
      Thread.currentThread().interrupt();
      return false;
    }
    return true;
  }


  /** @return the text typed by the user, or {@code null} before OK is pressed */
  public String getPassword() {
    return password;
  }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Installer/Old/SmartInstaller.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,167 @@ +import java.util.*; +import java.awt.*; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.*; +import javax.swing.*; +import javax.swing.filechooser.*; +import javax.swing.border.*; +import javax.swing.SwingUtilities; +import java.net.*; + +public class SmartInstaller extends JPanel implements ActionListener { + int BUFFER = 1024; + + JFrame mainFrame; + JTextArea logArea; + + // configuration chooser buttons + String configurations[] = {"32 bits", "64 bits"}; + JRadioButton configurationButtons[]; + + // program chooser buttons + String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"}; + JCheckBox programChooserButtons[]; + + JButton goButton; + + // install directory + JButton installDirectoryChooserButton; + JTextField installDirectoryChooserTextField; + + + public SmartInstaller() { + super(); + + Box box = Box.createVerticalBox(); + + // Header + JPanel headerPanel = new JPanel(false); + JTextArea headerArea = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.\r\nPlease remember the root password if you install MySQL!"); + TitledBorder headerBorder = BorderFactory.createTitledBorder("Wellcome to the S-MART installer!"); + headerArea.setEditable(false); + headerArea.setBackground(headerPanel.getBackground()); + headerPanel.add(headerArea); + headerPanel.setBorder(headerBorder); + + + // Configuration + JPanel configurationPanel = new JPanel(false); + configurationPanel.setLayout(new GridLayout(1, 0)); + configurationButtons = new JRadioButton[configurations.length]; + 
ButtonGroup configurationGroup = new ButtonGroup(); + for (int i = 0; i < configurations.length; i++) { + JRadioButton button = new JRadioButton(configurations[i]); + configurationPanel.add(button); + configurationButtons[i] = button; + configurationGroup.add(button); + } + configurationButtons[0].setSelected(true); + TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration"); + configurationPanel.setBorder(configurationBorder); + + + // Program chooser panel + JPanel programPanel = new JPanel(false); + programPanel.setLayout(new GridLayout(0, 1)); + + JLabel programLabel = new JLabel("Choose which programs to install:"); + programPanel.add(programLabel); + programChooserButtons = new JCheckBox[programChoosers.length]; + for (int i = 0; i < programChoosers.length; i++) { + JCheckBox button = new JCheckBox(programChoosers[i]); + button.setSelected(true); + programPanel.add(button); + programChooserButtons[i] = button; + } + TitledBorder programBorder = BorderFactory.createTitledBorder("Programs"); + programPanel.setBorder(programBorder); + + // Install directory chooser + JPanel installDirectoryChooserPanel = new JPanel(false); + installDirectoryChooserPanel.setLayout(new GridLayout(1, 0)); + JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: "); + installDirectoryChooserTextField = new JTextField(); + installDirectoryChooserButton = new JButton("Open..."); + installDirectoryChooserButton.addActionListener(this); + + installDirectoryChooserPanel.add(installDirectoryChooserLabel); + installDirectoryChooserPanel.add(installDirectoryChooserTextField); + installDirectoryChooserPanel.add(installDirectoryChooserButton); + TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory"); + installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder); + + // GO! 
+ JPanel goPanel = new JPanel(false); + goButton = new JButton("GO!"); + goButton.addActionListener(this); + goButton.setSelected(true); + goPanel.add(goButton); + TitledBorder goBorder = BorderFactory.createTitledBorder("Start install"); + goPanel.setBorder(goBorder); + + // Log + logArea = new JTextArea(10, 120); + logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize())); + JScrollPane logScroll = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED); + TitledBorder logBorder = BorderFactory.createTitledBorder("Log"); + logScroll.setBorder(logBorder); + + GridLayout horizontalLayout = new GridLayout(1, 0); + + box.add(headerPanel); + box.add(configurationPanel); + box.add(programPanel); + box.add(installDirectoryChooserPanel); + box.add(goPanel); + box.add(logScroll); + + add(box); + } + + + public void actionPerformed(ActionEvent e) { + + // Install directories chooser + if (e.getSource() == goButton) { + boolean[] selectedPrograms = new boolean[programChoosers.length]; + for (int i = 0; i < programChoosers.length; i++) { + selectedPrograms[i] = programChooserButtons[i].isSelected(); + } + SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1); + task.execute(); + } + // Install directories chooser + else if (e.getSource() == installDirectoryChooserButton) { + JFileChooser chooser = new JFileChooser(); + chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); + if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) { + installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath()); + } + } + } + + private static void createAndShowGUI() { + // Create and set up the window. + JFrame mainFrame = new JFrame("S-Mart Installer"); + mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + + //Create and set up the content pane. 
+ JComponent newContentPane = new SmartInstaller(); + newContentPane.setOpaque(true); + mainFrame.setContentPane(newContentPane); + + // Display the window. + mainFrame.pack(); + mainFrame.setVisible(true); + } + + + public static void main(String[] args) { + javax.swing.SwingUtilities.invokeLater(new Runnable() { + public void run() { + createAndShowGUI(); + } + }); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Installer/Old/SmartInstallerTask.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,455 @@ +import java.util.*; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.*; +import javax.swing.*; +import javax.swing.filechooser.*; +import javax.swing.border.*; +import javax.swing.SwingUtilities; +import java.net.*; +import java.util.Stack; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +public class SmartInstallerTask extends SwingWorker<Boolean, String> { + + int BUFFER = 1024; + + int architecture = 0; + String installDirectoryName = null; + JTextArea logArea = null; + boolean[] selectedPrograms = null; + + // program chooser buttons + String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"}; + + // Web addresses for the tools + String packageAddresses[][] = { + {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"}, + {"", ""}, + {"", ""}, + {"http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-win32.msi", "http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-winx64.msi"}, + {"", ""}, + {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"}, + {"http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe", "http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe"}, + {"http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip", "http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip"} + }; + + // Packages to install + String rPackages[] = {"RColorBrewer", "Hmisc"}; + + // Script lines + String scriptLines[][] = { + {"\"<INSTALLDIR>\\R-2.11.0-win32.exe\"", "\"<INSTALLDIR>\\R-2.11.0-win64.exe\""}, + {"\"<RFILE>\" CMD BATCH 
\"<INSTALLDIR>\\installRColorBrewer.R\"", "\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installRColorBrewer.R\""}, + {"\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installHmisc.R\"", "\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installHmisc.R\""}, + {"msiexec /i \"<INSTALLDIR>\\mysql-essential-5.1.47-win32.msi\"", "msiexec /i \"<INSTALLDIR>\\mysql-essential-5.1.47-winx64.msi\""}, + {"", ""}, + {"msiexec /i \"<INSTALLDIR>\\python-2.6.5.msi\"", "msiexec /i \"<INSTALLDIR>\\python-2.6.5.amd64.msi\""}, + {"<INSTALLDIR>\\MySQL-python-1.2.2.win32-py2.6.exe", "<INSTALLDIR>\\MySQL-python-1.2.2.win32-py2.6.exe"}, + {"", ""} + }; + + // Files to uncompress + String compressedFiles[][] = { + {"", ""}, + {"", ""}, + {"", ""}, + {"", ""}, + {"", ""}, + {"", ""}, + {"", ""}, + {"<INSTALLDIR>\\s-mart.zip", "<INSTALLDIR>\\s-mart.zip"} + }; + + + public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) { + logArea = ta; + selectedPrograms = b; + installDirectoryName = s; + architecture = a; + } + + + @Override + public Boolean doInBackground() { + boolean installOk; + publish("Starting install\n"); + writeFiles(); + for (int i = 0; i < selectedPrograms.length; i++) { + if (selectedPrograms[i]) { + if (! 
install(i)) { + return Boolean.FALSE; + } + } + } + removeFiles(); + setEnvironmentVariables(); + publish("Ending install\n"); + return Boolean.TRUE; + } + + + @Override + protected void process(List<String> chunks) { + for (String chunk: chunks) { + logArea.append(chunk); + } + } + + + private boolean launch(String command) { + return realLaunch(new ProcessBuilder(command), command); + } + + private boolean launch(String[] command) { + return realLaunch(new ProcessBuilder(command), Arrays.toString(command)); + } + + private boolean realLaunch(ProcessBuilder pb, String command) { + BufferedReader outputReader; + pb = pb.redirectErrorStream(true); + Process process = null; + publish(" Starting command '" + command + "'\n"); + try { + process = pb.start(); + BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream()); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is); + outputReader = new BufferedReader(isr); + } + catch (Exception exception) { + publish(" !Process cannot be started (command is '" + command + "')!\n"); + exception.printStackTrace(); + return false; + } + if (outputReader == null) { + publish(" !Problem in the output of the command!\n"); + return false; + } + else { + publish(" Output is:\n"); + try { + publish(" ---\n"); + String line; + while ((line = outputReader.readLine()) != null) { + publish(" " + line + "\r\n"); + } + publish(" ---\n"); + } + catch (IOException e) { + publish(" !Cannot get the output of the command!\n"); + return false; + } + } + int exitValue = process.exitValue(); + if (exitValue != 0) { + publish(" !Problem during the execution of the command '" + command + "'!\n"); + return false; + } + publish(" Ending command '" + command + "'\n"); + return true; + } + + + private File lookForFile(String fileName, String[] putativePlaces) { + publish(" Looking for file " + fileName + "\n"); + for (String place: putativePlaces) { + File file = new File(place, 
fileName); + publish(" Look at " + file.getAbsolutePath() + "\n"); + if (file.exists()) { + publish(" Found it in expected place " + file.getAbsolutePath() + "\n"); + return file; + } + } + Stack<File> files = new Stack<File>(); + files.push(new File("\\")); + while (! files.empty()) { + File file = files.pop(); + for (File childFile: file.listFiles()) { + if (childFile.isDirectory()) { + files.push(childFile); + } + else { + if (fileName.compareToIgnoreCase(childFile.getName()) == 0) { + publish(" Found it in unexpected place " + childFile.getAbsolutePath() + "\n"); + return childFile; + } + } + } + } + publish(" !Cannot file file '" + fileName + "'!\n"); + return null; + } + + + private boolean writeFile(String fileName, String content) { + try { + FileWriter fw = new FileWriter(fileName); + BufferedWriter bw = new BufferedWriter(fw); + bw.write(content); + bw.close(); + fw.close(); + } + catch (Exception e) { + publish(" !Cannot write file '" + fileName + "'!\n"); + return false; + } + return true; + } + + + private boolean removeFile(String fileName) { + File file = new File(fileName); + if (file.exists()) { + if (! file.delete()) { + publish(" !Cannot delete file '" + file.getAbsolutePath() + "'!\n"); + return false; + } + } + return true; + } + + + private boolean writeFiles() { + for (String rPackage: rPackages) { + String fileName = installDirectoryName + File.separator + "install" + rPackage + ".R"; + String content = "install.packages(\"" + rPackage + "\", repos = \"http://cran.cict.fr\", dependencies = TRUE)\n"; + if (! writeFile(fileName, content)) { + publish(" !Cannot write file for R package '" + rPackage + "'!\n"); + return false; + } + } + String fileName = installDirectoryName + File.separator + "createUser.sql"; + String content = "CREATE USER 'smart'@'localhost';\nGRANT ALL PRIVILEGES ON *.* TO 'smart'@'localhost' WITH GRANT OPTION;\nCREATE DATABASE smart;\nGRANT ALL ON smart.* TO 'smart'@'localhost';\n"; + if (! 
writeFile(fileName, content)) { + publish(" !Cannot write mySQL configuration file!\n"); + return false; + } + return true; + } + + private boolean removeFiles() { + for (String rPackage: rPackages) { + File file = new File(installDirectoryName + File.separator + "install" + rPackage + ".R"); + if (! file.delete()) { + publish("!Cannot delete R install file for " + rPackage + "!\n"); + return false; + } + } + File file = new File(installDirectoryName + File.separator + "createUser.sql"); + if (! file.delete()) { + publish("!Cannot delete mySQL configuration file!\n"); + return false; + } + return true; + } + + private boolean install(int element) { + publish(" Starting install of " + programChoosers[element] + "\n"); + downloadPackage(element); + executeInstall(element); + uncompressPackage(element); + removePackage(element); + postProcess(element); + publish(" Ending install of " + programChoosers[element] + "\n"); + return true; + } + + + private String getLocalName(String remoteName) { + String localName = installDirectoryName + File.separator + (new File(remoteName)).getName(); + int position = localName.indexOf("?"); + if (position >= 0) { + localName = localName.substring(0, position); + } + return localName; + } + + + private boolean downloadPackage(int element) { + String fileName = packageAddresses[element][architecture]; + if (! 
"".equals(fileName)) { + publish(" Starting download of " + programChoosers[element] + "\n"); + try { + BufferedInputStream bis = new BufferedInputStream(new URL(fileName).openStream()); + FileOutputStream fos = new FileOutputStream(getLocalName(fileName)); + BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER); + byte[] data = new byte[BUFFER]; + int x = 0; + while((x = bis.read(data, 0, BUFFER)) >= 0) { + bos.write(data, 0, x); + } + bos.close(); + fos.close(); + bis.close(); + } + catch (IOException e) { + publish(" !Cannot download file '" + fileName + "'!\n"); + return false; + } + publish(" Ending download of " + programChoosers[element] + "\n"); + } + return true; + } + + + private String replaceSubstring(String line) { + if (line.contains("<INSTALLDIR>")) { + String protectedDirectory = installDirectoryName.replaceAll("\\\\", "\\\\\\\\"); + line = line.replaceAll("<INSTALLDIR>", protectedDirectory); + } + if (line.contains("<RFILE>")) { + String userName = System.getenv().get("USERNAME"); + String[] possibleRDirectories = {"C:\\Program Files\\R-2.11.0", "C:\\Documents and Settings\\" + userName + "\\Mes documents\\R\\R-2.11.0\\bin", "C:\\Documents and Settings\\" + userName + "\\My documents\\R\\R-2.11.0\\bin"}; + String rDirectory = lookForFile("'.exe", possibleRDirectories).getAbsolutePath(); + rDirectory = rDirectory.replaceAll("\\\\", "\\\\\\\\"); + line = line.replaceAll("<RFILE>", rDirectory); + } + if (line.contains("<MYSQLFILE>")) { + String userName = System.getenv().get("USERNAME"); + String[] possibleRDirectories = {"C:\\Program Files\\MySQL\\MySQL Server 5.1\\bin", "C:\\Documents and Settings\\" + userName + "\\Mes documents\\MySQL\\MySQL Server 5.1\\bin", "C:\\Documents and Settings\\" + userName + "\\My documents\\MySQL\\MySQL Server 5.1\\bin"}; + String rDirectory = lookForFile("mysql.exe", possibleRDirectories).getAbsolutePath(); + rDirectory = rDirectory.replaceAll("\\\\", "\\\\\\\\"); + line = line.replaceAll("<MYSQLFILE>", 
rDirectory); + } + return line; + } + + + private boolean executeInstall(int element) { + String commands = scriptLines[element][architecture]; + if (! "".equals(commands)) { + for (String command: commands.split(";")) { + command = replaceSubstring(command); + publish(" Starting command '" + command + "'\n"); + Process process = null; + try { + process = Runtime.getRuntime().exec(command); + } + catch (IOException e) { + publish(" !Cannot execute command '" + command + "'!\n"); + return false; + } + try { + process.waitFor(); + } + catch (InterruptedException e) { + publish(" !Cannot wait for the end of the command '" + command + "'!\n"); + return false; + } + int exitValue = process.exitValue(); + if (exitValue != 0) { + publish(" !Problem during the execution of the command '" + command + "'!\n"); + return false; + } + publish(" Ending command '" + command + "'\n"); + } + } + return true; + } + + + private boolean uncompressPackage(int element) { + String file = compressedFiles[element][architecture]; + if (! "".equals(file)) { + file = replaceSubstring(file); + publish(" Starting uncompressing file '" + file + "'\n"); + try { + FileInputStream fis = new FileInputStream(file); + BufferedInputStream bis = new BufferedInputStream(fis); + ZipInputStream zis = new ZipInputStream(bis); + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (! entry.isDirectory()) { + File newFile = new File(installDirectoryName + File.separator + entry.getName()); + // create parent directories + File upDirectory = newFile.getParentFile(); + while (upDirectory != null){ + if (! 
upDirectory.exists()) { + upDirectory.mkdir(); + publish(" Creating directory '" + upDirectory.getAbsolutePath() + "'\n"); + } + upDirectory = upDirectory.getParentFile(); + } + // write the files to the disk + publish(" Extracting '" + entry.getName() + "' to '" + newFile.getAbsolutePath() + "'\n"); + int count; + byte data[] = new byte[BUFFER]; + FileOutputStream fos = new FileOutputStream(newFile); + BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER); + while ((count = zis.read(data, 0, BUFFER)) != -1){ + bos.write(data, 0, count); + } + bos.flush(); + bos.close(); + fos.close(); + } + } + zis.close(); + bis.close(); + fis.close(); + } + catch(FileNotFoundException e) { + publish(" !Cannot find file '" + file + "'!\n"); + return false; + } + catch(Exception e){ + publish(" !Cannot uncompress file '" + file + "'!\n"); + return false; + } + publish(" Ending uncompressing file '" + file + "'\n"); + } + return true; + } + + + private boolean removePackage(int element) { + String packageName = packageAddresses[element][architecture]; + if ("".equals(packageName)) { + return true; + } + String fileName = getLocalName(packageAddresses[element][architecture]); + return removeFile(fileName); + } + + + private boolean postProcess(int element) { + switch (element) { + case 4: + // Create mySQL user + PasswordAsker pa = new PasswordAsker(); + if (! pa.waitForPassword()) { + publish("Problem in the password asker!\n"); + return false; + } + String command = "\"<MYSQLFILE>\" --user=root --password=" + pa.getPassword() + " -e \"source <INSTALLDIR>\\createUser.sql\""; + command = replaceSubstring(command); + if (! 
launch(command)) { + publish(" !Cannot create SQL accounts!\n"); + return false; + } + return true; + case 7: + // Move S-MART files to parent directory + File installDirectory = new File(installDirectoryName + File.separator + "S-Mart"); + for (File file: installDirectory.listFiles()) { + File destinationFile = new File(file.getParentFile().getParentFile(), file.getName()); + if (! file.renameTo(destinationFile)) { + publish(" !Cannot move '" + file.getAbsolutePath() + "' to '" + destinationFile.getAbsolutePath() + "'!\n"); + } + } + if (! installDirectory.delete()) { + publish(" !Cannot remove installation S-MART directory '" + installDirectory.getAbsolutePath() + "'!\n"); + } + } + return true; + } + + + private boolean setEnvironmentVariables() { + String[] command = {"REG", "ADD", "HKCU\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\"" + installDirectoryName + "\\Python\"", "/f"}; + return launch(command); + } +} +
// smart_toolShed/SMART/Java/Installer/PasswordAsker.java
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import java.util.concurrent.CountDownLatch;

/**
 * Small window asking the user for the mySQL root password.
 *
 * <p>Typical use, from a thread that is NOT the event dispatch thread:
 * create the asker, call {@link #waitForPassword()} (blocks until the window
 * is answered or closed), then read {@link #getPassword()}.
 *
 * <p>The state is kept in static fields, so only one asker should be alive at
 * a time: creating a second one resets the password and replaces the latch.
 */
public class PasswordAsker {

    /** Password typed by the user; stays {@code null} until OK is pressed. */
    static String password;
    /** The window currently shown (created on the event dispatch thread). */
    static JFrame frame;
    /** Released once the user has answered or closed the window. */
    static CountDownLatch latch;


    /**
     * Resets the shared state and schedules the creation of the window on the
     * event dispatch thread.
     */
    public PasswordAsker() {
        password = null;
        // The latch must exist BEFORE the GUI is scheduled: the listeners
        // registered by the GUI count it down.
        latch = new CountDownLatch(1);
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                createAndShowGUI();
            }
        });
    }


    /** Builds the window and displays it; must run on the event dispatch thread. */
    private static void createAndShowGUI() {
        // Create and set up the window.
        frame = new JFrame("Password");
        // Closing this window must not terminate the whole installer: dispose
        // of it and release the waiting thread instead (password stays null).
        frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE);
        frame.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosed(WindowEvent e) {
                latch.countDown();
            }
        });
        frame.setContentPane(setMainPane());

        // Display the window.
        frame.pack();
        frame.setVisible(true);
    }


    /**
     * Builds the content pane: an information label, the password field and
     * an OK button that stores the password and releases the latch.
     *
     * @return the panel to install as content pane of the window
     */
    private static JPanel setMainPane() {
        JPanel rootPanel = new JPanel(false);
        rootPanel.setLayout(new GridLayout(0, 1));

        JPanel infoPanel = new JPanel(false);
        JLabel infoLabel = new JLabel("Please write here the password that you entered for the mySQL root account.\r\nNo information is stored nor sent. I promise.");
        infoPanel.add(infoLabel);

        JPanel passPanel = new JPanel(false);
        passPanel.setLayout(new GridLayout(1, 0));
        JLabel passLabel = new JLabel("password");
        // A JPasswordField masks the characters on screen.
        final JPasswordField passText = new JPasswordField(20);
        passLabel.setLabelFor(passText);
        passPanel.add(passLabel);
        passPanel.add(passText);

        JPanel okPanel = new JPanel(false);
        JButton okButton = new JButton("OK");
        okPanel.add(okButton);

        okButton.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                // Store the password before releasing the waiting thread.
                password = new String(passText.getPassword());
                frame.setVisible(false);
                frame.dispose();
                latch.countDown();
            }
        });

        rootPanel.add(infoPanel);
        rootPanel.add(passPanel);
        rootPanel.add(okPanel);

        return rootPanel;
    }


    /**
     * Blocks until the user has pressed OK or closed the window.
     *
     * @return {@code true} if a password was entered; {@code false} if the
     *         wait was interrupted (the interrupt flag is restored) or the
     *         window was closed without pressing OK
     */
    public boolean waitForPassword() {
        try {
            latch.await();
        }
        catch (InterruptedException e) {
            // Keep the interruption visible to the calling thread.
            Thread.currentThread().interrupt();
            return false;
        }
        return password != null;
    }


    /**
     * @return the password entered by the user, or {@code null} if none was
     *         entered (yet)
     */
    public String getPassword() {
        return password;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Installer/SmartInstaller.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,167 @@ +import java.util.*; +import java.awt.*; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.*; +import javax.swing.*; +import javax.swing.filechooser.*; +import javax.swing.border.*; +import javax.swing.SwingUtilities; +import java.net.*; + +public class SmartInstaller extends JPanel implements ActionListener { + int BUFFER = 1024; + + JFrame mainFrame; + JTextArea logArea; + + // configuration chooser buttons + String configurations[] = {"32 bits", "64 bits"}; + JRadioButton configurationButtons[]; + + // program chooser buttons + String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"}; + JCheckBox programChooserButtons[]; + + JButton goButton; + + // install directory + JButton installDirectoryChooserButton; + JTextField installDirectoryChooserTextField; + + + public SmartInstaller() { + super(); + + Box box = Box.createVerticalBox(); + + // Header + JPanel headerPanel = new JPanel(false); + JTextArea headerArea = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters."); + TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!"); + headerArea.setEditable(false); + headerArea.setBackground(headerPanel.getBackground()); + headerPanel.add(headerArea); + headerPanel.setBorder(headerBorder); + + + // Configuration + JPanel configurationPanel = new JPanel(false); + configurationPanel.setLayout(new GridLayout(1, 0)); + configurationButtons = new JRadioButton[configurations.length]; + ButtonGroup configurationGroup = new ButtonGroup(); + for (int i = 0; i < configurations.length; i++) { + 
JRadioButton button = new JRadioButton(configurations[i]); + configurationPanel.add(button); + configurationButtons[i] = button; + configurationGroup.add(button); + } + configurationButtons[0].setSelected(true); + TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration"); + configurationPanel.setBorder(configurationBorder); + + + // Program chooser panel + JPanel programPanel = new JPanel(false); + programPanel.setLayout(new GridLayout(0, 1)); + + JLabel programLabel = new JLabel("Choose which programs to install:"); + programPanel.add(programLabel); + programChooserButtons = new JCheckBox[programChoosers.length]; + for (int i = 0; i < programChoosers.length; i++) { + JCheckBox button = new JCheckBox(programChoosers[i]); + button.setSelected(true); + programPanel.add(button); + programChooserButtons[i] = button; + } + TitledBorder programBorder = BorderFactory.createTitledBorder("Programs"); + programPanel.setBorder(programBorder); + + // Install directory chooser + JPanel installDirectoryChooserPanel = new JPanel(false); + installDirectoryChooserPanel.setLayout(new GridLayout(1, 0)); + JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: "); + installDirectoryChooserTextField = new JTextField(); + installDirectoryChooserButton = new JButton("Open..."); + installDirectoryChooserButton.addActionListener(this); + + installDirectoryChooserPanel.add(installDirectoryChooserLabel); + installDirectoryChooserPanel.add(installDirectoryChooserTextField); + installDirectoryChooserPanel.add(installDirectoryChooserButton); + TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory"); + installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder); + + // GO! 
+ JPanel goPanel = new JPanel(false); + goButton = new JButton("GO!"); + goButton.addActionListener(this); + goButton.setSelected(true); + goPanel.add(goButton); + TitledBorder goBorder = BorderFactory.createTitledBorder("Start install"); + goPanel.setBorder(goBorder); + + // Log + logArea = new JTextArea(10, 120); + logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize())); + JScrollPane logScroll = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED); + TitledBorder logBorder = BorderFactory.createTitledBorder("Log"); + logScroll.setBorder(logBorder); + + GridLayout horizontalLayout = new GridLayout(1, 0); + + box.add(headerPanel); + box.add(configurationPanel); + box.add(programPanel); + box.add(installDirectoryChooserPanel); + box.add(goPanel); + box.add(logScroll); + + add(box); + } + + + public void actionPerformed(ActionEvent e) { + + // Install directories chooser + if (e.getSource() == goButton) { + boolean[] selectedPrograms = new boolean[programChoosers.length]; + for (int i = 0; i < programChoosers.length; i++) { + selectedPrograms[i] = programChooserButtons[i].isSelected(); + } + SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1); + task.execute(); + } + // Install directories chooser + else if (e.getSource() == installDirectoryChooserButton) { + JFileChooser chooser = new JFileChooser(); + chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); + if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) { + installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath()); + } + } + } + + private static void createAndShowGUI() { + // Create and set up the window. + JFrame mainFrame = new JFrame("S-Mart Installer"); + mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + + //Create and set up the content pane. 
+ JComponent newContentPane = new SmartInstaller(); + newContentPane.setOpaque(true); + mainFrame.setContentPane(newContentPane); + + // Display the window. + mainFrame.pack(); + mainFrame.setVisible(true); + } + + + public static void main(String[] args) { + javax.swing.SwingUtilities.invokeLater(new Runnable() { + public void run() { + createAndShowGUI(); + } + }); + } +}
// smart_toolShed/SMART/Java/Installer/SmartInstallerTask.java
import java.util.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.*;
import javax.swing.*;
import javax.swing.filechooser.*;
import javax.swing.border.*;
import javax.swing.SwingUtilities;
import java.net.*;
import java.util.Stack;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Background task that installs the selected programs.
 *
 * <p>For each selected program the task downloads its package (if any), runs
 * its install commands (if any), uncompresses its archive (if any), removes
 * the downloaded package and runs a program-specific post-processing step.
 * Every message given to {@code publish} is appended to the log area on the
 * event dispatch thread.
 *
 * <p>All the per-program tables below are indexed first by program (same
 * order as {@link #programChoosers}) and then by architecture (0 or 1, as
 * received by the constructor). An empty string means "nothing to do".
 */
public class SmartInstallerTask extends SwingWorker<Boolean, String> {

    int BUFFER = 1024;

    int architecture = 0;
    String installDirectoryName = null;
    JTextArea logArea = null;
    boolean[] selectedPrograms = null;

    // program chooser buttons
    String programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};

    // Web addresses for the tools
    String packageAddresses[][] = {
        {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"},
        {"", ""},
        {"", ""},
        {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"},
        {"http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip", "http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip"}
    };

    // Packages to install
    String rPackages[] = {"RColorBrewer", "Hmisc"};

    // Script lines (several commands may be separated by ';')
    String scriptLines[][] = {
        {"\"<INSTALLDIR>\\R-2.11.0-win32.exe\"", "\"<INSTALLDIR>\\R-2.11.0-win64.exe\""},
        {"\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installRColorBrewer.R\"", "\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installRColorBrewer.R\""},
        {"\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installHmisc.R\"", "\"<RFILE>\" CMD BATCH \"<INSTALLDIR>\\installHmisc.R\""},
        {"msiexec /i \"<INSTALLDIR>\\python-2.6.5.msi\"", "msiexec /i \"<INSTALLDIR>\\python-2.6.5.amd64.msi\""},
        {"", ""}
    };

    // Files to uncompress
    String compressedFiles[][] = {
        {"", ""},
        {"", ""},
        {"", ""},
        {"", ""},
        {"<INSTALLDIR>\\s-mart-1.0.15.zip", "<INSTALLDIR>\\s-mart-1.0.15.zip"}
    };


    /**
     * @param ta text area receiving the log messages
     * @param b  one flag per entry of {@link #programChoosers}: install it or not
     * @param s  installation directory
     * @param a  architecture index (second index of the per-program tables)
     */
    public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) {
        logArea = ta;
        selectedPrograms = b;
        installDirectoryName = s;
        architecture = a;
    }


    /**
     * Runs the whole installation.
     *
     * @return {@code Boolean.TRUE} when every selected program was installed,
     *         {@code Boolean.FALSE} as soon as one step fails
     */
    @Override
    public Boolean doInBackground() {
        publish("Starting install\n");
        boolean installOk = writeFiles();
        for (int i = 0; installOk && i < selectedPrograms.length; i++) {
            if (selectedPrograms[i]) {
                installOk = install(i);
            }
        }
        // The helper scripts are removed whatever the outcome.
        removeFiles();
        if (! installOk) {
            return Boolean.FALSE;
        }
        // Best effort, as before: a failure here is logged but does not fail the install.
        setEnvironmentVariables();
        publish("Ending install\n");
        return Boolean.TRUE;
    }


    /** Appends the published messages to the log area. */
    @Override
    protected void process(List<String> chunks) {
        for (String chunk: chunks) {
            logArea.append(chunk);
        }
    }


    /**
     * Launches a command given as one string.
     * NOTE(review): the whole string is handed to ProcessBuilder as a single
     * element, i.e. as the program name; it is not split into arguments.
     */
    private boolean launch(String command) {
        return realLaunch(new ProcessBuilder(command), command);
    }

    /** Launches a command given as program name followed by its arguments. */
    private boolean launch(String[] command) {
        return realLaunch(new ProcessBuilder(command), Arrays.toString(command));
    }

    /**
     * Starts the process, copies its output (standard output and error
     * merged) to the log and waits for its end.
     *
     * @param pb      the prepared process
     * @param command printable form of the command, used in the log only
     * @return {@code true} if the process ended with exit value 0
     */
    private boolean realLaunch(ProcessBuilder pb, String command) {
        pb.redirectErrorStream(true);
        publish(" Starting command '" + command + "'\n");
        Process process;
        try {
            process = pb.start();
        }
        catch (Exception exception) {
            // start() may also throw unchecked exceptions (e.g. empty command)
            publish(" !Process cannot be started (command is '" + command + "')!\n");
            exception.printStackTrace();
            return false;
        }
        BufferedReader outputReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
        publish(" Output is:\n");
        try {
            publish(" ---\n");
            String line;
            while ((line = outputReader.readLine()) != null) {
                publish(" " + line + "\r\n");
            }
            publish(" ---\n");
        }
        catch (IOException e) {
            publish(" !Cannot get the output of the command!\n");
            return false;
        }
        finally {
            closeQuietly(outputReader);
        }
        // The end of the output does not guarantee that the process has
        // exited: wait for it before asking for its exit value.
        int exitValue;
        try {
            exitValue = process.waitFor();
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            publish(" !Cannot wait for the end of the command '" + command + "'!\n");
            return false;
        }
        if (exitValue != 0) {
            publish(" !Problem during the execution of the command '" + command + "'!\n");
            return false;
        }
        publish(" Ending command '" + command + "'\n");
        return true;
    }


    /**
     * Looks for a file, first in the given directories, then in the whole
     * tree under the root directory "\" (name compared ignoring case).
     *
     * @param fileName       name of the file to find
     * @param putativePlaces directories to try first
     * @return the file found, or {@code null} if there is none
     */
    private File lookForFile(String fileName, String[] putativePlaces) {
        publish(" Looking for file " + fileName + "\n");
        for (String place: putativePlaces) {
            File file = new File(place, fileName);
            publish(" Look at " + file.getAbsolutePath() + "\n");
            if (file.exists()) {
                publish(" Found it in expected place " + file.getAbsolutePath() + "\n");
                return file;
            }
        }
        Deque<File> directories = new ArrayDeque<File>();
        directories.push(new File("\\"));
        while (! directories.isEmpty()) {
            File directory = directories.pop();
            File[] children = directory.listFiles();
            if (children == null) {
                // Not a readable directory (access denied, I/O error...): skip it.
                continue;
            }
            for (File childFile: children) {
                if (childFile.isDirectory()) {
                    directories.push(childFile);
                }
                else if (fileName.compareToIgnoreCase(childFile.getName()) == 0) {
                    publish(" Found it in unexpected place " + childFile.getAbsolutePath() + "\n");
                    return childFile;
                }
            }
        }
        publish(" !Cannot find file '" + fileName + "'!\n");
        return null;
    }


    /**
     * Writes a text file (platform default encoding).
     *
     * @return {@code false}, after logging, if the file cannot be written
     */
    private boolean writeFile(String fileName, String content) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(fileName));
            writer.write(content);
            // Flush here so that a write error is reported, not hidden by the quiet close.
            writer.flush();
        }
        catch (IOException e) {
            publish(" !Cannot write file '" + fileName + "'!\n");
            return false;
        }
        finally {
            closeQuietly(writer);
        }
        return true;
    }


    /**
     * Deletes a file if it exists.
     *
     * @return {@code false}, after logging, if an existing file cannot be deleted
     */
    private boolean removeFile(String fileName) {
        File file = new File(fileName);
        if (file.exists()) {
            if (! file.delete()) {
                publish(" !Cannot delete file '" + file.getAbsolutePath() + "'!\n");
                return false;
            }
        }
        return true;
    }


    /** Writes one R install script per R package into the installation directory. */
    private boolean writeFiles() {
        for (String rPackage: rPackages) {
            String fileName = installDirectoryName + File.separator + "install" + rPackage + ".R";
            String content = "install.packages(\"" + rPackage + "\", repos = \"http://cran.cict.fr\", dependencies = TRUE)\n";
            if (! writeFile(fileName, content)) {
                publish(" !Cannot write file for R package '" + rPackage + "'!\n");
                return false;
            }
        }
        return true;
    }

    /**
     * Removes the helper files from the installation directory. Files that do
     * not exist are skipped ("createUser.sql" is never written by this class),
     * and one failure does not prevent the removal of the other files.
     *
     * @return {@code false} if an existing file could not be deleted
     */
    private boolean removeFiles() {
        boolean allRemoved = true;
        for (String rPackage: rPackages) {
            File file = new File(installDirectoryName + File.separator + "install" + rPackage + ".R");
            if (file.exists() && ! file.delete()) {
                publish("!Cannot delete R install file for " + rPackage + "!\n");
                allRemoved = false;
            }
        }
        File file = new File(installDirectoryName + File.separator + "createUser.sql");
        if (file.exists() && ! file.delete()) {
            publish("!Cannot delete mySQL configuration file!\n");
            allRemoved = false;
        }
        return allRemoved;
    }

    /**
     * Installs one program: download, install commands, uncompress, removal
     * of the downloaded package, post-processing.
     *
     * @param element index of the program in the per-program tables
     * @return {@code false} as soon as a step fails (later steps are skipped,
     *         except the removal of the downloaded package)
     */
    private boolean install(int element) {
        publish(" Starting install of " + programChoosers[element] + "\n");
        boolean ok = downloadPackage(element) && executeInstall(element) && uncompressPackage(element);
        // Best effort: a package that cannot be removed is logged but is not an install failure.
        removePackage(element);
        ok = ok && postProcess(element);
        if (! ok) {
            publish(" !Install of " + programChoosers[element] + " failed!\n");
            return false;
        }
        publish(" Ending install of " + programChoosers[element] + "\n");
        return true;
    }


    /**
     * @param remoteName address of a package
     * @return path of the local copy: installation directory plus the last
     *         element of the address, cut at the first '?'
     */
    private String getLocalName(String remoteName) {
        String localName = installDirectoryName + File.separator + (new File(remoteName)).getName();
        int position = localName.indexOf("?");
        if (position >= 0) {
            localName = localName.substring(0, position);
        }
        return localName;
    }


    /**
     * Downloads the package of a program into the installation directory.
     *
     * @return {@code true} if there is nothing to download or the download
     *         succeeded
     */
    private boolean downloadPackage(int element) {
        String fileName = packageAddresses[element][architecture];
        if ("".equals(fileName)) {
            return true;
        }
        publish(" Starting download of " + programChoosers[element] + "\n");
        InputStream in = null;
        OutputStream out = null;
        try {
            in = new BufferedInputStream(new URL(fileName).openStream());
            out = new BufferedOutputStream(new FileOutputStream(getLocalName(fileName)), BUFFER);
            byte[] data = new byte[BUFFER];
            int count;
            while ((count = in.read(data, 0, BUFFER)) >= 0) {
                out.write(data, 0, count);
            }
            // Flush here so that a write error is reported, not hidden by the quiet close.
            out.flush();
        }
        catch (IOException e) {
            publish(" !Cannot download file '" + fileName + "'!\n");
            return false;
        }
        finally {
            closeQuietly(out);
            closeQuietly(in);
        }
        publish(" Ending download of " + programChoosers[element] + "\n");
        return true;
    }


    /**
     * Replaces the placeholders of a command or file name:
     * {@code <INSTALLDIR>} by the installation directory and {@code <RFILE>}
     * by the path of the R executable. When the R executable cannot be found
     * the {@code <RFILE>} placeholder is left as is, so that the command
     * fails and is reported by the caller.
     */
    private String replaceSubstring(String line) {
        if (line.contains("<INSTALLDIR>")) {
            // Literal replacement: no special meaning for '\' or '$' in the path.
            line = line.replace("<INSTALLDIR>", installDirectoryName);
        }
        if (line.contains("<RFILE>")) {
            String userName = System.getenv().get("USERNAME");
            String[] possibleRDirectories = {"C:\\Program Files\\R-2.11.0", "C:\\Documents and Settings\\" + userName + "\\Mes documents\\R\\R-2.11.0\\bin", "C:\\Documents and Settings\\" + userName + "\\My documents\\R\\R-2.11.0\\bin"};
            // <RFILE> is always used as '"<RFILE>" CMD BATCH ...', i.e. the R executable.
            File rFile = lookForFile("R.exe", possibleRDirectories);
            if (rFile != null) {
                line = line.replace("<RFILE>", rFile.getAbsolutePath());
            }
        }
        return line;
    }


    /**
     * Runs the install commands of a program, one after the other.
     *
     * @return {@code false} at the first command that cannot be run, is
     *         interrupted, or ends with a non-zero exit value
     */
    private boolean executeInstall(int element) {
        String commands = scriptLines[element][architecture];
        if ("".equals(commands)) {
            return true;
        }
        for (String command: commands.split(";")) {
            command = replaceSubstring(command);
            publish(" Starting command '" + command + "'\n");
            Process process;
            try {
                process = Runtime.getRuntime().exec(command);
            }
            catch (IOException e) {
                publish(" !Cannot execute command '" + command + "'!\n");
                return false;
            }
            int exitValue;
            try {
                exitValue = process.waitFor();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                publish(" !Cannot wait for the end of the command '" + command + "'!\n");
                return false;
            }
            if (exitValue != 0) {
                publish(" !Problem during the execution of the command '" + command + "'!\n");
                return false;
            }
            publish(" Ending command '" + command + "'\n");
        }
        return true;
    }


    /**
     * Uncompresses the zip archive of a program into the installation
     * directory. Directory entries are skipped: the directories are created
     * when a file they contain is extracted.
     *
     * @return {@code true} if there is nothing to uncompress or everything
     *         was extracted
     */
    private boolean uncompressPackage(int element) {
        String file = compressedFiles[element][architecture];
        if ("".equals(file)) {
            return true;
        }
        file = replaceSubstring(file);
        publish(" Starting uncompressing file '" + file + "'\n");
        ZipInputStream zis = null;
        try {
            String installRoot = new File(installDirectoryName).getCanonicalPath();
            if (! installRoot.endsWith(File.separator)) {
                installRoot += File.separator;
            }
            zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(file)));
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                if (entry.isDirectory()) {
                    continue;
                }
                File newFile = new File(installDirectoryName + File.separator + entry.getName());
                // The archive comes from the network: never write outside of the
                // installation directory (entry names containing "..").
                if (! newFile.getCanonicalPath().startsWith(installRoot)) {
                    publish(" !Refusing to extract '" + entry.getName() + "' outside of the installation directory!\n");
                    return false;
                }
                // create parent directories (all the missing levels at once)
                File upDirectory = newFile.getParentFile();
                if (upDirectory != null && ! upDirectory.exists()) {
                    if (! upDirectory.mkdirs()) {
                        publish(" !Cannot create directory '" + upDirectory.getAbsolutePath() + "'!\n");
                        return false;
                    }
                    publish(" Creating directory '" + upDirectory.getAbsolutePath() + "'\n");
                }
                // write the file to the disk
                publish(" Extracting '" + entry.getName() + "' to '" + newFile.getAbsolutePath() + "'\n");
                extractEntry(zis, newFile);
            }
        }
        catch (FileNotFoundException e) {
            publish(" !Cannot find file '" + file + "'!\n");
            return false;
        }
        catch (Exception e) {
            // Kept broad on purpose: besides IOException, a corrupted archive
            // may raise unchecked exceptions while its entries are read.
            publish(" !Cannot uncompress file '" + file + "'!\n");
            return false;
        }
        finally {
            closeQuietly(zis);
        }
        publish(" Ending uncompressing file '" + file + "'\n");
        return true;
    }


    /** Copies the current entry of the zip stream to the given file. */
    private void extractEntry(ZipInputStream zis, File target) throws IOException {
        OutputStream out = null;
        try {
            out = new BufferedOutputStream(new FileOutputStream(target), BUFFER);
            byte[] data = new byte[BUFFER];
            int count;
            while ((count = zis.read(data, 0, BUFFER)) != -1) {
                out.write(data, 0, count);
            }
            out.flush();
        }
        finally {
            closeQuietly(out);
        }
    }


    /** Closes a resource, ignoring a null reference and any error raised by the close. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ignored) {
            // Nothing useful can be done at this point.
        }
    }


    /** Removes the downloaded package of a program, if it has one. */
    private boolean removePackage(int element) {
        String packageName = packageAddresses[element][architecture];
        if ("".equals(packageName)) {
            return true;
        }
        String fileName = getLocalName(packageAddresses[element][architecture]);
        return removeFile(fileName);
    }


    /**
     * Program-specific step run at the end of an install. Only program 4
     * (S-MART) has one: the content of the "S-Mart" directory is moved to the
     * installation directory.
     *
     * @return {@code false} if the "S-Mart" directory cannot be listed; files
     *         that cannot be moved are only logged
     */
    private boolean postProcess(int element) {
        switch (element) {
            case 4:
                // Move S-MART files to parent directory
                File installDirectory = new File(installDirectoryName + File.separator + "S-Mart");
                File[] extractedFiles = installDirectory.listFiles();
                if (extractedFiles == null) {
                    // Missing directory or I/O error
                    publish(" !Cannot list installation S-MART directory '" + installDirectory.getAbsolutePath() + "'!\n");
                    return false;
                }
                for (File file: extractedFiles) {
                    File destinationFile = new File(file.getParentFile().getParentFile(), file.getName());
                    if (! file.renameTo(destinationFile)) {
                        publish(" !Cannot move '" + file.getAbsolutePath() + "' to '" + destinationFile.getAbsolutePath() + "'!\n");
                    }
                }
                if (! installDirectory.delete()) {
                    publish(" !Cannot remove installation S-MART directory '" + installDirectory.getAbsolutePath() + "'!\n");
                }
        }
        return true;
    }


    /** Sets the user environment variable PYTHONPATH, through the REG command. */
    private boolean setEnvironmentVariables() {
        String[] command = {"REG", "ADD", "HKCU\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\"" + installDirectoryName + "\\Python\"", "/f"};
        return launch(command);
    }
}
#! /bin/sh
# smart_toolShed/SMART/Java/Installer/build.sh
# Compiles the Java sources of the current directory and packs the classes
# into SmartInstaller.jar, using manifest.txt as manifest.

# Stop at the first failing step: a compilation error must not be followed by
# the creation of a jar from stale or missing classes.
set -e

rm -rf SmartInstaller.jar
javac *.java
jar cvfm SmartInstaller.jar manifest.txt *.class
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Installer/manifest.txt Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Created-By: Matthias Zytnicki +Main-Class: SmartInstaller
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/LICENSE.txt Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,506 @@ + +CeCILL FREE SOFTWARE LICENSE AGREEMENT + + + Notice + +This Agreement is a Free Software license agreement that is the result +of discussions between its authors in order to ensure compliance with +the two main principles guiding its drafting: + + * firstly, compliance with the principles governing the distribution + of Free Software: access to source code, broad rights granted to + users, + * secondly, the election of a governing law, French law, with which + it is conformant, both as regards the law of torts and + intellectual property law, and the protection that it offers to + both authors and holders of the economic rights over software. + +The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre]) +license are: + +Commissariat à l'Energie Atomique - CEA, a public scientific, technical +and industrial research establishment, having its principal place of +business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France. + +Centre National de la Recherche Scientifique - CNRS, a public scientific +and technological establishment, having its principal place of business +at 3 rue Michel-Ange, 75794 Paris cedex 16, France. + +Institut National de Recherche en Informatique et en Automatique - +INRIA, a public scientific and technological establishment, having its +principal place of business at Domaine de Voluceau, Rocquencourt, BP +105, 78153 Le Chesnay cedex, France. + + + Preamble + +The purpose of this Free Software license agreement is to grant users +the right to modify and redistribute the software governed by this +license within the framework of an open source distribution model. + +The exercising of these rights is conditional upon certain obligations +for users so as to preserve this status for all subsequent redistributions. 
+ +In consideration of access to the source code and the rights to copy, +modify and redistribute granted by the license, users are provided only +with a limited warranty and the software's author, the holder of the +economic rights, and the successive licensors only have limited liability. + +In this respect, the risks associated with loading, using, modifying +and/or developing or reproducing the software by the user are brought to +the user's attention, given its Free Software status, which may make it +complicated to use, with the result that its use is reserved for +developers and experienced professionals having in-depth computer +knowledge. Users are therefore encouraged to load and test the +suitability of the software as regards their requirements in conditions +enabling the security of their systems and/or data to be ensured and, +more generally, to use and operate it in the same conditions of +security. This Agreement may be freely reproduced and published, +provided it is not altered, and that no provisions are either added or +removed herefrom. + +This Agreement may apply to any or all software for which the holder of +the economic rights decides to submit the use thereof to its provisions. + + + Article 1 - DEFINITIONS + +For the purpose of this Agreement, when the following expressions +commence with a capital letter, they shall have the following meaning: + +Agreement: means this license agreement, and its possible subsequent +versions and annexes. + +Software: means the software in its Object Code and/or Source Code form +and, where applicable, its documentation, "as is" when the Licensee +accepts the Agreement. + +Initial Software: means the Software in its Source Code and possibly its +Object Code form and, where applicable, its documentation, "as is" when +it is first distributed under the terms and conditions of the Agreement. + +Modified Software: means the Software modified by at least one +Contribution. 
+ +Source Code: means all the Software's instructions and program lines to +which access is required so as to modify the Software. + +Object Code: means the binary files originating from the compilation of +the Source Code. + +Holder: means the holder(s) of the economic rights over the Initial +Software. + +Licensee: means the Software user(s) having accepted the Agreement. + +Contributor: means a Licensee having made at least one Contribution. + +Licensor: means the Holder, or any other individual or legal entity, who +distributes the Software under the Agreement. + +Contribution: means any or all modifications, corrections, translations, +adaptations and/or new functions integrated into the Software by any or +all Contributors, as well as any or all Internal Modules. + +Module: means a set of sources files including their documentation that +enables supplementary functions or services in addition to those offered +by the Software. + +External Module: means any or all Modules, not derived from the +Software, so that this Module and the Software run in separate address +spaces, with one calling the other when they are run. + +Internal Module: means any or all Module, connected to the Software so +that they both execute in the same address space. + +GNU GPL: means the GNU General Public License version 2 or any +subsequent version, as published by the Free Software Foundation Inc. + +Parties: mean both the Licensee and the Licensor. + +These expressions may be used both in singular and plural form. + + + Article 2 - PURPOSE + +The purpose of the Agreement is the grant by the Licensor to the +Licensee of a non-exclusive, transferable and worldwide license for the +Software as set forth in Article 5 hereinafter for the whole term of the +protection granted by the rights over said Software. 
+ + + Article 3 - ACCEPTANCE + +3.1 The Licensee shall be deemed as having accepted the terms and +conditions of this Agreement upon the occurrence of the first of the +following events: + + * (i) loading the Software by any or all means, notably, by + downloading from a remote server, or by loading from a physical + medium; + * (ii) the first time the Licensee exercises any of the rights + granted hereunder. + +3.2 One copy of the Agreement, containing a notice relating to the +characteristics of the Software, to the limited warranty, and to the +fact that its use is restricted to experienced users has been provided +to the Licensee prior to its acceptance as set forth in Article 3.1 +hereinabove, and the Licensee hereby acknowledges that it has read and +understood it. + + + Article 4 - EFFECTIVE DATE AND TERM + + + 4.1 EFFECTIVE DATE + +The Agreement shall become effective on the date when it is accepted by +the Licensee as set forth in Article 3.1. + + + 4.2 TERM + +The Agreement shall remain in force for the entire legal term of +protection of the economic rights over the Software. + + + Article 5 - SCOPE OF RIGHTS GRANTED + +The Licensor hereby grants to the Licensee, who accepts, the following +rights over the Software for any or all use, and for the term of the +Agreement, on the basis of the terms and conditions set forth hereinafter. + +Besides, if the Licensor owns or comes to own one or more patents +protecting all or part of the functions of the Software or of its +components, the Licensor undertakes not to enforce the rights granted by +these patents against successive Licensees using, exploiting or +modifying the Software. If these patents are transferred, the Licensor +undertakes to have the transferees subscribe to the obligations set +forth in this paragraph. 
+ + + 5.1 RIGHT OF USE + +The Licensee is authorized to use the Software, without any limitation +as to its fields of application, with it being hereinafter specified +that this comprises: + + 1. permanent or temporary reproduction of all or part of the Software + by any or all means and in any or all form. + + 2. loading, displaying, running, or storing the Software on any or + all medium. + + 3. entitlement to observe, study or test its operation so as to + determine the ideas and principles behind any or all constituent + elements of said Software. This shall apply when the Licensee + carries out any or all loading, displaying, running, transmission + or storage operation as regards the Software, that it is entitled + to carry out hereunder. + + + 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS + +The right to make Contributions includes the right to translate, adapt, +arrange, or make any or all modifications to the Software, and the right +to reproduce the resulting software. + +The Licensee is authorized to make any or all Contributions to the +Software provided that it includes an explicit notice that it is the +author of said Contribution and indicates the date of the creation thereof. + + + 5.3 RIGHT OF DISTRIBUTION + +In particular, the right of distribution includes the right to publish, +transmit and communicate the Software to the general public on any or +all medium, and by any or all means, and the right to market, either in +consideration of a fee, or free of charge, one or more copies of the +Software by any means. + +The Licensee is further authorized to distribute copies of the modified +or unmodified Software to third parties according to the terms and +conditions set forth hereinafter. 
+ + + 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION + +The Licensee is authorized to distribute true copies of the Software in +Source Code or Object Code form, provided that said distribution +complies with all the provisions of the Agreement and is accompanied by: + + 1. a copy of the Agreement, + + 2. a notice relating to the limitation of both the Licensor's + warranty and liability as set forth in Articles 8 and 9, + +and that, in the event that only the Object Code of the Software is +redistributed, the Licensee allows future Licensees unhindered access to +the full Source Code of the Software by indicating how to access it, it +being understood that the additional cost of acquiring the Source Code +shall not exceed the cost of transferring the data. + + + 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE + +When the Licensee makes a Contribution to the Software, the terms and +conditions for the distribution of the resulting Modified Software +become subject to all the provisions of this Agreement. + +The Licensee is authorized to distribute the Modified Software, in +source code or object code form, provided that said distribution +complies with all the provisions of the Agreement and is accompanied by: + + 1. a copy of the Agreement, + + 2. a notice relating to the limitation of both the Licensor's + warranty and liability as set forth in Articles 8 and 9, + +and that, in the event that only the object code of the Modified +Software is redistributed, the Licensee allows future Licensees +unhindered access to the full source code of the Modified Software by +indicating how to access it, it being understood that the additional +cost of acquiring the source code shall not exceed the cost of +transferring the data. + + + 5.3.3 DISTRIBUTION OF EXTERNAL MODULES + +When the Licensee has developed an External Module, the terms and +conditions of this Agreement do not apply to said External Module, that +may be distributed under a separate license agreement. 
+ + + 5.3.4 COMPATIBILITY WITH THE GNU GPL + +The Licensee can include a code that is subject to the provisions of one +of the versions of the GNU GPL in the Modified or unmodified Software, +and distribute that entire code under the terms of the same version of +the GNU GPL. + +The Licensee can include the Modified or unmodified Software in a code +that is subject to the provisions of one of the versions of the GNU GPL, +and distribute that entire code under the terms of the same version of +the GNU GPL. + + + Article 6 - INTELLECTUAL PROPERTY + + + 6.1 OVER THE INITIAL SOFTWARE + +The Holder owns the economic rights over the Initial Software. Any or +all use of the Initial Software is subject to compliance with the terms +and conditions under which the Holder has elected to distribute its work +and no one shall be entitled to modify the terms and conditions for the +distribution of said Initial Software. + +The Holder undertakes that the Initial Software will remain ruled at +least by this Agreement, for the duration set forth in Article 4.2. + + + 6.2 OVER THE CONTRIBUTIONS + +The Licensee who develops a Contribution is the owner of the +intellectual property rights over this Contribution as defined by +applicable law. + + + 6.3 OVER THE EXTERNAL MODULES + +The Licensee who develops an External Module is the owner of the +intellectual property rights over this External Module as defined by +applicable law and is free to choose the type of agreement that shall +govern its distribution. + + + 6.4 JOINT PROVISIONS + +The Licensee expressly undertakes: + + 1. not to remove, or modify, in any manner, the intellectual property + notices attached to the Software; + + 2. to reproduce said notices, in an identical manner, in the copies + of the Software modified or not. 
+ +The Licensee undertakes not to directly or indirectly infringe the +intellectual property rights of the Holder and/or Contributors on the +Software and to take, where applicable, vis-à-vis its staff, any and all +measures required to ensure respect of said intellectual property rights +of the Holder and/or Contributors. + + + Article 7 - RELATED SERVICES + +7.1 Under no circumstances shall the Agreement oblige the Licensor to +provide technical assistance or maintenance services for the Software. + +However, the Licensor is entitled to offer this type of services. The +terms and conditions of such technical assistance, and/or such +maintenance, shall be set forth in a separate instrument. Only the +Licensor offering said maintenance and/or technical assistance services +shall incur liability therefor. + +7.2 Similarly, any Licensor is entitled to offer to its licensees, under +its sole responsibility, a warranty, that shall only be binding upon +itself, for the redistribution of the Software and/or the Modified +Software, under terms and conditions that it is free to decide. Said +warranty, and the financial terms and conditions of its application, +shall be subject of a separate instrument executed between the Licensor +and the Licensee. + + + Article 8 - LIABILITY + +8.1 Subject to the provisions of Article 8.2, the Licensee shall be +entitled to claim compensation for any direct loss it may have suffered +from the Software as a result of a fault on the part of the relevant +Licensor, subject to providing evidence thereof. + +8.2 The Licensor's liability is limited to the commitments made under +this Agreement and shall not be incurred as a result of in particular: +(i) loss due the Licensee's total or partial failure to fulfill its +obligations, (ii) direct or consequential loss that is suffered by the +Licensee due to the use or performance of the Software, and (iii) more +generally, any consequential loss. 
In particular the Parties expressly +agree that any or all pecuniary or business loss (i.e. loss of data, +loss of profits, operating loss, loss of customers or orders, +opportunity cost, any disturbance to business activities) or any or all +legal proceedings instituted against the Licensee by a third party, +shall constitute consequential loss and shall not provide entitlement to +any or all compensation from the Licensor. + + + Article 9 - WARRANTY + +9.1 The Licensee acknowledges that the scientific and technical +state-of-the-art when the Software was distributed did not enable all +possible uses to be tested and verified, nor for the presence of +possible defects to be detected. In this respect, the Licensee's +attention has been drawn to the risks associated with loading, using, +modifying and/or developing and reproducing the Software which are +reserved for experienced users. + +The Licensee shall be responsible for verifying, by any or all means, +the suitability of the product for its requirements, its good working +order, and for ensuring that it shall not cause damage to either persons +or properties. + +9.2 The Licensor hereby represents, in good faith, that it is entitled +to grant all the rights over the Software (including in particular the +rights set forth in Article 5). + +9.3 The Licensee acknowledges that the Software is supplied "as is" by +the Licensor without any other express or tacit warranty, other than +that provided for in Article 9.2 and, in particular, without any warranty +as to its commercial value, its secured, safe, innovative or relevant +nature. + +Specifically, the Licensor does not warrant that the Software is free +from any error, that it will operate without interruption, that it will +be compatible with the Licensee's own equipment and software +configuration, nor that it will meet the Licensee's requirements. 
+ +9.4 The Licensor does not either expressly or tacitly warrant that the +Software does not infringe any third party intellectual property right +relating to a patent, software or any other property right. Therefore, +the Licensor disclaims any and all liability towards the Licensee +arising out of any or all proceedings for infringement that may be +instituted in respect of the use, modification and redistribution of the +Software. Nevertheless, should such proceedings be instituted against +the Licensee, the Licensor shall provide it with technical and legal +assistance for its defense. Such technical and legal assistance shall be +decided on a case-by-case basis between the relevant Licensor and the +Licensee pursuant to a memorandum of understanding. The Licensor +disclaims any and all liability as regards the Licensee's use of the +name of the Software. No warranty is given as regards the existence of +prior rights over the name of the Software or as regards the existence +of a trademark. + + + Article 10 - TERMINATION + +10.1 In the event of a breach by the Licensee of its obligations +hereunder, the Licensor may automatically terminate this Agreement +thirty (30) days after notice has been sent to the Licensee and has +remained ineffective. + +10.2 A Licensee whose Agreement is terminated shall no longer be +authorized to use, modify or distribute the Software. However, any +licenses that it may have granted prior to termination of the Agreement +shall remain valid subject to their having been granted in compliance +with the terms and conditions hereof. 
+ + + Article 11 - MISCELLANEOUS + + + 11.1 EXCUSABLE EVENTS + +Neither Party shall be liable for any or all delay, or failure to +perform the Agreement, that may be attributable to an event of force +majeure, an act of God or an outside cause, such as defective +functioning or interruptions of the electricity or telecommunications +networks, network paralysis following a virus attack, intervention by +government authorities, natural disasters, water damage, earthquakes, +fire, explosions, strikes and labor unrest, war, etc. + +11.2 Any failure by either Party, on one or more occasions, to invoke +one or more of the provisions hereof, shall under no circumstances be +interpreted as being a waiver by the interested Party of its right to +invoke said provision(s) subsequently. + +11.3 The Agreement cancels and replaces any or all previous agreements, +whether written or oral, between the Parties and having the same +purpose, and constitutes the entirety of the agreement between said +Parties concerning said purpose. No supplement or modification to the +terms and conditions hereof shall be effective as between the Parties +unless it is made in writing and signed by their duly authorized +representatives. + +11.4 In the event that one or more of the provisions hereof were to +conflict with a current or future applicable act or legislative text, +said act or legislative text shall prevail, and the Parties shall make +the necessary amendments so as to comply with said act or legislative +text. All other provisions shall remain effective. Similarly, invalidity +of a provision of the Agreement, for any reason whatsoever, shall not +cause the Agreement as a whole to be invalid. + + + 11.5 LANGUAGE + +The Agreement is drafted in both French and English and both versions +are deemed authentic. + + + Article 12 - NEW VERSIONS OF THE AGREEMENT + +12.1 Any person is authorized to duplicate and distribute copies of this +Agreement. 
+ +12.2 So as to ensure coherence, the wording of this Agreement is +protected and may only be modified by the authors of the License, who +reserve the right to periodically publish updates or new versions of the +Agreement, each with a separate number. These subsequent versions may +address new issues encountered by Free Software. + +12.3 Any Software distributed under a given version of the Agreement may +only be subsequently distributed under the same version of the Agreement +or a subsequent version, subject to the provisions of Article 5.3.4. + + + Article 13 - GOVERNING LAW AND JURISDICTION + +13.1 The Agreement is governed by French law. The Parties agree to +endeavor to seek an amicable solution to any disagreements or disputes +that may arise during the performance of the Agreement. + +13.2 Failing an amicable solution within two (2) months as from their +occurrence, and unless emergency proceedings are necessary, the +disagreements or disputes shall be referred to the Paris Courts having +jurisdiction, by the more diligent Party. + + +Version 2.0 dated 2006-09-05.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Program.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,175 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; +import java.awt.*; +import javax.swing.*; + + +public class Program { + String shortName; + String name; + String section; + String description; + Vector <ProgramOption> options; + JPanel panel; + JButton button; + + + public Program() { + this.shortName = null; + this.name = null; + this.options = new Vector <ProgramOption> (); + } + + + public void setShortName(String shortName) { + this.shortName = shortName; + } + + + public void setName(String name) { + this.name = name; + } + + + public void setSection(String section) { + this.section = section; + } + + public void setDescription(String description) { + this.description = description; + } + + + public void addOption(ProgramOption option) { + options.add(option); + } + + + public String getShortName() { + return this.shortName; + } + + + public String getName() { + return this.name; + } + + + public String getSection() { + return this.section; + } + + public String getDescription() { + return this.description; + } + + + public String checkValues() { + for (int i = 0; i < options.size(); i++) { + String comment = options.get(i).checkValue(); + if (comment != null) { + return comment; + } + } + return null; + } + + + public LinkedList<String> getCommand() { + LinkedList<String> parameterList = new LinkedList<String>(); + parameterList.add(Global.pythonCommand); + parameterList.add("Python" + java.io.File.separator + this.shortName); + for (int i = 0; i < options.size(); i++) { + ProgramOption option = options.get(i); + parameterList.addAll(option.getCommand()); + } + return parameterList; + } + + + public JPanel getPanel() { + if (this.panel != null) { + return this.panel; + } + + this.panel = new JPanel(false); + this.panel.setLayout(new FlowLayout()); + Box box = Box.createVerticalBox(); + + JPanel descriptionPanel = new JPanel(false); + JLabel descriptionLabel = new JLabel(this.description); + descriptionPanel.add(descriptionLabel); + box.add(descriptionPanel); + + for (int i = 
0; i < options.size(); i++) { + ProgramOption option = options.get(i); + JPanel panel = option.getPanel(); + if (panel == null) { + System.out.println("Problem with Python program '" + this.shortName + "'."); + return null; + } + box.add(option.getPanel()); + } + + JPanel buttonPanel = new JPanel(false); + this.button = new JButton("GO!"); + + buttonPanel.add(button); + + box.add(buttonPanel); + + this.panel.add(box); + + return this.panel; + } + + + public JButton getButton() { + if (this.button == null) { + this.getPanel(); + } + return this.button; + } + + + public Vector < File > getOutputFiles() { + Vector < File > files = new Vector < File > (); + for (int i = 0; i < options.size(); i++) { + ProgramOption option = options.get(i); + if (! option.isInput()) { + files.add(option.getOutputFile()); + } + } + return files; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/ProgramFileReader.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,174 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; +import java.io.File; +import java.io.*; + + +public class ProgramFileReader { + String fileName; + Vector <Program> programs; + + + public ProgramFileReader(String fileName) { + this.fileName = fileName; + this.programs = new Vector <Program> (); + } + + + public boolean read() { +// File file = new File(this.fileName); +// Program program = null; +// int step = 0; +// TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> (); + +// try { +// BufferedReader reader = new BufferedReader(new FileReader(file)); +// String line = null; +// String section = null; + +// while ((line = reader.readLine()) != null) { + +// line = line.trim(); + +// if (line.length() == 0) { +// if (program != null) { +// programs.add(program); +// } +// program = null; +// step = 0; +// continue; +// } + +// if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) { +// section = line.substring(1, line.length() - 1).trim(); +// continue; +// } +// switch (step) { +// case 0: +// program = new Program(); +// program.setName(line); +// if (section == null) { +// System.out.println("Error! Section of program '" + line + "' is not set!"); +// } +// program.setSection(section); +// step = 1; +// break; +// case 1: +// program.setShortName(line); +// step = 2; +// break; +// case 2: +// ProgramOption option = new ProgramOption(); + +// String[] elements = line.split(":"); +// boolean input = elements[0].trim().equalsIgnoreCase("input")? true: false; +// String[] subElements = elements[1].split(";"); +// String identifier = subElements[0].trim(); + +// option.setInput(input); + +// if (input) { + +// if (subElements.length < 4) { +// System.out.println("Line '" + line + "' is weird..."); +// } + +// String type = subElements[1].trim(); +// String comment = subElements[2].trim(); +// boolean compulsory = subElements[3].trim().equalsIgnoreCase("0")? 
false: true; + +// option.setIdentifier(identifier); +// option.setType(type); +// option.setComment(comment); +// option.setCompulsory(compulsory); + +// if ("file".compareToIgnoreCase(type) == 0) { +// if (subElements.length < 5) { +// System.out.println("Line '" + line + "' is weird..."); +// } + +// String formatIdentifier = subElements[4].trim(); +// option.setFormatIdentifier(formatIdentifier); +// } +// else if ("choice".compareToIgnoreCase(type) == 0) { +// if (subElements.length < 5) { +// System.out.println("Line '" + line + "' is weird..."); +// } + +// String[] choices = subElements[4].trim().split(","); +// for (int i = 0; i < choices.length; i++) { +// choices[i] = choices[i].trim(); +// } +// option.setChoices(choices); +// } +// options.put(identifier, option); +// } +// else { +// String format = subElements[1].trim(); + +// option.setFormat(format); +// option.setAssociatedOption(options.get(identifier)); +// } + +// program.addOption(option); + +// break; +// default: +// return false; +// } +// } + +// reader.close(); +// } +// catch (FileNotFoundException e) { +// return false; +// } +// catch (IOException e) { +// return false; +// } + +// if (program != null) { +// programs.add(program); +// } + + return true; + } + + public int getNbPrograms() { + return programs.size(); + } + + public Program getProgram(int i) { + return programs.get(i); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/ProgramLauncher.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,209 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; +import java.io.*; +import javax.swing.SwingUtilities; +import javax.swing.*; +import java.util.concurrent.CountDownLatch; + +public class ProgramLauncher extends SwingWorker<Boolean, String> { + + String[] command; + JTextArea logArea; + JLabel messageField; + JProgressBar progressBar; + JLabel etaField; + int exitValue; + CountDownLatch latch; + + + + public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) { + command = new String[c.size()]; + logArea = la; + messageField = mf; + progressBar = pb; + etaField = ef; + exitValue = -1; + c.toArray(command); + latch = new CountDownLatch(1); + } + + + public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) { + command = c; + logArea = la; + messageField = mf; + progressBar = pb; + etaField = ef; + exitValue = -1; + latch = new CountDownLatch(1); + } + + + @Override + public Boolean doInBackground() { + ProcessBuilder pb = new ProcessBuilder(command); + Process process = null; + BufferedReader outputReader = null; + pb = pb.redirectErrorStream(true); + Map<String, String> env = pb.environment(); + env.put("PYTHONPATH", System.getProperty("user.dir")); + env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "SMART" + java.io.File.separator + "Java" + java.io.File.separator + "Python"); + env.put("SMARTMYSQLPATH", Global.mysqlCommand); + env.put("SMARTRPATH", Global.rCommand); + String commandJoined = Arrays.toString(command); + + try { + publish("=== Starting command '" + commandJoined.trim() + "' ===\n"); + process = pb.start(); + + BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream()); + InputStream is = process.getInputStream(); + InputStreamReader isr = new InputStreamReader(is); + outputReader = new BufferedReader(isr); + } + catch (Exception exception) { + publish("!Process cannot be started (command is '" + commandJoined + "')!\n"); + 
exception.printStackTrace(); + latch.countDown(); + return Boolean.FALSE; + } + if (outputReader == null) { + publish("!Problem in the output of the command!\n"); + latch.countDown(); + return Boolean.FALSE; + } + else { + try { + String line; + while ((line = outputReader.readLine()) != null) { + publish(line + "\n"); + } + } + catch (IOException e) { + e.printStackTrace(); + publish("!Cannot get the output of the command!\n"); + latch.countDown(); + return Boolean.FALSE; + } + } + try { + process.waitFor(); + } + catch (InterruptedException e) { + e.printStackTrace(); + publish("!Cannot wait for the end of the command!\n"); + latch.countDown(); + return Boolean.FALSE; + } + try { + exitValue = process.exitValue(); + } + catch (IllegalThreadStateException e) { + e.printStackTrace(); + publish("!Cannot get the exit value of the command!\n"); + latch.countDown(); + return Boolean.FALSE; + } + if (exitValue != 0) { + publish("!Problem during the execution of the command '" + commandJoined + "'!\n"); + latch.countDown(); + return Boolean.FALSE; + } + publish("=== Ending command '" + commandJoined.trim() + "' ===\n"); + latch.countDown(); + return Boolean.TRUE; + } + + + @Override + protected void process(List<String> chunks) { + String message = ""; + String text = logArea.getText(); + for (String chunk: chunks) { + text += chunk; + } + for (String lineSeparatedByCarriageReturn: text.split("\n")) { + for (String line: lineSeparatedByCarriageReturn.split("\r")) { + boolean progressLine = false; + if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) { + String[] ratioElements = line.split("\\]")[1].trim().split("/"); + int current = Integer.parseInt(ratioElements[0].trim()); + int aim = Integer.parseInt(ratioElements[1].trim()); + messageField.setText(line.split("\\[")[0].trim()); + progressBar.setValue(current * 100 / aim); + etaField.setText(""); + progressLine = true; + } + else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) { + String[] ratioElements 
= line.split("\\]")[1].split("E")[0].trim().split("/"); + int current = Integer.parseInt(ratioElements[0].trim()); + int aim = Integer.parseInt(ratioElements[1].trim()); + String eta = line.split("ETA:")[1].trim(); + messageField.setText(line.split("\\[")[0].trim()); + progressBar.setValue(current * 100 / aim); + etaField.setText("ETA: " + eta); + progressLine = true; + } + else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) { + String nbElements = line.split("\\]")[1].split("completed")[0].trim(); + String timeSpent = line.split("completed in")[1].trim(); + message += line.split("\\[")[0].trim() + ": " + nbElements + " elements completed in " + timeSpent + "\n"; + messageField.setText(line.split("\\[")[0].trim()); + progressLine = true; + } + if (! progressLine) { + message += line + "\n"; + } + } + } + String lines[] = message.split("\n"); + String toBeWritten = ""; + for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) { + toBeWritten += lines[i] + "\n"; + } + logArea.setText(toBeWritten); + } + + public int getExitValue() { + try { + latch.await(); + } + catch (InterruptedException e) { + logArea.append("Cannot wait for the end of the process!\n"); + e.printStackTrace(); + return -1; + } + return exitValue; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/ProgramOption.java Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,358 @@ +/** + * + * Copyright INRA-URGI 2009-2010 + * + * This software is governed by the CeCILL license under French law and + * abiding by the rules of distribution of free software. You can use, + * modify and/ or redistribute the software under the terms of the CeCILL + * license as circulated by CEA, CNRS and INRIA at the following URL + * "http://www.cecill.info". + * + * As a counterpart to the access to the source code and rights to copy, + * modify and redistribute granted by the license, users are provided only + * with a limited warranty and the software's author, the holder of the + * economic rights, and the successive licensors have only limited + * liability. + * + * In this respect, the user's attention is drawn to the risks associated + * with loading, using, modifying and/or developing or reproducing the + * software by the user in light of its specific status of free software, + * that may mean that it is complicated to manipulate, and that also + * therefore means that it is reserved for developers and experienced + * professionals having in-depth computer knowledge. Users are therefore + * encouraged to load and test the software's suitability as regards their + * requirements in conditions enabling the security of their systems and/or + * data to be ensured and, more generally, to use and operate it in the + * same conditions as regards security. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL license and that you accept its terms. 
+ * + */ +import java.util.*; +import java.awt.*; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.*; +import javax.swing.*; +import javax.swing.filechooser.*; +import javax.swing.border.*; +import javax.swing.SwingUtilities; + + +public class ProgramOption { + boolean input; + String identifier; + String type; + String comment; + boolean compulsory; + String[] format; + String formatIdentifier; + ProgramOption associatedOption; + String defaultValue; + String[] choices; + JComponent component; + JPanel panel; + + + public ProgramOption() { + this.input = true; + this.identifier = null; + this.type = null; + this.comment = null; + this.compulsory = false; + this.format = null; + this.formatIdentifier = null; + this.associatedOption = null; + this.defaultValue = ""; + this.choices = null; + this.component = null; + this.panel = null; + } + + + public void setInput(boolean input) { + this.input = input; + } + + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + + public void setType(String type) { + this.type = type; + } + + + public void setComment(String comment) { + this.comment = comment; + } + + + public void setCompulsory(boolean compulsory) { + this.compulsory = compulsory; + } + + + public void setFormat(String[] format) { + this.format = format; + } + + + public void setFormat(String format) { + this.format = new String[1]; + this.format[0] = format; + } + + + public void setFormatIdentifier(String formatIdentifier) { + this.formatIdentifier = formatIdentifier; + } + + + public void setAssociatedOption(ProgramOption option) { + this.associatedOption = option; + } + + + public void setChoices(String[] choices) { + this.choices = new String[choices.length+1]; + this.choices[0] = "---"; + for (int i = 0; i < choices.length; i++) { + this.choices[i+1] = choices[i]; + } + } + + + public void setDefault(String defaultValue) { + this.defaultValue = defaultValue; + } + + + public boolean 
isInput() { + return this.input; + } + + + public boolean checkSettings() { + if (this.identifier == null) { + return false; + } + if (this.type == null) { + return false; + } + if (this.comment == null) { + return false; + } + if (this.comment == null) { + return false; + } + if (("choice".compareToIgnoreCase(this.type) == 0) && (this.choices == null)) { + return false; + } + return true; + } + + + public JPanel getPanel() { + if (this.panel != null) { + return this.panel; + } + String comment = this.comment; + if (this.compulsory) { + comment += " [*]"; + } + + GridLayout horizontalLayout = new GridLayout(1, 0); + this.panel = new JPanel(false); + this.panel.setLayout(horizontalLayout); + JLabel label = new JLabel(comment); + + if (this.type == null) { + System.out.println("Error! Option '" + this.identifier + "' is not set!"); + } + + if (("int".compareToIgnoreCase(this.type) == 0) || ("float".compareToIgnoreCase(this.type) == 0) || ("string".compareToIgnoreCase(this.type) == 0) || (("file".compareToIgnoreCase(this.type) == 0) && (!this.input))) { + this.component = new JTextField(); + if (this.defaultValue != null) { + ((JTextField) this.component).setText(this.defaultValue); + } + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + } + else if ("file".compareToIgnoreCase(this.type) == 0) { + this.component = new JComboBox(Global.fileNames); + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + } + else if ("boolean".compareToIgnoreCase(this.type) == 0) { + this.component = new JCheckBox(); + if ((this.defaultValue != null) && (this.defaultValue.compareToIgnoreCase("true") == 0)) { + ((JCheckBox) this.component).setSelected(true); + } + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + } + else if ("format".compareToIgnoreCase(this.type) == 0) { + Vector < String > formats = new Vector < String > (); + for (String format: 
this.format) { + if (Global.formats.getFormats(format) == null) { + System.out.println("Do not know how to handle format '" + format + "'."); + } + formats.addAll(Global.formats.getFormats(format).getFormats()); + } + this.component = new JComboBox(formats); + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + } + else if ("files".compareToIgnoreCase(this.type) == 0) { + JButton button = new JButton("file..."); + this.component = new JTextField(); + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + this.panel.add(button); + Global.otherFileConcatenationChooser.put(button, (JTextField) this.component); + } + else if ("directory".compareToIgnoreCase(this.type) == 0) { + JButton button = new JButton("directory..."); + this.component = new JTextField(); + label.setLabelFor(this.component); + this.panel.add(label); + JPanel rightPanel = new JPanel(false); + rightPanel.setLayout(new BoxLayout(rightPanel, BoxLayout.LINE_AXIS)); + rightPanel.add(this.component); + rightPanel.add(button); + this.panel.add(rightPanel); + Global.otherDirectoriesChooser.put(button, (JTextField) this.component); + } + else if ("choice".compareToIgnoreCase(this.type) == 0) { + this.component = new JComboBox(this.choices); + label.setLabelFor(this.component); + this.panel.add(label); + this.panel.add(this.component); + } + else { + System.out.println("Do not know how to read type " + this.type); + } + + return this.panel; + } + + + public JComponent getComponent() { + if (component == null) { + this.getPanel(); + } + return this.component; + } + + + private String getValue() { + if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type))) { + String s = ((JTextField) this.component).getText(); + if ("None".equals(s)) { + return ""; + } + return s; + } + if ("file".equals(this.type)) { + return (String) ((JComboBox) this.component).getSelectedItem(); + } + if ("boolean".equals(this.type)) { + return ((JCheckBox) this.component).isSelected()? "true": "false"; + } + if ("format".equals(this.type)) { + return (String) ((JComboBox) this.component).getSelectedItem(); + } + if ("choice".equals(this.type)) { + String s = (String) ((JComboBox) this.component).getSelectedItem(); + if ("---".equals(s)) { + return ""; + } + return s; + } + System.out.println("Do not know how to get value of '" + this.type + "' (" + this.identifier + ")."); + return null; + } + + + public String checkValue() { + String value = this.getValue(); + if ((this.compulsory) && ((value == null) || ("".equals(value)))) { + return "Option '" + this.comment + "' has no value... Please specify it.\n"; + } + if ("int".equals(this.type)) { + if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) { + try { + int i = Integer.parseInt(value); + } + catch (NumberFormatException e) { + return "Option '" + this.comment + "' should be an integer... Please correct it.\n"; + } + } + } + else if ("float".equals(this.type)) { + if ((value != null) && (! "".equals(value))) { + try { + float i = Float.parseFloat(value); + } + catch (NumberFormatException e) { + return "Option '" + this.comment + "' should be a float... Please correct it.\n"; + } + } + } + return null; + } + + + public LinkedList <String> getCommand() { + LinkedList <String> list = new LinkedList <String> (); + + if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type)) || ("choice".equals(this.type))) { + String value = this.getValue(); + if (value.length() == 0) { + return list; + } + list.add(this.identifier); + list.add(value); + return list; + } + if ("file".equals(this.type)) { + String fileName = (String) ((JComboBox) this.component).getSelectedItem(); + if (fileName == null) { + return list; + } + list.add(this.identifier); + list.add(this.getValue()); + return list; + } + if (("boolean".equals(this.type)) || ("bool".equals(this.type))) { + if ("true".equals(this.getValue())) { + list.add(this.identifier); + } + return list; + } + System.out.println("Cannot get type of option " + this.type + " (" + this.identifier + "): " + this.getValue()); + return null; + } + + + public File getOutputFile() { + if (this.input) return null; + String format = ""; + if (this.format != null) { + format = this.format[0]; + } + if (this.associatedOption != null) { + format = this.associatedOption.getValue(); + } + return new File(this.getValue(), Global.formats.getFormatType(format), format); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/.gitignore Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,1 @@ +/CleanTranscriptFile.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/CleanTranscriptFile.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,74 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
from optparse import OptionParser
from SMART.Java.Python.cleaning.CleanerChooser import CleanerChooser


class CleanTranscriptFile(object):
    """
    Clean a transcript file, delegating the work to the cleaner selected
    by a CleanerChooser for the format of the input file
    """

    def __init__(self, verbosity):
        self.verbosity = verbosity
        self.chooser   = CleanerChooser(verbosity)

    def setInputFile(self, fileName, format):
        # the cleaner only exists once the format is known
        self.chooser.findFormat(format)
        cleaner = self.chooser.getCleaner()
        cleaner.setInputFileName(fileName)
        self.cleaner = cleaner

    def setOutputFile(self, fileName):
        self.cleaner.setOutputFileName(fileName)

    def setAcceptedTypes(self, types):
        # None means: no restriction, leave the cleaner untouched
        if types is None:
            return
        self.cleaner.setAcceptedTypes(types)

    def run(self):
        self.cleaner.clean()


if __name__ == "__main__":

    description = "Clean Transcript File v1.0.1: Clean a transcript file so that it is useable for S-MART. [Category: Other]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-t", "--types",     dest="acceptedTypes",  action="store", default=None, type="string", help="name of the types you want to keep in GFF/GTF (list separated by commas) [format: string] [default: None]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # the accepted types are given as a comma-separated list
    if options.acceptedTypes is None:
        acceptedTypes = None
    else:
        acceptedTypes = options.acceptedTypes.split(",")

    cleanerTool = CleanTranscriptFile(options.verbosity)
    cleanerTool.setInputFile(options.inputFileName, options.format)
    cleanerTool.setOutputFile(options.outputFileName)
    cleanerTool.setAcceptedTypes(acceptedTypes)
    cleanerTool.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/ClusterizeByTags.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,157 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import random
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter


OPERATIONS    = ("diff", "div")
# strands to scan, depending on the "one strand" switch
BOOLTOSTRANDS = {True: [0], False: [-1, 1]}

class ClusterizeByTags(object):
    """
    Merge consecutive elements (sorted by start, end) as long as the trend of
    the value of a given tag stays stable: the trend is the difference
    ("diff") or the ratio ("div") between the values of two consecutive
    elements, and two elements are merged when the trend moves by no more
    than a threshold. The merged element carries the sum of the values.
    """

    def __init__(self, verbosity):
        self.verbosity    = verbosity
        self.connection   = MySqlConnection(self.verbosity-1)
        self.tagName      = None
        self.defaultValue = None
        self.threshold    = None
        self.operation    = None
        self.maxDistance  = None
        self.oneStrand    = False

    def setInputFile(self, fileName, format):
        """
        Parse the input file and store its content in MySQL tables
        """
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        parser = chooser.getParser(fileName)
        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)
        writer.addTranscriptList(parser)
        writer.write()
        # the tables are indexed by chromosome name (see run)
        self.transcriptTables = writer.getTables()

    def setOutputFile(self, fileName):
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setTag(self, tagName, defaultValue):
        """
        Set the name of the tag, and the value used for elements without this tag
        """
        self.tagName      = tagName
        self.defaultValue = defaultValue

    def setThreshold(self, threshold):
        self.threshold = threshold

    def setOperation(self, operation):
        """
        Set the operation computing the trend; raise an Exception if it is not in OPERATIONS
        """
        self.operation = operation
        if self.operation not in OPERATIONS:
            raise Exception("Operation '%s' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))

    def setMaxDistance(self, distance):
        self.maxDistance = distance

    def setOneStrand(self, oneStrand):
        self.oneStrand = oneStrand

    def run(self):
        """
        Clusterize the elements of each chromosome and write the clusters
        """
        for chromosome in sorted(self.transcriptTables.keys()):
            table    = self.transcriptTables[chromosome]
            progress = Progress(table.getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)
            for strand in BOOLTOSTRANDS[self.oneStrand]:
                previousValue      = None
                previousTrend      = None
                previousTranscript = None
                sumValue           = 0
                command = "SELECT * FROM %s" % (table.getName())
                if not self.oneStrand:
                    command += " WHERE direction = %d" % (strand)
                command += " ORDER BY start, end"
                for index, transcript in table.selectTranscripts(command):
                    if self.tagName in transcript.getTagNames():
                        value = transcript.getTagValue(self.tagName)
                    else:
                        value = self.defaultValue
                    if previousValue is None:
                        trend = None
                    elif self.operation == "diff":
                        trend = value - previousValue
                    else:
                        # NOTE(review): fails with ZeroDivisionError if the previous value is 0
                        trend = value / previousValue
                    # NOTE(review): when oneStrand is set, the last condition requires both
                    # elements to be on the same strand, which makes the "reverse" below
                    # unreachable; "or self.oneStrand" may have been intended -- to be confirmed
                    if previousTranscript is None:
                        sumValue = value
                    elif (previousTrend is None or abs(trend - previousTrend) <= self.threshold) and (self.maxDistance is None or previousTranscript.getDistance(transcript) <= self.maxDistance) and (previousTranscript.getDirection() == transcript.getDirection() or not self.oneStrand):
                        if previousTranscript.getDirection() != transcript.getDirection():
                            transcript.reverse()
                        previousTranscript.merge(transcript)
                        transcript     = previousTranscript
                        sumValue      += value
                        previousTrend  = trend
                    else:
                        # the trend is broken: flush the current cluster and start a new one
                        previousTranscript.setTagValue(self.tagName, sumValue)
                        self.writer.addTranscript(previousTranscript)
                        sumValue      = value
                        previousTrend = None
                    previousValue      = value
                    previousTranscript = transcript
                    progress.inc()
                # flush the last cluster of the strand
                if previousTranscript is not None:
                    previousTranscript.setTagValue(self.tagName, sumValue)
                    self.writer.addTranscript(previousTranscript)
            progress.done()
        self.writer.close()


if __name__ == "__main__":

    description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-t", "--tag",       dest="tagName",        action="store",                     type="string", help="name of the tag [format: string] [compulsory]")
    parser.add_option("-e", "--default",   dest="defaultValue",   action="store",      default=None,  type="int",    help="default value for the tag [format: string]")
    parser.add_option("-r", "--threshold", dest="threshold",      action="store",                     type="int",    help="threshold between two consecutive tags [format: int] [compulsory]")
    parser.add_option("-p", "--operation", dest="operation",      action="store",                     type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")
    parser.add_option("-d", "--distance",  dest="maxDistance",    action="store",      default=None,  type="int",    help="maximum distance for 2 clusters to be merged [format: int] [default: None]")
    parser.add_option("-1", "--oneStrand", dest="oneStrand",      action="store_true", default=False,                help="also cluster the elements which are on different strands [format: bool] [default: False]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    cbt = ClusterizeByTags(options.verbosity)
    cbt.setInputFile(options.inputFileName, options.format)
    cbt.setOutputFile(options.outputFileName)
    cbt.setTag(options.tagName, options.defaultValue)
    cbt.setThreshold(options.threshold)
    cbt.setOperation(options.operation)
    cbt.setMaxDistance(options.maxDistance)
    cbt.setOneStrand(options.oneStrand)
    cbt.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/CollapseReads.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,174 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import os
from optparse import OptionParser, OptionGroup
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
from SMART.Java.Python.ncList.FileSorter import FileSorter
from SMART.Java.Python.misc.Progress import Progress


class CollapseReads(object):
    """
    Merge two reads if they have exactly the same genomic coordinates
    """

    def __init__(self, verbosity = 0):
        self.verbosity         = verbosity
        self.inputReader       = None
        self.outputWriter      = None
        # if set, two reads are merged only when they are on the same strand
        self.strands           = True
        self.nbRead            = 0
        self.nbWritten         = 0
        self.nbMerges          = 0
        # temporary sorted files, indexed by chromosome (see _sortFile)
        self.splittedFileNames = {}

    def __del__(self):
        # remove the temporary files; some may already be gone
        for fileName in self.splittedFileNames.values():
            if os.path.exists(fileName):
                os.remove(fileName)

    def close(self):
        """
        Close the output writer; calling it again is harmless
        """
        if self.outputWriter is not None:
            self.outputWriter.close()
            self.outputWriter = None

    def setInputFile(self, fileName, format):
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(format, "transcript")
        self.parser         = parserChooser.getParser(fileName)
        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])

    def setOutputFile(self, fileName):
        self.outputWriter = Gff3Writer(fileName, self.verbosity)

    def getNbElements(self):
        return self.parser.getNbTranscripts()

    def _sortFile(self):
        """
        Sort the input, one output file per chromosome, and record the counts
        """
        fs = FileSorter(self.parser, self.verbosity-4)
        fs.perChromosome(True)
        fs.setOutputFileName(self.sortedFileName)
        fs.sort()
        self.splittedFileNames       = fs.getOutputFileNames()
        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
        self.nbRead                  = fs.getNbElements()

    def _iterate(self, chromosome):
        """
        Collapse the reads of the sorted file of one chromosome
        """
        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
        # reads which may still be merged with a forthcoming read
        transcripts = []
        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
        for newTranscript in parser.getIterator():
            newTranscripts = []
            for oldTranscript in transcripts:
                if self._checkOverlap(newTranscript, oldTranscript):
                    # the old read is absorbed by the new one
                    self._merge(newTranscript, oldTranscript)
                elif self._checkPassed(newTranscript, oldTranscript):
                    # the old read starts before the new one: it is written and dropped
                    self._write(oldTranscript)
                else:
                    newTranscripts.append(oldTranscript)
            newTranscripts.append(newTranscript)
            transcripts = newTranscripts
            progress.inc()
        for transcript in transcripts:
            self._write(transcript)
        progress.done()

    def _merge(self, transcript1, transcript2):
        """
        Merge transcript2 into transcript1, after giving it the direction of transcript1
        """
        self.nbMerges += 1
        transcript2.setDirection(transcript1.getDirection())
        transcript1.merge(transcript2)

    def _write(self, transcript):
        self.nbWritten += 1
        self.outputWriter.addTranscript(transcript)

    def _checkOverlap(self, transcript1, transcript2):
        """
        Check whether the reads have the same start and end (and the same direction if strands matter)
        """
        if transcript1.getStart() != transcript2.getStart() or transcript1.getEnd() != transcript2.getEnd():
            return False
        return (not self.strands or transcript1.getDirection() == transcript2.getDirection())

    def _checkPassed(self, transcript1, transcript2):
        """
        Check whether transcript2 starts before transcript1
        """
        return (transcript2.getStart() < transcript1.getStart())

    def collapseChromosome(self, chromosome):
        # NOTE(review): this method is not called in this file and cannot run as is:
        # "table" and "self.writeTranscript" are not defined anywhere here. It looks
        # like a leftover of a table-based implementation -- to be confirmed before removal.
        progress            = Progress(table.getNbElements(), "Analysing chromosome %s" % (chromosome), self.verbosity)
        command             = "SELECT * FROM %s ORDER BY start ASC, end DESC" % (table.name)
        transcriptStart     = None
        transcriptEnd       = None
        transcriptDirection = None
        currentTranscript   = None
        if self.strands:
            command += ", direction"
        for index, transcript in table.selectTranscripts(command, True):
            self.nbRead += 1
            if not self.strands:
                transcript.setDirection("+")
            if transcriptStart != transcript.getStart() or transcriptEnd != transcript.getEnd() or transcriptDirection != transcript.getDirection():
                self.writeTranscript(currentTranscript)
                transcriptStart     = transcript.getStart()
                transcriptEnd       = transcript.getEnd()
                transcriptDirection = transcript.getDirection()
                currentTranscript   = transcript
            else:
                currentTranscript.setTagValue("nbElements", (currentTranscript.getTagValue("nbElements") + 1) if "nbElements" in currentTranscript.getTagNames() else 1)
            progress.inc()
        self.writeTranscript(currentTranscript)
        progress.done()

    def collapse(self):
        """
        Sort the input, collapse every chromosome, close the output and print the statistics
        """
        self._sortFile()
        for chromosome in sorted(self.nbElementsPerChromosome.keys()):
            self._iterate(chromosome)
        self.close()
        if self.verbosity > 1:
            # an empty input should not crash the statistics
            percentage = (float(self.nbWritten) / self.nbRead * 100) if self.nbRead else 0.0
            # single-argument print(...) works the same with Python 2 and Python 3
            print("# reads read: %d" % (self.nbRead))
            print("# reads written: %d (%.2f%%)" % (self.nbWritten, percentage))
            print("# reads merges: %d" % (self.nbMerges))

if __name__ == "__main__":

    # parse command line
    description = "Collapse Reads v1.0.3: Merge two reads if they have exactly the same genomic coordinates. [Category: Merge]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-s", "--strands",   dest="strands",        action="store_true", default=False,                help="merge elements on 2 different strands [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
    (options, args) = parser.parse_args()

    collapser = CollapseReads(options.verbosity)
    collapser.setInputFile(options.inputFileName, options.format)
    collapser.setOutputFile(options.outputFileName)
    # the option asks to merge across strands, the attribute tells whether strands matter
    collapser.strands = not options.strands
    collapser.collapse()
    collapser.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/CombineTags.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,115 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import os
import random
from optparse import OptionParser
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from SMART.Java.Python.misc.Progress import Progress
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.Gff3Writer import Gff3Writer

# Names of the arithmetic operations accepted by CombineTags.setOperation().
OPERATIONS = ("plus", "minus", "times", "div")

class CombineTags(object):
    """Combine two tags of every transcript into a new tag.

    For each transcript read from the input file, the values of two tags are
    converted to floats and combined with one of OPERATIONS; the result is
    stored under a third tag and the transcript is written to a GFF3 file.
    """

    def __init__(self, verbosity = 0):
        """Store the trace level handed to the parser, writer and progress bar."""
        self.verbosity = verbosity

    def setInputFile(self, fileName, format):
        """Select the parser used to read `fileName`.

        `format` is resolved through ParserChooser.findFormat(format, "transcript").
        """
        self.inputFileName = fileName
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(format, "transcript")
        self.parser = parserChooser.getParser(fileName)

    def setOutputFile(self, fileName):
        """Create the Gff3Writer that receives the updated transcripts."""
        self.outputWriter = Gff3Writer(fileName, self.verbosity)

    def setTags(self, tag1, tag2, outputTag, defaultValue = None):
        """Declare the two tags to combine and the tag receiving the result.

        `defaultValue` is written as-is to `outputTag` when a transcript lacks
        one of the two input tags; when it is None such a transcript makes
        run() raise.
        """
        self.tag1 = tag1
        self.tag2 = tag2
        self.outputTag = outputTag
        self.defaultValue = defaultValue

    def setOperation(self, operation):
        """Select the operation applied to the two tags.

        Raises Exception when `operation` is not one of OPERATIONS. The value
        is validated before being stored, so a rejected name is never kept.
        """
        if operation not in OPERATIONS:
            raise Exception("Do no handle operation %s, only: %s" % (operation, ", ".join(OPERATIONS)))
        self.operation = operation

    def _combine(self, value1, value2):
        """Apply the selected operation to two floats and return the result."""
        if self.operation == "plus":
            return value1 + value2
        if self.operation == "minus":
            return value1 - value2
        if self.operation == "times":
            return value1 * value2
        if self.operation == "div":
            return value1 / value2
        # Only reachable when self.operation was assigned without setOperation().
        raise Exception("Do no handle operation %s, only: %s" % (self.operation, ", ".join(OPERATIONS)))

    def run(self):
        """Read every transcript, compute the new tag and write the transcript.

        Raises Exception when a transcript misses one of the two tags and no
        default value was given. The parser and the writer are closed even
        when an error interrupts the loop.
        """
        progress = Progress(self.parser.getNbTranscripts(), "Printing transcripts %s" % (self.inputFileName), self.verbosity)
        try:
            for transcript in self.parser.getIterator():
                tag1 = transcript.getTagValue(self.tag1)
                tag2 = transcript.getTagValue(self.tag2)
                if tag1 is None or tag2 is None:
                    if self.defaultValue is None:
                        raise Exception("Transcript %s misses one of the tags %s and %s, and has no default value !" % (transcript, self.tag1, self.tag2))
                    newTag = self.defaultValue
                else:
                    newTag = self._combine(float(tag1), float(tag2))
                transcript.setTagValue(self.outputTag, newTag)
                self.outputWriter.addTranscript(transcript)
                progress.inc()
            progress.done()
        finally:
            self.parser.close()
            self.outputWriter.close()


if __name__ == "__main__":

    # parse command line
    # (the description previously advertised another tool, "Change Tag Name")
    description = "Combine Tags v1.0.1: Combine the values of two tags of a list of transcripts. [Category: Data Modification]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--inputFormat", dest="inputFormat", action="store", type="string", help="format of the input file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-t", "--tag1", dest="tag1", action="store", type="string", help="name of the first tag [compulsory] [format: string]")
    parser.add_option("-T", "--tag2", dest="tag2", action="store", type="string", help="name of the second tag [compulsory] [format: string]")
    parser.add_option("-d", "--default", dest="defaultValue", action="store", default=None, type="string", help="default value when one of the tag is absent [compulsory] [format: float]")
    parser.add_option("-n", "--new", dest="newTag", action="store", type="string", help="name of the new tag [compulsory] [format: string]")
    parser.add_option("-p", "--operation", dest="operation", action="store", type="string", help="operation combining the tags [compulsory] [format: choice (plus, minus, times, div)]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")
    (options, args) = parser.parse_args()

    combiner = CombineTags(options.verbosity)
    combiner.setInputFile(options.inputFileName, options.inputFormat)
    # The ".gff3" extension is appended to the name given with -o.
    combiner.setOutputFile("%s.gff3" % (options.outputFileName))
    combiner.setTags(options.tag1, options.tag2, options.newTag, options.defaultValue)
    combiner.setOperation(options.operation)
    combiner.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/CompareOverlapping.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,491 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import os, struct, time, random
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.ncList.NCList import NCList
from SMART.Java.Python.ncList.NCListCursor import NCListCursor
from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
from SMART.Java.Python.ncList.NCListHandler import NCListHandler
from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
from SMART.Java.Python.misc import Utils
try:
    import cPickle as pickle
except ImportError:
    # cPickle is not available: fall back on the pure-Python module.
    import pickle

# Keys identifying the two input sets in every per-type dictionary below.
REFERENCE = 0
QUERY = 1
TYPES = (REFERENCE, QUERY)
TYPETOSTRING = {0: "reference", 1: "query"}

class CompareOverlapping(object):
    """Report the query transcripts that overlap with reference transcripts.

    Both inputs are converted to NC-list files; the two NC-lists are then
    walked with cursors, chromosome by chromosome, and the selected queries
    are written to a GFF3 file with the tags "overlapWith" and "nbOverlaps".
    """

    def __init__(self, verbosity = 1):
        """Initialise counters, per-type settings and default filter options."""
        self._outputFileName = "outputOverlaps.gff3"
        self._iWriter = None
        self._nbOverlappingQueries = 0
        self._nbOverlaps = 0
        self._nbLines = {REFERENCE: 0, QUERY: 0}
        self._verbosity = verbosity
        self._ncLists = {}
        self._cursors = {}
        self._splittedFileNames = {}
        self._nbElements = {}
        self._nbElementsPerChromosome = {}
        self._inputFileNames = {REFERENCE: None, QUERY: None}
        self._inputFileFormats = {REFERENCE: None, QUERY: None}
        self._starts = {REFERENCE: None, QUERY: None}
        self._ends = {REFERENCE: None, QUERY: None}
        self._fivePrimes = {REFERENCE: None, QUERY: None}
        self._threePrimes = {REFERENCE: None, QUERY: None}
        self._ncListHandlers = {REFERENCE: None, QUERY: None}
        # False until createNCLists() stores the name of the converted file.
        self._convertedFileNames = {REFERENCE: False, QUERY: False}
        self._sorted = False
        self._index = False
        self._introns = False
        self._antisense = False
        self._colinear = False
        self._invert = False
        self._distance = 0
        self._minOverlap = 1
        self._pcOverlap = None
        self._included = False
        self._including = False
        self._outputNotOverlapping = False
        self._tmpRefFileName = None
        self._currentQueryTranscript = None
        self._currentOrQueryTranscript = None
        self._currentExQueryTranscript = None
        # Random suffix used in the names of the temporary files.
        self._randInt = random.randint(0, 100000)

    def __del__(self):
        """Remove the temporary reference file and the converted NC-list files.

        Entries that were never assigned (None or the initial False) are
        skipped: handing False to os.path.exists() would raise inside the
        finalizer and leave the remaining files behind.
        """
        for fileName in [self._tmpRefFileName] + list(self._convertedFileNames.values()):
            if fileName and os.path.exists(fileName):
                os.remove(fileName)

    def close(self):
        """Close the output writer, if one was created by setOutput()."""
        if self._iWriter is not None:
            self._iWriter.close()

    def setInput(self, fileName, format, type):
        """Register an input file and its format for `type` (REFERENCE or QUERY).

        ParserChooser.findFormat() is called on `format`; its result is not used here.
        """
        chooser = ParserChooser(self._verbosity)
        chooser.findFormat(format)
        self._inputFileNames[type] = fileName
        self._inputFileFormats[type] = format

    def setOutput(self, outputFileName):
        """Create the GFF3 writer; an empty name keeps the default file name."""
        if outputFileName != '':
            self._outputFileName = outputFileName
        self._iWriter = Gff3Writer(self._outputFileName)

    def setSorted(self, sorted):
        """Tell the NC-list conversion whether the inputs are already sorted."""
        self._sorted = sorted

    def setIndex(self, index):
        """Enable or disable the index built on the reference NC-list."""
        self._index = index

    def restrictToStart(self, distance, type):
        """Store the value later passed to Transcript.restrictStart() for `type`."""
        self._starts[type] = distance

    def restrictToEnd(self, distance, type):
        """Store the value later passed to Transcript.restrictEnd() for `type`."""
        self._ends[type] = distance

    def extendFivePrime(self, distance, type):
        """Store the value later passed to Transcript.extendStart() for `type`."""
        self._fivePrimes[type] = distance

    def extendThreePrime(self, distance, type):
        """Store the value later passed to Transcript.extendEnd() for `type`."""
        self._threePrimes[type] = distance

    def acceptIntrons(self, boolean):
        """When True, exons are dropped and the exon-level overlap test is skipped."""
        self._introns = boolean

    def getAntisenseOnly(self, boolean):
        """Setter (despite its name): keep only overlaps with opposite directions."""
        self._antisense = boolean

    def getColinearOnly(self, boolean):
        """Setter (despite its name): keep only overlaps with identical directions."""
        self._colinear = boolean

    def getInvert(self, boolean):
        """Setter (despite its name): when True, write queries without overlap."""
        self._invert = boolean

    def setMaxDistance(self, distance):
        """Set the maximum distance accepted between a query and a reference."""
        self._distance = distance

    def setMinOverlap(self, overlap):
        """Set the minimum overlap size passed to the Transcript overlap tests."""
        self._minOverlap = overlap

    def setPcOverlap(self, overlap):
        """Set the minimum overlap as a percentage of the query size (None: unused)."""
        self._pcOverlap = overlap

    def setIncludedOnly(self, boolean):
        """When True, require refTranscript.include(query) for an overlap."""
        self._included = boolean

    def setIncludingOnly(self, boolean):
        """When True, require query.include(refTranscript) for an overlap."""
        self._including = boolean

    def includeNotOverlapping(self, boolean):
        """When True, queries without overlap are also written, with nbOverlaps = 0."""
        self._outputNotOverlapping = boolean

    def transformTranscript(self, transcript, type):
        """Apply the restrictions and extensions configured for `type`.

        The transcript is modified in place and returned. Exons are emptied
        when introns are accepted; reference transcripts get their exons
        extended by the maximum distance when it is positive.
        """
        if self._starts[type] != None:
            transcript.restrictStart(self._starts[type])
        if self._ends[type] != None:
            transcript.restrictEnd(self._ends[type])
        if self._fivePrimes[type] != None:
            transcript.extendStart(self._fivePrimes[type])
        if self._threePrimes[type] != None:
            transcript.extendEnd(self._threePrimes[type])
        if self._introns:
            transcript.exons = []
        if type == REFERENCE and self._distance > 0:
            transcript.extendExons(self._distance)
        return transcript

    def extendQueryTranscript(self, transcript):
        """Build self._currentExQueryTranscript, the copy used by isOverlapping().

        The copy receives the 5' and 3' query extensions; the exons of the
        transcript given as argument are then emptied.
        """
        self._currentExQueryTranscript = Transcript()
        self._currentExQueryTranscript.copy(transcript)
        if self._fivePrimes[QUERY] != None:
            self._currentExQueryTranscript.extendStart(self._fivePrimes[QUERY])
        if self._threePrimes[QUERY] != None:
            self._currentExQueryTranscript.extendEnd(self._threePrimes[QUERY])
        # NOTE(review): assumed to be unconditional -- confirm against the original indentation.
        transcript.exons = []

    def createTmpRefFile(self):
        """Write the transformed reference transcripts to a temporary pickle file.

        The file is named "tmp_ref_<random>.pkl" and placed in the directory
        given by the SMARTTMPPATH environment variable when it is set. The
        reference input is then redirected to this file, with format "pkl".
        """
        self._tmpRefFileName = "tmp_ref_%d.pkl" % (self._randInt)
        if "SMARTTMPPATH" in os.environ:
            self._tmpRefFileName = os.path.join(os.environ["SMARTTMPPATH"], self._tmpRefFileName)
        chooser = ParserChooser(self._verbosity)
        chooser.findFormat(self._inputFileFormats[REFERENCE])
        parser = chooser.getParser(self._inputFileNames[REFERENCE])
        writer = NCListFilePickle(self._tmpRefFileName, self._verbosity)
        for transcript in parser.getIterator():
            transcript = self.transformTranscript(transcript, REFERENCE)
            writer.addTranscript(transcript)
        writer.close()
        self._inputFileNames[REFERENCE] = self._tmpRefFileName
        self._inputFileFormats[REFERENCE] = "pkl"

    def createNCLists(self):
        """Convert both inputs to NC-list files and load them.

        For each type this fills the NC-lists, one cursor per chromosome, the
        element counts and, for the reference when indexing is enabled, the
        per-chromosome indices.
        """
        self._ncLists = dict([type, {}] for type in TYPES)
        self._indices = dict([type, {}] for type in TYPES)
        self._cursors = dict([type, {}] for type in TYPES)
        for type in TYPES:
            if self._verbosity > 2:
                print("Creating %s NC-list..." % (TYPETOSTRING[type]))
            self._convertedFileNames[type] = "%s_%d_%d.ncl" % (self._inputFileNames[type], self._randInt, type)
            ncLists = ConvertToNCList(self._verbosity)
            ncLists.setInputFileName(self._inputFileNames[type], self._inputFileFormats[type])
            ncLists.setOutputFileName(self._convertedFileNames[type])
            ncLists.setSorted(self._sorted)
            if type == REFERENCE and self._index:
                ncLists.setIndex(True)
            ncLists.run()
            self._ncListHandlers[type] = NCListHandler(self._verbosity)
            self._ncListHandlers[type].setFileName(self._convertedFileNames[type])
            self._ncListHandlers[type].loadData()
            self._nbLines[type] = self._ncListHandlers[type].getNbElements()
            self._nbElementsPerChromosome[type] = self._ncListHandlers[type].getNbElementsPerChromosome()
            self._ncLists[type] = self._ncListHandlers[type].getNCLists()
            for chromosome, ncList in self._ncLists[type].iteritems():
                self._cursors[type][chromosome] = NCListCursor(None, ncList, 0, self._verbosity)
                if type == REFERENCE and self._index:
                    self._indices[REFERENCE][chromosome] = ncList.getIndex()
            if self._verbosity > 2:
                print(" ...done")

    def compare(self):
        """Walk the query NC-list of every chromosome against the reference one.

        Queries on a chromosome absent from the reference are only written
        when not-overlapping output is requested. The elapsed time is stored
        in self._timeSpent.
        """
        # NOTE(review): block nesting reconstructed from the statement order -- confirm.
        nbSkips, nbMoves = 0, 0
        previousChromosome = None
        done = False
        refNCList = None
        queryNCList = None
        startTime = time.time()
        progress = Progress(len(self._ncLists[QUERY].keys()), "Checking overlap", self._verbosity)
        for chromosome, queryNCList in self._ncLists[QUERY].iteritems():
            queryParser = self._ncListHandlers[QUERY].getParser(chromosome)
            queryNCList = self._ncLists[QUERY][chromosome]
            queryCursor = self._cursors[QUERY][chromosome]
            if chromosome != previousChromosome:
                skipChromosome = False
                previousChromosome = chromosome
                if chromosome not in self._ncLists[REFERENCE]:
                    # No reference on this chromosome: nothing can overlap.
                    if self._outputNotOverlapping:
                        while not queryCursor.isOut():
                            self._currentQueryTranscript = queryCursor.getTranscript()
                            self._writeIntervalInNewGFF3({})
                            if queryCursor.hasChildren():
                                queryCursor.moveDown()
                            else:
                                queryCursor.moveNext()
                    progress.inc()
                    continue
                refNCList = self._ncLists[REFERENCE][chromosome]
                refCursor = self._cursors[REFERENCE][chromosome]
            while True:
                # Keep the original query, a transformed copy (used for the
                # detailed checks and for output) and an extended copy (used
                # for the coarse interval comparison).
                self._currentOrQueryTranscript = queryCursor.getTranscript()
                self._currentQueryTranscript = Transcript()
                self._currentQueryTranscript.copy(self._currentOrQueryTranscript)
                self._currentQueryTranscript = self.transformTranscript(self._currentQueryTranscript, QUERY)
                self.extendQueryTranscript(self._currentOrQueryTranscript)
                # Let the index, when available, jump the reference cursor forward.
                newRefLaddr = self.checkIndex(refCursor)
                if newRefLaddr != None:
                    nbMoves += 1
                    refCursor.setLIndex(newRefLaddr)
                    done = False
                refCursor, done, unmatched = self.findOverlapIter(refCursor, done)
                if refCursor.isOut():
                    if not self._invert and not self._outputNotOverlapping:
                        break
                # Children are skipped when the query matched nothing and
                # unmatched queries need not be reported.
                if (unmatched and not self._invert and not self._outputNotOverlapping) or not queryCursor.hasChildren():
                    queryCursor.moveNext()
                    nbSkips += 1
                else:
                    queryCursor.moveDown()
                if queryCursor.isOut():
                    break
            progress.inc()
        progress.done()
        endTime = time.time()
        self._timeSpent = endTime - startTime
        if self._verbosity >= 10:
            print("# skips: %d" % (nbSkips))
            print("# moves: %d" % (nbMoves))

    def findOverlapIter(self, cursor, done):
        """Collect the references overlapping the current query, then write it.

        Returns a 3-tuple: the reference cursor to start from for the next
        query, the `done` flag to pass back on the next call, and a boolean
        that is `not written` in invert mode and `not matched` otherwise.
        """
        # NOTE(review): block nesting reconstructed from the statement order -- confirm.
        chromosome = self._currentQueryTranscript.getChromosome()
        matched = False
        if chromosome not in self._ncLists[REFERENCE]:
            return None, False, True
        ncList = self._ncLists[REFERENCE][chromosome]
        overlappingNames = {}
        nextDone = False
        firstOverlapLAddr = NCListCursor(cursor)
        firstOverlapLAddr.setLIndex(-1)
        if cursor.isOut():
            self._writeIntervalInNewGFF3(overlappingNames)
            return firstOverlapLAddr, False, True
        # First examine the ancestors of the current reference element.
        parentCursor = NCListCursor(cursor)
        parentCursor.moveUp()
        firstParentAfter = False
        while not parentCursor.isOut():
            if self.isOverlapping(parentCursor) == 0:
                matched = True
                if self._checkOverlap(parentCursor.getTranscript()):
                    overlappingNames.update(self._extractID(parentCursor.getTranscript()))
                if firstOverlapLAddr.isOut():
                    firstOverlapLAddr.copy(parentCursor)
                    nextDone = True
            elif self.isOverlapping(parentCursor) == 1:
                firstParentAfter = NCListCursor(parentCursor)
            parentCursor.moveUp()
        if firstParentAfter:
            written = self._writeIntervalInNewGFF3(overlappingNames)
            return firstParentAfter, False, not written if self._invert else not matched
        # This loop finds the overlaps starting from the current reference cursor.
        while True:
            parentCursor = NCListCursor(cursor)
            parentCursor.moveUp()
            # In case: query is on the right of the reference interval and does not overlap.
            overlap = self.isOverlapping(cursor)
            if overlap == -1:
                cursor.moveNext()
            # In case: query overlaps with the reference interval.
            elif overlap == 0:
                matched = True
                if self._checkOverlap(cursor.getTranscript()):
                    overlappingNames.update(self._extractID(cursor.getTranscript()))
                if firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = True
                if done:
                    cursor.moveNext()
                else:
                    if not cursor.hasChildren():
                        cursor.moveNext()
                        if cursor.isOut():
                            break
                    else:
                        cursor.moveDown()
            # In case: query is on the left of the reference interval and does not overlap.
            else:
                if firstOverlapLAddr.isOut() or firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = False # new
                break

            done = False
            if cursor.isOut():
                break
        written = self._writeIntervalInNewGFF3(overlappingNames)
        return firstOverlapLAddr, nextDone, not written if self._invert else not matched

    def isOverlapping(self, refTranscript):
        """Compare the extended query interval with a reference interval.

        `refTranscript` only needs getStart() and getEnd(); this class passes
        NCListCursor objects. Returns 0 when the intervals overlap, 1 when
        the query ends before the reference starts, and -1 otherwise.
        """
        if (self._currentExQueryTranscript.getStart() <= refTranscript.getEnd() and self._currentExQueryTranscript.getEnd() >= refTranscript.getStart()):
            return 0
        if self._currentExQueryTranscript.getEnd() < refTranscript.getStart():
            return 1
        return -1

    def checkIndex(self, cursor):
        """Return an index position further along than `cursor`, or None.

        None is returned when indexing is disabled, when the cursor is out,
        when the index has no entry for the extended query, or when the
        indexed position is not after the cursor's current GFF address.
        """
        if not self._index:
            return None
        if cursor.isOut():
            return None
        chromosome = self._currentExQueryTranscript.getChromosome()
        nextLIndex = self._indices[REFERENCE][chromosome].getIndex(self._currentExQueryTranscript)
        if nextLIndex == None:
            return None
        ncList = self._ncLists[REFERENCE][chromosome]
        nextGffAddress = ncList.getRefGffAddr(nextLIndex)
        thisGffAddress = cursor.getGffAddress()
        if nextGffAddress > thisGffAddress:
            return nextLIndex
        return None

    def _writeIntervalInNewGFF3(self, names):
        """Write the current query if the output options select it.

        `names` maps the name of each overlapping reference to its element
        count. The counters are updated when exactly one of "has overlaps"
        and "invert" holds. Returns True when the query was written.
        """
        nbOverlaps = 0
        for cpt in names.values():
            nbOverlaps += cpt
        self._nbOverlappingQueries += 1 if Utils.xor(names, self._invert) else 0
        self._nbOverlaps += nbOverlaps if Utils.xor(names, self._invert) else 0
        if names:
            self._currentQueryTranscript.setTagValue("overlapWith", ",".join(names))
            self._currentQueryTranscript.setTagValue("nbOverlaps", nbOverlaps)
            if self._invert:
                return False
        else:
            if self._outputNotOverlapping:
                self._currentQueryTranscript.setTagValue("nbOverlaps", 0)
            elif not self._invert:
                return False
        self._iWriter.addTranscript(self._currentQueryTranscript)
        self._iWriter.write()
        return True

    def _extractID(self, transcript):
        """Return {name: count} for a reference transcript.

        The name is the "ID" tag when present, the unique name otherwise; the
        count is the "nbElements" tag as a float, or 1.0 when it is absent.
        """
        id = transcript.getTagValue("ID") if "ID" in transcript.getTagNames() else transcript.getUniqueName()
        nbElements = transcript.getTagValue("nbElements") if "nbElements" in transcript.getTagNames() else 1
        return {id: float(nbElements)}

    def _checkOverlap(self, refTranscript):
        """Apply every configured filter to the current query and a reference.

        The checks cover distance, minimum (absolute or percentage) overlap,
        strand, inclusion and, unless introns are accepted, exon overlap.
        """
        if self._currentQueryTranscript.getDistance(refTranscript) > self._distance:
            return False
        minOverlap = self._minOverlap
        if self._pcOverlap != None:
            minOverlap = max(self._minOverlap, self._currentQueryTranscript.getSize() / 100.0 * self._pcOverlap)
        if not self._currentQueryTranscript.overlapWith(refTranscript, minOverlap):
            return False
        if self._antisense and self._currentQueryTranscript.getDirection() == refTranscript.getDirection():
            return False
        if self._colinear and self._currentQueryTranscript.getDirection() != refTranscript.getDirection():
            return False
        if self._included and not refTranscript.include(self._currentQueryTranscript):
            return False
        if self._including and not self._currentQueryTranscript.include(refTranscript):
            return False
        if self._introns:
            return True
        return self._currentQueryTranscript.overlapWithExon(refTranscript, minOverlap)

    def run(self):
        """Run the whole comparison and print the summary when verbosity > 0."""
        self.createTmpRefFile()
        self.createNCLists()
        self.compare()
        self.close()
        if self._verbosity > 0:
            print("# queries: %d" % (self._nbLines[QUERY]))
            print("# refs: %d" % (self._nbLines[REFERENCE]))
            print("# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps))
            print("time: %ds" % (self._timeSpent))


if __name__ == "__main__":
    description = "Compare Overlapping v1.0.4: Get the data which overlap with a reference set. [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-D", "--index", dest="index", action="store_true", default=False, help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")
    parser.add_option("-r", "--sorted", dest="sorted", action="store_true", default=False, help="input files are already sorted [format: boolean] [default: False]")
    parser.add_option("-S", "--start1", dest="start1", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")
    parser.add_option("-s", "--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")
    parser.add_option("-U", "--end1", dest="end1", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")
    parser.add_option("-u", "--end2", dest="end2", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")
    parser.add_option("-t", "--intron", dest="introns", action="store_true", default=False, help="also report introns [format: bool] [default: false]")
    parser.add_option("-E", "--5primeExtension1", dest="fivePrime1", action="store", default=None, type="int", help="extension towards 5' in file 1 [format: int]")
    parser.add_option("-e", "--5primeExtension2", dest="fivePrime2", action="store", default=None, type="int", help="extension towards 5' in file 2 [format: int]")
    parser.add_option("-N", "--3primeExtension1", dest="threePrime1", action="store", default=None, type="int", help="extension towards 3' in file 1 [format: int]")
    parser.add_option("-n", "--3primeExtension2", dest="threePrime2", action="store", default=None, type="int", help="extension towards 3' in file 2 [format: int]")
    parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="colinear only [format: bool] [default: false]")
    parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="antisense only [format: bool] [default: false]")
    parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
    parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")
    parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")
    parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=1, type="int", help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")
    parser.add_option("-p", "--pcOverlap", dest="pcOverlap", action="store", default=None, type="int", help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")
    parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
    parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # File 1 is the query set, file 2 the reference set.
    co = CompareOverlapping(options.verbosity)
    co.setInput(options.inputFileName1, options.format1, QUERY)
    co.setInput(options.inputFileName2, options.format2, REFERENCE)
    co.setOutput(options.output)
    co.setSorted(options.sorted)
    co.setIndex(options.index)
    co.restrictToStart(options.start1, QUERY)
    co.restrictToStart(options.start2, REFERENCE)
    co.restrictToEnd(options.end1, QUERY)
    co.restrictToEnd(options.end2, REFERENCE)
    co.extendFivePrime(options.fivePrime1, QUERY)
    co.extendFivePrime(options.fivePrime2, REFERENCE)
    co.extendThreePrime(options.threePrime1, QUERY)
    co.extendThreePrime(options.threePrime2, REFERENCE)
    co.acceptIntrons(options.introns)
    co.getAntisenseOnly(options.antisense)
    co.getColinearOnly(options.colinear)
    co.getInvert(options.exclude)
    co.setMaxDistance(options.distance)
    co.setMinOverlap(options.minOverlap)
    co.setPcOverlap(options.pcOverlap)
    co.setIncludedOnly(options.included)
    co.setIncludingOnly(options.including)
    co.includeNotOverlapping(options.notOverlapping)
    co.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/CompareOverlappingSmallQuery.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,226 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Mapping import Mapping
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress

# Bin widths range from 10 ** MINBIN to 10 ** MAXBIN nucleotides.
MINBIN = 3
MAXBIN = 7
REFERENCE = 0
QUERY = 1

def getBin(start, end):
    """Return the id of the smallest bin containing both `start` and `end`.

    A bin id is level * 10 ** (MAXBIN + 1) + start / 10 ** level. When no
    level from MINBIN to MAXBIN holds both positions, the id of the single
    top-level bin, (MAXBIN + 1) * 10 ** (MAXBIN + 1), is returned.
    """
    levelOffset = 10 ** (MAXBIN + 1)
    for level in range(MINBIN, MAXBIN + 1):
        width = 10 ** level
        firstSlot = int(start / width)
        if firstSlot == int(end / width):
            return int(level * levelOffset + firstSlot)
    return int((MAXBIN + 1) * levelOffset)

def getOverlappingBins(start, end):
    """Return, for every level, the (first, last) bin ids touched by [start, end].

    The list ends with a pair made of the top-level bin id twice.
    """
    levelOffset = 10 ** (MAXBIN + 1)
    binRanges = []
    for level in range(MINBIN, MAXBIN + 1):
        width = 10 ** level
        firstBin = int(level * levelOffset + int(start / width))
        lastBin = int(level * levelOffset + int(end / width))
        binRanges.append((firstBin, lastBin))
    topBin = int((MAXBIN + 1) * levelOffset)
    binRanges.append((topBin, topBin))
    return binRanges


class CompareOverlappingSmallQuery(object):
    """Find the queries overlapping references, keeping all queries in memory.

    Queries are stored in bins per chromosome; the references are then read
    one by one and compared with the queries of the bins they touch.
    """

    def __init__(self, verbosity):
        """Initialise the counters, the options and the empty bin structure."""
        self.verbosity = verbosity
        self.tableNames = {}
        self.nbQueries = 0
        self.nbRefs = 0
        self.nbWritten = 0
        self.nbOverlaps = 0
        self.distance = None
        self.invert = False
        self.antisense = False
        self.collinear = False
        # chromosome -> bin id -> list of query transcripts
        self.bins = {}
        # query transcript -> {reference name: number of elements}
        self.overlaps = {}
        self.notOverlapping = False

    def setReferenceFile(self, fileName, format):
        """Create the parser reading the reference file."""
        formatChooser = ParserChooser(self.verbosity)
        formatChooser.findFormat(format)
        self.refParser = formatChooser.getParser(fileName)

    def setQueryFile(self, fileName, format):
        """Create the parser reading the query file."""
        formatChooser = ParserChooser(self.verbosity)
        formatChooser.findFormat(format)
        self.queryParser = formatChooser.getParser(fileName)

    def setOutputFile(self, fileName):
        """Create the GFF3 transcript writer."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setDistance(self, distance):
        """Set the value passed to extendExons() on every reference (None: no extension)."""
        self.distance = distance

    def setInvert(self, boolean):
        """When True, only the queries without any overlap are written."""
        self.invert = boolean

    def setCollinear(self, boolean):
        """When True, an overlap requires identical directions."""
        self.collinear = boolean

    def setAntisense(self, boolean):
        """When True, an overlap requires different directions."""
        self.antisense = boolean

    def includeNotOverlapping(self, boolean):
        """When True, queries without overlap are also registered for output."""
        self.notOverlapping = boolean

    def loadQuery(self):
        """Read every query and file it under its chromosome and bin.

        Mapping objects are replaced by their transcript. When invert or
        not-overlapping output is requested, each query starts with an empty
        overlap record so that it is present at output time.
        """
        progress = UnlimitedProgress(10000, "Reading queries", self.verbosity)
        registerAll = self.notOverlapping or self.invert
        for transcript in self.queryParser.getIterator():
            if transcript.__class__.__name__ == "Mapping":
                transcript = transcript.getTranscript()
            chromosome = transcript.getChromosome()
            bin = getBin(transcript.getStart(), transcript.getEnd())
            chromosomeBins = self.bins.setdefault(chromosome, {})
            chromosomeBins.setdefault(bin, []).append(transcript)
            if registerAll:
                self.overlaps[transcript] = {}
            self.nbQueries += 1
            progress.inc()
        progress.done()

    def _compareTwoTranscripts(self, queryTranscript, refTranscript):
        """Tell whether the exons overlap and the strand options are satisfied."""
        if not queryTranscript.overlapWithExon(refTranscript):
            return False
        if self.collinear:
            if queryTranscript.getDirection() != refTranscript.getDirection():
                return False
        if self.antisense:
            if queryTranscript.getDirection() == refTranscript.getDirection():
                return False
        return True

    def _alterTranscript(self, transcript, type):
        """Extend the exons of a reference by the configured distance, if any."""
        if type == REFERENCE and self.distance != None:
            transcript.extendExons(self.distance)
        return transcript

    def _compareTranscript(self, refTranscript):
        """Record every stored query that overlaps `refTranscript`.

        Returns an empty list when no query lies on the reference chromosome;
        otherwise nothing is returned.
        """
        refChromosome = refTranscript.getChromosome()
        if refChromosome not in self.bins:
            return []
        refStart = refTranscript.getStart()
        refEnd = refTranscript.getEnd()
        chromosomeBins = self.bins[refChromosome]
        for firstBin, lastBin in getOverlappingBins(refStart, refEnd):
            for bin in range(firstBin, lastBin + 1):
                if bin not in chromosomeBins:
                    continue
                for queryTranscript in chromosomeBins[bin]:
                    if not self._compareTwoTranscripts(queryTranscript, refTranscript):
                        continue
                    if queryTranscript not in self.overlaps:
                        self.overlaps[queryTranscript] = {}
                    # A reference counts for its "nbElements" tag when it has one.
                    if "nbElements" in refTranscript.getTagNames():
                        nbElements = int(float(refTranscript.getTagValue("nbElements")))
                    else:
                        nbElements = 1
                    self.overlaps[queryTranscript][refTranscript.getName()] = nbElements
                    self.nbOverlaps += nbElements

    def _updateTranscript(self, queryTranscript):
        """Store the overlap count and names as tags of the query.

        "overlapsWith" holds the reference names joined with "--", cut to
        100 characters; it is only set when there is at least one overlap.
        """
        hits = self.overlaps[queryTranscript]
        queryTranscript.setTagValue("nbOverlaps", sum(hits.values()))
        if hits:
            joinedNames = "--".join(hits.keys())
            queryTranscript.setTagValue("overlapsWith", joinedNames[:100])
        return queryTranscript

    def compare(self):
        """Read every reference and compare it with the stored queries."""
        progress = UnlimitedProgress(10000, "Comparing references", self.verbosity)
        for refTranscript in self.refParser.getIterator():
            if refTranscript.__class__.__name__ == "Mapping":
                refTranscript = refTranscript.getTranscript()
            refTranscript = self._alterTranscript(refTranscript, REFERENCE)
            self._compareTranscript(refTranscript)
            self.nbRefs += 1
            progress.inc()
        progress.done()

    def printResults(self):
        """Write the registered queries and close the writer.

        In invert mode only the queries with an empty overlap record are
        written, without overlap tags; otherwise every registered query is
        written with its tags.
        """
        for transcript in self.overlaps:
            if self.invert and self.overlaps[transcript]:
                continue
            if not self.invert:
                transcript = self._updateTranscript(transcript)
            self.writer.addTranscript(transcript)
            self.nbWritten += 1
        self.writer.close()

    def displayResults(self):
        """Print the numbers of queries, references, written queries and overlaps."""
        print("# queries: %d" % (self.nbQueries))
        print("# refs: %d" % (self.nbRefs))
        print("# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps))

    def run(self):
        """Load the queries, scan the references, write and display the results."""
        self.loadQuery()
        self.compare()
        self.printResults()
        self.displayResults()

if __name__ == "__main__":

    description = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
    parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
    parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
    parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
    parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # File 1 holds the queries, file 2 the references.
    cosq = CompareOverlappingSmallQuery(options.verbosity)
    cosq.setQueryFile(options.inputFileName1, options.format1)
    cosq.setReferenceFile(options.inputFileName2, options.format2)
    cosq.setOutputFile(options.outputFileName)
    cosq.includeNotOverlapping(options.notOverlapping)
    cosq.setDistance(options.distance)
    cosq.setCollinear(options.collinear)
    cosq.setAntisense(options.antisense)
    cosq.setInvert(options.exclude)
    cosq.run()

#! /usr/bin/env python
#
# Source: smart_toolShed/SMART/Java/Python/CompareOverlappingSmallRef.py
#
# Copyright INRA-URGI 2009-2011
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Mapping import Mapping
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress

# Bin levels: level i groups positions into windows of 10**i nucleotides.
MINBIN = 3
MAXBIN = 7
REFERENCE = 0
QUERY = 1


def getBin(start, end):
    """Return the identifier of the smallest bin containing [start, end].

    Levels MINBIN..MAXBIN are tried in increasing window size (10**level).
    The first level at which start and end fall into the same window wins,
    and the identifier is ``level * 10**(MAXBIN+1) + window index``.
    Intervals that straddle a window boundary at every level go to a single
    catch-all bin, ``(MAXBIN+1) * 10**(MAXBIN+1)``.
    """
    for i in range(MINBIN, MAXBIN + 1):
        binLevel = 10 ** i
        if int(start / binLevel) == int(end / binLevel):
            return int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))
    return int((MAXBIN + 1) * 10 ** (MAXBIN + 1))


def getOverlappingBins(start, end):
    """Return the bin ranges that may hold intervals overlapping [start, end].

    The result is a list of ``(firstBin, lastBin)`` pairs (both inclusive),
    one per level from MINBIN to MAXBIN, followed by the catch-all bin as a
    ``(bigBin, bigBin)`` pair.
    """
    array = []
    bigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
    for i in range(MINBIN, MAXBIN + 1):
        binLevel = 10 ** i
        levelOffset = i * 10 ** (MAXBIN + 1)
        array.append((int(levelOffset + int(start / binLevel)), int(levelOffset + int(end / binLevel))))
    array.append((bigBin, bigBin))
    return array


class CompareOverlappingSmallRef(object):
    """Report the queries that overlap a (small) reference set.

    The whole reference is loaded in memory, indexed by chromosome and bin
    (see getBin); the queries are then streamed and compared one by one.
    """

    def __init__(self, verbosity):
        self.verbosity = verbosity
        self.tableNames = {}
        self.nbQueries = 0
        self.nbRefs = 0
        self.nbWritten = 0
        self.nbOverlaps = 0
        self.invert = False
        self.antisense = False
        self.collinear = False
        self.distance = None
        # chromosome -> bin identifier -> list of reference transcripts
        self.bins = {}
        self.notOverlapping = False

    def setReferenceFile(self, fileName, format):
        """Create the parser for the reference file, given its format name."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.refParser = chooser.getParser(fileName)

    def setQueryFile(self, fileName, format):
        """Create the parser for the query file, given its format name."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.queryParser = chooser.getParser(fileName)

    def setOutputFile(self, fileName):
        """Create the GFF3 writer for the output file."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def setDistance(self, distance):
        """Set the distance passed to extendExons() on every reference."""
        self.distance = distance

    def setCollinear(self, boolean):
        """Only accept overlaps where query and reference share a direction."""
        self.collinear = boolean

    def setAntisense(self, boolean):
        """Only accept overlaps where query and reference directions differ."""
        self.antisense = boolean

    def setInvert(self, boolean):
        """Output the queries that do NOT overlap instead of those that do."""
        self.invert = boolean

    def includeNotOverlapping(self, boolean):
        """Also output the queries that do not satisfy the overlap test."""
        self.notOverlapping = boolean

    def loadRef(self):
        """Read every reference transcript and store it in its bin."""
        progress = UnlimitedProgress(10000, "Reading references", self.verbosity)
        for transcript in self.refParser.getIterator():
            if transcript.__class__.__name__ == "Mapping":
                transcript = transcript.getTranscript()
            transcript = self._alterTranscript(transcript, REFERENCE)
            chromosome = transcript.getChromosome()
            binId = getBin(transcript.getStart(), transcript.getEnd())
            self.bins.setdefault(chromosome, {}).setdefault(binId, []).append(transcript)
            self.nbRefs += 1
            progress.inc()
        progress.done()

    def _alterTranscript(self, transcript, type):
        """Extend the exons of a reference transcript by the set distance.

        Query transcripts (and references when no distance is set) are
        returned untouched.
        """
        if type == REFERENCE:
            if self.distance is not None:
                transcript.extendExons(self.distance)
        return transcript

    def _compareTwoTranscripts(self, queryTranscript, refTranscript):
        """Return True if the pair overlaps and passes the strand filters."""
        if not queryTranscript.overlapWithExon(refTranscript):
            return False
        if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
            return False
        if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
            return False
        return True

    def _compareTranscript(self, queryTranscript):
        """Collect the references overlapping a query.

        Returns a dict mapping the name of each overlapping reference to its
        "nbElements" tag value (1 when the tag is absent). An empty dict is
        returned when nothing overlaps, including when the chromosome of the
        query holds no reference at all.
        """
        queryChromosome = queryTranscript.getChromosome()
        if queryChromosome not in self.bins:
            # Must be a dict (not a list): _updateTranscript() calls
            # .values() and .keys() on the result.
            return {}
        chromosomeBins = self.bins[queryChromosome]
        overlaps = {}
        for firstBin, lastBin in getOverlappingBins(queryTranscript.getStart(), queryTranscript.getEnd()):
            for binId in range(firstBin, lastBin + 1):
                if binId not in chromosomeBins:
                    continue
                for refTranscript in chromosomeBins[binId]:
                    if self._compareTwoTranscripts(queryTranscript, refTranscript):
                        nbElements = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1
                        overlaps[refTranscript.getName()] = nbElements
                        self.nbOverlaps += nbElements
        return overlaps

    def _updateTranscript(self, queryTranscript, overlaps):
        """Tag the query with the number and the names of its overlaps.

        The "overlapsWith" value is the "--"-joined reference names, cut to
        100 characters; it is only set when there is at least one overlap.
        """
        queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
        if overlaps:
            queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])
        return queryTranscript

    def compare(self):
        """Stream the queries, compare each to the reference, write the hits."""
        progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)
        for queryTranscript in self.queryParser.getIterator():
            if queryTranscript.__class__.__name__ == "Mapping":
                queryTranscript = queryTranscript.getTranscript()
            progress.inc()
            self.nbQueries += 1
            overlaps = self._compareTranscript(queryTranscript)
            if self.notOverlapping or (overlaps and not self.invert) or (not overlaps and self.invert):
                if not self.invert:
                    queryTranscript = self._updateTranscript(queryTranscript, overlaps)
                self.writer.addTranscript(queryTranscript)
                self.nbWritten += 1
        progress.done()
        self.writer.close()

    def displayResults(self):
        """Print the counters gathered during the run."""
        print("# queries: %d" % (self.nbQueries))
        print("# refs: %d" % (self.nbRefs))
        print("# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps))

    def run(self):
        """Load the reference, compare the queries, print the summary."""
        self.loadRef()
        self.compare()
        self.displayResults()


if __name__ == "__main__":

    description = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
    parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
    parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
    parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
    parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    cosr = CompareOverlappingSmallRef(options.verbosity)
    cosr.setQueryFile(options.inputFileName1, options.format1)
    cosr.setReferenceFile(options.inputFileName2, options.format2)
    cosr.setOutputFile(options.outputFileName)
    cosr.includeNotOverlapping(options.notOverlapping)
    cosr.setDistance(options.distance)
    # -c used to be parsed but never forwarded (setInvert was called twice).
    cosr.setCollinear(options.collinear)
    cosr.setAntisense(options.antisense)
    cosr.setInvert(options.exclude)
    cosr.run()
#! /usr/bin/env python
#
# Source: smart_toolShed/SMART/Java/Python/ComputeCoverage.py
#
# Copyright INRA-URGI 2009-2011
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import os, random
from optparse import OptionParser, OptionGroup
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from SMART.Java.Python.misc.Progress import Progress
from commons.core.writer.Gff3Writer import Gff3Writer


class CoverageComputer(object):
    """Compute how much of a query set is covered by a reference set.

    Every position covered by a reference exon is recorded; the query
    positions are then looked up to get a global count and, optionally, a
    per-transcript "coverage" tag written to a GFF3 file.
    """

    def __init__(self, verbosity = 0):
        self.verbosity = verbosity
        self.queryReader = None
        self.referenceReader = None
        self.outputWriter = None
        self.introns = False
        self.nbNucleotides = 0
        self.nbCovered = 0

    def setInputQueryFile(self, fileName, format):
        """Open the query file with the given transcript format."""
        self.queryReader = TranscriptContainer(fileName, format, self.verbosity-1)

    def setInputReferenceFile(self, fileName, format):
        """Open the reference file with the given transcript format."""
        self.referenceReader = TranscriptContainer(fileName, format, self.verbosity-1)

    def includeIntrons(self, boolean):
        """When set, removeExons() is called on each transcript before use."""
        self.introns = boolean

    def setOutputFileName(self, fileName, title="S-MART", feature="transcript", featurePart="exon"):
        """Create the GFF3 writer; without it, run() only prints the summary."""
        self.outputWriter = Gff3Writer(fileName, self.verbosity-1)
        self.outputWriter.setTitle(title)
        self.outputWriter.setFeature(feature)
        self.outputWriter.setFeaturePart(featurePart)

    def readReference(self):
        """Record every position covered by a reference exon.

        Fills self.coveredRegions: chromosome -> {position: 1}.
        """
        self.coveredRegions = {}
        progress = Progress(self.referenceReader.getNbTranscripts(), "Reading reference file", self.verbosity-1)
        for transcript in self.referenceReader.getIterator():
            chromosome = transcript.getChromosome()
            covered = self.coveredRegions.setdefault(chromosome, {})
            if self.introns:
                transcript.removeExons()
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd()+1):
                    covered[position] = 1
            progress.inc()
        progress.done()

    def readQuery(self):
        """Count the query nucleotides and how many of them are covered.

        Queries on a chromosome without any reference are skipped and do not
        contribute to either counter.
        """
        progress = Progress(self.queryReader.getNbTranscripts(), "Reading query file", self.verbosity-1)
        for transcript in self.queryReader.getIterator():
            progress.inc()
            chromosome = transcript.getChromosome()
            if chromosome not in self.coveredRegions:
                continue
            covered = self.coveredRegions[chromosome]
            if self.introns:
                transcript.removeExons()
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd()+1):
                    self.nbNucleotides += 1
                    self.nbCovered += covered.get(position, 0)
        progress.done()

    def write(self):
        """Write each query with a "coverage" tag (percentage of its size)."""
        progress = Progress(self.queryReader.getNbTranscripts(), "Writing output file", self.verbosity-1)
        for transcript in self.queryReader.getIterator():
            chromosome = transcript.getChromosome()
            # A query on a chromosome absent from the reference has no covered
            # position: report 0% instead of failing with a KeyError.
            covered = self.coveredRegions.get(chromosome, {})
            if self.introns:
                transcript.removeExons()
            size = transcript.getSize()
            coverage = 0
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd()+1):
                    coverage += covered.get(position, 0)
            transcript.setTagValue("coverage", 0 if size == 0 else float(coverage) / size * 100)
            self.outputWriter.addTranscript(transcript)
            progress.inc()
        progress.done()
        # Release the output file once everything has been written.
        self.outputWriter.close()

    def sumUp(self):
        """Print the global number and percentage of covered nucleotides."""
        print("%d nucleotides in query, %d (%.f%%) covered" % (self.nbNucleotides, self.nbCovered, 0 if self.nbNucleotides == 0 else float(self.nbCovered) / self.nbNucleotides * 100))

    def run(self):
        """Read both inputs, write the output if requested, print the summary."""
        self.readReference()
        self.readQuery()
        if self.outputWriter is not None:
            self.write()
        self.sumUp()


if __name__ == "__main__":

    # parse command line
    description = "Compute Coverage v1.0.1: Compute the coverage of a set with respect to another set. [Category: Personal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input query file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of the first file [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="input reference file [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of the second file [compulsory] [format: transcript file format]")
    parser.add_option("-t", "--introns", dest="introns", action="store_true", default=False, help="also include introns [format: boolean] [default: false]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file [format: output file in GFF3 format]")
    # The default must be an int: the class computes "verbosity - 1".
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [default: 1] [format: int]")
    (options, args) = parser.parse_args()

    computer = CoverageComputer(options.verbosity)
    computer.setInputQueryFile(options.inputFileName1, options.format1)
    computer.setInputReferenceFile(options.inputFileName2, options.format2)
    computer.includeIntrons(options.introns)
    # The output file is optional: only create a writer when one is given.
    if options.outputFileName is not None:
        computer.setOutputFileName(options.outputFileName)
    computer.run()
#!/usr/bin/env python
#
# Source: smart_toolShed/SMART/Java/Python/CountReadGCPercent.py

from optparse import OptionParser
from commons.core.parsing.FastaParser import FastaParser
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from SMART.Java.Python.misc.Progress import Progress
from commons.core.utils.RepetOptionParser import RepetOptionParser
from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow


class CountReadGCPercent(object):
    """Tag each feature of a GFF3 file with the GC and N percentages of its
    sequence, taken from a reference FASTA file."""

    def __init__(self, verbose=0):
        # The verbosity is handed to the reader and the writer when they are
        # created, so it has to be known before the set*File* calls.
        self.referenceReader = None
        self.gffReader = None
        self.outputWriter = None
        self.verbose = verbose

    def setInputReferenceFile(self, fileName):
        """Remember the name of the FASTA file (it is opened in write())."""
        self.referenceReader = fileName

    def setInputGffFile(self, fileName):
        """Open the GFF3 annotation file."""
        self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)

    def setOutputFileName(self, fileName):
        """Create the GFF3 writer for the output file."""
        self.outputWriter = Gff3Writer(fileName, self.verbose)

    def readGffAnnotation(self):
        """Record every position covered by an exon of the annotation.

        Fills self.coveredRegions: chromosome -> {position: 1}.
        NOTE(review): nothing in this class reads coveredRegions afterwards;
        confirm whether this pass is still needed.
        """
        self.coveredRegions = {}
        progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            covered = self.coveredRegions.setdefault(chromosome, {})
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd()+1):
                    covered[position] = 1
            progress.inc()
        progress.done()

    def write(self):
        """Compute the percentages of each feature and write the tagged GFF3.

        The "GCpercent" and "NPercent" tags hold the values of the last exon
        whose chromosome is present in the FASTA file, or 0 when there is none.
        """
        iParser = FastaParser(self.referenceReader)
        iParser.setTags()
        iGetGCPercentBySW = CountGCPercentBySlidingWindow()
        progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            GCpercent = 0
            nPercent = 0
            for exon in transcript.getExons():
                # Direct lookup instead of scanning every sequence name.
                if chromosome in iParser.getTags():
                    subSequence = iParser.getSubSequence(chromosome, exon.getStart() , exon.getEnd(), 1)
                    GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
                    # NOTE(review): placed right after the computation; the
                    # original nesting of this trace could not be confirmed.
                    print("GCpercent = %f, nPercent = %f" % (GCpercent, nPercent))
            transcript.setTagValue("GCpercent", GCpercent)
            transcript.setTagValue("NPercent", nPercent)
            self.outputWriter.addTranscript(transcript)
            progress.inc()
        progress.done()
        # Release the output file once everything has been written.
        self.outputWriter.close()

    def run(self):
        """Read the annotation, then write the output if a writer was set."""
        self.readGffAnnotation()
        if self.outputWriter is not None:
            self.write()


if __name__ == "__main__":
    description = "Count GC percent for each read against a genome."
    usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> [-v <verbose>] [-h]"
    examples = "\nExample: \n"
    examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
    examples += "\n\n"
    parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
    parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None )
    parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None)
    parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None )
    parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 )
    (options, args) = parser.parse_args()

    # -v used to be parsed but never forwarded to the object.
    readGCPercent = CountReadGCPercent(options.verbose)
    readGCPercent.setInputReferenceFile(options.fastaFile)
    readGCPercent.setInputGffFile(options.gffFile)
    readGCPercent.setOutputFileName(options.outputFile)
    readGCPercent.run()
#! /usr/bin/env python
#
# Source: smart_toolShed/SMART/Java/Python/FindOverlapsOptim.py
#
# Copyright INRA-URGI 2009-2012
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#

import os, struct, time, shutil
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.ncList.NCList import NCList
from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList
from SMART.Java.Python.ncList.NCListParser import NCListParser
from SMART.Java.Python.ncList.NCListCursor import NCListCursor
from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
from SMART.Java.Python.ncList.NCListHandler import NCListHandler
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
try:
    import cPickle as pickle
except ImportError:
    import pickle

REFERENCE = 0
QUERY = 1
TYPES = (REFERENCE, QUERY)
TYPETOSTRING = {0: "reference", 1: "query"}


class FindOverlapsOptim(object):
    """Find, for each query interval, the reference intervals it overlaps.

    Both inputs are turned into nested containment lists (NCLists), one per
    chromosome, and walked in parallel with cursors. Every query that has at
    least one overlap is written to a GFF3 file with the tags "overlapsWith"
    and "nbOverlaps".
    """

    def __init__(self, verbosity = 1):
        self._parsers = {}
        self._sortedFileNames = {}
        self._outputFileName = "outputOverlaps.gff3"
        self._iWriter = None
        self._inputFileNames = {REFERENCE: None, QUERY: None}
        # Name of the temporary .ncl file of each input, False when the input
        # was not converted.
        self._convertedFileNames = {REFERENCE: False, QUERY: False}
        self._inputFileFormats = {REFERENCE: None, QUERY: None}
        # True for the inputs that have to be converted to NCList first.
        self._converted = {REFERENCE: False, QUERY: False}
        self._ncListHandlers = {REFERENCE: None, QUERY: None}
        self._splittedFileNames = {REFERENCE: {}, QUERY: {}}
        self._nbOverlappingQueries = 0
        self._nbOverlaps = 0
        self._nbLines = {REFERENCE: 0, QUERY: 0}
        self._sorted = False
        self._index = False
        self._verbosity = verbosity
        self._ncLists = {}
        self._cursors = {}
        self._nbElementsPerChromosome = {}
        self._tmpDirectories = {REFERENCE: False, QUERY: False}

    def close(self):
        """Close the output writer and delete the temporary files."""
        if self._iWriter is not None:
            self._iWriter.close()
        for fileName in self._sortedFileNames.values():
            if os.path.exists(fileName):
                os.remove(fileName)
        for fileName in self._convertedFileNames.values():
            # Entries stay False for inputs that were not converted.
            if fileName and os.path.exists(fileName):
                os.remove(fileName)

    def setRefFileName(self, fileName, format):
        """Set the reference input file and its format."""
        self.setFileName(fileName, format, REFERENCE)

    def setQueryFileName(self, fileName, format):
        """Set the query input file and its format."""
        self.setFileName(fileName, format, QUERY)

    def setFileName(self, fileName, format, type):
        """Register an input file; any format but "nclist" needs conversion."""
        self._inputFileNames[type] = fileName
        self._inputFileFormats[type] = format
        if format.lower() != "nclist":
            self._converted[type] = True

    def setOutputFileName(self, outputFileName):
        """Set the output file name and create its GFF3 writer."""
        self._outputFileName = outputFileName
        self._iWriter = Gff3Writer(self._outputFileName)

    def setSorted(self, sorted):
        """Tell the converter that the input files are already sorted."""
        self._sorted = sorted

    def setIndex(self, index):
        """Build and use an index on the reference."""
        self._index = index

    def createNCLists(self):
        """Convert the inputs if needed, load the NCLists, create the cursors."""
        startTime = time.time()
        if self._verbosity > 1:
            print("Building database")
        self._ncLists = dict([fileType, {}] for fileType in TYPES)
        self._indices = dict([fileType, {}] for fileType in TYPES)
        self._cursors = dict([fileType, {}] for fileType in TYPES)
        for fileType in TYPES:
            handler = NCListHandler(self._verbosity-3)
            self._ncListHandlers[fileType] = handler
            if self._converted[fileType]:
                # The temporary file is named after the input, with the type
                # number appended: <input without extension>_<type>.ncl
                self._convertedFileNames[fileType] = "%s_%d.ncl" % (os.path.splitext(self._inputFileNames[fileType])[0], fileType)
                ncLists = ConvertToNCList(self._verbosity-3)
                ncLists.setInputFileName(self._inputFileNames[fileType], self._inputFileFormats[fileType])
                ncLists.setSorted(self._sorted)
                ncLists.setOutputFileName(self._convertedFileNames[fileType])
                if fileType == REFERENCE and self._index:
                    ncLists.setIndex(True)
                ncLists.run()
                handler.setFileName(self._convertedFileNames[fileType])
            else:
                handler.setFileName(self._inputFileNames[fileType])
            handler.loadData()
            self._nbLines[fileType] = handler.getNbElements()
            self._nbElementsPerChromosome[fileType] = handler.getNbElementsPerChromosome()
            self._ncLists[fileType] = handler.getNCLists()
            for chromosome, ncList in self._ncLists[fileType].iteritems():
                self._cursors[fileType][chromosome] = NCListCursor(None, ncList, 0, self._verbosity)
                if fileType == REFERENCE and self._index:
                    self._indices[REFERENCE][chromosome] = ncList.getIndex()
        endTime = time.time()
        if self._verbosity > 1:
            print("done (%.2gs)" % (endTime - startTime))

    def compare(self):
        """Walk the query and reference NCLists of each chromosome in parallel.

        Chromosomes without any reference are skipped. The elapsed time is
        stored in self._timeSpent.
        """
        nbSkips, nbMoves = 0, 0
        previousChromosome = None
        done = False
        startTime = time.time()
        progress = Progress(len(self._ncLists[QUERY].keys()), "Checking overlap", self._verbosity)
        for chromosome, queryNCList in self._ncLists[QUERY].iteritems():
            # The returned parser is not used below; the call is kept because
            # it may have side effects in the handler (TODO confirm).
            queryParser = self._ncListHandlers[QUERY].getParser(chromosome)
            queryCursor = self._cursors[QUERY][chromosome]
            if chromosome != previousChromosome:
                previousChromosome = chromosome
                if chromosome not in self._ncLists[REFERENCE]:
                    continue
                refCursor = self._cursors[REFERENCE][chromosome]
            while True:
                queryTranscript = queryCursor.getTranscript()
                newRefLaddr = self.checkIndex(queryTranscript, refCursor)
                if newRefLaddr is not None:
                    # The index allows jumping ahead in the reference.
                    nbMoves += 1
                    refCursor.setLIndex(newRefLaddr)
                    done = False
                refCursor, done, unmatched = self.findOverlapIter(queryTranscript, refCursor, done)
                if refCursor.isOut():
                    break
                if unmatched or not queryCursor.hasChildren():
                    queryCursor.moveNext()
                    nbSkips += 1
                else:
                    queryCursor.moveDown()
                if queryCursor.isOut():
                    break
            progress.inc()
        progress.done()
        endTime = time.time()
        self._timeSpent = endTime - startTime
        if self._verbosity >= 10:
            print("# skips: %d" % (nbSkips))
            print("# moves: %d" % (nbMoves))

    def findOverlapIter(self, queryTranscript, cursor, done):
        """Find the reference intervals overlapping one query.

        Starting from "cursor" in the reference NCList, the parents of the
        cursor are checked first, then the list is walked from the cursor on.
        The query is written to the output when at least one overlap is found.

        Always returns a triple (nextCursor, nextDone, unmatched):
        - nextCursor: where the search should start for the next query
          (a cursor that is out when nothing is left to compare);
        - nextDone: value to pass as "done" on the next call;
        - unmatched: True when the query overlaps no reference.
        """
        overlappingNames = {}
        nextDone = False
        # Copy of the cursor set out of the list (LIndex -1): "no overlap yet".
        firstOverlapLAddr = NCListCursor(cursor)
        firstOverlapLAddr.setLIndex(-1)
        chromosome = queryTranscript.getChromosome()
        # Both early exits used to return 2-tuples, which broke the 3-value
        # unpacking done in compare().
        if chromosome not in self._ncLists[REFERENCE] or cursor.isOut():
            return firstOverlapLAddr, False, True
        parentCursor = NCListCursor(cursor)
        parentCursor.moveUp()
        firstParentAfter = False
        # Check all the ancestors of the current reference interval.
        while not parentCursor.isOut():
            parentOverlap = self.isOverlapping(queryTranscript, parentCursor)
            if parentOverlap == 0:
                overlappingNames.update(self._extractID(parentCursor.getTranscript()))
                if firstOverlapLAddr.isOut():
                    firstOverlapLAddr.copy(parentCursor)
                    nextDone = True
            elif parentOverlap == 1:
                firstParentAfter = NCListCursor(parentCursor)
            parentCursor.moveUp()
        if firstParentAfter:
            self._writeIntervalInNewGFF3(queryTranscript, overlappingNames)
            return firstParentAfter, False, not overlappingNames
        # This loop finds the overlaps starting from the current cursor.
        while True:
            parentCursor = NCListCursor(cursor)
            parentCursor.moveUp()
            overlap = self.isOverlapping(queryTranscript, cursor)
            if overlap == -1:
                # Query is on the right of the reference interval: go on.
                cursor.moveNext()
            elif overlap == 0:
                # Query overlaps the reference interval.
                overlappingNames.update(self._extractID(cursor.getTranscript()))
                if firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = True
                if done:
                    cursor.moveNext()
                else:
                    if not cursor.hasChildren():
                        cursor.moveNext()
                        if cursor.isOut():
                            break
                    else:
                        cursor.moveDown()
            else:
                # Query is on the left of the reference interval: stop here.
                if firstOverlapLAddr.isOut() or firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = False
                break

            done = False
            if cursor.isOut():
                break
        self._writeIntervalInNewGFF3(queryTranscript, overlappingNames)
        return firstOverlapLAddr, nextDone, not overlappingNames

    def isOverlapping(self, queryTranscript, refTranscript):
        """Compare the positions of a query and a reference interval.

        Returns 0 when they overlap, 1 when the query ends before the
        reference starts, and -1 otherwise (query after the reference).
        """
        if (queryTranscript.getStart() <= refTranscript.getEnd() and queryTranscript.getEnd() >= refTranscript.getStart()):
            return 0
        if queryTranscript.getEnd() < refTranscript.getStart():
            return 1
        return -1

    def checkIndex(self, transcript, cursor):
        """Return the index position to jump to, or None.

        A position is only returned when the index is in use and it points
        further in the reference file than the current cursor.
        """
        if not self._index:
            return None
        chromosome = transcript.getChromosome()
        nextLIndex = self._indices[REFERENCE][chromosome].getIndex(transcript)
        if nextLIndex is None:
            return None
        ncList = self._ncLists[REFERENCE][chromosome]
        nextGffAddress = ncList.getRefGffAddr(nextLIndex)
        thisGffAddress = cursor.getGffAddress()
        if nextGffAddress > thisGffAddress:
            return nextLIndex
        return None

    def _writeIntervalInNewGFF3(self, transcript, names):
        """Write a query with its overlap tags; do nothing without overlap.

        "names" maps the identifier of each overlapping reference to its
        number of elements.
        """
        if not names:
            return
        nbOverlaps = sum(names.values())
        transcript.setTagValue("overlapsWith", "--".join(sorted(names.keys())))
        transcript.setTagValue("nbOverlaps", nbOverlaps)
        self._iWriter.addTranscript(transcript)
        self._iWriter.write()
        self._nbOverlappingQueries += 1
        self._nbOverlaps += nbOverlaps

    def _extractID(self, transcript):
        """Return {identifier: number of elements} for a reference interval.

        The identifier is the "ID" tag when present, the unique name
        otherwise; the count is the "nbElements" tag (as a float), or 1.
        """
        nbElements = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
        id = transcript.getTagValue("ID") if "ID" in transcript.getTagNames() else transcript.getUniqueName()
        return {id: nbElements}

    def run(self):
        """Build the NCLists, compare, clean up, and print the summary."""
        self.createNCLists()
        self.compare()
        self.close()
        if self._verbosity > 0:
            print("# queries: %d" % (self._nbLines[QUERY]))
            print("# refs: %d" % (self._nbLines[REFERENCE]))
            print("# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps))
            print("time: %.2gs" % (self._timeSpent))


if __name__ == "__main__":
    description = "Find Overlaps Optim v1.0.0: Finds overlaps with several query intervals. [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--query", dest="inputQueryFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript or other format given by -f]")
    parser.add_option("-f", "--queryFormat", dest="queryFormat", action="store", type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")
    parser.add_option("-j", "--ref", dest="inputRefFileName", action="store", type="string", help="reference input file [compulsory] [format: file in transcript or other format given by -g]")
    parser.add_option("-g", "--refFormat", dest="refFormat", action="store", type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-d", "--index", dest="index", action="store_true", default=False, help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")
    parser.add_option("-s", "--sorted", dest="sorted", action="store_true", default=False, help="input files are already sorted [format: boolean] [default: False]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="Trace level [format: int] [default: 1]")
    (options, args) = parser.parse_args()

    iFOO = FindOverlapsOptim(options.verbosity)
    iFOO.setRefFileName(options.inputRefFileName, options.refFormat)
    iFOO.setQueryFileName(options.inputQueryFileName, options.queryFormat)
    iFOO.setOutputFileName(options.outputFileName)
    iFOO.setIndex(options.index)
    iFOO.setSorted(options.sorted)
    iFOO.run()
#! /usr/bin/env python
#
# smart_toolShed/SMART/Java/Python/GetDifferentialExpression.py
#
# Copyright INRA-URGI 2009-2010
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
"""Get the differential expression between 2 conditions (2 files), on regions defined by a third file"""

import os, re
from optparse import OptionParser
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.RPlotter import RPlotter
from SMART.Java.Python.misc import Utils
from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
from SMART.Java.Python.structure.Transcript import Transcript


class GetDifferentialExpression(object):
    """Compare read counts of two samples over a set of reference regions.

    Both samples are stored into the database reachable through
    ``MySqlConnection``; for every reference region the reads of each sample
    are counted, optionally normalized, and a p-value is obtained from
    ``Utils.fisherExactPValueBulk``. Results are written with ``Gff3Writer``
    and can optionally be plotted with ``RPlotter``.
    """

    def __init__(self, verbosity = 1):
        """Create an unconfigured comparison; use the setters before running.

        :param verbosity: trace level forwarded to every helper object
        """
        self.verbosity              = verbosity
        self.mySqlConnection        = MySqlConnection(verbosity)
        # Indices of the two samples; used everywhere to loop over them.
        self.inputs                 = (0, 1)
        self.transcriptContainers   = [None, None]
        self.transcriptContainerRef = None
        self.outputFileName         = None
        self.writer                 = None
        self.tables                 = [None, None]
        self.nbElements             = [0, 0]

        # region string -> (normalized 1, normalized 2, raw 1, raw 2)
        self.regionsToValues = {}
        # region string -> displayed name
        self.regionsToNames  = {}
        # result of Utils.fisherExactPValueBulk, indexed by (normalized 1, normalized 2)
        self.valuesToPvalues = {}

        self.oriented                      = True
        self.simpleNormalization           = False
        self.simpleNormalizationParameters = None
        self.adjustedNormalization         = False
        self.fixedSizeFactor               = None
        self.normalizationSize             = None
        self.normalizationFactors          = [1, 1]
        self.fdr                           = None
        self.fdrPvalue                     = None

        self.plot        = False
        self.plotter     = None
        self.plotterName = None
        self.points      = {}


    def setInputFile(self, i, fileName, fileFormat):
        """Register sample ``i`` (0 or 1) and make it share this object's database connection."""
        self.transcriptContainers[i] = TranscriptContainer(fileName, fileFormat, self.verbosity)
        self.transcriptContainers[i].mySqlConnection = self.mySqlConnection


    def setReferenceFile(self, fileName, fileFormat):
        """Register the file holding the regions on which counts are compared."""
        self.transcriptContainerRef = TranscriptContainer(fileName, fileFormat, self.verbosity)
        self.transcriptContainerRef.mySqlConnection = self.mySqlConnection


    def setOutputFile(self, fileName):
        """Set the output file name and open the GFF3 writer on it."""
        self.outputFileName = fileName
        self.writer         = Gff3Writer(fileName, self.verbosity)


    def setOriented(self, boolean):
        """When True, only reads with the same direction as the region are counted."""
        self.oriented = boolean


    def setSimpleNormalization(self, boolean):
        """Enable/disable normalization by the total number of reads of each sample."""
        self.simpleNormalization = boolean


    def setSimpleNormalizationParameters(self, parameters):
        """Provide the read totals to normalize with, as a "n1,n2" string.

        A non-None value also switches simple normalization on. ``None`` is a
        no-op.
        """
        if parameters != None:
            self.simpleNormalization = True
            self.simpleNormalizationParameters = [0, 0]
            for i, splittedParameter in enumerate(parameters.split(",")):
                self.simpleNormalizationParameters[i] = int(splittedParameter)


    def setAdjustedNormalization(self, boolean):
        """Enable/disable the normalization based on the regions of 'mean' expression."""
        self.adjustedNormalization = boolean


    def setFixedSizeNormalization(self, value):
        """Set the magnification factor of the sliding-window normalization (None disables it)."""
        self.fixedSizeFactor = value


    def setFdr(self, fdr):
        """Set the false discovery rate (None disables the FDR correction)."""
        self.fdr = fdr


    def setPlot(self, boolean):
        """Enable/disable the cloud plot."""
        self.plot = boolean


    def setPlotterName(self, plotterName):
        """Set the file name given to the plotter."""
        self.plotterName = plotterName

    def setPlotter(self):
        """Switch plotting on and create a fresh log-log point plotter."""
        self.plot    = True
        self.plotter = RPlotter(self.plotterName, self.verbosity)
        self.plotter.setPoints(True)
        self.plotter.setLog("xy")
        self.points  = {}


    def readInput(self, i):
        """Store sample ``i`` into the database, index it and count its elements."""
        self.transcriptContainers[i].storeIntoDatabase()
        self.tables[i] = self.transcriptContainers[i].getTables()
        progress       = Progress(len(self.tables[i].keys()), "Adding indices", self.verbosity)
        for chromosome in self.tables[i]:
            if self.oriented:
                self.tables[i][chromosome].createIndex("iStartEndDir_%s_%d" % (chromosome, i), ("start", "end", "direction"))
            else:
                self.tables[i][chromosome].createIndex("iStartEnd_%s_%d" % (chromosome, i), ("start", "end"))
            progress.inc()
        progress.done()

        progress = Progress(self.transcriptContainers[i].getNbTranscripts(), "Reading sample %d" % (i +1), self.verbosity)
        for chromosome in self.tables[i]:
            for transcript in self.tables[i][chromosome].getIterator():
                if "nbElements" in transcript.getTagNames():
                    # The tag may come back as text (other code in this project
                    # converts it with float()), so convert before adding.
                    self.nbElements[i] += int(float(transcript.getTagValue("nbElements")))
                else:
                    self.nbElements[i] += 1
                progress.inc()
        progress.done()
        if self.verbosity > 0:
            print("%d elements in sample %d" % (self.nbElements[i], i+1))


    def computeSimpleNormalizationFactors(self):
        """Scale both samples to the average number of reads.

        Uses the user-provided totals when given, the counted totals otherwise.
        Updates ``normalizationFactors`` and ``nbElements`` in place.
        """
        nbElements = self.nbElements
        if self.simpleNormalizationParameters != None:
            print("Using provided normalization parameters: %s" % (", ".join([str(parameter) for parameter in self.simpleNormalizationParameters])))
            nbElements = self.simpleNormalizationParameters
        avgNbElements = int(float(sum(nbElements)) / len(nbElements))
        for i in self.inputs:
            self.normalizationFactors[i]  = float(avgNbElements) / nbElements[i]
            self.nbElements[i]           *= self.normalizationFactors[i]
        if self.verbosity > 1:
            print("Normalizing to average # reads: %d" % (avgNbElements))
            if self.simpleNormalizationParameters != None:
                print("# reads: %s" % (", ".join([str(nbElement) for nbElement in self.nbElements])))

    def __del__(self):
        self.mySqlConnection.deleteDatabase()

    def regionToString(self, transcript):
        """Encode a region as "chromosome:start-end(strand)"; inverse of stringToRegion."""
        return "%s:%d-%d(%s)" % (transcript.getChromosome(), transcript.getStart(), transcript.getEnd(), "+" if transcript.getDirection() == 1 else "-")

    def stringToRegion(self, region):
        """Rebuild a Transcript from a string produced by regionToString.

        :raises Exception: when the string does not have the expected shape
        """
        m = re.search(r"^(\S+):(\d+)-(\d+)\((\S)\)$", region)
        if m == None:
            raise Exception("Internal format error: cannot parse region '%s'" % (region))
        transcript = Transcript()
        transcript.setChromosome(m.group(1))
        transcript.setStart(int(m.group(2)))
        transcript.setEnd(int(m.group(3)))
        transcript.setDirection(m.group(4))
        return transcript

    def computeMinimumSize(self):
        """Store in ``normalizationSize`` the smallest ``end - start`` of the reference regions.

        Note that this is the region size minus one; the trace message adds
        the one back.
        """
        self.normalizationSize = 1000000000
        progress = Progress(self.transcriptContainerRef.getNbTranscripts(), "Getting minimum reference size", self.verbosity)
        for transcriptRef in self.transcriptContainerRef.getIterator():
            self.normalizationSize = min(self.normalizationSize, transcriptRef.getEnd() - transcriptRef.getStart())
            progress.inc()
        progress.done()
        if self.verbosity > 1:
            print("Minimum reference size: %d" % (self.normalizationSize+1))

    def useFixedSizeNormalization(self, start, end, starts):
        """Return a count normalized with a sliding window of ``normalizationSize``.

        :param start:  first position of the region
        :param end:    last position of the region
        :param starts: dict {read start position: number of reads}; it is
                       completed and modified in place
        :return: 0 when ``starts`` is empty, else the summed window counts
                 divided by the window size and scaled by
                 ``fixedSizeFactor / region length``
        """
        if not starts:
            return 0
        windowSize = self.normalizationSize
        # Make every position the windows will touch addressable.
        for position in range(start - windowSize, end + 1 + windowSize):
            if position not in starts:
                starts[position] = 0
        # Reads starting before the region are attributed to its first position.
        for position, nb in list(starts.items()):
            if position < start:
                starts[start]    += nb
                starts[position]  = 0
        currentNb = 0
        total     = 0
        for position in range(start - windowSize, end + 1):
            currentNb += starts[position + windowSize] - starts[position]
            total     += currentNb
        # float() on the factor: the option is parsed as an int, and an integer
        # division here silently truncated the scaling ratio (often to 0).
        return (float(total) / windowSize) * (float(self.fixedSizeFactor) / (end - start + 1))

    def retrieveCounts(self, transcriptRef, i):
        """Count the reads of sample ``i`` contained in the exons of a region.

        :return: (raw count, count after the optional fixed-size normalization);
                 (0, 0) when the sample has no data on the region's chromosome
        """
        if transcriptRef.getChromosome() not in self.tables[i]:
            return (0, 0)
        cumulatedCount           = 0
        cumulatedNormalizedCount = 0
        for exon in transcriptRef.getExons():
            count   = 0
            starts  = {}
            command = "SELECT start, tags FROM '%s' WHERE start >= %d AND end <= %d" % (self.tables[i][exon.getChromosome()].getName(), exon.getStart(), exon.getEnd())
            if self.oriented:
                command += " AND direction = %d" % (exon.getDirection())
            query = self.mySqlConnection.executeQuery(command)
            for line in query.getIterator():
                nb = 1
                for tag in line[1].split(";"):
                    # Skip empty/malformed tags and keep any '=' found in the value.
                    if "=" not in tag:
                        continue
                    key, value = tag.split("=", 1)
                    if key == "nbElements":
                        nb = int(float(value))
                count     += nb
                readStart  = int(line[0])
                # Several reads may share a start position: accumulate them.
                starts[readStart] = starts.get(readStart, 0) + nb
            normalizedCount = count if self.fixedSizeFactor == None else self.useFixedSizeNormalization(exon.getStart(), exon.getEnd(), starts)
            cumulatedCount           += count
            cumulatedNormalizedCount += normalizedCount
        return (cumulatedCount, cumulatedNormalizedCount)

    def getAllCounts(self):
        """Fill ``regionsToNames`` and ``regionsToValues`` for every reference region.

        Regions without any read in both samples are not stored in
        ``regionsToValues``.
        """
        progress = Progress(self.transcriptContainerRef.getNbTranscripts(), "Getting counts", self.verbosity)
        for cpt, transcriptRef in enumerate(self.transcriptContainerRef.getIterator()):
            region = self.regionToString(transcriptRef)
            if "ID" in transcriptRef.getTagNames():
                self.regionsToNames[region] = transcriptRef.getTagValue("ID")
            elif transcriptRef.getName() != None:
                self.regionsToNames[region] = transcriptRef.getName()
            else:
                self.regionsToNames[region] = "region_%d" % (cpt)
            values           = [None, None]
            normalizedValues = [None, None]
            for i in self.inputs:
                values[i], normalizedValues[i] = self.retrieveCounts(transcriptRef, i)
                # NOTE(review): computePvalues() multiplies by normalizationFactors
                # again, so the simple factor looks applied twice to the counts --
                # confirm whether this is intended before changing it.
                normalizedValues[i] = int(self.normalizationFactors[i] * normalizedValues[i])
            if sum(values) != 0:
                self.regionsToValues[region] = (normalizedValues[0], normalizedValues[1], values[0], values[1])
            progress.inc()
        progress.done()

    def computeAdjustedNormalizationFactors(self):
        """Re-normalize both samples using the regions of 'mean' expression.

        Only regions whose average corrected count lies between the first and
        third quartiles contribute. Does nothing when no region was counted.
        """
        nbElements = len(self.regionsToValues.keys())
        if nbElements == 0:
            # No region: the quartile lookup below would raise an IndexError.
            return
        avgValues = []
        progress  = Progress(nbElements, "Normalization step 1", self.verbosity)
        for values in self.regionsToValues.values():
            correctedValues = [values[i] * self.normalizationFactors[i] for i in self.inputs]
            avgValues.append(float(sum(correctedValues)) / len(correctedValues))
            progress.inc()
        progress.done()

        sortedAvgValues = sorted(avgValues)
        minAvgValues    = sortedAvgValues[nbElements // 4]
        maxAvgValues    = sortedAvgValues[nbElements * 3 // 4]
        sums            = [0, 0]
        progress        = Progress(nbElements, "Normalization step 2", self.verbosity)
        for values in self.regionsToValues.values():
            correctedValues = [values[i] * self.normalizationFactors[i] for i in self.inputs]
            avgValue        = float(sum(correctedValues)) / len(correctedValues)
            if minAvgValues <= avgValue and avgValue <= maxAvgValues:
                for i in self.inputs:
                    sums[i] += values[i]
            progress.inc()
        progress.done()

        avgSums = float(sum(sums)) / len(sums)
        for i in self.inputs:
            previousNbElements            = self.nbElements[i]
            self.normalizationFactors[i] *= float(avgSums) / sums[i]
            self.nbElements[i]           *= self.normalizationFactors[i]
            if self.verbosity > 1:
                print("Normalizing sample %d: %s to %s" % ((i+1), previousNbElements, int(self.nbElements[i])))

    def getMinimumReferenceSize(self):
        """Store in ``normalizationSize`` the smallest size (end - start + 1) of the reference regions."""
        self.normalizationSize = 1000000000
        progress = Progress(self.transcriptContainerRef.getNbTranscripts(), "Reference element sizes", self.verbosity)
        for transcriptRef in self.transcriptContainerRef.getIterator():
            self.normalizationSize = min(self.normalizationSize, transcriptRef.getEnd() - transcriptRef.getStart() + 1)
            progress.inc()
        progress.done()
        if self.verbosity > 1:
            print("Minimum reference size: %d" % (self.normalizationSize))

    def computePvalues(self):
        """Apply the normalization factors to the counts and compute all p-values at once.

        ``regionsToValues`` is rewritten with the normalized counts;
        ``valuesToPvalues`` receives the result of
        ``Utils.fisherExactPValueBulk``.
        """
        normalizedValues = set()
        progress         = Progress(len(self.regionsToValues.keys()), "Normalizing counts", self.verbosity)
        for region in self.regionsToValues:
            values            = self.regionsToValues[region]
            normalizedValues0 = int(round(values[0] * self.normalizationFactors[0]))
            normalizedValues1 = int(round(values[1] * self.normalizationFactors[1]))
            self.regionsToValues[region] = (normalizedValues0, normalizedValues1, values[2], values[3])
            normalizedValues.add((normalizedValues0, normalizedValues1, self.nbElements[0] - normalizedValues0, self.nbElements[1] - normalizedValues1, values[2], values[3]))
            progress.inc()
        progress.done()

        if self.verbosity > 1:
            print("Computing p-values...")
        self.valuesToPvalues = Utils.fisherExactPValueBulk(list(normalizedValues))
        if self.verbosity > 1:
            print("... done")

    def setTagValues(self, transcript, values, pValue):
        """Replace the tags and exons of ``transcript`` by the comparison results.

        :param values: (normalized 1, normalized 2, raw 1, raw 2)
        :param pValue: p-value of the region
        :return: the same transcript, tagged "up", "down" or "equal"
        """
        # Copy the names first so that deleting cannot disturb the iteration.
        for tag in list(transcript.getTagNames()):
            transcript.deleteTag(tag)
        transcript.removeExons()
        transcript.setTagValue("pValue", str(pValue))
        transcript.setTagValue("nbReadsCond1", str(values[0]))
        transcript.setTagValue("nbReadsCond2", str(values[1]))
        transcript.setTagValue("nbUnnormalizedReadsCond1", str(values[2]))
        transcript.setTagValue("nbUnnormalizedReadsCond2", str(values[3]))
        # With an FDR but no threshold found, nothing is significant. This used
        # to rely on Python 2 ordering None below every number.
        notSignificant = self.fdr != None and (self.fdrPvalue is None or pValue > self.fdrPvalue)
        if (values[0] == values[1]) or notSignificant:
            transcript.setTagValue("regulation", "equal")
        elif values[0] < values[1]:
            transcript.setTagValue("regulation", "up")
        else:
            transcript.setTagValue("regulation", "down")
        return transcript

    def computeFdr(self):
        """Find the p-value threshold of the Benjamini-Hochberg procedure.

        Stores in ``fdrPvalue`` the largest p-value p(k) such that
        p(k) <= fdr * k / m, k being its 1-based rank among the m regions;
        ``fdrPvalue`` is left untouched when no p-value qualifies.
        """
        pValues   = []
        nbRegions = len(self.regionsToValues.keys())
        progress  = Progress(nbRegions, "Computing FDR", self.verbosity)
        for values in self.regionsToValues.values():
            pValues.append(self.valuesToPvalues[values[0:2]])
            progress.inc()
        progress.done()

        for i, pValue in enumerate(sorted(pValues, reverse = True)):
            # The i-th largest p-value has ascending rank m - i (not m - 1 - i,
            # which made the smallest p-value compete against a threshold of 0).
            rank = nbRegions - i
            if pValue <= float(self.fdr) * rank / nbRegions:
                self.fdrPvalue = pValue
                if self.verbosity > 1:
                    print("FDR: %f, k: %i, m: %d" % (pValue, rank, nbRegions))
                return

    def writeDifferentialExpression(self):
        """Write one tagged feature per region, then draw the plot if requested."""
        if self.plot:
            self.setPlotter()

        progress = Progress(len(self.regionsToValues.keys()), "Writing output", self.verbosity)
        for region, values in self.regionsToValues.items():
            transcript = self.stringToRegion(region)
            pValue     = self.valuesToPvalues[values[0:2]]
            transcript.setName(self.regionsToNames[region])
            transcript = self.setTagValues(transcript, values, pValue)
            self.writer.addTranscript(transcript)

            if self.plot:
                self.points[region] = (values[0], values[1])
            progress.inc()
        progress.done()
        self.writer.write()
        self.writer.close()

        if self.plot:
            self.plotter.addLine(self.points)
            self.plotter.plot()

    def getDifferentialExpression(self):
        """Run the whole analysis: read, normalize, count, test and write."""
        for i in self.inputs:
            self.readInput(i)

        if self.simpleNormalization:
            self.computeSimpleNormalizationFactors()
        if self.fixedSizeFactor != None:
            self.computeMinimumSize()

        self.getAllCounts()

        if self.adjustedNormalization:
            self.computeAdjustedNormalizationFactors()

        self.computePvalues()

        if self.fdr != None:
            self.computeFdr()

        self.writeDifferentialExpression()


if __name__ == "__main__":

    # parse command line
    description = "Get Differential Expression v1.0.1: Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file. [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1",           dest="inputFileName1",    action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1",          dest="format1",           action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2",           dest="inputFileName2",    action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2",          dest="format2",           action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
    parser.add_option("-k", "--reference",        dest="referenceFileName", action="store",                     type="string", help="reference file [compulsory] [format: file in transcript format given by -l]")
    parser.add_option("-l", "--referenceFormat",  dest="referenceFormat",   action="store",                     type="string", help="format of reference file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output",           dest="outputFileName",    action="store",                     type="string", help="output file [format: output file in gff3 format]")
    parser.add_option("-n", "--notOriented",      dest="notOriented",       action="store_true", default=False,                help="if the reads are not oriented [default: False] [format: bool]")
    parser.add_option("-s", "--simple",           dest="simple",            action="store_true", default=False,                help="normalize using the number of reads in each condition [format: bool]")
    parser.add_option("-S", "--simpleParameters", dest="simpleParameters",  action="store",      default=None,  type="string", help="provide the number of reads [format: bool]")
    parser.add_option("-a", "--adjusted",         dest="adjusted",          action="store_true", default=False,                help="normalize using the number of reads of 'mean' regions [format: bool]")
    parser.add_option("-x", "--fixedSizeFactor",  dest="fixedSizeFactor",   action="store",      default=None,  type="int",    help="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization) [format: int]")
    parser.add_option("-d", "--fdr",              dest="fdr",               action="store",      default=None,  type="float",  help="use FDR [format: float]")
    parser.add_option("-p", "--plot",             dest="plotName",          action="store",      default=None,  type="string", help="plot cloud plot [format: output file in PNG format]")
    parser.add_option("-v", "--verbosity",        dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    differentialExpression = GetDifferentialExpression(options.verbosity)
    differentialExpression.setInputFile(0, options.inputFileName1, options.format1)
    differentialExpression.setInputFile(1, options.inputFileName2, options.format2)
    differentialExpression.setReferenceFile(options.referenceFileName, options.referenceFormat)
    differentialExpression.setOutputFile(options.outputFileName)
    if options.plotName != None :
        differentialExpression.setPlotterName(options.plotName)
        differentialExpression.setPlotter()
    differentialExpression.setOriented(not options.notOriented)
    differentialExpression.setSimpleNormalization(options.simple)
    differentialExpression.setSimpleNormalizationParameters(options.simpleParameters)
    differentialExpression.setAdjustedNormalization(options.adjusted)
    differentialExpression.setFixedSizeNormalization(options.fixedSizeFactor)
    differentialExpression.setFdr(options.fdr)
    differentialExpression.getDifferentialExpression()
    differentialExpression.mySqlConnection.deleteDatabase()
#! /usr/bin/env python
#
# smart_toolShed/SMART/Java/Python/GetDistribution.py
#
# Copyright INRA-URGI 2009-2012
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
import os
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.parsing.FastaParser import FastaParser
from SMART.Java.Python.structure.Transcript import Transcript
from commons.core.writer.Gff3Writer import Gff3Writer
from SMART.Java.Python.misc.RPlotter import RPlotter
from SMART.Java.Python.misc.MultipleRPlotter import MultipleRPlotter
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
from SMART.Java.Python.misc.Progress import Progress

# Strands to consider when plotting one curve per strand (True) or a single curve (False).
TWOSTRANDS  = {True: [1, -1], False: [0]}
# Suffix displayed for each strand.
STRANDTOSTR = {1: "(+)", -1: "(-)", 0: ""}


class GetDistribution(object):
    """Plot the distribution of the start positions of genomic elements.

    Elements are read from a parser, their values (one per tag name) are
    summed into bins along each chromosome, and the result is plotted with
    ``RPlotter``; it can also be exported as CSV and GFF3.
    """

    def __init__(self, verbosity):
        """Create an unconfigured distribution; use the setters, then ``run()``.

        :param verbosity: trace level forwarded to every helper object
        """
        self.verbosity      = verbosity
        self.sizes          = None
        self.twoStrands     = False
        self.start          = 1
        self.end            = None
        self.names          = ["nbElements"]
        self.average        = False
        self.normalization  = False
        self.nbBins         = 1000
        self.nbValues       = {}
        self.height         = 300
        self.width          = 600
        self.colors         = None
        self.outputFileName = None
        self.gffFileName    = None
        self.csvFileName    = None
        self.yMin           = None
        self.yMax           = None
        self.chromosome     = None
        self.merge          = False
        self.nbTranscripts  = None

    def setInputFile(self, fileName, format):
        """Choose the parser matching ``format`` and open ``fileName`` with it."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.parser = chooser.getParser(fileName)

    def setReferenceFile(self, fileName):
        """Read the chromosome sizes from a FASTA file (``None`` is a no-op)."""
        if fileName == None:
            return
        fastaParser      = FastaParser(fileName, self.verbosity)
        self.chromosomes = fastaParser.getRegions()
        self.sizes       = dict((region, fastaParser.getSizeOfRegion(region)) for region in self.chromosomes)
        self.maxSize     = max(self.sizes.values())

    def setRegion(self, chromosome, start, end):
        """Restrict the analysis to ``chromosome:start-end``.

        Does nothing when ``chromosome`` is None. A ``start`` of None keeps
        the current start (1 by default).
        """
        if chromosome == None:
            return
        # Use the arguments, not the command-line globals, so the class also
        # works when imported as a library.
        self.maxSize     = end
        self.sizes       = {chromosome: end}
        self.chromosomes = [chromosome]
        self.chromosome  = chromosome
        if start != None:
            self.start = start
        self.end         = end

    def setOutputFile(self, fileName):
        """Set the name of the output plot."""
        self.outputFileName = fileName

    def setNbBins(self, nbBins):
        """Set the number of bins; 0 means one bin per start position."""
        self.nbBins = nbBins

    def set2Strands(self, twoStrands):
        """When True, one curve per strand is computed."""
        self.twoStrands = twoStrands

    def setNames(self, names):
        """Set the list of tag names whose values are accumulated."""
        self.names = names

    def setAverage(self, average):
        """When True, bin values are divided by the bin size."""
        self.average = average

    def setNormalization(self, normalization):
        """When True, the curves of the different tags are scaled to the same total."""
        self.normalization = normalization

    def setImageSize(self, height, width):
        """Set the size of one plot."""
        self.height = height
        self.width  = width

    def setYLimits(self, yMin, yMax):
        """Set the limits of the y-axis (None leaves a limit free)."""
        self.yMin = yMin
        self.yMax = yMax

    def setColors(self, colors):
        """Set the list of line colors, one per tag name (None for defaults)."""
        self.colors = colors

    def writeGff(self, fileName):
        """Request a GFF3 export to ``fileName`` (None disables it)."""
        self.gffFileName = fileName

    def writeCsv(self, fileName):
        """Request a CSV export to ``fileName`` (None disables it)."""
        self.csvFileName = fileName

    def mergePlots(self, merge):
        """When True, all chromosomes are drawn in a single figure."""
        self.merge = merge

    def _getTranscript(self, element):
        """Return the transcript of a parsed element, unwrapping ``Mapping`` objects."""
        if element.__class__.__name__ == "Mapping":
            return element.getTranscript()
        return element

    def _estimateSizes(self):
        """Estimate each chromosome size as the largest start position seen in the input."""
        progress   = UnlimitedProgress(10000, "Reading input for chromosome size estimate", self.verbosity)
        self.sizes = {}
        nbRead     = 0
        for element in self.parser.getIterator():
            transcript = self._getTranscript(element)
            chromosome = transcript.getChromosome()
            start      = transcript.getStart()
            self.sizes[chromosome] = max(start, self.sizes.get(chromosome, 0))
            nbRead += 1
            progress.inc()
        progress.done()
        # Keep None on empty input so that _populateBins falls back to an
        # unbounded progress bar.
        if nbRead > 0:
            self.nbTranscripts = nbRead

    def _computeSliceSize(self):
        """Choose the bin size: largest chromosome / nbBins, rounded down to 2 significant digits."""
        if self.nbBins == 0:
            return
        largest   = max(self.sizes.values()) if self.sizes else 0
        rawSize   = int(largest / float(self.nbBins))
        magnitude = 10 ** max(0, len("%d" % (rawSize)) - 2)
        self.sliceSize = max(1, (rawSize // magnitude) * magnitude)
        if self.verbosity > 0:
            print("choosing bin size of %d" % (self.sliceSize))

    def _initBins(self):
        """Create ``bins[chromosome][name][strand]``; binned mode pre-fills every bin start with 0.0."""
        self.bins = {}
        for chromosome in self.sizes:
            self.bins[chromosome] = {}
            for name in self.names:
                self.bins[chromosome][name] = {}
                for strand in TWOSTRANDS[self.twoStrands]:
                    if self.nbBins == 0:
                        self.bins[chromosome][name][strand] = {}
                    else:
                        firstIndex = self.start // self.sliceSize
                        lastIndex  = self.sizes[chromosome] // self.sliceSize
                        self.bins[chromosome][name][strand] = dict((i * self.sliceSize + 1, 0.0) for i in range(firstIndex, lastIndex + 1))

    def _populateBins(self):
        """Add the value of every tag of every input element to the bin of its start position."""
        if self.nbTranscripts == None:
            progress = UnlimitedProgress(10000, "Counting data", self.verbosity)
        else:
            progress = Progress(self.nbTranscripts, "Counting data", self.verbosity)
        for element in self.parser.getIterator():
            transcript = self._getTranscript(element)
            progress.inc()
            chromosome = transcript.getChromosome()
            start      = transcript.getStart()
            if self.chromosome and (chromosome != self.chromosome or start < self.start or start > self.end):
                continue
            strand = transcript.getDirection() if self.twoStrands else 0
            if self.nbBins != 0:
                bin = (start // self.sliceSize) * self.sliceSize + 1
            else:
                bin = start
            for name in self.names:
                # Elements without the tag count for 1.
                value = float(transcript.tags.get(name, 1))
                self.bins[chromosome][name][strand][bin] = self.bins[chromosome][name][strand].get(bin, 0) + value
                self.nbValues[name] = self.nbValues.get(name, 0) + value
        progress.done()

    def _normalize(self):
        """Scale the bins of every tag so that all tags have the same total."""
        if not self.nbValues:
            return
        # Sum the totals (the values), not the tag names (the keys).
        average = float(sum(self.nbValues.values())) / len(self.nbValues)
        # A tag whose total is 0 only has empty bins: nothing to scale.
        factors = dict((name, average / total) for name, total in self.nbValues.items() if total != 0)
        for chromosome in self.bins:
            for name in self.bins[chromosome]:
                if name not in factors:
                    continue
                for strand in self.bins[chromosome][name]:
                    for bin in self.bins[chromosome][name][strand]:
                        self.bins[chromosome][name][strand][bin] *= factors[name]

    def _computeAverage(self):
        """Divide every bin by the bin size (no-op when there is one bin per position)."""
        if self.nbBins == 0:
            # No slice size is computed in this mode and every bin has size 1.
            return
        for chromosome in self.bins:
            for name in self.bins[chromosome]:
                for strand in self.bins[chromosome][name]:
                    for bin in self.bins[chromosome][name][strand]:
                        self.bins[chromosome][name][strand][bin] = float(self.bins[chromosome][name][strand][bin]) / self.sliceSize

    def _getPlotter(self, chromosome):
        """Build the plot of one chromosome, one line per (tag, strand)."""
        plot = RPlotter("%s_%s.png" % (os.path.splitext(self.outputFileName)[0], chromosome), self.verbosity)
        plot.setImageSize(self.width, self.height)
        if self.sizes[chromosome] <= 1000:
            unit  = "nt."
            ratio = 1.0
        elif self.sizes[chromosome] <= 1000000:
            unit  = "kb"
            ratio = 1000.0
        else:
            unit  = "Mb"
            ratio = 1000000.0
        if self.yMin != None:
            plot.setMinimumY(self.yMin)
        if self.yMax != None:
            plot.setMaximumY(self.yMax)
        plot.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
        plot.setLegend(True)
        for i, name in enumerate(self.bins[chromosome]):
            for strand in self.bins[chromosome][name]:
                fullName = "%s %s" % (name.replace("_", " ")[:6], STRANDTOSTR[strand])
                # The minus strand (-1) is drawn below the x-axis.
                factor        = 1 if strand == 0 else strand
                correctedLine = dict((key / ratio, value * factor) for key, value in self.bins[chromosome][name][strand].items())
                plot.addLine(correctedLine, fullName, self.colors[i] if self.colors else None)
        return plot

    def _plot(self):
        """Draw one plot per chromosome, or a single merged figure."""
        chromosomes  = sorted(self.bins.keys())
        multiplePlot = None
        if self.merge:
            multiplePlot = MultipleRPlotter(self.outputFileName, self.verbosity)
            multiplePlot.setImageSize(self.width, self.height * len(chromosomes))
        progress = Progress(len(chromosomes), "Plotting", self.verbosity)
        for chromosome in chromosomes:
            plot = self._getPlotter(chromosome)
            if self.merge:
                multiplePlot.addPlot(plot)
            else:
                plot.plot()
            progress.inc()
        if self.merge:
            multiplePlot.plot()
        progress.done()

    def _writeCsv(self):
        """Write the bins as a ';'-separated table.

        The header is "chromosome;tag;strand" followed by one column per bin;
        each following row holds the values of one (chromosome, tag, strand).
        """
        if self.verbosity > 1:
            print("Writing CSV file...")
        if self.nbBins != 0:
            firstIndex = self.start // self.sliceSize
            lastIndex  = max(self.sizes.values()) // self.sliceSize
            # Bins are keyed by their first position, not by their index.
            keys   = [i * self.sliceSize + 1 for i in range(firstIndex, lastIndex + 1)]
            labels = ["%d-%d" % (key, key + self.sliceSize - 1) for key in keys]
        else:
            positions = set()
            for chromosome in self.bins:
                for name in self.bins[chromosome]:
                    for strand in self.bins[chromosome][name]:
                        positions.update(self.bins[chromosome][name][strand].keys())
            keys   = sorted(positions)
            labels = ["%d" % (key) for key in keys]
        csvHandle = open(self.csvFileName, "w")
        try:
            csvHandle.write(";".join(["chromosome", "tag", "strand"] + labels) + "\n")
            for chromosome in self.bins:
                for name in self.bins[chromosome]:
                    for strand in self.bins[chromosome][name]:
                        values = self.bins[chromosome][name][strand]
                        cells  = ["%s" % (chromosome), "%s" % (name), "%s" % (STRANDTOSTR[strand])]
                        cells.extend("%.2f" % (values.get(key, 0)) for key in keys)
                        csvHandle.write(";".join(cells) + "\n")
        finally:
            csvHandle.close()
        if self.verbosity > 1:
            print("...done")

    def _writeGff(self):
        """Write one GFF3 feature per bin, carrying the bin value in a tag named after the tag it counts."""
        if self.verbosity > 1:
            print("Writing GFF file...")
        writer = Gff3Writer(self.gffFileName, self.verbosity)
        cpt    = 1
        for chromosome in self.bins:
            for name in self.bins[chromosome]:
                for strand in self.bins[chromosome][name]:
                    for bin, value in self.bins[chromosome][name][strand].items():
                        transcript = Transcript()
                        transcript.setChromosome(chromosome)
                        transcript.setStart(bin)
                        if self.nbBins > 0:
                            # Same inclusive range as the CSV columns.
                            transcript.setEnd(bin + self.sliceSize - 1)
                        else:
                            transcript.setEnd(bin)
                        transcript.setDirection(1 if strand == 0 else strand)
                        transcript.setTagValue("ID", "region%d" % (cpt))
                        transcript.setTagValue(name, value)
                        # Without this call, nothing was ever written.
                        writer.addTranscript(transcript)
                        cpt += 1
        writer.write()
        if self.verbosity > 1:
            print("...done")

    def run(self):
        """Compute the distribution, plot it and write the requested exports."""
        if self.sizes == None:
            self._estimateSizes()
        self._computeSliceSize()
        self._initBins()
        self._populateBins()
        if self.normalization:
            self._normalize()
        if self.average:
            self._computeAverage()
        self._plot()
        if self.csvFileName != None:
            self._writeCsv()
        if self.gffFileName != None:
            self._writeGff()


if __name__ == "__main__":

    description = "Get Distribution v1.0.2: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                            type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",      dest="format",            action="store",                            type="string", help="format of the input file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output",      dest="outputFileName",    action="store",                            type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,         type="string", help="file containing the genome [format: file in FASTA format]")
    parser.add_option("-b", "--nbBins",      dest="nbBins",            action="store",      default=1000,         type="int",    help="number of bins [default: 1000] [format: int]")
    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                       help="plot one curve per strand [format: bool] [default: false]")
    parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,         type="string", help="plot only a chromosome [format: string]")
    parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,         type="int",    help="start from a given region [format: int]")
    parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,         type="int",    help="end from a given region [format: int]")
    parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,         type="int",    help="minimum value on the y-axis to plot [format: int]")
    parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,         type="int",    help="maximum value on the y-axis to plot [format: int]")
    parser.add_option("-x", "--csv",         dest="csv",               action="store",      default=None,                        help="write a .csv file [format: output file in CSV format] [default: None]")
    parser.add_option("-g", "--gff",         dest="gff",               action="store",      default=None,                        help="also write GFF3 file [format: output file in GFF format] [default: None]")
    parser.add_option("-H", "--height",      dest="height",            action="store",      default=300,          type="int",    help="height of the graphics [format: int] [default: 300]")
    parser.add_option("-W", "--width",       dest="width",             action="store",      default=600,          type="int",    help="width of the graphics [format: int] [default: 1000]")
    parser.add_option("-a", "--average",     dest="average",           action="store_true", default=False,                       help="plot average (instead of sum) [default: false] [format: boolean]")
    parser.add_option("-n", "--names",       dest="names",             action="store",      default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")
    parser.add_option("-l", "--color",       dest="colors",            action="store",      default=None,         type="string", help="color of the lines (separated by commas and no space) [format: string]")
    parser.add_option("-z", "--normalize",   dest="normalize",         action="store_true", default=False,                       help="normalize data (when panels are different) [format: bool] [default: false]")
    parser.add_option("-m", "--merge",       dest="mergePlots",        action="store_true", default=False,                       help="merge all plots in one figure [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,            type="int",    help="trace level [default: 1] [format: int]")
    (options, args) = parser.parse_args()

    gt = GetDistribution(options.verbosity)
    gt.setInputFile(options.inputFileName, options.format)
    gt.setOutputFile(options.outputFileName)
    gt.setReferenceFile(options.referenceFileName)
    gt.setNbBins(int(options.nbBins))
    gt.set2Strands(options.bothStrands)
    gt.setRegion(options.chromosome, options.start, options.end)
    gt.setNormalization(options.normalize)
    gt.setAverage(options.average)
    gt.setYLimits(options.yMin, options.yMax)
    gt.writeCsv(options.csv)
    gt.writeGff(options.gff)
    gt.setImageSize(options.height, options.width)
    gt.setNames(options.names.split(","))
    gt.setColors(None if options.colors == None else options.colors.split(","))
    gt.mergePlots(options.mergePlots)
    gt.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/GetFlanking.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,231 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.structure.Interval import Interval
from SMART.Java.Python.misc.Progress import Progress

# Identifiers of the two input sets; used as keys of GetFlanking.transcripts.
QUERY        = 0
REFERENCE    = 1
INPUTS       = (QUERY, REFERENCE)
# Directions scanned by GetFlanking.run().
STRANDS      = (-1, 1)
# Pieces used by GetFlanking.setTags() to build the names of the output tags.
TAG_DISTANCE = "distance_"
TAG_SENSE    = "_sense"
TAG_REGION   = "_region"
TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}
TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}
TAGS_SENSE   = {-1: "antisense", 0: "", 1: "colinear"}
# Suffix shown in the progress message for each scanned direction.
STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}


def getOrderKey(transcript, direction):
    """Sort key of a transcript for a scan in the given direction.

    Returns the end coordinate when direction is 1, and the negated start
    coordinate otherwise.
    """
    if direction == 1:
        return transcript.getEnd()
    return - transcript.getStart()

def isInGoodRegion(transcriptRef, transcriptQuery, direction):
    """Tell whether the query lies beyond the reference in the scan direction.

    For direction 1 the query must end after the reference ends; otherwise
    the query must start before the reference starts.
    """
    if direction == 1:
        return transcriptQuery.getEnd() > transcriptRef.getEnd()
    return transcriptQuery.getStart() < transcriptRef.getStart()


class GetFlanking(object):
    """Associate reference transcripts with flanking query transcripts.

    Typical use: load both inputs with setInputFile(), configure the filters,
    set the output with setOutputFile(), then call run().
    """

    def __init__(self, verbosity):
        self.verbosity   = verbosity
        # transcripts[input id][chromosome] -> list of transcripts
        self.transcripts = dict((inputId, {}) for inputId in INPUTS)
        self.directions  = []
        self.noOverlap   = False
        self.colinear    = False
        self.antisense   = False
        self.distance    = None
        self.minDistance = None
        self.maxDistance = None
        self.tagName     = "flanking"
        # flankings[reference][relative direction] -> query.
        # Created here so that getFlanking() and write() can be called
        # without going through run() first; run() resets it.
        self.flankings   = {}

    def setInputFile(self, fileName, format, id):
        """Read every transcript of fileName and store it under input `id`
        (QUERY or REFERENCE), grouped by chromosome."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        parser = chooser.getParser(fileName)
        perChromosome = self.transcripts[id]
        for transcript in parser.getIterator():
            perChromosome.setdefault(transcript.getChromosome(), []).append(transcript)

    def setOutputFile(self, fileName):
        """Create the GFF3 writer used by write()."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def addUpstreamDirection(self, upstream):
        """Request the upstream (-1) element in the output when `upstream` is true."""
        if upstream:
            self.directions.append(-1)

    def addDownstreamDirection(self, downstream):
        """Request the downstream (1) element in the output when `downstream` is true."""
        if downstream:
            self.directions.append(1)

    def setColinear(self, colinear):
        """When true, match() rejects pairs whose directions differ."""
        self.colinear = colinear

    def setAntisense(self, antisense):
        """When true, match() rejects pairs whose directions are equal."""
        self.antisense = antisense

    def setNoOverlap(self, noOverlap):
        """When true, match() rejects queries that overlap the reference."""
        self.noOverlap = noOverlap

    def setMinDistance(self, distance):
        """Set the minimum accepted distance (None disables the check)."""
        self.minDistance = distance

    def setMaxDistance(self, distance):
        """Set the maximum accepted distance (None disables the check)."""
        self.maxDistance = distance

    def setNewTagName(self, tagName):
        """Set the base name of the tags written by setTags()."""
        self.tagName = tagName

    def match(self, transcriptRef, transcriptQuery, direction):
        """Return True when the query passes every configured filter
        (overlap, strand relationship, distance bounds) for this reference.

        `direction` is accepted for interface compatibility and is not used.
        """
        if self.noOverlap and transcriptRef.overlapWith(transcriptQuery):
            return False
        sameDirection = transcriptRef.getDirection() == transcriptQuery.getDirection()
        if self.colinear and not sameDirection:
            return False
        if self.antisense and sameDirection:
            return False
        if self.minDistance is not None or self.maxDistance is not None:
            # The distance is only computed when at least one bound is set.
            distance = transcriptRef.getDistance(transcriptQuery)
            if self.minDistance is not None and distance < self.minDistance:
                return False
            if self.maxDistance is not None and distance > self.maxDistance:
                return False
        return True

    def getFlanking(self, direction):
        """Fill self.flankings for one scan direction (-1 or 1).

        On each chromosome present in both inputs, queries and references are
        sorted with getOrderKey(). A reference becomes "pending" once a query
        lies beyond it (isInGoodRegion); the first later query accepted by
        match() is stored under the key direction * reference direction, and
        the reference stops being pending.
        """
        for chromosome in sorted(self.transcripts[REFERENCE].keys()):
            if chromosome not in self.transcripts[QUERY]:
                continue
            orderKey = lambda t: getOrderKey(t, direction)
            queries  = sorted(self.transcripts[QUERY][chromosome], key = orderKey)
            refs     = sorted(self.transcripts[REFERENCE][chromosome], key = orderKey)
            nbRefs   = len(refs)
            refIndex = 0
            pendingRefs = []
            progress = Progress(len(queries), "Reading chr %s %s" % (chromosome, STRANDSTOSTR[direction]), self.verbosity)
            for query in queries:
                while refIndex < nbRefs and isInGoodRegion(refs[refIndex], query, direction):
                    pendingRefs.append(refs[refIndex])
                    refIndex += 1
                stillPending = []
                for currentRef in pendingRefs:
                    if self.match(currentRef, query, direction):
                        self.flankings.setdefault(currentRef, {})[direction * currentRef.getDirection()] = query
                    else:
                        stillPending.append(currentRef)
                pendingRefs = stillPending
                progress.inc()
            progress.done()

    def setTags(self, query, reference, direction):
        """Annotate `query` with information about `reference` and return it.

        The reference name is its "ID" tag, else getName(), else its string
        form. `direction` selects the tag suffix through TAGS_REGION
        (-1, 1, or 0 when no direction was requested).
        """
        refName = reference.getTagValue("ID")
        if refName is None:
            refName = reference.getName()
        if refName is None:
            refName = reference.__str__()
        regionSuffix = TAGS_REGION[direction]
        query.setTagValue("%s%s" % (self.tagName, regionSuffix), refName)
        query.setTagValue("%s_%s%s" % (TAG_DISTANCE, self.tagName, regionSuffix), query.getDistance(reference))
        if direction == 0:
            # NOTE(review): both tags below are assumed to belong to this
            # branch (their names carry no region suffix) -- confirm against
            # the original indentation.
            query.setTagValue("%s_%s" % (TAG_SENSE, self.tagName), TAGS_SENSE[query.getDirection() * reference.getDirection()])
            relativeDistance = query.getRelativeDistance(reference)
            # Sign of the relative distance (-1, 0 or 1); replaces cmp(x, 0).
            sign = (relativeDistance > 0) - (relativeDistance < 0)
            query.setTagValue("%s_%s" % (TAG_REGION, self.tagName), TAGS_RREGION[sign])
        for tag in reference.getTagNames():
            if tag not in ("quality", "feature"):
                query.setTagValue("%s%s_%s" % (self.tagName, regionSuffix, tag), reference.getTagValue(tag))
        return query

    def write(self):
        """Tag the selected queries and write them, sorted by
        (chromosome, start, end), then close the writer.

        When directions were requested, the query stored for each requested
        direction is written; otherwise the stored query with the smallest
        distance to the reference is written, tagged with direction 0.
        """
        outputs  = set()
        progress = Progress(len(self.flankings.keys()), "Printing data", self.verbosity)
        for transcriptRef in self.flankings.keys():
            found = self.flankings[transcriptRef]
            if self.directions:
                for direction in self.directions:
                    if direction in found:
                        outputs.add(self.setTags(found[direction], transcriptRef, direction))
            elif found:
                closest = min(found.values(), key = lambda query: query.getDistance(transcriptRef))
                outputs.add(self.setTags(closest, transcriptRef, 0))
            progress.inc()
        for transcript in sorted(list(outputs), key = lambda flanking: (flanking.getChromosome(), flanking.getStart(), flanking.getEnd())):
            self.writer.addTranscript(transcript)
        self.writer.close()
        progress.done()

    def run(self):
        """Compute the flanking elements in both scan directions and write them."""
        self.flankings = {}
        for direction in STRANDS:
            self.getFlanking(direction)
        self.write()

if __name__ == "__main__":

    description = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. [Category: Data Selection]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")
    parser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")
    parser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")
    parser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")
    parser.add_option("-e", "--noOverlap",   dest="noOverlap",      action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")
    parser.add_option("-d", "--minDistance", dest="minDistance",    action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")
    parser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")
    parser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")
    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a traceback when a required
    # option is missing.
    for value, flag in ((options.inputFileName1, "-i"), (options.format1, "-f"),
                        (options.inputFileName2, "-j"), (options.format2, "-g"),
                        (options.outputFileName, "-o")):
        if value is None:
            parser.error("option %s is compulsory" % (flag))

    gf = GetFlanking(options.verbosity)
    gf.setInputFile(options.inputFileName1, options.format1, QUERY)
    gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)
    gf.setOutputFile(options.outputFileName)
    gf.addUpstreamDirection(options.upstream)
    gf.addDownstreamDirection(options.downstream)
    gf.setColinear(options.colinear)
    gf.setAntisense(options.antisense)
    gf.setNoOverlap(options.noOverlap)
    gf.setMinDistance(options.minDistance)
    gf.setMaxDistance(options.maxDistance)
    gf.setNewTagName(options.tagName)
    gf.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/GetRandomSubset.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,96 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2011 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import random
from optparse import OptionParser
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from SMART.Java.Python.misc.Progress import Progress

class GetRandomSubset(object):
    """Write a random subset of the transcripts of an input file.

    Call order: setInputFile(), setNumber(), setOutputFile(), run().
    setNumber() needs the parser created by setInputFile() when a percentage
    is given.
    """

    def __init__(self, verbosity):
        self.verbosity = verbosity

    def setInputFile(self, fileName, format):
        """Create the parser for fileName in the given format."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.parser = chooser.getParser(fileName)

    def setNumber(self, number, percent):
        """Set how many elements to output.

        `number` (int, or a string holding an int as delivered by the command
        line) takes precedence; otherwise `percent` of the parser's number of
        transcripts is used, truncated to an int.

        Raises Exception when both are None, ValueError when `number` is not
        an integer.
        """
        if number is not None:
            # The -n option is parsed as a string; random.sample() needs an int.
            self.number = int(number)
        elif percent is not None:
            self.number = int(float(percent) / 100 * self.parser.getNbTranscripts())
        else:
            raise Exception("Error! Number of elements to output is not given!")

    def setOutputFile(self, fileName):
        """Create the GFF3 writer for the selected transcripts."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def chooseElements(self):
        """Draw self.number distinct transcript indices into self.randomIndices.

        Raises ValueError when the requested size is negative or larger than
        the number of transcripts.
        """
        nbTranscripts = self.parser.getNbTranscripts()
        if not 0 <= self.number <= nbTranscripts:
            raise ValueError("Error! Cannot output %d elements out of %d!" % (self.number, nbTranscripts))
        self.randomIndices = random.sample(range(nbTranscripts), self.number)

    def run(self):
        """Choose the indices, then write the transcripts at those positions."""
        self.chooseElements()
        # Set lookup keeps the per-transcript membership test constant-time.
        selected  = set(self.randomIndices)
        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
        nbWritten = 0
        for index, transcript in enumerate(self.parser.getIterator()):
            if index in selected:
                self.writer.addTranscript(transcript)
                nbWritten += 1
            progress.inc()
        self.writer.write()
        self.writer.close()
        progress.done()
        if self.verbosity > 1:
            print("%d transcripts read" % (self.parser.getNbTranscripts()))
            print("%d transcripts written" % (nbWritten))


if __name__ == "__main__":

    description = "Get Random Subset v1.0.1: Get a random sub-set of a list of genomic coordinates. [Category: Personal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="string", help="number of elements to output [format: int]")
    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="string", help="percentage of elements to output (between 0 and 100) [format: int]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a traceback when a required
    # option is missing.
    for value, flag in ((options.inputFileName, "-i"), (options.format, "-f"), (options.outputFileName, "-o")):
        if value is None:
            parser.error("option %s is compulsory" % (flag))

    grs = GetRandomSubset(options.verbosity)
    grs.setInputFile(options.inputFileName, options.format)
    grs.setNumber(options.number, options.percent)
    grs.setOutputFile(options.outputFileName)
    grs.run()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/smart_toolShed/SMART/Java/Python/GetReadDistribution.py Thu Jan 17 10:52:14 2013 -0500 @@ -0,0 +1,283 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements i