view cut_fr.py @ 1:59bd6cbbeb92 draft

Uploaded
author r-lannes
date Wed, 16 Dec 2015 08:46:07 -0500
parents 93f267c26057
children
line wrap: on
line source

#!/usr/bin/python3
'''<cut_fr.py is a script allowing more convenient use of cut in a Galaxy environment>
    Copyright (C) <2015>  <Lannes Romain>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.'''

#-------- import
import argparse
import os
import subprocess
import sys

#------- argument
def build_parser():
    """Build the command-line parser of the cut wrapper.

    ``--c``, ``--i``, ``--o`` and ``--type_cut`` are mandatory because no cut
    command can be built without them, and ``--type_cut`` only accepts
    "exclude" or "include". Invalid invocations therefore stop with a usage
    message instead of failing later on an undefined or ``None`` value.
    """
    parser = argparse.ArgumentParser(description = "renvoie le fichier inputs moins les collones sellectionner")
    parser.add_argument('--c', required=True, help="collumn to be removed")
    parser.add_argument('--i', required=True, help="input datasets")
    parser.add_argument('--type_cut', required=True,
                        choices=("exclude", "include"), help="type of cut")
    parser.add_argument('--extend_print', help="add sdtrout print")
    parser.add_argument('--o', required=True, help="output datasets")
    return parser


#------- column handling
def parse_columns(column_spec):
    """Turn a column selection such as "c1,c2,c10" into [1, 2, 10].

    Only the trailing digits of each comma-separated item are used, so the
    leading letter is optional and multi-digit columns are read in full
    (reading just the last character would turn "c10" into column 0).

    Raises:
        ValueError: if an item does not end with a number.
    """
    numbers = []
    for item in column_spec.split(','):
        item = item.strip()
        # Everything that rstrip() removes is the trailing run of digits.
        digits = item[len(item.rstrip('0123456789')):]
        if not digits:
            raise ValueError(
                "invalid column '" + item + "' in '" + column_spec + "'")
        numbers.append(int(digits))
    return numbers


def count_columns(input_path):
    """Return the number of tab-separated fields on the first line of a file."""
    with open(input_path, 'r') as dataset:
        first_line = dataset.readline()
    return len(first_line.split('\t'))


def build_field_spec(type_cut, columns, number_column=None):
    """Build the ``-f`` option handed to cut.

    Args:
        type_cut: "include" to keep ``columns``; "exclude" to keep every
            column from 1 to ``number_column`` that is not in ``columns``.
        columns: column numbers, 1-based.
        number_column: total number of columns in the input; only read in
            "exclude" mode.

    Returns:
        A string such as "-f1,3,4".

    Raises:
        ValueError: if no column would remain in the output.
    """
    if type_cut == "exclude":
        removed = set(columns)
        kept = [index for index in range(1, number_column + 1)
                if index not in removed]
    else:
        kept = list(columns)
    if not kept:
        raise ValueError("no column left to output")
    return "-f" + ",".join(str(index) for index in kept)


#------- command launch
def run_cut(field_spec, input_path, output_path):
    """Run cut on ``input_path`` and write its output to ``output_path``.

    The command is passed as an argument list, without a shell, so paths
    containing spaces or shell metacharacters are handled literally.

    Returns:
        The exit status of cut.
    """
    with open(output_path, 'wb') as output_handle:
        # "--" ends option parsing so an input path starting with "-" is
        # still treated as a file name.
        return subprocess.call(["cut", field_spec, "--", input_path],
                               stdout=output_handle)


def main(argv=None):
    """Parse the arguments, run cut and optionally report what was done.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The exit status of cut, to be used as the process exit status.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        columns = parse_columns(args.c)
        number_column = None
        if args.type_cut == "exclude":
            # The column count is only needed to list the columns to keep.
            number_column = count_columns(args.i)
        str_argument = build_field_spec(args.type_cut, columns, number_column)
    except ValueError as error:
        # parser.error() prints the usage message and exits with status 2.
        parser.error(str(error))

    status = run_cut(str_argument, args.i, args.o)

    if args.extend_print == "True":
        # Shell-like rendering kept for display only; it is never executed.
        str_cut = "cut " + str_argument + " " + args.i + " >" + args.o
        sys.stdout.write("input dataset : " + args.i + '\n')
        sys.stdout.write("column selected : " + args.c + '\n')
        sys.stdout.write("cut_type : " + args.type_cut+'\n')
        sys.stdout.write("output dataset : " + args.o+'\n')
        sys.stdout.write("command launch : " + str_cut+'\n' )
    return status


if __name__ == "__main__":
    sys.exit(main())