0
|
1 #!/usr/bin/python3
|
|
2 '''<cut_fr.py is a script allowing more convenient use of cut in the Galaxy environment>
|
|
3 Copyright (C) <2015> <Lannes Romain>
|
|
4
|
|
5 This program is free software: you can redistribute it and/or modify
|
|
6 it under the terms of the GNU General Public License as published by
|
|
7 the Free Software Foundation, either version 3 of the License, or
|
|
8 (at your option) any later version.
|
|
9
|
|
10 This program is distributed in the hope that it will be useful,
|
|
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 GNU General Public License for more details.
|
|
14
|
|
15 You should have received a copy of the GNU General Public License
|
|
16 along with this program. If not, see <http://www.gnu.org/licenses/>.'''
|
|
17
|
|
18 #-------- import
|
|
19 import os
|
|
20 import argparse
|
|
21 import sys
|
|
22
|
|
23 #------- argument
|
|
def _build_parser():
    """Build the command-line parser of the cut wrapper.

    All options except ``--extend_print`` are required: the script cannot
    do anything useful without them, and a usage error is clearer than the
    TypeError/AttributeError a missing value would otherwise trigger later.
    """
    parser = argparse.ArgumentParser(description="renvoie le fichier inputs moins les collones sellectionner")
    parser.add_argument('--c', required=True, help="collumn to be removed")
    parser.add_argument('--i', required=True, help="input datasets")
    parser.add_argument('--type_cut', required=True, help="type of cut")
    parser.add_argument('--extend_print', help="add sdtrout print")
    parser.add_argument('--o', required=True, help="output datasets")
    return parser


def _parse_columns(column_spec):
    """Convert a column specification such as ``"c1,c2,c10"`` to ``[1, 2, 10]``.

    Each comma-separated item is a 1-based column number, optionally
    prefixed with the letter ``c``. The whole number is used, so columns
    above 9 are handled correctly.

    Raises:
        ValueError: if an item is not a positive integer (with or without
            the ``c`` prefix).
    """
    columns = []
    for token in column_spec.split(','):
        token = token.strip()
        # Drop the optional leading "c" and keep every remaining digit.
        number = token[1:] if token[:1] in ('c', 'C') else token
        if not number.isdigit() or int(number) < 1:
            raise ValueError("invalid column identifier: {!r}".format(token))
        columns.append(int(number))
    return columns


def _count_columns(path):
    """Return the number of tab-separated fields on the first line of *path*."""
    with open(path, 'r') as dataset:
        first_line = dataset.readline()
    return len(first_line.split('\t'))


def main(argv=None):
    """Run ``cut`` on the input dataset, keeping or dropping the given columns.

    Args:
        argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``.

    Returns:
        The exit status of the ``cut`` process (0 on success).

    With ``--type_cut include`` the listed columns are kept; with
    ``--type_cut exclude`` every column of the input (counted on its first
    line) except the listed ones is kept. Invalid arguments end the program
    through ``parser.error`` (exit status 2).
    """
    # Local import: subprocess is only needed here and the file-level
    # import block does not provide it.
    import subprocess

    parser = _build_parser()
    args = parser.parse_args(argv)

    try:
        selected = _parse_columns(args.c)
    except ValueError as err:
        parser.error(str(err))

    if args.type_cut == "exclude":
        # Keep every column of the input that was not listed.
        excluded = set(selected)
        number_column = _count_columns(args.i)
        fields = [index for index in range(1, number_column + 1)
                  if index not in excluded]
    elif args.type_cut == "include":
        fields = selected
    else:
        parser.error("--type_cut must be 'include' or 'exclude'")

    if not fields:
        # "cut -f" with an empty list is not a valid command.
        parser.error("no column left to output")

    str_argument = "-f" + ",".join(str(index) for index in fields)

    # Human-readable form of the command, used only for the optional report.
    str_cut = "cut " + str_argument + " " + args.i + " >" + args.o

    # Run cut without a shell: the paths are passed as plain arguments, so
    # spaces or shell metacharacters in them cannot alter the command.
    with open(args.o, 'wb') as output:
        result = subprocess.call(["cut", str_argument, args.i], stdout=output)

    if args.extend_print == "True":
        sys.stdout.write("input dataset : " + args.i + '\n')
        sys.stdout.write("column selected : " + args.c + '\n')
        sys.stdout.write("cut_type : " + args.type_cut + '\n')
        sys.stdout.write("output dataset : " + args.o + '\n')
        sys.stdout.write("command launch : " + str_cut + '\n')

    return result


if __name__ == "__main__":
    sys.exit(main())
|
|
87
|
|
88
|
|
89
|
|
90
|
|
91
|
|
92
|
|
93
|