Mercurial > repos > davidvanzessen > imgt_concatenate
changeset 1:b360a373835f draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 24 Nov 2016 10:27:14 -0500 |
parents | d3cf09f5a1a6 |
children | d77d4700fd0a |
files | imgt_concatenate.sh imgt_concatenate.xml |
diffstat | 2 files changed, 69 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/imgt_concatenate.sh Mon Aug 29 05:46:28 2016 -0400 +++ b/imgt_concatenate.sh Thu Nov 24 10:27:14 2016 -0500 @@ -32,37 +32,43 @@ indir=$1 outdir=$2 start_line=$3 #line # to start at, 2 to skip header - cat `find $indir/ -name "1_*"` | tail -n+${start_line} >> "$outdir/1_Summary.txt" - cat `find $indir/ -name "2_*"` | tail -n+${start_line} >> "$outdir/2_IMGT-gapped-nt-sequences.txt" - cat `find $indir/ -name "3_*"` | tail -n+${start_line} >> "$outdir/3_Nt-sequences.txt" - cat `find $indir/ -name "4_*"` | tail -n+${start_line} >> "$outdir/4_IMGT-gapped-AA-sequences.txt" - cat `find $indir/ -name "5_*"` | tail -n+${start_line} >> "$outdir/5_AA-sequences.txt" - cat `find $indir/ -name "6_*"` | tail -n+${start_line} >> "$outdir/6_Junction.txt" - cat `find $indir/ -name "7_*"` | tail -n+${start_line} >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" - cat `find $indir/ -name "8_*"` | tail -n+${start_line} >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" - cat `find $indir/ -name "9_*"` | tail -n+${start_line} >> "$outdir/9_V-REGION-AA-change-statistics.txt" - cat `find $indir/ -name "10_*"` | tail -n+${start_line} >> "$outdir/10_V-REGION-mutation-hotspots.txt" + id=$4 + cat `find $indir/ -name "1_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/1_Summary.txt" + cat `find $indir/ -name "2_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/2_IMGT-gapped-nt-sequences.txt" + cat `find $indir/ -name "3_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/3_Nt-sequences.txt" + cat `find $indir/ -name "4_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/4_IMGT-gapped-AA-sequences.txt" + cat `find $indir/ -name "5_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/5_AA-sequences.txt" + cat `find $indir/ -name "6_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/6_Junction.txt" + cat `find $indir/ -name "7_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/7_V-REGION-mutation-and-AA-change-table.txt" + cat `find $indir/ -name "8_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/8_V-REGION-nt-mutation-statistics.txt" + cat `find $indir/ -name "9_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/9_V-REGION-AA-change-statistics.txt" + cat `find $indir/ -name "10_*"` | tail -n+${start_line} | awk -v id=$id '{ if($1 !~ /^header/) {$2=$2id; } print}' >> "$outdir/10_V-REGION-mutation-hotspots.txt" } echo "Unpacking IMGT file 1.." imgt_unpack ${inputs[0]} "$workdir/input1" echo "Concatenating IMGT file 1..." -concat_imgt_files "$workdir/input1" "$workdir/output" 1 +id=${inputs[1]} +concat_imgt_files "$workdir/input1" "$workdir/output" 1 $id -remaining_inputs=("${inputs[@]:1}") +remaining_inputs=("${inputs[@]:2}") -i="2" -for input in "${remaining_inputs[@]}" -do - echo "Unpacking IMGT file $i.." +i="0" +while [ $i -lt ${#remaining_inputs[@]} ]; do + j=$((i+1)) + input="${remaining_inputs[$i]}" + id="${remaining_inputs[$j]}" + + echo "Unpacking IMGT file $j.." current_dir="$workdir/input${i}" imgt_unpack "${input}" "${current_dir}" echo "Concatenating IMGT file $1..." - concat_imgt_files "${current_dir}" "$workdir/output" 2 - i=$((i+1)) + concat_imgt_files "${current_dir}" "$workdir/output" 2 $id + i=$((i+2)) done + echo "Creating new IMGT zip" cd "$workdir/output" tar cfJ "$output" * @@ -70,3 +76,17 @@ #awk to fix the sequence numbers repeating? echo "Done" + +exit 0 + +i="1" +for input in "${remaining_inputs[@]}" +do + echo "Unpacking IMGT file $i.." + current_dir="$workdir/input${i}" + imgt_unpack "${input}" "${current_dir}" + echo "Concatenating IMGT file $1..." + concat_imgt_files "${current_dir}" "$workdir/output" 2 $id + i=$((i+1)) +done +
--- a/imgt_concatenate.xml Mon Aug 29 05:46:28 2016 -0400 +++ b/imgt_concatenate.xml Thu Nov 24 10:27:14 2016 -0500 @@ -2,16 +2,40 @@ <description> </description> <command interpreter="bash"> imgt_concatenate.sh $out_file -#for $i, $f in enumerate($files) - "$f.file" -#end for +#if str( $with_id.with_id_selector ) == "true": + #for $i, $f in enumerate($with_id.files) + "$f.file" + "$f.id" + #end for +#else + #for $i, $f in enumerate($with_id.files) + "$f.file" + "" + #end for +#end if </command> <inputs> - <repeat name="files" title="Sample" min="2" default="2"> - <param name="file" format="peptideshaker_archive" type="data" label="IMGT zip file" /> - </repeat> + <conditional name="with_id"> + <param name="with_id_selector" type="select" label="Add a file ID to the sequence ID"> + <option value="true">Add file ID to SequenceID to identify the origin file</option> + <option value="false" selected="True">Keep SequenceID as is</option> + </param> + <when value='true'> + <repeat name="files" title="Sample" min="2" default="2"> + <param name="file" format="peptideshaker_archive" type="data" label="IMGT zip file" /> + <param name="id" type="text" label="ID for dataset" size="25"> + <validator type="regex" message="Fill in an ID">.+</validator> + </param> + </repeat> + </when> + <when value='false'> + <repeat name="files" title="Sample" min="2" default="2"> + <param name="file" format="peptideshaker_archive" type="data" label="IMGT zip file" /> + </repeat> + </when> + </conditional> <param name="label" type="text" label="A name for the new dataset" size="25"> - <validator type="length" message="Fill in a name for the new dataset" min="1" /> + <validator type="regex" message="Fill in a name for the new dataset">.+</validator> </param> </inputs> <outputs>