Mercurial > repos > davidvanzessen > imgt_concatenate

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_concatenate.sh	Mon Aug 29 05:46:28 2016 -0400
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Concatenate IMGT archives into one new archive.
+#
+# Usage: imgt_concatenate.sh OUTPUT INPUT [INPUT...]
+#   OUTPUT  path of the archive to create
+#   INPUT   IMGT archive to merge; inputs are merged in the order given
+#
+# Directory containing this script (not referenced again in this script).
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+# An output path and at least one input are needed for anything below to work.
+if [ "$#" -lt 2 ]; then
+	echo "Usage: $0 OUTPUT INPUT [INPUT...]" >&2
+	exit 1
+fi
+
+args=("$@")
+output=$1
+inputs=("${args[@]:1}")
+
+workdir="$PWD"
+
+echo "Output: $output"
+# [*] joins the elements into a single word, which is what a log line wants.
+echo "Inputs: ${inputs[*]}"
+
+# The merged files are only ever appended to, so the directory must start out
+# empty: plain mkdir (no -p) fails when it already exists, and we stop there
+# instead of appending to leftovers.
+if ! mkdir "$workdir/output"; then
+	echo "Cannot create $workdir/output" >&2
+	exit 1
+fi
+
+# Unpack one IMGT archive into a directory.
+# Arguments:
+#   $1 - archive to unpack (zip, or xz-compressed tar)
+#   $2 - directory to extract into; created when missing
+# Returns:
+#   status of the extraction command, or 1 when the directory cannot be
+#   created or the archive type is not recognised
+function imgt_unpack {
+	local imgt_zip=$1
+	local outdir=$2
+	local type
+	mkdir -p "$outdir" || return 1
+	# Detect the format from the file content with file(1); -b keeps the
+	# file name itself out of the text that is matched below.
+	type="$(file -b "$imgt_zip")"
+	if [[ "$type" == *"Zip archive"* ]] ; then
+		unzip "$imgt_zip" -d "$outdir"
+	elif [[ "$type" == *"XZ compressed data"* ]] ; then
+		echo "tar -xJf $imgt_zip -C $outdir"
+		tar -xJf "$imgt_zip" -C "$outdir"
+	else
+		echo "Unsupported archive type for $imgt_zip: $type" >&2
+		return 1
+	fi
+}
+
+# Append the ten numbered IMGT result files found under a directory to the
+# files with the same number in the output directory.
+# Arguments:
+#   $1 - directory to search (recursively) for files named "<number>_*"
+#   $2 - directory holding the merged files; they are appended to
+#   $3 - line number to start copying at: 1 keeps the header, 2 skips it
+function concat_imgt_files {
+	local indir=$1
+	local outdir=$2
+	local start_line=$3
+	local merged
+	for merged in \
+		"1_Summary.txt" \
+		"2_IMGT-gapped-nt-sequences.txt" \
+		"3_Nt-sequences.txt" \
+		"4_IMGT-gapped-AA-sequences.txt" \
+		"5_AA-sequences.txt" \
+		"6_Junction.txt" \
+		"7_V-REGION-mutation-and-AA-change-table.txt" \
+		"8_V-REGION-nt-mutation-statistics.txt" \
+		"9_V-REGION-AA-change-statistics.txt" \
+		"10_V-REGION-mutation-hotspots.txt"
+	do
+		# ${merged%%_*} is the numeric prefix ("1" .. "10") used to find the
+		# source files. Letting find run cat keeps file names with spaces
+		# intact and never starts cat without operands, where it would sit
+		# reading stdin.
+		find "$indir/" -type f -name "${merged%%_*}_*" -exec cat {} + \
+			| tail -n "+${start_line}" >> "$outdir/$merged"
+	done
+}
+
+# Merge the inputs in order. Only the first input contributes its header
+# line (start line 1); every later input is appended from line 2 onwards.
+i=1
+first_line=1
+for input in "${inputs[@]}"
+do
+	echo "Unpacking IMGT file $i.."
+	current_dir="$workdir/input${i}"
+	imgt_unpack "${input}" "${current_dir}"
+	echo "Concatenating IMGT file $i..."
+	concat_imgt_files "${current_dir}" "$workdir/output" "$first_line"
+	first_line=2
+	i=$((i+1))
+done
+
+echo "Creating new IMGT zip"
+# The archive is written after the cd below, so anchor a relative output
+# path to the starting directory first.
+case "$output" in
+	/*) ;;
+	*) output="$workdir/$output" ;;
+esac
+# Archive from inside the output directory so member names carry no path.
+cd "$workdir/output" || exit 1
+if ! tar cfJ "$output" *; then
+	echo "Could not create $output" >&2
+	exit 1
+fi
+
+# TODO: the sequence numbers may repeat across inputs; fix them up with awk?
+
+echo "Done"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_concatenate.xml	Mon Aug 29 05:46:28 2016 -0400
@@ -0,0 +1,25 @@
+<tool id="imgt_concatenate" name="IMGT Concatenate" version="1.0">
+	<description> </description>
+	<!-- Calls imgt_concatenate.sh with the output dataset path first, followed by one quoted path per "Sample" entry. -->
+	<command interpreter="bash">
+		imgt_concatenate.sh "$out_file"
+#for $f in $files
+ "$f.file"
+#end for
+	</command>
+	<inputs>
+		<!-- At least two archives must be supplied (min="2"). -->
+		<repeat name="files" title="Sample" min="2" default="2">
+			<param name="file" format="peptideshaker_archive" type="data" label="IMGT zip file" />
+		</repeat>
+		<!-- Used as the label of the output dataset; must not be empty. -->
+		<param name="label" type="text" label="A name for the new dataset" size="25">
+			<validator type="length" message="Fill in a name for the new dataset" min="1" />
+		</param>
+	</inputs>
+	<outputs>
+		<data format="peptideshaker_archive" name="out_file" label="${label}"/>
+	</outputs>
+	<help>
+Concatenate two or more IMGT files to create one new IMGT zip with all the sequences of the inputs.
+
+	</help>
+
+</tool>