#!/usr/local/bin/perl -w
# Daniel Brami
# Util to split interlaced FASTQ files into pairs
use strict;
# Standard lib
use IO::File;
use File::Basename;
my $INPUT=shift;
if (!(defined ($INPUT)) || ($INPUT =~ '^\-')){
die "Usage: $0 <interleaved paired FASTQ file>\n";
}
my ($name,$path,$suffix) = fileparse($INPUT, qw/fastq FASTQ txt TXT/);
my $FH_IN = new IO::File($INPUT, "r") or die "could not open $INPUT: $!\n";
my $FH_OUT1 = new IO::File($name."split1.$suffix", "w") or die "could not open $name.split1$suffix for writing: $!\n";
my $FH_OUT2 = new IO::File($name."split2.$suffix", "w") or die "could not open $name.split2$suffix for writing: $!\n";
my ($recs1, $recs2)= (0,0);
my $flipflop = 1;
my $counter = 0;
my ($line, $TXT);
while($line = $FH_IN->getline()){
$TXT .= $line;
$counter++;
if($counter == 4){
if($flipflop == 1){
print $FH_OUT1 $TXT;
++$recs1;
}else{
print $FH_OUT2 $TXT;
++$recs2;
}
$counter = 0;
$TXT = '';
$flipflop *= -1;
}
}
$FH_IN->close();
$FH_OUT1->close();
$FH_OUT2->close();
print STDERR "Processed $recs1 records for pair file 1 and $recs2 records for pair file 2.\n";
if($recs1 != $recs2){
print STDERR "The number of processed records does not match - check input data!";
}exit;
Wednesday, March 23, 2011
Split fastq mates
Here is code in perl to split a single paired interleaved FASTQ file into a pair of FASTQ files:
Quality trimming fastq files using CLC Bio cell
- copy the mandatory license.properties file to your working directory
- /usr/local/packages/clc-ngs-cell/quality_trim -o
-p -c -l -m -r -i & - Optional: Split the resulting interleaved paired file into two components:
~/workspace/meta_scripts/splitMatedFastq.pl
Subscribe to:
Posts (Atom)