From f69f2174f2ddecdc8f7e58c5755e3370c4d5e203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carn=C3=AB=20Draug?= Date: Fri, 8 Feb 2019 13:45:16 +0000 Subject: [PATCH] Initial commit. Imported from 334b1b593f9f97c1393e997b45e48cb89782007a --- .gitignore | 10 + Changes | 5 + dist.ini | 12 + lib/Bio/Tools/Run/Maq.pm | 650 ++++++++++++++++++++++++++++++++ lib/Bio/Tools/Run/Maq/Config.pm | 335 ++++++++++++++++ t/Maq.t | 196 ++++++++++ t/data/campycoli.fas | 50 +++ t/data/r1.fq | 500 ++++++++++++++++++++++++ t/data/r2.fq | 500 ++++++++++++++++++++++++ 9 files changed, 2258 insertions(+) create mode 100644 .gitignore create mode 100644 Changes create mode 100644 dist.ini create mode 100755 lib/Bio/Tools/Run/Maq.pm create mode 100755 lib/Bio/Tools/Run/Maq/Config.pm create mode 100755 t/Maq.t create mode 100755 t/data/campycoli.fas create mode 100644 t/data/r1.fq create mode 100644 t/data/r2.fq diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..618cad3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.* +*~ +!.gitignore + +# ignore build +Bio-Tools-Run-Maq-* + +# ignore text editors stuff +.emacs.* +*.komodoproject diff --git a/Changes b/Changes new file mode 100644 index 0000000..bbee184 --- /dev/null +++ b/Changes @@ -0,0 +1,5 @@ +Summary of important user-visible changes for Bio-Tools-Run-Maq +--------------------------------------------------------------- + +{{$NEXT}} + * First release after split from bioperl-run. diff --git a/dist.ini b/dist.ini new file mode 100644 index 0000000..9d62b35 --- /dev/null +++ b/dist.ini @@ -0,0 +1,12 @@ +name = Bio-Tools-Run-Maq +version = 1.7.3 +author = Mark A. Jensen +copyright_holder = Mark A. Jensen +license = Perl_5 + +;; Modules should be fixed so that these don't have to be removed. 
+[@BioPerl] +-remove = PodCoverageTests +-remove = PodWeaver +-remove = Test::EOL +-remove = Test::NoTabs diff --git a/lib/Bio/Tools/Run/Maq.pm b/lib/Bio/Tools/Run/Maq.pm new file mode 100755 index 0000000..66d3657 --- /dev/null +++ b/lib/Bio/Tools/Run/Maq.pm @@ -0,0 +1,650 @@ +# +# BioPerl module for Bio::Tools::Run::Maq +# +# Please direct questions and support issues to +# +# Cared for by Mark A. Jensen +# +# Copyright Mark A. Jensen +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Run::Maq - Run wrapper for the Maq short-read assembler *BETA* + +=head1 SYNOPSIS + + # create an assembly + $maq_fac = Bio::Tools::Run::Maq->new(); + $maq_assy = $maq_fac->run( 'reads.fastq', 'refseq.fas' ); + # if IO::Uncompress::Gunzip is available... + $maq_assy = $maq_fac->run( 'reads.fastq.gz', 'refseq.gz'); + # paired-end + $maq_assy = $maq_fac->run( 'reads.fastq', 'refseq.fas', 'paired-reads.fastq'); + # be more strict + $maq_fac->set_parameters( -c2q_min_map_quality => 60 ); + $maq_assy = $maq_fac->run( 'reads.fastq', 'refseq.fas', 'paired-reads.fastq'); + + # run maq commands separately + $maq_fac = Bio::Tools::Run::Maq->new( + -command => 'pileup', + -single_end_quality => 1 ); + $maq_fac->run_maq( -bfa => 'refseq.bfa', + -map => 'maq_assy.map', + -txt => 'maq_assy.pup.txt' ); + +=head1 DESCRIPTION + +This module provides a wrapper interface for Heng Li's +reference-directed short read assembly suite C (see +L for manuals and +downloads). + +There are two modes of action. + +=over + +=item * EasyMaq + +The first is a simple pipeline through the C commands, taking +your read data in and squirting out an assembly object of type +L. 
The pipeline is based on the one performed +by C: + + Action maq commands + ------ ------------ + data conversion to fasta2bfa, fastq2bfq + maq binary formats + + map sequence reads map + to reference seq + + assemble, creating assemble + consensus + + convert map & cns mapview, cns2fq + files to plaintext + (for B:A:IO:maq) + +Command-line options can be directed to the C<map>, C<assemble>, and +C<cns2fq> steps. See L</OPTIONS> below. + +=item * BigMaq + +The second mode is direct access to C<maq> commands. To run a C<maq> +command, construct a run factory, specifying the desired command using +the C<-command> argument in the factory constructor, along with +options specific to that command (see L</OPTIONS>): + + $maqfac = Bio::Tools::Run::Maq->new( -command => 'fasta2bfa' ); + +To execute, use the C<run_maq> method. Input and output files are +specified in the arguments of C<run_maq> (see L</FILES>): + + $maqfac->run_maq( -fas => "myref.fas", -bfa => "myref.bfa" ); + +=back + +=head1 OPTIONS + +C<maq> is complex, with many subprograms (commands) and command-line +options and file specs for each. This module attempts to provide +commands and options comprehensively. You can browse the choices like so: + + $maqfac = Bio::Tools::Run::Maq->new( -command => 'assemble' ); + # all maq commands + @all_commands = $maqfac->available_parameters('commands'); + @all_commands = $maqfac->available_commands; # alias + # just for assemble + @assemble_params = $maqfac->available_parameters('params'); + @assemble_switches = $maqfac->available_parameters('switches'); + @assemble_all_options = $maqfac->available_parameters(); + +Reasonably mnemonic names have been assigned to the single-letter +command line options. These are the names returned by +C<available_parameters>, and can be used in the factory constructor +like typical BioPerl named parameters. + +See L<Bio::Tools::Run::Maq::Config> for the gory details. + +=head1 FILES + +When a command requires filenames, these are provided to the C<run_maq> method, not +the constructor (C<new()>). 
To see the set of files required by a command, use +C<available_parameters('filespec')> or the alias C<filespec()>: + + $maqfac = Bio::Tools::Run::Maq->new( -command => 'map' ); + @filespec = $maqfac->filespec; + +This example returns the following array: + + map + bfa + bfq1 + #bfq2 + 2>#log + +This indicates that map (C<maq> binary mapfile), bfa (C<maq> binary +fasta), and bfq (C<maq> binary fastq) files MUST be specified, another +bfq file MAY be specified, and a log file receiving STDERR also MAY be +specified. Use these in the C<run_maq> call like so: + + $maqfac->run_maq( -map => 'my.map', -bfa => 'myrefseq.bfa', + -bfq1 => 'reads1.bfq', -bfq2 => 'reads2.bfq' ); + +Here, the C<log> parameter was unspecified. Therefore, the object will store +the program's STDERR output for you in the C<stderr()> attribute: + + handle_map_warning($maqfac) if ($maqfac->stderr =~ /warning/); + +STDOUT for a run is also saved, in C<stdout()>, unless a file is specified +to slurp it according to the filespec. C<maq> STDOUT usually contains useful +information on the run. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion +http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +L<bioperl-l@bioperl.org> + +rather than to the module maintainer directly. Many experienced and +responsive experts will be able to look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +the web: + + http://redmine.open-bio.org/projects/bioperl/ + +=head1 AUTHOR - Mark A. 
Jensen
+
+  Email maj -at- fortinbras -dot- us
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+
+package Bio::Tools::Run::Maq;
+use strict;
+our $HAVE_IO_UNCOMPRESS;
+
+# Gzip support is optional: remember whether the module could be loaded.
+BEGIN {
+    eval {require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1};
+}
+
+use IPC::Run;
+
+# Object preamble - inherits from Bio::Root::Root
+
+# NOTE(review): relative @INC entry looks like a development leftover - confirm
+use lib '../../..';
+use Bio::Root::Root;
+use Bio::Tools::Run::Maq::Config;
+use Bio::Tools::GuessSeqFormat;
+use File::Basename qw(fileparse);
+
+use base qw(Bio::Root::Root Bio::Tools::Run::AssemblerBase );
+
+## maq ( from tigr )
+our $program_name = 'maq'; # name of the executable
+
+# Note:
+# other globals required by Bio::Tools::Run::AssemblerBase are
+# imported from Bio::Tools::Run::Maq::Config
+
+our $qual_param = 'quality_file';
+our $use_dash = 1;
+our $join = ' ';
+
+our $asm_format = 'maq';
+
+=head2 new()
+
+ Title   : new
+ Usage   : my $obj = Bio::Tools::Run::Maq->new();
+ Function: Builds a new Bio::Tools::Run::Maq object
+ Returns : an instance of Bio::Tools::Run::Maq
+ Args    : named parameters; -command selects the maq command to wrap
+           (defaults to the composite 'run' pipeline), the remaining
+           names are the options available for that command
+
+=cut
+
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    $self->parameters_changed(1);
+    $self->_register_program_commands( \@program_commands, \%command_prefixes );
+    # Fall back to the composite 'run' command unless the caller named one.
+    # The match is anchored so that an unrelated argument that merely
+    # contains the text "command" does not suppress the default.
+    unless (grep { defined($_) && /^-?command$/i } @args) {
+        push @args, '-command', 'run';
+    }
+    $self->_set_program_options(\@args, \@program_params, \@program_switches,
+                                \%param_translation, $qual_param, $use_dash, $join);
+    $self->program_name($program_name) if not defined $self->program_name();
+    if ($^O =~ /cygwin/) {
+        # cygwin: locate the executable through the shell
+        my @kludge = `PATH=\$PATH:/usr/bin:/usr/local/bin which $program_name`;
+        # keep the current program name when 'which' found nothing
+        if (@kludge && defined $kludge[0]) {
+            chomp $kludge[0];
+            $self->program_name($kludge[0]) if length $kludge[0];
+        }
+    }
+    $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
+    $self->_assembly_format($asm_format);
+    return $self;
+}
+
+=head2 run
+
+ Title   : run
+ Usage   : $assembly = 
$maq_assembler->run($read1_fastq_file,
+                                      $refseq_fasta_file,
+                                      $read2_fastq_file);
+ Function: Run the maq assembly pipeline.
+ Returns : Assembly results (file, IO object or Assembly object)
+ Args    : - fastq file containing single-end reads
+           - fasta file containing the reference sequence
+           - [optional] fastq file containing paired-end reads
+ Note    : gzipped inputs are allowed if IO::Uncompress::Gunzip
+           is available
+ Throws  : if a required file argument is missing, a gzipped input
+           cannot be expanded, or an input does not look like the
+           expected format
+
+=cut
+
+sub run {
+    my ($self, $rd1_file, $ref_file, $rd2_file) = @_;
+
+    # Sanity checks
+    $self->_check_executable();
+    $rd1_file or $self->throw("Fastq reads file required at arg 1");
+    $ref_file or $self->throw("Fasta refseq file required at arg 2");
+
+    # Expand gzipped files as necessary. The loop variable aliases the
+    # three file variables, so assigning to it swaps in the temp file.
+    for my $file ($rd1_file, $ref_file, $rd2_file) {
+        next unless $file;
+        next unless $file =~ /\.gz[^.]*$/;
+        unless ($HAVE_IO_UNCOMPRESS) {
+            $self->throw("IO::Uncompress::Gunzip not available, can't expand '$file'");
+        }
+        my ($tfh, $tf) = $self->io->tempfile;
+        my $z = IO::Uncompress::Gunzip->new($file)
+            or $self->throw("Can't gunzip '$file': $IO::Uncompress::Gunzip::GunzipError");
+        while (defined(my $line = <$z>)) {
+            print $tfh $line;
+        }
+        $z->close;
+        close $tfh;
+        $file = $tf;
+    }
+
+    # Check that every input looks like the format maq expects
+    my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$rd1_file);
+    $guesser->guess eq 'fastq' or $self->throw("Reads file doesn't look like fastq at arg 1");
+    $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$ref_file);
+    $guesser->guess eq 'fasta' or $self->throw("Refseq file doesn't look like fasta at arg 2");
+    if ($rd2_file) {
+        $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$rd2_file);
+        $guesser->guess eq 'fastq' or $self->throw("Reads file doesn't look like fastq at arg 3");
+    }
+
+    # maq format conversion
+    ($rd1_file, $ref_file, $rd2_file) = $self->_prepare_input_sequences($rd1_file, $ref_file, $rd2_file);
+
+    # Assemble
+    my ($maq_file, $faq_file) = $self->_run($rd1_file, $ref_file, $rd2_file);
+
+    # Export results in desired object type
+    my $asm = $self->_export_results($maq_file);
+    return $asm;
+}
+
+=head2 run_maq()
+
+ Title   : run_maq
+ Usage   : $obj->run_maq( @file_args )
+ Function: Run a maq command as 
specified during object construction
+ Returns : the named arguments as passed in (leading dashes removed
+           from the names)
+ Args    : a specification of the files to operate on: named
+           arguments whose names are taken from the command's
+           filespec (see available_parameters('filespec'))
+
+=cut
+
+sub run_maq {
+    my ($self, @args) = @_;
+    # _translate_params will provide an array of command/parameters/switches
+    # -- these are set at object construction
+    # to set up the run, need to add the files to the call
+    # -- provide these as arguments to this function
+
+    # (a plain conditional: "my $x = ... if ..." has undefined behaviour)
+    my $cmd = $self->can('command') ? $self->command : undef;
+    $self->throw("No maq command specified for the object") unless $cmd;
+    # setup files necessary for this command
+    my $filespec = $command_files{$cmd};
+    $self->throw("No command-line file specification is defined for command '$cmd'; check Bio::Tools::Run::Maq::Config") unless $filespec;
+
+    # parse args based on filespec
+    # require named args
+    $self->throw("Named args are required") unless !(@args % 2);
+    # strip the leading dash from the names only; the values are file
+    # names (or arrayrefs of them) and must be passed through untouched
+    for my $i (grep { !($_ % 2) } 0..$#args) {
+        $args[$i] =~ s/^-//;
+    }
+    my %args = @args;
+    # validate: every filespec entry not marked optional ('#') is required
+    my @req = map {
+        my $s = $_;
+        $s =~ s/^[012]?[<>]//;
+        $s =~ s/[^a-zA-Z0-9_]//g;
+        $s
+    } grep !/[#]/, @$filespec;
+    !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req;
+    # set up redirects
+    my ($in, $out, $err);
+    for (@$filespec) {
+        m/^1?>(.*)/ && do {
+            defined($args{$1}) && ( open($out,">", $args{$1}) or $self->throw("Open for write error : $!"));
+            next;
+        };
+        m/^2>#?(.*)/ && do {
+            defined($args{$1}) && (open($err, ">", $args{$1}) or $self->throw("Open for write error : $!"));
+            next;
+        };
+        m/^<#?(.*)/ && do {
+            defined($args{$1}) && (open($in, "<", $args{$1}) or $self->throw("Open for read error : $!"));
+            next;
+        }
+    }
+    # streams without a file are captured in the object (or discarded, for stdin)
+    my $dum;
+    $in || ($in = \$dum);
+    $out || ($out = \$self->{'stdout'});
+    $err || ($err = \$self->{'stderr'});
+
+    # Get program executable
+    my $exe = $self->executable;
+    # Get command-line options
+    my $options = $self->_translate_params();
+    # Get file specs sans redirects in correct order
+    my @specs = map {
+        my $s = $_;
+        $s =~ s/[^a-zA-Z0-9_]//g;
+        $s
+    } grep !/[<>]/, @$filespec;
+    # expand arrayrefs in place (every one of them, wherever it sits in
+    # the list) and drop the optional files that were not supplied
+    my @files = grep { defined }
+                map  { ref($_) eq 'ARRAY' ? @$_ : $_ } @args{@specs};
+    my @ipc_args = ( $exe, @$options, @files );
+
+    eval {
+        IPC::Run::run(\@ipc_args, $in, $out, $err) or
+            die ("There was a problem running $exe : $!");
+    };
+    if ($@) {
+        $self->throw("$exe call crashed: $@");
+    }
+
+    # return arguments as specified on call
+    return @args;
+}
+
+=head2 stdout()
+
+ Title   : stdout
+ Usage   : $fac->stdout()
+ Function: store the output from STDOUT for the run,
+           if no file specified in run_maq()
+ Example :
+ Returns : scalar string
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub stdout {
+    my $self = shift;
+
+    return $self->{'stdout'} = shift if @_;
+    return $self->{'stdout'};
+}
+
+=head2 stderr()
+
+ Title   : stderr
+ Usage   : $fac->stderr()
+ Function: store the output from STDERR for the run,
+           if no file is specified in run_maq()
+ Example :
+ Returns : scalar string
+ Args    : on set, new value (a scalar or undef, optional)
+
+=cut
+
+sub stderr {
+    my $self = shift;
+
+    return $self->{'stderr'} = shift if @_;
+    return $self->{'stderr'};
+}
+
+
+
+=head1 Bio::Tools::Run::AssemblerBase overrides
+
+=head2 _check_sequence_input()
+
+ No-op.
+
+=cut
+
+sub _check_sequence_input {
+    return 1;
+}
+
+=head2 _check_optional_quality_input()
+
+ No-op.
+
+=cut
+
+sub _check_optional_quality_input {
+    return 1;
+}
+
+=head2 _prepare_input_sequences
+
+ Convert input fastq and fasta to maq format.
+
+ Args    : ($read1, $refseq, $read2) positionally, or the named
+           arguments -read1, -refseq, -read2 ($read2 is optional)
+ Returns : list of temp file names: (bfq of read1, bfa of refseq,
+           bfq of read2 or undef)
+
+=cut
+
+sub _prepare_input_sequences {
+
+    my ($self, @args) = @_;
+    my (%args, $read1, $read2, $refseq);
+    if (grep /^-/, @args) { # named parms
+        $self->throw("Input args not an even number") unless !(@args % 2);
+        %args = @args;
+        ($read1, $refseq, $read2) = @args{qw( -read1 -refseq -read2 )};
+    }
+    else {
+        ($read1, $refseq, $read2) = @args;
+    }
+    # just handle file input for now...
+    $self->throw("maq requires at least one FASTQ read file and one FASTA reference sequence")
+        unless (defined $read1 && defined $refseq);
+    $self->throw("File cannot be found")
+        unless ( -e $read1 && -e $refseq && (!defined $read2 || -e $read2) );
+
+    # maq needs its own fasta/fastq format. Use its own converters to
+    # create tempfiles in bfa, bfq format.
+    my ($ref_h, $ref_file, $rd1_h, $rd1_file, $rd2_h, $rd2_file);
+    ($ref_h, $ref_file) = $self->io->tempfile( -dir => $self->tempdir() );
+    ($rd1_h, $rd1_file) = $self->io->tempfile( -dir => $self->tempdir() );
+    $ref_h->close;
+    $rd1_h->close;
+    my $fac = Bio::Tools::Run::Maq->new( -command => 'fasta2bfa' );
+    $fac->run_maq( -bfa => $ref_file, -fas => $refseq );
+    $fac->set_parameters( -command => 'fastq2bfq' );
+    $fac->run_maq( -bfq => $rd1_file, -faq => $read1 );
+    if (defined $read2) {
+        ($rd2_h, $rd2_file) = $self->io->tempfile( -dir => $self->tempdir() );
+        $rd2_h->close;
+        $fac->run_maq( -bfq => $rd2_file, -faq => $read2);
+    }
+    return ($rd1_file, $ref_file, $rd2_file);
+}
+
+=head2 _collate_subcmd_args()
+
+ Title   : _collate_subcmd_args
+ Usage   : $args_hash = $self->_collate_subcmd_args
+ Function: collate parameters and switches into command-specific
+           arg lists for passing to new()
+ Returns : hash of named argument lists
+ Args    : [optional] composite cmd prefix (scalar string)
+           [default is 'run']
+
+=cut
+
+sub _collate_subcmd_args {
+    my $self = shift;
+    my $cmd = shift;
+    my %ret;
+    # default command is 'run'
+    $cmd ||= 'run';
+    my $subcmds = $composite_commands{$cmd}
+        or $self->throw("'$cmd' is not a composite command");
+    my $cur_options = $self->{'_options'};
+
+    # collate
+    foreach my $subcmd (@$subcmds) {
+        # find the composite cmd form of the argument in
+        # the current params and switches
+        # e.g., map_max_mismatches
+        my @params = grep /^${subcmd}_/, @{$$cur_options{'_params'}};
+        my @switches = grep /^${subcmd}_/, @{$$cur_options{'_switches'}};
+        $ret{$subcmd} = [];
+        # create an argument list suitable for passing to new() of
+        # the subcommand factory...
+        foreach my $opt (@params, @switches) {
+            my $subopt = $opt;
+            $subopt =~ s/^${subcmd}_//;
+            push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt;
+        }
+    }
+    return \%ret;
+}
+
+=head2 _run()
+
+ Title   : _run
+ Usage   : $factory->_run()
+ Function: Run a maq assembly pipeline
+ Returns : depends on call (An assembly file)
+ Args    : - single end read file in maq bfq format
+           - reference seq file in maq bfa format
+           - [optional] paired end read file in maq bfq format
+
+=cut
+
+sub _run {
+    my ($self, $rd1_file, $ref_file, $rd2_file) = @_;
+    # Get program executable
+    my $exe = $self->executable;
+
+    # treat run() as a separate command and duplicate the component-specific
+    # parameters in the config globals
+
+    # Setup needed files and filehandles first
+    my $tdir = $self->tempdir();
+    my ($maph, $mapf) = $self->io->tempfile( -template => 'mapXXXX', -dir => $tdir ); #map
+    my ($cnsh, $cnsf) = $self->io->tempfile( -template => 'cnsXXXX', -dir => $tdir ); #consensus
+    my ($maqh, $maqf) = $self->_prepare_output_file();
+    my ($nm,$dr,$suf) = fileparse($maqf,".maq");
+    my $faqf = $dr.$nm.".cns.fastq";
+
+    $_->close for ($maph, $cnsh, $maqh);
+
+    # Get command-line options for the component commands:
+    my $subcmd_args = $self->_collate_subcmd_args();
+    # map reads to ref seq
+    # set up subcommand options
+
+    my $maq = Bio::Tools::Run::Maq->new(
+        -command => 'map',
+        @{$subcmd_args->{map}}
+        );
+    # -bfq2 may be undef (single-end run); run_maq drops undefined files
+    $maq->run_maq( -map => $mapf, -bfa => $ref_file, -bfq1 => $rd1_file,
+                   -bfq2 => $rd2_file );
+    # assemble reads into consensus
+    $maq = Bio::Tools::Run::Maq->new(
+        -command => 'assemble',
+        @{$subcmd_args->{asm}}
+        );
+    $maq->run_maq( -cns => $cnsf, -bfa => $ref_file, -map => $mapf );
+    # convert map into plain text
+    $maq = Bio::Tools::Run::Maq->new(
+        -command => 'mapview'
+        );
+    $maq->run_maq( -map => $mapf, -txt => $maqf );
+
+    # convert consensus into plain text fastq
+    $maq = Bio::Tools::Run::Maq->new(
+        -command => 'cns2fq',
+        @{$subcmd_args->{c2q}}
+        );
+    $maq->run_maq( -cns => $cnsf, -faq => $faqf );
+
+    return ($maqf, $faqf);
+
+}
+
+=head2 available_parameters()
+
+ Title   : available_parameters
+ Usage   : @cmds = $fac->available_parameters('commands');
+ Function: Use to browse available commands, params, or switches
+ Returns : array of scalar strings
+ Args    : 'commands' : all maq commands
+           'params'   : parameters for this object's command
+           'switches' : boolean switches for this object's command
+           'filespec' : the filename spec for this object's command
+ 4Geeks  : Overrides Bio::ParameterBaseI via
+           Bio::Tools::Run::AssemblerBase
+
+=cut
+
+sub available_parameters {
+    my $self = shift;
+    my $subset = shift;
+    for ($subset) { # get commands
+        !defined && do { # delegate
+            return $self->SUPER::available_parameters($subset);
+        };
+        m/^c/i && do {
+            return grep !/^run$/, @program_commands;
+        };
+        m/^f/i && do { # get file spec
+            return @{$command_files{$self->command}};
+        };
+        do { #else delegate...
+            return $self->SUPER::available_parameters($subset);
+        };
+    }
+}
+
+sub available_commands { shift->available_parameters('commands') };
+
+sub filespec { shift->available_parameters('filespec') };
+
+1;
diff --git a/lib/Bio/Tools/Run/Maq/Config.pm b/lib/Bio/Tools/Run/Maq/Config.pm
new file mode 100755
index 0000000..77d4a20
--- /dev/null
+++ b/lib/Bio/Tools/Run/Maq/Config.pm
@@ -0,0 +1,335 @@
+# $Id$
+#
+# BioPerl module for Bio::Tools::Run::Maq::Config
+#
+# Please direct questions and support issues to
+#
+# Cared for by Mark A. Jensen
+#
+# Copyright Mark A. Jensen
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Run::Maq::Config - Configuration data for maq commands
+
+=head1 SYNOPSIS
+
+Used internally by L<Bio::Tools::Run::Maq>. 
+ +=head1 DESCRIPTION + +This package exports information describing maq commands, parameters, +switches, and input and output filetypes for individual maq commands. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion +http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +L<bioperl-l@bioperl.org> + +rather than to the module maintainer directly. Many experienced and +responsive experts will be able to look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +the web: + + http://redmine.open-bio.org/projects/bioperl/ + +=head1 AUTHOR - Mark A. Jensen + +Email maj@fortinbras.us + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... 
+
+
+package Bio::Tools::Run::Maq::Config;
+use strict;
+use warnings;
+no warnings qw(qw);
+use Bio::Root::Root;
+use Exporter;
+use base qw(Bio::Root::Root );
+
+our (@ISA, @EXPORT, @EXPORT_OK);
+push @ISA, 'Exporter';
+@EXPORT = qw(
+             @program_commands
+             %command_prefixes
+             %composite_commands
+             @program_params
+             @program_switches
+             %param_translation
+             %command_files
+            );
+
+@EXPORT_OK = qw();
+
+
+
+our @program_commands = qw(
+    run
+    fasta2bfa
+    fastq2bfq
+    map
+    mapmerge
+    rmdup
+    assemble
+    indelpe
+    indelsoa
+    sol2sanger
+    bfq2fastq
+    mapass2maq
+    mapview
+    mapcheck
+    pileup
+    cns2fq
+    cns2snp
+    cns2view
+    cns2ref
+    cns2win
+    fasta2csfa
+    csmap2nt
+    submap
+    eland2maq
+    export2maq
+);
+
+# composite commands: pseudo-commands that run a
+# sequence of commands
+# composite command prefix => list of prefixes of commands this
+# composite command runs
+#
+
+our %composite_commands = (
+    'run' => [qw( map asm c2q )]
+    );
+
+# prefixes only for commands that take params/switches...
+our %command_prefixes = (
+    'fastq2bfq'  => 'q2q',
+    'map'        => 'map',
+    'assemble'   => 'asm',
+    'mapview'    => 'mv',
+    'mapcheck'   => 'mck',
+    'pileup'     => 'pup',
+    'cns2fq'     => 'c2q',
+    'cns2win'    => 'c2w',
+    'submap'     => 'sub',
+    'eland2maq'  => 'l2m',
+    'export2maq' => 'x2m',
+    'run'        => 'run'
+    );
+
+# entries are '<command prefix>|<option name>'; the prefix must be one
+# of the values of %command_prefixes or the option is never offered
+our @program_params = qw(
+    command
+    q2q|n
+    map|adaptor_file
+    map|first_read_length
+    map|max_hits
+    map|max_mismatches
+    map|max_outer_distance
+    map|max_outer_distance_rf
+    map|mismatch_dump
+    map|mismatch_posn_dump
+    map|mismatch_thr
+    map|mutation_rate
+    map|second_read_length
+    map|unmapped_dump
+    asm|error_dep_coeff
+    asm|het_fraction
+    asm|max_mismatches
+    asm|max_quality_sum
+    asm|min_map_quality
+    asm|num_haplotypes
+    mck|max_mismatches
+    mck|min_map_quality
+    pup|max_mismatches
+    pup|max_quality_vals
+    pup|min_map_quality
+    pup|site_input_file
+    c2q|min_map_quality
+    c2q|min_read_depth
+    c2q|min_nbr_quality
+    c2q|max_read_depth
+    c2w|window_size
+    c2w|ref_seq
+    c2w|start_posn
+    c2w|end_posn
+    c2w|min_cons_quality
+    sub|max_mismatches
+    sub|max_quality_sum
+    sub|min_map_quality
+    l2m|def_qual
+    x2m|max_outer_distance
+    x2m|first_read_length
+    x2m|second_read_length
+    );
+
+our @program_switches = qw(
+    asm|single_end_quality
+    asm|discard_wrong_pairs
+    mv|omit_seq_qual
+    mv|show_mismatch_posns
+    mck|single_end_quality
+    pup|single_end_quality
+    pup|discard_wrong_pairs
+    pup|verbose
+    pup|show_base_posn
+    sub|discard_wrong_pairs
+    x2m|retain_filt_reads
+);
+
+# option name => single-letter maq command-line option
+our %param_translation = (
+    'q2q|n'                     => 'n',
+    'map|max_mismatches'        => 'n',
+    'map|max_outer_distance'    => 'a',
+    'map|max_outer_distance_rf' => 'A',
+    'map|first_read_length'     => '1',
+    'map|second_read_length'    => '2',
+    'map|mutation_rate'         => 'm',
+    'map|adaptor_file'          => 'd',
+    'map|unmapped_dump'         => 'u',
+    'map|mismatch_thr'          => 'e',
+    'map|mismatch_dump'         => 'H',
+    'map|max_hits'              => 'C',
+    'map|mismatch_posn_dump'    => 'N',
+    'asm|error_dep_coeff'       => 't',
+    'asm|het_fraction'          => 'r',
+    'asm|max_mismatches'        => 'm',
+    'asm|max_quality_sum'       => 'Q',
+    'asm|min_map_quality'       => 'q',
+    'asm|num_haplotypes'        => 'N',
+    'mck|max_mismatches'        => 'm',
+    'mck|min_map_quality'       => 'q',
+    'pup|max_mismatches'        => 'm',
+    'pup|max_quality_vals'      => 'Q',
+    'pup|min_map_quality'       => 'q',
+    'pup|site_input_file'       => 'l',
+    'c2q|min_map_quality'       => 'Q',
+    'c2q|min_read_depth'        => 'd',
+    'c2q|min_nbr_quality'       => 'n',
+    'c2q|max_read_depth'        => 'D',
+    'c2w|window_size'           => 'w',
+    'c2w|ref_seq'               => 'c',
+    'c2w|start_posn'            => 'b',
+    'c2w|end_posn'              => 'e',
+    'c2w|min_cons_quality'      => 'q',
+    'sub|max_mismatches'        => 'm',
+    'sub|max_quality_sum'       => 'Q',
+    'sub|min_map_quality'       => 'q',
+    'l2m|def_qual'              => 'q',
+    'x2m|max_outer_distance'    => 'a',
+    'x2m|first_read_length'     => '1',
+    'x2m|second_read_length'    => '2',
+    'asm|single_end_quality'    => 's',
+    'asm|discard_wrong_pairs'   => 'p',
+    'mv|omit_seq_qual'          => 'b',
+    'mv|show_mismatch_posns'    => 'N',
+    'mck|single_end_quality'    => 's',
+    'pup|single_end_quality'    => 's',
+    'pup|discard_wrong_pairs'   => 'p',
+    'pup|verbose'               => 'v',
+    'pup|show_base_posn'        => 'P',
+    'sub|discard_wrong_pairs'   => 'p',
+    'x2m|retain_filt_reads'     => 'n'
+    );
+
+#
+# the order in the arrayrefs is the order required
+# on the command line
+#
+# the strings in the arrayrefs (less special chars)
+# become the keys for named parameters to run_maq
+#
+# special chars:
+#
+# '#' implies optional
+# '*' implies variable number of this type
+# <|> implies stdin/stdout redirect
+#
+
+our %command_files = (
+    'run'        => [qw( faq fas faq )],
+    'fastq2bfq'  => [qw( faq bfq )],
+    'fasta2bfa'  => [qw( fas bfa )],
+    'map'        => [qw( map bfa bfq1 #bfq2 2>#log )],
+    'mapmerge'   => [qw( out_map *in_map )],
+    'rmdup'      => [qw( out_map in_map )],
+    'assemble'   => [qw( cns bfa map 2>#log )],
+    'indelpe'    => [qw( bfa map >txt )],
+    'indelsoa'   => [qw( bfa map >txt )],
+    'sol2sanger' => [qw( in_faq out_faq )],
+    'bfq2fastq'  => [qw( bfq faq )],
+    'mapass2maq' => [qw( in_map out_map )],
+    'mapview'    => [qw( map >txt )],
+    'mapcheck'   => [qw( bfa map >txt )],
+    'pileup'     => [qw( bfa map >txt )],
+    'cns2fq'     => [qw( cns >faq )],
+    'cns2snp'    => [qw( cns >txt )],
+    'cns2view'   => [qw( cns >txt )],
+    'cns2ref'    => [qw( cns >fas )],
+    'cns2win'    => [qw( cns >txt )],
+    'fasta2csfa' => [qw( in_fas >out_fas )],
+    'csmap2nt'   => [qw( out_map bfa in_map )],
+    'submap'     => [qw( out_map in_map )],
+    'eland2maq'  => [qw( map lis eld )],
+    'export2maq' => [qw( map lis xpt )]
+    );
+
+# Runs when the module file is loaded (not in an INIT block, which is
+# skipped when the module is require'd at run time).
+{
+    # add subcommand params and switches for
+    # composite commands, e.g. 'map|max_mismatches' is also offered
+    # as 'run|map_max_mismatches'
+    my @sub_params;
+    my @sub_switches;
+    foreach my $cmd (keys %composite_commands) {
+        foreach my $subcmd ( @{$composite_commands{$cmd}} ) {
+            for my $param (@program_params) {
+                push @sub_params, "$cmd|${subcmd}_$1"
+                    if $param =~ m/^$subcmd\|(.*)/;
+            }
+            for my $switch (@program_switches) {
+                push @sub_switches, "$cmd|${subcmd}_$1"
+                    if $switch =~ m/^$subcmd\|(.*)/;
+            }
+        }
+    }
+    push @program_params, @sub_params;
+    push @program_switches, @sub_switches;
+    # translations for subcmd params/switches not necessary
+}
+1;
diff --git a/t/Maq.t b/t/Maq.t
new file mode 100755
index 0000000..255e47a
--- /dev/null
+++ b/t/Maq.t
@@ -0,0 +1,196 @@
+#-*-perl-*-
+
+
+use strict;
+use warnings;
+no warnings qw(once);
+our $home;
+BEGIN {
+    $home = '.'; # set to '.' for Build use,
+                 # '..' for debugging from .t file
+    unshift @INC, $home;
+    use Bio::Root::Test;
+    test_begin(-tests => 52,
+               -requires_modules => [qw(IPC::Run Bio::Tools::Run::Maq)]);
+}
+
+use File::Temp qw(tempfile tempdir);
+use Bio::Tools::Run::WrapperBase;
+
+# test command functionality
+
+ok my $maqfac = Bio::Tools::Run::Maq->new(
+    -command => 'assemble',
+    -single_end_quality => 1,
+    -het_fraction => 0.005,
+    -max_mismatches => 4
+    ), "make a factory using command 'assemble'";
+# ParameterBaseI compliance : really AssemblerBase tests...
+ok $maqfac->parameters_changed, "parameters changed on construction";
+ok $maqfac->het_fraction, "access parameter";
+ok !$maqfac->parameters_changed, "parameters_changed cleared on read";
+ok $maqfac->set_parameters( -error_dep_coeff => 0.5 ), "set a param not set in constructor";
+ok $maqfac->parameters_changed, "parameters_changed set";
+is ($maqfac->error_dep_coeff, 0.5, "parameter really set");
+is ($maqfac->het_fraction, 0.005, "original parameter unchanged");
+ok !$maqfac->parameters_changed, "parameters_changed cleared on read";
+ok $maqfac->set_parameters( -het_fraction => 0.01 ), "change an original parameter";
+is ($maqfac->het_fraction, 0.01, "parameter really changed");
+ok $maqfac->reset_parameters( -het_fraction => 0.05 ), "reset parameters with arg";
+ok !$maqfac->max_mismatches, "original parameters undefined";
+is ($maqfac->het_fraction, 0.05, "parameter really reset via arg");
+#back to beginning
+$maqfac->set_parameters(
+    -command => 'assemble',
+    -single_end_quality => 1,
+    -het_fraction => 0.005,
+    -max_mismatches => 4
+    );
+ok $maqfac->parameters_changed, "parameters changed"; + +is( scalar $maqfac->available_parameters, 9, "all available options"); +is( scalar $maqfac->available_parameters('params'), 7, "available parameters" ); +is( scalar $maqfac->available_parameters('switches'), 2, "available switches" ); +my %pms = $maqfac->get_parameters; +is_deeply( \%pms, + { command => 'assemble', + het_fraction => 0.005, + max_mismatches => 4, + single_end_quality => 1}, "get_parameters correct"); +is( $maqfac->command, 'assemble', "command attribute set"); + +is_deeply( $maqfac->{_options}->{_commands}, + [@Bio::Tools::Run::Maq::program_commands], + "internal command array set" ); + +is_deeply( $maqfac->{_options}->{_prefixes}, + {%Bio::Tools::Run::Maq::command_prefixes}, + "internal prefix hash set"); + +is_deeply( $maqfac->{_options}->{_params}, + [qw( command error_dep_coeff het_fraction max_mismatches max_quality_sum min_map_quality num_haplotypes)], + "commands filtered by prefix"); + + +my @a = @{$maqfac->_translate_params}; +is shift @a, 'assemble', 'translate_params: command correct'; +my ($k, %h); +for (@a) { + (/^-/) ? ( $h{$k = $_} = undef ) : ( $h{$k} = $_ ); +} +is_deeply( \%h, { '-m' => 4, '-r' => 0.005, '-s' => undef }, 'translate_params: options correct'); + +# test run_maq filearg parsing +# a pipeline... 
+
+# Runs each maq command on its own (format conversion, mapping, merging,
+# assembly, viewing) and then the one-call run() pipeline.
+SKIP : {
+    # -tests must match the number of assertions in this block (27).
+    # NOTE(review): test_skip is defined outside this view; presumably it
+    # skips the block when the maq executable cannot be found - confirm.
+    test_skip( -requires_executable => $maqfac,
+               -tests => 27 );
+    # NOTE(review): test_input_file is defined outside this view;
+    # presumably it resolves these names under t/data - confirm.
+    my $rd1 = test_input_file('r1.fq');
+    my $rd2 = test_input_file('r2.fq');
+    my $refseq = test_input_file('campycoli.fas');
+
+    # Scratch files: each handle is closed straight away because only the
+    # file name is needed; the names are handed to maq as output paths.
+    my $tdir = tempdir( "maqXXXX", CLEANUP => 1);
+    my ($r1h, $r1f) = tempfile( "rd1XXXX", DIR => $tdir );
+    $r1h->close;
+    my ($r2h, $r2f) = tempfile( "rd2XXXX", DIR => $tdir );
+    $r2h->close;
+    my ($refh, $reff) = tempfile( "refXXXX", DIR => $tdir );
+    $refh->close;
+    my ($map1h, $map1f) = tempfile( "mapXXXX", DIR => $tdir );
+    $map1h->close;
+    my ($map2h, $map2f) = tempfile( "mapXXXX", DIR => $tdir );
+    $map2h->close;
+    my ($mmaph, $mmapf) = tempfile( "mapXXXX", DIR => $tdir );
+    $mmaph->close;
+    my ($cnsh, $cnsf) = tempfile( "cnsXXXX", DIR => $tdir );
+    $cnsh->close;
+    my ($maqh, $maqf) = tempfile( "maqXXXX", DIR => $tdir );
+    $maqh->close;
+    my ($fqh, $fqf) = tempfile( "faqXXXX", DIR => $tdir );
+    $fqh->close;
+
+    # Reference FASTA -> binary FASTA (bfa).
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'fasta2bfa'
+        ), "make fasta2bfa conversion factory";
+
+    ok $maqfac->run_maq( -fas => $refseq,
+                         -bfa => $reff ), "convert refseq to bfa";
+
+    like($maqfac->stderr, qr/1 sequence/, "maq success");
+
+    # Read FASTQ -> binary FASTQ (bfq); each read file holds 125 records.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'fastq2bfq'
+        ), "make fastq2bfq conversion factory";
+
+    ok $maqfac->run_maq( -faq => $rd1,
+                         -bfq => $r1f ), "convert r1.fq to bfq";
+    like($maqfac->stderr, qr/125 sequences were loaded/, "maq success");
+    ok $maqfac->run_maq( -faq => $rd2,
+                         -bfq => $r2f ), "convert r2.fq to bfq";
+    like($maqfac->stderr, qr/125 sequences were loaded/, "maq success");
+
+    # Map the reads against the reference, single-end and then paired-end.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'map',
+        ), "make map factory";
+
+    ok $maqfac->run_maq( -map => $map1f,
+                         -bfa => $reff,
+                         -bfq1 => $r1f ), "map single-end reads";
+    ok $maqfac->run_maq( -map => $map2f,
+                         -bfa => $reff,
+                         -bfq2 => $r2f,
+                         -bfq1 => $r1f ), "map paired-end reads";
+
+    # Merge both maps into one; -in_map takes an array ref of map files.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'mapmerge'
+        ), "make mapmerge factory";
+
+    ok $maqfac->run_maq( -out_map => $mmapf,
+                         -in_map => [$map1f, $map2f] ), "merge maps";
+
+    # Build the consensus from the merged map.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'assemble'
+        ), "make assemble factory";
+
+    ok $maqfac->run_maq( -cns => $cnsf,
+                         -bfa => $reff,
+                         -map => $mmapf ), "assemble consensus";
+
+    # Dump the merged map as text.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'mapview'
+        ), "make mapview converter";
+
+
+    ok $maqfac->run_maq( -map => $mmapf,
+                         -txt => $maqf ), "convert mmap";
+
+    # Consensus -> FASTQ.
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -command => 'cns2fq'
+        ), "make consensus->fastq converter";
+    ok $maqfac->run_maq( -cns => $cnsf,
+                         -faq => $fqf ), "convert consensus -> fastq";
+
+    # test run (assembly pipeline)
+    # these parms are the maq defaults for the respective programs
+    ok $maqfac = Bio::Tools::Run::Maq->new(
+        -map_max_mismatches => 2,
+        -asm_max_mismatches => 7,
+        -c2q_min_map_quality => 40
+        ), "make an assembly factory";
+
+    # With no -command given, the factory reports 'run' as its command.
+    is( $maqfac->command, 'run', "command attribute set");
+    is( $maqfac->map_max_mismatches, 2, "map param set");
+    is( $maqfac->asm_max_mismatches, 7, "asm param set");
+    is( $maqfac->c2q_min_map_quality, 40, "c2q param set");
+    ok my $assy = $maqfac->run($rd1,$refseq,$rd2), "make full assy";
+    # Some fuzziness in these: sometimes maq gives 41+4, sometimes 42+6,
+    # so only lower bounds are asserted.
+    cmp_ok( $assy->get_nof_contigs, '>=', 37, "number of contigs");
+    cmp_ok( $assy->get_nof_singlets,'>=',4, "number of singlets");
+
+}
+1;
+
+# sub test_input_file {
+#     return "./data/".shift;
+# }
diff --git a/t/data/campycoli.fas b/t/data/campycoli.fas
new file mode 100755
index 0000000..b02e6a5
--- /dev/null
+++ b/t/data/campycoli.fas
@@ -0,0 +1,50 @@
+>gi|65306796|gb|AY948116.1| Campylobacter coli plasmid p3384, complete sequence
+AAGCTTTTAAAACACCAAAAAAGGAAAAATTTAAAAATGCAAAAACCAATATCCCGAAGTGCATAGCTTA
+GAAGAAAGCCTTGCAATACTTAAAAAAATATAAAGATGATGTAAATAAGGAAAGATTATGAAGGTATAAA
+ATCTGTTATAAGTAATCATGCGATAGAGAGTATTTACATGAATGAATTAGATATTATCGCTATGATAAAA
+CAAAGTGCTTTAAAGTTAAGTGCTGATGAAATCATTGCTGAATATAAGGAAAAGGGATTTGTAGAATATG
+TCAAAAGTTGCCACCGCTCATTGCAATCCTAAAAAACAACCTGCATTAAACCACAATGATAGAACCAACG
+ATAATGCTAAGACAATCACTAAAGAACTTACGCATTTAAATGAATACTCTTGCACTAGCGATGAAGTGCG
+TAAGAACATAGAAAGGCTTTATAAAAAAGCTTATGAAAATTTTTATAAATATTGTGAAAATAAGAATGGT
+TTAGCTAAAAGTGGAAAGCCTAAAGGACTTCAAAATTTTACTAAAAAAGAAAAATGTTATCACGAGTTTA
+TATACGAAATCGGCGAAAATACTACAATGGAGCAATGCCAAGAGCTTACGCAAAAAATCGCAGAGCTTAC
+AGGATTTACACCTTTACAAGTTGTAATCCATAGAGATGAAGTAAGTGAGAATGCTAAAGGGGAAAAACAA
+ACCCATTATCACGCCCACGCGGTATTTTTTACACTCGATAACAATGGCTTACAACTTGCTAGACGTGAAG
+CAAGTTTGAATAAAGCCAATCTTAGCAAAATACAAACCCTAACCGCACAAAGTTTAAAAATGGAGCGTGG
+AGCTAATCGCTACGAGAATAACGAAAAGCAACCTCAATACATACAAGATTATAAAACATACGCTCAATTT
+AAAGAACAAGAAAAAGCATTACTTCAAAGAATACAAGAACAAGAGCATAAATTAACGCAAATGGCCCTAG
+AATTGAAAAAAAAAGAAAAAGAGATACAAGACAAGGCTAAAGAGTTAAAATCGAAAGAAAACGAATTACA
+AGCGAAAATAGAGCAACATCAAAAACATATACAAAATTTAGAACTAGGACACGAAAGAGCTTTAAAGGAA
+CTTACACAAGAGTTTGAAAAGCGTTTAAGCCTATGGAAAAACATTTTAACCTTTGGAAAATACAACGCCA
+AAGTAAGAGAAGACTATCAGTTAACAAAGAATGCTTTTTTAATTAGCACAGATGAAAGCAGGAGAGAAGC
+TAACAAAGAGCTTGAATATTTAAAATTTGAATATCATAAAGTCAAAGATGAACGAGATAATTTAAAAACT
+TTGTTTGAGGCACACAAAACAAAAAATGTTAAATTAGAAACTCGACTAAAAGAAATAGGCAAATGGTGTG
+AAAAAAATTTAAGCGTGGAGCAGTTAAAAGAAATATTTCCATTAAAAGCCGAAAGAATAGAAAAAGAGCT
+TAAATATCAAAGAGCTTTTGAAAATTCTTTTGAACAAGCAAAAAGAAACGATAGAGGGTTTGGGTTTAGC +AGATAGTTTTCTTTTTTGGTGCTTTAGCAAATCGCACCTGTTAAGGTGCTATGATTTGCGAAAAGGGGGT +GCGGGGGCTGTCTGAGTGCGTAGCACGGAAGACGGACAAAGCCCACCGCCCTAGAAAAAAAATTACTCAA +CTTTTTTTGTTTTTTCTCTCGCGCGCGTACGCGTACGCGATTTTTTTTATTTTTTATATATTTTTCTTAT +TTTTAGGGGGCTTTCTAAGCCCTATATGATGGGGGCTAGCGAAATATTAAAACGACAAAATTCTACTATT +AAAACGACAAAATTCTACTATTAAAACGACAAAATTCTACTATTAAAACGACAAAATTCTACTATTAAGG +GGAGAAAATTCTACTATTAAAACGACAAAATATTTGACAAATATTGTTTTAATATGCTACAATAAAAAAT +CCGCCTTGAGTTTTCACTCTTGGCGGAAATTTATCAATCAAGGCTAACTATGAAACGACATAGTAAGCGT +TTTGTGAGATTATACCTAAAATTTAGGTTTTTTAAGCTTAAAGTTGAAGTATTTTTCTAGCAAAACCGCA +GGGGCAGAGAAGCCCCTTGCCTTGATTGTCTTACTTTGGATATGCAAATGAGCGAAATAGTTAAATATAA +TAATGATTTTAATCTTTTACCTATGCCTGAACTTAAAGCCATACAAATGGATATGTTTATGGCTATTATC +TCACTAACCAAAGACAAAAAAGAAAATACGCCATTTTTAAAAAAGTTTTTTAACCCTGATAAAAGAAAAA +TAATTATTCCGCAAAAGAAGTTTATTGAATTATGTCGGTTAAACGATAGTAAAATGGACTATAAAGAAAT +TTTCTTTGCCATTGATGATTGCTTGAAAAAACTTTGCAATTTTCTTATTTCATACCAAAAAGATGAAAGA +ACGATTTATAATTTTGTTTGTTTTGAAGAAGCAAATATTATTGCTGATGAAGTGCATATAACTTTACAAA +GTCGTTTTTACGATATGATAATAAATAAAAAATTTGGTTTCACAGCCTTTGAATTAGCCGAATTTGCAGA +ACTTAGCGGTAAATATACAAAAACTTTATACAGACTTTTAAAGCAATTTAGAACCACAGGCAAAGCTTAT +TTTGAGTGGGAAGAATTTTGCAGGATTATGAAAATACCTGAAAATTATCGTCAAATAGATATAGACCAAA +GAATTTTAAAACCTGCTATCAAAGAACTTTCTAAAGAACGCAATCTTTTTGACCAAATCAGAGTGCCTTT +TAAAAATCTTGCTTATGAAAAAGAAAAAACCGCAGGGCGTGGGCGTGGCGGTAAAGTTTCAGGTATTAGC +TTTACTTTTAAACCTGAAAATATCCAAATGCAAAAGCTAGAAAATGAAAGTCAAAAAATAATGAGCGATG +AGCAAAAATATTTAAAGATTTTAAACAATATGAAACTTAATCAAGTTAGATTTGATTATAATGACAAGCT +TTGGCAATTTAACGATTTTGATTTTGATGAATTTAAAATTATTGCAATAGAGCTTGTAAGAGATGAATAC +GAGAATTTAAACTTTGGAAATCATATGCACTTTAATGCTAAAAATCAAGAGCAGTTTTTTAAAATGATTG +AAACCTTTAGAAAAGGTATTAGGTAAAAATTTGCTATAATTATATCTTTAAAGGATACAAGCTATCCGCC +ACTTGTGCCAAGTGTCGAGCTTGTAAGGGGTGCAACCCCTTAACCCCACTAATAAAAATCAACTAAAATC +AATAAATGATTTTTTTGATTTTTATT + diff --git a/t/data/r1.fq b/t/data/r1.fq new file mode 100644 index 0000000..d49d778 --- 
/dev/null +++ b/t/data/r1.fq @@ -0,0 +1,500 @@ +@gi|65306796|gb|AY948116.1|_1951_2136_0/1 +AAGTAAGACAATCAAGGCAAGGGGC ++ +YYYYYYYYYYYYYYYYWYYSYWYSY +@gi|65306796|gb|AY948116.1|_1531_1682_1/1 +AGTTGAGTAATTTTTTTTCTAGGGC ++ +YYYYYYYYYYWYYYYWYYWYWYYWW +@gi|65306796|gb|AY948116.1|_603_785_2/1 +CTTTATTCAAACTTGCTTCACGTCT ++ +YYYYYYYYYWYYYYYYWYWYWYYSU +@gi|65306796|gb|AY948116.1|_1888_2018_3/1 +TCGTTTCATAGTTAGCCTTGATTGA ++ +YYYYYYYYYYWYYYYYYYYSYYYWW +@gi|65306796|gb|AY948116.1|_1990_2157_4/1 +TTTATCAATCAAGGCTAACTATGAA ++ +YYYYYYYYYYYYYYYWYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_1165_1307_5/1 +TTTAACCTTTGGAAAATACAACGCC ++ +YYYYYYYYYYYYYYYYWYYSYWYSU +@gi|65306796|gb|AY948116.1|_1622_1777_6/1 +ATATAGGGCTTAGAAAGCCCCCTAA ++ +YYYYYYYYYWYYYYWWYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_1116_1298_7/1 +AGGAACTTACACAAGAGTTTGAAAA ++ +YYYYYYYYYYYYYYWWYYYWWYYWW +@gi|65306796|gb|AY948116.1|_63_202_8/1 +TAGCGATAATATCTAATTCATTCAT ++ +YYYYYYYYYYYYYYYYYYYWWYYSY +@gi|65306796|gb|AY948116.1|_2908_3068_9/1 +TAGAAAATGAAAGTCAAAAAATAAT ++ +YYYYYYYYYYYYYYYYYYWYWWYSU +@gi|65306796|gb|AY948116.1|_2909_3032_a/1 +ATCAAAATCGTTAAATTGCCAAAGC ++ +YYYYYYYYYYYYYYYYYYYWWWYSY +@gi|65306796|gb|AY948116.1|_3096_3266_b/1 +GGGTTAAGGGGTTGCACCCCTTACA ++ +YYYYYYYYYYYYYWYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_479_649_c/1 +AATAAGAATGGTTTAGCTAAAAGTG ++ +YYYYYYYYYYWYYYWWYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_1502_1651_d/1 +CTTTGTCCGTCTTCCGTGCTACGCA ++ +YYYYYYYYYYYYYYWWYYWUWYYSY +@gi|65306796|gb|AY948116.1|_944_1116_e/1 +CAAGAACAAGAGCATAAATTAACGC ++ +YYYYYYYYYYYYYWYYWYYSYYYSY +@gi|65306796|gb|AY948116.1|_3061_3240_f/1 +AGCTTGTAAGAGATGAATACGAGAA ++ +YYYYYYYYYYYYYYYYYYWYWYYWW +@gi|65306796|gb|AY948116.1|_1955_2172_10/1 +TATTATATTTAACTATTTCGCTCAT ++ +YYYYYYYYYYYYYYYWYWWYWWYWW +@gi|65306796|gb|AY948116.1|_2987_3171_11/1 +TAATACCTTTTCTAAAGGTTTCAAT ++ +YYYYYYYYYYYYYWYYYYYSYYYSU +@gi|65306796|gb|AY948116.1|_2261_2430_12/1 +AAATAAGAAAATTGCAAAGTTTTTT ++ +YYYYYYYYYYYYYWYYWYYWYYYWW +@gi|65306796|gb|AY948116.1|_57_240_13/1 
+TTCATCAGCACTTAACTTTAAAGCA ++ +YYYYYYYYYYYYYYYYYWWYWYYWW +@gi|65306796|gb|AY948116.1|_1168_1364_14/1 +AACCTTTGGAAAATACAACGCCAAA ++ +YYYYYYYYYYYYYYYYWYYSYWYSY +@gi|65306796|gb|AY948116.1|_673_829_15/1 +AAGTGAGAATGCTAAAGGGGAAAAA ++ +YYYYYYYYYWYYYWYYWYYWWWYWY +@gi|65306796|gb|AY948116.1|_275_459_16/1 +AATATGTCAAAAGTTGCCACCGCTC ++ +YYYYYYYYYWYYYWYYWYWYWWYWY +@gi|65306796|gb|AY948116.1|_3145_3288_17/1 +TGATTGAAACCTTTAGAAAAGGTAT ++ +YYYYYYYYYWYYYYYYWYWUWYYWY +@gi|65306796|gb|AY948116.1|_1295_1444_18/1 +TAATGGAAATATTTCTTTTAACTGC ++ +YYYYYYYYYYYYYYWWYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2129_2328_19/1 +TCTTACTTTGGATATGCAAATGAGC ++ +YYYYYYYYYYYYYYYYWYWUWWYWY +@gi|65306796|gb|AY948116.1|_1462_1646_1a/1 +TCCGTCTTCCGTGCTACGCACTCAG ++ +YYYYYYYYYYYYYYYYWYYSYWYSU +@gi|65306796|gb|AY948116.1|_2770_2956_1b/1 +TTTAAATATTTTTGCTCATCGCTCA ++ +YYYYYYYYYYWYYYYYYYYSYWYWW +@gi|65306796|gb|AY948116.1|_1336_1508_1c/1 +CTTGTTCAAAAGAATTTTCAAAAGC ++ +YYYYYYYYYYYYYYWWYYYSYYYWW +@gi|65306796|gb|AY948116.1|_2153_2336_1d/1 +CGAAATAGTTAAATATAATAATGAT ++ +YYYYYYYYYYWYYWYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_844_1031_1e/1 +ATTTTAACTCTTTAGCCTTGTCTTG ++ +YYYYYYYYYYWYYYWWYYWYWYYWW +@gi|65306796|gb|AY948116.1|_2821_2986_1f/1 +AAGAAAAAACCGCAGGGCGTGGGCG ++ +YYYYYYYYYYYYYYYWYWWYWYYSY +@gi|65306796|gb|AY948116.1|_58_177_20/1 +AGTGCATAGCTTAGAAGAAAGCCTT ++ +YYYYYYYYYWYYYYYYYWWYWWYWY +@gi|65306796|gb|AY948116.1|_2612_2792_21/1 +TCTGATTTGGTCAAAAAGATAGCGT ++ +YYYYYYYYYYYYYYWWYWWWYWYSU +@gi|65306796|gb|AY948116.1|_2081_2254_22/1 +ATTTTTCTAGCAAAACCGCAGGGGC ++ +YYYYYYYYYYYYYYYYYYYSYWYWW +@gi|65306796|gb|AY948116.1|_963_1092_23/1 +TAACGCAAATGGCCCTAGAATTGAA ++ +YYYYYYYYYYWYYWYWYYYSYYYWW +@gi|65306796|gb|AY948116.1|_1802_1954_24/1 +ACGACAAAATTCTACTATTAAAACG ++ +YYYYYYYYYYWYYWYYYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_125_324_25/1 +GCAGGTTGTTTTTTAGGATTGCAAT ++ +YYYYYYYYYWYYYYYWYYWUWWYWW +@gi|65306796|gb|AY948116.1|_2801_2960_26/1 +AATCTTTAAATATTTTTGCTCATCG ++ +YYYYYYYYYYYYYYYYWYYSYWWQQ 
+@gi|65306796|gb|AY948116.1|_1744_1934_27/1 +TTCTTATTTTTAGGGGGCTTTCTAA ++ +YYYYYYYYYYYYYYYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_2759_2911_28/1 +TCTAGCTTTTGCATTTGGATATTTT ++ +YYYYYYYYYYYYYYYYYYWUWWYWY +@gi|65306796|gb|AY948116.1|_1201_1398_29/1 +CACCATTTGCCTATTTCTTTTAGTC ++ +YYYYYYYYYYWYYYYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_1966_2156_2a/1 +TTGAGTTTTCACTCTTGGCGGAAAT ++ +YYYYYYYYYWYYYYYYYYWYWWYSY +@gi|65306796|gb|AY948116.1|_1691_1870_2b/1 +TTTTTCTCTCGCGCGCGTACGCGTA ++ +YYYYYYYYYYWYYYYYYWWYWYYSU +@gi|65306796|gb|AY948116.1|_2329_2493_2c/1 +AGTTTATTGAATTATGTCGGTTAAA ++ +YYYYYYYYYYYYYYYYYWWUWWYWY +@gi|65306796|gb|AY948116.1|_145_306_2d/1 +GTTATAAGTAATCATGCGATAGAGA ++ +YYYYYYYYYYYYYWYYYYWUWWYWY +@gi|65306796|gb|AY948116.1|_930_1108_2e/1 +TACTTCAAAGAATACAAGAACAAGA ++ +YYYYYYYYYYYYYYYYYYYSYYYSY +@gi|65306796|gb|AY948116.1|_580_743_2f/1 +TACTACAATGGAGCAATGCCAAGAG ++ +YYYYYYYYYWYYYYYWYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_1032_1194_30/1 +ACTTTGGCGTTGTATTTTCCAAAGG ++ +YYYYYYYYYYYYYWYWYYWUWYYWY +@gi|65306796|gb|AY948116.1|_2128_2314_31/1 +ATTATTTTTCTTCTATCAGGGTTAA ++ +YYYYYYYYYYYYYYWWYYYSYWYWY +@gi|65306796|gb|AY948116.1|_196_351_32/1 +TCGTTGGTTCTATCATTGTGGTTTA ++ +YYYYYYYYYWYYYYYYYWWUWYYWW +@gi|65306796|gb|AY948116.1|_1339_1491_33/1 +GGCACACAAAACAAAAAATGTTAAA ++ +YYYYYYYYYYWYYYYYYYWWWYYSY +@gi|65306796|gb|AY948116.1|_2509_2658_34/1 +TAACTTTACAAAGTCGTTTTTACGA ++ +YYYYYYYYYYYYYYYYWYYSYWWQQ +@gi|65306796|gb|AY948116.1|_1940_2118_35/1 +AAGGGGCTTCTCTGCCCCTGCGGTT ++ +YYYYYYYYYYYYYYYYYWWUWYYSU +@gi|65306796|gb|AY948116.1|_216_381_36/1 +GTAAGTTCTTTAGTGATTGTCTTAG ++ +YYYYYYYYYYYYYYWWYYYWWWYWW +@gi|65306796|gb|AY948116.1|_461_636_37/1 +AATCCTGTAAGCTCTGCGATTTTTT ++ +YYYYYYYYYYYYYWYYYYYWYYYWW +@gi|65306796|gb|AY948116.1|_325_461_38/1 +ATTAAACCACAATGATAGAACCAAC ++ +YYYYYYYYYYYYYWYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_2304_2478_39/1 +CTTCAAAACAAACAAAATTATAAAT ++ +YYYYYYYYYYWYYYWWYYWWWWWQQ +@gi|65306796|gb|AY948116.1|_278_467_3a/1 +ATGTCAAAAGTTGCCACCGCTCATT ++ 
+YYYYYYYYYYYYYYYYYWWUWWYSU +@gi|65306796|gb|AY948116.1|_2717_2903_3b/1 +AGATATAGACCAAAGAATTTTAAAA ++ +YYYYYYYYYYYYYYYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_1389_1563_3c/1 +GCAAATGGTGTGAAAAAAATTTAAG ++ +YYYYYYYYYYYYYYYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_3000_3167_3d/1 +ACCTTTTCTAAAGGTTTCAATCATT ++ +YYYYYYYYYYYYYYYYYYWYWWYSY +@gi|65306796|gb|AY948116.1|_360_547_3e/1 +ACATTTTTCTTTTTTAGTAAAATTT ++ +YYYYYYYYYYWYYYYYYYYSYYYWY +@gi|65306796|gb|AY948116.1|_143_319_3f/1 +TTGTTTTTTAGGATTGCAATGAGCG ++ +YYYYYYYYYWYYYYYWYYWWYYYWW +@gi|65306796|gb|AY948116.1|_1382_1538_40/1 +TAAACCCAAACCCTCTATCGTTTCT ++ +YYYYYYYYYYWYYYYWYYYSYWYSU +@gi|65306796|gb|AY948116.1|_2707_2804_41/1 +TTTAAAAGGCACTCTGATTTGGTCA ++ +YYYYYYYYYYWYYYYWYWWYWYYSU +@gi|65306796|gb|AY948116.1|_430_615_42/1 +AGAAAGGCTTTATAAAAAAGCTTAT ++ +YYYYYYYYYYWYYYWWYWWYWWYSU +@gi|65306796|gb|AY948116.1|_2383_2537_43/1 +CATATCGTAAAAACGACTTTGTAAA ++ +YYYYYYYYYYYYYYYWYYYWYYYWY +@gi|65306796|gb|AY948116.1|_839_973_44/1 +GGAGCTAATCGCTACGAGAATAACG ++ +YYYYYYYYYYWYYYWWYYWYWYYWY +@gi|65306796|gb|AY948116.1|_2761_2927_45/1 +TTTTTGACTTTCATTTTCTAGCTTT ++ +YYYYYYYYYYWYYYYYWYWWWWYWY +@gi|65306796|gb|AY948116.1|_375_561_46/1 +ATAAACTCGTGATAACATTTTTCTT ++ +YYYYYYYYYWYYYYWWYYYSYWYWW +@gi|65306796|gb|AY948116.1|_2751_2931_47/1 +TTATTTTTTGACTTTCATTTTCTAG ++ +YYYYYYYYYWYYYWYYWYWWYWYSU +@gi|65306796|gb|AY948116.1|_2312_2478_48/1 +CTTCAAAACAAACAAAATTATAAAT ++ +YYYYYYYYYYYYYYYWYYYWYWYSY +@gi|65306796|gb|AY948116.1|_1934_2078_49/1 +TTGTTTTAATATACTACAATAAAAA ++ +YYYYYYYYYWYYYYYWYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_263_431_4a/1 +AGGGATTTGTAGAATATGTCAAAAG ++ +YYYYYYYYYYYYYYWWYYWYWWYWW +@gi|65306796|gb|AY948116.1|_452_644_4b/1 +AAGGTGTAAATCCTGTAAGCTCTGC ++ +YYYYYYYYYYYYYYWWYWWUWWYWY +@gi|65306796|gb|AY948116.1|_2570_2722_4c/1 +TGAATTAGCCGAATTTGCAGAACTT ++ +YYYYYYYYYYWYYYYYYWWYWYYSU +@gi|65306796|gb|AY948116.1|_2549_2708_4d/1 +ATAATTTTCAGGTATTTTCATAATC ++ +YYYYYYYYYYYYYYYYYWWUWWYWY +@gi|65306796|gb|AY948116.1|_1498_1676_4e/1 
+GTAATTTTTTTTCTAGGGCGGTGGG ++ +YYYYYYYYYYYYYYWWYYWUWWYWW +@gi|65306796|gb|AY948116.1|_2890_3076_4f/1 +TCATCTCTTACAAGCTCTATTGCAA ++ +YYYYYYYYYYYYYYYYYYWWWWYWW +@gi|65306796|gb|AY948116.1|_3149_3310_50/1 +AATCAAAAAAATCATTTATTGATTT ++ +YYYYYYYYYYWYYYYWYYYSYYYSU +@gi|65306796|gb|AY948116.1|_1992_2129_51/1 +ACAATCAAGGCAAGGGGCTTCTCTG ++ +YYYYYYYYYYYYYYWWYWWYWWYSU +@gi|65306796|gb|AY948116.1|_945_1120_52/1 +AAGAACAAGAGCATAAATTAACGCA ++ +YYYYYYYYYYWYYYYYWYYWYWWQQ +@gi|65306796|gb|AY948116.1|_2526_2710_53/1 +CGATAATTTTCAGGTATTTTCATAA ++ +YYYYYYYYYWYYYWYYYWWUWYYSU +@gi|65306796|gb|AY948116.1|_2414_2548_54/1 +TTGCAATTTTCTTATTTCATACCAA ++ +YYYYYYYYYYWYYYYYYWWWWYYSU +@gi|65306796|gb|AY948116.1|_1957_2161_55/1 +AAATCCGCCTTGAGTTTTCACTCTT ++ +YYYYYYYYYYYYYYYYWYWYWWWQQ +@gi|65306796|gb|AY948116.1|_299_483_56/1 +TTATTTTCACAATATTTATAAAAAT ++ +YYYYYYYYYYYYYWYYYWWYWWYWY +@gi|65306796|gb|AY948116.1|_2162_2328_57/1 +TAAATATAATAATGATTTTAATCTT ++ +YYYYYYYYYYYYYYYYYYYWWWYSU +@gi|65306796|gb|AY948116.1|_292_484_58/1 +CTTATTTTCACAATATTTATAAAAA ++ +YYYYYYYYYYWYYYYYWYYWWYYSU +@gi|65306796|gb|AY948116.1|_1558_1716_59/1 +GTACGCGTACGCGCGCGAGAGAAAA ++ +YYYYYYYYYYWYYYYYYYYWYWYSU +@gi|65306796|gb|AY948116.1|_171_331_5a/1 +TATTTACATGAATGAATTAGATATT ++ +YYYYYYYYYYYYYYYYWYYSYYYWW +@gi|65306796|gb|AY948116.1|_2624_2822_5b/1 +ACTTTTAAAGCAATTTAGAACCACA ++ +YYYYYYYYYYYYYYYYYYYWWYYSY +@gi|65306796|gb|AY948116.1|_2024_2222_5c/1 +ATCCATTTGTATGGCTTTAAGTTCA ++ +YYYYYYYYYYYYYYWWYYYWWYYSY +@gi|65306796|gb|AY948116.1|_2196_2392_5d/1 +CCTGAACTTAAAGCCATACAAATGG ++ +YYYYYYYYYYWYYYYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2377_2544_5e/1 +AAATTTTCTTTGCCATTGATGATTG ++ +YYYYYYYYYWYYYWYYWYWWYYYWY +@gi|65306796|gb|AY948116.1|_3111_3290_5f/1 +TTTAATGCTAAAAATCAAGAGCAGT ++ +YYYYYYYYYYYYYWYWYYWYWWYSY +@gi|65306796|gb|AY948116.1|_910_1065_60/1 +TAAGAACAAGAAAAAGCATTACTTC ++ +YYYYYYYYYYYYYWYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2966_3121_61/1 +TTAGCATTAAAGTGCATATGATTTC ++ +YYYYYYYYYYYYYYYYYWWWWWYSY 
+@gi|65306796|gb|AY948116.1|_1321_1511_62/1 +TTTAAAAACTTTGTTTGAGGCACAC ++ +YYYYYYYYYWYYYWYYWYYWWYYSY +@gi|65306796|gb|AY948116.1|_1873_2073_63/1 +CTTTAAGCTTAAAAAACCTAAATTT ++ +YYYYYYYYYYYYYYYYYWWUWYYSU +@gi|65306796|gb|AY948116.1|_2702_2884_64/1 +AAATTATCGTCAAATAGATATAGAC ++ +YYYYYYYYYYYYYYYYWYWWWYYSY +@gi|65306796|gb|AY948116.1|_672_807_65/1 +GTTTGTATTTTGCTAAGATTGGCTT ++ +YYYYYYYYYYWYYYYYYYYSYYYWY +@gi|65306796|gb|AY948116.1|_2165_2331_66/1 +ACTTCTTTTGCGGAATAATTATTTT ++ +YYYYYYYYYYWYYYYWYWWUWWYWW +@gi|65306796|gb|AY948116.1|_1998_2143_67/1 +TCAAGGCTAACTATGAAACGACATA ++ +YYYYYYYYYYYYYYYYYYWUWYYSY +@gi|65306796|gb|AY948116.1|_904_1068_68/1 +TCAATTTAAGAACAAGAAAAAGCAT ++ +YYYYYYYYYYYYYYYWYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_2994_3156_69/1 +AGGTTTCAATCATTTTAAAAAACTG ++ +YYYYYYYYYYYYYYYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_2715_2892_6a/1 +TATTTTCAGGTTTAAAAGTAAAGCT ++ +YYYYYYYYYYYYYYYYYWWYWWYWW +@gi|65306796|gb|AY948116.1|_633_845_6b/1 +TAGCTCCACGCTCCATTTTTAAACT ++ +YYYYYYYYYYYYYYYYYYYWYWYSU +@gi|65306796|gb|AY948116.1|_319_475_6c/1 +ACAATATTTATAAAAATTTTCATAA ++ +YYYYYYYYYYYYYYWWYWWUWWWQQ +@gi|65306796|gb|AY948116.1|_2202_2401_6d/1 +CTTAAAGCCATACAAATGGATATGT ++ +YYYYYYYYYYWYYYYYWYWWYWYWY +@gi|65306796|gb|AY948116.1|_2825_2992_6e/1 +AATCTAACTTGATTAAGTTTCATAT ++ +YYYYYYYYYWYYYYWWYYYWYYYWY +@gi|65306796|gb|AY948116.1|_1262_1447_6f/1 +TTTTAATGGAAATATTTCTTTTAAC ++ +YYYYYYYYYWYYYWYYWYYWYWYWW +@gi|65306796|gb|AY948116.1|_2751_2927_70/1 +TTTTTGACTTTCATTTTCTAGCTTT ++ +YYYYYYYYYYYYYYYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_3034_3217_71/1 +TTGATGAATTTAAAATTATTGCAAT ++ +YYYYYYYYYYYYYYYYYYWWYYYWY +@gi|65306796|gb|AY948116.1|_678_858_72/1 +AGAATGCTAAAGGGGAAAAACAAAC ++ +YYYYYYYYYWYYYYYYYYWUWWWQQ +@gi|65306796|gb|AY948116.1|_149_317_73/1 +GTTTTTTAGGATTGCAATGAGCGGT ++ +YYYYYYYYYYWYYYWWYWWWWYYSU +@gi|65306796|gb|AY948116.1|_2458_2654_74/1 +TTTGCCTGTGGTTCTAAATTGCTTT ++ +YYYYYYYYYWYYYYYYYYYWWYYWW +@gi|65306796|gb|AY948116.1|_315_506_75/1 +TTCCACTTTTAGCTAAACCATTCTT ++ 
+YYYYYYYYYWYYYYYYWYYSYYYSY +@gi|65306796|gb|AY948116.1|_916_1071_76/1 +TGATGTTGCTCTATTTTCGCTTGTA ++ +YYYYYYYYYYYYYYYYYYYWWWYWY +@gi|65306796|gb|AY948116.1|_1530_1707_77/1 +TTGGGTTTAGCAGATAGTTTTCTTT ++ +YYYYYYYYYYYYYWYWYYYWWWWQQ +@gi|65306796|gb|AY948116.1|_227_417_78/1 +TAAGTGCTGATGAAATCATTGCTGA ++ +YYYYYYYYYYYYYYYYYYWUWWYWY +@gi|65306796|gb|AY948116.1|_792_964_79/1 +TAATTTATGCTCTTGTTCTTGTATT ++ +YYYYYYYYYYYYYYYYYYWUWWYWW +@gi|65306796|gb|AY948116.1|_2506_2676_7a/1 +ATATAACTTTACAAAGTCGTTTTTA ++ +YYYYYYYYYWYYYYYWYYWYWWYWY +@gi|65306796|gb|AY948116.1|_881_1061_7b/1 +CTATTTTCGCTTGTAATTCGTTTTC ++ +YYYYYYYYYWYYYYWWYYWYWWYWW +@gi|65306796|gb|AY948116.1|_998_1182_7c/1 +AAAGAGATACAAGACAAGGCTAAAG ++ +YYYYYYYYYYWYYWYWYYYWWWYWY diff --git a/t/data/r2.fq b/t/data/r2.fq new file mode 100644 index 0000000..6094a3b --- /dev/null +++ b/t/data/r2.fq @@ -0,0 +1,500 @@ +@gi|65306796|gb|AY948116.1|_1951_2136_0/2 +AATAAAAAATCCGCCTTGAGTTTTC ++ +YYYYYYYYYYWYYYYYYYWWWWWQQ +@gi|65306796|gb|AY948116.1|_1531_1682_1/2 +TGGGTTTAGCAGATAGTTTTCTTTT ++ +YYYYYYYYYWYYYYYYYWWYWYYSU +@gi|65306796|gb|AY948116.1|_603_785_2/2 +AGCTTACGCAAAAAATCGCAGAGCT ++ +YYYYYYYYYWYYYYYYYYWYWYYSU +@gi|65306796|gb|AY948116.1|_1888_2018_3/2 +AGGGGAGAAAATTCTACTATTAAAA ++ +YYYYYYYYYYYYYWYYYWWYWYYWY +@gi|65306796|gb|AY948116.1|_1990_2157_4/2 +TTTCGCTCATTTGCATATCCAAAGT ++ +YYYYYYYYYWYYYYYWYYYWYYYSY +@gi|65306796|gb|AY948116.1|_1165_1307_5/2 +CTTTGACTTTATGATATTCAAATTT ++ +YYYYYYYYYYYYYWYYWYYWWWYSY +@gi|65306796|gb|AY948116.1|_1622_1777_6/2 +CTGAGTGCGTAGCACGGAAGACGGA ++ +YYYYYYYYYYYYYYYYYWWWYWYWY +@gi|65306796|gb|AY948116.1|_1116_1298_7/2 +TATGATATTCAAATTTTAAATATTC ++ +YYYYYYYYYWYYYYWWYYYSYYYSU +@gi|65306796|gb|AY948116.1|_63_202_8/2 +ATAGCTTAGAAGAAAGCCTTGCAAT ++ +YYYYYYYYYYWYYYYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2908_3068_9/2 +TACAAGCTCTATTGCAATAATTTTA ++ +YYYYYYYYYWYYYWYYYYWWWWWQQ +@gi|65306796|gb|AY948116.1|_2909_3032_a/2 +AGAAAATGAAAGTCAAAAAATAATG ++ +YYYYYYYYYYYYYYWWYYWWYWWQQ 
+@gi|65306796|gb|AY948116.1|_3096_3266_b/2 +GGAAATCATATGCACTTTAATGCTA ++ +YYYYYYYYYYYYYYYYYYWUWWWQQ +@gi|65306796|gb|AY948116.1|_479_649_c/2 +TTGTAAAGGTGTAAATCCTGTAAGC ++ +YYYYYYYYYWYYYYYYYYWWWWWQQ +@gi|65306796|gb|AY948116.1|_1502_1651_d/2 +GAACAAGCAAAAAGAAACGATAGAG ++ +YYYYYYYYYYWYYYYYYWWYWYYSY +@gi|65306796|gb|AY948116.1|_944_1116_e/2 +TTTAAAGCTCTTTCGTGTCCTAGTT ++ +YYYYYYYYYWYYYYYYYYWYWWYWY +@gi|65306796|gb|AY948116.1|_3061_3240_f/2 +GCTCGACACTTGGCACAAGTGGCGG ++ +YYYYYYYYYYYYYYYYWYYSYYYWW +@gi|65306796|gb|AY948116.1|_1955_2172_10/2 +AAAAATCCGCCTTGAGTTTTCACTC ++ +YYYYYYYYYYYYYYYWYYYSYWYWY +@gi|65306796|gb|AY948116.1|_2987_3171_11/2 +TAGATTTGATTATAATGACAAGCTT ++ +YYYYYYYYYYYYYYWWYYYSYWYSU +@gi|65306796|gb|AY948116.1|_2261_2430_12/2 +AGAAAATACGCCATTTTTAAAAAAG ++ +YYYYYYYYYYYYYWYWYYWWYYYWY +@gi|65306796|gb|AY948116.1|_57_240_13/2 +AAGTGCATAGCTTAGAAGAAAGCCT ++ +YYYYYYYYYWYYYYWWYWWUWYYWW +@gi|65306796|gb|AY948116.1|_1168_1364_14/2 +ATTTAACATTTTTTGTTTTGTGTGC ++ +YYYYYYYYYWYYYYWWYWWWWWWQQ +@gi|65306796|gb|AY948116.1|_673_829_15/2 +TTTTAAACTTTGTGCGGTTAGGGTT ++ +YYYYYYYYYYYYYYYWYYYSYYYWY +@gi|65306796|gb|AY948116.1|_275_459_16/2 +TTTTCATAAGCTTTTTTATAAAGCC ++ +YYYYYYYYYWYYYWYWYWWUWWYWY +@gi|65306796|gb|AY948116.1|_3145_3288_17/2 +TTTTAGTTGATTTTTATTAGTGGGG ++ +YYYYYYYYYYYYYYYWYYWUWWYSU +@gi|65306796|gb|AY948116.1|_1295_1444_18/2 +CATAAAGTCAAAGATGAACGAGATA ++ +YYYYYYYYYYYYYYYYYWWYWYYSY +@gi|65306796|gb|AY948116.1|_2129_2328_19/2 +TCTTTTGCGGAATAATTATTTTTCT ++ +YYYYYYYYYYYYYYWWYYWWYWYWY +@gi|65306796|gb|AY948116.1|_1462_1646_1a/2 +AAAAGAGCTTAAATATCAAAGAGCT ++ +YYYYYYYYYYWYYYYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2770_2956_1b/2 +GCTATCTTTTTGACCAAATCAGAGT ++ +YYYYYYYYYYYYYWYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_1336_1508_1c/2 +TGAGGCACACAAAACAAAAAATGTT ++ +YYYYYYYYYYYYYWYWYYYWWWYSU +@gi|65306796|gb|AY948116.1|_2153_2336_1d/2 +AATAAACTTCTTTTGCGGAATAATT ++ +YYYYYYYYYYYYYYWWYYWWWYYSY +@gi|65306796|gb|AY948116.1|_844_1031_1e/2 +TAATCGCTACGAGAATAACGAAAAG ++ 
+YYYYYYYYYWYYYWYYYYYWYYYSY +@gi|65306796|gb|AY948116.1|_2821_2986_1f/2 +ACTTGATTAAGTTTCATATTGTTTA ++ +YYYYYYYYYYYYYYYWYYWUWWYSU +@gi|65306796|gb|AY948116.1|_58_177_20/2 +GTAAATACTCTCTATCGCATGATTA ++ +YYYYYYYYYWYYYYWWYYWWYWWQQ +@gi|65306796|gb|AY948116.1|_2612_2792_21/2 +AACTTTATACAGACTTTTAAAGCAA ++ +YYYYYYYYYYYYYYYYWYYWWWYSU +@gi|65306796|gb|AY948116.1|_2081_2254_22/2 +TCTTTGGTTAGTGAGATAATAGCCA ++ +YYYYYYYYYYYYYYYYWYYWYWYWY +@gi|65306796|gb|AY948116.1|_963_1092_23/2 +TCTAAATTTTGTATATGTTTTTGAT ++ +YYYYYYYYYYYYYYWWYYWYWWYSY +@gi|65306796|gb|AY948116.1|_1802_1954_24/2 +TATTGTAGTATATTAAAACAATATT ++ +YYYYYYYYYYYYYYWWYYWWYYYSU +@gi|65306796|gb|AY948116.1|_125_324_25/2 +ATTATGAAGGTATAAAATCTGTTAT ++ +YYYYYYYYYYYYYYYWYWWYWYYWW +@gi|65306796|gb|AY948116.1|_2801_2960_26/2 +TAAAAATCTTGCTTATGAAAAAGAA ++ +YYYYYYYYYWYYYYWWYYYWWWYSU +@gi|65306796|gb|AY948116.1|_1744_1934_27/2 +ATATTTGTCAAATATTTTGTCGTTT ++ +YYYYYYYYYYYYYYYYYYWUWWWQQ +@gi|65306796|gb|AY948116.1|_2759_2911_28/2 +TTCTAAAGAACGCTATCTTTTTGAC ++ +YYYYYYYYYYYYYWYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_1201_1398_29/2 +AGACTATCAGTTAACAAAGAATGCT ++ +YYYYYYYYYYYYYYYYWYWUWWYWW +@gi|65306796|gb|AY948116.1|_1966_2156_2a/2 +TTCGCTCATTTGCATATCCAAAGTA ++ +YYYYYYYYYWYYYYYWYYYSYWYSU +@gi|65306796|gb|AY948116.1|_1691_1870_2b/2 +CGTTTTAATAGTAGAATTTTGTCGT ++ +YYYYYYYYYYWYYYYWYYYSYWYWY +@gi|65306796|gb|AY948116.1|_2329_2493_2c/2 +CAATAATATTTGCTTCTTCAAAACA ++ +YYYYYYYYYYWYYYYYWYYWYYYSU +@gi|65306796|gb|AY948116.1|_145_306_2d/2 +TTGCAATGAGCGGTGGCAACTTTTG ++ +YYYYYYYYYYYYYYYYWYWYWYYWY +@gi|65306796|gb|AY948116.1|_930_1108_2e/2 +TCTTTCGTGTCCTAGTTCTAAATTT ++ +YYYYYYYYYYYYYYYYYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_580_743_2f/2 +TGTTATCGAGTGTAAAAAATACCGC ++ +YYYYYYYYYYYYYYYYWYWYWWYSU +@gi|65306796|gb|AY948116.1|_1032_1194_30/2 +CGAAAGAAAACGAATTACAAGCGAA ++ +YYYYYYYYYYYYYYYYYYWWWWYSY +@gi|65306796|gb|AY948116.1|_2128_2314_31/2 +GTCTTACTTTGGATATGCAAATGAG ++ +YYYYYYYYYYWYYYYWYYWUWWYSY +@gi|65306796|gb|AY948116.1|_196_351_32/2 
+ATCGCTATGATAAAACAAAGTGCTT ++ +YYYYYYYYYYYYYYYYWYYWYWWQQ +@gi|65306796|gb|AY948116.1|_1339_1491_33/2 +TCAAAAGCTCTTTGATATTTAAGCT ++ +YYYYYYYYYWYYYYYYYYWYWWYWW +@gi|65306796|gb|AY948116.1|_2509_2658_34/2 +AAGCTTTGCCTGTGGTTCTAAATTG ++ +YYYYYYYYYWYYYYYYYYYSYYYSY +@gi|65306796|gb|AY948116.1|_1940_2118_35/2 +TAATATACTACAATAAAAAATCCGC ++ +YYYYYYYYYYYYYYYYYWWUWWWQQ +@gi|65306796|gb|AY948116.1|_216_381_36/2 +TGCTTTAAAGTTAAGTGCTGATGAA ++ +YYYYYYYYYYYYYYYYYWWYWYYWW +@gi|65306796|gb|AY948116.1|_461_636_37/2 +TTTTATAAATATTGTGAAAATAAGA ++ +YYYYYYYYYYYYYYWWYWWUWWYSU +@gi|65306796|gb|AY948116.1|_325_461_38/2 +AATTTTCATAAGCTTTTTTATAAAG ++ +YYYYYYYYYYYYYYYYYWWUWWYSY +@gi|65306796|gb|AY948116.1|_2304_2478_39/2 +AGAAAAATAATTATTCCGCAAAAGA ++ +YYYYYYYYYWYYYYYWYYWYWYYSY +@gi|65306796|gb|AY948116.1|_278_467_3a/2 +TATAAAAATTTTCATAAGCTTTTTT ++ +YYYYYYYYYYYYYYYYYYWWWWYSU +@gi|65306796|gb|AY948116.1|_2717_2903_3b/2 +TTGCATTTGGATATTTTCAGGTTTA ++ +YYYYYYYYYYWYYYYWYWWUWYYWW +@gi|65306796|gb|AY948116.1|_1389_1563_3c/2 +AGCACCAAAAAAGAAAACTATCTGC ++ +YYYYYYYYYYWYYWYWYWWWYWWQQ +@gi|65306796|gb|AY948116.1|_3000_3167_3d/2 +AATGACAAGCTTTGGCAATTTAACG ++ +YYYYYYYYYWYYYYWWYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_360_547_3e/2 +AGACAATCACTAAAGAACTTACGCA ++ +YYYYYYYYYYWYYYYYYYYSYWYSY +@gi|65306796|gb|AY948116.1|_143_319_3f/2 +CTGTTATAAGTAATCATGCGATAGA ++ +YYYYYYYYYYYYYYYYYYYSYWYSY +@gi|65306796|gb|AY948116.1|_1382_1538_40/2 +GAAATAGGCAAATGGTGTGAAAAAA ++ +YYYYYYYYYYWYYYYYYYYSYWWQQ +@gi|65306796|gb|AY948116.1|_2707_2804_41/2 +ATCGTCAAATAGATATAGACCAAAG ++ +YYYYYYYYYYYYYYWWYYYSYYYSU +@gi|65306796|gb|AY948116.1|_430_615_42/2 +TTTTGCGTAAGCTCTTGGCATTGCT ++ +YYYYYYYYYYYYYYWWYWWYWWYWW +@gi|65306796|gb|AY948116.1|_2383_2537_43/2 +TCTTTGCCATTGATGATTGCTTGAA ++ +YYYYYYYYYYYYYYWWYWWUWWYSU +@gi|65306796|gb|AY948116.1|_839_973_44/2 +CATTTGCGTTAATTTATGCTCTTGT ++ +YYYYYYYYYYYYYWYYWYWYWWYSU +@gi|65306796|gb|AY948116.1|_2761_2927_45/2 +CTAAAGAACGCTATCTTTTTGACCA ++ +YYYYYYYYYYYYYYYWYWWUWWYSY 
+@gi|65306796|gb|AY948116.1|_375_561_46/2 +AACTTACGCATTTAAATGAATACTC ++ +YYYYYYYYYWYYYYYYYWWUWWYSY +@gi|65306796|gb|AY948116.1|_2751_2931_47/2 +AAAGAACTTTCTAAAGAACGCTATC ++ +YYYYYYYYYWYYYYYYWYYSYWYSU +@gi|65306796|gb|AY948116.1|_2312_2478_48/2 +AATTATTCCGCAAAAGAAGTTTATT ++ +YYYYYYYYYWYYYYYYWYYWYWWQQ +@gi|65306796|gb|AY948116.1|_1934_2078_49/2 +TTCAACTTTAAGCTTAAAAAACCTA ++ +YYYYYYYYYYWYYYYWYYWUWWYSU +@gi|65306796|gb|AY948116.1|_263_431_4a/2 +CTATGTTCTTACGCACTTCATCGCT ++ +YYYYYYYYYYYYYYYYWYWUWYYWY +@gi|65306796|gb|AY948116.1|_452_644_4b/2 +TATGAAAATTTTTATAAATATTGTG ++ +YYYYYYYYYYYYYWYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_2570_2722_4c/2 +ATATCTATTTGACGATAATTTTCAG ++ +YYYYYYYYYYYYYYYYYWWYWWYWY +@gi|65306796|gb|AY948116.1|_2549_2708_4d/2 +AAAATTTGGTTTCACAGCCTTTGAA ++ +YYYYYYYYYYYYYYYYYYYSYWYSU +@gi|65306796|gb|AY948116.1|_1498_1676_4e/2 +TTTTGAACAAGCAAAAAGAAACGAT ++ +YYYYYYYYYYWYYYYYYWWYWYYSU +@gi|65306796|gb|AY948116.1|_2890_3076_4f/2 +ATATCCAAATGCAAAAGCTAGAAAA ++ +YYYYYYYYYYYYYYWWYYYSYYYSY +@gi|65306796|gb|AY948116.1|_3149_3310_50/2 +TGAAACCTTTAGAAAAGGTATTAGG ++ +YYYYYYYYYYYYYYYYYYYSYYYWY +@gi|65306796|gb|AY948116.1|_1992_2129_51/2 +TATCAATCAAGGCTAACTATGAAAC ++ +YYYYYYYYYYWYYYYYYYYSYYYSU +@gi|65306796|gb|AY948116.1|_945_1120_52/2 +TTCCTTTAAAGCTCTTTCGTGTCCT ++ +YYYYYYYYYYYYYYWWYYYWYYYWW +@gi|65306796|gb|AY948116.1|_2526_2710_53/2 +TTTTACGATATGATAATAAATAAAA ++ +YYYYYYYYYYYYYWYYWYYWYYYSU +@gi|65306796|gb|AY948116.1|_2414_2548_54/2 +TTATTTATTATCATATCGTAAAAAC ++ +YYYYYYYYYYYYYYYYWYWUWYYWY +@gi|65306796|gb|AY948116.1|_1957_2161_55/2 +ACTATTTCGCTCATTTGCATATCCA ++ +YYYYYYYYYYYYYYYYYYWWYWYWY +@gi|65306796|gb|AY948116.1|_299_483_56/2 +CATTGCAATCCTAAAAAACAACCTG ++ +YYYYYYYYYYWYYYWWYYWUWYYSY +@gi|65306796|gb|AY948116.1|_2162_2328_57/2 +TCTTTTGCGGAATAATTATTTTTCT ++ +YYYYYYYYYYYYYYYYYYYWWWYSU +@gi|65306796|gb|AY948116.1|_292_484_58/2 +CACCGCTCATTGCAATCCTAAAAAA ++ +YYYYYYYYYYYYYYWWYYYSYYYWW +@gi|65306796|gb|AY948116.1|_1558_1716_59/2 +GGTGCTTTAGCAAATCGCACCTGTT ++ 
+YYYYYYYYYYYYYYYYYYWWWWYWY +@gi|65306796|gb|AY948116.1|_171_331_5a/2 +GTTTAATGCAGGTTGTTTTTTAGGA ++ +YYYYYYYYYYYYYYWWYWWYWWWQQ +@gi|65306796|gb|AY948116.1|_2624_2822_5b/2 +TTTTTCATAAGCAAGATTTTTAAAA ++ +YYYYYYYYYYWYYYYYWYYSYYYWW +@gi|65306796|gb|AY948116.1|_2024_2222_5c/2 +TAAGCGTTTTGTGAGATTATACCTA ++ +YYYYYYYYYYYYYYYWYWWWYWYWW +@gi|65306796|gb|AY948116.1|_2196_2392_5d/2 +ATGGCAAAGAAAATTTCTTTATAGT ++ +YYYYYYYYYYWYYYYYYWWUWWYSY +@gi|65306796|gb|AY948116.1|_2377_2544_5e/2 +TTATTATCATATCGTAAAAACGACT ++ +YYYYYYYYYYYYYYYYYYYSYWYWW +@gi|65306796|gb|AY948116.1|_3111_3290_5f/2 +GATTTTAGTTGATTTTTATTAGTGG ++ +YYYYYYYYYYYYYYYYYWWWYYYWW +@gi|65306796|gb|AY948116.1|_910_1065_60/2 +TGCTCTATTTTCGCTTGTAATTCGT ++ +YYYYYYYYYYYYYYWWYYWYWYYWW +@gi|65306796|gb|AY948116.1|_2966_3121_61/2 +CAATATGAAACTTAATCAAGTTAGA ++ +YYYYYYYYYYWYYYWWYYYWYYYSY +@gi|65306796|gb|AY948116.1|_1321_1511_62/2 +TTGCTTGTTCAAAAGAATTTTCAAA ++ +YYYYYYYYYYYYYYYYYYYSYWYWY +@gi|65306796|gb|AY948116.1|_1873_2073_63/2 +AAAATTCTACTATTAAGGGGAGAAA ++ +YYYYYYYYYWYYYYYYWYWYWYYWW +@gi|65306796|gb|AY948116.1|_2702_2884_64/2 +GGTTTAAAAGTAAAGCTAATACCTG ++ +YYYYYYYYYYYYYYYYYYYWYYYWW +@gi|65306796|gb|AY948116.1|_672_807_65/2 +TAAGTGAGAATGCTAAAGGGGAAAA ++ +YYYYYYYYYYWYYWYWYYYSYYYSY +@gi|65306796|gb|AY948116.1|_2165_2331_66/2 +ATATAATAATGATTTTAATCTTTTA ++ +YYYYYYYYYWYYYYWWYYWUWYYWY +@gi|65306796|gb|AY948116.1|_1998_2143_67/2 +ATATCCAAAGTAAGACAATCAAGGC ++ +YYYYYYYYYYWYYYYYWYWYWWYWY +@gi|65306796|gb|AY948116.1|_904_1068_68/2 +TGTTGCTCTATTTTCGCTTGTAATT ++ +YYYYYYYYYYYYYYWWYYWUWWWQQ +@gi|65306796|gb|AY948116.1|_2994_3156_69/2 +GATTATAATGACAAGCTTTGGCAAT ++ +YYYYYYYYYYWYYWYYWYWUWWYWW +@gi|65306796|gb|AY948116.1|_2715_2892_6a/2 +ATAGATATAGACCAAAGAATTTTAA ++ +YYYYYYYYYWYYYYYWYYWWYYYSY +@gi|65306796|gb|AY948116.1|_633_845_6b/2 +GATTTACACCTTTACAAGTTGTAAT ++ +YYYYYYYYYYYYYWYYWYWWWYYSU +@gi|65306796|gb|AY948116.1|_319_475_6c/2 +ACCTGCATTAAACCACAATGATAGA ++ +YYYYYYYYYYYYYYYYWYWUWWYWY +@gi|65306796|gb|AY948116.1|_2202_2401_6d/2 
+CAATCATCAATGGCAAAGAAAATTT ++ +YYYYYYYYYYYYYYYYYYYWWWWQQ +@gi|65306796|gb|AY948116.1|_2825_2992_6e/2 +AAAAACCGCAGGGCGTGGGCGCGGC ++ +YYYYYYYYYWYYYYWWYYWUWWYSY +@gi|65306796|gb|AY948116.1|_1262_1447_6f/2 +AACAAAGAGCTTGAATATTTAAAAT ++ +YYYYYYYYYWYYYYWWYYYSYYYSU +@gi|65306796|gb|AY948116.1|_2751_2927_70/2 +AAAGAACTTTCTAAAGAACGCTATC ++ +YYYYYYYYYWYYYYYWYWWWWYYWW +@gi|65306796|gb|AY948116.1|_3034_3217_71/2 +GGATAGCTTGTATCCTTTAAAGATA ++ +YYYYYYYYYWYYYWYYYYWUWWYWW +@gi|65306796|gb|AY948116.1|_678_858_72/2 +TTCTCGTAGCGATTAGCTCCACGCT ++ +YYYYYYYYYYYYYYYYYYWYWWYSU +@gi|65306796|gb|AY948116.1|_149_317_73/2 +TAAGTAATCATGCGATAGAGAGTAT ++ +YYYYYYYYYWYYYYYWYYWWYYYSY +@gi|65306796|gb|AY948116.1|_2458_2654_74/2 +ATAATTTTGTTTGTTTTGAAGAAGC ++ +YYYYYYYYYWYYYWYYYWWYWYYSU +@gi|65306796|gb|AY948116.1|_315_506_75/2 +AACAACCTGCATTAAACCACAATGA ++ +YYYYYYYYYWYYYYWWYYWWYWYWY +@gi|65306796|gb|AY948116.1|_916_1071_76/2 +ACAAGAAAAAGCATTACTTCAAAGA ++ +YYYYYYYYYWYYYYWWYWWWYWWQQ +@gi|65306796|gb|AY948116.1|_1530_1707_77/2 +CGCGCGCGAGAGAAAAAACAAAAAA ++ +YYYYYYYYYWYYYYWWYYYSYYYSY +@gi|65306796|gb|AY948116.1|_227_417_78/2 +ACTTCATCGCTAGTGCAAGAGTATT ++ +YYYYYYYYYYYYYYWWYYYWWWWQQ +@gi|65306796|gb|AY948116.1|_792_964_79/2 +TTAGCAAAATACAAACCCTAACCGC ++ +YYYYYYYYYYWYYYYYYYWUWWYWY +@gi|65306796|gb|AY948116.1|_2506_2676_7a/2 +ATTCTTCCCACTCAAAATAAGCTTT ++ +YYYYYYYYYYWYYWYYWYYSYYYSY +@gi|65306796|gb|AY948116.1|_881_1061_7b/2 +ATACAAGATTATAAAACATACGCTC ++ +YYYYYYYYYYWYYYYWYYWYWWWQQ +@gi|65306796|gb|AY948116.1|_998_1182_7c/2 +TATTTTCCAAAGGTTAAAATGTTTT ++ +YYYYYYYYYYWYYYYYWYWWWWWQQ