Raw content of Bio::Tools::Primer3
#
# Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
# This module is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Copyright Chad Matsalla
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::Tools::Primer3 - Create input for and work with the output from the
program primer3
=head1 SYNOPSIS
Chad will put synopses here by the end of the second week of october, 2002.
=head1 DESCRIPTION
Bio::Tools::Primer3 creates the input files needed to design primers using
primer3 and provides mechanisms to access data in the primer3 output files.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://www.bioperl.org/MailList.html - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
=head1 AUTHOR - Chad Matsalla
bioinformatics1@dieselwurks.com
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::Tools::Primer3;
use vars qw(@ISA);
use strict;
use Bio::Seq;
use Bio::SeqFeature::Primer;
use Bio::Seq::PrimedSeq;
use Bio::Seq::SeqFactory;
use Bio::Root::Root;
use Bio::Root::IO;
use Dumpvalue;
@ISA = qw(Bio::Root::Root Bio::Root::IO);
# Chad likes to use this to debug large hashes.
my $dumper = new Dumpvalue;
# this was a bunch of the seqio things, now deprecated. delete it soon.
# sub _initialize {
# my($self,@args) = @_;
# $self->SUPER::_initialize(@args);
# if( ! defined $self->sequence_factory ) {
# $self->sequence_factory(new Bio::Seq::SeqFactory
# (-verbose => $self->verbose(),
# -type => 'Bio::Seq'));
# }
# }
=head2 new()
Title : new()
Usage :
Function:
Returns :
Args :
Notes :
=cut
sub new {
my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my($filename) = $self->_rearrange([qw(FILE)],@args);
if (!$filename) {
print("Ahh grasshopper, you are planning to create a primer3 infile\n");
return $self;
}
$self->{filename} = $filename;
# check to see that the file exists
# I think that catfile should be used here.
if (!-f $filename) {
print("That file doesn't exist. Bah.\n");
}
$self->_initialize_io( -file => $filename );
return $self;
}
=head2 null
Title :
Usage :
Function:
Returns :
Args :
Notes :
=cut
=head2 next_primer()
Title : next_primer()
Usage : $primer3 = $stream->next_primer()
Function: returns the next primer in the stream
Returns : Bio::Seq::PrimedSeq containing:
- 2 Bio::SeqFeature::Primer representing the primers
- 1 Bio::Seq representing the target sequence
- 1 Bio::Seq representing the amplified region
Args : NONE
Notes :
=cut
sub next_primer {
my $self = shift;
my $fh = $self->_fh();
my ($line,%primer);
# first, read in the next set of primers
while ($line = $self->_readline()) {
chomp ($line);
last if ($line =~ /^=/);
$line =~ m/(^.*)\=(.*$)/;
$primer{$1} = $2;
}
# then, get the primers as SeqFeature::Primer objects
my ($left,$right) = &_create_primer_features(\%primer);
# then, create the sequence to place them on
my $sequence = Bio::Seq->new(-seq => $primer{SEQUENCE},
-id => $primer{PRIMER_SEQUENCE_ID});
# print("Sequence is ".$primer{SEQUENCE}." and id is ".$primer{PRIMER_SEQUENCE_ID}."\n");
my $primedseq = new Bio::Seq::PrimedSeq(
-target_sequence => $sequence,
-left_primer => $left,
-right_primer => $right,
-primer_sequence_id => $primer{PRIMER_SEQUENCE_ID},
-primer_comment => $primer{PRIMER_COMMENT},
-target => $primer{TARGET},
-primer_product_size_range => $primer{PRIMER_PRODUCT_SIZE_RANGE},
-primer_file_flag => $primer{PRIMER_FILE_FLAG},
-primer_liberal_base => $primer{PRIMER_LIBERAL_BASE},
-primer_num_return => $primer{PRIMER_NUM_RETURN},
-primer_first_base_index => $primer{PRIMER_FIRST_BASE_INDEX},
-primer_explain_flag => $primer{PRIMER_EXPLAIN_FLAG},
-primer_pair_compl_any => $primer{PRIMER_PAIR_COMPL_ANY},
-primer_pair_compl_end => $primer{PRIMER_PAIR_COMPL_END},
-primer_product_size => $primer{PRIMER_PRODUCT_SIZE}
);
return $primedseq;
}
=head2 _create_primer_features()
Title : _create_primer_features()
Usage : &_create_primer_features()
Function: This is an internal method used by next_seq() to create the
Bio::SeqFeature::Primer objects necessary to represent the primers
themselves.
Returns : An array of 2 Bio::SeqFeature::Primer objects.
Args : None.
Notes : This is an internal method. Do not call this method.
=cut
sub _create_primer_features {
my $rdat = shift;
my (%left,%right,$updir,$downdir,$var,$trunc);
my @variables = qw(
PRIMER_DIRECTION
PRIMER_DIRECTION_END_STABILITY
PRIMER_DIRECTION_EXPLAIN
PRIMER_DIRECTION_GC_PERCENT
PRIMER_DIRECTION_PENALTY
PRIMER_DIRECTION_SELF_ANY
PRIMER_DIRECTION_SELF_END
PRIMER_DIRECTION_SEQUENCE
PRIMER_DIRECTION_TM
PRIMER_FIRST_BASE_INDEX
);
# create the hash to pass into the creation routine
# I do it this way because the primer3 outfile variables are exactly the same for each of
# left and right. I create two hashes- one for the left and one for the right primer.
foreach $updir (qw(LEFT RIGHT)) {
my %dat;
foreach (@variables) {
($var = $_) =~ s/DIRECTION/$updir/e;
# should you truncate the name of each variable?
# for example, should the value be: PRIMER_RIGHT_PENALTY or PENALTY?
# i think it should be the second one
if (/^PRIMER_DIRECTION$/) {
$trunc = "PRIMER";
}
elsif (/^PRIMER_FIRST_BASE_INDEX/) {
$trunc = "FIRST_BASE_INDEX";
}
else {
($trunc = $_) =~ s/PRIMER_DIRECTION_//;
}
$dat{"-$trunc"} = $rdat->{$var};
}
if ($updir eq "LEFT") {
%left = %dat;
$left{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-left";
}
else {
%right = %dat;
$right{-id} = $rdat->{PRIMER_SEQUENCE_ID}."-right";
}
}
my $primer_left = new Bio::SeqFeature::Primer(%left);
my $primer_right = new Bio::SeqFeature::Primer(%right);
return($primer_left,$primer_right);
}
=head2 get_amplified_region()
Title : get_amplified_region()
Usage : $primer->get_amplified_region()
Function: Returns a Bio::Seq object representing the sequence amplified
Returns : (I think) A Bio::Seq object
Args : None.
Notes : This is not implemented at this time.
Note to chad: implement this simple getter.
Developer notes: There obviously isn't a way for a single primer to know about
its amplified region unless it is paired with another primer. At this time
these object will generally be created with another so I will put in this
method. If there is no sequence null is returned.
THIS DOES NOT BELONG HERE. Put this into something else.
=cut
sub get_amplified_region {
my ($self) = @_;
} # end get_amplified_region
=head2 get_amplification_error()
Title : get_amplification_error()
Usage :
Function:
Returns :
Args :
Notes :
Developer Notes:
THIS DOES NOT BELONG HERE. Put this into something else.
=cut
sub get_amplification_error {
my $primer = $_[1];
my $error = $Primer3::primers{$primer}{PRIMER_ERROR};
if ($error) { return $error; }
else { return "Some error that primer3 didn't define.\n"; }
}
=head2 _set_target()
Title : _set_target()
Usage : &_set_target($self);
Function:
Returns :
Args :
Notes :
Developer Notes: Really I have no idea why I put this in here.
It can is referenced by new_deprecated and by run_primer3
=cut
sub _set_target {
my $self = shift;
my ($sequence,$primer,$primer_left,$primer_right,$position_left,$position_right,$boggle);
$boggle = 1;
foreach $primer (sort keys %{$self->{primers}}) {
$sequence = $self->{primers}{$primer}{SEQUENCE};
$primer_left = $self->{primers}{$primer}{PRIMER_LEFT};
$primer_right = $self->{primers}{$primer}{PRIMER_RIGHT};
if (!$primer_left) {
$self->{primers}{$primer}{design_failed} = "1";
}
else {
$primer_left =~ m/(.*)\,(.*)/;
$position_left = $1+$2-1;
$primer_right =~ m/(.*)\,(.*)/;
$position_right = $1-$2;
$self->{primers}{$primer}{left} = $position_left;
$self->{primers}{$primer}{right} = $position_right;
$self->{primers}{$primer}{amplified} = substr($sequence,$position_left,$position_right-$position_left);
}
}
}
=head2 _read_file($self,$filename)
Title : _read_file($self,$filename)
Usage :
Function:
Returns : A scalar containing the contents of $filename
Args : $self and the name of a file to parse.
Notes :
Developer notes: Honestly, I have no idea what this is for.
=cut
sub _read_file {
# my ($self,$filename) = @_;
# set this to keep track of things....
# $self->{outfilename} = $filename;
# to make this better for bioperl, chad should really be using catfile and things.
#
# my $fh = new FileHandle;
# open($fh,$filename) or die "I can't open the primer report ($filename) : $!\n";
# # _parse_report();
# # my %Primer3::primers;
# my ($output,$line);
# while ($line=<$fh>) {
# # print("Adding $line\n");
# $output .= $line;
# } # end while
# # print("\$output is $output\n");
# return $output;
}
=head2 _parse_report()
Title : _parse_report()
Usage : &_parse_report($self,$filename);
Function: Parse a primer3 outfile and place everything into an object under
{primers} with PRIMER_SEQUENCE_ID being the name of the keys for the
{primers} hash.
Returns : Nothing.
Args : $self and the name of a file to parse.
Notes :
=cut
sub _parse_report {
# old
# my ($self,$filename) = @_;
my ($self,$outputs) = @_;
# print("\$self is $self, \$outputs are $outputs\n");
# print("in _parse_report, \$self is $self\n");
# set this to keep track of things....
my ($sequence_name,$line,$counter,$variable_name,$variable_value);
my @output = split/\n/,$outputs;
foreach $line (@output) {
# print("Reading line $line\n");
next if ($line =~ /^\=/);
if ($line =~ m/^PRIMER_SEQUENCE_ID/) {
$line =~ m/(\S+)=(.*$)/;
$variable_name = $1;
$sequence_name = $2;
$variable_value = $2;
}
else {
$line =~ m/(\S+)=(.*$)/;
$variable_name = $1;
$variable_value = $2;
}
# print("$sequence_name\t$variable_name\t$variable_value\n");
$self->{primers}{$sequence_name}{$variable_name} = $variable_value;
} # end while <>
} # end parse_report
=head2 _construct_empty()
Title : _construct_empty()
Usage : &_construct_empty($self);
Function: Construct an empty object that will be used to construct a primer3
input "file" so that it can be run.
Returns :
Args :
Notes :
=cut
sub _construct_empty {
my $self = shift;
$self->{inputs} = {};
return;
}
=head2 add_target(%stuff)
Title : add_target(%stuff)
Usage : $o_primer->add_target(%stuff);
Function: Add an target to the infile constructor.
Returns :
Args : A hash. Looks something like this:
$o_primer2->add_target(
-PRIMER_SEQUENCE_ID => "sN11902",
-PRIMER_COMMENT => "3831",
-SEQUENCE => "some_sequence",
-TARGET => "513,26",
-PRIMER_PRODUCT_SIZE_RANGE => "100-500",
-PRIMER_FILE_FLAG => "0",
-PRIMER_LIBERAL_BASE => "1",
-PRIMER_NUM_RETURN => "1",
-PRIMER_FIRST_BASE_INDEX => "1",
-PRIMER_EXPLAIN_FLAG => "1");
The add_target() method does not validate the things you put into
this parameter hash. Read the docs for Primer3 to see which fields
do what and how they should be used.
Notes : To design primers, first create a new CSM::Primer3 object with the
-construct_infile parameter. Then, add targets using this method
(add_target()) with the target hash as above in the Args: section.
Be careful. No validation will be done here. All of those parameters
will be fed straight into primer3.
Once you are done adding targets, invoke the function run_primer3().
Then retrieve the results using something like a loop around the array
from get_primer_sequence_IDs();
=cut
sub add_target {
my ($self,%args) = @_;
my ($currkey,$renamed,$sequence_id,$value);
if (!$args{-PRIMER_SEQUENCE_ID}) {
print("You cannot add an element to the primer3 infile without specifying the PRIMER_SEQUENCE_ID. Sorry.\n");
}
else {
$sequence_id = $args{-PRIMER_SEQUENCE_ID};
foreach $currkey (keys %args) {
# print("\$currkey is $currkey\n");
next if ($currkey eq "-PRIMER_SEQUENCE_ID");
($renamed = $currkey) =~ s/-//;
# print("Adding $renamed to the hash under $sequence_id\n");
$value = $args{$currkey};
# print("\$value is $value\n");
if ($renamed eq "SEQUENCE") { $value =~ s/\n//g; }
$self->{infile}{$sequence_id}{$renamed} = $value;
}
}
}
=head2 get_primer_sequence_IDs()
Title : get_primer_sequence_IDs()
Usage : $o_phred->get_primer_sequence_IDs();
Function: Return the primer sequence ID's. These normally correspond to
the name of a sequence in a database but can be whatever was used when
the primer3 infile was constructed.
Returns : An array containing the names of the primer sequence ID's
Args : None.
Notes : This would be used as the basis for an iterator to loop around each
primer that was designed.
=cut
sub get_primer_sequence_IDs {
my $self = shift;
return sort keys %{$self->{primers}};
} # end get keys
=head2 dump_hash()
Title : dump_hash()
Usage : $o_primer->dump_hash();
Function: Dump out the CSM::Primer3 object.
Returns : Nothing.
Args : None.
Notes : Used extensively in debugging.
=cut
sub dump_hash {
my $self = shift;
my $dumper = new Dumpvalue;
$dumper->dumpValue($self);
} # end dump_hash
=head2 dump_infile_hash()
Title : dump_infile_hash()
Usage : $o_primer->dump_infile_hash();
Function: Dump out the contents of the infile hash.
Returns : Nothing.
Args : None.
Notes : Used for debugging the construction of the infile.
=cut
sub dump_infile_hash {
my $self = shift;
my $dumper = new Dumpvalue;
$dumper->dumpValue($self->{infile});
}
1;
__END__
=head2 placeholder
Title : This is a place holder so chad can cut and paste
Usage :
Function:
Returns :
Args :
Notes :
=cut
=head1 SEE ALSO
perl(1).
=cut