# Ensembl module for Bio::EnsEMBL::Analysis::Tools::FeatureFactory
#
# Copyright (c) 2004 Ensembl
#

=head1 NAME

  Bio::EnsEMBL::Analysis::Tools::FeatureFactory

=head1 SYNOPSIS

  my $featurefactory = Bio::EnsEMBL::Analysis::Tools::FeatureFactory->new;
  my $feature_pair = $featurefactory->create_feature_pair
    (1, 13, -1, 300, 45, 56, 1, 'Q12732', 95, 2.3e-35, '', $slice, $analysis);
  $featurefactory->validate($feature_pair);

=head1 DESCRIPTION

This is a utilities module which provides methods for feature creation
and feature validation for various feature types

=head1 CONTACT

Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk

=cut

package Bio::EnsEMBL::Analysis::Tools::FeatureFactory;

use strict;
use warnings;

use Scalar::Util qw(blessed);

use Bio::EnsEMBL::FeaturePair;
use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::RepeatFeature;
use Bio::EnsEMBL::RepeatConsensus;
use Bio::EnsEMBL::DnaDnaAlignFeature;
use Bio::EnsEMBL::DnaPepAlignFeature;
use Bio::EnsEMBL::MiscFeature;
use Bio::EnsEMBL::Attribute;
use Bio::EnsEMBL::MiscSet;
use Bio::EnsEMBL::PredictionTranscript;
use Bio::EnsEMBL::PredictionExon;
use Bio::EnsEMBL::SimpleFeature;
use Bio::EnsEMBL::Map::Marker;
use Bio::EnsEMBL::Map::MarkerFeature;
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Analysis::Programs;

# This class has no parent classes.
our @ISA = ();


=head2 new

  Arg [1]   : string, class name
  Arg [...] : any further arguments are accepted but ignored
  Function  : create a FeatureFactory; the object holds no state
  Returntype: Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Exceptions: none
  Example   : my $ff = Bio::EnsEMBL::Analysis::Tools::FeatureFactory->new;

=cut

sub new {
  my ($class, @args) = @_;
  my $self = bless {}, $class;
  return $self;
}


#feature creation methods#

=head2 create_simple_feature

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start
  Arg [3]   : int, end,
  Arg [4]   : int, strand (must be 0, 1 or -1)
  Arg [5]   : int score,
  Arg [6]   : string, display label,
  Arg [7]   : string, sequence name
  Arg [8]   : Bio::EnsEMBL::Slice
  Arg [9]   : Bio::EnsEMBL::Analysis
  Function  : create a Bio::EnsEMBL::SimpleFeature
  Returntype: Bio::EnsEMBL::SimpleFeature
  Exceptions:
  Example   :

=cut

sub create_simple_feature {
  my ($self, $start, $end, $strand, $score, $display_label,
      $seqname, $slice, $analysis) = @_;

  my $simple_feature = Bio::EnsEMBL::SimpleFeature->new
    (
     -start         => $start,
     -end           => $end,
     -strand        => $strand,
     -score         => $score,
     -display_label => $display_label,
     -seqname       => $seqname,
     -slice         => $slice,
     -analysis      => $analysis,
    );
  return $simple_feature;
}


=head2 create_repeat_consensus

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : string, name
  Arg [3]   : string, class
  Arg [4]   : string, type
  Arg [5]   : string, consensus sequence
  Arg [6]   : int length
  Function  : create a Bio::EnsEMBL::RepeatConsensus
  Returntype: Bio::EnsEMBL::RepeatConsensus
  Exceptions:
  Example   :

=cut

sub create_repeat_consensus {
  my ($self, $name, $class, $type, $consensus_seq, $length) = @_;

  my $repeat_consensus = Bio::EnsEMBL::RepeatConsensus->new
    (
     -name             => $name,
     -length           => $length,
     -repeat_class     => $class,
     -repeat_consensus => $consensus_seq,
     -repeat_type      => $type,
    );
  return $repeat_consensus;
}


=head2 create_repeat_feature

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start
  Arg [3]   : int, end,
  Arg [4]   : int, strand (must be 0, 1 or -1)
  Arg [5]   : int score,
  Arg [6]   : int, repeat_start
  Arg [7]   : int, repeat_end
  Arg [8]   : Bio::EnsEMBL::RepeatConsensus
  Arg [9]   : string, sequence name
  Arg [10]  : Bio::EnsEMBL::Slice
  Arg [11]  : Bio::EnsEMBL::Analysis
  Function  : create a Bio::EnsEMBL::RepeatFeature
  Returntype: Bio::EnsEMBL::RepeatFeature
  Exceptions:
  Example   :

=cut

sub create_repeat_feature {
  my ($self, $start, $end, $strand, $score, $repeat_start, $repeat_end,
      $repeat_consensus, $seqname, $slice, $analysis) = @_;

  my $repeat_feature = Bio::EnsEMBL::RepeatFeature->new
    (
     -start            => $start,
     -end              => $end,
     -strand           => $strand,
     -slice            => $slice,
     -analysis         => $analysis,
     -repeat_consensus => $repeat_consensus,
     -hstart           => $repeat_start,
     -hend             => $repeat_end,
     -score            => $score,
     -seqname          => $seqname,
    );
  return $repeat_feature;
}


=head2 create_feature_pair

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start
  Arg [3]   : int, end
  Arg [4]   : int, strand must be 0, 1 or -1
  Arg [5]   : int, score
  Arg [6]   : int, hstart
  Arg [7]   : int, hend
  Arg [8]   : int, hstrand
  Arg [9]   : string, hseqname
  Arg [10]  : int, percent id
  Arg [11]  : float, p value
  Arg [12]  : string, seqname
  Arg [13]  : Bio::EnsEMBL::Slice
  Arg [14]  : Bio::EnsEMBL::Analysis
  Arg [15]  : positive matches (optional), set on the pair via
              positive_matches
  Arg [16]  : identical matches (optional), set on the pair via
              identical_matches
  Function  : creates a Bio::EnsEMBL::FeaturePair
  Returntype: Bio::EnsEMBL::FeaturePair
  Exceptions:
  Example   :

=cut

sub create_feature_pair {
  my ($self, $start, $end, $strand, $score, $hstart, $hend,
      $hstrand, $hseqname, $percent_id, $p_value, $seqname,
      $slice, $analysis, $positive_matches, $identical_matches) = @_;

  my $fp = Bio::EnsEMBL::FeaturePair->new
    (
     -start      => $start,
     -end        => $end,
     -strand     => $strand,
     -hstart     => $hstart,
     -hend       => $hend,
     -hstrand    => $hstrand,
     -percent_id => $percent_id,
     -score      => $score,
     -p_value    => $p_value,
     -hseqname   => $hseqname,
     -analysis   => $analysis,
    );

  # These four are set through accessors rather than the constructor.
  $fp->seqname($seqname);
  $fp->slice($slice);
  $fp->positive_matches($positive_matches);
  $fp->identical_matches($identical_matches);
  return $fp;
}


=head2 create_misc_feature

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start,
  Arg [3]   : int, end
  Arg [4]   : int, strand
  Arg [5]   : Bio::EnsEMBL::Slice
  Function  : create a Bio::EnsEMBL::MiscFeature
  Returntype: Bio::EnsEMBL::MiscFeature

=cut

sub create_misc_feature {
  my ($self, $start, $end, $strand, $slice) = @_;

  my $mf = Bio::EnsEMBL::MiscFeature->new
    (
     -start  => $start,
     -end    => $end,
     -strand => $strand,
     -slice  => $slice,
    );
  return $mf;
}


=head2 add_misc_feature_attribute

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : Bio::EnsEMBL::MiscFeature
  Arg [3]   : string, code
  Arg [4]   : string, name
  Arg [5]   : string, description
  Arg [6]   : string, value
  Function  : build a Bio::EnsEMBL::Attribute from the given values and
              add it to the misc feature with add_Attribute

=cut

sub add_misc_feature_attribute {
  my ($self, $mf, $code, $name, $description, $value) = @_;

  $mf->add_Attribute
    (
     Bio::EnsEMBL::Attribute->new
     (
      -CODE        => $code,
      -NAME        => $name,
      -DESCRIPTION => $description,
      -VALUE       => $value,
     )
    );
}


=head2 add_misc_set

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : Bio::EnsEMBL::MiscFeature
  Arg [3]   : string, code
  Arg [4]   : string, name
  Arg [5]   : string, description
  Arg [6]   : longest feature, passed to the misc set as -LONGEST_FEATURE
  Function  : build a Bio::EnsEMBL::MiscSet from the given values and
              add it to the misc feature with add_MiscSet

=cut

sub add_misc_set {
  my ($self, $mf, $code, $name, $description, $longest_feature) = @_;

  # Bio::EnsEMBL::MiscSet->new reads -LONGEST_FEATURE; the -VALUE key
  # used previously is not one of its constructor arguments, so the
  # value was silently discarded.
  $mf->add_MiscSet
    (
     Bio::EnsEMBL::MiscSet->new
     (
      -CODE            => $code,
      -NAME            => $name,
      -DESCRIPTION     => $description,
      -LONGEST_FEATURE => $longest_feature,
     )
    );
}


=head2 create_prediction_exon

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start,
  Arg [3]   : int, end
  Arg [4]   : int, strand
  Arg [5]   : int, score
  Arg [6]   : float, p value
  Arg [7]   : int, phase
  Arg [8]   : string, seqname
  Arg [9]   : Bio::EnsEMBL::Slice
  Arg [10]  : Bio::EnsEMBL::Analysis
  Function  : create a Bio::EnsEMBL::PredictionExon
  Returntype: Bio::EnsEMBL::PredictionExon
  Exceptions:
  Example   :

=cut

sub create_prediction_exon {
  my ($self, $start, $end, $strand, $score, $pvalue, $phase,
      $seqname, $slice, $analysis) = @_;

  my $exon = Bio::EnsEMBL::PredictionExon->new
    (
     -start    => $start,
     -end      => $end,
     -strand   => $strand,
     -score    => $score,
     -p_value  => $pvalue,
     -phase    => $phase,
     -slice    => $slice,
     -seqname  => $seqname,
     -analysis => $analysis,
    );
  return $exon;
}


=head2 create_prediction_transcript

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : arrayref, array of Bio::EnsEMBL::PredictionExons
  Arg [3]   : Bio::EnsEMBL::Slice
  Arg [4]   : Bio::EnsEMBL::Analysis
  Function  : create a Bio::EnsEMBL::PredictionTranscript
  Returntype: Bio::EnsEMBL::PredictionTranscript
  Exceptions:
  Example   :

=cut

sub create_prediction_transcript {
  my ($self, $exons, $slice, $analysis) = @_;

  my $transcript = Bio::EnsEMBL::PredictionTranscript->new
    (
     -exons    => $exons,
     -slice    => $slice,
     -analysis => $analysis,
    );
  return $transcript;
}


=head2 create_marker

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, database id
  Function  : create a marker with a specific database id
  Returntype: Bio::EnsEMBL::Map::Marker
  Exceptions:
  Example   :

=cut

sub create_marker {
  my ($self, $dbID) = @_;

  my $m = Bio::EnsEMBL::Map::Marker->new();
  $m->dbID($dbID);
  return $m;
}


=head2 create_marker_feature

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : int, start
  Arg [3]   : int, end
  Arg [4]   : int, strand
  Arg [5]   : Bio::EnsEMBL::Map::Marker
  Arg [6]   : string, seqname
  Arg [7]   : Bio::EnsEMBL::Slice
  Arg [8]   : Bio::EnsEMBL::Analysis
  Function  : create a Bio::EnsEMBL::Map::MarkerFeature
  Returntype: Bio::EnsEMBL::Map::MarkerFeature
  Exceptions:
  Example   :

=cut

sub create_marker_feature {
  my ($self, $start, $end, $strand, $marker, $seqname, $slice,
      $analysis) = @_;

  my $mf = Bio::EnsEMBL::Map::MarkerFeature->new();
  $mf->start($start);
  $mf->end($end);
  $mf->strand($strand);
  $mf->marker($marker);
  $mf->seqname($seqname);
  $mf->slice($slice);
  $mf->analysis($analysis);
  return $mf;
}


#validation methods#

=head2 validate

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : Bio::EnsEMBL::Feature
  Function  : validates feature
  Returntype: Bio::EnsEMBL::Feature, the feature that was passed in
  Exceptions: throws if no feature is given or if it is not a
              Bio::EnsEMBL::Feature object. Also throws, after printing
              the individual problems to STDERR, if no slice or
              analysis is defined, if the start, end or strand aren't
              defined, or if start is greater than end
  Example   : $featurefactory->validate($feature);

=cut

sub validate {
  my ($self, $feature) = @_;

  # print STDERR "validating: ".$feature->start."-".$feature->end.":".$feature->hseqname."::".$feature->hstart."-".$feature->hend."\n";

  if (!$feature) {
    throw("Can't validate a feature without a feature ".
          "FeatureFactory::validate");
  }
  # blessed() first, so that an unblessed reference gets the intended
  # "Wrong type" exception rather than a raw "Can't call method" die.
  if (!(blessed($feature) && $feature->isa('Bio::EnsEMBL::Feature'))) {
    throw("Wrong type ".$feature." must be a Bio::EnsEMBL::Feature ".
          "object FeatureFactory::validate");
  }

  my @error_messages;
  my $start = $feature->start;
  my $end   = $feature->end;

  if (not defined $feature->slice) {
    push(@error_messages, "No slice defined");
  }
  if (not defined $feature->analysis) {
    push(@error_messages, "No analysis defined");
  }
  if (not defined $start) {
    push(@error_messages, "No start defined");
  }
  if (not defined $end) {
    push(@error_messages, "No end defined");
  }
  if (not defined $feature->strand) {
    push(@error_messages, "No strand defined");
  }
  # Only compare coordinates that exist; a missing start or end has
  # already been reported above and would just trigger an
  # uninitialized-value warning here.
  if (defined $start && defined $end && $start > $end) {
    push(@error_messages,
         "Start is greater than end ".$start." ".$end);
  }

  if (@error_messages > 0) {
    # Trailing newline keeps the list separate from the exception text.
    print STDERR join("\n", @error_messages), "\n";
    throw("Invalid feature ".$feature." FeatureFactory:validate");
  }
  return $feature;
}


=head2 validate_prediction_transcript

  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
  Arg [2]   : Bio::EnsEMBL::PredictionTranscript
  Arg [3]   : boolean, if true any exon without a slice or analysis is
              given the transcript's slice or analysis before it is
              validated
  Function  : validates every exon of the transcript and the transcript
              itself with validate, then checks that the transcript
              translates and that the translation contains no '*'
  Returntype: int, 1 on success
  Exceptions: throws if no transcript is given, if it is not a
              Bio::EnsEMBL::PredictionTranscript, if it has no exons,
              if any exon or the transcript fails validate, if
              translate returns nothing or if the translation contains
              a '*'
  Example   : $featurefactory->validate_prediction_transcript($pt, 1);

=cut

sub validate_prediction_transcript {
  my ($self, $pt, $attach_to_exons) = @_;

  if (!$pt) {
    throw("Can't validate a prediction transcript without a ".
          "prediction transcript ".
          "FeatureFactory:validate_prediction_transcript");
  }
  if (!(blessed($pt) && $pt->isa('Bio::EnsEMBL::PredictionTranscript'))) {
    throw("Wrong type ".$pt." must be a Bio::EnsEMBL::PredictionTranscript ".
          "FeatureFactory:validate_prediction_transcript");
  }

  my @exons = @{$pt->get_all_Exons};
  if (@exons == 0) {
    throw("problem ".$pt." has no exons");
  }

  foreach my $e (@exons) {
    if ($attach_to_exons) {
      $e->slice($pt->slice) if (!$e->slice);
      $e->analysis($pt->analysis) if (!$e->analysis);
    }
    $self->validate($e);
  }
  $self->validate($pt);

  my $tseq;
  eval {
    $tseq = $pt->translate;
  };
  # Capture $@ straight away: it is a global that any later eval,
  # including one inside a method called below, would overwrite.
  my $translate_error = $@;

  if (!$tseq) {
    throw($pt." translate didn't return a sequence $translate_error");
  }

  my $peptide = $tseq->seq;
  if ($peptide =~ /\*/) {
    my $msg = $pt." doesn't have a valid translation ".$peptide;
    $msg .= "\n$translate_error" if ($translate_error);
    throw($msg);
  }
  return 1;
}

1;