Raw content of Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker
=head1 SYNOPSIS
my $repeat_masker = Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker->
new(
-query => $slice,
-program => 'repeatmasker',
-options => '-low'
);
$repeat_masker->run;
my @repeats = @{$repeat_masker->output};
=head1 DESCRIPTION
RepeatMasker expects to run the program RepeatMasker
and produce RepeatFeatures which can be stored in the repeat_feature
and repeat_consensus tables in the core database
=head1 CONTACT
Post questions to : anacode@sanger.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker;
use strict;
use warnings;
use Bio::SeqIO;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::Tools::Utilities qw(write_seqfile);
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::RepeatMasker);
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker
Arg [2] : string, filename
Function : open and parse the results file into repeat
features
Returntype: none
Exceptions: throws on failure to open or close output file
Example :
=cut
sub parse_results{
my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my $feature_factory = $self->feature_factory;
open(OUT, "<".$results) or throw("FAILED to open ".$results.
"RepeatMasker:parse_results");
REPEAT:while(){
if (/no repetitive sequences detected/) {
print "RepeatMasker didn't find any repetitive sequences";
last REPEAT; #no repeats found no point carrying on
}
if(/only contains ambiguous bases/){
print "Sequence contains too many N's \n";
last REPEAT;
}
my @columns;
if(/\d+/){ #ignoring introductory lines
chomp;
@columns = split;
pop @columns if $columns[-1] eq '*';
#if (@columns != 15 || @columns != 14) {
# throw("Can't parse repeatmasker output unexpected number ".
# "of columns in the output ".@columns." in ".$_." ".
# "RepeatMasker:parse_results");
#}
my ($score, $name, $start, $end, $strand,
$repeat_name, $repeat_class, $repeatmunge, $repeatmunge2);
if(@columns == 15){
($score, $name, $start, $end, $strand,
$repeat_name, $repeat_class) = @columns[0, 4, 5, 6, 8, 9, 10];
}elsif(@columns == 14){
($score, $name, $start, $end, $strand,
$repeatmunge,$repeatmunge2) = @columns[0, 4, 5, 6, 8, 9, 10];
if ($repeatmunge =~ /(\S+)(LINE\S+)/) {
$repeatmunge =~ /(\S+)(LINE\S+)/;
$repeat_name = $1;
$repeat_class = $2;
} elsif ($repeatmunge2 eq 'Unknown') {
print "Unknown repeat name\n";
$repeat_name = 'Unknown';
} else {
$repeat_name = $repeatmunge;
$repeat_class = $repeatmunge2;
#throw("Can't parse repeatmasker output for line = $_\n");
}
if(!$repeat_class){
$repeat_class = 'UNK';
}
}else{
throw("Can't parse repeatmasker output unexpected number ".
"of columns in the output ".@columns." in ".$_." ".
"RepeatMasker:parse_results");
}
my $start_column;
if($strand eq '+'){
$start_column = 11;
$strand = 1;
}
if($strand eq 'C'){
$start_column = 13;
$strand = -1;
}#the location of the start and end inside the repeat
#is different depending on the strand
my $repeat_start = $columns[$start_column];
my $repeat_end = $columns[12];
if($repeat_end < $repeat_start){
my $temp_end = $repeat_start;
$repeat_start = $repeat_end;
$repeat_end = $temp_end;
}
my $rc = $feature_factory->create_repeat_consensus($repeat_name,
$repeat_class);
my $rf = $feature_factory->create_repeat_feature
($start, $end, $strand, $score,
$repeat_start, $repeat_end,
$rc, $self->query);
$self->output([$rf]);
}
}
close(OUT) or throw("FAILED to close ".$results.
"RepeatMasker:parse_results");
}
sub write_seq_file{
my ($self, $seq, $filename) = @_;
if(!$seq){
my $slice = $self->query;
$seq = Bio::PrimarySeq->new(
-display_id => $slice->seq_region_name(),
-seq => $slice->seq()
);
}
if(!$filename){
$filename = $self->queryfile;
}
$filename = write_seqfile($seq, $filename);
return $filename;
}
1;