sub parse_results
{ my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my $feature_factory = $self->feature_factory;
open(OUT, "<".$results) or throw("FAILED to open ".$results.
"RepeatMasker:parse_results");
REPEAT:while(<OUT>){
if (/no repetitive sequences detected/) {
print "RepeatMasker didn't find any repetitive sequences";
last REPEAT; }
if(/only contains ambiguous bases/){
print "Sequence contains too many N's\n ";
last REPEAT;
}
my @columns;
if(/\d+/){ chomp;
@columns = split;
pop @columns if $columns[-1] eq '*';
my ($score, $name, $start, $end, $strand,
$repeat_name, $repeat_class, $repeatmunge, $repeatmunge2);
if(@columns == 15){
($score, $name, $start, $end, $strand,
$repeat_name, $repeat_class) = @columns[0, 4, 5, 6, 8, 9, 10];
}elsif(@columns == 14){
($score, $name, $start, $end, $strand,
$repeatmunge,$repeatmunge2) = @columns[0, 4, 5, 6, 8, 9, 10];
if ($repeatmunge =~ /(\S+)(LINE\S+)/) {
$repeatmunge =~ /(\S+)(LINE\S+)/;
$repeat_name = $1;
$repeat_class = $2;
} elsif ($repeatmunge2 eq 'Unknown') {
print "Unknown repeat name\n";
$repeat_name = 'Unknown';
} else {
throw("Can't parse repeatmasker output for line = $_\n");
}
if(!$repeat_class){
$repeat_class = 'UNK';
}
}else{
throw("Can't parse repeatmasker output unexpected number ".
"of columns in the output ".@columns." in ".$_." ".
"RepeatMasker:parse_results");
}
my $start_column;
if($strand eq '+'){
$start_column = 11;
$strand = 1;
}
if($strand eq 'C'){
$start_column = 13;
$strand = -1;
} my $repeat_start = $columns[$start_column];
my $repeat_end = $columns[12];
if($repeat_end < $repeat_start){
my $temp_end = $repeat_start;
$repeat_start = $repeat_end;
$repeat_end = $temp_end;
}
my $rc = $feature_factory->create_repeat_consensus($repeat_name,
$repeat_class);
my $rf = $feature_factory->create_repeat_feature
($start, $end, $strand, $score,
$repeat_start, $repeat_end,
$rc, $self->query);
$self->output([$rf]);
}
}
close(OUT) or throw("FAILED to close ".$results.
"RepeatMasker:parse_results");
}
1; } |