Bio::EnsEMBL::Analysis::Runnable AlignmentChains
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Pipeline::Runnable::AlignmentChains
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::DnaDnaAlignFeature
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
Description
Methods
axtChain
No description
Code
faToNib
No description
Code
features
No description
Code
lavToAxt
No description
Code
min_chain_score
No description
Code
new
No description
Code
parse_Chain_file
No description
Code
query_nib_dir
No description
Code
query_slice
No description
Code
runDescriptionCode
target_nib_dir
No description
Code
target_slices
No description
Code
write_lav
No description
Code
Methods description
runcode    nextTop
  Title   : run
Usage : $self->run()
Function:
Returns : none
Args :
Methods code
axtChaindescriptionprevnextTop
sub axtChain {
  my ($self,$arg) = @_;
  
  if (defined($arg)) {
    $self->{'_axtChain'} = $arg;
  }
  
  return $self->{'_axtChain'};
}



1;
}
faToNibdescriptionprevnextTop
sub faToNib {
  my ($self,$arg) = @_;
  
  if (defined($arg)) {
    $self->{'_faToNib'} = $arg;
  }

  return $self->{'_faToNib'};
}
featuresdescriptionprevnextTop
sub features {
  my ($self, $features) = @_;

  if (defined $features) {
    $self->{_features} = $features;
  }

  return $self->{_features};
}
lavToAxtdescriptionprevnextTop
sub lavToAxt {
  my ($self,$arg) = @_;
  
  if (defined($arg)) {
    $self->{'_lavToAxt'} = $arg;
  }

  return $self->{'_lavToAxt'};
}
min_chain_scoredescriptionprevnextTop
sub min_chain_score {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_min_chain_score} = $val;
  }

  if (not exists $self->{_min_chain_score}) {
    return undef;
  } else {
    return $self->{_min_chain_score};
  }
}


##############
#### programs
##############
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;

  my $self = $class->SUPER::new(@args);
  
  my ($features, 
      $query_slice,
      $query_nib_dir,
      $target_slices,
      $target_nib_dir,
      $min_chain_score,
      $fa_to_nib,
      $lav_to_axt,
      $axt_chain,

      ) = rearrange([qw(FEATURES
                        QUERY_SLICE
                        QUERY_NIB_DIR
                        TARGET_SLICES
                        TARGET_NIB_DIR
                        MIN_CHAIN_SCORE
                        FATONIB
                        LAVTOAXT
                        AXTCHAIN
                                )],
                    @args);

  throw("You must supply a reference to an array of features with -features\n") 
      if not defined $features;
  throw("You must supply a query sequence\n") 
      if not defined $query_slice;
  throw("You must supply a hash ref of target sequences with -target_slices")
      if not defined $target_slices;

  $self->faToNib($fa_to_nib) if defined $fa_to_nib;
  $self->lavToAxt($lav_to_axt) if defined $lav_to_axt;
  $self->axtChain($axt_chain) if defined $axt_chain;

  $self->query_nib_dir($query_nib_dir) if defined $query_nib_dir;
  $self->target_nib_dir($target_nib_dir) if defined $target_nib_dir;

  $self->query_slice($query_slice);
  $self->target_slices($target_slices);
  $self->min_chain_score($min_chain_score) if defined $min_chain_score;
  $self->features($features);
  
  return $self;
}
parse_Chain_filedescriptionprevnextTop
sub parse_Chain_file {
  my ($self, $fh) = @_;

  my @chains;

  while(<$fh>) {
    
    /^chain\s+(\S.+)$/ and do {
      my @data = split /\s+/, $1;

      my $chain = {
        q_id     => $data[1],
        q_len    => $data[2],
        q_strand => $data[3] eq "-" ? -1 : 1,
        t_id     => $data[6],
        t_len    => $data[7],
        t_strand => $data[8] eq "-" ? -1 : 1,
        score    => $data[0],
        blocks   => [],
      };

      my ($current_q_start, $current_t_start) = ($data[4] + 1, $data[9] + 1);
      my @blocks = ([]);
      
      while(<$fh>) {
        if (/^(\d+)(\s+\d+\s+\d+)?$/) {
          my ($ungapped, $rest) = ($1, $2);

          my ($current_q_end, $current_t_end) = 
              ($current_q_start + $ungapped - 1, $current_t_start + $ungapped - 1);

          push @{$blocks[-1]}, { q_start => $current_q_start,
                                 q_end   => $current_q_end,
                                 t_start => $current_t_start,
                                 t_end   => $current_t_end,
                               };
          
          if ($rest and $rest =~ /\s+(\d+)\s+(\d+)/) {
            my ($gap_q, $gap_t) = ($1, $2);
            
            $current_q_start = $current_q_end + $gap_q + 1;
            $current_t_start = $current_t_end + $gap_t + 1; 
            
            if ($gap_q != 0 and $gap_t !=0) {
              # simultaneous gap; start a new block
push @blocks, []; } } else { # we just had a line on its own;
last; } } else { throw("Not expecting line '$_' in chain file"); } } # can now form the cigar string and flip the reverse strand co-ordinates
foreach my $block (@blocks) { my @ug_feats; foreach my $ug_feat (@$block) { if ($chain->{q_strand} < 0) { my ($rev_q_start, $rev_q_end) = ($ug_feat->{q_start}, $ug_feat->{q_end}); $ug_feat->{q_start} = $chain->{q_len} - $rev_q_end + 1; $ug_feat->{q_end} = $chain->{q_len} - $rev_q_start + 1; } if ($chain->{t_strand} < 0) { my ($rev_t_start, $rev_t_end) = ($ug_feat->{t_start}, $ug_feat->{t_end}); $ug_feat->{t_start} = $chain->{t_len} - $rev_t_end + 1; $ug_feat->{t_end} = $chain->{t_len} - $rev_t_start + 1; } #create featurepair
my $fp = new Bio::EnsEMBL::FeaturePair->new(); $fp->seqname($chain->{q_id}); $fp->start($ug_feat->{q_start}); $fp->end($ug_feat->{q_end}); $fp->strand($chain->{q_strand}); $fp->hseqname($chain->{t_id}); $fp->hstart($ug_feat->{t_start}); $fp->hend($ug_feat->{t_end}); $fp->hstrand($chain->{t_strand}); $fp->score($chain->{score}); push @ug_feats, $fp; } my $dalf = new Bio::EnsEMBL::DnaDnaAlignFeature(-features =>\@ ug_feats); $dalf->level_id(1); push @{$chain->{blocks}}, $dalf; } push @chains, $chain->{blocks}; } } return\@ chains; } #####################
# instance vars
#####################
}
query_nib_dirdescriptionprevnextTop
sub query_nib_dir {
  my ($self, $val) = @_;
  
  if (defined $val) {
    $self->{_query_nib_dir} = $val;
  }
  return $self->{_query_nib_dir};
}
query_slicedescriptionprevnextTop
sub query_slice {
  my ($self, $slice) = @_;
  
  if (defined $slice) {
    $self->{_query_slice} = $slice;
  }
  return $self->{_query_slice};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;

  my $query_name = $self->query_slice->seq_region_name;

  my $work_dir = $self->workdir . "/$query_name.$$.AxtChain";
  my $lav_file = "$work_dir/$query_name.lav";
  my $axt_file = "$work_dir/$query_name.axt";
  my $chain_file = "$work_dir/$query_name.chain";
  my (@nib_files, $query_nib_dir, $target_nib_dir);

  mkdir $work_dir;

  my $fh;

  #################################
# write the query in nib format
# for use by lavToAxt;
#################################
if ($self->query_nib_dir) { if (not -d $self->query_nib_dir) { throw("Could not fine query nib file directory:" . $self->query_nib_dir); } else { $query_nib_dir = $self->query_nib_dir; } } else { $query_nib_dir = "$work_dir/query_nib"; mkdir $query_nib_dir; my $seqio = Bio::SeqIO->new(-format => 'fasta', -file => ">$query_nib_dir/$query_name.fa"); $seqio->write_seq($self->query_slice); $seqio->close; system($self->faToNib, "$query_nib_dir/$query_name.fa", "$query_nib_dir/$query_name.nib") and throw("Could not convert fasta file $query_nib_dir/$query_name.fa to nib"); unlink "$query_nib_dir/$query_name.fa"; push @nib_files, "$query_nib_dir/$query_name.nib"; } #################################
# write the targets in nib format
# for use by lavToAxt;
#################################
if ($self->target_nib_dir) { if (not -d $self->target_nib_dir) { throw("Could not fine target nib file directory:" . $self->target_nib_dir); } else { $target_nib_dir = $self->target_nib_dir; } } else { $target_nib_dir = "$work_dir/target_nib"; mkdir $target_nib_dir; foreach my $nm (keys %{$self->target_slices}) { my $target = $self->target_slices->{$nm}; my $target_name = $target->seq_region_name; my $seqio = Bio::SeqIO->new(-format => 'fasta', -file => ">$target_nib_dir/$target_name.fa"); $seqio->write_seq($target); $seqio->close; system($self->faToNib, "$target_nib_dir/$target_name.fa", "$target_nib_dir/$target_name.nib") and throw("Could not convert fasta file $target_nib_dir/$target_name.fa to nib"); unlink "$target_nib_dir/$target_name.fa"; push @nib_files, "$target_nib_dir/$target_name.nib"; } } ##############################
# write features in lav format
##############################
open $fh, ">$lav_file" or throw("could not open lav file '$lav_file' for writing\n"); $self->write_lav($fh); close($fh); ##############################
# convert the lav file to axt
##############################
system($self->lavToAxt, $lav_file, $query_nib_dir, $target_nib_dir, $axt_file) and throw("Could not convert $lav_file to Axt format"); unlink $lav_file; ##################################
# convert the lav file to axtChain
##################################
my $min_parameter = "-minScore="; if (defined $self->min_chain_score) { $min_parameter .= $self->min_chain_score; } else { # default to the built-in default
$min_parameter .= 1000; } system($self->axtChain, $min_parameter, $axt_file, $query_nib_dir, $target_nib_dir, $chain_file) and throw("Something went wrong with axtChain\n"); unlink $axt_file; ##################################
# read the chain file
##################################
open $fh, $chain_file or throw("Could not open chainfile '$chain_file' for reading\n"); my $chains = $self->parse_Chain_file($fh); close($fh); $self->output($chains); unlink $chain_file, @nib_files; rmdir $query_nib_dir if not $self->query_nib_dir; rmdir $target_nib_dir if not $self->target_nib_dir; rmdir $work_dir; return 1; } #####################################################
}
target_nib_dirdescriptionprevnextTop
sub target_nib_dir {
  my ($self, $val) = @_;
  
  if (defined $val) {
    $self->{_target_nib_dir} = $val;
  }
  return $self->{_target_nib_dir};
}
target_slicesdescriptionprevnextTop
sub target_slices {
  my ($self, $hash_ref) = @_;
  
  if (defined $hash_ref) {
    $self->{_target_slices_hashref} = $hash_ref;
  }
  return $self->{_target_slices_hashref};
}
write_lavdescriptionprevnextTop
sub write_lav {
    my ($self, $fh) = @_;

  my (%features);  
  foreach my $feat (sort {$a->start <=> $b->start} @{$self->features}) {
    my $strand = $feat->strand;
    my $hstrand = $feat->hstrand;
    if ($strand == -1) {
      $strand  *= -1;
      $hstrand *= -1;
    }
    push @{$features{$feat->hseqname}{$strand}{$hstrand}}, $feat;
  }
  
  my $query_length = $self->query_slice->length;
  my $query_name   = $self->query_slice->seq_region_name;
  
  foreach my $target (sort keys %features) {

    print $fh "#:lav\n";
    print $fh "d {\n\"   generated by Runnable/AxtFilter.pm\"\n}\n";

    foreach my $qstrand (keys %{$features{$target}}) {
      foreach my $tstrand (keys %{$features{$target}{$qstrand}}) {
        
        my $query_strand = ($qstrand == 1) ? 0 : 1;
        my $target_strand = ($tstrand == 1) ? 0 : 1;
        
        my $target_length = $self->target_slices->{$target}->length;

        print $fh "#:lav\n";
        print $fh "s {\n";
        print $fh "\"   $query_name\" 1 $query_length $query_strand 1\n";
        print $fh "\"   $target\" 1 $target_length $target_strand 1\n";
        print $fh "}\n";
        
        print $fh "h {\n";
        print $fh "\"   >$query_name";
        if ($query_strand) {
          print $fh " (reverse complement)";
        }
        print $fh "\"\n\"   >$target";
        if ($target_strand) {
          print $fh " (reverse complement)";
        }
        print $fh "\"\n}\n";
	
        foreach my $reg (@{$features{$target}{$qstrand}{$tstrand}}) {
          my $qstart = $query_strand ?  $query_length - $reg->end + 1 : $reg->start; 
          my $qend = $query_strand ?  $query_length - $reg->start + 1 : $reg->end; 
          my $tstart = $target_strand ? $target_length - $reg->hend + 1 : $reg->hstart; 
          my $tend = $target_strand ? $target_length - $reg->hstart + 1 : $reg->hend; 
          
          my $score = defined($reg->score) ? $reg->score : 100;
          my $percent_id = defined($reg->percent_id) ? $reg->percent_id : 100;

          printf $fh "a {\n   s %d\n", $score;
          print $fh "   b $qstart $tstart\n"; 
          print $fh "   e $qend $tend\n";
          
          my @ug_feats = $reg->ungapped_features;
          if ($qstrand == -1) {
            @ug_feats = sort { $b->start <=> $a->start } @ug_feats;
          } else {
            @ug_feats = sort { $a->start <=> $b->start } @ug_feats;
          }

          foreach my $seg (@ug_feats) {
            my $qstartl = $query_strand ?  $query_length - $seg->end + 1 : $seg->start; 
            my $qendl = $query_strand ?  $query_length - $seg->start + 1 : $seg->end; 
            
            my $tstartl = $target_strand ? $target_length - $seg->hend + 1 : $seg->hstart; 
            my $tendl = $target_strand ? $target_length - $seg->hstart + 1 : $seg->hend; 
            
            printf $fh "   l $qstartl $tstartl $qendl $tendl %d\n", $percent_id;
            
          }
          print $fh "}\n";
        }
        
        print $fh "x {\n   n 0\n}\n"; 
      }
    }
    print $fh "m {\n   n 0\n}\n#:eof\n";
  }
}

##############################################################
}
General documentation
CONTACTTop
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _