Bio::EnsEMBL::Analysis::RunnableDB
MapCloneEnds
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::MapCloneEnds;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::ExonerateCloneEnds
Inherit
Synopsis
my $clonemap =
Bio::EnsEMBL::Analysis::RunnableDB::MapCloneEnds->new(
-db => $refdb,
-analysis => $analysis_obj,
-database => $EST_GENOMIC,
);
$clonemap->fetch_input();
$clonemap->run();
$clonemap->write_output(); #writes to DB
Description
This object maps clone sequences to a genome using the
exonerate alignment program, and writes the results as DNA align features.
It relies on the following modules:
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds;
Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds;
In the analysis table, you have to add three analyses: one that calls
MapCloneEnds, and two for the different exonerate steps:
MapCloneEnds (module -> MapCloneEnds),
EXONERATE_CLONE_ENDS (module -> MapCloneEnds),
REFINE_CLONE_ENDS(module -> MapCloneEnds)
MapCloneEnds reads the input_id_chunks from a file in which each line
contains a number of IDs separated by ":". For the input IDs to be parsed
correctly, they must be given in a very specific format. The file
can be generated automatically from an XML file using chunker.pl, or written by hand;
in either case each ID must be in the format:
Clone_ID,length_of_clone,standard_deviation_of_length,Clone_end_ID,Direction_of_Clone
(i.e. CH243-307D10,184000.0,36800.0,1098421033278,F).
Methods
CHUNKSLIST | No description | Code |
DNADB | No description | Code |
GENOMICSEQS | No description | Code |
IIDREGEXP | No description | Code |
OPTIONS | No description | Code |
OUTDB | No description | Code |
QUERYSEQS | No description | Code |
QUERYTYPE | No description | Code |
clean_clusters | No description | Code |
fetch_analysis | No description | Code |
fetch_input | No description | Code |
filter_alignments | No description | Code |
new | No description | Code |
read_and_check_config | No description | Code |
refined_results | No description | Code |
run | No description | Code |
single_filter_alignments | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
# Get/set accessor for the CHUNKSLIST configuration value (run() opens this
# value as the file holding the chunked clone-end IDs).
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub CHUNKSLIST {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_CHUNKSLIST'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_CHUNKSLIST'};
}
# Get/set accessor for the DNADB configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub DNADB {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_DNADB'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_DNADB'};
}
# Get/set accessor for the GENOMICSEQS configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub GENOMICSEQS {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_GENOMICSEQS'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_GENOMICSEQS'};
}
# Get/set accessor for the IIDREGEXP configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub IIDREGEXP {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_IIDREGEXP'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_IIDREGEXP'};
}
# Get/set accessor for the OPTIONS configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub OPTIONS {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_OPTIONS'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_OPTIONS'};
}
# Get/set accessor for the OUTDB configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub OUTDB {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_OUTDB'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_OUTDB'};
}
# Get/set accessor for the QUERYSEQS configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub QUERYSEQS {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_QUERYSEQS'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_QUERYSEQS'};
}
# Get/set accessor for the QUERYTYPE configuration value.
#   $value (optional) - new value; stored only when defined, so passing
#                       undef never clears a previously stored value.
# Returns the stored value, or undef when nothing has been stored yet.
sub QUERYTYPE {
    my ( $self, $value ) = @_;

    $self->{'_CONFIG_QUERYTYPE'} = $value if defined $value;

    # Reading a missing hash key yields undef without creating the key,
    # so no explicit exists() check is needed.
    return $self->{'_CONFIG_QUERYTYPE'};
}
# Thins out the alignments of one clone end so that runs of nearby hits on
# the same sequence are represented by fewer alignments.
#
#   $clone_cluster_alignments - array ref of alignment objects; each must
#       provide seqname, hseqname, hstart, start and end.
#
# Alignments are grouped by "<seqname>-<hseqname>". A group with a single
# alignment contributes that alignment. In a larger group the alignments are
# ordered by hstart and walked in that order:
#   * the first alignment only seeds the "previous end" position and is
#     never selected itself;
#   * an alignment whose start is 4000 or more away from the previous end is
#     always selected;
#   * of all alignments closer than 4000 to their predecessor, only the
#     first one met in the group is selected.
#
# Returns an array ref of the selected alignments (the order between groups
# follows hash-key order and is therefore not stable).
sub clean_clusters {
    my ( $self, $clone_cluster_alignments ) = @_;

    my %alignments_for;
    foreach my $alignment ( @{$clone_cluster_alignments} ) {
        my $group_key = $alignment->seqname . "-" . $alignment->hseqname;
        push @{ $alignments_for{$group_key} }, $alignment;
    }

    my @selection;
    foreach my $group_key ( keys %alignments_for ) {
        my @group = @{ $alignments_for{$group_key} };

        if ( @group == 1 ) {
            push @selection, $group[0];
            next;
        }

        my @by_hit_start = sort { $a->hstart() <=> $b->hstart() } @group;

        # The leading alignment is only the reference point for the first
        # distance measurement.
        my $previous_end     = ( shift @by_hit_start )->end;
        my $near_member_kept = 0;

        foreach my $candidate (@by_hit_start) {
            my $gap = abs( $candidate->start - $previous_end );

            if ( $gap >= 4000 ) {
                push @selection, $candidate;
            }
            elsif ( !$near_member_kept ) {

                # Keep one representative of the close-together alignments.
                push @selection, $candidate;
                $near_member_kept = 1;
            }

            $previous_end = $candidate->end;
        }
    }

    return \@selection;
}
# Fetches an analysis by logic name through this object's database handle.
#   $logic_name - logic name handed to the analysis adaptor.
# Returns whatever the adaptor's fetch_by_logic_name gives back, evaluated in
# scalar context so the result is always exactly one value (this matters
# because run() uses it directly inside a named-argument list).
sub fetch_analysis {
    my ( $self, $logic_name ) = @_;

    my $analysis_adaptor = $self->db->get_AnalysisAdaptor;
    my $analysis         = $analysis_adaptor->fetch_by_logic_name($logic_name);

    return $analysis;
}
# Pipeline hook that takes the object and does nothing else: no input is
# loaded here. In the visible code run() reads the chunks list and calls
# fetch_input on the exonerate runnables it creates itself.
sub fetch_input {
    my ($self) = @_;
}
# Chooses which clone-end alignments are passed on to the refinement step.
#
#   $clone_alignments - array ref of alignment objects (hseqname, seqname,
#                       start and end are used here).
#   $clones_ref       - hash ref: clone ID => flat array holding four
#                       entries per clone end, in the order run() stores
#                       them (end ID, clone length, length standard
#                       deviation, direction).
#   $cloneEnd_ids_ref - hash ref: clone end ID => clone ID.
#
# For a clone with more than one end, the cleaned alignments (see
# clean_clusters) of every two ends with different directions are compared:
# alignments on the same sequence whose start/end distance is at most
# length + standard deviation + 200000 are both selected. Any end that took
# part in no such pairing, and every clone with a single end, falls back to
# single_filter_alignments.
#
# Returns an array ref of selected alignments. It can contain empty strings,
# because single_filter_alignments yields '' when it selects nothing.
#
# Note: the source declared a $cluster_selected flag on a line that had been
# folded into a comment, leaving a later assignment to an undeclared
# variable. The flag was never read (%status does the per-end bookkeeping),
# so it has been removed.
sub filter_alignments {
    my ( $self, $clone_alignments, $clones_ref, $cloneEnd_ids_ref ) = @_;

    my %clones       = %{$clones_ref};
    my %cloneEnd_ids = %{$cloneEnd_ids_ref};
    my %clone_cluster;     # clone end ID => its alignments
    my %aligned_clones;    # clone ID     => one of its aligned end IDs
    my @selected_alignments;

    foreach my $clone_alignment ( @{$clone_alignments} ) {
        my $clone_name          = $clone_alignment->hseqname();
        my $complete_clone_name = $cloneEnd_ids{$clone_name};
        $aligned_clones{$complete_clone_name} = $clone_name;
        push @{ $clone_cluster{$clone_name} }, $clone_alignment;
    }

    foreach my $pair ( keys %aligned_clones ) {

        # Four entries are stored per end, so more than four means the
        # clone has several ends.
        if ( ( scalar @{ $clones{$pair} } ) / 4 > 1 ) {
            my %clean_cloneEnds;    # end offset => cleaned alignments
            my %status;             # end offset => 1 once paired
            my %clone_dir;          # end offset => direction
            my $clone_length =
              $clones{$pair}[1] + $clones{$pair}[2] + 200000;

            for (
                my $numberOfEnd = 0 ;
                $numberOfEnd < scalar @{ $clones{$pair} } ;
                $numberOfEnd += 4
              )
            {
                my $cloneEnd = $clones{$pair}[$numberOfEnd];
                $clone_dir{$numberOfEnd} = $clones{$pair}[ $numberOfEnd + 3 ];
                $clean_cloneEnds{$numberOfEnd} =
                  [ @{ $self->clean_clusters( $clone_cluster{$cloneEnd} ) } ];
                $status{$numberOfEnd} = 0;
            }

            foreach my $clean_cloneEnd1 ( keys %clean_cloneEnds ) {
                foreach my $clean_cloneEnd2 ( keys %clean_cloneEnds ) {

                    # Visit each unordered pair of ends once, and only when
                    # the two ends have different directions.
                    next unless $clean_cloneEnd1 < $clean_cloneEnd2;
                    next
                      unless $clone_dir{$clean_cloneEnd1} ne
                      $clone_dir{$clean_cloneEnd2};

                    my $first_prev_chr_start = 0;
                    my $first_chr_hit        = 'Null';

                    foreach my $fa ( @{ $clean_cloneEnds{$clean_cloneEnd1} } ) {

                        # Skip alignments within 4000 of the last selected
                        # one on the same sequence.
                        next
                          unless $fa->seqname() ne $first_chr_hit
                          || ( $fa->start() - $first_prev_chr_start ) > 4000;

                        my $second_prev_chr_start = 0;
                        foreach
                          my $sa ( @{ $clean_cloneEnds{$clean_cloneEnd2} } )
                        {
                            next unless $fa->seqname() eq $sa->seqname();

                            my $abs_diff = abs( $fa->start() - $sa->end() );
                            if ( $abs_diff <= $clone_length
                                && ( $sa->start() - $second_prev_chr_start ) >
                                4000 )
                            {
                                push @selected_alignments, $fa, $sa;
                                $status{$clean_cloneEnd1} = 1;
                                $status{$clean_cloneEnd2} = 1;
                                $first_prev_chr_start     = $fa->start();
                                $second_prev_chr_start    = $sa->start();
                                $first_chr_hit            = $fa->seqname();
                            }
                        }
                    }
                }
            }

            # Ends that were never paired are filtered on their own.
            foreach my $clone_status ( keys %status ) {
                next unless $status{$clone_status} == 0;
                my $cloneEnd = $clones{$pair}[$clone_status];
                my $selected_align =
                  $self->single_filter_alignments( $clone_cluster{$cloneEnd} );
                push @selected_alignments, ${$selected_align};
            }
        }
        else {
            my $first_cloneEnd = $clones{$pair}[0];
            my $selected_align =
              $self->single_filter_alignments( $clone_cluster{$first_cloneEnd} );
            push @selected_alignments, ${$selected_align};
        }
    }

    return \@selected_alignments;
}
# Constructor. Delegates to the parent class constructor, starts with an
# empty list of refined results (filled by run() via refined_results), and
# loads and validates the configuration from $CLONE_CONFIG.
#   @args - passed through unchanged to the parent constructor.
# Returns the new object.
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new(@args);
    $self->{_refined_results} = [];
    $self->read_and_check_config($CLONE_CONFIG);

    return $self;
}
# Loads the configuration through the parent class and verifies that every
# mandatory setting is defined for this analysis.
# Takes no arguments besides the object: $CLONE_CONFIG is always the
# configuration passed to the parent, whatever the caller supplies.
# Throws when a mandatory setting (currently only CHUNKSLIST) is undefined.
sub read_and_check_config {
    my $self = shift;

    $self->SUPER::read_and_check_config($CLONE_CONFIG);

    my $logic = $self->analysis->logic_name;

    foreach my $config_var (qw(CHUNKSLIST)) {

        # Each setting name doubles as the name of its accessor method.
        next if defined $self->$config_var;
        throw("You must define $config_var in config for logic '$logic'");
    }
}
# Accumulator for the refinement runnables created by run().
#   $refine_result (optional) - appended to the list when it is a true
#                               value.
# Returns the array ref stored under _refined_results (undef if the list was
# never initialised and nothing has been added).
sub refined_results {
    my ( $self, $refine_result ) = @_;

    push @{ $self->{_refined_results} }, $refine_result if $refine_result;

    return $self->{_refined_results};
}
# Runs the two-step clone-end mapping.
#
# 1. Reads the file named by CHUNKSLIST. Every line holds entries separated
#    by ":"; every entry has the comma-separated fields
#    Clone_ID,length,standard_deviation,Clone_end_ID,Direction.
# 2. Aligns all clone ends with an ExonerateCloneEnds runnable configured by
#    the EXONERATE_CLONE_ENDS analysis.
# 3. Filters the alignments with filter_alignments.
# 4. For every selected alignment builds an input ID covering the hit
#    widened by 2000 on each side and runs a second ExonerateCloneEnds
#    runnable configured by the REFINE_CLONE_ENDS analysis. Each of those
#    runnables is recorded through refined_results so that write_output can
#    store its results later.
#
# Dies if the chunks list cannot be opened; previously a failed open was
# ignored and the run silently continued with no clone data.
sub run {
    my ($self) = @_;

    my $chunks_list = $self->CHUNKSLIST;
    my %clones;          # clone ID     => (end ID, length, stdev, direction) per end
    my %cloneEnd_ids;    # clone end ID => clone ID
    my @listOfIDs;       # one ":"-joined string of end IDs per input line

    open( my $chunks_fh, '<', $chunks_list )
      or die "Could not open chunks list '$chunks_list' for reading: $!";

    while ( my $line = <$chunks_fh> ) {
        chomp($line);

        my @end_ids;
        foreach my $clone ( split( /:/, $line ) ) {
            my @clone_data = split( /,/, $clone );

            # Stored order per end: end ID, length, standard deviation,
            # direction. filter_alignments relies on this layout.
            push @{ $clones{ $clone_data[0] } }, @clone_data[ 3, 1, 2, 4 ];
            $cloneEnd_ids{ $clone_data[3] } = $clone_data[0];
            push @end_ids, $clone_data[3];
        }
        push @listOfIDs, join( ":", @end_ids );
    }
    close($chunks_fh);

    my $exonerate =
      Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds->new(
        -DB       => $self->db,
        -INPUT_ID => $self->input_id,
        -ANALYSIS => $self->fetch_analysis("EXONERATE_CLONE_ENDS"),
      );
    $exonerate->fetch_input( \@listOfIDs );
    $exonerate->run();
    my $clone_alignments = $exonerate->output();

    my @selected_alignments =
      @{ $self->filter_alignments( $clone_alignments, \%clones, \%cloneEnd_ids ) };

    foreach my $selected_alignment (@selected_alignments) {

        # filter_alignments hands back '' where nothing was selected.
        next unless $selected_alignment ne '';

        my $clone_id = $selected_alignment->hseqname;
        my $start    = ( $selected_alignment->start ) - 2000;
        my $end      = ( $selected_alignment->end ) + 2000;

        # The seqname is ":"-separated; fields 0-2 and 5 are reused and the
        # two coordinates are replaced by the widened region (presumably a
        # slice name whose last field is the strand - confirm).
        my @chr_name = split( /:/, $selected_alignment->seqname );
        my $input_id = join( ":",
            @chr_name[ 0, 1, 2 ],
            $start, $end, $chr_name[5], $clone_id );

        my $refine =
          Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds->new(
            -DB       => $self->db,
            -INPUT_ID => $input_id,
            -ANALYSIS => $self->fetch_analysis("REFINE_CLONE_ENDS"),
          );
        $refine->fetch_input();
        $refine->run();
        $self->refined_results($refine);
    }
}
# Picks one representative alignment for a clone end that could not be
# paired with another end.
#
#   $single_clone_alignments - array ref of alignment objects; each must
#       provide seqname, hseqname, hstart, start, end and score.
#
# Alignments are grouped by "<seqname>-<hseqname>". In a group with several
# alignments they are ordered by hstart and the first run of consecutive
# alignments lying less than 4000 from their predecessor's end is measured;
# the group with the longest run wins and the alignment that opens the run
# is selected. Groups with one alignment are only considered while no run
# has been found, in which case the highest score above 0 wins. Groups are
# visited in hash-key order, so ties and the mix of both rules can depend on
# that order.
#
# Returns a reference to a scalar holding the selected alignment, or the
# empty string when nothing was selected.
#
# The distance used to be computed as abs(start) - previous_end, which is
# negative (and therefore always "near") whenever an alignment starts before
# the previous end. It is now the absolute difference of the two positions,
# as in clean_clusters.
sub single_filter_alignments {
    my ( $self, $single_clone_alignments ) = @_;

    my %chr_cluster;
    foreach my $alignment ( @{$single_clone_alignments} ) {
        my $cluster_key = $alignment->seqname . "-" . $alignment->hseqname;
        push @{ $chr_cluster{$cluster_key} }, $alignment;
    }

    my $selected_alignment = "";
    my $cluster_size       = 0;
    my $biggest_score      = 0;

    foreach my $chr_cluster_key ( keys %chr_cluster ) {
        my @members = @{ $chr_cluster{$chr_cluster_key} };

        if ( @members > 1 ) {
            my @ordered_aligns =
              sort { $a->hstart() <=> $b->hstart() } @members;

            # The leading alignment only seeds the comparison.
            my $previous_alignment = shift @ordered_aligns;
            my $previous_end       = $previous_alignment->end;

            my $size_counter = 0;
            my $first_selected_alignment;
            my %run_started_for;    # hit name => true once a run has begun

            foreach my $ordered_align (@ordered_aligns) {
                my $hit_name = $ordered_align->hseqname;
                my $cloneEnd_distance =
                  abs( $ordered_align->start - $previous_end );

                if ( $cloneEnd_distance < 4000 ) {
                    if ( !$run_started_for{$hit_name} ) {

                        # The predecessor opens the run and counts too.
                        $run_started_for{$hit_name} = 1;
                        $size_counter++;
                        $first_selected_alignment = $previous_alignment;
                    }
                    $size_counter++;
                }
                elsif ( $run_started_for{$hit_name} ) {

                    # Only the first run of a group is measured.
                    last;
                }

                $previous_alignment = $ordered_align;
                $previous_end       = $ordered_align->end;
            }

            if ( $size_counter > $cluster_size ) {
                $cluster_size       = $size_counter;
                $selected_alignment = $first_selected_alignment;
            }
        }
        elsif ( $cluster_size == 0 && $members[0]->score() > $biggest_score ) {
            $biggest_score      = $members[0]->score();
            $selected_alignment = $members[0];
        }
    }

    return \$selected_alignment;
}
# Stores the results of the refinement step by calling write_output on every
# runnable collected through refined_results during run().
#   @output - accepted but not used.
sub write_output {
    my ( $self, @output ) = @_;

    foreach my $refine_object ( @{ $self->refined_results } ) {
        $refine_object->write_output();
    }
}
General documentation