ensembl
Gene
Toolbar
Package variables
Privates (from "my" definitions)
($TRANSCRIPT_NUM, $GENE_NUM, $EXON_NUM, $TRANSLATION_NUM);
Included modules
Synopsis
No synopsis!
Description
No description!
Methods
cluster_transcripts | No description | Code |
compact_transcripts | No description | Code |
create_ensembl_xrefs | No description | Code |
generate_stable_id | No description | Code |
is_near | No description | Code |
merge_xrefs | No description | Code |
store_gene | No description | Code |
transfer_xrefs | No description | Code |
Methods description
None available.
Methods code
cluster_transcripts | description | prev | next | Top |
# Groups transcripts into clusters of neighbouring transcripts.
#
# Arg:     $transcripts - arrayref of transcript objects.  Each object must
#          provide start(), end(), strand(), slice(), spliced_seq(),
#          translation() and translate().
# Returns: arrayref of cluster hashrefs with the keys 'start', 'end',
#          'strand', 'slice', 'transcripts' (arrayref of the members),
#          'nt_len' (summed length of the members' spliced sequences) and
#          'aa_len' (summed peptide length of the members that have a
#          translation).
#
# Whether two regions belong together is decided entirely by is_near().
sub cluster_transcripts {
    my $transcripts = shift;

    my @clusters;

    info("Clustering transcripts");

    # Pass 1: put every transcript into a cluster it is near to, or start a
    # new cluster for it.
    foreach my $tr (@$transcripts) {
        my $cl;

        # No early exit on purpose: when several clusters qualify, the last
        # one in the list is chosen.
        foreach my $c (@clusters) {
            if (is_near($tr->start(), $tr->end(), $tr->strand(), $tr->slice(),
                        $c->{'start'}, $c->{'end'}, $c->{'strand'}, $c->{'slice'})) {
                $cl = $c;
            }
        }

        if ($cl) {
            push @{ $cl->{'transcripts'} }, $tr;

            # Grow the cluster boundaries through the accessor methods, the
            # same way the positions are read everywhere else in this sub.
            $cl->{'end'}   = $tr->end()   if $tr->end()   > $cl->{'end'};
            $cl->{'start'} = $tr->start() if $tr->start() < $cl->{'start'};
        }
        else {
            $cl = {
                'start'       => $tr->start(),
                'end'         => $tr->end(),
                'strand'      => $tr->strand(),
                'slice'       => $tr->slice(),
                'transcripts' => [$tr],
            };
            push @clusters, $cl;
        }
    }

    # Pass 2: clusters that grew in pass 1 may now be near each other.
    # Merge one pair at a time and rescan from the beginning after every
    # merge, until a complete scan finds nothing left to merge.  Leaving
    # both loops right after the splice guarantees that a cluster is never
    # compared with itself and that no stale index is used.
    my $merged_any = 1;
    while ($merged_any) {
        $merged_any = 0;

      PAIR:
        for my $i (0 .. $#clusters - 1) {
            for my $j ($i + 1 .. $#clusters) {
                my ($c1, $c2) = @clusters[$i, $j];

                next unless is_near(
                    $c1->{'start'}, $c1->{'end'}, $c1->{'strand'}, $c1->{'slice'},
                    $c2->{'start'}, $c2->{'end'}, $c2->{'strand'}, $c2->{'slice'});

                # Fold the second cluster into the first one.
                splice(@clusters, $j, 1);
                $c1->{'start'} = $c2->{'start'} if $c2->{'start'} < $c1->{'start'};
                $c1->{'end'}   = $c2->{'end'}   if $c2->{'end'}   > $c1->{'end'};
                push @{ $c1->{'transcripts'} }, @{ $c2->{'transcripts'} };

                $merged_any = 1;
                last PAIR;
            }
        }
    }

    # Record the total nucleotide and peptide length of every cluster.
    foreach my $cl (@clusters) {
        $cl->{'nt_len'} = 0;
        $cl->{'aa_len'} = 0;

        foreach my $tr (@{ $cl->{'transcripts'} }) {
            $cl->{'nt_len'} += length($tr->spliced_seq());
            if ($tr->translation) {
                $cl->{'aa_len'} += length($tr->translate->seq());
            }
        }
    }

    return \@clusters;
}
# Collapses transcripts that have the same exons and the same translation
# into a single representative.
#
# Arg:     $transcripts - arrayref of transcript objects.
# Returns: arrayref holding one transcript per distinct structure.  Within
#          each group of duplicates the last transcript seen is the one
#          kept; the others are handed to merge_xrefs() together with it.
#          The order of the returned transcripts follows hash iteration
#          order.
sub compact_transcripts {
    my $transcripts = shift;

    my %group_for;

    info("Compacting transcripts");

    foreach my $candidate (@$transcripts) {
        # The signature is built from the exon hashkeys plus, when there is
        # a translation, its coordinates and its start/end exon hashkeys.
        my $signature = 'exons:'
          . join('', map { '(' . $_->hashkey . ')' } @{ $candidate->get_all_Exons });

        if ($candidate->translation) {
            my $tl = $candidate->translation;
            $signature .= 'translation:'
              . $tl->start() . '-'
              . $tl->end() . '('
              . $tl->start_Exon->hashkey() . ')('
              . $tl->end_Exon->hashkey() . ')';
        }

        push @{ $group_for{$signature} }, $candidate;
    }

    my @survivors;
    foreach my $group (values %group_for) {
        my $keeper = pop @$group;

        # Whatever is still in $group is a duplicate of $keeper.
        merge_xrefs($keeper, $group);
        push @survivors, $keeper;
    }

    return \@survivors;
}
# Attaches an xref to every transcript (database name 'Ens_Hs_transcript')
# and to every translation (database name 'Ens_Hs_translation') that is
# built from the object's own current stable_id and version.
#
# Arg: $chimp_transcripts - arrayref of transcript objects.
sub create_ensembl_xrefs {
    my $chimp_transcripts = shift;

    # Builds the DBEntry describing $object under the given database name.
    my $xref_for = sub {
        my ($object, $dbname) = @_;
        return Bio::EnsEMBL::DBEntry->new(
            -primary_id => $object->stable_id(),
            -version    => $object->version(),
            -dbname     => $dbname,
            -release    => 1,
            -display_id => $object->stable_id(),
        );
    };

    foreach my $transcript (@$chimp_transcripts) {
        $transcript->add_DBEntry($xref_for->($transcript, 'Ens_Hs_transcript'));

        if ($transcript->translation()) {
            my $translation = $transcript->translation;
            $translation->add_DBEntry(
                $xref_for->($translation, 'Ens_Hs_translation'));
        }
    }
}
1; } |
# Assigns a freshly generated stable_id, and version 1, to an exon,
# transcript, gene or translation.
#
# The identifier is 'ENS' + 'PTR' + a one-letter type code (E, T, G or P)
# followed by the per-type running number, left-padded with zeros so that
# the complete identifier is 18 characters long.  The running numbers are
# kept in the file-level counters, one per object type.
#
# Arg:    $object - the object that receives the identifier.
# Throws: via throw() when $object is none of the four supported classes.
sub generate_stable_id {
    my $object = shift;

    my $SPECIES_PREFIX = 'PTR';
    my $PAD            = 18;

    # [ class, type letter, reference to the counter of that type ].
    # Checked in this order; the first matching class wins.
    my @id_types = (
        [ 'Bio::EnsEMBL::Exon',        'E', \$EXON_NUM ],
        [ 'Bio::EnsEMBL::Transcript',  'T', \$TRANSCRIPT_NUM ],
        [ 'Bio::EnsEMBL::Gene',        'G', \$GENE_NUM ],
        [ 'Bio::EnsEMBL::Translation', 'P', \$TRANSLATION_NUM ],
    );

    my $type_prefix;
    my $num;
    my $recognised = 0;

    foreach my $id_type (@id_types) {
        my ($class, $letter, $counter) = @$id_type;
        next unless $object->isa($class);

        $type_prefix = $letter;
        $$counter ||= 0;
        $num        = ++$$counter;
        $recognised = 1;
        last;
    }

    if (!$recognised) {
        throw('Unknown object type '.ref($object).'. Cannot create stable_id.');
    }

    my $prefix = "ENS${SPECIES_PREFIX}${type_prefix}";
    my $zeros  = '0' x ($PAD - length($prefix) - length($num));

    $object->version(1);
    $object->stable_id($prefix . $zeros . $num);
}
} |
# Tells whether two regions count as neighbours.
#
# Args:    start, end, strand and slice of the first region, followed by the
#          same four values for the second region.
# Returns: 1 when both regions are on the same strand, their slices have the
#          same name, and they either overlap or are separated by less than
#          NEAR; 0 otherwise.
sub is_near {
    my ($start1, $end1, $strand1, $slice1,
        $start2, $end2, $strand2, $slice2) = @_;

    return 0 if $strand1 != $strand2;
    return 0 if $slice1->name() ne $slice2->name();

    # Overlapping regions are always near.
    return 1 if $start1 <= $end2 && $start2 <= $end1;

    # Otherwise measure the gap from the end of the upstream region to the
    # start of the downstream one.
    my $gap = ($start1 > $end2) ? ($start1 - $end2) : ($start2 - $end1);

    return ($gap < NEAR) ? 1 : 0;
}
# Copies onto the kept transcript (and its translation) every xref that is
# present on one of its duplicates but not yet on the kept object itself.
# Xrefs are considered equal when their "dbname:primary_id" strings match.
#
# Args:   $kept_transcript - transcript that survives the compaction
#         $duplicates      - arrayref of transcripts identical to it
# Throws: via throw() when a duplicate contributes translation xrefs but the
#         kept transcript has no translation.
sub merge_xrefs {
    my ($kept_transcript, $duplicates) = @_;

    return unless @$duplicates;

    info('Merging xrefs from duplicate transcripts');

    # Identity of an xref for de-duplication purposes.
    my $key_of = sub {
        my $xref = shift;
        return $xref->dbname() . ':' . $xref->primary_id();
    };

    # Xrefs the kept transcript / translation already carries.
    my %have_on_transcript =
      map { ( $key_of->($_) => 1 ) } @{ $kept_transcript->get_all_DBEntries() };

    my %have_on_translation;
    if ($kept_transcript->translation()) {
        %have_on_translation =
          map { ( $key_of->($_) => 1 ) }
          @{ $kept_transcript->translation->get_all_DBEntries() };
    }

    # Xrefs found only on the duplicates.  When several duplicates carry
    # the same key, the one seen last is the one kept.
    my %new_for_transcript;
    my %new_for_translation;

    foreach my $dup (@$duplicates) {
        foreach my $xref (@{ $dup->get_all_DBEntries() }) {
            my $key = $key_of->($xref);
            next if $have_on_transcript{$key};
            $new_for_transcript{$key} = $xref;
        }

        next unless $dup->translation();

        foreach my $xref (@{ $dup->translation->get_all_DBEntries() }) {
            my $key = $key_of->($xref);
            next if $have_on_translation{$key};
            $new_for_translation{$key} = $xref;
        }
    }

    foreach my $xref (values %new_for_transcript) {
        $kept_transcript->add_DBEntry($xref);
    }

    my @pending     = values %new_for_translation;
    my $translation = $kept_transcript->translation();

    if (@pending && !$translation) {
        throw("Some duplicate transcripts have translations, and others do not?");
        return;
    }

    foreach my $xref (@pending) {
        $translation->add_DBEntry($xref);
    }

    return;
}
} |
# Builds genes from a set of transcripts derived from one human gene and
# stores them through the database's gene adaptor.
#
# Steps, in order: drop translations whose peptide contains a '*'; attach
# Ensembl xrefs; transfer xrefs from the human gene's transcripts; compact
# duplicate transcripts; cluster what remains; then create, name and store
# one gene per cluster.  A cluster is skipped only when its 'nt_len' is
# below 600 AND its 'aa_len' is below 15.
#
# Args: $db           - database adaptor providing get_AnalysisAdaptor()
#                       and get_GeneAdaptor()
#       $hum_gene     - the human gene the transcripts were derived from
#       $ctranscripts - arrayref of transcripts to build the genes from
sub store_gene {
    my ($db, $hum_gene, $ctranscripts) = @_;

    my $MIN_AA_LEN = 15;
    my $MIN_NT_LEN = 600;

    my $analysis = $db->get_AnalysisAdaptor->fetch_by_logic_name('ensembl');

    # A '*' in the translated sequence disqualifies the translation.
    foreach my $ct (@$ctranscripts) {
        next unless $ct->translation;
        $ct->translation(undef) if $ct->translate->seq() =~ /\*/;
    }

    create_ensembl_xrefs($ctranscripts);
    transfer_xrefs($hum_gene, $ctranscripts);

    $ctranscripts = compact_transcripts($ctranscripts);
    my $clusters = cluster_transcripts($ctranscripts);

    my $gene_adaptor = $db->get_GeneAdaptor();

    foreach my $cluster (@$clusters) {
        next if $cluster->{'nt_len'} < $MIN_NT_LEN
             && $cluster->{'aa_len'} < $MIN_AA_LEN;

        my $cgene = Bio::EnsEMBL::Gene->new();

        # Link the new gene back to the human gene it comes from.
        my $hum_gene_xref = Bio::EnsEMBL::DBEntry->new(
            -primary_id => $hum_gene->stable_id(),
            -version    => $hum_gene->version(),
            -dbname     => 'Ens_Hs_gene',
            -release    => 1,
            -display_id => $hum_gene->stable_id(),
        );
        $cgene->add_DBEntry($hum_gene_xref);

        # Identifiers are handed out gene first, then each transcript
        # followed by its translation, and the exons last.
        generate_stable_id($cgene);

        foreach my $ctrans (@{ $cluster->{'transcripts'} }) {
            generate_stable_id($ctrans);
            generate_stable_id($ctrans->translation) if $ctrans->translation;
            $cgene->add_Transcript($ctrans);
        }

        # Exons with the same hashkey share one stable_id.
        my %id_for_exon;
        foreach my $ex (@{ $cgene->get_all_Exons() }) {
            my $known_id = $id_for_exon{ $ex->hashkey() };
            if ($known_id) {
                $ex->stable_id($known_id);
            }
            else {
                generate_stable_id($ex);
                $id_for_exon{ $ex->hashkey() } = $ex->stable_id();
            }
        }

        # Carry over the human gene's xrefs from the databases in %KEEP_XREF.
        foreach my $gx (@{ $hum_gene->get_all_DBEntries }) {
            next unless $KEEP_XREF{ uc($gx->dbname()) };
            $cgene->add_DBEntry($gx);
        }

        my $hum_display = $hum_gene->display_xref;
        if ($hum_display && $KEEP_XREF{ uc($hum_display->dbname) }) {
            $cgene->display_xref($hum_display);
        }

        $cgene->analysis($analysis);

        my $name = $cgene->stable_id();
        if ($cgene->display_xref()) {
            $name .= '/' . $cgene->display_xref->display_id();
        }

        $cgene->type('ensembl');

        print STDERR "Storing gene: $name\n";

        $gene_adaptor->store($cgene);
    }

    return;
}
my ($TRANSCRIPT_NUM, $GENE_NUM, $EXON_NUM, $TRANSLATION_NUM); } |
transfer_xrefs | description | prev | next | Top |
# Copies xrefs from the transcripts and translations of a human gene onto
# the chimp transcripts and translations that carry the same stable_id.
# Only xrefs whose upper-cased database name is set in %KEEP_XREF are
# copied; a human transcript's display xref is copied under the same rule.
#
# Args: $hum_gene          - gene providing get_all_Transcripts()
#       $chimp_transcripts - arrayref of transcripts that receive the xrefs
sub transfer_xrefs {
    my ($hum_gene, $chimp_transcripts) = @_;

    # Index the receiving objects by stable_id; several objects may share
    # one identifier.
    my %chimp_tr_by_id;
    my %chimp_tl_by_id;

    foreach my $chimp_tr (@$chimp_transcripts) {
        push @{ $chimp_tr_by_id{ $chimp_tr->stable_id() } }, $chimp_tr;

        my $chimp_tl = $chimp_tr->translation();
        next unless $chimp_tl;
        push @{ $chimp_tl_by_id{ $chimp_tl->stable_id() } }, $chimp_tl;
    }

    foreach my $hum_tr (@{ $hum_gene->get_all_Transcripts() }) {

        # Transcript-level xrefs and display xref.
        foreach my $chimp_tr (@{ $chimp_tr_by_id{ $hum_tr->stable_id } }) {
            foreach my $xref (@{ $hum_tr->get_all_DBEntries }) {
                next unless $KEEP_XREF{ uc($xref->dbname()) };
                $chimp_tr->add_DBEntry($xref);
            }

            my $display = $hum_tr->display_xref();
            if ($display && $KEEP_XREF{ uc($display->dbname) }) {
                $chimp_tr->display_xref($display);
            }
        }

        # Translation-level xrefs.
        my $hum_tl = $hum_tr->translation();
        next unless $hum_tl;

        foreach my $xref (@{ $hum_tl->get_all_DBEntries }) {
            foreach my $chimp_tl (@{ $chimp_tl_by_id{ $hum_tl->stable_id() } }) {
                next unless $KEEP_XREF{ uc($xref->dbname()) };
                $chimp_tl->add_DBEntry($xref);
            }
        }
    }

    return;
}
General documentation
No general documentation available.