Code Search for Developers
 
 
  

ghg_search.cgi from Gene Homology Tools at Krugle


Show ghg_search.cgi syntax highlighted

#!/usr/local/bin/perl -w

use strict;
use FindBin;
use DBI;
use CGI;
use CGI::Carp;
use Time::HiRes qw(gettimeofday);
my %times;

$ENV{'PATH'} = '/misc/afcs/apps/bin:/usr/local/bin:' . $ENV{'PATH'};

$ENV{'ORACLE_HOME'} = '/usr/local/apps/oracle/product/9.2.0';
$ENV{'ORACLE_PATH'} =  "$ENV{'ORACLE_HOME'}/bin";
$ENV{'PATH'} = "$ENV{'ORACLE_PATH'}:" . $ENV{'PATH'};

my $scriptname = $FindBin::Script;

my $starttime = time;

$times{'start'} = gettimeofday;

my $q = new CGI;

my $textsearch = $q->param("name_field");
(! $textsearch) && ($textsearch = '');
my $anyword = $q->param("anyword");
my $pmatch = $q->param("pmatch");

my %terms;
my $strproc = $textsearch;
while ($textsearch =~ m/"([^"]+)"/g)
  {
   $terms{$1} = '';
   $strproc =~ s/\"$1\"/ /;
  }
$strproc =~ s/^\s+//;
$strproc =~ s/\s+$//;
$strproc =~ s/\s+/ /g;
foreach my $term (split ' ', $strproc)
  {
   $terms{$term} = '';
  }

my $ortxt = '';
my $orsep = ' & ';
$anyword && ($orsep = ' | ');
foreach my $term (keys %terms)
  {
   #(my $oterm = $term) =~ s/[^A-Za-z0-9]/ /g;
   (my $oterm = $term) =~ s/\W/ /g;
   $oterm =~ s/^\s+//;
   $oterm =~ s/\s+$//;
   if ($pmatch && (length $oterm > 2))
     {
      $oterm = '%' . $oterm . '%';
     }
   else
     {
      $oterm = '{' . $oterm . '}';
     }
   $ortxt .= $oterm . $orsep;
  }
$ortxt =~ s/\Q$orsep\E$//;

my $idsearch = $q->param("id_field");
(! $idsearch) && ($idsearch = '');

my %ids;
foreach my $dbid (split ' ', $idsearch)
  {
   $dbid =~ s/^\s+//;
   $dbid =~ s/\s.*//;
   $dbid =~ s/\.\d+$//;
   $dbid && ($ids{$dbid}++);
  }

print $q->header;
print $q->start_html('Homology Database Simple Search');
print $q->h1('Homology Database Simple Search');

unless ((scalar keys %ids) || $textsearch)
  {
   print $q->start_form;
   print '<p>Enter protein or gene names:</p><p>';
   print $q->textfield('name_field','',50,50), '<br />';
   print $q->checkbox(-name=>'pmatch', -value=>1, -label=>'Partial match');
   print '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
   print $q->checkbox(-name=>'anyword', -value=>1, -label=>'Match any word');
   print "</p>\n";
   print '<p>and/or enter Entrez Gene IDs and/or protein accessions:</p>'; 
   print '<p>';
   print $q->textfield('id_field','',50,50);
   print "</p>\n";
   print $q->submit('Search','Search');
   print $q->end_form;
   print $q->end_html;
   exit(0);
  }

my $ghgdb;

unless ($ghgdb = DBI->connect("dbi:Oracle:extdb.sdsc.edu","ghgdb","dg123",{
   PrintError => 1,
   RaiseError => 0
  }))
  {
   print "Error: cannot connect to database\n";
   print $q->end_html;
   exit;
  }
$ghgdb->{LongReadLen} = 1024 * 1024;
$ghgdb->{RaiseError} = 1;

my $fname = $ghgdb->prepare("
   select distinct taxonomy_id, g.gene_id, symbol, name
     from gene_synonyms s, genes g
     where contains (gene_syn, ?, 1) > 0
     and g.gene_id = s.gene_id
  ");

my $findid = $ghgdb->prepare("
   select taxonomy_id, gene_id, symbol, name
     from genes where gene_id = :id
    union
   select taxonomy_id, g.gene_id, symbol, name
     from genes g, gene_accession a where accession = :id
     and g.gene_id = a.gene_id
  ");

my $accfg = $ghgdb->prepare("
   select distinct accession
     from gene_accession where gene_id = ?
     order by accession
  ");

my $synfg = $ghgdb->prepare("
   select distinct gene_syn
     from gene_synonyms where gene_id = ?
     order by lower(gene_syn)
  ");

my %matchgenes;

if ($ortxt)
  {
   $fname->execute($ortxt);
   while (my ($taxid,$geneid,$symbol,$name) = $fname->fetchrow_array)
     {
      (! $symbol) && ($symbol = '');
      (! $name) && ($name = '');
      $matchgenes{$geneid}{'taxid'} = $taxid;
      $matchgenes{$geneid}{'symbol'} = $symbol;
      $matchgenes{$geneid}{'name'} = $name;
     }
  }

foreach my $dbid (keys %ids)
  {
   $findid->bind_param(':id',$dbid);
   $findid->execute();
   while (my ($taxid,$geneid,$symbol,$name) = $findid->fetchrow_array)
     {
      (! $symbol) && ($symbol = '');
      (! $name) && ($name = '');
      $matchgenes{$geneid}{'taxid'} = $taxid;
      $matchgenes{$geneid}{'symbol'} = $symbol;
      $matchgenes{$geneid}{'name'} = $name;
     }
  }

my $searchterm = $textsearch;
$textsearch && $idsearch && ($searchterm .= '; ');
$searchterm .= $idsearch;

print $q->h3("Search term: '$searchterm'");
print '<p><table border="1" cellpadding="5">';
print "<caption>Matching genes</caption>\n";
print '<tr><th>Gene ID</th><th>Taxonomy ID</th><th>Symbol</th>',
   '<th>Name</th><th>All Names</th><th>Protein Accessions</th></tr>', "\n";
foreach my $geneid (sort {
   lc $matchgenes{$a}{'symbol'} cmp lc $matchgenes{$b}{'symbol'}
                                ||
                             $a cmp $b
  } keys %matchgenes)
  {
   print '<tr>';
   print "<td><a href=\"ghg_details\.cgi\?ref_name=$geneid&ref_type=G\">",
	     "$geneid</a></td>";
   print "<td>$matchgenes{$geneid}{'taxid'}</td>";
   print "<td>$matchgenes{$geneid}{'symbol'}</td>";
   print "<td>$matchgenes{$geneid}{'name'}</td>";
   my $synstr = '';
   $synfg->execute($geneid);
   while (my $syn = $synfg->fetchrow_array)
     {
     # if ($syn ne $matchgenes{$geneid}{'name'} &&
     #     $syn ne $matchgenes{$geneid}{'symbol'})
        {
         $synstr .= $syn . '; ';
        }
     }
   $synstr =~ s/; $//;
   print "<td>$synstr</td>";
   my $accstr = '';
   $accfg->execute($geneid);
   while (my $acc = $accfg->fetchrow_array)
     {
      $accstr .= $acc . '; ';
     }
   $accstr =~ s/; $//;
   print "<td>$accstr</td>";
   print "</tr>\n";
  }
print "</table></p>\n";

$ghgdb->disconnect;

print $q->end_html;





See more files for this project here

Gene Homology Tools

A gene and protein homology database toolset that uses existing data from ensembl and entrezgene online databases.

Project homepage: http://sourceforge.net/projects/genehomology
Programming language(s): Perl,SQL
License: bsd

  .project
  Queries.pm
  ghg_details.cgi
  ghg_search.cgi
  index.html