Bioinformatics II

Bioinformatica II

Theorieles  9.00-11.30 Lokaal A1.057
Practica  16.00-18.40 PC zaal C

  • do 16 februari: Geen Les
  • do 23 februari: Geen Les
  • do 1 maart: Recap Bioinformatics I, RDBMS, (Bio)SQL
  • do 8 maart: Web Application Development (PHP)
  • do 15 maart: MyGenBank
  • do 22 maart: Genome Browsers
  • do 29 maart:  Galaxy
  • do 5 april: Geen Les
  • do 12 april: Geen Les
  • do 19 april: Datamining (Tim De Meyer)
  • do 26 april: Textmining (Maté Ongenaert)
  • do 3 mei: Systems Biology (Bart Deplancke)
  • do 10 mei: Les 10  (projectvoorstelling)

CpG counter

use strict;
use warnings;
use Bio::SeqIO;
use Data::Dumper;

# CpG counter: for every record in a FASTA file, print the sequence followed
# by the number of "CG" dinucleotides it contains.

my $filename = "promoter.txt";

my $seqio = Bio::SeqIO->new(
    '-format' => 'fasta',
    '-file'   => $filename,
);

while (my $sequence_object = $seqio->next_seq) {
    #print Dumper($sequence_object);    # uncomment to inspect the record
    my $sequence = $sequence_object->seq();

    # Guard in case seq() hands back undef for a record without residues
    # (keeps the prints below warning-free).
    $sequence = '' unless defined $sequence;
    print $sequence . "\n";
    #last;                              # uncomment to stop after one record

    # Count matches by assigning the match list to an empty list in scalar
    # context. Unlike s/CG/CG/g, which returns the empty string when nothing
    # matched, this yields a real 0 for sequences without any CG.
    # The match is case-sensitive: lowercase "cg" is not counted.
    my $count = () = $sequence =~ /CG/g;
    print "$count\n";
}

Bioperl

use strict;
use Bio::SeqIO;
use Data::Dumper;

# Walk through a Swiss-Prot flat file with Bio::SeqIO and print the
# sequence string of each entry on its own line.

my $filename = "swiss-prot.dat";

my @reader_options = (
    '-format' => 'swiss',
    '-file'   => $filename,
);
my $seqio = Bio::SeqIO->new(@reader_options);

while (my $entry = $seqio->next_seq) {
    # Debugging aids: dump the whole object, or stop after the first entry.
    #print Dumper($entry);
    my $residues = $entry->seq();
    print "$residues\n";
    #last;
}

Prosite scanner

use strict;
use warnings;

# Prosite scanner: read the patterns (PA lines) from prosite.dat, translate
# each one into a Perl regular expression and report every non-overlapping
# match in a fixed set of protein sequences.

# Translate one PROSITE pattern (without its terminating period) into the
# source text of an equivalent Perl regular expression.
#   -        element separator           -> removed
#   {ABC}    any residue except A, B, C  -> [^ABC]
#   [ABC>]   A, B, C or the C-terminus   -> (?:[ABC]|$)
#   <  / >   N- / C-terminal anchor      -> ^ / $
#   (n)      repetition, also (n,m)      -> {n} / {n,m}
#   x        any residue                 -> explicit residue class
sub prosite_to_regex {
    my ($pattern) = @_;
    my $regex = $pattern;

    $regex =~ s/-//g;

    # Exclusion sets must be rewritten BEFORE "(n)" becomes "{n}"; otherwise
    # the two uses of curly braces can no longer be told apart.
    $regex =~ s/\{([^}]+)\}/[^$1]/g;

    $regex =~ s/\[([^\]<>]+)>\]/(?:[$1]|\$)/g;
    $regex =~ s/\A</^/;
    $regex =~ s/>\z/\$/;

    $regex =~ s/\((\d+(?:,\d+)?)\)/{$1}/g;
    $regex =~ s/x/[ABCDEFGHIKLMNPQRSTVWXYZ]/g;

    return $regex;
}

# --- Load the patterns -------------------------------------------------------
open(my $prosite_fh, '<', 'prosite.dat') or die "Problem reading file: $!";

my %pattern_of;    # entry ID => complete PROSITE pattern
my $current_id;    # ID of the entry whose lines are being read
while (my $line = <$prosite_fh>) {
    if ($line =~ /^ID\s\s\s(.+);/) {
        $current_id = $1;
        next;
    }

    # A pattern may continue over several PA lines; append each piece so the
    # whole pattern is kept rather than only its last line.
    if (defined $current_id && $line =~ /^PA\s\s\s(\S+)/) {
        $pattern_of{$current_id} .= $1;
    }
}
close $prosite_fh;

# The pattern proper ends at the terminating period.
s/\.\z// for values %pattern_of;

# --- Sequences to scan -------------------------------------------------------
# NOTE(review): the fourth entry appears to contain the same protein twice,
# back to back -- confirm that this is intended.
my @seq = (
"MGNLFENCTHRYSFEYIYENCTNTTNQCGLIRNVASSIDVFHWLDVYISTTIFVISGILNFYCLFIALYT YYFLDNETRKHYVFVLSRFLSSILVIISLLVLESTLFSESLSPTFAYYAVAFSIYDFSMDTLFFSYIMIS LITYFGVVHYNFYRRHVSLRSLYIILISMWTFSLAIAIPLGLYEAASNSQGPIKCDLSYCGKVVEWITCS LQGCDSFYNANELLVQSIISSVETLVGSLVFLTDPLINIFFDKNISKMVKLQLTLGKWFIALYRFLFQMT NIFENCSTHYSFEKNLQKCVNASNPCQLLQKMNTAHSLMIWMGFYIPSAMCFLAVLVDTYCLLVTISILK SLKKQSRKQYIFGRANIIGEHNDYVVVRLSAAILIALCIIIIQSTYFIDIPFRDTFAFFAVLFIIYDFSILSLLGSFTGVAM MTYFGVMRPLVYRDKFTLKTIYIIAFAIVLFSVCVAIPFGLFQAADEIDGPIKCDSESCELIVKWLLFCI ACLILMGCTGTLLFVTVSLHWHSYKSKKMGNVSSSAFNHGKSRLTWTTTILVILCCVELIPTGLLAAFGK SESISDDCYDFYNANSLIFPAIVSSLETFLGSITFLLDPIINFSFDKRISKVFSSQVSMFSIFFCGKR",
"MLDDRARMEA AKKEKVEQIL AEFQLQEEDL KKVMRRMQKE MDRGLRLETH EEASVKMLPT YVRSTPEGSE VGDFLSLDLG GTNFRVMLVK VGEGEEGQWS VKTKHQMYSI PEDAMTGTAE MLFDYISECI SDFLDKHQMK HKKLPLGFTF SFPVRHEDID KGILLNWTKG FKASGAEGNN VVGLLRDAIK RRGDFEMDVV AMVNDTVATM ISCYYEDHQC EVGMIVGTGC NACYMEEMQN VELVEGDEGR MCVNTEWGAF GDSGELDEFL LEYDRLVDES SANPGQQLYE KLIGGKYMGE LVRLVLLRLV DENLLFHGEA SEQLRTRGAF ETRFVSQVES DTGDRKQIYN ILSTLGLRPS TTDCDIVRRA CESVSTRAAH MCSAGLAGVI NRMRESRSED VMRITVGVDG SVYKLHPSFK ERFHASVRRL TPSCEITFIE SEEGSGRGAA LVSAVACKKA CMLGQ",
"MESDSFEDFLKGEDFSNYSYSSDLPPFLLDAAPCEPESLEINKYFVVIIYVLVFLLSLLGNSLVMLVILY SRVGRSGRDNVIGDHVDYVTDVYLLNLALADLLFALTLPIWAASKVTGWIFGTFLCKVVSLLKEVNFYSGILLLACISVDRY LAIVHATRTLTQKRYLVKFICLSIWGLSLLLALPVLIFRKTIYPPYVSPVCYEDMGNNTANWRMLLRILP QSFGFIVPLLIMLFCYGFTLRTLFKAHMGQKHRAMRVIFAVVLIFLLCWLPYNLVLLADTLMRTWVIQET CERRNDIDRALEATEILGILGRVNLIGEHWDYHSCLNPLIYAFIGQKFRHGLLKILAIHGLISKDSLPKDSRPSFVGSSSGH TSTTL",
"MEANFQQAVK KLVNDFEYPT ESLREAVKEF DELRQKGLQK NGEVLAMAPA FISTLPTGAE TGDFLALDFG GTNLRVCWIQ LLGDGKYEMK HSKSVLPREC VRNESVKPII DFMSDHVELF IKEHFPSKFG CPEEEYLPMG FTFSYPANQV SITESYLLRW TKGLNIPEAI NKDFAQFLTE GFKARNLPIR IEAVINDTVG TLVTRAYTSK ESDTFMGIIF GTGTNGAYVE QMNQIPKLAG KCTGDHMLIN MEWGATDFSC LHSTRYDLLL DHDTPNAGRQ IFEKRVGGMY LGELFRRALF HLIKVYNFNE GIFPPSITDA WSLETSVLSR MMVERSAENV RNVLSTFKFR FRSDEEALYL WDAAHAIGRR AARMSAVPIA SLYLSTGRAG KKSDVGVDGS 
LVEHYPHFVD MLREALRELI GDNEKLISIG IAKDGSGIGA ALCALQAVKE KKGLA MEANFQQAVK KLVNDFEYPT ESLREAVKEF DELRQKGLQK NGEVLAMAPA FISTLPTGAE TGDFLALDFG GTNLRVCWIQ LLGDGKYEMK HSKSVLPREC VRNESVKPII DFMSDHVELF IKEHFPSKFG CPEEEYLPMG FTFSYPANQV SITESYLLRW TKGLNIPEAI NKDFAQFLTE GFKARNLPIR IEAVINDTVG TLVTRAYTSK ESDTFMGIIF GTGTNGAYVE QMNQIPKLAG KCTGDHMLIN MEWGATDFSC LHSTRYDLLL DHDTPNAGRQ IFEKRVGGMY LGELFRRALF HLIKVYNFNE GIFPPSITDA WSLETSVLSR MMVERSAENV RNVLSTFKFR FRSDEEALYL WDAAHAIGRR AARMSAVPIA SLYLSTGRAG KKSDVGVDGS LVEHYPHFVD MLREALRELI GDNEKLISIG IAKDGSGIGA ALCALQAVKE KKGLA",
);

# The literals above are laid out in blocks separated by spaces, and one of
# them spans a line break. Strip ALL whitespace once, up front, so that a
# match can never be blocked by a layout character.
s/\s+//g for @seq;

# --- Scan --------------------------------------------------------------------
# Sorted so that the report comes out in the same order on every run.
for my $name (sort keys %pattern_of) {
    my $pattern = $pattern_of{$name};
    print "$name\t$pattern\n";

    my $regex_source = prosite_to_regex($pattern);
    print "regex: $regex_source\n";

    # Compile once per pattern; skip (rather than abort on) anything that
    # still does not translate into a valid regular expression.
    my $regex = eval { qr/$regex_source/ };
    if (!defined $regex) {
        warn "Skipping $name: cannot compile '$regex_source': $@";
        next;
    }

    for my $index (0 .. $#seq) {
        # 1-based number of the sequence within @seq, used in the report.
        my $seq_number = $index + 1;

        while ($seq[$index] =~ /($regex)/g) {
            my $start = $-[1] + 1;    # 1-based start position of the match
            print "found $name $1 in \$seq$seq_number \@$start\n";
        }
    }
    #last;    # uncomment to stop after the first pattern
}