X Tutup
Skip to content

Commit 99331a6

Browse files
committed
adding a couple of demo files to the core project
1 parent c9af1d7 commit 99331a6

File tree

2 files changed

+239
-0
lines changed

2 files changed

+239
-0
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
package demo;
2+
3+
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
4+
import org.biojava.nbio.core.sequence.DNASequence;
5+
import org.biojava.nbio.core.sequence.ProteinSequence;
6+
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
7+
import org.biojava.nbio.core.sequence.compound.AmbiguityRNACompoundSet;
8+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
9+
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
10+
import org.biojava.nbio.core.sequence.io.DNASequenceCreator;
11+
import org.biojava.nbio.core.sequence.io.FastaReader;
12+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
13+
import org.biojava.nbio.core.sequence.template.CompoundSet;
14+
import org.biojava.nbio.core.sequence.template.Sequence;
15+
import org.biojava.nbio.core.sequence.transcription.Frame;
16+
import org.biojava.nbio.core.sequence.transcription.TranscriptionEngine;
17+
18+
import java.io.ByteArrayInputStream;
19+
import java.io.InputStream;
20+
import java.util.LinkedHashMap;
21+
import java.util.Map;
22+
23+
/**
24+
* Created by andreas on 8/10/15.
25+
*/
26+
public class DemoSixFrameTranslation {
27+
28+
public static void main(String[] args){
29+
String dnaFastaS = ">gb:GQ903697|Organism:Arenavirus H0030026 H0030026|Segment:S|Host:Rat\n" +
30+
"CGCACAGAGGATCCTAGGCGTTACTGACTTGCGCTAATAACAGATACTGTTTCATATTTAGATAAAGACC\n" +
31+
"CAGCCAACTGATTGGTCAGCATGGGACAACTTGTGTCCCTCTTCAGTGAAATTCCATCAATCATACACGA\n" +
32+
"AGCTCTCAATGTTGCTCTCGTAGCTGTTAGCATCATTGCAATATTGAAAGGGGTTGTGAATGTTTGGAAG\n" +
33+
"AGTGGAGTTTTGCAGCTTTTGGCCTTCTTGCTCCTGGCGGGAAGATCCTGCTCAGTCATAATTGGTCATC\n" +
34+
"ATCTCGAACTGCAGCATGTGATCTTCAATGGGTCATCAATCACACCCTTTTTACCAGTTACATGTAAGAT\n" +
35+
"CAATGATACCTACTTCCTACTAAGAGGCCCCTATGAAGCTGATTGGGCAGTTGAATTGAGTGTAACTGAA\n" +
36+
"ACCACAGTCTTGGTTGATCTTGAAGGTGGCAGCTCAATGAAGCTGAAAGCCGGAAACATCTCAGGTTGTC\n" +
37+
"TTGGAGACAACCCCCATCTGAGATCAGTGGTCTTCACATTGAATTGGTTGCTAACAGGATTAGATCATGT\n" +
38+
"TATTGATTCTGACCCGAAAATTCTCTGTGATCTTAAAGACAGTGGGCACTTTCGTCTCCAGATGAACTTA\n" +
39+
"ACAGAAAAGCACTATTGTGACAAGTTTCACATCAAAATGGGCAAGGTCTTTGGCGTATTCAAAGATCCGT\n" +
40+
"GCATGGCTGGTGGTAAAATGTTTGCCATACTAAAAAATACCTCTTGGTCGAACCAGTGCCAAGGAAACCA\n" +
41+
"TGTCAGCACCATTCATCTTGTCCTTCAGAGTAATTTCAAACAGGTCCTCAGTAGCAGGAAACTGTTGAAC\n" +
42+
"TTTTTCAGCTGGTCATTGTCTGATGCCACAGGGGCTGATATGCCTGGTGGTTTTTGTCTGGAAAAATGGA\n" +
43+
"TGTTGATTTCAAGTGAACTGAAATGCTTTGGAAACACAGCTGTGGCAAAGTGCAACTTAAATCATGACTC\n" +
44+
"AGAGTTCTGTGACATGCTTAGGCTTTTTGATTTCAACAAAAAGGCAATAGTCACTCTTCAGAACAAAACA\n" +
45+
"AAGCATCGGCTGGACACAGTAATTACTGCTATCAATTCATTGATCTCTGATAATATTCTTATGAAGAACA\n" +
46+
"GGATTAAAGAATTGATAGATGTTCCTTACTGTAATTACACCAAATTTTGGTATGTCAATCACACAGGTCT\n" +
47+
"AAATCTGCACACCCTTCCAAGATGTTGGCTTGTTAAAAATGGTAGCTACTTGAATGTGTCTGACTTCAGG\n" +
48+
"AATGAGTGGATATTGGAGAGTGATCATCTTGTTTCGGAGATCCTTTCAAAGGAGTATGAGGAAAGGCAAA\n" +
49+
"ATCGTACACCACTCTCACTGGTTGACATCTGTTTCTGGAGTACATTGTTTTACACAGCATCAATTTTCCT\n" +
50+
"ACACCTCTTGAGAATTCCAACCCACAGACACATTGTTGGTGAGGGCTGCCCGAAGCCTCATAGGCTAAAC\n" +
51+
"AGGCACTCAATATGTGCTTGTGGCCTTTTCAAACAAGAAGGCAGACCCTTGAGATGGGTAAGAAAGGTGT\n" +
52+
"GAACAATGGTTGCTTGGTGGCCTCCATTGCTGCACCCCCCTAGGGGGGTGCAGCAATGGAGGTTCTCGYT\n" +
53+
"GAGCCTAGAGAACAACTGTTGAATCGGGTTCTCTAAAGAGAACATCGATTGGTAGTACCCTTTTTGGTTT\n" +
54+
"TTCATTGGTCACTGACCCTGAAAGCACAGCACTGAACATCAAACAGTCCAAAAGTGCACAGTGTGCATTT\n" +
55+
"GTTGTGGCTGGTGCTGATCCTTTCTTCTTACTTTTAATGACTATTCCCTTATGTCTGTCACACAGATGTT\n" +
56+
"CAAATCTCTTCCAAACAAGATCTTCAAAGAGCCGTGACTGTTCTGCGGTCAGTTTGACATCAACAATCTT\n" +
57+
"CAAATCCTGTCTTCCATGCATATCAAAGAGCCTCCTAATATCATCAGCACCTTGCGCAGTGAAAACCATG\n" +
58+
"GATTTAGGCAGACTCCTTATTATGCTTGTGATGAGGCCAGGTCGTGCATGTTCAACATCCTTCAGCAATA\n" +
59+
"TCCCATGACAATATTTACTTTGGTCCTTAAAAGATTTTATGTCATTGGGTTTTCTGTAGCAGTGGATGAA\n" +
60+
"TTTTTGTGATTCAGGCTGGTAAATTGCAAACTCAACAGGGTCATGTGGCGGGCCTTCAATGTCAATCCAT\n" +
61+
"GTTGTGTCACTGACCATCAACGACTCTACACTTCTCTTCACCTGAGCCTCCACCTCAGGCTTGAGCGTGG\n" +
62+
"ACAAGAGTGGGGCACCACCGTTCCGGATGGGGACTGGTGTTTTGCTTGGTAAACTCTCAAATTCCACAAC\n" +
63+
"TGTATTGTCCCATGCTCTCCCTTTGATCTGTGATCTTGATGAAATGTAAGGCCAGCCCTCACCAGAGAGA\n" +
64+
"CACACCTTATAAAGTATGTTTTCATAAGGATTCCTCTGTCCTGGTATGGCACTGATGAACATGTTTTCCC\n" +
65+
"TCTTTTTGATCTCCAAGAGGGTTTTTATAATGGTTGTGAATGTGGACTCCTCAATCTTTATTGTTTCCAG\n" +
66+
"CATGTTGCCACCATCAATCAGGCAAGCACCGGCTTTCACAGCAGCTGATAAACTAAGGTTGTAGCCTGAT\n" +
67+
"ATGTTAATTTGAGAATCCTCCTGAGTGATTACCTTTAGAGAAGGATGCTTCTCCATCAAAGCATCTAAGT\n" +
68+
"CACTTAAATTAGGGTATTTTGCTGTGTATAGCAACCCCAGATCTGTGAGGGCCTGAACCACATCATTTAG\n" +
69+
"AGTTTCCCCTCCCTGTTCAGTCATACAGGAAATTGTGAGTGCTGGCATCGATCCAAATTGGTTGATCATA\n" +
70+
"AGTGATGAGTCTTTAACGTCCCAGACTTTGACCACCCCTCCAGTTCTAGCCAACCCAGGTCTCTGAATAC\n" +
71+
"CAACAAGTTGCAGAATTTCGGACCTCCTGGTGAGCTGTGTTGTAGAGAGGTTCCCTAGATACTGGCCACC\n" +
72+
"TGTGGCTGTCAACCTCTCTGTTCTTTGAACTTTTTGCCTTAATTTGTCCAAGTCACTGGAGAGTTCCATT\n" +
73+
"AGCTCTTCCTTTGACAATGATCCTATCTTAAGGAACATGTTCTTTTGGGTTGACTTCATGACCATCAATG\n" +
74+
"AGTCAACTTCCTTATTCAAGTCCCTCAAACTAACAAGATCACTGTCATCTCTTTTAGACCTCCTCATCAT\n" +
75+
"GCGTTGCACACTTGCAACCTTTGAAAAATCTAAGCCGGACAGAAGAGCCCTCGCGTCAGTTAGGACATCT\n" +
76+
"GCCTTAACAGCAGTTGTCCAGTTCGAGAGTCCTCTCCTGAGAGACTGTGTCCATCTGAATGATGGGATTG\n" +
77+
"GTTGTTCGCTCATAGTGATGAAATTGCGCAGAGTTATCCAAAAGCCTAGGATCCTCTGTGCG";
78+
79+
80+
try {
81+
82+
// parse the raw sequence from the string
83+
InputStream stream = new ByteArrayInputStream(dnaFastaS.getBytes());
84+
85+
// define the Ambiguity Compound Sets
86+
AmbiguityDNACompoundSet ambiguityDNACompoundSet = AmbiguityDNACompoundSet.getDNACompoundSet();
87+
CompoundSet<NucleotideCompound> nucleotideCompoundSet = AmbiguityRNACompoundSet.getRNACompoundSet();
88+
89+
FastaReader<DNASequence, NucleotideCompound> proxy =
90+
new FastaReader<DNASequence, NucleotideCompound>(
91+
stream,
92+
new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
93+
new DNASequenceCreator(ambiguityDNACompoundSet));
94+
95+
// has only one entry in this example, but could be easily extended to parse a FASTA file with multiple sequences
96+
LinkedHashMap<String, DNASequence> dnaSequences = proxy.process();
97+
98+
// Initialize the Transcription Engine
99+
TranscriptionEngine engine = new
100+
TranscriptionEngine.Builder().dnaCompounds(ambiguityDNACompoundSet).rnaCompounds(nucleotideCompoundSet).build();
101+
102+
Frame[] sixFrames = Frame.getAllFrames();
103+
104+
105+
106+
for (DNASequence dna : dnaSequences.values()) {
107+
108+
Map<Frame, Sequence<AminoAcidCompound>> results = engine.multipleFrameTranslation(dna, sixFrames);
109+
110+
for (Frame frame : sixFrames){
111+
System.out.println("Translated Frame:" + frame +" : " + results.get(frame));
112+
//System.out.println(dna.getRNASequence(frame).getProteinSequence(engine));
113+
114+
ProteinSequence ps = new ProteinSequence(results.get(frame).getSequenceAsString());
115+
System.out.println(ps);
116+
try {
117+
118+
} catch (Exception e){
119+
System.err.println(e.getMessage() + " when trying to translate frame " + frame);
120+
}
121+
}
122+
123+
}
124+
} catch (Exception e){
125+
e.printStackTrace();
126+
}
127+
128+
129+
}
130+
131+
132+
133+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package demo;
2+
3+
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.IOException;
7+
import java.io.InputStream;
8+
import java.util.LinkedHashMap;
9+
import java.util.logging.Level;
10+
import java.util.logging.Logger;
11+
import org.biojava.nbio.core.sequence.ProteinSequence;
12+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
13+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
14+
import org.biojava.nbio.core.sequence.io.FastaReader;
15+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
16+
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
17+
import org.biojava.nbio.core.util.InputStreamProvider;
18+
19+
20+
/**
21+
* Created by andreas on 6/17/15.
22+
*/
23+
public class ParseFastaFileDemo {
24+
25+
26+
public ParseFastaFileDemo(){
27+
28+
29+
}
30+
31+
/** e.g. download ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
32+
* and pass in path to local location of file
33+
*
34+
* @param args
35+
*/
36+
public static void main(String[] args) {
37+
38+
int mb = 1024*1024;
39+
40+
//Getting the runtime reference from system
41+
Runtime runtime = Runtime.getRuntime();
42+
43+
System.out.println("##### Heap utilization statistics [MB] #####");
44+
45+
//Print used memory
46+
System.out.println("Used Memory:"
47+
+ (runtime.totalMemory() - runtime.freeMemory()) / mb);
48+
49+
//Print free memory
50+
System.out.println("Free Memory:"
51+
+ runtime.freeMemory() / mb);
52+
53+
//Print total available memory
54+
System.out.println("Total Memory:" + runtime.totalMemory() / mb);
55+
56+
//Print Maximum available memory
57+
System.out.println("Max Memory:" + runtime.maxMemory() / mb);
58+
59+
60+
if ( args.length < 1) {
61+
System.err.println("First argument needs to be path to fasta file");
62+
return;
63+
}
64+
65+
File f = new File(args[0]);
66+
67+
if ( ! f.exists()) {
68+
System.err.println("File does not exist " + args[0]);
69+
return;
70+
}
71+
72+
long timeS = System.currentTimeMillis();
73+
74+
try {
75+
76+
// automatically uncompress files using InputStreamProvider
77+
InputStreamProvider isp = new InputStreamProvider();
78+
79+
InputStream inStream = isp.getInputStream(f);
80+
81+
82+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
83+
inStream,
84+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
85+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
86+
87+
LinkedHashMap<String, ProteinSequence> b;
88+
89+
int nrSeq = 0;
90+
91+
while ((b = fastaReader.process(100)) != null) {
92+
for (String key : b.keySet()) {
93+
nrSeq++;
94+
//System.out.println(nrSeq + " : " + key + " " + b.get(key));
95+
if ( nrSeq % 100000 == 0)
96+
System.out.println(nrSeq );
97+
}
98+
99+
}
100+
long timeE = System.currentTimeMillis();
101+
System.out.println("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS));
102+
} catch (Exception ex) {
103+
Logger.getLogger(ParseFastaFileDemo.class.getName()).log(Level.SEVERE, null, ex);
104+
}
105+
}
106+
}

0 commit comments

Comments
 (0)
X Tutup