X Tutup
Skip to content

Commit 11c9beb

Browse files
committed
Normally I would have opened a new branch, but this issue was causing this pull request to fail its build, so I'm doing both in one fell swoop. I put the target files in the resource directory and manually copy them over before each test into the temporary working directory, which is scanned before calling out in case the file is already there. This should fix the 429 errors in this class specifically.
1 parent 9197e89 commit 11c9beb

File tree

11 files changed

+1038
-14
lines changed

11 files changed

+1038
-14
lines changed

biojava-core/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444
<artifactId>junit</artifactId>
4545
<scope>test</scope>
4646
</dependency>
47+
<dependency>
48+
<groupId>org.apache.commons</groupId>
49+
<artifactId>commons-io</artifactId>
50+
<version>1.3.2</version>
51+
</dependency>
4752
<!-- logging dependencies (managed by parent pom, don't set versions or scopes here) -->
4853
<dependency>
4954
<groupId>org.slf4j</groupId>

biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
3333
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
3434
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
35-
import org.biojava.nbio.core.sequence.features.*;
35+
import org.biojava.nbio.core.sequence.features.AbstractFeature;
36+
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
37+
import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
38+
import org.biojava.nbio.core.sequence.features.FeatureRetriever;
39+
import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
3640
import org.biojava.nbio.core.sequence.io.GenbankSequenceParser;
3741
import org.biojava.nbio.core.sequence.io.GenericGenbankHeaderParser;
3842
import org.biojava.nbio.core.sequence.template.AbstractSequence;
@@ -41,7 +45,14 @@
4145
import org.slf4j.Logger;
4246
import org.slf4j.LoggerFactory;
4347

44-
import java.io.*;
48+
import java.io.BufferedInputStream;
49+
import java.io.BufferedReader;
50+
import java.io.File;
51+
import java.io.FileInputStream;
52+
import java.io.FileOutputStream;
53+
import java.io.IOException;
54+
import java.io.InputStream;
55+
import java.io.InputStreamReader;
4556
import java.net.URL;
4657
import java.net.URLConnection;
4758
import java.util.ArrayList;
@@ -54,7 +65,7 @@
5465
*/
5566
public class GenbankProxySequenceReader<C extends Compound> extends StringProxySequenceReader<C> implements FeaturesKeyWordInterface, DatabaseReferenceInterface, FeatureRetriever {
5667

57-
private final static Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
68+
private static final Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
5869

5970
private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; //
6071
private String genbankDirectoryCache = null;

biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,30 @@
2020
*/
2121
package org.biojava.nbio.core.sequence.loader;
2222

23+
import org.apache.commons.io.IOUtils;
2324
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
2425
import org.biojava.nbio.core.sequence.ProteinSequence;
2526
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
2627
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
28+
import org.biojava.nbio.core.sequence.features.AbstractFeature;
2729
import org.biojava.nbio.core.sequence.features.FeatureInterface;
30+
import org.biojava.nbio.core.sequence.features.Qualifier;
2831
import org.biojava.nbio.core.sequence.template.AbstractSequence;
2932
import org.junit.Assert;
33+
import org.junit.Before;
3034
import org.junit.Test;
3135
import org.junit.runner.RunWith;
3236
import org.junit.runners.Parameterized;
3337
import org.slf4j.Logger;
3438
import org.slf4j.LoggerFactory;
3539

40+
import java.io.File;
41+
import java.io.FileOutputStream;
3642
import java.io.IOException;
43+
import java.io.InputStream;
3744
import java.util.ArrayList;
3845
import java.util.Arrays;
3946
import java.util.Collection;
40-
import org.biojava.nbio.core.sequence.features.AbstractFeature;
41-
import org.biojava.nbio.core.sequence.features.Qualifier;
4247

4348
/**
4449
* Testing example for issue #834
@@ -58,7 +63,7 @@ public GenbankProxySequenceReaderTest(String gi) {
5863

5964
@Parameterized.Parameters
6065
public static Collection<String[]> getExamples() {
61-
String[][] out = new String[][]{
66+
String[][] accessorIds = new String[][]{
6267
{"399235158"},
6368
{"7525057"},
6469
{"379015144"},
@@ -69,9 +74,42 @@ public static Collection<String[]> getExamples() {
6974
{"254839678"}
7075
};
7176

72-
return Arrays.asList(out);
77+
return Arrays.asList(accessorIds);
78+
}
79+
80+
/**
81+
* In {@link GenbankProxySequenceReader} there is a check to see if the requested files are already in the temp
82+
* directory before attemting to retrieve them from the remote server. so simply copying the test files to the temp
83+
* directory avoids calling out to the server and hitting a 429 status code from the server which fails the build.
84+
* @throws IOException
85+
*/
86+
@Before
87+
public void copyTestFiles() throws IOException {
88+
Collection<String[]> accessorIds = getExamples();
89+
for (String[] arr: accessorIds) {
90+
copyTestFileToWorkingDirectory(arr[0]+".gb");
91+
}
92+
}
93+
94+
/**
95+
* Convenience method for {@link GenbankProxySequenceReaderTest#copyTestFiles()}
96+
* @param filename name of the file to copy from the resource folder
97+
* @throws IOException when something goes wrong with copying the files.
98+
*/
99+
private void copyTestFileToWorkingDirectory(String filename) throws IOException {
100+
String dest = System.getProperty("java.io.tmpdir") + filename;
101+
String src = "org/biojava/nbio/core/sequence/GenbankProxySequenceReader/" + filename;
102+
103+
FileOutputStream destination = new FileOutputStream(new File(dest));
104+
InputStream source = this.getClass().getClassLoader().getResourceAsStream(src);
105+
106+
IOUtils.copy(source, destination);
107+
108+
destination.close();
109+
source.close();
73110
}
74111

112+
75113
@Test
76114
public void testFeatures() throws IOException, InterruptedException, CompoundNotFoundException {
77115
logger.info("run test for protein: {}", gi);
@@ -120,9 +158,6 @@ so it should be done here (manualy).
120158
Assert.assertTrue(!codedBy.isEmpty());
121159
logger.info("\t\tcoded_by: {}", codedBy);
122160
}
123-
124-
// genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14
125-
Thread.sleep(500);
126161
}
127162

128163
@Test
@@ -161,9 +196,5 @@ public void testProteinSequenceFactoring() throws Exception {
161196
} else {
162197
logger.info("target {} has no CDS", gi);
163198
}
164-
165-
// genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14
166-
Thread.sleep(500);
167-
168199
}
169200
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
LOCUS YP_001336026 324 aa linear CON 16-DEC-2014
2+
DEFINITION lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA acyltransferase
3+
[Klebsiella pneumoniae subsp. pneumoniae MGH 78578].
4+
ACCESSION YP_001336026
5+
VERSION YP_001336026.1
6+
DBLINK BioProject: PRJNA57619
7+
DBSOURCE REFSEQ: accession NC_009648.1
8+
KEYWORDS RefSeq.
9+
SOURCE Klebsiella pneumoniae subsp. pneumoniae MGH 78578
10+
ORGANISM Klebsiella pneumoniae subsp. pneumoniae MGH 78578
11+
Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
12+
Enterobacteriaceae; Klebsiella.
13+
REFERENCE 1 (residues 1 to 324)
14+
CONSRTM NCBI Genome Project
15+
TITLE Direct Submission
16+
JOURNAL Submitted (09-JUL-2007) National Center for Biotechnology
17+
Information, NIH, Bethesda, MD 20894, USA
18+
REFERENCE 2 (residues 1 to 324)
19+
AUTHORS McClelland,M., Sanderson,E.K., Spieth,J., Clifton,W.S.,
20+
Latreille,P., Sabo,A., Pepin,K., Bhonagiri,V., Porwollik,S., Ali,J.
21+
and Wilson,R.K.
22+
CONSRTM The Klebsiella pneumonia Genome Sequencing Project
23+
TITLE Direct Submission
24+
JOURNAL Submitted (06-SEP-2006) Genetics, Genome Sequencing Center, 4444
25+
Forest Park Parkway, St. Louis, MO 63108, USA
26+
COMMENT VALIDATED REFSEQ: This record has undergone validation or
27+
preliminary review. The reference sequence was derived from
28+
ABR77796.
29+
Method: conceptual translation.
30+
FEATURES Location/Qualifiers
31+
source 1..324
32+
/organism="Klebsiella pneumoniae subsp. pneumoniae MGH
33+
78578"
34+
/strain="ATCC 700721; MGH 78578"
35+
/sub_species="pneumoniae"
36+
/db_xref="ATCC:700721"
37+
/db_xref="taxon:272620"
38+
Protein 1..324
39+
/product="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA
40+
acyltransferase"
41+
/calculated_mol_wt=37353
42+
Region 1..310
43+
/region_name="PRK08943"
44+
/note="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA
45+
acyltransferase; Validated"
46+
/db_xref="CDD:236355"
47+
Site order(139,142,144,161..164,210..212)
48+
/site_type="other"
49+
/note="putative acyl-acceptor binding pocket"
50+
/db_xref="CDD:153246"
51+
CDS 1..324
52+
/gene="msbB"
53+
/locus_tag="KPN_02370"
54+
/coded_by="complement(NC_009648.1:2595658..2596632)"
55+
/inference="ab initio prediction:Genemark:2.0"
56+
/inference="protein motif:Pfam:IPR004960"
57+
/note="Transfers myristate or laurate, activated on ACP,
58+
to the lipid IVA moiety of (KDO)2-(lauroyl)-lipid IVA"
59+
/transl_table=11
60+
/db_xref="GeneID:5340071"
61+
CONTIG join(WP_002911442.1:1..324)
62+
ORIGIN
63+
1 metkknnief ipkfeksfll prywgawlgv fafagialtp psfrdpllgk lgrlvgrlak
64+
61 ssrrraqinl lycfpeksey ereaiidamy asapqamvmm aelglrdpqk ilarvdwqgk
65+
121 aiidemqrnn ekviflvpha wgvdipamlm asggqkmaam fhnqgnpvfd yvwntvrrrf
66+
181 ggrmharndg ikpfiqsvrq gywgyylpdq dhgaehsefv dffatykatl paigrlmkvc
67+
241 rarvvplfpv ydskthrltv lvrppmddll daddttiarr mneevevfvk phteqytwil
68+
301 kllktrkpge iepykrkelf pkkk
69+
//
70+
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
LOCUS 3IAN_A 321 aa linear BCT 24-NOV-2018
2+
DEFINITION Chain A, Chitinase.
3+
ACCESSION 3IAN_A
4+
VERSION 3IAN_A
5+
DBSOURCE pdb: molecule 3IAN, chain 65, release Nov 21, 2018;
6+
deposition: Jul 14, 2009;
7+
class: HYDROLASE;
8+
source: Mmdb_id: 999999, Pdb_id 1: 3IAN;
9+
Exp. method: X-ray Diffraction.
10+
KEYWORDS .
11+
SOURCE Lactococcus lactis subsp. lactis
12+
ORGANISM Lactococcus lactis subsp. lactis
13+
Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae;
14+
Lactococcus.
15+
REFERENCE 1 (residues 1 to 321)
16+
AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S.,
17+
Sauder,J.M., Burley,S.K. and Almo,S.C.
18+
TITLE Crystal structure of a chitinase from Lactococcus lactis subsp.
19+
lactis
20+
JOURNAL Unpublished
21+
REFERENCE 2 (residues 1 to 321)
22+
AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S.,
23+
Sauder,J.M., Burley,S.K., Almo,S.C. and New York SGX Research
24+
Center for Structural Genomics (NYSGXRC).
25+
TITLE Direct Submission
26+
JOURNAL Submitted (14-JUL-2009)
27+
COMMENT Crystal structure of a chitinase from Lactococcus lactis subsp.
28+
lactis.
29+
FEATURES Location/Qualifiers
30+
source 1..321
31+
/organism="Lactococcus lactis subsp. lactis"
32+
/sub_species="lactis"
33+
/db_xref="taxon:1360"
34+
Het join(bond(115),bond(117),bond(76))
35+
/heterogen="(NA,2572)"
36+
Region 4..313
37+
/region_name="Chi1"
38+
/note="Chitinase [Carbohydrate transport and metabolism];
39+
COG3469"
40+
/db_xref="CDD:226000"
41+
Region 5..288
42+
/region_name="Glyco_hydro_18"
43+
/note="Glycosyl hydrolases family 18; pfam00704"
44+
/db_xref="CDD:279094"
45+
SecStr 5..12
46+
/sec_str_type="sheet"
47+
/note="strand 1"
48+
Site order(10,46,122,124,189,191,283)
49+
/site_type="active"
50+
/note="putative active site [active]"
51+
/db_xref="CDD:119350"
52+
SecStr 24..28
53+
/sec_str_type="sheet"
54+
/note="strand 2"
55+
SecStr 40..45
56+
/sec_str_type="sheet"
57+
/note="strand 3"
58+
SecStr 65..78
59+
/sec_str_type="helix"
60+
/note="helix 1"
61+
SecStr 80..89
62+
/sec_str_type="sheet"
63+
/note="strand 4"
64+
SecStr 100..114
65+
/sec_str_type="helix"
66+
/note="helix 2"
67+
SecStr 117..124
68+
/sec_str_type="sheet"
69+
/note="strand 5"
70+
SecStr 133..151
71+
/sec_str_type="helix"
72+
/note="helix 3"
73+
SecStr 155..163
74+
/sec_str_type="sheet"
75+
/note="strand 6"
76+
SecStr 172..180
77+
/sec_str_type="helix"
78+
/note="helix 4"
79+
SecStr 184..190
80+
/sec_str_type="sheet"
81+
/note="strand 7"
82+
SecStr 196..201
83+
/sec_str_type="sheet"
84+
/note="strand 8"
85+
SecStr 204..209
86+
/sec_str_type="sheet"
87+
/note="strand 9"
88+
SecStr 215..228
89+
/sec_str_type="helix"
90+
/note="helix 5"
91+
SecStr 240..246
92+
/sec_str_type="sheet"
93+
/note="strand 10"
94+
SecStr 261..273
95+
/sec_str_type="helix"
96+
/note="helix 6"
97+
SecStr 278..283
98+
/sec_str_type="sheet"
99+
/note="strand 11"
100+
SecStr 289..293
101+
/sec_str_type="sheet"
102+
/note="strand 12"
103+
SecStr 300..307
104+
/sec_str_type="helix"
105+
/note="helix 7"
106+
ORIGIN
107+
1 msldkvlvgy whnwkstgkd gykggssadf nlsstqegyn vinvsfmktp egqtlptfkp
108+
61 ynktdtefra eisklnaegk svlialggad ahielkksqe sdfvneiirl vdtygfdgld
109+
121 idleqaaiea adnqtvipsa lkkvkdhyrk dgknfmitma pefpyltssg kyapyinnld
110+
181 syydfinpqy ynqggdgfwd sdlnmwisqs ndekkedfly gltqrlvtgt dgfikipask
111+
241 fviglpsnnd aaatgyvkdp navknalnrl kasgneikgl mtwsvnwdag tnsngekynn
112+
301 tfvntyapml fnneghhhhh h
113+
//
114+

0 commit comments

Comments
 (0)
X Tutup