1 from testformats.swissprot38 import *
2
3
4 import support
5
# Shared registry for every sample in this file.  The two bound methods are
# aliased so the registrations below stay short.
# NOTE(review): `support.Storage` is not visible here -- presumably add_test
# records one (name, expression, text) case and add_test_lines records one
# case per line of text; confirm against the support module.
test_list = support.Storage()
add_test = test_list.add_test
add_test_lines = test_list.add_test_lines
9
# Sample ID lines, registered under the label "ID line" with `ID`
# (presumably supplied by the star import from testformats.swissprot38).
add_test_lines("ID line", ID, """\
ID 100K_RAT STANDARD; PRT; 889 AA.
ID CYC_BOVIN STANDARD; PRT; 104 AA.
ID GIA2_GIALA STANDARD; PRT; 296 AA.
""")
15
# Individual AC lines carrying one or two accession numbers.
add_test_lines("AC line", AC, """\
AC Q62671;
AC P00321; P05348;
""")

# Three consecutive AC lines handed to `AC_block` as a single text.
add_test("AC (block)", AC_block, """\
AC Q62671; Q05349; Q05351; Q05352; Q05353; Q05354; Q05355; Q05356;
AC Q92671; Q95349; Q95351; Q95352; Q95353; Q95354; Q95355; Q95356;
AC Q98763;
""")
26
# Each sample holds the three DT lines (created, last sequence update, last
# annotation update) and is registered with the three DT names combined by
# `+`, in that same order.
add_test("date 1", DT_created + DT_seq_update + DT_ann_update, """\
DT 01-OCT-1996 (Rel. 34, Created)
DT 01-OCT-1996 (Rel. 34, Last sequence update)
DT 01-NOV-1997 (Rel. 35, Last annotation update)
""")

add_test("date 2", DT_created + DT_seq_update + DT_ann_update, """\
DT 01-AUG-1988 (Rel. 08, Created)
DT 01-JAN-1990 (Rel. 13, Last sequence update)
DT 15-APR-1999 (Rel. 38, Last annotation update)
""")
38
# DE samples.  Descriptions that fit on one DE line are registered per line
# with `DE`; descriptions continued over several DE lines are registered as
# one text with `DE_block`.
add_test_lines("DE (single line)", DE, """\
DE 100 KD PROTEIN (EC 6.3.2.-).
DE 10 KD PROTEIN PRECURSOR (CLONE PSAS10).
""")

# Two-line description.
add_test("DE (multiline) 1", DE_block, """\
DE 14-3-3 PROTEIN BETA/ALPHA (PROTEIN KINASE C INHIBITOR PROTEIN-1)
DE (KCIP-1).
""")

# Three-line description; a parenthesised name is split across lines.
add_test("DE (multiline) 2", DE_block, """\
DE ANNEXIN V (LIPOCORTIN V) (ENDONEXIN II) (CALPHOBINDIN I) (CBP-I)
DE (PLACENTAL ANTICOAGULANT PROTEIN I) (PAP-I) (PP4) (THROMBOPLASTIN
DE INHIBITOR) (VASCULAR ANTICOAGULANT-ALPHA) (VAC-ALPHA) (ANCHORIN CII).
""")
52
# GN lines: a single gene name, OR-joined names, and an AND of a name with a
# parenthesised OR group.
add_test_lines("GN (single line)", GN, """\
GN HAG3.
GN REX-1.
GN HNS OR DRDX OR OSMZ OR BGLY.
GN GVPA AND (GVPB OR GVPA2).
""")


# A gene-name expression continued onto a second GN line.
add_test("GN (block)", GN_block, """\
GN (CALM1 OR CAM1 OR CALM OR CAM) AND (CALM2 OR CAM2 OR CAMB) AND
GN (CALM3 OR CAM3 OR CAMC).
""")
65
# OS lines with zero, one or two parenthesised names after the species.
add_test_lines("OS (single line)", OS, """\
OS Helianthus annuus (Common sunflower).
OS Escherichia coli.
OS Homo sapiens (Human).
OS Acer spicatum (Moose maple) (Mountain maple).
OS Rous sarcoma virus (strain Schmidt-Ruppin).
""")

# Species lists that continue onto a second OS line after ", and".
add_test("OS (block) 1", OS_block, """\
OS Oncorhynchus nerka (Sockeye salmon), and
OS Oncorhynchus masou (Cherry salmon) (Masu salmon).
""")

add_test("OS (block) 2", OS_block, """\
OS Mus musculus (Mouse), Rattus norvegicus (Rat), and
OS Bos taurus (Bovine).
""")
83
# OG lines naming a single organelle or plasmid.
add_test_lines("OG (single line)", OG, """\
OG Chloroplast.
OG Cyanelle.
OG Mitochondrion.
OG Plasmid name.
OG Plasmid IncI1 ColIb.
""")

# Plasmid lists continued onto a second OG line.  The two samples carry
# distinct, numbered labels (as the OS/OC/KW block samples do) so that a
# reported failure identifies which sample it came from.
add_test("OG (block) 1", OG_block, """\
OG Plasmid pDGO100, Plasmid IncQ pIE723, Plasmid pBP201, and
OG Plasmid IncM pBWH1.
""")

add_test("OG (block) 2", OG_block, """\
OG Plasmid R6-5, Plasmid IncFII NR1, and
OG Plasmid IncFII R1-19 (R1 drd-19).
""")
# OC lines holding a complete semicolon-separated classification.
add_test_lines("OC (single line)", OC, """\
OC Eukaryota; Alveolata; Apicomplexa; Haemosporida; Plasmodium.
OC Eukaryota; Entamoebidae; Entamoeba.
""")

# Classifications spread over several OC lines (four lines, then two).
add_test("OC (block) 1", OC_block, """\
OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
OC euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons;
OC core eudicots; Asteridae; euasterids I; Solanales; Solanaceae;
OC Solanum.
""")

add_test("OC (block) 2", OC_block, """\
OC Eukaryota; Metazoa; Chordata; Vertebrata; Mammalia; Eutheria;
OC Primates; Catarrhini; Hominidae; Homo.
""")
117
# RN lines: bracketed reference numbers of one to six digits.
add_test_lines("RN", RN, """\
RN [1]
RN [2]
RN [3]
RN [23]
RN [876543]
""")

# RP lines covering a variety of free-text reference positions.
add_test_lines("RP", RP, """\
RP SEQUENCE FROM N.A.
RP SEQUENCE FROM N.A., AND SEQUENCE OF 12-35.
RP SEQUENCE OF 34-56; 67-73 AND 123-345, AND DISULFIDE BONDS.
RP REVISIONS TO 67-89.
RP STRUCTURE BY NMR.
RP X-RAY CRYSTALLOGRAPHY (1.8 ANGSTROMS).
RP CHARACTERIZATION.
RP MUTAGENESIS OF TYR-56.
RP REVIEW.
RP VARIANT ALA-58.
RP VARIANTS XLI LEU-341; ARG-372 AND TYR-446.
""")
139
# RC lines made of one or two `TOKEN=value;` pairs.
add_test_lines("RC (single line)", RC, """\
RC STRAIN=SPRAGUE-DAWLEY; TISSUE=LIVER;
RC STRAIN=HOLSTEIN; TISSUE=MAMMARY GLAND, AND LYMPH NODE;
RC SPECIES=RAT; STRAIN=WISTAR;
RC PLASMID=INCFII R100;
""")

# Two RC lines registered as one text with `RC_block`.
add_test("RC (block)", RC_block, """\
RC STRAIN=MVZ CATALOG 172969, 172970, 174109, 174110, 174229, AND 174230;
RC TISSUE=LIVER;
""")

# RX lines: MEDLINE cross-references.
add_test_lines("RX (single line)", RX, """\
RX MEDLINE; 91002678.
RX MEDLINE; 93144687.
""")
156
# RA (author) samples for `RA_block`: a one-line list whose names carry
# "JR." / "III" suffixes, then a list continued onto a second RA line.
add_test("RA (block) 1", RA_block, """\
RA SMITH H. JR., VON BRAUN M.T. III;
""")

add_test("RA (block) 2", RA_block, """\
RA YANOFSKY C., PLATT T., CRAWFORD I.P., NICHOLS B.P., CHRISTIE G.E.,
RA HOROWITZ H., VAN CLEEMPUT M., WU A.M.;
""")
# RT (title) samples: a quoted title on one line registered with `RT`, then
# titles spanning three RT lines registered with `RT_block`.
add_test("RT (single line)", RT, """\
RT "Organization of the sunflower 11S storage protein gene family.";
""")

add_test("RT (block) 1", RT_block, """\
RT "New insulin-like proteins with atypical disulfide bond pattern
RT characterized in Caenorhabditis elegans by comparative sequence
RT analysis and homology modeling.";
""")

add_test("RT (block) 2", RT_block, """\
RT "Stored mRNA in cotyledons of Vigna unguiculata seeds: nucleotide
RT sequence of cloned cDNA for a stored mRNA and induction of its
RT synthesis by precocious germination.";
""")
# RL (reference location) lines: two journal citations and a thesis.
add_test_lines("RL (single line)", RL, """\
RL J. Mol. Biol. 168:321-331(1983).
RL Nucleic Acids Res. 27:0-0(1999).
RL Thesis (1972), University of Geneva, Switzerland.
""")

# "(In)" citations spanning two to four RL lines.
add_test("RL (block) 1", RL_block, """\
RL (In) Boyer P.D. (eds.);
RL The enzymes (3rd ed.), pp.11:397-547, Academic Press, New York (1975).
""")

add_test("RL (block) 2", RL_block, """\
RL (In) Rich D.H., Gross E. (eds.);
RL Proceedings of the 7th american peptide symposium, pp.69-72,
RL Pierce Chemical Co., Rockford Il. (1981).
""")

add_test("RL (block) 3", RL_block, """\
RL (In) Magnusson S., Ottesen M., Foltmann B., Dano K.,
RL Neurath H. (eds.);
RL Regulatory proteolytic enzymes and their inhibitors, pp.163-172,
RL Pergamon Press, New York (1978).
""")

# Two "(In)" lines, each ending in its own full stop, passed as one text.
add_test("RL (block) 4", RL_block, """\
RL (In) Plant Gene Register PGR98-023.
RL (In) Worm Breeder's Gazette 15(3):34(1998).
""")
208
# A complete reference with every line type present: RN, RP, RC, RX, RA,
# RT and RL.
add_test("reference 1", reference, """\
RN [1]
RP SEQUENCE FROM N.A.
RC STRAIN=WISTAR; TISSUE=TESTIS;
RX MEDLINE; 92253337.
RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.;
RT "Molecular characterization of a novel rat protein structurally
RT related to poly(A) binding proteins and the 70K protein of the U1
RT small nuclear ribonucleoprotein particle (snRNP).";
RL Nucleic Acids Res. 20:1471-1475(1992).
""")

# A reference made of RN, RP, RA and RL only (no RC, RX or RT lines).
add_test("reference 2", reference, """\
RN [2]
RP ERRATUM.
RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.;
RL Nucleic Acids Res. 20:2624-2624(1992).
""")
# CC samples.  s1, s2 and s3 each hold one "-!-" topic with its continuation
# lines; they are registered individually and then reused in combinations.
s1 = """\
CC -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM
CC AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND
CC THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY
CC SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR
CC POST-TRANSCRIPTIONAL REGULATION OF MRNA.
"""
s2 = """\
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
CC IN LIVER, KIDNEY, LUNG AND BRAIN.
"""
s3 = """\
CC -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN
CC THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28.
"""
add_test("single comment 1", single_comment, s1)
add_test("single comment 2", single_comment, s2)
add_test("single comment 3", single_comment, s3)

# The copyright notice: CC lines enclosed between two dashed CC lines.
# NOTE(review): this module-level name shadows the `copyright` object that
# the interpreter's site module normally provides.
copyright = """\
CC --------------------------------------------------------------------------
CC This SWISS-PROT entry is copyright. It is produced through a collaboration
CC between the Swiss Institute of Bioinformatics and the EMBL outstation -
CC the European Bioinformatics Institute. There are no restrictions on its
CC use by non-profit institutions as long as its content is in no way
CC modified and this statement is not removed. Usage by and for commercial
CC entities requires a license agreement (See http://www.isb-sib.ch/announce/
CC or send an email to license@isb-sib.ch).
CC --------------------------------------------------------------------------
"""


# Concatenations of the topics above, with and without the trailing
# copyright notice, all registered with `comment`.
add_test("set of comments 1", comment, s1)
add_test("set of comments 2", comment, s1+s2)
add_test("set of comments 3", comment, s1+s3)
add_test("set of comments 4", comment, s1+s2+s3)
add_test("set of comments 5", comment, s1+copyright)
add_test("set of comments 6", comment, s1+s2+s3+copyright)
265
266
# DR (database cross-reference) lines for many different databases.  The
# three-field entries come first, followed by the four-field EMBL, PROSITE
# and PFAM entries.
s = """\
DR AARHUS/GHENT-2DPAGE; 8006; IEF.
DR DICTYDB; DD01047; MYOA.
DR ECO2DBASE; G052.0; 6TH EDITION.
DR ECOGENE; EG10054; ARAC.
DR FLYBASE; FBgn0000055; Adh.
DR GCRDB; GCR_0087; -.
DR HIV; K02013; NEF$BRU.
DR HSC-2DPAGE; P47985; HUMAN.
DR HSSP; P00438; 1DOB.
DR MAIZEDB; 25342; -.
DR MAIZE-2DPAGE; P80607; COLEOPTILE.
DR MENDEL; 2596; AMAhy;psbA;1.
DR MGD; MGI:87920; ADFP.
DR MGD; MGI:95401; EPB4.1.
DR MIM; 249900; -.
DR PDB; 3ADK; 16-APR-88.
DR PIR; A02768; R5EC7.
DR REBASE; RB00005; EcoRI.
DR SGD; L0000008; AAR2.
DR STYGENE; SG10312; PROV.
DR SUBTILIST; BG10774; OPPD.
DR SWISS-2DPAGE; P10599; HUMAN.
DR TIGR; MJ0125; -.
DR TRANSFAC; T00141; -.
DR WORMPEP; ZK637.7; CE00437.
DR YEPD; 4270; -.
DR ZFIN; ZDB-GENE-980526-290; hoxa1.
DR EMBL; Y00312; CAA68412.1; -.
DR EMBL; L29151; AAA99430.1; ALT_INIT.
DR EMBL; L20562; AAA26884.1; ALT_TERM.
DR EMBL; X56420; CAA39814.1; ALT_FRAME.
DR EMBL; M28482; AAA26378.1; ALT_SEQ.
DR EMBL; M63397; AAA51662.1; -.
DR EMBL; M63395; AAA51662.1; JOINED.
DR EMBL; M63396; AAA51662.1; JOINED.
DR EMBL; J04126; -; NOT_ANNOTATED_CDS.
DR PROSITE; PS00107; PROTEIN_KINASE_ATP; 1.
DR PROSITE; PS00028; ZINC_FINGER_C2H2; 6.
DR PROSITE; PS00237; G_PROTEIN_RECEPTOR; FALSE_NEG.
DR PROSITE; PS01128; SHIKIMATE_KINASE; PARTIAL.
DR PROSITE; PS00383; TYR_PHOSPHATASE_1; UNKNOWN_1.
DR PFAM; PF00017; SH2; 1.
DR PFAM; PF00008; EGF; 8.
DR PFAM; PF00595; PDZ; PARTIAL.
"""

# The same text is registered twice: through add_test_lines with `DR`, and
# as one whole text with `DR_block`.
add_test_lines("DR", DR, s)
add_test("DR (block)", DR_block, s)
316
# KW lines: one to three semicolon-separated keywords ending in a full stop.
add_test_lines("KW (single line)", KW, """\
KW Oxidoreductase; Acetylation.
KW Acetylation; Oxidoreductase.
KW Ubiquitin conjugation; Ligase.
KW Signal.
KW Seed storage protein; Multigene family; Signal.
""")

# Keyword lists continued over two, three and four KW lines.
add_test("KW (block) 1", KW_block, """\
KW Brain; Neurone; Phosphorylation; Acetylation; Multigene family;
KW 3D-structure.
""")

add_test("KW (block) 2", KW_block, """\
KW Steroidogenesis; Oxidoreductase; NAD; Isomerase; Mitochondrion;
KW Multigene family; Multifunctional enzyme; Transmembrane;
KW Endoplasmic reticulum.
""")

add_test("KW (block) 3", KW_block, """\
KW Hydrolase; Ligase; Oxidoreductase; NADP; Multifunctional enzyme;
KW One-carbon metabolism; ATP-binding; Purine biosynthesis;
KW Amino-acid biosynthesis; Methionine biosynthesis;
KW Histidine biosynthesis.
""")
342
# One-line FT entries.  The samples include entries with no description,
# positions written as "<1", "?", "?24" and "?25", and a 0..0 range.
add_test_lines("FT range / single line", FT_range, """\
FT DOMAIN 77 88 ASP/GLU-RICH (ACIDIC).
FT DOMAIN 127 150 PRO-RICH.
FT DOMAIN 420 439 ARG/GLU-RICH (MIXED CHARGE).
FT BINDING 858 858 UBIQUITIN (BY SIMILARITY).
FT DOMAIN 43 57 PRO/THR-RICH.
FT SIGNAL <1 8 BY SIMILARITY.
FT NON_TER 1 1
FT DISULFID 56 67
FT CARBOHYD 114 114 POTENTIAL.
FT CONFLICT 102 102 D -> S (IN REF. 2).
FT CONFLICT 105 105 MISSING (IN REF. 3).
FT CHAIN ? 75 10 KD PROTEIN.
FT SIGNAL 1 ?24 POTENTIAL.
FT PROPEP ?25 ?31 POTENTIAL.
FT SIGNAL 1 ?
FT INIT_MET 0 0
""")

# FT entries whose description continues on following FT lines; each whole
# entry is registered with `FT`.
add_test("FT w/ continuation 1", FT, """\
FT MOD_RES 9 9 AMIDATION (G-10 PROVIDE AMIDE GROUP)
FT (BY SIMILARITY).
""")

add_test("FT w/ continuation 2", FT, """\
FT DOMAIN 131 296 13.5 X 12 AA TANDEM REPEATS OF E-E-T-Q-K-
FT T-V-E-P-E-Q-T.
""")

# This entry also ends with a "/FTId=" line.
add_test("FT w/ continuation 3", FT, """\
FT VARIANT 33 33 F -> Y (IN A*0205, A*0206, A*0208, A*0210
FT AND A*0221).
FT /FTId=VAR_004334.
""")
377
378
# A full feature table for `feature_block`: the one-line entries and the
# continuation entries used in the FT samples, interleaved in one text.
add_test("feature (block)", feature_block, """\
FT DOMAIN 77 88 ASP/GLU-RICH (ACIDIC).
FT DOMAIN 127 150 PRO-RICH.
FT DOMAIN 420 439 ARG/GLU-RICH (MIXED CHARGE).
FT DOMAIN 131 296 13.5 X 12 AA TANDEM REPEATS OF E-E-T-Q-K-
FT T-V-E-P-E-Q-T.
FT BINDING 858 858 UBIQUITIN (BY SIMILARITY).
FT DOMAIN 43 57 PRO/THR-RICH.
FT SIGNAL <1 8 BY SIMILARITY.
FT NON_TER 1 1
FT DISULFID 56 67
FT CARBOHYD 114 114 POTENTIAL.
FT VARIANT 33 33 F -> Y (IN A*0205, A*0206, A*0208, A*0210
FT AND A*0221).
FT /FTId=VAR_004334.
FT CONFLICT 102 102 D -> S (IN REF. 2).
FT CONFLICT 105 105 MISSING (IN REF. 3).
FT CHAIN ? 75 10 KD PROTEIN.
FT SIGNAL 1 ?24 POTENTIAL.
FT PROPEP ?25 ?31 POTENTIAL.
FT MOD_RES 9 9 AMIDATION (G-10 PROVIDE AMIDE GROUP)
FT (BY SIMILARITY).
FT SIGNAL 1 ?
FT INIT_MET 0 0
""")
404
# SQ header lines: length in AA, molecular weight and CRC32 checksum.
add_test_lines("SQ header", SQ, """\
SQ SEQUENCE 889 AA; 100368 MW; DD7E6C7A CRC32;
SQ SEQUENCE 111 AA; 12416 MW; 103BBA8B CRC32;
SQ SEQUENCE 29 AA; 2900 MW; BA38C516 CRC32;
SQ SEQUENCE 1707 AA; 194328 MW; 31FDA77C CRC32;
""")

# Sequence data lines: a full line, shorter lines, and a single residue.
add_test_lines("SQ_data", SQ_data, """\
ISFTSFNDES GENAEKLLQF KRWFWSIVER MSMTERQDLV YFWTSSPSLP ASEEGFQPMP
SITIRPPDDQ HLPTANTCIS RLYVPLYSSK QILKQKLLLA IKTKNFGFV
SITIRPPDDQ HLP
A
""")

# An SQ header followed by its data lines, registered with `sequence`.
# NOTE(review): the residues in "sequence 1" do not add up to the 889 AA in
# its header -- presumably lengths are not cross-checked; confirm.
add_test("sequence 1", sequence, """\
SQ SEQUENCE 889 AA; 100368 MW; DD7E6C7A CRC32;
MMSARGDFLN YALSLMRSHN DEHSDVLPVL DVCSLKHVAY VFQALIYWIK AMNQQTTLDT
PQLERKRTRE LLELGIDNED SEHENDDDTS QSATLNDKDD ESLPAETGQN HPFFRRSDSM
S
""")
add_test("sequence 2", sequence, """\
SQ SEQUENCE 4 AA; 408 MW; 34BC4AD8 CRC32;
GFAD
""")

# The "//" terminator line.
add_test("end", end, """\
//
""")
433
434
# Complete sample entry (100K_RAT): every line from ID through the "//"
# terminator.  The literal is opened with a backslash-newline, like the other
# literals in this file, so the first data line starts on its own source line;
# the string value is the same as when "ID" directly followed the quotes.
record1 = """\
ID 100K_RAT STANDARD; PRT; 889 AA.
AC Q62671;
DT 01-NOV-1997 (Rel. 35, Created)
DT 01-NOV-1997 (Rel. 35, Last sequence update)
DT 15-JUL-1999 (Rel. 38, Last annotation update)
DE 100 KD PROTEIN (EC 6.3.2.-).
OS Rattus norvegicus (Rat).
OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Mammalia;
OC Eutheria; Rodentia; Sciurognathi; Muridae; Murinae; Rattus.
RN [1]
RP SEQUENCE FROM N.A.
RC STRAIN=WISTAR; TISSUE=TESTIS;
RX MEDLINE; 92253337.
RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.;
RT "Molecular characterization of a novel rat protein structurally
RT related to poly(A) binding proteins and the 70K protein of the U1
RT small nuclear ribonucleoprotein particle (snRNP).";
RL Nucleic Acids Res. 20:1471-1475(1992).
RN [2]
RP ERRATUM.
RA MUELLER D., REHBEIN M., BAUMEISTER H., RICHTER D.;
RL Nucleic Acids Res. 20:2624-2624(1992).
CC -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM
CC AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND
CC THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY
CC SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR
CC POST-TRANSCRIPTIONAL REGULATION OF MRNA.
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
CC IN LIVER, KIDNEY, LUNG AND BRAIN.
CC -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN
CC THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28.
CC -!- MISCELLANEOUS: A CYSTEINE RESIDUE IS REQUIRED FOR
CC UBIQUITIN-THIOLESTER FORMATION.
CC -!- SIMILARITY: CONTAINS AN HECT-TYPE E3 UBIQUITIN-PROTEIN LIGASE
CC DOMAIN.
CC -!- SIMILARITY: A CENTRAL REGION (AA 485-514) IS SIMILAR TO THE
CC C-TERMINAL DOMAINS OF MAMMALIAN AND YEAST POLY (A) RNA BINDING
CC PROTEINS (PABP).
CC -!- SIMILARITY: THE C-TERMINAL HALF SHOWS HIGH SIMILARITY TO
CC DROSOPHILA HYPERPLASMIC DISC PROTEIN AND SOME, TO HUMAN E6-AP.
CC -!- SIMILARITY: CONTAINS MIXED-CHARGE DOMAINS SIMILAR TO RNA-BINDING
CC PROTEINS.
CC --------------------------------------------------------------------------
CC This SWISS-PROT entry is copyright. It is produced through a collaboration
CC between the Swiss Institute of Bioinformatics and the EMBL outstation -
CC the European Bioinformatics Institute. There are no restrictions on its
CC use by non-profit institutions as long as its content is in no way
CC modified and this statement is not removed. Usage by and for commercial
CC entities requires a license agreement (See http://www.isb-sib.ch/announce/
CC or send an email to license@isb-sib.ch).
CC --------------------------------------------------------------------------
DR EMBL; X64411; CAA45756.1; -.
DR PFAM; PF00632; HECT; 1.
DR PFAM; PF00658; PABP; 1.
DR PROSITE; PS00107; PROTEIN_KINASE_ATP; 1.
DR AARHUS/GHENT-2DPAGE; 8006; IEF.
DR DICTYDB; DD01047; MYOA.
KW Ubiquitin conjugation; G-protein coupled receptor; Transmembrane;
KW Glycoprotein; Ligase.
FT DOMAIN 77 88 ASP/GLU-RICH (ACIDIC).
FT DOMAIN 127 150 PRO-RICH.
FT DOMAIN 420 439 ARG/GLU-RICH (MIXED CHARGE).
FT DOMAIN 448 457 ARG/ASP-RICH (MIXED CHARGE).
FT DOMAIN 485 514 PABP-LIKE.
FT DOMAIN 579 590 ASP/GLU-RICH (ACIDIC).
FT DOMAIN 786 889 HECT DOMAIN.
FT DOMAIN 827 847 PRO-RICH.
FT BINDING 858 858 UBIQUITIN (BY SIMILARITY).
SQ SEQUENCE 889 AA; 100368 MW; DD7E6C7A CRC32;
MMSARGDFLN YALSLMRSHN DEHSDVLPVL DVCSLKHVAY VFQALIYWIK AMNQQTTLDT
PQLERKRTRE LLELGIDNED SEHENDDDTS QSATLNDKDD ESLPAETGQN HPFFRRSDSM
TFLGCIPPNP FEVPLAEAIP LADQPHLLQP NARKEDLFGR PSQGLYSSSA GSGKCLVEVT
MDRNCLEVLP TKMSYAANLK NVMNMQNRQK KAGEDQSMLA EEADSSKPGP SAHDVAAQLK
SSLLAEIGLT ESEGPPLTSF RPQCSFMGMV ISHDMLLGRW RLSLELFGRV FMEDVGAEPG
SILTELGGFE VKESKFRREM EKLRNQQSRD LSLEVDRDRD LLIQQTMRQL NNHFGRRCAT
TPMAVHRVKV TFKDEPGEGS GVARSFYTAI AQAFLSNEKL PNLDCIQNAN KGTHTSLMQR
LRNRGERDRE REREREMRRS SGLRAGSRRD RDRDFRRQLS IDTRPFRPAS EGNPSDDPDP
LPAHRQALGE RLYPRVQAMQ PAFASKITGM LLELSPAQLL LLLASEDSLR ARVEEAMELI
VAHGRENGAD SILDLGLLDS SEKVQENRKR HGSSRSVVDM DLDDTDDGDD NAPLFYQPGK
RGFYTPRPGK NTEARLNCFR NIGRILGLCL LQNELCPITL NRHVIKVLLG RKVNWHDFAF
FDPVMYESLR QLILASQSSD ADAVFSAMDL AFAVDLCKEE GGGQVELIPN GVNIPVTPQN
VYEYVRKYAE HRMLVVAEQP LHAMRKGLLD VLPKNSLEDL TAEDFRLLVN GCGEVNVQML
ISFTSFNDES GENAEKLLQF KRWFWSIVER MSMTERQDLV YFWTSSPSLP ASEEGFQPMP
SITIRPPDDQ HLPTANTCIS RLYVPLYSSK QILKQKLLLA IKTKNFGFV
//
"""
521
# Second complete sample entry (12KD_FRAAN), ID through "//".  Its CC, KW
# and most DR lines repeat those of record1; the reference, FT and sequence
# sections differ.
record2 = """\
ID 12KD_FRAAN STANDARD; PRT; 111 AA.
AC Q05349;
DT 01-OCT-1996 (Rel. 34, Created)
DT 01-OCT-1996 (Rel. 34, Last sequence update)
DT 01-NOV-1997 (Rel. 35, Last annotation update)
DE AUXIN-REPRESSED 12.5 KD PROTEIN.
OS Fragaria ananassa (Strawberry).
OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
OC euphyllophytes; Spermatophyta; Magnoliophyta; eudicotyledons;
OC core eudicots; Rosidae; eurosids I; Rosales; Rosaceae; Fragaria.
RN [1]
RP SEQUENCE FROM N.A.
RC STRAIN=CV. OZARK BEAUTY; TISSUE=FLOWER;
RX MEDLINE; 91329668.
RA REDDY A.S.N., POOVAIAH B.W.;
RT "Molecular cloning and sequencing of a cDNA for an auxin-repressed
RT mRNA: correlation between fruit growth and repression of the
RT auxin-regulated gene.";
RL Plant Mol. Biol. 14:127-136(1990).
CC -!- FUNCTION: E3 UBIQUITIN-PROTEIN LIGASE WHICH ACCEPTS UBIQUITIN FROM
CC AN E2 UBIQUITIN-CONJUGATING ENZYME IN THE FORM OF A THIOESTER AND
CC THEN DIRECTLY TRANSFERS THE UBIQUITIN TO TARGETED SUBSTRATES (BY
CC SIMILARITY). THIS PROTEIN MAY BE INVOLVED IN MATURATION AND/OR
CC POST-TRANSCRIPTIONAL REGULATION OF MRNA.
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
CC IN LIVER, KIDNEY, LUNG AND BRAIN.
CC -!- DEVELOPMENTAL STAGE: IN EARLY POST-NATAL LIFE, EXPRESSION IN
CC THE TESTIS INCREASES TO REACH A MAXIMUM AROUND DAY 28.
CC -!- MISCELLANEOUS: A CYSTEINE RESIDUE IS REQUIRED FOR
CC UBIQUITIN-THIOLESTER FORMATION.
CC -!- SIMILARITY: CONTAINS AN HECT-TYPE E3 UBIQUITIN-PROTEIN LIGASE
CC DOMAIN.
CC -!- SIMILARITY: A CENTRAL REGION (AA 485-514) IS SIMILAR TO THE
CC C-TERMINAL DOMAINS OF MAMMALIAN AND YEAST POLY (A) RNA BINDING
CC PROTEINS (PABP).
CC -!- SIMILARITY: THE C-TERMINAL HALF SHOWS HIGH SIMILARITY TO
CC DROSOPHILA HYPERPLASMIC DISC PROTEIN AND SOME, TO HUMAN E6-AP.
CC -!- SIMILARITY: CONTAINS MIXED-CHARGE DOMAINS SIMILAR TO RNA-BINDING
CC PROTEINS.
CC --------------------------------------------------------------------------
CC This SWISS-PROT entry is copyright. It is produced through a collaboration
CC between the Swiss Institute of Bioinformatics and the EMBL outstation -
CC the European Bioinformatics Institute. There are no restrictions on its
CC use by non-profit institutions as long as its content is in no way
CC modified and this statement is not removed. Usage by and for commercial
CC entities requires a license agreement (See http://www.isb-sib.ch/announce/
CC or send an email to license@isb-sib.ch).
CC --------------------------------------------------------------------------
DR EMBL; X52429; CAA36676.1; -.
DR EMBL; X64411; CAA45756.1; -.
DR PFAM; PF00632; HECT; 1.
DR PFAM; PF00658; PABP; 1.
DR PROSITE; PS00107; PROTEIN_KINASE_ATP; 1.
DR AARHUS/GHENT-2DPAGE; 8006; IEF.
DR DICTYDB; DD01047; MYOA.
KW Ubiquitin conjugation; G-protein coupled receptor; Transmembrane;
KW Glycoprotein; Ligase.
FT DOMAIN 43 57 PRO/THR-RICH.
SQ SEQUENCE 111 AA; 12416 MW; 103BBA8B CRC32;
MVLLDKLWDD IVAGPQPERG LGMLRKVPQP LNLKDEGESS KITMPTTPTT PVTPTTPISA
RKDNVWRSVF HPGSNLSSKT MGNQVFDSPQ PNSPTVYDWM YSGETRSKHH R
//
"""
586
# Each complete entry is registered on its own with `record`; the two
# entries concatenated are registered with `format`.
# NOTE(review): `format` here is presumably the name supplied by the star
# import, which would shadow the builtin `format` -- confirm.
add_test("record 1", record, record1)
add_test("record 2", record, record2)
add_test("format", format, record1 + record2)
590
591
594
597
# Run the registered samples when this file is executed as a script.
# NOTE(review): no definition of `test` appears in the visible part of this
# file; presumably it is defined in lines not shown here or supplied by the
# star import -- confirm before relying on this entry point.
if __name__ == "__main__":
    test()
600