forked from mtarbit/Rosalind-Problems
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathe024-lexv.py
executable file
·89 lines (78 loc) · 1.59 KB
/
e024-lexv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
# coding=utf-8
# Ordering Strings of Varying Length Lexicographically
# ====================================================
#
# Say that we have strings s=s1s2⋯sm and t=t1t2⋯tn with m<n. Consider the
# substring t′=t[1:m]. We have two cases:
#
# If s=t′, then we set s<Lext because s is shorter than t (e.g., APPLE<APPLET).
# Otherwise, s≠t′. We define s<Lext if s<Lext′ and define s>Lext if s>Lext′
# (e.g., APPLET<LexARTS because APPL<LexARTS). Given: A permutation of at most
# 12 symbols defining an ordered alphabet 𝒜 and a positive integer n (n≤4).
#
# Return: All strings of length at most n formed from 𝒜, ordered
# lexicographically. (Note: As in “Enumerating k-mers Lexicographically”,
# alphabet order is based on the order in which the symbols are given.)
#
# Sample Dataset
# --------------
# D N A
# 3
#
# Sample Output
# -------------
# D
# DD
# DDD
# DDN
# DDA
# DN
# DND
# DNN
# DNA
# DA
# DAD
# DAN
# DAA
# N
# ND
# NDD
# NDN
# NDA
# NN
# NND
# NNN
# NNA
# NA
# NAD
# NAN
# NAA
# A
# AD
# ADD
# ADN
# ADA
# AN
# AND
# ANN
# ANA
# AA
# AAD
# AAN
# AAA
def alpha_combs(alphabet, n, acc='', res=[]):
if n > 0:
for c in alphabet:
res.append(acc + c)
alpha_combs(alphabet, n - 1, acc + c, res)
return res
def result(s):
bits = s.split()
alphabet = bits[:-1]
length = int(bits[-1])
return alpha_combs(alphabet, length)
if __name__ == "__main__":
small_dataset = "D N A\n3"
large_dataset = open('datasets/rosalind_lexv.txt').read().strip()
print "\n".join(result(large_dataset))