% maple |\^/| Maple 18 (X86 64 LINUX) ._|\| |/|_. Copyright (c) Maplesoft, a division of Waterloo Maple Inc. 2014 \ MAPLE / All rights reserved. Maple is a trademark of <____ ____> Waterloo Maple Inc. | Type ? for help. HomeLib := /home/gptesler/Maple/packages > read `multidebruijn_demo.maple`; > ############################################################################### # Sample values to use to demonstrate the software ############################################################################### > # multiplicity of each k-mer > m1 := 2; m1 := 2 > # alphabet size > q1 := 2; q1 := 2 > # word size > k1 := 2; k1 := 2 > > m1_divisors := convert(divisors(m1),list); m1_divisors := [1, 2] > # alphabet_type=false: first q characters of "0123..." # alphabet_type=true: first q characters of "abcd..." > alphabet_type1 := false; alphabet_type1 := false > ############################################################################### # Number of multi de Bruijn sequences of various types ############################################################################### > # |L(m,q,k)| > Blinear(m1,q1,k1); 36 > # |L_y(m,q,k)| > Blinear_y(m1,q1,k1); 9 > # |LC(m,q,k)| > Blinearized_all(m1,q1,k1); 36 > # |LC_y^{(d)}(m,q,k)| using d=1 for acyclic sequences > Blinearized_y_d(m1,q1,k1,1); 8 > # |C^{(d)}(m,q,k)| using d=1 for acyclic sequences > Bcyclic_d(m1,q1,k1,1); 4 > # |LC_y^{(d)}(m,q,k)|, breaking down |LC_y(m,q,k)| by order d: > Matrix([["d",op(m1_divisors)], > ["LC_y^{(d)}(m,q,k)", > op(map(d1 -> Blinearized_y_d(m1,q1,k1,d1), m1_divisors))]] > ); [ "d" 1 2] [ ] ["LC_y^{(d)}(m,q,k)" 8 1] > # |C^{(d)}(m,q,k)|, breaking down |C(m,q,k)| by order d: > Matrix([["d",op(m1_divisors)], > ["LC_y^{(d)}(m,q,k)", > op(map(d1 -> Bcyclic_d(m1,q1,k1,d1), m1_divisors))]] > ); [ "d" 1 2] [ ] ["LC_y^{(d)}(m,q,k)" 4 1] > ############################################################################### # Find LC_{0^k}(m,q,k) and C(m,q,k) by brute force and print out the results. # LC_{0^k}(m,q,k) = all linearized multi de Bruijn sequences # that start with k-mer 0...0. # # Prints out all elements of LC_{0^k}(m,q,k). # The ones selected as representatives for C(m,q,k) are marked "*". ############################################################################### > > list_mdb_brute_force(m1,q1,k1); 00010111* 00011011* 00011101* 00100111* 00101110 00110011* 00110110 00111001 00111010 [9, 5] > # Total number of sequences found should match # theoretical formula for |LC_{0^k}(m,q,k)| > Blinearized_y(m1,q1,k1); 9 > # Number of sequences marked with "*" should match # theoretical formula for |C(m,q,k)| > Bcyclic(m1,q1,k1); 5 > # return representatitives of C(m,q,k) as a list of strings > list_mdb_brute_force(m1,q1,k1,return_seqs=true); ["00010111", "00011011", "00011101", "00100111", "00110011"] > ############################################################################### # Multicyclic sequences ############################################################################### > # |MC(m,q,k)| > Bmulticyclic(m1,q1,k1); 36 > # List all multicyclic sequences # Returns [list of multicyclic sequences, list of EBWT of each] > list_all_multicyclic_mdb(m1,q1,k1); [[["0", "0", "01", "01", "1", "1"], ["0", "0", "01", "011", "1"], ["0", "0", "01", "0111"], ["0", "0", "01011", "1"], ["0", "0", "010111"], ["0", "0", "011", "011"], ["0", "001", "01", "1", "1"], ["0", "001", "011", "1"], ["0", "001", "0111"], ["0", "001011", "1"], ["0", "0010111"], ["0", "0011", "011"], ["0", "00101", "1", "1"], ["0", "001101", "1"], ["0", "0011101"], ["0", "0011", "01", "1"], ["0", "00111", "01"], ["0", "0011011"], ["0001", "01", "1", "1"], ["0001", "011", "1"], ["0001", "0111"], ["0001011", "1"], ["00010111"], ["00011", "011"], ["000101", "1", "1"], ["0001101", "1"], ["00011101"], ["00011", "01", "1"], ["000111", "01"], ["00011011"], ["001", "001", "1", "1"], ["001", "0011", "1"], ["001", "00111"], ["0010011", "1"], ["00100111"], ["0011", "0011"]], ["00110011", "00110101", "00110110", "00111001", "00111010", "00111100", "01010011", "01010101", "01010110", "01011001", "01011010", "01011100", "01100011", "01100101", "01100110", "01101001", "01101010", "01101100", "10010011", "10010101", "10010110", "10011001", "10011010", "10011100", "10100011", "10100101", "10100110", "10101001", "10101010", "10101100", "11000011", "11000101", "11000110", "11001001", "11001010", "11001100"]] > ############################################################################### # Uniform random sequences of each type # Since these are random, results will change ############################################################################### > # Random element of C(m,q,k), cyclic multi de Bruijn seqs > random_cyclic_mdb(m1,q1,k1,alphabet_type1); "00110011" > # Random element of L(m,q,k), linear multi de Bruijn seqs > random_linear_mdb(m1,q1,k1,alphabet_type1); "010001110" > # Random element of MC(m,q,k), multicyclic de Bruijn seqs > random_multicyclic_mdb(m1,q1,k1,alphabet_type1); "(00011)(01)(1)" > # Random element of LC_{0^k}(m,q,k), linearized multi de Bruijn seqs > random_linearized_mdb(m1,q1,k1,alphabet_type1); "00101110" > # Random element of LC_y(m,q,k) with a different y # Use y = 01111...1 = (q^(k-1)-1)/(q-1) as a k digit base q number > y1 := (q1^(k1-1)-1)/(q1-1); y1 := 1 > random_linearized_mdb(m1,q1,k1,alphabet_type1,root=y1); "01110100" > ############################################################################### # Random linearized sequence, method of Kendal et al, BEST Theorem ############################################################################### > # Parameters m=2, q=3, k=3, > m2 :=2; q2 := 3; k2 := 3; m2 := 2 q2 := 3 k2 := 3 > # first edge "021", which is base q=3 representation of 7 > root2 := 7; root2 := 7 > # first vertex is the prefix "02" of that, which is base q=3 # representation of 2 > rootv2_num := 2; rootv2_str := "02"; rootv2_num := 2 rootv2_str := "02" > # Since these are random, they will differ from the values in the # paper, and successive calls to random functions will not be # coordinated with each other. > # Generate a random spanning tree of G(m,q,k) > random_db_tree(q2,k2,rootv2_num); [1, 1, 0, 2, 0, 2, 0, 1, 1] > # Generate a random linearization # (however, not coordinated with the random spanning tree # just generated) > random_linearized_mdb(m2,q2,k2,alphabet_type1,root=root2); "021112120222000202001212211120122011011022210210001010" > # Above steps will come out different each time. # The one generated in the paper: > random_seq2 := "021202210112012122202010211211101110022200100012210200"; random_seq2 := "021202210112012122202010211211101110022200100012210200" > # Corresponding exit table used in BEST Theorem bijection > exit_table2 := string2exittable(m2,q2,k2,random_seq2); exit_table2 := table(["20" = "212100", "01" = "120102", "11" = "221010", "00" = "210102", "10" = "121002", "02" = "120120", "12" = "001212", "21" = "202110", "22" = "120201" ]) > # Convert exit table back to string in L(m,q,k) # This is "linear" rather than "linearized", so the # initial (k-1)-mer ("02" in this case) is repeated # at the end and the sequence is longer by (k-1). > exittable2string(m2,q2,k2,exit_table2,rootv2_str); "02120221011201212220201021121110111002220010001221020002" > > ############################################################################### # Burrows Wheeler Transform and Extended BWT ############################################################################### > # Forward and inverse transforms. # The inverse will give the lex least rotation, which may differ from # the original input. > s1 := "hello"; s1 := "hello" > w1 := bwt_encode(s1); w1 := "hoell" > bwt_decode(w1); "elloh" > # BWT tables T_B(s), T_{IB}(w) > bwt_encode_table(s1); ["elloh", "hello", "llohe", "lohel", "ohell"] > bwt_decode_table(w1); ["elloh", "hello", "llohe", "lohel", "ohell"] > # EBWT acts on multisets of cycles. # The inverse uses the lex least rotation of each cycle, # and puts the cycle in a particular order (not standard lex order), # so it may differ from the original input > sigma2 := ["hello","hello","goodbye"]; sigma2 := ["hello", "hello", "goodbye"] > w2 := ebwt_encode(sigma2); w2 := "doyhheooeellollgb" > ebwt_decode(w2); ["byegood", "elloh", "elloh"] > # EBWT tables T_E(sigma), T_{IB}(w) > ebwt_encode_table(sigma2); ["byegoodbyegoodbyegoodbyegoodbyegood", "dbyegoodbyegoodbyegoodbyegoodbyegoo", "egoodbyegoodbyegoodbyegoodbyegoodby", "ellohellohellohellohellohellohelloh", "ellohellohellohellohellohellohelloh", "goodbyegoodbyegoodbyegoodbyegoodbye", "hellohellohellohellohellohellohello", "hellohellohellohellohellohellohello", "llohellohellohellohellohellohellohe", "llohellohellohellohellohellohellohe", "lohellohellohellohellohellohellohel", "lohellohellohellohellohellohellohel", "odbyegoodbyegoodbyegoodbyegoodbyego", "ohellohellohellohellohellohellohell", "ohellohellohellohellohellohellohell", "oodbyegoodbyegoodbyegoodbyegoodbyeg", "yegoodbyegoodbyegoodbyegoodbyegoodb"] > ebwt_decode_table(w2); ["byegoodbyegoodbyegoodbyegoodbyegood", "dbyegoodbyegoodbyegoodbyegoodbyegoo", "egoodbyegoodbyegoodbyegoodbyegoodby", "ellohellohellohellohellohellohelloh", "ellohellohellohellohellohellohelloh", "goodbyegoodbyegoodbyegoodbyegoodbye", "hellohellohellohellohellohellohello", "hellohellohellohellohellohellohello", "llohellohellohellohellohellohellohe", "llohellohellohellohellohellohellohe", "lohellohellohellohellohellohellohel", "lohellohellohellohellohellohellohel", "odbyegoodbyegoodbyegoodbyegoodbyego", "ohellohellohellohellohellohellohell", "ohellohellohellohellohellohellohell", "oodbyegoodbyegoodbyegoodbyegoodbyeg", "yegoodbyegoodbyegoodbyegoodbyegoodb"] > # number of columns in inverse EBWT table > ebwt_decode_num_columns(w2); 35 > > ############################################################################### # Properties of (E)BWT ############################################################################### > # inverse of a1^d a2^d a3^d ... vs. inverse of a1 a2 a3 ... > bwt_encode("mississippi"); "pssmipissii" > # string t may be a rotation of the original input > bwt_decode("pssmipissii"); "imississipp" > # On replacing each letter by d copies, # the inverse becomes d concatenates of t > bwt_decode("ppssssmmiippiissssiiii"); "imississippimississipp" > > # EBWT transform > ebwt_encode(["0","0","1","01101"]); "00111001" > # inverse is a multicyclic sequence sigma # and may be represented differently than the original input, # but is equivalent > ebwt_decode("00111001"); ["0", "0", "01011", "1"] > # On replacing each letter by d copies, the inverse becomes # sigma^d: Multiplicity of each cycle is multiplied by d > ebwt_decode("0000111111000011"); ["0", "0", "0", "0", "01011", "01011", "1", "1"] > ############################################################################### # Standard permutation ############################################################################### > # Example in paper # returns [pi, inverse of pi, cycle form of pi, EBWT^{-1}(w)] # where pi is the standard permutation > standard_perm("10010101"); [[1, 2, 4, 6, 0, 3, 5, 7], [4, 0, 1, 5, 2, 6, 3, 7], [[0, 1, 2, 4], [3, 6, 5], [7]], ["0001", "011", "1"]] > quit;