/*

  Utils for Rosalind problems in Picat.

  http://rosalind.info/

  This model was created by Hakan Kjellerstrand, hakank@gmail.com
  See also my Picat page: http://www.hakank.org/picat/

*/
module rosalind_utils.

import v3_utils.
import dcg_utils.
import util.

% main => go.

%
% Smoke-test entry point: the first (backtrackable) rule just prints a
% newline, the fallback rule succeeds without doing anything.
%
rosalind_utils_test ?=> nl.
rosalind_utils_test => true.


%
% DCG that generates/parses a DNA string, i.e. a (possibly empty)
% sequence of the characters 'A', 'C', 'G' and 'T'.
%
% The non-empty alternatives are tried before the empty one, so using
% this grammar as a generator is only practical for short strings.
% For longer strings use dna_gen*/k instead.
%
dna --> as ; cs ; gs ; ts.
dna --> [].

% One nucleotide followed by the rest of the DNA string.
as --> ['A'], dna.
cs --> ['C'], dna.
gs --> ['G'], dna.
ts --> ['T'], dna.


%
% Generate a random DNA sequence.
%
% Each element of the list argument is unified with a character drawn at
% random from "ACGT". The rule bodies contain no terminals, so nothing is
% consumed from the DCG input.
%
% Note: one cannot use fail directly to enumerate sequences, since the
% choice is made with random/2.
% See examples in rosalind_dna.pi, rosalind_revc.pi, and rosalind_rna.pi.
%
dna_gen1([Base|Rest]) -->
  {Idx = random(1,4), Alphabet = "ACGT", Base = Alphabet[Idx]},
  dna_gen1(Rest).
dna_gen1([]) --> [].


%
% Parse a (Rosalind) FASTA text given as a string.
% Returns a list of [FastaID, DNA] pairs.
%
parse_fasta(Fasta) = Entries =>
  parse_fasta(Entries,Fasta,[]).

%
% A single FASTA record: a ">" header line giving the id, followed by the
% sequence text up to the next ">" (any_except/2 comes from an imported
% module -- presumably dcg_utils; it is used here to collect the characters
% before the given stop characters).
% The collected sequence text is right-stripped and all newline characters
% are removed from it.
%
parse_fasta1([Id,Seq]) -->
  ">", any_except(Id,"\n"), "\n",
  any_except(Raw,">"),
  {Trimmed = rstrip(Raw), Seq = delete_all(Trimmed,'\n')}.

%
% One or more FASTA records; the second clause accepts empty input.
%
parse_fasta([Entry|Entries]) -->
  parse_fasta1(Entry),
  (parse_fasta(Entries) ; []).
parse_fasta([]) --> [].

%
% Same as parse_fasta/1, but reads the FASTA text from File first.
%
parse_fasta_file(File) = L =>
  Chars = read_file_chars(File),
  L = parse_fasta(Chars).


%
% Consensus string and profile matrix for a list of sequences.
% Used in rosalind_cons.pi
%
%   Fs        : the sequences; the length of the first one is used for all
%   Map       : map from the row numbers 1..4 to the corresponding character
%   Consensus : (out) array with one character per position; on ties the
%               character with the lowest row number is chosen
%   Profile   : (out) 4 x StrLen array, Profile[Row,Col] is the number of
%               sequences that have the character of row Row at position Col
%
consensus_and_profile(Fs,Map, Consensus,Profile) =>
  SeqCount = Fs.len,
  SeqLen = Fs[1].len,

  % Start from an all-zero matrix and overwrite each cell with its count.
  Profile = new_array(4,SeqLen),
  bind_vars(Profile,0),
  foreach(Row in 1..4)
    Symbol = Map.get(Row),
    foreach(Col in 1..SeqLen)
      Profile[Row,Col] := len([S : S in 1..SeqCount, Fs[S,Col] == Symbol])
    end
  end,

  % For each position, pick the character whose row has the largest count.
  Consensus = new_array(SeqLen),
  foreach(Col in 1..SeqLen)
    Counts = [Profile[Row,Col] : Row in 1..4],
    Consensus[Col] = Map.get(argmax(Counts))
  end.


%
% Pos is the position of the (first) maximum element of X.
% When X has fewer than two elements the result is 1.
%
argmax(X) = Pos =>
  Best = 1,
  N = X.len,
  foreach(K in 2..N)
    % Only a strictly larger element replaces the current best,
    % so the earliest maximum wins.
    Best := cond(X[K] > X[Best], K, Best)
  end,
  Pos = Best.