package core; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Random; public class SequenceGenerator { public String[] degeneratedSequences; public String[] informationOfDegenSequences; public String prefix; public String suffix; public String[] allMotifs; Map translator; public SequenceGenerator(String mainSequence, String prefix, String suffix, int amountOfSequences, String[] allMotifs) throws IOException{ this.prefix = prefix; this.suffix = suffix; this.allMotifs = allMotifs; generate(mainSequence, amountOfSequences, allMotifs); } public static String getStringOfPossibilities(int[] sequenceInt, Map mapp) { //Returns a string with a digit between 1 and 4 at every place, which corresponds to the places in the int[] sequenceInt //and stands for the size of the set at every place. String possibilities = ""; for (int i = 0; i < sequenceInt.length; i++) { possibilities = possibilities.concat( String.valueOf( ((String)mapp.get(Integer. valueOf(sequenceInt[i]))) .length())); } return possibilities; } public static int[] stringToBinary(String motif, Map charToInt) //Translates a string of DNA to a array of binary numbers { int[] binaryArray = new int[motif.length()]; for (int i = 0; i < motif.length(); i++) { binaryArray[i] = ((Integer)charToInt.get(Character.valueOf(motif.charAt(i)))).intValue(); } return binaryArray; } public static boolean isDifferent(int[] sequence, int[] motif, int offset) { //Determines, whether the letter (that is, set) of the motif is smaller or equal in size to its corresponding letter //on the sequence. Returns true, if one of the letters of the motif is bigger in size. boolean difference = false; for (int i = 0; i < motif.length; i++) { if ((sequence[(i + offset)] & motif[i]) == 0) { difference = true; break; } difference = false; } return difference; } public static Map invertMap(Map input) { //Creates an HashMap, in which the sides between the two values are switched Map inverse = new HashMap(); for (Map.Entry entry : input.entrySet()) { Character key = (Character)entry.getKey(); Integer value = (Integer)entry.getValue(); inverse.put(value, key); } return inverse; } public static String reverse(String input) { String output = ""; output = new StringBuilder(input).reverse().toString(); return output; } public static String complement(String input) { String output = ""; for (int i = 0; i < input.length(); i++) { if (input.charAt(i) == 'A') { output = output.concat("T"); } else if (input.charAt(i) == 'C') { output = output.concat("G"); } else if (input.charAt(i) == 'G') { output = output.concat("C"); } else if (input.charAt(i) == 'T') { output = output.concat("A"); } else if (input.charAt(i) == 'N') { output = output.concat("N"); } else if (input.charAt(i) == 'D') { output = output.concat("H"); } else if (input.charAt(i) == 'V') { output = output.concat("B"); } else if (input.charAt(i) == 'B') { output = output.concat("V"); } else if (input.charAt(i) == 'H') { output = output.concat("D"); } else if (input.charAt(i) == 'W') { output = output.concat("W"); } else if (input.charAt(i) == 'S') { output = output.concat("S"); } else if (input.charAt(i) == 'K') { output = output.concat("M"); } else if (input.charAt(i) == 'M') { output = output.concat("K"); } else if (input.charAt(i) == 'Y') { output = output.concat("R"); } else if (input.charAt(i) == 'R') { output = output.concat("Y"); } else { System.out.println("Problems in complementation"); } } return output; } public static String getMotivlessSequence(String DNAsequence, String[] allMotifs, Map charToInt, Random dice, int prefixLength, int suffixLength, Map binToPossibilities, int numberOfCall) { //Compares every section of the DNA sequence to the motifs and changes the sequence, if one motif is a possible product of //the degenerate sequence by replacing one letter in the string DNAsequence with the one that isn't included in the corresponding //letter of the motif. //It ignores the prefix, so this is a possible source of error. Map invertedMap = invertMap(charToInt); int[] intSequence = stringToBinary(DNAsequence, charToInt); for (int count = 0; count < allMotifs.length; count++) { int[] intMotif = stringToBinary(allMotifs[count], charToInt); int[][] positions = new int[4][allMotifs[count].length() + 1]; int pointer1 = 0; int pointer2 = 0; int pointer3 = 0; int pointer4 = 0; String tempString = ""; for (int i = Math.max(prefixLength - allMotifs[count].length(), 0); i < Math.min(DNAsequence.length() - suffixLength, DNAsequence.length() - allMotifs[count].length()); i++) { if (!isDifferent(intSequence, intMotif, i)) { tempString = getStringOfPossibilities( intSequence, binToPossibilities).substring( i, i + allMotifs[count].length()); //This block counts the amount of letters (that is, sets) with a size of 1, 2, 3 or 4. pointer1 = 0; pointer2 = 0; pointer3 = 0; pointer4 = 0; for (int l = 0; l < allMotifs[count].length(); l++) { if (tempString.charAt(l) == '1') { positions[0][pointer1] = l; pointer1++; } else if (tempString.charAt(l) == '2') { if ((intSequence[(l + i)] & (intMotif[l] ^ 0xFFFFFFFF)) != 0) { positions[1][pointer2] = l; pointer2++; } } else if (tempString.charAt(l) == '3') { if ((intSequence[(l + i)] & (intMotif[l] ^ 0xFFFFFFFF)) != 0) { positions[2][pointer3] = l; pointer3++; } } else if ((tempString.charAt(l) == '4') && ((intSequence[(l + i)] & (intMotif[l] ^ 0xFFFFFFFF)) != 0)) { positions[3][pointer4] = l; pointer4++; } } DNAsequence = introduceNoBias(pointer1, allMotifs, count, dice, intSequence, i, positions, intMotif, numberOfCall, DNAsequence, invertedMap); } } } return DNAsequence; } public static String introduceNoBias(int pointer1, String[] allMotifs, int count, Random dice, int[] intSequence, int i, int[][] positions, int[] intMotif, int numberOfCall, String DNAsequence, Map invertedMap){ if (pointer1 == intMotif.length) { System.out.println("PROBLEM! Part of sequence " + numberOfCall + " is equal to motif " + allMotifs[count] + "."); }else{ int[] nonIdenticalPositions = new int[intMotif.length]; int nonIdenticalPointer = 0; for(int h = 0; h charToInt = new HashMap(); charToInt.put(Character.valueOf('A'), Integer.valueOf(8)); charToInt.put(Character.valueOf('C'), Integer.valueOf(4)); charToInt.put(Character.valueOf('G'), Integer.valueOf(2)); charToInt.put(Character.valueOf('T'), Integer.valueOf(1)); charToInt.put(Character.valueOf('N'), Integer.valueOf(15)); charToInt.put(Character.valueOf('D'), Integer.valueOf(11)); charToInt.put(Character.valueOf('V'), Integer.valueOf(14)); charToInt.put(Character.valueOf('B'), Integer.valueOf(7)); charToInt.put(Character.valueOf('H'), Integer.valueOf(13)); charToInt.put(Character.valueOf('W'), Integer.valueOf(9)); charToInt.put(Character.valueOf('S'), Integer.valueOf(6)); charToInt.put(Character.valueOf('K'), Integer.valueOf(3)); charToInt.put(Character.valueOf('M'), Integer.valueOf(12)); charToInt.put(Character.valueOf('Y'), Integer.valueOf(5)); charToInt.put(Character.valueOf('R'), Integer.valueOf(10)); Map binToPossibilities = new HashMap(); binToPossibilities.put(Integer.valueOf(8), "A"); binToPossibilities.put(Integer.valueOf(4), "C"); binToPossibilities.put(Integer.valueOf(2), "G"); binToPossibilities.put(Integer.valueOf(1), "T"); binToPossibilities.put(Integer.valueOf(15), "ACGT"); binToPossibilities.put(Integer.valueOf(11), "AGT"); binToPossibilities.put(Integer.valueOf(14), "ACG"); binToPossibilities.put(Integer.valueOf(7), "CGT"); binToPossibilities.put(Integer.valueOf(13), "ACT"); binToPossibilities.put(Integer.valueOf(9), "AT"); binToPossibilities.put(Integer.valueOf(6), "CG"); binToPossibilities.put(Integer.valueOf(3), "GT"); binToPossibilities.put(Integer.valueOf(12), "AC"); binToPossibilities.put(Integer.valueOf(5), "CT"); binToPossibilities.put(Integer.valueOf(10), "AG"); Random dice = new Random(); translator = new HashMap(); translator.put(Character.valueOf('A'), "A"); translator.put(Character.valueOf('C'), "C"); translator.put(Character.valueOf('G'), "G"); translator.put(Character.valueOf('T'), "T"); translator.put(Character.valueOf('N'), "ACGT"); translator.put(Character.valueOf('D'), "AGT"); translator.put(Character.valueOf('V'), "ACG"); translator.put(Character.valueOf('B'), "CGT"); translator.put(Character.valueOf('H'), "ACT"); translator.put(Character.valueOf('W'), "AT"); translator.put(Character.valueOf('S'), "CG"); translator.put(Character.valueOf('K'), "GT"); translator.put(Character.valueOf('M'), "AC"); translator.put(Character.valueOf('Y'), "CT"); translator.put(Character.valueOf('R'), "AG"); mainSequence = mainSequence.toUpperCase(); if(containsNonDNA(mainSequence)){ System.out.println("The sequence contains non-DNA letters"); return; } prefix = prefix.toUpperCase(); if(containsNonDNA(prefix)){ System.out.println("The prefix sequence contains non-DNA letters"); return; } suffix = suffix.toUpperCase(); if(containsNonDNA(suffix)){ System.out.println("The suffix sequence contains non-DNA letters"); return; } mainSequence = expandShortFormOfSequence(mainSequence); //In this loop, the sequences are generated degeneratedSequences = new String[amountOfSequences]; informationOfDegenSequences = new String[amountOfSequences]; for (int k = 0; k < amountOfSequences; k++) { String firstSequence = ""; firstSequence = firstSequence.concat(prefix).concat(mainSequence).concat(suffix); firstSequence = getMotivlessSequence(firstSequence, allMotifs, charToInt, dice, prefix.length(), suffix.length(), binToPossibilities, k); degeneratedSequences[k] = firstSequence; informationOfDegenSequences[k] = "> Sequence number: " + (k + 1) + " Possibilities per base: " + countPossibleSequences(firstSequence); } } }