Phenylketonuria/Task2 Scripts
Identity_Evalue.jar
Identity_Evalue.jar is a Java program that extracts the e-values and the sequence identities from BLAST, PSIBLAST and HHblits outputs.
Usage:
java -jar Identity_Evalue.jar
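-i <input-file> : Tab-separated output of a BLAST, PSIBLAST or HHblits run (e-value in the second column, identity in the third column; lines starting with # are ignored)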
-o_identity <output-file> : Output file containing the identities, sorted in ascending order
-o_evalue <output-file> : Output file containing the e-values, sorted in ascending order
Java-code:
<source lang="java">
package identity;
import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections;
/**
* * Class that filters HHblits, BLAST and PSIBLAST output files for e-value and identity in percent. * @author Sonja Waldraff * */
public class Identity_Evalue{
private static String i; private static String o_identity; private static boolean peId = false; private static String o_evalue; private static boolean ev = false;
private static ArrayList<Double> percentageIdentity = new ArrayList<Double>();
private static ArrayList<Double> eValue = new ArrayList<Double>();
/** * read Input-Parameters -i: Input-file, -o_evalue: Output-file returns a * List of the e-values, -o_identity: Output-file returns a List of identity * in percent -hhblits|blast: information if the input-file is a blast(/psiblast) or a * hhblits output * * @param args */ public static void getCMDargs(String[] args) { int j = 0; if (args.length >= 4) { while (j < args.length) { if (args[j].equals("-i")) { j++; i = new String(args[j]); } else if (args[j].equals("-o_identity")) { j++; o_identity = new String(args[j]); peId = true; } else if (args[j].equals("-o_evalue")) { j++; o_evalue = new String(args[j]); ev = true; } j++;
} } else { System.out .println("You can call the program like: -i <input-file (result of blast/psiblast/hhblits)> -o_identity <output-file for identity percentage> -o_evalue <output-file for evalue>"); } }
/** * Methods that reads the input file and filters for the e-value and identity in % * @param inputfile * @throws Exception */ private static void read_file(String inputfile) throws Exception { String line; try { FileReader sfreader = new FileReader(inputfile); BufferedReader bfreader = new BufferedReader(sfreader); while ((line = bfreader.readLine()) != null){ if(!line.startsWith("#")){ String[] x = line.split("\t"); eValue.add(Double.valueOf(x[1])); percentageIdentity.add(Double.valueOf(x[2])); } } Collections.sort(percentageIdentity); Collections.sort(eValue);
sfreader.close(); bfreader.close(); } catch (IndexOutOfBoundsException e) { e.printStackTrace(); }
}
/** * Method that writes e-values and identities in their outputfiles * @throws Exception */ private static void write_file() throws Exception { //if an output file for identities is claimed if (peId) { PrintWriter OUT = new PrintWriter(new FileWriter(new File( o_identity))); for (double i : percentageIdentity) { OUT.println(i); } OUT.close(); } //if an output file for e-values is claimed if (ev) { PrintWriter OUT2 = new PrintWriter( new FileWriter(new File(o_evalue))); for (double d : eValue) { OUT2.println(d); } OUT2.close(); } }
/** * Main-method * @param args * @throws Exception */ public static void main(String[] args) throws Exception { getCMDargs(args); read_file(i); write_file(); } } </source>
GO_comparison.jar
Java-code: <source lang="java"> package identity;
import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet;
/**
 * Compares the GO terms of a reference protein (P00439 in the original
 * analysis) to the GO terms of the sequences found in BLAST, PSIBLAST or
 * HHblits runs, and counts for every reference GO term how many of the found
 * sequences are annotated with it.
 *
 * @author Sonja Waldraff
 */
public class GO_comparison {

    /** Usage message printed when the number of arguments is wrong. */
    private static final String USAGE = "You can call the program like: -i <input-file special parsed (psi-)blast/hhblits outputs> -o <output-file> -reference_id <uniprot-id of the reference sequence> -hhblits|-blast";

    /** Path of the input file (option -i). */
    private static String i;
    /** Path of the output file (option -o). */
    private static String o;
    /** UniProt ID of the reference sequence (option -reference_id). */
    private static String reference_id;
    /** True when the input is a parsed HHblits result (option -hhblits). */
    private static boolean hhblits = false;
    /** True when the input is a parsed (PSI-)BLAST result (option -blast). */
    private static boolean blast = false;

    /** IDs of the found sequences, in input order. */
    private static ArrayList<String> id = new ArrayList<String>();
    /** Reference GO term mapped to the number of found sequences carrying it. */
    private static HashMap<String, Integer> numberGO = new HashMap<>();

    /**
     * Reads the command line options: -i (input file), -o (output file),
     * -reference_id (UniProt ID of the reference sequence) and -hhblits or
     * -blast (kind of the input). Exactly seven arguments are expected;
     * otherwise, or when an option is missing its value, the usage message is
     * printed.
     *
     * @param args command line arguments
     */
    public static void getCMDargs(String[] args) {
        if (args.length != 7) {
            System.out.println(USAGE);
            return;
        }
        int j = 0;
        while (j < args.length) {
            String flag = args[j];
            if (flag.equals("-hhblits")) {
                hhblits = true;
            } else if (flag.equals("-blast")) {
                blast = true;
            } else if (flag.equals("-i") || flag.equals("-o") || flag.equals("-reference_id")) {
                // An option given as the very last argument has no value to consume.
                if (j + 1 >= args.length) {
                    System.out.println(USAGE);
                    return;
                }
                String value = args[++j];
                if (flag.equals("-i")) {
                    i = value;
                } else if (flag.equals("-o")) {
                    o = value;
                } else {
                    reference_id = value;
                }
            }
            j++;
        }
    }

    /**
     * Reads the input file line by line and collects the IDs of the found
     * sequences. When both -hhblits and -blast are set, the HHblits format
     * takes precedence. Lines that do not yield an ID are skipped.
     *
     * @param inputfile path of the parsed search result
     * @throws Exception if the file cannot be read
     */
    private static void read_file(String inputfile) throws Exception {
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputfile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String hitId = null;
                if (hhblits) {
                    hitId = idFromHhblitsLine(line);
                } else if (blast) {
                    hitId = idFromBlastLine(line);
                }
                if (hitId != null) {
                    id.add(hitId);
                }
            }
        }
    }

    /** Returns the first tab-separated column of a non-comment line, or null if there is none. */
    private static String idFromHhblitsLine(String line) {
        if (line.startsWith("#")) {
            return null;
        }
        String first = line.split("\t")[0];
        // A blank line would otherwise be recorded as an empty ID.
        return first.trim().isEmpty() ? null : first;
    }

    /** Returns the second '|'-separated field of a line starting with "tr" or "sp", or null. */
    private static String idFromBlastLine(String line) {
        if (!line.startsWith("tr") && !line.startsWith("sp")) {
            return null;
        }
        String[] fields = line.split("\\|");
        // Lines that merely start with "tr"/"sp" but contain no '|' carry no ID.
        return fields.length > 1 ? fields[1] : null;
    }

    /**
     * Looks up the GO terms of every collected ID with {@link ReadQuickGO}
     * and counts, per reference GO term, the sequences annotated with it.
     * Progress (number of IDs, running index) and a final found / not-found
     * summary are printed to standard output.
     *
     * @param go_PAH GO terms of the reference sequence
     * @throws Exception declared for compatibility; lookup failures are counted, not thrown
     */
    private static void compareGOs(HashSet<String> go_PAH) throws Exception {
        // countKeys and countNotKeys to check, if all IDs can be found in QuickGO
        int countKeys = 0;
        int countNotKeys = 0;
        System.out.println(id.size());
        int s_i = 0;
        // comparison of the reference GOs with the GOs of the other sequences
        for (String s : id) {
            HashSet<String> newTerms;
            try {
                newTerms = ReadQuickGO.readHTMLPage(s);
            } catch (Exception e) {
                // readHTMLPage reports a failed lookup by throwing a plain
                // Exception; one unknown ID must not abort the whole comparison.
                System.err.println("No GO terms retrieved for " + s + ": " + e.getMessage());
                newTerms = null;
            }
            s_i++;
            System.out.println(s_i);
            if (newTerms == null) {
                countNotKeys++;
                continue;
            }
            countKeys++;
            for (String go_i : go_PAH) {
                if (newTerms.contains(go_i)) {
                    Integer seen = numberGO.get(go_i);
                    numberGO.put(go_i, seen == null ? 1 : seen + 1);
                }
            }
        }
        System.out.println("found Keys: " + countKeys + "\nnot found:" + countNotKeys);
    }

    /**
     * Main-method which compares the GO terms of the reference sequence to
     * the GO terms of the other found sequences downloaded from QuickGO with
     * the class ReadQuickGO, and writes a table of reference GO terms and
     * their number of occurrences. Returns without doing anything when the
     * arguments are incomplete.
     *
     * @param args command line arguments, see {@link #getCMDargs(String[])}
     * @throws Exception if the input cannot be read, the reference lookup
     *         fails or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        getCMDargs(args);
        if (i == null || o == null || reference_id == null || !(hhblits || blast)) {
            System.err.println("Incomplete arguments: -i, -o, -reference_id and one of -hhblits|-blast are required.");
            return;
        }
        read_file(i);

        // GO-terms of the reference sequence
        HashSet<String> go_PAH = ReadQuickGO.readHTMLPage(reference_id);
        for (String term : go_PAH) {
            numberGO.put(term, 0);
        }

        // comparison of the GO-Terms
        compareGOs(go_PAH);

        // output-file is written with the GO-terms and their number of
        // occurrence in the other sequences.
        try (PrintWriter out = new PrintWriter(new FileWriter(new File(o)))) {
            out.println("GO\t#occurrence");
            for (String s : go_PAH) {
                out.println(s + "\t" + numberGO.get(s));
            }
        }
    }
}
package identity;
import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.URL; import java.util.HashSet;
public class ReadQuickGO {

    /**
     * Downloads the GO annotations of the given UniProt ID from QuickGO in
     * tab-separated form and returns the values of the "GO ID" column.
     *
     * <p>NOTE(review): the endpoint below is the one hard-coded in the
     * original code; confirm that it is still served before relying on it.
     *
     * @param uniProtID UniProt ID to look up; must not be null
     * @return set of GO terms annotated for this UniProt ID (possibly empty)
     * @throws IllegalArgumentException if {@code uniProtID} is null
     * @throws Exception if the page cannot be read or has no "GO ID" column
     */
    public static HashSet<String> readHTMLPage(String uniProtID) throws Exception {
        if (uniProtID == null) {
            // Otherwise the literal text "null" would be sent as the protein ID.
            throw new IllegalArgumentException("uniProtID must not be null");
        }
        HashSet<String> goTerms = new HashSet<String>();
        // HTML-Link; the ID is encoded so that unexpected characters cannot break the query
        String urlString = "http://www.ebi.ac.uk/QuickGO/GAnnotation?protein="
                + java.net.URLEncoder.encode(uniProtID, "UTF-8") + "&format=tsv";
        URL url = new URL(urlString);
        // try-with-resources closes the connection's stream on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(url.openStream(), "UTF-8"))) {
            // finding the column with GO-ID
            String header = reader.readLine();
            if (header == null) {
                throw new Exception("Could not find any GO-term for " + uniProtID);
            }
            int index = -1;
            String[] columnNames = header.split("\t");
            for (int c = 0; c < columnNames.length; c++) {
                if (columnNames[c].equals("GO ID")) {
                    index = c;
                }
            }
            if (index == -1) {
                throw new Exception("No GO ID found for " + uniProtID);
            }
            // saving the GO-Terms
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                // Short or blank rows have no GO ID column; skip them instead of failing.
                if (fields.length > index && !fields[index].isEmpty()) {
                    goTerms.add(fields[index]);
                }
            }
        } catch (java.io.IOException e) {
            // Keep the original message but preserve the cause for diagnosis.
            throw new Exception("Could not find any GO-term for " + uniProtID, e);
        }
        // Returning HashSet with GO-Terms
        return goTerms;
    }
}
</source>