Package org.forester.surfacing
Class SurfacingUtil
java.lang.Object
org.forester.surfacing.SurfacingUtil
- 
Field Summary
Fields - 
Method Summary
Modifier and TypeMethodDescriptionstatic voidaddAllBinaryDomainCombinationToSet(GenomeWideCombinableDomains genome, SortedSet<BinaryDomainCombination> binary_domain_combinations) static voidaddAllDomainIdsToSet(GenomeWideCombinableDomains genome, SortedSet<String> domain_ids) static DescriptiveStatisticscalculateDescriptiveStatisticsForMeanValues(Set<DomainSimilarity> similarities) static voidcheckForOutputFileWriteability(File outfile) static voidcheckWriteabilityForPairwiseComparisons(DomainSimilarity.PRINT_OPTION domain_similarity_print_option, String[][] input_file_properties, String automated_pairwise_comparison_suffix, File outdir) static voidcollectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, BinaryDomainCombination.DomainCombinationType dc_type, List<BinaryDomainCombination> all_binary_domains_combination_gained, boolean get_gains) createDomainIdToGoIdMap(List<PfamToGoMapping> pfam_to_go_mappings) createDomainIdToSecondaryFeaturesMap(File secondary_features_map_file) static PhylogenycreateNjTreeBasedOnMatrixToFile(File nj_tree_outfile, DistanceMatrix distance) static StringBuildercreateParametersAsString(boolean ignore_dufs, double ie_value_max, double fs_e_value_max, int max_allowed_overlap, boolean no_engulfing_overlaps, File cutoff_scores_file, BinaryDomainCombination.DomainCombinationType dc_type) static voidstatic voiddecoratePrintableDomainSimilarities(SortedSet<DomainSimilarity> domain_similarities, DomainSimilarityCalculator.Detailedness detailedness) static voiddoit(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species, Map<String, List<Integer>> average_protein_lengths_by_dc) static voiddomainsPerProteinsStatistics(String genome, List<Protein> protein_list, DescriptiveStatistics all_genomes_domains_per_potein_stats, SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, SortedSet<String> 
domains_which_are_always_single, SortedSet<String> domains_which_are_sometimes_single_sometimes_not, SortedSet<String> domains_which_never_single, Writer writer) static voidexecuteDomainLengthAnalysis(String[][] input_file_properties, int number_of_genomes, DomainLengthsTable domain_lengths_table, File outfile) static voidexecuteFitchGainsAnalysis(File output_file, List<BinaryDomainCombination> all_bin_domain_combinations_changed, int sum_of_all_domains_encountered, SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered, boolean is_gains_analysis) Warning: This side-effects 'all_bin_domain_combinations_encountered'!static voidexecuteParsimonyAnalysis(long random_number_seed_for_fitch_parsimony, boolean radomize_fitch_parsimony, String outfile_name, DomainParsimonyCalculator domain_parsimony, Phylogeny phylogeny, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, String parameters_str, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> positive_filter, boolean output_binary_domain_combinations_for_graphs, List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch, List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch, BinaryDomainCombination.DomainCombinationType dc_type, Map<String, DescriptiveStatistics> protein_length_stats_by_dc, Map<String, DescriptiveStatistics> domain_number_stats_by_dc, Map<String, DescriptiveStatistics> domain_length_stats_by_domain, Map<String, Integer> tax_code_to_id_map, boolean write_to_nexus, boolean use_last_in_fitch_parsimony, boolean perform_dc_fich) static voidexecuteParsimonyAnalysisForSecondaryFeatures(String outfile_name, DomainParsimonyCalculator secondary_features_parsimony, Phylogeny phylogeny, String parameters_str, Map<Species, MappingResults> mapping_results_map, boolean use_last_in_fitch_parsimony) static voidexecutePlusMinusAnalysis(File output_file, List<String> 
plus_minus_analysis_high_copy_base, List<String> plus_minus_analysis_high_copy_target, List<String> plus_minus_analysis_low_copy, List<GenomeWideCombinableDomains> gwcd_list, SortedMap<Species, List<Protein>> protein_lists_per_species, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, List<Object> plus_minus_analysis_numbers) static voidextractProteinNames(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species) static voidextractProteinNames(SortedMap<Species, List<Protein>> protein_lists_per_species, String domain_id, Writer out, String separator, String limit_to_species, double domain_e_cutoff) getAllDomainIds(List<GenomeWideCombinableDomains> gwcd_list) getDomainCounts(List<Protein> protein_domain_collections) static intstatic voidstatic Phylogeny[]obtainAndPreProcessIntrees(File[] intree_files, int number_of_genomes, String[][] input_file_properties) static PhylogenyobtainFirstIntree(File intree_file) static StringobtainHexColorStringDependingOnTaxonomyGroup(String tax_code, Phylogeny phy) static StringobtainTaxonomyGroup(String tax_code, Phylogeny species_tree) static voidperformDomainArchitectureAnalysis(SortedMap<String, Set<String>> domain_architecutures, SortedMap<String, Integer> domain_architecuture_counts, int min_count, File da_counts_outfile, File unique_da_outfile) static voidpreparePhylogeny(Phylogeny p, DomainParsimonyCalculator domain_parsimony, String date_time, String method, String name, String parameters_str) static voidpreparePhylogenyForParsimonyAnalyses(Phylogeny intree, String[][] input_file_properties) static voidprintOutPercentageOfMultidomainProteins(SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, Writer log_writer) static voidprocessFilter(File filter_file, SortedSet<String> filter) static String[][]processInputGenomesFile(File input_genomes) static voidprocessPlusMinusAnalysisOption(CommandLineArguments cla, 
List<String> high_copy_base, List<String> high_copy_target, List<String> low_copy, List<Object> numbers) static voidprocessPlusMinusFile(File plus_minus_file, List<String> high_copy_base, List<String> high_copy_target, List<String> low_copy, List<Object> numbers) static StringBufferproteinToDomainCombinations(Protein protein, String protein_id, String separator) static intstoreDomainArchitectures(String genome, SortedMap<String, Set<String>> domain_architecutures, List<Protein> protein_list, Map<String, Integer> distinct_domain_architecuture_counts) static voidwriteAllDomainsChangedOnAllSubtrees(Phylogeny p, boolean get_gains, String outdir, String suffix_for_filename) static voidwriteBinaryDomainCombinationsFileForGraphAnalysis(String[][] input_file_properties, File output_dir, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) static voidwriteBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, Map<String, String> descriptions) static voidwriteBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, BinaryDomainCombination.OutputFormat bc_output_format) static voidwriteBinaryStatesMatrixToList(Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, boolean domain_combinations, CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, String title_for_html, String prefix_for_html, Map<String, Set<String>>[] 
domain_id_to_secondary_features_maps, SortedSet<String> all_pfams_encountered, SortedSet<String> pfams_gained_or_lost, String suffix_for_per_node_events_file, Map<String, Integer> tax_code_to_id_map) static voidwriteDomainCombinationsCountsFile(String[][] input_file_properties, File output_dir, Writer per_genome_domain_promiscuity_statistics_writer, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order) static voidwriteDomainSimilaritiesToFile(StringBuilder html_desc, StringBuilder html_title, Writer simple_tab_writer, Writer single_writer, Map<Character, Writer> split_writers, SortedSet<DomainSimilarity> similarities, boolean treat_as_binary, List<Species> species_order, DomainSimilarity.PRINT_OPTION print_option, DomainSimilarity.DomainSimilarityScoring scoring, boolean verbose, Map<String, Integer> tax_code_to_id_map, Phylogeny phy, Set<String> pos_filter_doms) static voidwriteHtmlHead(Writer w, String title) static voidwriteMatrixToFile(File matrix_outfile, List<DistanceMatrix> matrices) static voidwriteMatrixToFile(CharacterStateMatrix<?> matrix, String filename, CharacterStateMatrix.Format format) static voidwritePhylogenyToFile(Phylogeny phylogeny, String filename) static voidwritePresentToNexus(File output_file, File positive_filter_file, SortedSet<String> filter, List<GenomeWideCombinableDomains> gwcd_list) static voidwriteProteinListsForAllSpecies(File output_dir, SortedMap<Species, List<Protein>> protein_lists_per_species, List<GenomeWideCombinableDomains> gwcd_list, double domain_e_cutoff, Set<String> pos_filter_doms) static void 
- 
Field Details
- 
PATTERN_SP_STYLE_TAXONOMY
 
 - 
 - 
Method Details
- 
addAllBinaryDomainCombinationToSet
public static void addAllBinaryDomainCombinationToSet(GenomeWideCombinableDomains genome, SortedSet<BinaryDomainCombination> binary_domain_combinations)  - 
addAllDomainIdsToSet
public static void addAllDomainIdsToSet(GenomeWideCombinableDomains genome, SortedSet<String> domain_ids)  - 
calculateDescriptiveStatisticsForMeanValues
public static DescriptiveStatistics calculateDescriptiveStatisticsForMeanValues(Set<DomainSimilarity> similarities)  - 
checkForOutputFileWriteability
 - 
checkWriteabilityForPairwiseComparisons
public static void checkWriteabilityForPairwiseComparisons(DomainSimilarity.PRINT_OPTION domain_similarity_print_option, String[][] input_file_properties, String automated_pairwise_comparison_suffix, File outdir)  - 
collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile
public static void collectChangedDomainCombinationsFromBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, BinaryDomainCombination.DomainCombinationType dc_type, List<BinaryDomainCombination> all_binary_domains_combination_gained, boolean get_gains)  - 
createDomainIdToGoIdMap
 - 
createDomainIdToSecondaryFeaturesMap
public static Map<String,Set<String>> createDomainIdToSecondaryFeaturesMap(File secondary_features_map_file) throws IOException - Throws:
 IOException
 - 
createNjTreeBasedOnMatrixToFile
public static Phylogeny createNjTreeBasedOnMatrixToFile(File nj_tree_outfile, DistanceMatrix distance)  - 
createParametersAsString
public static StringBuilder createParametersAsString(boolean ignore_dufs, double ie_value_max, double fs_e_value_max, int max_allowed_overlap, boolean no_engulfing_overlaps, File cutoff_scores_file, BinaryDomainCombination.DomainCombinationType dc_type)  - 
createSplitWriters
public static void createSplitWriters(File out_dir, String my_outfile, Map<Character, Writer> split_writers) throws IOException - Throws:
 IOException
 - 
createTaxCodeToIdMap
 - 
decoratePrintableDomainSimilarities
public static void decoratePrintableDomainSimilarities(SortedSet<DomainSimilarity> domain_similarities, DomainSimilarityCalculator.Detailedness detailedness)  - 
doit
public static void doit(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species, Map<String, List<Integer>> average_protein_lengths_by_dc) throws IOException - Throws:
 IOException
 - 
domainsPerProteinsStatistics
public static void domainsPerProteinsStatistics(String genome, List<Protein> protein_list, DescriptiveStatistics all_genomes_domains_per_potein_stats, SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo, SortedSet<String> domains_which_are_always_single, SortedSet<String> domains_which_are_sometimes_single_sometimes_not, SortedSet<String> domains_which_never_single, Writer writer)  - 
executeDomainLengthAnalysis
public static void executeDomainLengthAnalysis(String[][] input_file_properties, int number_of_genomes, DomainLengthsTable domain_lengths_table, File outfile) throws IOException - Throws:
 IOException
 - 
executeFitchGainsAnalysis
public static void executeFitchGainsAnalysis(File output_file, List<BinaryDomainCombination> all_bin_domain_combinations_changed, int sum_of_all_domains_encountered, SortedSet<BinaryDomainCombination> all_bin_domain_combinations_encountered, boolean is_gains_analysis) throws IOException
Warning: This method modifies 'all_bin_domain_combinations_encountered' as a side effect!
- Parameters:
 output_file -
 all_bin_domain_combinations_changed -
 sum_of_all_domains_encountered -
 all_bin_domain_combinations_encountered -
 is_gains_analysis -
 protein_length_stats_by_dc -
- Throws:
 IOException
 - 
executeParsimonyAnalysis
public static void executeParsimonyAnalysis(long random_number_seed_for_fitch_parsimony, boolean radomize_fitch_parsimony, String outfile_name, DomainParsimonyCalculator domain_parsimony, Phylogeny phylogeny, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, String parameters_str, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> positive_filter, boolean output_binary_domain_combinations_for_graphs, List<BinaryDomainCombination> all_binary_domains_combination_gained_fitch, List<BinaryDomainCombination> all_binary_domains_combination_lost_fitch, BinaryDomainCombination.DomainCombinationType dc_type, Map<String, DescriptiveStatistics> protein_length_stats_by_dc, Map<String, DescriptiveStatistics> domain_number_stats_by_dc, Map<String, DescriptiveStatistics> domain_length_stats_by_domain, Map<String, Integer> tax_code_to_id_map, boolean write_to_nexus, boolean use_last_in_fitch_parsimony, boolean perform_dc_fich) - Parameters:
 all_binary_domains_combination_lost_fitch -
 use_last_in_fitch_parsimony -
 perform_dc_fich -
 consider_directedness_and_adjacency_for_bin_combinations -
 all_binary_domains_combination_gained - if null, this is ignored; otherwise it is used to list all binary domain combinations which were gained under unweighted (Fitch) parsimony.
 - 
executeParsimonyAnalysisForSecondaryFeatures
public static void executeParsimonyAnalysisForSecondaryFeatures(String outfile_name, DomainParsimonyCalculator secondary_features_parsimony, Phylogeny phylogeny, String parameters_str, Map<Species, MappingResults> mapping_results_map, boolean use_last_in_fitch_parsimony)  - 
executePlusMinusAnalysis
public static void executePlusMinusAnalysis(File output_file, List<String> plus_minus_analysis_high_copy_base, List<String> plus_minus_analysis_high_copy_target, List<String> plus_minus_analysis_low_copy, List<GenomeWideCombinableDomains> gwcd_list, SortedMap<Species, List<Protein>> protein_lists_per_species, Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, List<Object> plus_minus_analysis_numbers)  - 
extractProteinNames
public static void extractProteinNames(List<Protein> proteins, List<String> query_domain_ids_nc_order, Writer out, String separator, String limit_to_species) throws IOException - Throws:
 IOException
 - 
extractProteinNames
public static void extractProteinNames(SortedMap<Species, List<Protein>> protein_lists_per_species, String domain_id, Writer out, String separator, String limit_to_species, double domain_e_cutoff) throws IOException - Throws:
 IOException
 - 
getAllDomainIds
 - 
getDomainCounts
 - 
getNumberOfNodesLackingName
 - 
log
 - 
obtainAndPreProcessIntrees
 - 
obtainFirstIntree
 - 
obtainHexColorStringDependingOnTaxonomyGroup
public static String obtainHexColorStringDependingOnTaxonomyGroup(String tax_code, Phylogeny phy) throws IllegalArgumentException - Throws:
 IllegalArgumentException
 - 
obtainTaxonomyGroup
public static String obtainTaxonomyGroup(String tax_code, Phylogeny species_tree) throws IllegalArgumentException - Throws:
 IllegalArgumentException
 - 
performDomainArchitectureAnalysis
 - 
preparePhylogeny
 - 
preparePhylogenyForParsimonyAnalyses
 - 
printOutPercentageOfMultidomainProteins
 - 
processFilter
 - 
processInputGenomesFile
 - 
processPlusMinusAnalysisOption
 - 
processPlusMinusFile
 - 
proteinToDomainCombinations
public static StringBuffer proteinToDomainCombinations(Protein protein, String protein_id, String separator)  - 
sortDomainsWithAscendingConfidenceValues
 - 
storeDomainArchitectures
 - 
writeAllDomainsChangedOnAllSubtrees
public static void writeAllDomainsChangedOnAllSubtrees(Phylogeny p, boolean get_gains, String outdir, String suffix_for_filename) throws IOException - Throws:
 IOException
 - 
writeBinaryDomainCombinationsFileForGraphAnalysis
public static void writeBinaryDomainCombinationsFileForGraphAnalysis(String[][] input_file_properties, File output_dir, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order)  - 
writeBinaryStatesMatrixAsListToFile
public static void writeBinaryStatesMatrixAsListToFile(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, Map<String, String> descriptions)  - 
writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis
public static void writeBinaryStatesMatrixAsListToFileForBinaryCombinationsForGraphAnalysis(CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, BinaryDomainCombination.OutputFormat bc_output_format)  - 
writeBinaryStatesMatrixToList
public static void writeBinaryStatesMatrixToList(Map<String, List<GoId>> domain_id_to_go_ids_map, Map<GoId, GoTerm> go_id_to_term_map, GoNameSpace go_namespace_limit, boolean domain_combinations, CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix, CharacterStateMatrix.GainLossStates state, String filename, String indentifier_characters_separator, String character_separator, String title_for_html, String prefix_for_html, Map<String, Set<String>>[] domain_id_to_secondary_features_maps, SortedSet<String> all_pfams_encountered, SortedSet<String> pfams_gained_or_lost, String suffix_for_per_node_events_file, Map<String, Integer> tax_code_to_id_map)  - 
writeDomainCombinationsCountsFile
public static void writeDomainCombinationsCountsFile(String[][] input_file_properties, File output_dir, Writer per_genome_domain_promiscuity_statistics_writer, GenomeWideCombinableDomains gwcd, int i, GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order)  - 
writeDomainSimilaritiesToFile
public static void writeDomainSimilaritiesToFile(StringBuilder html_desc, StringBuilder html_title, Writer simple_tab_writer, Writer single_writer, Map<Character, Writer> split_writers, SortedSet<DomainSimilarity> similarities, boolean treat_as_binary, List<Species> species_order, DomainSimilarity.PRINT_OPTION print_option, DomainSimilarity.DomainSimilarityScoring scoring, boolean verbose, Map<String, Integer> tax_code_to_id_map, Phylogeny phy, Set<String> pos_filter_doms) throws IOException - Throws:
 IOException
 - 
writeHtmlHead
- Throws:
 IOException
 - 
writeMatrixToFile
public static void writeMatrixToFile(CharacterStateMatrix<?> matrix, String filename, CharacterStateMatrix.Format format)  - 
writeMatrixToFile
 - 
writePhylogenyToFile
 - 
writePresentToNexus
 - 
writeProteinListsForAllSpecies
 - 
writeTaxonomyLinks
public static void writeTaxonomyLinks(Writer writer, String species, Map<String, Integer> tax_code_to_id_map) throws IOException - Throws:
 IOException
 
 -