# -*- coding: utf-8 -*-
"""Per-residue secondary-structure statistics from a DSSP time-series file.

The input is read as whitespace-separated text: lines starting with ``#`` are
header/comment lines, every other line is one frame whose first token is
skipped (presumably a frame index or time stamp -- confirm against the
producer of the file) and whose remaining tokens are one secondary-structure
code per residue.

All ``print`` calls pass exactly one argument so the script behaves the same
on Python 2 and Python 3.
"""
from collections import OrderedDict
import os
import sys

import numpy as np


def _find_structured_regions(percentages, threshold=80):
    """Return 1-based, inclusive ``(start, end)`` runs with ``perc >= threshold``."""
    regions = []
    region_start = None
    for residue_num, perc in enumerate(percentages, start=1):
        if perc >= threshold and region_start is None:
            region_start = residue_num
        elif perc < threshold and region_start is not None:
            regions.append((region_start, residue_num - 1))
            region_start = None
    # A run that is still open at the last residue ends there.
    if region_start is not None:
        regions.append((region_start, len(percentages)))
    return regions


def analyze_dssp_output(filename, structured_elements=None):
    """Compute how often each residue is in a structured element.

    Args:
        filename: Path of the text file to read.
        structured_elements: Iterable of secondary-structure codes counted as
            "structured". Defaults to ``['H', 'G', 'I', 'E', 'B']`` (helices
            and sheets).

    Returns:
        A tuple ``(results, structured_percentage, residues, n_frames)``:
        ``results`` maps residue label -> percentage (insertion-ordered),
        ``structured_percentage`` is the list of percentages in column order,
        ``residues`` is the list of labels, and ``n_frames`` is the number of
        data rows read.

    Raises:
        ValueError: If the file contains no data rows, or if the data rows do
            not all have the same number of columns.
    """
    if structured_elements is None:
        structured_elements = ['H', 'G', 'I', 'E', 'B']

    residues = []
    ss_data = []
    with open(filename, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            if line.startswith('#'):
                # A header line mentioning RES may carry the residue labels.
                if 'RES' in line:
                    residues = [p for p in line.split() if p.startswith('RES')]
                continue

            parts = line.split()
            if len(parts) > 1:
                row = parts[1:]
                if ss_data and len(row) != len(ss_data[0]):
                    raise ValueError(
                        "Line {} of '{}' has {} columns, expected {}".format(
                            line_no, filename, len(row), len(ss_data[0])))
                ss_data.append(row)

    if not ss_data:
        raise ValueError(
            "No secondary-structure data rows found in '{}'".format(filename))

    ss_array = np.array(ss_data)
    n_frames, n_residues = ss_array.shape

    # Build one boolean mask for "code is structured" and count per column,
    # instead of looping over every cell in Python.
    structured_mask = np.zeros(ss_array.shape, dtype=bool)
    for code in structured_elements:
        structured_mask |= (ss_array == code)
    counts = structured_mask.sum(axis=0)
    structured_percentage = [float(c) / n_frames * 100 for c in counts]

    # Header labels are only usable when there is exactly one per column;
    # otherwise zip() below would silently drop data.
    if len(residues) != n_residues:
        residues = ["Res{}".format(i + 1) for i in range(n_residues)]

    # OrderedDict keeps residues in column order on every Python version.
    results = OrderedDict(zip(residues, structured_percentage))

    return results, structured_percentage, residues, n_frames


def save_results_to_file(results, percentages, residues, n_frames,
                         output_file="dssp_analysis_results.txt"):
    """Write the analysis report to a text file.

    Args:
        results: Mapping of residue label -> percentage; written in its
            iteration order.
        percentages: Sequence of percentages in residue order.
        residues: Sequence of residue labels, parallel to ``percentages``.
        n_frames: Number of frames analysed (reported in the header).
        output_file: Path of the report to create or overwrite.

    Returns:
        ``output_file``, unchanged.
    """
    with open(output_file, 'w') as f:
        f.write("DSSP Secondary Structure Analysis Results\n")
        f.write("==========================================\n\n")
        f.write("Total frames analyzed: {}\n".format(n_frames))
        f.write("Structured elements considered: H, G, I, E, B (helices and sheets)\n\n")

        f.write("Residue Structured Percentage:\n")
        f.write("-----------------------------\n")
        for res, perc in results.items():
            f.write("{}: {:.2f}%\n".format(res, perc))

        # Overall statistics
        avg_structured = np.mean(percentages)
        std_structured = np.std(percentages)
        f.write("\nOverall statistics:\n")
        f.write("------------------\n")
        f.write("Average time in structured elements: {:.2f}%\n".format(avg_structured))
        f.write("Standard deviation: {:.2f}%\n".format(std_structured))

        # Most and least structured residues
        max_idx = np.argmax(percentages)
        min_idx = np.argmin(percentages)
        f.write("Most structured residue: {} ({:.2f}%)\n".format(residues[max_idx], percentages[max_idx]))
        f.write("Least structured residue: {} ({:.2f}%)\n".format(residues[min_idx], percentages[min_idx]))

        # Consistently structured regions (the threshold itself is included)
        f.write("\nConsistently structured regions (>80% structured):\n")
        f.write("-----------------------------------------------\n")
        for region_start, region_end in _find_structured_regions(percentages, 80):
            f.write("Residues {}-{}\n".format(region_start, region_end))

        # Tab-separated table for plotting with other tools
        f.write("\n\nData for plotting (residue number vs percentage):\n")
        f.write("-----------------------------------------------\n")
        f.write("Residue\tPercentage\n")
        for i, perc in enumerate(percentages):
            f.write("{}\t{:.2f}\n".format(i + 1, perc))

    print("Results saved to {}".format(output_file))
    return output_file


def main():
    """Run the analysis on the file named on the command line."""
    # Get filename from command line argument or use default
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = "dssp.dat"
        print("No filename provided, using default: dssp.dat")

    # Check if file exists
    if not os.path.isfile(filename):
        print("Error: File '{}' not found!".format(filename))
        print("Please provide a valid filename as an argument.")
        print("Usage: python script.py [filename]")
        sys.exit(1)

    results, percentages, residues, n_frames = analyze_dssp_output(filename)

    print("Residue Structured Percentage:")
    for res, perc in results.items():
        print("{}: {:.2f}%".format(res, perc))

    avg_structured = np.mean(percentages)
    std_structured = np.std(percentages)
    print("\nOverall average time in structured elements: {:.2f}% (±{:.2f}%)".format(avg_structured, std_structured))

    max_idx = np.argmax(percentages)
    min_idx = np.argmin(percentages)
    print("Most structured residue: {} ({:.2f}%)".format(residues[max_idx], percentages[max_idx]))
    print("Least structured residue: {} ({:.2f}%)".format(residues[min_idx], percentages[min_idx]))

    output_file = save_results_to_file(results, percentages, residues, n_frames)

    print("\nConsistently structured regions (>80% structured):")
    for region_start, region_end in _find_structured_regions(percentages, 80):
        print(" Residues {}-{}".format(region_start, region_end))

    print("\nFor visualization, you can:")
    print("1. Import the data from {} into Excel or another spreadsheet program".format(output_file))
    print("2. Use the 'Data for plotting' section to create a line chart")
    print("3. Alternatively, install matplotlib with: pip install matplotlib")


# Main execution
if __name__ == "__main__":
    main()