edryd.org

some of my neat stuff
git clone git://edryd.org/edryd.org
Log | Files | Refs | LICENSE

commit 275a5c90c099c7df22c63e448195ec10a823544e
parent 5c70560ba956ab0abdefb5135c43d52bd6129c79
Author: Ed van Bruggen <edvb@uw.edu>
Date:   Sun, 27 Jan 2019 00:30:26 -0800

Add aln2grishin script

Diffstat:
_data/projects.yml | 2++
projects/aln2grishin.md | 49+++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 0 deletions(-)

#!/usr/bin/env python
"""
Convert clustal alignment files to grishin for use in Rosetta protein
simulations

Author: Ed van Bruggen <edvb@uw.edu>
"""

import argparse
from argparse import RawTextHelpFormatter


def _read_clustal(lines):
    """Collect the complete sequence of every protein in a clustal alignment.

    ``lines`` is any iterable of text lines (typically an open file). The
    first line is skipped as the format header, as are blank lines and lines
    starting with a space. On every remaining line the first field is taken
    as the protein name and the second as the next chunk of its sequence;
    lines with fewer than two fields are ignored.

    Returns a list of ``(name, sequence)`` tuples in order of first
    appearance, with each protein's chunks concatenated.
    """
    chunks = {}  # name -> list of sequence pieces, in file order
    order = []   # names in order of first appearance
    for i, line in enumerate(lines):
        if i == 0 or not line.strip() or line.startswith(' '):
            continue
        words = line.split()
        if len(words) < 2:
            # A name without a sequence chunk carries no alignment data.
            continue
        name = words[0]
        if name not in chunks:
            chunks[name] = []
            order.append(name)
        chunks[name].append(words[1])
    return [(name, "".join(chunks[name])) for name in order]


def _grishin_text(target, template):
    """Return the grishin file contents aligning ``target`` to ``template``.

    Both arguments are ``(name, sequence)`` pairs.
    """
    return ("## %s %s_thread\n#\nscores from program: 0\n0 %s\n0 %s\n" %
            (target[0], template[0], target[1], template[1]))


def main(argv=None):
    """Run the converter.

    ``argv`` is the list of command line arguments; when ``None`` argparse
    falls back to ``sys.argv[1:]``.

    For every protein in the alignment other than the target, a file named
    ``<target>_<protein>.grishin`` is written to the current directory.
    Exits through ``parser.error`` (``SystemExit``) when the alignment holds
    no sequences or ``--target`` does not name a position in it.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--file', type=str, required=True,
                        help='input clustal alignment file')
    parser.add_argument('--target', metavar='POS', type=int, default=1,
                        help='position of target protein (default: 1)')
    args = parser.parse_args(argv)

    with open(args.file) as aln:
        proteins = _read_clustal(aln)

    if not proteins:
        parser.error("no sequences found in %s" % args.file)
    # Positions are 1-based; reject 0 and negatives explicitly so they do not
    # silently wrap around to proteins counted from the end of the list.
    if not 1 <= args.target <= len(proteins):
        parser.error("--target must be between 1 and %d" % len(proteins))

    target_index = args.target - 1
    target = proteins[target_index]

    for index, protein in enumerate(proteins):
        if index == target_index:
            continue
        with open(target[0] + "_" + protein[0] + ".grishin", "w") as grishin:
            grishin.write(_grishin_text(target, protein))


if __name__ == "__main__":
    main()