KEGG注释脚本kofam2kegg.py--脚本010

发布于:2025-04-15 ⋅ 阅读:(23) ⋅ 点赞:(0)

采用 KofamScan 的注释结果(基因 → KO),结合从 KEGG 官网下载的 htext 层级文件(.keg)进行 KEGG 通路注释

用法: 

python kofam2kegg.py kofam.out ath00001.keg my_kegg_output

 code:

import sys
from collections import defaultdict

def parse_kofam_file(kofam_file):
    """Group gene identifiers by KO from a two-column, tab-separated file.

    Each line is stripped of surrounding whitespace and split on tabs.
    Only lines that yield exactly two fields are used: the first field is
    taken as the gene and the second as the KO.  Every other line is ignored.

    Args:
        kofam_file: Path of the file to read.

    Returns:
        A ``defaultdict(list)`` mapping each KO to the genes assigned to it,
        in the order they appear in the file.
    """
    genes_by_ko = defaultdict(list)
    with open(kofam_file) as handle:
        for raw_line in handle:
            fields = raw_line.strip().split('\t')
            # Anything other than a "gene<TAB>KO" pair carries no assignment.
            if len(fields) != 2:
                continue
            gene_id, ko_id = fields
            genes_by_ko[ko_id].append(gene_id)
    return genes_by_ko

def _split_pathway_entry(entry):
    """Split the payload of a C line into ``(pathway_id, pathway_name)``.

    The first whitespace-separated token is the id; the remaining tokens form
    the name.  A trailing bracketed token such as ``[PATH:ath00010]`` is a
    cross-reference tag, not part of the name, and is dropped.
    Returns ``('', '')`` when the payload is empty.
    """
    tokens = entry.split()
    if not tokens:
        return '', ''
    entry_id, name_tokens = tokens[0], tokens[1:]
    if name_tokens and name_tokens[-1].startswith('[') and name_tokens[-1].endswith(']'):
        name_tokens = name_tokens[:-1]
    return entry_id, ' '.join(name_tokens)


def parse_keg_file(keg_file):
    """Parse a hierarchical ``.keg`` file into KO -> pathway lookups.

    Lines are classified by their first character after stripping:

    * ``A`` - sets the current level-1 category (text after the letter).
    * ``B`` - sets the current level-2 category (text after the letter).
    * ``C`` - starts a pathway, e.g.
      ``C    00010 Glycolysis / Gluconeogenesis [PATH:ath00010]``.  The
      leading token (``00010``) becomes the pathway id and the text between
      it and the optional trailing ``[...]`` tag becomes the pathway name.
    * ``D`` - a gene entry.  Only lines with exactly one tab are used; the
      first token after the tab is taken as the KO and linked to the current
      pathway.

    All other lines (headers, separators) are ignored.

    Args:
        keg_file: Path of the ``.keg`` file to read.

    Returns:
        A tuple ``(ko_to_pathway, pathway_info)`` where ``ko_to_pathway`` is a
        ``defaultdict(list)`` mapping each KO to the distinct pathway ids it
        occurs in (in file order), and ``pathway_info`` maps each pathway id
        to a dict with the keys ``'Pathway'``, ``'Level1'`` and ``'Level2'``.
    """
    ko_to_pathway = defaultdict(list)
    pathway_info = {}
    level1 = level2 = ''
    pathway_id = ''
    with open(keg_file) as f:
        for line in f:
            line = line.strip()
            if line.startswith('A'):
                level1 = line[1:].strip()
            elif line.startswith('B'):
                level2 = line[1:].strip()
            elif line.startswith('C'):
                pathway_id, pathway = _split_pathway_entry(line[1:])
                # A bare "C" line has no id; do not register an empty key.
                if pathway_id:
                    pathway_info[pathway_id] = {
                        'Pathway': pathway,
                        'Level1': level1,
                        'Level2': level2
                    }
            elif line.startswith('D'):
                parts = line.split('\t')
                # Skip gene lines that appear before any pathway was seen so
                # they are not filed under an empty pathway id.
                if len(parts) == 2 and pathway_id:
                    ko = parts[1].split()[0]
                    # Several genes of one pathway can share a KO; record the
                    # pathway only once per KO.
                    if pathway_id not in ko_to_pathway[ko]:
                        ko_to_pathway[ko].append(pathway_id)
    return ko_to_pathway, pathway_info

def main(kofam_file, keg_file, output_file):
    """Write a per-pathway summary table of KOs and genes.

    Genes are grouped by KO with ``parse_kofam_file`` and KOs are mapped to
    pathways with ``parse_keg_file``.  For every pathway that received at
    least one KO, one tab-separated row is written to ``output_file`` with:
    pathway name, number of distinct genes, ``ko`` + pathway id, level-1 and
    level-2 categories, and the sorted, ``;``-joined KOs and genes.

    Args:
        kofam_file: Path of the gene/KO assignment file.
        keg_file: Path of the ``.keg`` hierarchy file.
        output_file: Path of the table to create (overwritten if present).
    """
    genes_by_ko = parse_kofam_file(kofam_file)
    pathways_by_ko, pathway_meta = parse_keg_file(keg_file)

    # Both aggregates are filled together, so they share keys and key order.
    genes_in_pathway = defaultdict(set)
    kos_in_pathway = defaultdict(set)
    for ko_id, gene_ids in genes_by_ko.items():
        for pathway_id in pathways_by_ko.get(ko_id, ()):
            genes_in_pathway[pathway_id].update(gene_ids)
            kos_in_pathway[pathway_id].add(ko_id)

    with open(output_file, 'w') as out:
        out.write("Pathway\tGeneCount\tPathway ID\tLevel 1\tLevel 2\tKOs\tGenes\n")
        for pathway_id, gene_ids in genes_in_pathway.items():
            meta = pathway_meta.get(pathway_id, {})
            name = meta.get('Pathway', '')
            level1 = meta.get('Level1', '')
            level2 = meta.get('Level2', '')
            ko_field = ';'.join(sorted(kos_in_pathway[pathway_id]))
            gene_field = ';'.join(sorted(gene_ids))
            out.write(f"{name}\t{len(gene_ids)}\tko{pathway_id}\t"
                      f"{level1}\t{level2}\t"
                      f"{ko_field}\t{gene_field}\n")

if __name__ == "__main__":
    # Exactly three positional arguments are required after the script name:
    # the gene/KO file, the .keg file and the output path.
    if len(sys.argv) != 4:
        # sys.exit() with a string prints it to stderr and exits with status 1,
        # so a wrong invocation is reported as a failure to the calling shell.
        # The script name is taken from argv so the hint matches the real file.
        sys.exit(f"用法: python {sys.argv[0]} kofam.out keg_file output_file")
    main(sys.argv[1], sys.argv[2], sys.argv[3])


网站公告

今日签到

点亮在社区的每一天
去签到