从 PDB 文件中分离蛋白质与小分子

适用情形：PDB 文件中只含单个蛋白和单个小分子

from Bio.PDB import PDBIO, PDBParser, Select

# Split a PDB structure into a protein-only file and a non-protein
# (ligand) file. Residues are classified purely by residue name.

# The 20 standard amino-acid residue names; every other residue name is
# treated as "ligand" (note: this includes waters and ions if present).
STANDARD_AMINO_ACIDS = frozenset({
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL",
})


class _ResidueClassSelect(Select):
    """PDBIO filter that keeps either only protein or only non-protein residues.

    Args:
        keep_protein: True to accept residues whose name is a standard
            amino acid, False to accept every other residue.
    """

    def __init__(self, keep_protein):
        self.keep_protein = keep_protein

    def accept_residue(self, residue):
        """Return True when *residue* belongs to the class being written."""
        is_protein = residue.get_resname() in STANDARD_AMINO_ACIDS
        return is_protein == self.keep_protein


# Create the PDB parser
parser = PDBParser()

# Read the PDB file
structure = parser.get_structure('example', r"F*****ctA.pdb")

# Collect protein residues and ligand residues separately
protein = []
ligands = []

# Walk every residue of every chain in the parsed structure
# (the original iterated over an undefined name, `structure1`).
for residue in structure.get_residues():
    if residue.get_resname() in STANDARD_AMINO_ACIDS:
        protein.append(residue)
    else:
        ligands.append(residue)

# Write protein and ligand residues to different files.
# The whole structure is handed to PDBIO and a Select filter decides which
# residues are emitted; passing `residue.get_parent()` (the chain) without a
# filter would write the complete chain into both files.
if protein:
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(r'F****\protein1.pdb', select=_ResidueClassSelect(keep_protein=True))
if ligands:
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(r'F:\***\ligands1.pdb', select=_ResidueClassSelect(keep_protein=False))

含多条蛋白链的 PDB 文件：只提取蛋白部分

# Copy only the ATOM records of "input.pdb" into "output.pdb".
with open("input.pdb", "r") as f:
    # Keep ATOM lines only (HETATM and all other record types are dropped).
    # Lines too short to carry a chain identifier at index 21 are skipped
    # instead of raising IndexError further down.
    pdb_lines = [
        line for line in f
        if line.startswith("ATOM") and len(line) > 21
    ]

# Chain identifiers (character at index 21 of each ATOM line), unique and in
# order of first appearance.
chains = list(dict.fromkeys(line[21] for line in pdb_lines))

# Every retained line's chain ID is in `chains` by construction, so the
# original second filtering pass was a no-op; the ATOM lines are written as-is.
new_pdb_lines = list(pdb_lines)

with open("output.pdb", "w") as f:
    f.writelines(new_pdb_lines)

原文链接：https://blog.csdn.net/weixin_42357472/article/details/129536065