EnzyHTP_1.0/Class_Structure.py at master · ChemBioHTP/EnzyHTP_1.0 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import numpy as np
import os, re
from math import ceil
from Class_line import PDB_line
from Class_Conf import Config
from helper import Child, get_center, get_distance, line_feed, mkdir
from AmberMaps import *
try:
    import openbabel
    import openbabel.pybel as pybel
except ImportError:
    raise ImportError('OpenBabel not installed.')
__doc__='''
This module extract and operate structural infomation from PDB
# will replace some local function in PDB class in the future.
-------------------------------------------------------------------------------------
Class Structure
-------------------------------------------------------------------------------------
Class Chain
-------------------------------------------------------------------------------------
Class Residue
-------------------------------------------------------------------------------------
Class Atom
-------------------------------------------------------------------------------------
Class Metalatom
-------------------------------------------------------------------------------------
Class Ligand
-------------------------------------------------------------------------------------
Class Solvent
===============
'''

class Structure():
    '''
    initialize from
    PDB:        Structure.fromPDB(input_obj, input_type='path' or 'file' or 'file_str')
    raw data:   Structure(chains=None, metalatoms=None, ligands=None)
    ------------
    chains = [chain_obj, ...]
    metalatoms = [metalatom_obj, ...]
    ligands = [ligand, ...]
    solvents = [solvent, ...]
    # maybe add solvent in the future. mimic the metal treatment
    ------------
    METHOD
    ------------
    if_metalatom

    # Move in in the future
    if_art_resi
    if_ligand
    if_complete
    ligand

    get_metal_center
    get_art_resi
    add
    sort
    build

    protonation_metal_fix

    get_all_protein_atom

    ---------
    Special Method
    ---------
    __len__
        len(obj) = len(obj.child_list)
    TODO support a better way to operate on chains, metalatoms and ligands (e.g. search by name instead of using the list index) (*The list index is unreliable since ligands, metals and solvents are read in reversed order.)
    '''

    '''
    ====
    init
    ====
    '''

    def __init__(self, chains=[], metalatoms=[], ligands=[], solvents=[], name = None):
        '''
        Common part of init methods: direct from data objects
        '''
        if len(chains) == 0 and len(metalatoms) == 0 and len(ligands) == 0 and len(solvents) == 0:
            raise ValueError('need at least one input')

        self.chains = []
        self.metalatoms = []
        self.ligands = []
        self.metal_centers = []
        self.solvents = []

        # Add parent pointer and combine into a whole
        for chain in chains:
            chain.set_parent(self)
            self.chains.append(chain)
        for metalatom in metalatoms:
            metalatom.set_parent(self)
            self.metalatoms.append(metalatom)
        for ligand in ligands:
            ligand.set_parent(self)
            self.ligands.append(ligand)
        for solvent in solvents:
            solvent.set_parent(self)
            self.solvents.append(solvent)
        self.name = name

    @classmethod
    def fromPDB(cls, input_obj, input_type='path', input_name = None, ligand_list = None):
        '''
        extract the structure from PDB path. Capable with raw experimental and Amber format
        ---------
        input = path (or file or file_str)
            split the file_str to chain and init each chain
        ligand_list: ['NAME',...]
            User specific ligand names. Only extract these if provided.
        ---------
        Target:
        - structure(w/name)  - chain - residue - atom
                            |- metalatom(atom)
                            |- ligand(residue)
                            |- solvent(residue)
        - ... (add upon usage)
        '''

        # adapt general input // converge to file_str
        if input_type == 'path':
            f = open(input_obj)
            file_str = f.read()
            f.close()
        if input_type == 'file':
            file_str = input_obj.read()
        if input_type == 'file_str':
            file_str = input_obj

        raw_chains = []
        # get raw chains
        chains_str = file_str.split(line_feed+'TER') # Note LF is required
        for index, chain_str in enumerate(chains_str):
            if chain_str.strip() != 'END' and chain_str.strip() != '':
                Chain_index = chr(65+index) # Covert to ABC using ACSII mapping
                # Generate chains
                raw_chains.append(Chain.fromPDB(chain_str, Chain_index))
        # clean chains
        # clean metals
        raw_chains_woM, metalatoms = cls._get_metalatoms(raw_chains, method='1')
        # clean ligands
        raw_chains_woM_woL, ligands = cls._get_ligands(raw_chains_woM, ligand_list=ligand_list)
        # clean solvent
        raw_chains_woM_woL_woS, solvents = cls._get_solvents(raw_chains_woM_woL)

        ####### debug ##########
        if Config.debug > 1:
            for chain in raw_chains_woM_woL_woS:
                print('Structure.fromPDB: final chain sequence: '+chain.id, chain.get_chain_seq(Oneletter=1))
            for metal in metalatoms:
                print('Structure.fromPDB: final metal recorded '+metal.name)
            for ligand in ligands:
                print('Structure.fromPDB: final ligand recorded ' + ligand.name)

        return cls(raw_chains_woM_woL_woS, metalatoms, ligands, solvents, input_name)


    @classmethod
    def _get_metalatoms(cls, raw_chains, method='1'):
        '''
        get metal from raw chains and clean chains by deleting the metal part
        -----
        Method 1:   Assume metal can be in any chain
                    Assume all resiude have unique index.
        (Slow but general)

        Method 2: Assume metal can only be in a seperate chain
        (fast but limited)
        '''
        metalatoms = []
        if method == '1':
            for chain in raw_chains:
                for i in range(len(chain)-1,-1,-1):
                    # operate in residue level
                    residue = chain[i]
                    if residue.name in Metal_map.keys():
                        # add a logger in the future
                        print('\033[1;34;40mStructure: found metal in raw: '+chain.id+' '+residue.name+' '+str(residue.id)+' \033[0m')
                        metalatoms.append(residue)
                        del chain[i]
        if method == '2':
            # Not finished yet
            pass

        # Break pseudo residues into atoms and convert to Metalatom object
        holders = []
        for pseudo_resi in metalatoms:
            for metal in pseudo_resi:
                holders.append(Metalatom.fromAtom(metal))
        metalatoms = holders

        # clean empty chains
        for i in range(len(raw_chains)-1,-1,-1):
            if len(raw_chains[i]) == 0:
                del raw_chains[i]

        return raw_chains, metalatoms

    @classmethod
    def _get_ligands(cls, raw_chains, ligand_list = None):
        '''
        get ligand from raw chains and clean chains by deleting the ligand part
        -----
        ligand_list: only record user specified ligand if provided
        Method: Assume metal/ligand/solvent can only be in a seperate chain (or it can not be distinguish from artificial residues.)
                - delete names from rd_non_ligand_list
                + get names from ligand_list only if provided.
        '''
        ligands = []
        for chain in raw_chains:
            #determine if a metal/ligand/solvent chain
            if_HET_chain = 1
            for resi in chain:
                if resi.name in Resi_map2:
                   if_HET_chain = 0
                   break
            if if_HET_chain:
                for i in range(len(chain)-1,-1,-1):
                    # operate in residue level
                    residue = chain[i]

                    # User defined ligand
                    if ligand_list is not None:
                        if residue.name in ligand_list:
                            # add a logger in the future
                            print('\033[1;34;40mStructure: found user assigned ligand in raw: '+chain.id+' '+residue.name+' '+str(residue.id)+' \033[0m')
                            ligands.append(residue)
                            del chain[i]
                    else:
                        if residue.name not in rd_solvent_list:
                            if residue.name not in rd_non_ligand_list:
                                print('\033[1;34;40mStructure: found ligand in raw: '+chain.id+' '+residue.name+' '+str(residue.id)+' \033[0m')
                                ligands.append(residue)
                            del chain[i]


        # Convert pseudo residues to Ligand object
        holders = []
        for pseudo_resi in ligands:
            holders.append(Ligand.fromResidue(pseudo_resi))
        ligands = holders

        # clean empty chains
        for i in range(len(raw_chains)-1,-1,-1):
            if len(raw_chains[i]) == 0:
                del raw_chains[i]

        return raw_chains, ligands

    @classmethod
    def _get_solvents(cls, raw_chains):
        '''
        get solvent from raw chains and clean chains by deleting the solvent part
        -----
        Method: Assume metal/ligand/solvent can anywhere. Base on rd_solvent_list
        '''
        solvents = []
        for chain in raw_chains:
            for i in range(len(chain)-1,-1,-1):
                # operate in residue level
                residue = chain[i]
                if residue.name in rd_solvent_list:
                    if Config.debug > 1:
                        print('\033[1;34;40mStructure: found solvent in raw: '+residue.name+' '+str(residue.id)+' \033[0m')
                    solvents.append(residue)
                    del chain[i]

        # Convert pseudo residues to Ligand object
        holders = []
        for pseudo_resi in solvents:
            holders.append(Solvent.fromResidue(pseudo_resi))
        solvents = holders

        # clean empty chains
        for i in range(len(raw_chains)-1,-1,-1):
            if len(raw_chains[i]) == 0:
                del raw_chains[i]

        return raw_chains, solvents

    '''
    ====
    Methods
    ====
    '''
    def get_metal_center(self):
        '''
        Extract metal centers from metalatoms. Judged by the MetalCenter_map
        save to self.metal_centers
        return self.metal_centers
        '''
        self.metal_centers = []
        for metal in self.metalatoms:
            if metal.resi_name in MetalCenter_map:
                self.metal_centers.append(metal)
        return self.metal_centers


    def get_art_resi(self):
        '''
        find art_resi
        '''
        pass


    def add(self, obj, id=None, sort=0):
        '''
        1. judge obj type (go into the list)
        2. assign parent
        3. id
        if sort:
            clean original id (use a place holder to represent last)
        if not None:
            assign id
        if sort and not None:
            mark as id+i
        4. add to corresponding list
                                                    sort
                  |     |               0             |                     1               |
         assigned |  0  |   keep (for direct output)  | clean (for sort)                    |
                  |  1  |  assign (for direct output) | mark  (for relative order in sort ) |
        '''
        # list
        if type(obj) == list:

            obj_ele=obj[0]

            if type(obj_ele) != Chain and type(obj_ele) != Metalatom and type(obj_ele) != Ligand and type(obj_ele) != Solvent:
                raise TypeError('structure.Add() method only take Chain / Metalatom / Ligand / Solvent')

            # add parent and clean id (if sort) assign id (if assigned) leave mark if sort and assigned
            #                         sort
            #          |     |    0     |   1   |
            # assigned |  0  |   keep   | clean |
            #          |  1  |  assign  | mark  |
            for i in obj:
                i.set_parent(self)
                if sort:
                    if id != None:
                        i.id = str(id)+'i' #str mark
                    else:
                        i.id = id #None
                else:
                    if id != None:
                        i.id=id

            if type(obj_ele) == Chain:
                self.chains.extend(obj)
            if type(obj_ele) == Metalatom:
                self.metalatoms.extend(obj)
            if type(obj_ele) == Ligand:
                self.ligands.extend(obj)
            if type(obj_ele) == Solvent:
                self.solvents.extend(obj)

        # single building block
        else:
            if type(obj) != Chain and type(obj) != Metalatom and type(obj) != Ligand and type(obj) != Solvent:
                raise TypeError('structure.Add() method only take Chain / Metalatom / Ligand / Solvent')

            obj.set_parent(self)
            if sort:
                if id != None:
                    obj.id = str(id)+'i' #str mark
                else:
                    obj.id = id #None
            else:
                if id != None:
                    obj.id=id

            if type(obj) == Chain:
                self.chains.append(obj)
            if type(obj) == Metalatom:
                self.metalatoms.append(obj)
            if type(obj) == Ligand:
                self.ligands.append(obj)
            if type(obj) == Solvent:
                self.solvents.append(obj)

        if sort:
            self.sort()


    def sort(self, if_local = 0):
        '''
        assign index according to current items
        chain.id
        resi.id
        atom.id
        -----------
        Chain/Residue level:
            Base on the order of the old obj.id
            and potential insert mark from add (higher than same number without the mark)
            *if added object has same id and is not assigned with a insert mark -- place after a original one.
        Atom level:
            base on the parent order (parent.id):
            chains -> metalatoms -> ligands
            residue.id within each above.
            list order within each residues.
        '''
        if if_local:
            # sort chain order
            self.chains.sort(key=lambda chain: chain.id)
            # rename each chain
            for index, chain in enumerate(self.chains):
                chain.id = chr(65+index) # Covert to ABC using ACSII mapping
                # sort each chain
                chain.sort()

            # sort ligand // Do I really need?
            for ligand in self.ligands:
                ligand.sort() #Do nothing
        else:
            r_id = 0
            a_id = 0
            for chain in self.chains:
                for res in chain:
                    r_id +=1
                    res.id = r_id
                    for atom in res:
                        a_id +=1
                        atom.id = a_id
            for metal in self.metalatoms:
                a_id +=1
                metal.id = a_id
            for lig in self.ligands:
                r_id +=1
                lig.id = r_id
                for atom in lig:
                    a_id +=1
                    atom.id = a_id
            for sol in self.solvents:
                r_id += 1
                sol.id = r_id
                for atom in sol:
                    a_id += 1
                    atom.id = a_id


    def build(self, path, ff='AMBER', forcefield='ff14SB', keep_id = 0):
        '''
        Write the structure to a PDB file at path.
        Lines come from atom.build(...) / metal.build(...).
        ----------------------------
        ff =
        AMBER
            - protein chains first, 'TER' after each chain
            - then every ligand, followed by 'TER'
            - then every metal atom, followed by 'TER'
            - then all solvents together, followed by a single 'TER'
            - ligands / metals / the solvent group get consecutive chain letters
              continuing after the last protein chain letter (passed as c_id)
            keep_id false: running atom and residue indexes starting from 1 are
                           passed as a_id / r_id and never reset between chains.
            keep_id true : no a_id / r_id is passed to the build calls.
        any other value
            nothing but the final 'END' line is written.
        '''
        with open(path, 'w') as out:
            if ff == 'AMBER':
                renumber = not keep_id
                atom_no = 0
                resi_no = 0

                def emit(unit, **extra):
                    # write one line; the running indexes go along only when renumbering
                    if renumber:
                        extra.update(a_id=atom_no, r_id=resi_no)
                    out.write(unit.build(ff=ff, forcefield=forcefield, **extra))

                # protein chains
                for chain in self.chains:
                    for resi in chain:
                        resi_no += 1
                        for atom in resi:
                            atom_no += 1
                            emit(atom)
                    #write TER after each chain
                    out.write('TER'+line_feed)

                # letter of the last protein chain ('@' when there is none)
                chain_tag = chr(len(self.chains)+64)

                # one chain letter per ligand
                for ligand in self.ligands:
                    resi_no += 1
                    chain_tag = chr(ord(chain_tag)+1)
                    for atom in ligand:
                        atom_no += 1
                        emit(atom, c_id=chain_tag)
                    out.write('TER'+line_feed)

                # one chain letter per metal atom
                for metal in self.metalatoms:
                    atom_no += 1
                    resi_no += 1
                    chain_tag = chr(ord(chain_tag)+1)
                    emit(metal, c_id=chain_tag)
                    out.write('TER'+line_feed)

                # a single chain letter shared by all solvents
                if len(self.solvents) != 0:
                    chain_tag = chr(ord(chain_tag)+1)
                    for solvent in self.solvents:
                        resi_no += 1
                        for atom in solvent:
                            atom_no += 1
                            emit(atom, c_id=chain_tag)
                    out.write('TER'+line_feed)

            if ff == 'XXX':
                #place holder
                pass

            out.write('END'+line_feed)


    def build_ligands(self, dir, ft='PDB', ifcharge=0 ,c_method='PYBEL', ph=7.0, ifname=0, ifunique=0):
        '''
        build files for every ligand in self.ligands
        -------
        dir      : output dir. (e.g. File path for ligand i is $dir/ligand_i.pdb)
        ft       : file type / now support: PDB(default)
        ifcharge : if calculate net charge info. (do not before add H)
        c_method : method determining the net charge (default: PYBEL)
        ph       : pH value used for determine the net charge
        ifname   : export residue name if 1 (default: 0)
        ifunique : 1: only build one ligand if there's multiple same ones. 0: build every ligand
        '''
        out_ligs = []

        l_id = 0
        lig_list = self.get_all_ligands(ifunique=ifunique)

        for lig in lig_list: # TODO make this a method of Ligand.
            l_id = l_id + 1 # current ligand id
            # make output path
            if dir[-1] == '/':
                dir = dir[:-1]
            if ifunique:
                out_path = dir+'/ligand_'+lig.name+'.pdb'
            else:
                out_path = dir+'/ligand_'+str(l_id)+'_'+lig.name+'.pdb'
            # write
            lig.build(out_path, ft=ft)
            # net charge
            net_charge=None
            if ifcharge:
                if lig.net_charge != None:
                    net_charge = lig.net_charge
                else:
                    net_charge = lig.get_net_charge(method=c_method, ph=ph, o_dir=dir)

            # record
            if ifname:
                out_ligs.append((out_path, net_charge, lig.name))
            else:
                out_ligs.append((out_path, net_charge))

        return out_ligs


    def build_protein(self, dir, ft='PDB'):
        '''
        build only protein and output under the dir
        -------
        dir: out put dir ($dir/protein.pdb)
        ft : file type / now support: PDB(default)
        '''
        # make path
        if dir[-1] == '/':
            dir = dir[:-1]
        out_path = dir+'/protein.pdb'

        #write
        if ft == 'PDB':
            with open(out_path,'w') as of:
                a_id = 0
                r_id = 0
                for chain in self.chains:
                    #write chain
                    for resi in chain:
                        r_id = r_id+1
                        for atom in resi:
                            a_id = a_id + 1 #current line index
                            line = atom.build(a_id= a_id, r_id = r_id)
                            of.write(line)
                    #write TER after each chain
                    of.write('TER'+line_feed)
                of.write('END'+line_feed)
        else:
            raise Exception('Support only PDB output now.')

        return out_path


    def build_metalcenters(self, dir, ft='PDB'):
        '''
        build metalcenters only. Use for MCPB parameterization. Deal with donor residue with different protonation states.        ----------
        TODO
        '''
        out_paths = []
        return out_paths


    def get_connect(self, metal_fix = 1, ligand_fix = 1, prepi_path=None):
        '''
        Fill in the connectivity (atom.connect) of the atoms of the structure.
        -----------------
        metal_fix : 1 (default): metal atoms stay isolated (connect = [])
                    2: not implemented, raises Exception
        ligand_fix: 1 (default): read the ligand connectivity from prepin files
                    any other value: ligand atoms are left with connect = []
        prepi_path: indexed with the ligand name (prepi_path[lig.name]) to get
                    the prepin file of each ligand. Required if ligand_fix == 1.
        -----------------
        TREATMENT
        chain / solvent: atom.get_connect() is called on every atom
                         (presumably based on the connectivity map of each atom
                         in each residue -- see Atom.get_connect)
        metalatom:  fix1: treat as isolated atom
                    fix2: connect to donor atom (MCPB?)
        ligand: fix1: use antechamber generated prepin file to get connectivity.
                      according to https://ambermd.org/doc/prep.html the coordinate line will always start at the 11th line after 3 DUMM.
        -----------------
        raise Exception if ligand_fix == 1 and prepi_path is None
                        (checked first, even when there is no ligand)
        raise Exception if metal_fix == 2 (after chains, solvents and metals
                        have already been processed)
        '''
        # sanity check
        if ligand_fix == 1 and prepi_path == None:
            raise Exception('Ligand fix 1 requires prepin_path.')
        # chain part
        for chain in self.chains:
            for res in chain:
                for atom in res:
                    atom.get_connect()
        # solvent is treated the same way as the chains
        for sol in self.solvents:
            for atom in sol:
                atom.get_connect()
        # metal: always reset to "no connection"; fix 1 keeps it that way
        for metal in self.metalatoms:
            metal.connect = []
        if metal_fix == 1:
            pass
        if metal_fix == 2:
            raise Exception('TODO: Still working on 2 right now')

        # ligand
        # init: any previous connectivity of the ligand atoms is discarded
        for lig in self.ligands:
            for atom in lig:
                atom.connect = []
        # fix 1
        if ligand_fix == 1:
            for lig in self.ligands:
                # read prepin for each ligand
                with open(prepi_path[lig.name]) as f:
                    line_id = 0 # 1-based number of the current line (blank lines are counted)
                    if_loop = 0 # 1 while inside the LOOP section
                    for line in f:
                        line_id += 1
                        if line.strip() == '':
                            if if_loop == 1:
                                # switch off loop and break if first blank after LOOP encountered
                                # (everything after the LOOP section is ignored)
                                if_loop = 0
                                break
                            continue
                        if if_loop:
                            # LOOP line: the first two fields are atom names.
                            # NOTE(review): the bond is recorded on the first atom only,
                            # unlike the atom records below which record both directions -- confirm this is intended.
                            lp = line.strip().split()
                            lig._find_atom_name(lp[0]).connect.append(lig._find_atom_name(lp[1]))
                            continue
                        # loop connect starts at LOOP
                        if line.strip() == 'LOOP':
                            if_loop = 1
                            continue
                        # coord starts at 11th
                        # NOTE(review): every non-blank line from here that is not 'LOOP' is parsed as
                        # an atom record, so a non-numeric first field makes int() raise ValueError.
                        if line_id >= 11:
                            lp = line.strip().split()
                            # field 1: index of this atom, field 5: index of the atom it is
                            # connected to; -3 removes the 3 DUMM entries from both
                            atom_id = int(lp[0])-3
                            atom_cnt = int(lp[4])-3
                            # 0 means the partner is the last DUMM entry: no real bond
                            if atom_cnt != 0:
                                # indexes are 1-based, lig[...] is 0-based; record both directions
                                lig[atom_id-1].connect.append(lig[atom_cnt-1])
                                lig[atom_cnt-1].connect.append(lig[atom_id-1])


    def get_connectivty_table(self, ff='GAUSSIAN', metal_fix = 1, ligand_fix = 1, prepi_path=None):
        '''
        Build the connectivity table text of the whole structure.
        -------------------
        ff:         documented setting is 'GAUSSIAN' (continuous atom index starting
                    from 1, not separated by chain). The value is not read by the code.
        metal_fix, ligand_fix, prepi_path:
                    forwarded unchanged to self.get_connect, which fills atom.connect.
                    Documented treatments:
                        metalatom:  fix1: treat as isolated atom
                                    fix2: connect to donor atom (MCPB?)
                        ligand:     fix1: use antechamber generated prepin file
        -------------------
        OUTPUT
        One line per atom, written in the order chains -> ligands -> metalatoms -> solvents.
        Each line is ' <atom.id>' followed by ' <partner.id> 1.0' for every connected
        atom whose id is larger than the current one (each bond is written once).
        Use 1.0 for all connection.
            Amber force field in gaussian do not account in bond order. (Only UFF does.)
            Note that bond order less than 0.1 do not count in MM but only in opt redundant coordinate.
        -------------------
        raise Exception('atom id error.') when the atom ids met in writing order are
        not exactly 1, 2, 3, ...
        '''
        # refresh the connectivity of every atom before writing
        self.get_connect(metal_fix, ligand_fix, prepi_path)

        def _atoms_in_writing_order():
            # chains -> ligands -> metal atoms -> solvents
            for chain in self.chains:
                for res in chain:
                    yield from res
            for lig in self.ligands:
                yield from lig
            yield from self.metalatoms
            for sol in self.solvents:
                yield from sol

        rows = []
        for expected_id, atom in enumerate(_atoms_in_writing_order(), start=1):
            # sanity check: ids have to be continuous in the writing order
            if atom.id != expected_id:
                raise Exception('atom id error.')
            fields = [' '+str(atom.id)]
            # only partners with a larger id are written
            for partner in atom.connect:
                if partner.id > atom.id:
                    fields.append(' '+str(partner.id)+' '+'1.0')
            rows.append(''.join(fields)+line_feed)

        return ''.join(rows)


    def protonation_metal_fix(self, Fix):
        '''
        Apply the selected fix to every metal center of the structure.
        ----------
        Fix: 1, 2 or 3 -- calls _metal_fix_1 / _metal_fix_2 / _metal_fix_3 of each
             metal center. Any other value applies no fix (donor residues are
             still searched for each center).
        ----------
        return a bool: if there's any metal center
        '''
        # search for metal centers once if none is recorded yet
        if self.metal_centers == []:
            self.get_metal_center()
            if self.metal_centers == []:
                print('No metal center is found. Exit Fix.')
                return False

        # Fix value -> name of the method on the metal center
        fix_methods = ((1, '_metal_fix_1'), (2, '_metal_fix_2'), (3, '_metal_fix_3'))
        for center in self.metal_centers:
            # donor atoms and residues are needed before fixing
            center.get_donor_residue(method = 'INC')
            for code, method_name in fix_methods:
                if Fix == code:
                    getattr(center, method_name)()
        return True


    def get_all_protein_atom(self):
        '''
        Collect the atoms of every residue of every chain, in chain/residue order.
        Ligands, metal atoms and solvents are not visited.
        ----------
        return a new list of atom objects
        '''
        return [
            atom
            for chain in self.chains
            for residue in chain
            for atom in residue.atoms
        ]


    def get_all_residue_unit(self, ifsolvent=0):
        '''
        Gather every residue-level unit of the structure into one new list.
        ----------
        order: residues of each chain -> ligands -> metalatoms
               (-> solvents, only when ifsolvent is true)
        ----------
        return the list of units
        '''
        units = [resi for chain in self.chains for resi in chain]
        units.extend(self.ligands)
        units.extend(self.metalatoms)
        if ifsolvent:
            units.extend(self.solvents)
        return units

    def find_idx_residue(self, idx: int):
        '''
        Look up the unit whose id equals idx among self.get_all_residue_unit()
        (called with its default, i.e. solvents are not searched).
        ----------
        return the matching unit, or None (after printing a notice) if nothing matches
        raise Exception if more than one unit carries this id
        '''
        matches = [unit for unit in self.get_all_residue_unit() if unit.id == idx]
        if len(matches) > 1:
            raise Exception(f"found more than one residue with idx: {idx}. check your structure")
        if matches:
            return matches[0]
        print(f"No residue found with idx: {idx}")
        return None


    def delete_idx_ligand(self, idx: int):
        '''
        Remove every ligand whose id equals idx from self.ligands.
        The list is modified in place; the remaining ligands keep their order.
        '''
        survivors = [lig for lig in self.ligands if not (lig.id == idx)]
        self.ligands[:] = survivors


    def get_residue(self, id):
        '''
        Search the units returned by self.get_all_residue_unit() (default call,
        solvents not included) for the first one whose id equals int(id).
        ----------
        id: anything int() accepts, e.g. 12 or '12'
        ----------
        return the first matching residue object, or None if there is no match
        '''
        candidates = self.get_all_residue_unit()
        # int(id) stays inside the scan, so it is only evaluated when there is
        # at least one unit to compare with
        return next((resi for resi in candidates if resi.id == int(id)), None)


    def get_atom_id(self):
        '''
        return a list of id of all atoms in the structure
        ----------
        order: atoms of chains -> metalatoms -> atoms of ligands -> atoms of solvents
        ----------
        raise Exception if any atom id is None. The message names the unit that
        owns the atom (chain id + residue id, metal name, ligand id or solvent id).
        '''
        atom_id_list = []
        for chain in self.chains:
            for res in chain:
                for atom in res:
                    if atom.id is None:
                        raise Exception('Detected None in chain '+str(chain.id)+str(res.id)+' '+atom.name)
                    atom_id_list.append(atom.id)

        for metal in self.metalatoms:
            if metal.id is None:
                raise Exception('Detected None in metal '+ metal.name)
            atom_id_list.append(metal.id)

        for lig in self.ligands:
            for atom in lig:
                if atom.id is None:
                    # report the ligand that owns the atom (not a leftover
                    # residue variable of the chain loop)
                    raise Exception('Detected None in ligands', lig.id, atom.name)
                atom_id_list.append(atom.id)

        for sol in self.solvents:
            for atom in sol:
                if atom.id is None:
                    # report the solvent that owns the atom
                    raise Exception('Detected None in solvent', sol.id, atom.name)
                atom_id_list.append(atom.id)

        return atom_id_list


    def get_atom_charge(self, prmtop_path):
        '''
        Placeholder: the body is empty, nothing is read from prmtop_path and
        None is returned.
        ----------
        requires generate the stru using !SAME! PDB as one that generate the prmtop.
        '''
        return None


    def get_atom_type(self, prmtop_path):
        '''
        Read the Amber atom types from a prmtop file and store them as atom.type
        on every atom of chains, metalatoms, ligands and solvents.
        requires generate the stru using !SAME! PDB as one that generate the prmtop.
        ----------
        prmtop_path: path of the prmtop file
        ----------
        PARSING
        - atom number: first number on the 2nd line after '%FLAG POINTERS'
        - atom types : whitespace separated tokens of the lines starting 2 lines
                       after '%FLAG AMBER_ATOM_TYPE', for ceil(N_atom/5) lines
        - atom.type = type_list[atom.id-1]
        NOTE(review): the ceil(N_atom/5) line window is kept as it was. Amber prmtop
        files normally store 20 types per line, so the window can run into the
        following sections; surplus tokens are appended after the real types and
        are not reached by ids <= N_atom -- TODO confirm the intended window.
        ----------
        raise Exception if the type section is reached before the atom number is
        known, or if a metal/ligand/solvent atom has id None (the message names
        the owning unit).
        '''
        # get type list
        type_list = []
        pointers_flag = None    # line of '%FLAG POINTERS' until the atom number is read
        type_flag = None        # line of '%FLAG AMBER_ATOM_TYPE' once seen
        n_atom = None
        with open(prmtop_path) as f:
            for line_index, line in enumerate(f, start=1):
                content = line.strip()

                if content == r'%FLAG POINTERS':
                    pointers_flag = line_index
                if content == r'%FLAG AMBER_ATOM_TYPE':
                    type_flag = line_index

                # flag line + format line are skipped, data starts 2 lines later
                if pointers_flag is not None and line_index == pointers_flag+2:
                    n_atom = int(line.split()[0])
                    pointers_flag = None

                if type_flag is not None and line_index >= type_flag+2:
                    if n_atom is None:
                        raise Exception('atom number (%FLAG POINTERS) not found before %FLAG AMBER_ATOM_TYPE in '+str(prmtop_path))
                    if line_index <= type_flag+1+ceil(n_atom/5):
                        type_list.extend(content.split())

        # assign type to atom
        for chain in self.chains:
            for res in chain:
                for atom in res:
                    atom.type = type_list[atom.id-1]
        for atom in self.metalatoms:
            if atom.id is None:
                raise Exception('Detected None in metal '+ atom.name)
            atom.type = type_list[atom.id-1]
        for lig in self.ligands:
            for atom in lig:
                if atom.id is None:
                    # report the ligand that owns the atom (not a leftover
                    # residue variable of the chain loop)
                    raise Exception('Detected None in ligands', lig.id, atom.name)
                atom.type = type_list[atom.id-1]
        for sol in self.solvents:
            for atom in sol:
                if atom.id is None:
                    # report the solvent that owns the atom
                    raise Exception('Detected None in solvent', sol.id, atom.name)
                atom.type = type_list[atom.id-1]


    def get_sele_list(self, atom_mask, fix_end='H', prepi_path=None):
        '''
        interface with class ONIOM_Frame. Generate a list for sele build. Make sure use same pdb as the one generate the frame.
        ------------
        resi_list: selected residue list
        atom_mask: atom selection with the standard grammer of Amber (incomplete)
        fix_end: fix valence of the cut bond. (default: H)
                - H: add H to where the original connecting atom is.
                    special fix for classical case:
                    "sele by residue" (cut N-C) -- adjust dihedral for added H on N.
                = Interface with write_sele_lines:
                    add {fix_flag+element_mark: coord} in sele_lines
        ------------
        return a sele list:
        - Fixing atoms are labeled as qm_atom_id-qm_atom_cnt_id-distance
        - backbone atoms are marked as b at the end (for qmcluster charge calculation)
        - other atoms use _ as place holder
        return a sele map:
        (PDB atom id -> QM atom id)
        '''
        sele_lines = {}
        #decode atom_mask (maybe in helper later) TODO
        resi_list = atom_mask[1:].strip().split(',')
        all_resi_list = self.get_all_residue_unit()

        # decode and get obj
        sele_stru_objs=[]
        for resi in resi_list:
            chain_id = re.match('[A-Z]',resi)
            resi_id = int(re.match('[0-9]+',resi).group(0))
            if chain_id == None:
                for resi in all_resi_list:
                    if resi_id == resi.id:
                        resi_obj = resi
            else:
                chain_id = chain_id.group(0)
                resi_obj = self.chains[int(chain_id)-65]._find_resi_id(resi_id)

            sele_stru_objs.append(resi_obj)

        # combine the sele
        sele_atoms = []
        for obj in sele_stru_objs:
            for atom in obj:
                sele_atoms.append(atom)

        if fix_end != None:
            self.get_connect(prepi_path=prepi_path)

        # operate on the sele objs
        for atom in sele_atoms:
            # add current atom
            atom.get_ele()
            if type(atom.parent) != Ligand: