Package rdkit :: Package Chem :: Package MolDb :: Module Loader_sa
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.MolDb.Loader_sa

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2007-2009 Greg Landrum 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  import sqlalchemy 
 11   
 12  from rdkit import Chem 
 13  from rdkit.Chem import AllChem 
 14  from rdkit.Chem import Lipinski,Descriptors,Crippen 
 15  from rdkit.Dbase.DbConnection import DbConnect 
 16  from rdkit.Dbase import DbModule 
 17  import os 
 18   
 19  from sqlalchemy.ext.declarative import declarative_base 
 20  from sqlalchemy import Table,Column,MetaData 
 21  from sqlalchemy import Integer,Text,String,ForeignKey,Binary,DateTime,Float 
 22  from sqlalchemy.orm import relation,mapper,sessionmaker,backref 
 23  from sqlalchemy import create_engine 
 24   
# Declarative base class; Compound (below) derives from it, and
# RegisterSchema() uses decBase.metadata to create the tables.
decBase = declarative_base()
 26   
class Compound(decBase):
    """ORM class mapped to the ``molecules`` table.

    Only the two columns below are declared statically.  LoadDb() attaches
    further columns (name, SMILES, property and descriptor columns) to this
    class before the schema is created.
    """
    __tablename__='molecules'
    # Integer primary key.
    guid=Column(Integer,primary_key=True)
    # Binary column; ProcessMol() stores the result of mol.ToBinary() here.
    molpkl=Column(Binary)
31
def RegisterSchema(dbUrl, echo=False):
    """Create the tables known to ``decBase`` and return a session factory.

    Arguments:
      - dbUrl: database URL handed to ``create_engine``
      - echo: forwarded to ``create_engine`` as its ``echo`` argument

    Returns the ``sessionmaker`` bound to the new engine; call it to obtain
    a session.
    """
    db_engine = create_engine(dbUrl, echo=echo)
    # Emit CREATE TABLE for everything registered on the declarative base.
    decBase.metadata.create_all(db_engine)
    return sessionmaker(bind=db_engine)


# Alias kept so callers can use either name.
ConnectToSchema = RegisterSchema
def _ConnectToSchema(dbUrl, echo=False):
    """Connect to the database at dbUrl and return a session factory.

    Arguments:
      - dbUrl: database URL handed to ``create_engine``
      - echo: forwarded to ``create_engine`` as its ``echo`` argument

    Returns a ``sessionmaker`` bound to the engine.

    The previous version contained a stray bare ``meta`` expression that
    raised NameError on every call; it has been removed.
    """
    engine = create_engine(dbUrl, echo=echo)
    decBase.metadata.create_all(engine)
    maker = sessionmaker(bind=engine)
    return maker
# Set up the module-level logger; LoadDb() uses it for progress messages
# and warnings.  The level is fixed at INFO here.
import rdkit.RDLogger as logging
logger = logging.logger()
logger.setLevel(logging.INFO)
def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False,
               skipProps=False, addComputedProps=False,
               skipSmiles=False):
    """Create a Compound row for mol, add it to session and return it.

    Arguments:
      - session: object with an ``add`` method; the new Compound is added to it
      - mol: the molecule to store; a false value raises ValueError
      - globalProps: container of property names; only molecule properties
        whose name is in it are copied onto the row
      - nDone: counter used to build the fallback name ``Mol_<nDone>``
      - nameProp: molecule property read to obtain the compound name
      - nameCol: name column; the lower-cased form is the attribute that is set
      - redraw: if true, 2D coordinates are recomputed for mol
      - keepHs: if true, ``Chem.SanitizeMol`` is called on mol
      - skipProps: if true, neither computed nor molecule properties are stored
      - addComputedProps: if true (and skipProps is false), donor/acceptor
        counts, rotatable-bond count, AMW and MolLogP are stored
      - skipSmiles: if true, the ``smiles`` attribute is not set

    Returns the new Compound instance.

    Raises ValueError if mol is false.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        # Missing or empty name: fall back to a counter-based one.
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # Fixed: this used to reference an undefined name ``m``.
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    # LoadDb() declares the name column as nameCol.lower(); use the same key
    # here so that a mixed-case nameCol still lands in the mapped column.
    nameKey = nameCol.lower()
    setattr(cmpd, nameKey, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in mol.GetPropNames():
            if pn.lower() == nameKey:
                # The name was already stored above.
                continue
            if pn in globalProps:
                setattr(cmpd, pn.lower(), mol.GetProp(pn).strip())
    return cmpd
93
def _CommitOneByOne(session, cmpds):
    """Re-add and commit each compound separately, skipping those that fail."""
    for cmpd in cmpds:
        try:
            session.add(cmpd)
            session.commit()
        except Exception:
            session.rollback()
        except BaseException:
            # Roll back even on KeyboardInterrupt and friends, then propagate.
            session.rollback()
            raise


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False,
           redraw=False, errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False,
           regName='molecules', skipSmiles=False, maxRowsCached=-1,
           uniqNames=False, addComputedProps=False, lazySupplier=False,
           numForPropScan=10, startAnew=True):
    """Load the molecules from suppl into the SQLite file dbName.

    Arguments:
      - suppl: iterable of molecules; ``len(suppl)`` is taken unless
        lazySupplier is true
      - dbName: path of the SQLite database file
      - nameProp, nameCol, redraw, keepHs, skipProps, addComputedProps,
        skipSmiles: forwarded to ProcessMol()
      - silent: if true, no progress messages are logged
      - errorsTo: optional object with a ``write`` method; for molecules that
        fail to parse, ``suppl.GetItemText`` output is written to it when the
        supplier provides that method
      - defaultVal: default for the name column and the property columns
      - uniqNames: passed as ``unique`` for the name column
      - lazySupplier: if true, ``len(suppl)`` is not called
      - numForPropScan: number of valid molecules scanned up front to
        discover property names, each of which becomes a String column
      - startAnew: if true, an existing file at dbName is deleted first
      - regName, maxRowsCached: accepted but not used by this function

    Raises IOError if startAnew is true and the old file cannot be deleted.

    Changes from the previous version:
      - the delete-retry loop catches OSError instead of using a bare except
      - the commit every 100 molecules no longer depends on ``silent``
        (previously silent=True postponed everything to one final commit)
    """
    import time  # only needed for the delete-retry back-off below

    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s" % dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules" % nMols)

    globalProps = {}
    if startAnew:
        if os.path.exists(dbName):
            # The file may be briefly locked; retry a few times.
            for i in range(5):
                try:
                    os.unlink(dbName)
                    break
                except OSError:
                    time.sleep(2)
        if os.path.exists(dbName):
            raise IOError('could not delete old database %s' % dbName)

    sIter = iter(suppl)
    setattr(Compound, nameCol.lower(),
            Column(nameCol.lower(), String, default=defaultVal, unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text, unique=True)
    if not skipProps:
        # Scan the first few valid molecules to find the property columns.
        while numForPropScan > 0:
            try:
                m = next(sIter)
            except StopIteration:
                numForPropScan = 0
                break
            if not m:
                continue
            for pn in m.GetPropNames():
                if pn.lower() == nameCol.lower():
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound, pn.lower(),
                            Column(pn.lower(), String, default=defaultVal))
            numForPropScan -= 1
    if addComputedProps:
        Compound.DonorCount = Column(Integer)
        Compound.AcceptorCount = Column(Integer)
        Compound.RotatableBondCount = Column(Integer)
        Compound.AMW = Column(Float)
        Compound.MolLogP = Column(Float)
    session = RegisterSchema('sqlite:///%s' % (dbName))()

    nDone = 0
    cache = []
    for m in suppl:
        nDone += 1
        if not m:
            if errorsTo:
                if hasattr(suppl, 'GetItemText'):
                    d = suppl.GetItemText(nDone - 1)
                    errorsTo.write(d)
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session, m, globalProps, nDone, nameProp=nameProp,
                          nameCol=nameCol, redraw=redraw,
                          keepHs=keepHs, skipProps=skipProps,
                          addComputedProps=addComputedProps, skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        if not nDone % 100:
            if not silent:
                logger.info(' done %d' % nDone)
            try:
                session.commit()
            except Exception:
                # The batch failed as a whole; salvage what can be stored.
                session.rollback()
                _CommitOneByOne(session, cache)
            cache = []

    try:
        session.commit()
    except BaseException as exc:
        import traceback
        traceback.print_exc()
        session.rollback()
        _CommitOneByOne(session, cache)
        if not isinstance(exc, Exception):
            # Re-raise on KeyboardInterrupt, SystemExit, etc.
            raise exc
if __name__ == '__main__':
    # Command-line use: <script> input.sdf output.sqlite
    import sys

    sd_path, db_path = sys.argv[1], sys.argv[2]
    LoadDb(Chem.SDMolSupplier(sd_path), db_path, addComputedProps=False)

    # Re-open the database and report how many compounds were stored.
    make_session = RegisterSchema('sqlite:///%s' % (db_path))
    stored = make_session().query(Compound).all()
    print('>>>>', len(stored))