Back to index

python3.2  3.2.2
modulefinder.py
Go to the documentation of this file.
00001 """Find modules used by a script, using introspection."""
00002 
00003 from __future__ import generators
00004 import dis
00005 import imp
00006 import marshal
00007 import os
00008 import sys
00009 import types
00010 import struct
00011 
# Mode used to open Python source files.  Text mode on Python 3 already
# performs universal-newline translation, so the legacy "U" flag (a no-op
# there, and rejected outright by newer interpreters) is not used.
READ_MODE = "r"

# Opcode values stored as one-byte bytes objects so that they can be
# compared against, and concatenated into, slices of a code object's
# co_code.
LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
STORE_NAME = bytes([dis.opname.index('STORE_NAME')])
STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')])
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
00021 
# ModuleFinder does a good job of simulating Python's import machinery, but
# it cannot handle the __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note: this maps each package name to a list of extra paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search path for *packagename*.

    The path is appended to the list stored under *packagename* in
    ``packagePathMap``; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
00035 
# Maps the name passed as 'oldname' to ReplacePackage() to its 'newname'.
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be treated as *newname*.

    A later call with the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
00046 
00047 
class Module:
    """Lightweight record describing one module seen by the finder.

    Attributes mirror those of a real module object (``__name__``,
    ``__file__``, ``__path__``, ``__code__``) but nothing is ever executed.
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names assigned to in the module (used as a set), including
        # names brought in through star imports of Python modules.
        self.globalnames = {}
        # Star imports this module performed that could not be resolved,
        # i.e. a star import from a non-Python module (used as a set).
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [self.__name__]
        shown.extend(
            value for value in (self.__file__, self.__path__)
            if value is not None
        )
        return "Module(" + ", ".join(repr(value) for value in shown) + ")"
00071 
00072 class ModuleFinder:
00073 
00074     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
00075         if path is None:
00076             path = sys.path
00077         self.path = path
00078         self.modules = {}
00079         self.badmodules = {}
00080         self.debug = debug
00081         self.indent = 0
00082         self.excludes = excludes
00083         self.replace_paths = replace_paths
00084         self.processed_paths = []   # Used in debugging only
00085 
00086     def msg(self, level, str, *args):
00087         if level <= self.debug:
00088             for i in range(self.indent):
00089                 print("   ", end=' ')
00090             print(str, end=' ')
00091             for arg in args:
00092                 print(repr(arg), end=' ')
00093             print()
00094 
00095     def msgin(self, *args):
00096         level = args[0]
00097         if level <= self.debug:
00098             self.indent = self.indent + 1
00099             self.msg(*args)
00100 
00101     def msgout(self, *args):
00102         level = args[0]
00103         if level <= self.debug:
00104             self.indent = self.indent - 1
00105             self.msg(*args)
00106 
00107     def run_script(self, pathname):
00108         self.msg(2, "run_script", pathname)
00109         with open(pathname, READ_MODE) as fp:
00110             stuff = ("", "r", imp.PY_SOURCE)
00111             self.load_module('__main__', fp, pathname, stuff)
00112 
00113     def load_file(self, pathname):
00114         dir, name = os.path.split(pathname)
00115         name, ext = os.path.splitext(name)
00116         with open(pathname, READ_MODE) as fp:
00117             stuff = (ext, "r", imp.PY_SOURCE)
00118             self.load_module(name, fp, pathname, stuff)
00119 
00120     def import_hook(self, name, caller=None, fromlist=None, level=-1):
00121         self.msg(3, "import_hook", name, caller, fromlist, level)
00122         parent = self.determine_parent(caller, level=level)
00123         q, tail = self.find_head_package(parent, name)
00124         m = self.load_tail(q, tail)
00125         if not fromlist:
00126             return q
00127         if m.__path__:
00128             self.ensure_fromlist(m, fromlist)
00129         return None
00130 
00131     def determine_parent(self, caller, level=-1):
00132         self.msgin(4, "determine_parent", caller, level)
00133         if not caller or level == 0:
00134             self.msgout(4, "determine_parent -> None")
00135             return None
00136         pname = caller.__name__
00137         if level >= 1: # relative import
00138             if caller.__path__:
00139                 level -= 1
00140             if level == 0:
00141                 parent = self.modules[pname]
00142                 assert parent is caller
00143                 self.msgout(4, "determine_parent ->", parent)
00144                 return parent
00145             if pname.count(".") < level:
00146                 raise ImportError("relative importpath too deep")
00147             pname = ".".join(pname.split(".")[:-level])
00148             parent = self.modules[pname]
00149             self.msgout(4, "determine_parent ->", parent)
00150             return parent
00151         if caller.__path__:
00152             parent = self.modules[pname]
00153             assert caller is parent
00154             self.msgout(4, "determine_parent ->", parent)
00155             return parent
00156         if '.' in pname:
00157             i = pname.rfind('.')
00158             pname = pname[:i]
00159             parent = self.modules[pname]
00160             assert parent.__name__ == pname
00161             self.msgout(4, "determine_parent ->", parent)
00162             return parent
00163         self.msgout(4, "determine_parent -> None")
00164         return None
00165 
00166     def find_head_package(self, parent, name):
00167         self.msgin(4, "find_head_package", parent, name)
00168         if '.' in name:
00169             i = name.find('.')
00170             head = name[:i]
00171             tail = name[i+1:]
00172         else:
00173             head = name
00174             tail = ""
00175         if parent:
00176             qname = "%s.%s" % (parent.__name__, head)
00177         else:
00178             qname = head
00179         q = self.import_module(head, qname, parent)
00180         if q:
00181             self.msgout(4, "find_head_package ->", (q, tail))
00182             return q, tail
00183         if parent:
00184             qname = head
00185             parent = None
00186             q = self.import_module(head, qname, parent)
00187             if q:
00188                 self.msgout(4, "find_head_package ->", (q, tail))
00189                 return q, tail
00190         self.msgout(4, "raise ImportError: No module named", qname)
00191         raise ImportError("No module named " + qname)
00192 
00193     def load_tail(self, q, tail):
00194         self.msgin(4, "load_tail", q, tail)
00195         m = q
00196         while tail:
00197             i = tail.find('.')
00198             if i < 0: i = len(tail)
00199             head, tail = tail[:i], tail[i+1:]
00200             mname = "%s.%s" % (m.__name__, head)
00201             m = self.import_module(head, mname, m)
00202             if not m:
00203                 self.msgout(4, "raise ImportError: No module named", mname)
00204                 raise ImportError("No module named " + mname)
00205         self.msgout(4, "load_tail ->", m)
00206         return m
00207 
00208     def ensure_fromlist(self, m, fromlist, recursive=0):
00209         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
00210         for sub in fromlist:
00211             if sub == "*":
00212                 if not recursive:
00213                     all = self.find_all_submodules(m)
00214                     if all:
00215                         self.ensure_fromlist(m, all, 1)
00216             elif not hasattr(m, sub):
00217                 subname = "%s.%s" % (m.__name__, sub)
00218                 submod = self.import_module(sub, subname, m)
00219                 if not submod:
00220                     raise ImportError("No module named " + subname)
00221 
00222     def find_all_submodules(self, m):
00223         if not m.__path__:
00224             return
00225         modules = {}
00226         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
00227         # But we must also collect Python extension modules - although
00228         # we cannot separate normal dlls from Python extensions.
00229         suffixes = []
00230         for triple in imp.get_suffixes():
00231             suffixes.append(triple[0])
00232         for dir in m.__path__:
00233             try:
00234                 names = os.listdir(dir)
00235             except os.error:
00236                 self.msg(2, "can't list directory", dir)
00237                 continue
00238             for name in names:
00239                 mod = None
00240                 for suff in suffixes:
00241                     n = len(suff)
00242                     if name[-n:] == suff:
00243                         mod = name[:-n]
00244                         break
00245                 if mod and mod != "__init__":
00246                     modules[mod] = mod
00247         return modules.keys()
00248 
00249     def import_module(self, partname, fqname, parent):
00250         self.msgin(3, "import_module", partname, fqname, parent)
00251         try:
00252             m = self.modules[fqname]
00253         except KeyError:
00254             pass
00255         else:
00256             self.msgout(3, "import_module ->", m)
00257             return m
00258         if fqname in self.badmodules:
00259             self.msgout(3, "import_module -> None")
00260             return None
00261         if parent and parent.__path__ is None:
00262             self.msgout(3, "import_module -> None")
00263             return None
00264         try:
00265             fp, pathname, stuff = self.find_module(partname,
00266                                                    parent and parent.__path__, parent)
00267         except ImportError:
00268             self.msgout(3, "import_module ->", None)
00269             return None
00270         try:
00271             m = self.load_module(fqname, fp, pathname, stuff)
00272         finally:
00273             if fp: fp.close()
00274         if parent:
00275             setattr(parent, partname, m)
00276         self.msgout(3, "import_module ->", m)
00277         return m
00278 
00279     def load_module(self, fqname, fp, pathname, file_info):
00280         suffix, mode, type = file_info
00281         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
00282         if type == imp.PKG_DIRECTORY:
00283             m = self.load_package(fqname, pathname)
00284             self.msgout(2, "load_module ->", m)
00285             return m
00286         if type == imp.PY_SOURCE:
00287             co = compile(fp.read()+'\n', pathname, 'exec')
00288         elif type == imp.PY_COMPILED:
00289             if fp.read(4) != imp.get_magic():
00290                 self.msgout(2, "raise ImportError: Bad magic number", pathname)
00291                 raise ImportError("Bad magic number in %s" % pathname)
00292             fp.read(4)
00293             co = marshal.load(fp)
00294         else:
00295             co = None
00296         m = self.add_module(fqname)
00297         m.__file__ = pathname
00298         if co:
00299             if self.replace_paths:
00300                 co = self.replace_paths_in_code(co)
00301             m.__code__ = co
00302             self.scan_code(co, m)
00303         self.msgout(2, "load_module ->", m)
00304         return m
00305 
00306     def _add_badmodule(self, name, caller):
00307         if name not in self.badmodules:
00308             self.badmodules[name] = {}
00309         if caller:
00310             self.badmodules[name][caller.__name__] = 1
00311         else:
00312             self.badmodules[name]["-"] = 1
00313 
00314     def _safe_import_hook(self, name, caller, fromlist, level=-1):
00315         # wrapper for self.import_hook() that won't raise ImportError
00316         if name in self.badmodules:
00317             self._add_badmodule(name, caller)
00318             return
00319         try:
00320             self.import_hook(name, caller, level=level)
00321         except ImportError as msg:
00322             self.msg(2, "ImportError:", str(msg))
00323             self._add_badmodule(name, caller)
00324         else:
00325             if fromlist:
00326                 for sub in fromlist:
00327                     if sub in self.badmodules:
00328                         self._add_badmodule(sub, caller)
00329                         continue
00330                     try:
00331                         self.import_hook(name, caller, [sub], level=level)
00332                     except ImportError as msg:
00333                         self.msg(2, "ImportError:", str(msg))
00334                         fullname = name + "." + sub
00335                         self._add_badmodule(fullname, caller)
00336 
00337     def scan_opcodes(self, co,
00338                      unpack = struct.unpack):
00339         # Scan the code, and yield 'interesting' opcode combinations
00340         # Version for Python 2.4 and older
00341         code = co.co_code
00342         names = co.co_names
00343         consts = co.co_consts
00344         while code:
00345             c = code[0]
00346             if c in STORE_OPS:
00347                 oparg, = unpack('<H', code[1:3])
00348                 yield "store", (names[oparg],)
00349                 code = code[3:]
00350                 continue
00351             if c == LOAD_CONST and code[3] == IMPORT_NAME:
00352                 oparg_1, oparg_2 = unpack('<xHxH', code[:6])
00353                 yield "import", (consts[oparg_1], names[oparg_2])
00354                 code = code[6:]
00355                 continue
00356             if c >= HAVE_ARGUMENT:
00357                 code = code[3:]
00358             else:
00359                 code = code[1:]
00360 
00361     def scan_opcodes_25(self, co,
00362                      unpack = struct.unpack):
00363         # Scan the code, and yield 'interesting' opcode combinations
00364         # Python 2.5 version (has absolute and relative imports)
00365         code = co.co_code
00366         names = co.co_names
00367         consts = co.co_consts
00368         LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
00369         while code:
00370             c = bytes([code[0]])
00371             if c in STORE_OPS:
00372                 oparg, = unpack('<H', code[1:3])
00373                 yield "store", (names[oparg],)
00374                 code = code[3:]
00375                 continue
00376             if code[:9:3] == LOAD_LOAD_AND_IMPORT:
00377                 oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
00378                 level = consts[oparg_1]
00379                 if level == 0: # absolute import
00380                     yield "absolute_import", (consts[oparg_2], names[oparg_3])
00381                 else: # relative import
00382                     yield "relative_import", (level, consts[oparg_2], names[oparg_3])
00383                 code = code[9:]
00384                 continue
00385             if c >= HAVE_ARGUMENT:
00386                 code = code[3:]
00387             else:
00388                 code = code[1:]
00389 
00390     def scan_code(self, co, m):
00391         code = co.co_code
00392         if sys.version_info >= (2, 5):
00393             scanner = self.scan_opcodes_25
00394         else:
00395             scanner = self.scan_opcodes
00396         for what, args in scanner(co):
00397             if what == "store":
00398                 name, = args
00399                 m.globalnames[name] = 1
00400             elif what == "absolute_import":
00401                 fromlist, name = args
00402                 have_star = 0
00403                 if fromlist is not None:
00404                     if "*" in fromlist:
00405                         have_star = 1
00406                     fromlist = [f for f in fromlist if f != "*"]
00407                 self._safe_import_hook(name, m, fromlist, level=0)
00408                 if have_star:
00409                     # We've encountered an "import *". If it is a Python module,
00410                     # the code has already been parsed and we can suck out the
00411                     # global names.
00412                     mm = None
00413                     if m.__path__:
00414                         # At this point we don't know whether 'name' is a
00415                         # submodule of 'm' or a global module. Let's just try
00416                         # the full name first.
00417                         mm = self.modules.get(m.__name__ + "." + name)
00418                     if mm is None:
00419                         mm = self.modules.get(name)
00420                     if mm is not None:
00421                         m.globalnames.update(mm.globalnames)
00422                         m.starimports.update(mm.starimports)
00423                         if mm.__code__ is None:
00424                             m.starimports[name] = 1
00425                     else:
00426                         m.starimports[name] = 1
00427             elif what == "relative_import":
00428                 level, fromlist, name = args
00429                 if name:
00430                     self._safe_import_hook(name, m, fromlist, level=level)
00431                 else:
00432                     parent = self.determine_parent(m, level=level)
00433                     self._safe_import_hook(parent.__name__, None, fromlist, level=0)
00434             else:
00435                 # We don't expect anything else from the generator.
00436                 raise RuntimeError(what)
00437 
00438         for c in co.co_consts:
00439             if isinstance(c, type(co)):
00440                 self.scan_code(c, m)
00441 
00442     def load_package(self, fqname, pathname):
00443         self.msgin(2, "load_package", fqname, pathname)
00444         newname = replacePackageMap.get(fqname)
00445         if newname:
00446             fqname = newname
00447         m = self.add_module(fqname)
00448         m.__file__ = pathname
00449         m.__path__ = [pathname]
00450 
00451         # As per comment at top of file, simulate runtime __path__ additions.
00452         m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
00453 
00454         fp, buf, stuff = self.find_module("__init__", m.__path__)
00455         try:
00456             self.load_module(fqname, fp, buf, stuff)
00457             self.msgout(2, "load_package ->", m)
00458             return m
00459         finally:
00460             if fp:
00461                 fp.close()
00462 
00463     def add_module(self, fqname):
00464         if fqname in self.modules:
00465             return self.modules[fqname]
00466         self.modules[fqname] = m = Module(fqname)
00467         return m
00468 
00469     def find_module(self, name, path, parent=None):
00470         if parent is not None:
00471             # assert path is not None
00472             fullname = parent.__name__+'.'+name
00473         else:
00474             fullname = name
00475         if fullname in self.excludes:
00476             self.msgout(3, "find_module -> Excluded", fullname)
00477             raise ImportError(name)
00478 
00479         if path is None:
00480             if name in sys.builtin_module_names:
00481                 return (None, None, ("", "", imp.C_BUILTIN))
00482 
00483             path = self.path
00484         return imp.find_module(name, path)
00485 
00486     def report(self):
00487         """Print a report to stdout, listing the found modules with their
00488         paths, as well as modules that are missing, or seem to be missing.
00489         """
00490         print()
00491         print("  %-25s %s" % ("Name", "File"))
00492         print("  %-25s %s" % ("----", "----"))
00493         # Print modules found
00494         keys = sorted(self.modules.keys())
00495         for key in keys:
00496             m = self.modules[key]
00497             if m.__path__:
00498                 print("P", end=' ')
00499             else:
00500                 print("m", end=' ')
00501             print("%-25s" % key, m.__file__ or "")
00502 
00503         # Print missing modules
00504         missing, maybe = self.any_missing_maybe()
00505         if missing:
00506             print()
00507             print("Missing modules:")
00508             for name in missing:
00509                 mods = sorted(self.badmodules[name].keys())
00510                 print("?", name, "imported from", ', '.join(mods))
00511         # Print modules that may be missing, but then again, maybe not...
00512         if maybe:
00513             print()
00514             print("Submodules thay appear to be missing, but could also be", end=' ')
00515             print("global names in the parent package:")
00516             for name in maybe:
00517                 mods = sorted(self.badmodules[name].keys())
00518                 print("?", name, "imported from", ', '.join(mods))
00519 
00520     def any_missing(self):
00521         """Return a list of modules that appear to be missing. Use
00522         any_missing_maybe() if you want to know which modules are
00523         certain to be missing, and which *may* be missing.
00524         """
00525         missing, maybe = self.any_missing_maybe()
00526         return missing + maybe
00527 
00528     def any_missing_maybe(self):
00529         """Return two lists, one with modules that are certainly missing
00530         and one with modules that *may* be missing. The latter names could
00531         either be submodules *or* just global names in the package.
00532 
00533         The reason it can't always be determined is that it's impossible to
00534         tell which names are imported when "from module import *" is done
00535         with an extension module, short of actually importing it.
00536         """
00537         missing = []
00538         maybe = []
00539         for name in self.badmodules:
00540             if name in self.excludes:
00541                 continue
00542             i = name.rfind(".")
00543             if i < 0:
00544                 missing.append(name)
00545                 continue
00546             subname = name[i+1:]
00547             pkgname = name[:i]
00548             pkg = self.modules.get(pkgname)
00549             if pkg is not None:
00550                 if pkgname in self.badmodules[name]:
00551                     # The package tried to import this module itself and
00552                     # failed. It's definitely missing.
00553                     missing.append(name)
00554                 elif subname in pkg.globalnames:
00555                     # It's a global in the package: definitely not missing.
00556                     pass
00557                 elif pkg.starimports:
00558                     # It could be missing, but the package did an "import *"
00559                     # from a non-Python module, so we simply can't be sure.
00560                     maybe.append(name)
00561                 else:
00562                     # It's not a global in the package, the package didn't
00563                     # do funny star imports, it's very likely to be missing.
00564                     # The symbol could be inserted into the package from the
00565                     # outside, but since that's not good style we simply list
00566                     # it missing.
00567                     missing.append(name)
00568             else:
00569                 missing.append(name)
00570         missing.sort()
00571         maybe.sort()
00572         return missing, maybe
00573 
00574     def replace_paths_in_code(self, co):
00575         new_filename = original_filename = os.path.normpath(co.co_filename)
00576         for f, r in self.replace_paths:
00577             if original_filename.startswith(f):
00578                 new_filename = r + original_filename[len(f):]
00579                 break
00580 
00581         if self.debug and original_filename not in self.processed_paths:
00582             if new_filename != original_filename:
00583                 self.msgout(2, "co_filename %r changed to %r" \
00584                                     % (original_filename,new_filename,))
00585             else:
00586                 self.msgout(2, "co_filename %r remains unchanged" \
00587                                     % (original_filename,))
00588             self.processed_paths.append(original_filename)
00589 
00590         consts = list(co.co_consts)
00591         for i in range(len(consts)):
00592             if isinstance(consts[i], type(co)):
00593                 consts[i] = self.replace_paths_in_code(consts[i])
00594 
00595         return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
00596                          co.co_flags, co.co_code, tuple(consts), co.co_names,
00597                          co.co_varnames, new_filename, co.co_name,
00598                          co.co_firstlineno, co.co_lnotab,
00599                          co.co_freevars, co.co_cellvars)
00600 
00601 
def test():
    """Command-line driver: analyse a script and print the report.

    Options: -d raises the debug level, -q silences it, -m treats the
    remaining extra arguments as module names, -p adds os.pathsep
    separated directories to the search path, -x excludes a module.
    The first argument is the script (default "hello.py").  Returns the
    ModuleFinder, or None when the options cannot be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath = addpath + a.split(os.pathsep)
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = list(sys.path)
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print("   ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
00659 
00660 
if __name__ == '__main__':
    # Keep the finder in a module-level name so that it can be inspected
    # afterwards when the interpreter was started with -i.
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupt]")