Back to index

python3.2  3.2.2
ftpmirror.py
Go to the documentation of this file.
00001 #! /usr/bin/env python3
00002 
00003 """Mirror a remote ftp subtree into a local directory tree.
00004 
00005 usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
00006                  [-l username [-p passwd [-a account]]]
00007                  hostname[:port] [remotedir [localdir]]
00008 -v: verbose
00009 -q: quiet
00010 -i: interactive mode
00011 -m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
00012 -n: don't log in
00013 -r: remove local files/directories no longer pertinent
00014 -l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
00015 -s pat: skip files matching pattern
00016 hostname: remote host w/ optional port separated by ':'
00017 remotedir: remote directory (default initial)
00018 localdir: local directory (default current)
00019 """
00020 
00021 import os
00022 import sys
00023 import time
00024 import getopt
00025 import ftplib
00026 import netrc
00027 from fnmatch import fnmatch
00028 
00029 # Print usage message and exit
def usage(*args):
    """Print each message in *args* and the module docstring, then exit.

    Everything is written to sys.stderr and the process exits with
    status 2 (raised as SystemExit by sys.exit).  The output is sent to
    stderr explicitly rather than by rebinding sys.stdout, so the
    interpreter's stdout is left untouched if the SystemExit is caught.
    """
    for msg in args:
        print(msg, file=sys.stderr)
    print(__doc__, file=sys.stderr)
    sys.exit(2)
00035 
# Module-level option state.  main() rebinds these from the command line
# (via its `global` statement) and mirrorsubdir() reads them.
verbose = 1 # 0 for -q; each -v adds 1
interactive = 0  # set to 1 by -i
mac = 0  # set to 1 by -m
rmok = 0  # set to 1 by -r
nologin = 0  # set to 1 by -n and by -m
# fnmatch patterns for names that are never mirrored; -s and -m append to it.
skippats = ['.', '..', '.mirrorinfo']
00042 
00043 # Main program: parse command line and start processing
def main():
    """Parse the command line, connect to the FTP server and mirror it.

    Reads sys.argv, updates the module-level option flags, looks up
    default credentials in the user's .netrc, connects (and logs in
    unless -n/-m was given), changes to the requested remote directory
    and calls mirrorsubdir() for the local directory.

    Command-line errors (bad option, missing hostname, non-numeric port,
    too many arguments) are reported through usage(), which exits with
    status 2.  The FTP connection is closed when mirroring finishes or
    an exception propagates.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        # 'b' is accepted by the option string but nothing below acts on it.
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args:
        usage('hostname missing')
    host = args[0]
    port = 0
    if ':' in host:
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            # Report a bad port like any other command-line error
            # instead of dying with a traceback.
            usage('invalid port number: %r' % (port,))
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            # netrc may report missing fields as None; ftplib expects
            # strings, so normalize them to ''.
            login, account, passwd = [field or '' for field in auth]
    except (netrc.NetrcParseError, IOError):
        # No usable .netrc: fall back to the defaults / command line.
        pass
    # Command-line options override whatever .netrc supplied.
    for o, a in opts:
        if o == '-l':
            login = a
        elif o == '-p':
            passwd = a
        elif o == '-a':
            account = a
        elif o == '-v':
            verbose = verbose + 1
        elif o == '-q':
            verbose = 0
        elif o == '-i':
            interactive = 1
        elif o == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif o == '-n':
            nologin = 1
        elif o == '-r':
            rmok = 1
        elif o == '-s':
            skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
        if args[2:]:
            localdir = args[2]
            if args[3:]:
                usage('too many arguments')
    #
    f = ftplib.FTP()
    try:
        if verbose:
            print("Connecting to '%s%s'..." % (host,
                                               (":%d" % port) if port else ""))
        f.connect(host, port)
        if not nologin:
            if verbose:
                print('Logging in as %r...' % (login or 'anonymous'))
            f.login(login, passwd, account)
        if verbose:
            print('OK.')
        pwd = f.pwd()
        if verbose > 1:
            print('PWD =', repr(pwd))
        if remotedir:
            if verbose > 1:
                print('cwd(%s)' % repr(remotedir))
            f.cwd(remotedir)
            if verbose > 1:
                print('OK.')
            pwd = f.pwd()
            if verbose > 1:
                print('PWD =', repr(pwd))
        #
        mirrorsubdir(f, localdir)
    finally:
        # Always release the control connection, even on errors.
        f.close()
00103 
00104 # Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    """Mirror the FTP server's current directory into *localdir*, recursively.

    f        -- a connected ftplib.FTP object; its current remote
                directory is the one being mirrored.
    localdir -- local target directory ('' means the current directory);
                created if it does not exist.

    Steps, in order:
      1. read the per-directory '.mirrorinfo' bookkeeping file;
      2. LIST the remote directory, download every file whose listing
         details differ from the recorded ones (via a '@'-prefixed temp
         file) and update '.mirrorinfo' after each one;
      3. drop bookkeeping entries for files no longer listed;
      4. report (or, with -r, remove) local files no longer pertinent;
      5. recurse into each remote subdirectory.

    Behaviour is controlled by the module globals verbose, interactive,
    mac, rmok and skippats.  Returns None.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose: print('Creating local directory', repr(localdir))
        try:
            makedir(localdir)
        except os.error:
            print("Failed to establish local directory", repr(localdir))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    try:
        # Use a context manager so the handle is closed promptly.
        with open(infofilename, 'r') as infofile:
            text = infofile.read()
    except IOError:
        text = '{}'
    try:
        # NOTE(security): eval() executes whatever is in .mirrorinfo.  The
        # file is normally produced by writedict(), but anyone able to
        # write to the local tree can plant code here.
        info = eval(text)
    except (SyntaxError, NameError):
        print('Bad mirror info in', repr(infofilename))
        info = {}
    subdirs = []
    listing = []
    if verbose: print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)
    # Set rather than list: only used for membership tests below.
    filesfound = set()
    for line in listing:
        if verbose > 1: print('-->', repr(line))
        # Reset per line so a symlink target from an earlier entry can
        # never be reused for this one.
        linkto = None
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = line.strip()
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = line.split(None, 8)
            if len(words) < 6:
                if verbose > 1: print('Skipping short line')
                continue
            filename = words[-1].lstrip()
            i = filename.find(" -> ")
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print('Found symbolic link %r' % (filename,))
                linkto = filename[i+4:]
                filename = filename[:i]
            # The four fields before the name are what identifies a
            # "version" of the file in .mirrorinfo.
            infostuff = words[-5:-1]
            mode = words[0]
        skip = 0
        for pat in skippats:
            if fnmatch(filename, pat):
                if verbose > 1:
                    print('Skip pattern', repr(pat), end=' ')
                    print('matches', repr(filename))
                skip = 1
                break
        if skip:
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory', repr(filename))
            subdirs.append(filename)
            continue
        filesfound.add(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of',repr(filename))
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@'+filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        # Clear any stale temp file left by an earlier run.
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                # Mode says symlink but the listing had no " -> target".
                print("Can't create %r: no link target in listing"
                      % (tempname,))
                continue
            if verbose:
                print("Creating symlink %r -> %r" % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except os.error as msg:
                # os.symlink reports failure as OSError (os.error).
                print("Can't create %r: %s" % (tempname, msg))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
            if verbose:
                print('Retrieving %r from %r as %r...' % (filename, pwd, fullname))
            if verbose:
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            t0 = time.time()
            try:
                try:
                    f.retrbinary('RETR ' + filename,
                                 fp1.write, 8*1024)
                except ftplib.error_perm as msg:
                    # NOTE(review): after a refused RETR the (possibly
                    # empty) temp file is still installed and recorded
                    # below -- confirm this is intended.
                    print(msg)
                t1 = time.time()
                nbytes = fp.tell()
            finally:
                # Close the temp file even if the transfer blew up.
                fp.close()
            if fp1 is not fp:
                fp1.close()
        try:
            os.unlink(fullname)
        except os.error:
            pass            # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %r to %r: %s" % (tempname, fullname, msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            print(int(round(kbytes)), end=' ')
            print('Kbytes in', end=' ')
            print(int(round(dt)), end=' ')
            print('seconds', end=' ')
            if t1 > t0:
                # Stay on the same line; the bare print() below ends it.
                print('(~%d Kbytes/sec)' % int(round(kbytes/dt)), end=' ')
            print()
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in list(info.keys()):
        if filename not in filesfound:
            if verbose:
                print("Removing obsolete info entry for", end=' ')
                print(repr(filename), "in", repr(localdir or "."))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        if not localdir: names = os.listdir(os.curdir)
        else: names = os.listdir(localdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        skip = 0
        for pat in skippats:
            if fnmatch(name, pat):
                if verbose > 1:
                    print('Skip pattern', repr(pat), end=' ')
                    print('matches', repr(name))
                skip = 1
                break
        if skip:
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file', repr(fullname), end=' ')
                print('is no longer pertinent')
            continue
        if verbose: print('Removing local file/dir', repr(fullname))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit: continue
        if verbose: print('Processing subdirectory', repr(subdir))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now:', repr(pwd))
            print('Remote cwd', repr(subdir))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to", repr(subdir), ":", repr(msg))
        else:
            if verbose: print('Mirroring as', repr(localsubdir))
            mirrorsubdir(f, localsubdir)
            if verbose > 1: print('Remote cwd ..')
            f.cwd('..')
        # Sanity check: 'cd subdir' + 'cd ..' must return to where we
        # started, otherwise later relative operations would go astray.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1: print('OK.')
00309 
00310 # Helper to remove a file or directory tree
def remove(fullname):
    """Delete a file, symlink or whole directory tree.

    Returns 1 on success and 0 if anything could not be removed; a
    message is printed for each failed unlink/rmdir.  Every entry of a
    directory is attempted even after one of them fails.
    """
    is_real_dir = os.path.isdir(fullname) and not os.path.islink(fullname)
    if not is_real_dir:
        # Plain files and symlinks (including links to directories).
        try:
            os.unlink(fullname)
        except os.error as msg:
            print("Can't remove local file %r: %s" % (fullname, msg))
            return 0
        return 1

    try:
        entries = os.listdir(fullname)
    except os.error:
        entries = []
    # A list (not a generator) so that every child is tried.
    outcomes = [remove(os.path.join(fullname, entry)) for entry in entries]
    if not all(outcomes):
        return 0
    try:
        os.rmdir(fullname)
    except os.error as msg:
        print("Can't remove local directory %r: %s" % (fullname, msg))
        return 0
    return 1
00335 
00336 # Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Write-through wrapper that prints a '#' for every full block written.

    fp        -- underlying file object that receives the data
    blocksize -- number of bytes represented by one '#'
    outfp     -- text stream on which the progress marks are written
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0      # total bytes passed to write() so far
        self.hashes = 0     # number of '#' marks already emitted
        self.blocksize = blocksize
        self.outfp = outfp

    def write(self, data):
        """Forward *data* to the wrapped file, emitting any due '#' marks."""
        self.bytes = self.bytes + len(data)
        # Floor division: only completed blocks earn a mark.  True
        # division would yield a fraction and print a '#' too early.
        hashes = self.bytes // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        """Terminate the progress line; the wrapped file is not closed."""
        self.outfp.write('\n')
00354 
def raw_input(prompt):
    """Show *prompt* on stdout and return the next line read from stdin.

    The returned line keeps its trailing newline; at end of input the
    result is whatever sys.stdin.readline() returns.
    """
    out = sys.stdout
    out.write(prompt)
    out.flush()  # make sure the prompt is visible before blocking
    answer = sys.stdin.readline()
    return answer
00359 
00360 # Ask permission to download a file.
def askabout(filetype, filename, pwd):
    """Ask the user whether to retrieve an item; return 1 for yes, 0 for no.

    filetype -- word used in the prompt for the kind of item
    filename -- name of the item
    pwd      -- remote directory shown in the prompt

    An empty reply counts as "no".  Any other unrecognised reply
    repeats the question.
    """
    question = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    affirmative = ('y', 'ye', 'yes')
    negative = ('', 'n', 'no', 'nop', 'nope')
    while True:
        answer = raw_input(question).strip().lower()
        if answer in affirmative:
            return 1
        if answer in negative:
            return 0
        print('Please answer yes or no.')
00370 
00371 # Create a directory if it doesn't exist.  Recursively create the
00372 # parent directory as well if needed.
def makedir(pathname):
    """Create directory *pathname*, creating missing parents first.

    Does nothing if *pathname* is already a directory.  Each directory
    is created with os.mkdir(..., 0o777); errors from os.mkdir propagate.
    """
    if os.path.isdir(pathname):
        return
    # Walk upwards collecting every ancestor that still has to be made.
    missing = [pathname]
    parent = os.path.dirname(pathname)
    while parent and not os.path.isdir(parent):
        missing.append(parent)
        parent = os.path.dirname(parent)
    # Create them outermost first.
    for path in reversed(missing):
        os.mkdir(path, 0o777)
00379 
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write *dict* to *filename* as a Python dict literal, one item per line.

    The text is first written to a '@'-prefixed temp file in the same
    directory.  Any existing *filename* is then renamed to
    *filename* + '~' (replacing an older backup) and the temp file is
    renamed into place.

    Errors from opening/writing the temp file or from the final rename
    propagate.  Failures to delete the old backup or to create the new
    one are ignored.
    """
    dirpart, fname = os.path.split(filename)
    tempname = os.path.join(dirpart, '@' + fname)
    backup = os.path.join(dirpart, fname + '~')
    try:
        os.unlink(backup)
    except os.error:
        pass  # no previous backup to remove
    # 'with' guarantees the temp file is closed even if a write fails.
    with open(tempname, 'w') as fp:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    try:
        os.rename(filename, backup)
    except os.error:
        pass  # first write: nothing to back up
    os.rename(tempname, filename)
00402 
00403 
# Run the mirror only when executed as a script, not when imported.
if __name__ == '__main__':
    main()