Back to index

obnam  1.1
sftp_plugin.py
Go to the documentation of this file.
00001 # Copyright (C) 2009  Lars Wirzenius <liw@liw.fi>
00002 #
00003 # This program is free software; you can redistribute it and/or modify
00004 # it under the terms of the GNU General Public License as published by
00005 # the Free Software Foundation; either version 2 of the License, or
00006 # (at your option) any later version.
00007 #
00008 # This program is distributed in the hope that it will be useful,
00009 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 # GNU General Public License for more details.
00012 #
00013 # You should have received a copy of the GNU General Public License along
00014 # with this program; if not, write to the Free Software Foundation, Inc.,
00015 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00016 
00017 
import errno
import functools
import getpass
import hashlib
import logging
import os
import pwd
import random
import socket
import stat
import subprocess
import time
import urlparse
00029 
00030 # As of 2010-07-10, Debian's paramiko package triggers
00031 # RandomPool_DeprecationWarning. This will eventually be fixed. Until
00032 # then, there is no point in spewing the warning to the user, who can't
# do anything about it.
00034 # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=586925
00035 import warnings
00036 with warnings.catch_warnings():
00037     warnings.simplefilter('ignore')
00038     import paramiko
00039 
00040 import obnamlib
00041 
00042 
# Port used by SftpFS.reinit when the sftp URL does not specify one.
DEFAULT_SSH_PORT = 22
00044 
00045 
def ioerror_to_oserror(method):
    '''Decorator to convert an IOError exception to OSError.

    Python's os.* raise OSError, mostly, but paramiko's corresponding
    methods raise IOError. This decorator fixes that.

    The decorated method must take a filename as its first argument
    after self; that filename is attached to the OSError that is
    raised. The wrapper keeps the name and docstring of the wrapped
    method.

    '''

    @functools.wraps(method)
    def helper(self, filename, *args, **kwargs):
        try:
            return method(self, filename, *args, **kwargs)
        except IOError as e:
            # Some IOErrors carry no strerror; fall back to their text.
            raise OSError(e.errno, e.strerror or str(e), filename)

    return helper
00061 
00062 
class SSHChannelAdapter(object):

    '''Take an ssh subprocess and pretend it is a paramiko Channel.

    Provides send, recv, get_name and close on top of the stdin and
    stdout pipes of the subprocess given to the constructor.

    '''

    # This is inspired by the ssh.py module in bzrlib.

    def __init__(self, proc):
        # proc must have stdin and stdout attributes with fileno() and
        # close() methods, and a wait() method.
        self.proc = proc

    def send(self, data):
        '''Write data to the subprocess's stdin; return bytes written.'''
        return os.write(self.proc.stdin.fileno(), data)

    def recv(self, count):
        '''Read up to count bytes from the subprocess's stdout.

        Returns an empty string if the pipe has been closed.

        '''
        try:
            return os.read(self.proc.stdout.fileno(), count)
        except (OSError, socket.error) as e:
            # os.read reports failures as OSError, not socket.error, so
            # both are caught here.
            if e.args[0] in (errno.EPIPE, errno.ECONNRESET, errno.ECONNABORTED,
                             errno.EBADF):
                # Connection has closed.  Paramiko expects an empty string in
                # this case, not an exception.
                return ''
            raise

    def get_name(self):
        '''Return a fixed name identifying this adapter.'''
        return 'obnam SSHChannelAdapter'

    def close(self):
        '''Close both pipes and wait for the subprocess, ignoring OSError.'''
        logging.debug('SSHChannelAdapter.close called')
        for func in [self.proc.stdin.close, self.proc.stdout.close,
                     self.proc.wait]:
            try:
                func()
            except OSError:
                # Best effort: one failing step must not stop the rest.
                pass
00097 
00098 
class SftpFS(obnamlib.VirtualFileSystem):

    '''A VFS implementation for SFTP.

    The base URL has the form sftp://[user@]host[:port]/path. connect()
    opens the connection, either through an ssh subprocess or through
    paramiko's own transport, and file operations then go through the
    paramiko.SFTPClient stored in self.sftp.

    Most methods are wrapped with ioerror_to_oserror so that callers
    see OSError instead of paramiko's IOError.

    '''

    # 32 KiB is the chunk size that gives me the fastest speed
    # for sftp transfers. I don't know why the size matters.
    chunk_size = 32 * 1024

    def __init__(self, baseurl, create=False, settings=None):
        obnamlib.VirtualFileSystem.__init__(self, baseurl)
        self.sftp = None
        # Set here so that close() works even if connect() was never called.
        self.transport = None
        self.settings = settings
        self._roundtrips = 0
        self.reinit(baseurl, create=create)

    def _delay(self):
        '''Count one roundtrip and sleep for the sftp-delay setting, if set.'''
        self._roundtrips += 1
        if self.settings:
            ms = self.settings['sftp-delay']
            if ms > 0:
                time.sleep(ms * 0.001)

    def log_stats(self):
        '''Log the base class statistics plus the roundtrip count.'''
        obnamlib.VirtualFileSystem.log_stats(self)
        logging.info('VFS: baseurl=%s roundtrips=%s' %
                         (self.baseurl, self._roundtrips))

    def _to_string(self, str_or_unicode):
        '''Return the argument as a byte string, encoding unicode as UTF-8.'''
        if type(str_or_unicode) is unicode:
            return str_or_unicode.encode('utf-8')
        else:
            return str_or_unicode

    def _create_root_if_missing(self):
        '''Try to create self.path once, ignoring any OSError.'''
        try:
            self.mkdir(self.path)
        except OSError:
            # sftp/paramiko does not give us a useful errno so we hope
            # for the best
            pass
        self.create_path_if_missing = False # only create once

    def connect(self):
        '''Open the SFTP connection and change to the repository path.

        The openssh client is tried first unless the pure-paramiko
        setting is enabled; paramiko is used if openssh is not tried
        or could not be started.

        '''
        try_openssh = not self.settings or not self.settings['pure-paramiko']
        if not try_openssh or not self._connect_openssh():
            self._connect_paramiko()
        if self.create_path_if_missing:
            self._create_root_if_missing()
        self.chdir(self.path)

    def _connect_openssh(self):
        '''Start an ssh subprocess running the sftp subsystem.

        Returns True if the subprocess was started, False if starting
        it raised OSError.

        '''
        args = ['ssh',
                '-oForwardX11=no', '-oForwardAgent=no',
                '-oClearAllForwardings=yes', '-oProtocol=2',
                '-p', str(self.port),
                '-l', self.user,
                '-s']
        if self.settings and self.settings['ssh-key']:
            args += ['-i', self.settings['ssh-key']]
        if self.settings and self.settings['strict-ssh-host-keys']:
            args += ['-o', 'StrictHostKeyChecking=yes']
        if self.settings and self.settings['ssh-known-hosts']:
            args += ['-o',
                     'UserKnownHostsFile=%s' %
                        self.settings['ssh-known-hosts']]
        args += [self.host, 'sftp']

        logging.debug('executing openssh: %s' % args)
        try:
            proc = subprocess.Popen(args,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    close_fds=True)
        except OSError:
            return False

        # No paramiko transport is involved when going through openssh.
        self.transport = None
        self.sftp = paramiko.SFTPClient(SSHChannelAdapter(proc))
        return True

    def _connect_paramiko(self):
        '''Connect with paramiko, check the host key and authenticate.'''
        logging.debug('connect_paramiko: host=%s port=%s' % (self.host, self.port))
        self.transport = paramiko.Transport((self.host, self.port))
        self.transport.connect()
        logging.debug('connect_paramiko: connected')
        try:
            self._check_host_key(self.host)
        except BaseException:
            # Do not leave a transport open to a host we do not trust.
            self.transport.close()
            self.transport = None
            raise
        logging.debug('connect_paramiko: host key checked')
        self._authenticate(self.user)
        logging.debug('connect_paramiko: authenticated')
        self.sftp = paramiko.SFTPClient.from_transport(self.transport)
        logging.debug('connect_paramiko: end')

    def _check_host_key(self, hostname):
        '''Check the key offered by the server against the known hosts file.

        Raises obnamlib.Error if the offered key differs from the known
        key of the same type, or if no matching key is known and the
        strict-ssh-host-keys setting is enabled. Without that setting,
        an unknown host or key type is accepted with a warning.

        '''
        logging.debug('checking ssh host key for %s' % hostname)

        offered_key = self.transport.get_remote_server_key()

        known_hosts_path = self.settings['ssh-known-hosts']
        known_hosts = paramiko.util.load_host_keys(known_hosts_path)

        known_keys = known_hosts.lookup(hostname)
        if known_keys is None:
            if self.settings['strict-ssh-host-keys']:
                raise obnamlib.Error('No known host key for %s' % hostname)
            logging.warning('No known host keys for %s; accepting offered key'
                            % hostname)
            return

        offered_type = offered_key.get_name()
        if offered_type not in known_keys:
            if self.settings['strict-ssh-host-keys']:
                raise obnamlib.Error('No known type %s host key for %s' %
                                     (offered_type, hostname))
            logging.warning('No known host key of type %s for %s; accepting '
                            'offered key' % (offered_type, hostname))
            # There is no known key to compare with; without this return
            # the lookup below would raise KeyError.
            return

        known_key = known_keys[offered_type]
        if offered_key != known_key:
            raise obnamlib.Error('SSH server %s offered wrong public key' %
                                 hostname)

        logging.debug('Host key for %s OK' % hostname)

    def _authenticate(self, username):
        '''Try each available key in turn; raise obnamlib.Error if all fail.'''
        for key in self._find_auth_keys():
            try:
                self.transport.auth_publickey(username, key)
                return
            except paramiko.SSHException:
                pass
        raise obnamlib.Error('Can\'t authenticate to SSH server using key.')

    def _find_auth_keys(self):
        '''Return the key from the ssh-key setting, or else the agent's keys.'''
        if self.settings and self.settings['ssh-key']:
            return [self._load_from_key_file(self.settings['ssh-key'])]
        else:
            return self._load_from_agent()

    def _load_from_key_file(self, filename):
        '''Load an RSA private key, prompting for a password if needed.'''
        try:
            key = paramiko.RSAKey.from_private_key_file(filename)
        except paramiko.PasswordRequiredException:
            password = getpass.getpass('RSA key password for %s: ' %
                                        filename)
            key = paramiko.RSAKey.from_private_key_file(filename, password)
        return key

    def _load_from_agent(self):
        '''Return the keys offered by the ssh agent.'''
        agent = paramiko.Agent()
        return agent.get_keys()

    def close(self):
        '''Close the SFTP client and transport, if they are open.'''
        logging.debug('SftpFS.close called')
        if self.sftp is not None:
            self.sftp.close()
            self.sftp = None
        if self.transport:
            self.transport.close()
            self.transport = None
        obnamlib.VirtualFileSystem.close(self)
        self._delay()

    @ioerror_to_oserror
    def reinit(self, baseurl, create=False):
        '''Parse baseurl and store host, port, user and path from it.

        If a connection is already open, also change to the new path,
        creating it first when create is true. Raises obnamlib.Error
        if the URL scheme is not sftp.

        '''
        scheme, netloc, path, query, fragment = urlparse.urlsplit(baseurl)

        if scheme != 'sftp':
            raise obnamlib.Error('SftpFS used with non-sftp URL: %s' % baseurl)

        if '@' in netloc:
            user, netloc = netloc.split('@', 1)
        else:
            user = self._get_username()

        if ':' in netloc:
            host, port = netloc.split(':', 1)
            port = int(port)
        else:
            host = netloc
            port = DEFAULT_SSH_PORT

        # A leading /~/ is stripped so that the path becomes relative.
        if path.startswith('/~/'):
            path = path[3:]

        self.host = host
        self.port = port
        self.user = user
        self.path = path
        self.create_path_if_missing = create

        self._delay()

        if self.sftp:
            if create:
                self._create_root_if_missing()
            self.sftp.chdir(path)

    def _get_username(self):
        '''Return the login name of the user running this process.'''
        return pwd.getpwuid(os.getuid()).pw_name

    def getcwd(self):
        '''Return the SFTP client's current directory as a byte string.'''
        self._delay()
        return self._to_string(self.sftp.getcwd())

    @ioerror_to_oserror
    def chdir(self, pathname):
        '''Change the SFTP client's current directory.'''
        self._delay()
        self.sftp.chdir(pathname)

    @ioerror_to_oserror
    def listdir(self, pathname):
        '''Return the names in a directory as byte strings.'''
        self._delay()
        return [self._to_string(x) for x in self.sftp.listdir(pathname)]

    def _force_32bit_timestamp(self, timestamp):
        '''Map an unsigned timestamp into the signed 32-bit range.

        None is passed through. Values that are still too large after
        wrapping are clamped to the largest signed 32-bit value.

        '''
        if timestamp is None:
            return None

        max_int32 = 2**31 - 1 # max positive 32 signed integer value
        if timestamp > max_int32:
            timestamp -= 2**32
            if timestamp > max_int32:
                timestamp = max_int32 # it's too large, need to lose info
        return timestamp

    def _fix_stat(self, pathname, st):
        '''Fill in missing stat fields on st and convert its timestamps.

        Modifies st in place and returns it.

        '''
        # SFTP and/or paramiko fail to return some of the required fields,
        # so we add them, using faked data.
        defaults = {
            'st_blocks': (st.st_size // 512) +
                         (1 if st.st_size % 512 else 0),
            'st_dev': 0,
            # Fake an inode number from the first 32 bits of the md5
            # of the pathname.
            'st_ino': int(hashlib.md5(pathname).hexdigest()[:8], 16),
            'st_nlink': 1,
        }
        for name, value in defaults.items():
            if not hasattr(st, name):
                setattr(st, name, value)

        # Paramiko seems to deal with unsigned timestamps only, at least
        # in version 1.7.6. We therefore force the timestamps into
        # a signed 32-bit value. This limits the range, but allows
        # timestamps that are negative (before 1970). Once paramiko is
        # fixed, this code can be removed.
        st.st_mtime_sec = self._force_32bit_timestamp(st.st_mtime)
        st.st_atime_sec = self._force_32bit_timestamp(st.st_atime)

        # Within Obnam, we pretend stat results have st_Xtime_sec and
        # st_Xtime_nsec, but not st_Xtime. Remove those fields.
        del st.st_mtime
        del st.st_atime

        # We only get integer timestamps, so set these explicitly to 0.
        st.st_mtime_nsec = 0
        st.st_atime_nsec = 0

        return st

    @ioerror_to_oserror
    def listdir2(self, pathname):
        '''Return (name, fixed stat result) pairs for a directory.'''
        self._delay()
        attrs = self.sftp.listdir_attr(pathname)
        pairs = [(self._to_string(st.filename), st) for st in attrs]
        fixed = [(name, self._fix_stat(name, st)) for name, st in pairs]
        return fixed

    def lock(self, lockname, data):
        '''Create the lock file; raise obnamlib.LockFail if that fails.'''
        try:
            self.write_file(lockname, data)
        except OSError:
            raise obnamlib.LockFail('Failure get lock %s' % lockname)

    def unlock(self, lockname):
        '''Remove the lock file, if it exists.'''
        self._remove_if_exists(lockname)

    def exists(self, pathname):
        '''Return True if lstat on pathname succeeds.'''
        try:
            self.lstat(pathname)
        except OSError:
            return False
        else:
            return True

    def isdir(self, pathname):
        '''Return True if pathname can be lstat'ed and is a directory.'''
        self._delay()
        try:
            st = self.lstat(pathname)
        except OSError:
            return False
        else:
            return stat.S_ISDIR(st.st_mode)

    def mknod(self, pathname, mode):
        # SFTP does not provide an mknod, so we can't do this. We
        # raise an exception, so upper layers can handle this (we _could_
        # just fail silently, but that would be silly.)
        raise NotImplementedError('mknod on SFTP: %s' % pathname)

    @ioerror_to_oserror
    def mkdir(self, pathname):
        '''Create a single directory.'''
        self._delay()
        self.sftp.mkdir(pathname)

    @ioerror_to_oserror
    def makedirs(self, pathname):
        '''Create a directory, first creating missing parents recursively.'''
        parent = os.path.dirname(pathname)
        if parent and parent != pathname and not self.exists(parent):
            self.makedirs(parent)
        self.mkdir(pathname)

    @ioerror_to_oserror
    def rmdir(self, pathname):
        '''Remove a directory.'''
        self._delay()
        self.sftp.rmdir(pathname)

    @ioerror_to_oserror
    def remove(self, pathname):
        '''Remove a file.'''
        self._delay()
        self.sftp.remove(pathname)

    def _remove_if_exists(self, pathname):
        '''Like remove, but OK if file does not exist.'''
        try:
            self.remove(pathname)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    @ioerror_to_oserror
    def rename(self, old, new):
        '''Rename old to new, removing any existing new first.'''
        self._delay()
        self._remove_if_exists(new)
        self.sftp.rename(old, new)

    @ioerror_to_oserror
    def lstat(self, pathname):
        '''Return the fixed-up lstat result for pathname.'''
        self._delay()
        st = self.sftp.lstat(pathname)
        self._fix_stat(pathname, st)
        return st

    @ioerror_to_oserror
    def lchown(self, pathname, uid, gid):
        '''Change ownership of pathname, unless it is a symlink.'''
        self._delay()
        if stat.S_ISLNK(self.lstat(pathname).st_mode):
            logging.warning('NOT changing ownership of symlink %s' % pathname)
        else:
            self.sftp.chown(pathname, uid, gid)

    @ioerror_to_oserror
    def chmod(self, pathname, mode):
        '''Change the permission bits of pathname.'''
        self._delay()
        self.sftp.chmod(pathname, mode)

    @ioerror_to_oserror
    def lutimes(self, pathname, atime_sec, atime_nsec, mtime_sec, mtime_nsec):
        '''Set access and modification times, in whole seconds only.

        The nanosecond arguments are ignored. A warning about symlinks
        is logged the first time this is called on an instance.

        '''
        # FIXME: This does not work for symlinks!
        # Sftp does not have a way of doing that. This means if the restore
        # target is over sftp, symlinks and their targets will have wrong
        # mtimes.
        self._delay()
        if not getattr(self, 'lutimes_warned', False):
            logging.warning('lutimes used over SFTP, this does not work '
                            'against symlinks (warning appears only first '
                            'time)')
            self.lutimes_warned = True
        self.sftp.utime(pathname, (atime_sec, mtime_sec))

    def link(self, existing_path, new_path):
        raise obnamlib.Error('Cannot hardlink on SFTP. Sorry.')

    def readlink(self, symlink):
        '''Return the target of a symlink as a byte string.'''
        self._delay()
        return self._to_string(self.sftp.readlink(symlink))

    @ioerror_to_oserror
    def symlink(self, source, destination):
        '''Create a symlink at destination pointing to source.'''
        self._delay()
        self.sftp.symlink(source, destination)

    def open(self, pathname, mode, bufsize=-1):
        '''Open a remote file and return paramiko's file object for it.'''
        self._delay()
        return self.sftp.file(pathname, mode, bufsize=bufsize)

    def cat(self, pathname):
        '''Return the entire contents of a remote file.'''
        self._delay()
        f = self.open(pathname, 'r')
        try:
            f.prefetch()
            chunks = []
            while True:
                chunk = f.read(self.chunk_size)
                if not chunk:
                    break
                chunks.append(chunk)
                self.bytes_read += len(chunk)
        finally:
            # Close the remote handle even if reading fails.
            f.close()
        return ''.join(chunks)

    @ioerror_to_oserror
    def write_file(self, pathname, contents):
        '''Write contents to a new file opened with mode 'wx'.

        If the first open fails with ENOENT, the parent directories
        are created and the open is retried once.

        '''
        try:
            f = self.open(pathname, 'wx')
        except (IOError, OSError) as e:
            if e.errno != errno.ENOENT:
                raise
            dirname = os.path.dirname(pathname)
            self.makedirs(dirname)
            f = self.open(pathname, 'wx')

        try:
            self._write_helper(f, contents)
        finally:
            # Close the remote handle even if writing fails.
            f.close()

    def _tempfile(self, dirname):
        '''Create a new file with a random name, return file handle and name.'''

        # NOTE(review): self.open is not wrapped by ioerror_to_oserror, so
        # under Python 2 an IOError from paramiko would not be caught by
        # the "except OSError" in the loop below -- confirm what is
        # intended before changing it.

        if dirname:
            try:
                self.makedirs(dirname)
            except OSError:
                # We ignore the error, on the assumption that it was due
                # to the directory already existing. If it didn't exist
                # and the error was for something else, then we'll catch
                # that when we open the file for writing.
                pass

        while True:
            i = random.randint(0, 2**64-1)
            basename = 'tmp.%x' % i
            pathname = os.path.join(dirname, basename)
            try:
                f = self.open(pathname, 'wx', bufsize=self.chunk_size)
            except OSError:
                pass
            else:
                return f, pathname

    @ioerror_to_oserror
    def overwrite_file(self, pathname, contents):
        '''Write contents to a temporary file, then rename it to pathname.'''
        self._delay()
        dirname = os.path.dirname(pathname)
        f, tempname = self._tempfile(dirname)
        try:
            self._write_helper(f, contents)
        finally:
            # Close the remote handle even if writing fails.
            f.close()
        self.rename(tempname, pathname)

    def _write_helper(self, f, contents):
        '''Write contents to f in chunk_size pieces, counting bytes written.'''
        for pos in range(0, len(contents), self.chunk_size):
            chunk = contents[pos:pos + self.chunk_size]
            f.write(chunk)
            self.bytes_written += len(chunk)
00557 
00558 
class SftpPlugin(obnamlib.ObnamPlugin):

    '''Plugin that adds the SSH/SFTP settings and the sftp VFS.'''

    def enable(self):
        '''Define the SFTP-related settings and register SftpFS for sftp.'''
        ssh_group = 'SSH/SFTP'
        obnamlib.option_group['ssh'] = ssh_group
        devel_group = obnamlib.option_group['devel']

        settings = self.app.settings

        settings.integer(
            ['sftp-delay'],
            'add an artificial delay (in milliseconds) '
            'to all SFTP transfers',
            group=devel_group)

        settings.string(
            ['ssh-key'],
            'use FILENAME as the ssh RSA private key for '
            'sftp access (default is using keys known '
            'to ssh-agent)',
            metavar='FILENAME',
            group=ssh_group)

        settings.boolean(
            ['strict-ssh-host-keys'],
            'require that the ssh host key must be '
            'known and correct to be accepted; '
            'default is to accept unknown keys',
            group=ssh_group)

        known_hosts_default = os.path.expanduser('~/.ssh/known_hosts')
        settings.string(
            ['ssh-known-hosts'],
            'filename of the user\'s known hosts file '
            '(default: %default)',
            metavar='FILENAME',
            default=known_hosts_default,
            group=ssh_group)

        settings.boolean(
            ['pure-paramiko'],
            'do not use openssh even if available, '
            'use paramiko only instead',
            group=ssh_group)

        # Make sftp:// URLs resolve to SftpFS, passing it the settings.
        self.app.fsf.register('sftp', SftpFS, settings=settings)
00597