Back to index

moin  1.9.0~rc2
text_csv.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     MoinMoin - Parser for CSV data
00004 
00005     This parser uses the databrowser widget to display the data.
00006 
00007     It supports the following parser arguments:
00008 
     * delimiter/separator: the delimiter to use instead of the
                            auto-detected one (fallback is ;)
00010      * quotechar: quoting character, default off, must be ascii!
00011      * show: comma-separated list of columns to show only
00012      * hide: comma-separated list of columns to hide
00013      * autofilter: comma-separated list of columns to equip with
00014                    auto-filter drop down
00015      * name: name of the dataset
00016      * link: comma separated list of columns that take links, separate
00017              the link and the description with a space
00018      * static_cols: comma-separated list of columns that are static
00019                     and present in each row
00020      * static_vals: comma-separated list of values for those static
00021                     columns
00022 
    The static column feature is only really useful if the dataset is
    postprocessed by some other plugin collecting data from multiple
    wiki pages.
00026 
00027     @copyright: 2007, 2008 Johannes Berg <johannes@sipsolutions.net>
00028     @license: GNU GPL, see COPYING for details.
00029 """
00030 
00031 from csv import reader, QUOTE_NONE, QUOTE_MINIMAL, Sniffer
00032 from _csv import Error
00033 
00034 from MoinMoin.util.dataset import TupleDataset, Column
00035 from MoinMoin.widget.browser import DataBrowserWidget
00036 from MoinMoin.wikiutil import escape
00037 
00038 
# NOTE(review): this module-level list differs from Parser.Dependencies ([]).
# Presumably both declare MoinMoin caching dependencies -- confirm which one
# the framework actually reads before changing either.
Dependencies = ['time']
00040 
class Parser:
    """Parse CSV text into a TupleDataset and render it with DataBrowserWidget.

    The constructor does all the parsing work and leaves the result in
    `self.data`; `format()` only hands that dataset to the widget.
    """

    extensions = ['.csv']
    Dependencies = []

    def _read_rows(self, r):
        """Yield the data rows that are still to be processed.

        @param r: iterable of rows (the csv reader, already advanced past
                  the lines consumed while looking for a header)
        @return: generator yielding `self._first_row` first (when it is not
                 None) followed by every row of `r`, all unmodified
        """
        if self._first_row is not None:
            yield self._first_row
        for row in r:
            yield row

    def _format_cell(self, formatter, item, is_link):
        """Build the (display, value) tuple stored for a single cell.

        @param formatter: formatter used to emit the link markup
        @param item: decoded cell text
        @param is_link: True if the cell belongs to a `link` column, i.e.
                        its text has the form "<url> <description>"
        @return: tuple (display markup, value); for a link cell containing
                 a space the value is the description only
        """
        if is_link:
            parts = item.split(' ', 1)
            if len(parts) == 2:
                url, item = parts
                if url:
                    display = ''.join([
                        formatter.url(1, url=url),
                        formatter.text(item),
                        formatter.url(0)])
                    return (display, item)
        # plain cell, link cell without a space, or link cell with empty url
        return (escape(item), item)

    def __init__(self, raw, request, **kw):
        """Parse the CSV text and the parser arguments.

        @param raw: CSV text; it is encoded to UTF-8 for csv.reader and
                    every cell is decoded again afterwards
        @param request: request object; its `formatter` renders link cells
                        and the object is kept for format()
        @param kw: only `format_args` is read, a space-separated list of
                   `key=value` parser arguments (plus the legacy forms
                   `-N` to hide column N and a bare delimiter)
        """
        self.request = request
        # First data row of a header-less table, kept *undecoded* so that
        # the row loop below decodes every row exactly once.
        self._first_row = None
        formatter = request.formatter

        # workaround csv.reader deficiency by encoding to utf-8
        # removes empty lines in front of the csv table
        lines = raw.encode('utf-8').lstrip('\n').split('\n')

        delimiter = ';'
        # Previous versions of this parser have used only the delimiter ";" (by default).
        # This version now tries to sniff the delimiter from the list preferred_delimiters
        # Although the Python csv sniffer had quite some changes from py 2.3 to 2.5.1, we try
        # to avoid problems for the case it does not find a delimiter in some given data.
        # Newer versions of the sniffer do raise an _csv.Error while older versions do
        # return a whitespace as delimiter.
        if lines[0]:
            try:
                preferred_delimiters = [',', '\t', ';', ' ', ':']
                delimiter = Sniffer().sniff(lines[0], preferred_delimiters).delimiter or ';'
            except Error:
                pass

        visible = None
        hiddenindexes = []
        hiddencols = []
        autofiltercols = []
        staticcols = []
        staticvals = []
        linkcols = []
        quotechar = '\x00' # can't be entered
        quoting = QUOTE_NONE
        name = None

        # The arguments are themselves parsed as one space-delimited CSV line.
        format_args = (kw.get('format_args') or '').strip()
        hdr = reader([format_args.encode('utf-8')], delimiter=" ")
        args = hdr.next()

        for arg in args:
            arg = arg.decode('utf-8')
            if not arg:
                # consecutive spaces yield empty arguments; without this
                # guard they would overwrite the delimiter with ''
                continue
            try:
                key, val = arg.split('=', 1)
            except ValueError:
                # no '=' present:
                # handle compatibility with original 'csv' parser
                if arg.startswith('-'):
                    try:
                        hiddenindexes.append(int(arg[1:]) - 1)
                    except ValueError:
                        pass
                else:
                    delimiter = arg.encode('utf-8')
                continue
            if key == 'separator' or key == 'delimiter':
                delimiter = val.encode('utf-8')
            elif key == 'quotechar':
                # only ascii quote characters are accepted, others are ignored
                try:
                    quotechar = val.encode('ascii')
                except UnicodeError:
                    pass
                else:
                    quoting = QUOTE_MINIMAL
            elif key == 'show':
                visible = val.split(',')
            elif key == 'hide':
                hiddencols = val.split(',')
            elif key == 'autofilter':
                autofiltercols = val.split(',')
            elif key == 'name':
                name = val
            elif key == 'static_cols':
                staticcols = val.split(',')
            elif key == 'static_vals':
                staticvals = val.split(',')
            elif key == 'link':
                linkcols = val.split(',')

        # make staticvals exactly as long as staticcols
        missing = len(staticcols) - len(staticvals)
        if missing > 0:
            staticvals.extend([''] * missing)
        else:
            staticvals = staticvals[:len(staticcols)]

        r = reader(lines, delimiter=delimiter, quotechar=quotechar, quoting=quoting)
        cols = [cell.decode('utf-8') for cell in r.next()] + staticcols

        self._show_header = True

        if cols == staticcols:
            # The first line had no cells: treat the table as header-less,
            # take the column count from the next row and replay that row
            # as data through _read_rows().
            try:
                self._first_row = r.next()
            except StopIteration:
                pass
            else:
                cols = [None] * len(self._first_row) + staticcols
                self._show_header = False

        num_entry_cols = len(cols) - len(staticcols)

        if visible is not None:
            # 'show' given: every column not listed is hidden
            hiddencols.extend([col for col in cols if col not in visible])

        linkparse = [col in linkcols for col in cols]

        dataset = TupleDataset(name)
        for colidx, col in enumerate(cols):
            autofilter = col in autofiltercols
            hidden = col in hiddencols or colidx in hiddenindexes
            dataset.columns.append(Column(col, autofilter=autofilter, hidden=hidden))

        for raw_row in self._read_rows(r):
            row = [cell.decode('utf-8') for cell in raw_row]
            # cut or pad the row to the number of non-static columns
            row = row[:num_entry_cols]
            row.extend([''] * (num_entry_cols - len(row)))
            row += staticvals
            cells = [self._format_cell(formatter, item, is_link)
                     for is_link, item in zip(linkparse, row)]
            dataset.addRow(tuple(cells))
        self.data = dataset

    def format(self, formatter):
        """Render the parsed dataset and write it to the request.

        @param formatter: not used here; link cells were already rendered
                          in __init__ with request.formatter
        """
        browser = DataBrowserWidget(self.request, show_header=self._show_header)
        browser.setData(self.data)
        self.request.write(browser.render(method="GET"))