Back to index

texmacs  1.0.7.15
line_breaker.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : line_breaker.cpp
00004 * DESCRIPTION: Line breaking facility for paragraphs
00005 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "Boxes/construct.hpp"
00013 #include "Format/line_item.hpp"
00014 #define PEN DI
00015 
00016 /******************************************************************************
00017 * Information about the best line breaks
00018 ******************************************************************************/
00019 
00020 struct lb_info_rep: concrete_struct {
00021   path prev;
00022   int  pen;
00023   PEN  pen_spc;
00024 
00025   lb_info_rep (): prev (), pen (HYPH_INVALID), pen_spc ((PEN) 1000000000) {}
00026 };
00027 
00028 struct lb_info {
00029   CONCRETE(lb_info);
00030   lb_info () { rep= tm_new<lb_info_rep> (); }
00031   operator tree () {
00032     return tuple ((tree) rep->prev,
00033                 as_string (rep->pen),
00034                 as_string ((double) rep->pen_spc)); }
00035 };
00036 CONCRETE_CODE(lb_info);
00037 
00038 tm_ostream&
00039 operator << (tm_ostream& out, lb_info hi) {
00040   return out << "[ " << hi->prev << ", "
00041             << hi->pen << ", " << hi->pen_spc << " ]";
00042 }
00043 
00044 // FIXME: explicit instanciation for broken g++
00045 #include "hashmap.hpp"
00046 template lb_info& hashmap_rep<list<int>, lb_info>::bracket_rw(list<int>);
00047 
00048 /******************************************************************************
00049 * The line_breaker class
00050 ******************************************************************************/
00051 
00052 struct line_breaker_rep {
00053   array<line_item> a;
00054   int start;
00055   int end;
00056   SI  line_width;
00057   SI  first_spc;
00058   SI  last_spc;
00059   int pass;
00060   hashmap<path,lb_info> best;
00061 
00062   line_breaker_rep (array<line_item> a, int start, int end,
00063                   SI line_width, SI first_spc, SI last_spc);
00064 
00065   void empty_line_fix (line_item& first, path& pos, int& cur_nr);
00066   path next_ragged_break (path pos);
00067   array<path> compute_ragged_breaks ();
00068 
00069   void test_better (path new_pos, path old_pos, int penalty, PEN pen_spc);
00070   bool propose_break (path new_pos, path old_pos, int penalty, space spc);
00071   void break_string (line_item item, path pos, int i, space spc);
00072   void process (path pos);
00073   void get_breaks (array<path>& ap, path p);
00074   array<path> compute_breaks ();
00075 };
00076 
00077 line_breaker_rep::line_breaker_rep (
00078   array<line_item> a2, int start2, int end2,
00079   SI line_width2, SI first_spc2, SI last_spc2):
00080     a (a2), start (start2), end (end2),
00081     line_width (line_width2), first_spc (first_spc2), last_spc (last_spc2),
00082     best (lb_info ()) {}
00083 
00084 /******************************************************************************
00085 * Some subroutines
00086 ******************************************************************************/
00087 
00088 static int
00089 get_position (font fn, string s, SI x) {
00090   int prev_i, prev_x=0, i=0, n=N(s);
00091   STACK_NEW_ARRAY (xpos, SI, n+1);
00092   fn->get_xpositions (s, xpos);
00093   while (i<n) {
00094     prev_i= i;
00095     if (s[i]=='<') {
00096       while ((i<n) && (s[i]!='>')) i++;
00097       if (i<n) i++;
00098     }
00099     else i++;
00100     int m= (prev_x + xpos[i]) >> 1;
00101     if (x<m) {
00102       STACK_DELETE_ARRAY (xpos);
00103       return prev_i;
00104     }
00105     prev_x= xpos[i];
00106   }
00107   STACK_DELETE_ARRAY (xpos);
00108   return i;
00109 }
00110 
00111 /*static*/ void
00112 hyphenate (line_item item, int pos, line_item& item1, line_item& item2) {
00113   path ip= item->b->ip;
00114   int  x1= is_accessible (ip)? item->b->get_leaf_left_pos (): 0;
00115   int  x2= is_accessible (ip)? x1+ pos+ 1: 0;
00116 
00117   box b= item->b;
00118   string s  = b->get_leaf_string ();
00119   font   fn = b->get_leaf_font ();
00120   color  col= b->get_leaf_color ();
00121 
00122   string s1, s2;
00123   array<int> hp= item->lan->get_hyphens (s);
00124   item->lan->hyphenate (s, pos, s1, s2);
00125   item1= line_item (STRING_ITEM, OP_SKIP,
00126                   shorter_box (ip, text_box (ip, x1, s1, fn, col), pos+ 1),
00127                   hp[pos], item->lan);
00128   item2= line_item (STRING_ITEM, item->op_type,
00129                     text_box (ip, x2, s2, fn, col),
00130                   item->penalty, item->lan);
00131   item2->spc= item->spc;
00132   // cout << s << " ---> " << s1 << " " << s2 << "\n";
00133 }
00134 
00135 /******************************************************************************
00136 * Naive line breaking for ragged paragraph types
00137 ******************************************************************************/
00138 
00139 void
00140 line_breaker_rep::empty_line_fix (line_item& first, path& pos, int& cur_nr) {
00141   // Fix for avoiding lines with only empty boxes
00142   int i;
00143   SI tot_spc= 0;  
00144   for (i= pos->item; i<end && i<=cur_nr; i++) {
00145     line_item cur_item = (i==pos->item? first: a[i]);
00146     tot_spc += cur_item->b->w() + cur_item->spc->def;
00147     if (tot_spc != 0) {
00148       if (i == cur_nr) {
00149        if (cur_item->spc->def != 0) return;
00150        i++;
00151       }
00152       else i= cur_nr;
00153       break;
00154     }
00155   }
00156   cur_nr= i;
00157   while (cur_nr<end) {
00158     line_item cur_item = (cur_nr-1==pos->item? first: a[cur_nr-1]);
00159     if (cur_item->spc->def != 0) break;
00160     cur_item= (cur_nr==pos->item? first: a[cur_nr]);
00161     if (cur_item->b->w()) break;
00162     cur_nr++;
00163   }
00164 }
00165 
00166 path
00167 line_breaker_rep::next_ragged_break (path pos) {
00168   int       cur_nr  = pos->item;
00169   line_item cur_item= a[cur_nr];
00170   SI        cur_spc;
00171 
00172   if (pos == path (start)) cur_spc= first_spc+ cur_item->b->w();
00173   else {
00174     path p= pos;
00175     while (!is_atom (p)) {
00176       line_item item1, item2;
00177       p= p->next;
00178       hyphenate (cur_item, p->item, item1, item2);
00179       cur_item= item2;
00180     }
00181     cur_spc= cur_item->b->w();
00182   }
00183   
00184   line_item first= cur_item;
00185   while (true) {
00186     cur_spc += cur_item->spc->def;
00187     if ((++cur_nr)==end) break;
00188     cur_item = a[cur_nr];
00189     cur_spc += cur_item->b->w();
00190     if (cur_spc > line_width) {
00191       // cout << "Overfull " << cur_spc << ", " << line_width << "\n";
00192       break;
00193     }
00194   }
00195 
00196   while (true) {
00197     if (cur_nr<end) {
00198       cur_spc -= cur_item->b->w();
00199       if ((cur_spc <= line_width) &&
00200          (cur_item->type==STRING_ITEM)) {
00201        string s= cur_item->b->get_leaf_string ();
00202        if (N(s)>4) {
00203          array<int> hp= cur_item->lan->get_hyphens (s);
00204          int i= get_position (cur_item->b->get_leaf_font (),
00205                             s, line_width- cur_spc);
00206          for (i= min (i+1, N(hp)-1); i>=0; i--)
00207            if (hp[i] < HYPH_INVALID) {
00208              line_item item1, item2;
00209              hyphenate (cur_item, i, item1, item2);
00210              if (cur_spc+item1->b->w() <= line_width)
00211               return (cur_nr>pos->item)?
00212                 path (cur_nr, i): pos * i;
00213            }
00214        }
00215       }
00216     }
00217     if ((--cur_nr)<pos->item) {
00218       do cur_nr++;
00219       while ((cur_nr<end) && (a[cur_nr]->penalty >= HYPH_INVALID));
00220       if (cur_nr<end) cur_nr++;
00221       empty_line_fix (first, pos, cur_nr);
00222       return path (cur_nr);
00223     }
00224     cur_item = (cur_nr==pos->item? first: a[cur_nr]);
00225     cur_spc -= cur_item->spc->def;
00226     if ((cur_spc <= line_width) &&
00227        ((cur_item->penalty < HYPH_INVALID) || (cur_nr==end-1))) {
00228       cur_nr++;
00229       empty_line_fix (first, pos, cur_nr);
00230       return path (cur_nr);
00231     }
00232   }
00233 }
00234 
00235 /******************************************************************************
00236 * Computation of the ragged line breaks
00237 ******************************************************************************/
00238 
00239 array<path>
00240 line_breaker_rep::compute_ragged_breaks () {
00241   array<path> ap;
00242   path p (start);
00243   while (p != path (end)) {
00244     ap << p;
00245     p= next_ragged_break (p);
00246   }
00247   ap << p;
00248   return ap;
00249 }
00250 
00251 /******************************************************************************
00252 * Test whether we found a better break
00253 ******************************************************************************/
00254 
00255 void
00256 line_breaker_rep::test_better (path new_pos, path old_pos,
00257                             int pen, PEN pen_spc)
00258 {
00259   if (!best->contains (new_pos)) best (new_pos)= lb_info ();
00260   lb_info cur= best [new_pos];
00261   //cout << "Test " << new_pos << " vs " << old_pos
00262   //     << ", " << pen << " vs " << cur->pen
00263   //     << ", " << pen_spc << " vs " << cur->pen_spc << "\n";
00264   if ((pen < cur->pen) ||
00265       ((pen == cur->pen) && (pen_spc < cur->pen_spc))) {
00266     cur->prev   = old_pos;
00267     cur->pen    = pen;
00268     cur->pen_spc= min (pen_spc, (PEN) 1000000000);
00269     //cout << "  Better\n";
00270   }
00271 }
00272 
00273 inline PEN square (PEN i) { return i*i; }
00274 
00275 bool
00276 line_breaker_rep::propose_break (path new_pos, path old_pos,
00277                              int pen, space spc)
00278 {
00279   lb_info cur= best[old_pos];
00280 
00281   if ((spc->min <= line_width) &&
00282       ((spc->max >= line_width) || (new_pos->item==end))) {
00283     SI d= max (line_width- spc->def, spc->def- line_width);
00284     if (new_pos->item==end) d=0;
00285     test_better (new_pos, old_pos, min (HYPH_INVALID, cur->pen + pen),
00286                cur->pen_spc + (cur->pen == HYPH_INVALID?
00287                                  ((PEN) 0): square ((PEN) (d / PIXEL))));
00288   }
00289   
00290   if (pass==2) {
00291     if (spc->max < line_width)
00292       test_better (new_pos, old_pos, HYPH_INVALID,
00293                  (cur->pen == HYPH_INVALID? cur->pen_spc: ((PEN) 0)) +
00294                  square ((PEN) ((line_width - spc->max)/PIXEL)) +
00295                  (new_pos->item==old_pos->item?
00296                   square ((PEN) (line_width / PIXEL)): ((PEN) 0)));
00297     if (spc->min > line_width)
00298       test_better (new_pos, old_pos, HYPH_INVALID,
00299                  (cur->pen == HYPH_INVALID? cur->pen_spc: ((PEN) 0)) +
00300                  square ((PEN) ((spc->min - line_width) / PIXEL)) +
00301                  square ((PEN) (4*line_width / PIXEL)));
00302   }
00303   
00304   return spc->min > line_width;
00305 }
00306 
00307 /******************************************************************************
00308 * Fill up a line starting from pos
00309 ******************************************************************************/
00310 
00311 void
00312 line_breaker_rep::break_string (line_item item, path pos, int i, space spc) {
00313   int j;
00314   string item_s= item->b->get_leaf_string ();
00315   array<int> hp= item->lan->get_hyphens (item_s);
00316 
00317   if ((item->b->w() > line_width) || (!is_atom (pos))) {
00318     j= get_position (item->b->get_leaf_font (), item_s, line_width- spc->def);
00319     for (j= min (j+2, N(hp)-1); j>=0; j--)
00320       if (hp[j] < HYPH_INVALID) {
00321        line_item item1, item2;
00322        hyphenate (item, j, item1, item2);
00323        path next= (i==pos->item)? pos * j: path (i, j);
00324        space spc_hyph= spc+ space (item1->b->w());
00325        if (spc_hyph->min <= line_width) {
00326          propose_break (next, pos, hp[j], spc_hyph->min);
00327          break;
00328        }
00329       }
00330   }
00331   else {
00332     for (j=0; j<N(hp); j++)
00333       if (hp[j] < HYPH_INVALID) {
00334        line_item item1, item2;
00335        hyphenate (item, j, item1, item2);
00336        path next= (i==pos->item)? pos * j: path (i, j);
00337        space spc_hyph= spc+ space (item1->b->w());
00338        (void) propose_break (next, pos, hp[j], spc_hyph);
00339       }
00340   }
00341 }
00342 
00343 void
00344 line_breaker_rep::process (path pos) {
00345   int i;
00346   space spc;
00347   line_item first;
00348 
00349   first= a[pos->item];
00350   if (pos == path (start)) spc= space (first_spc+ first->b->w());
00351   else {
00352     path p= pos;
00353     while (!is_atom (p)) {
00354       line_item item1, item2;
00355       p= p->next;
00356       hyphenate (first, p->item, item1, item2);
00357       first= item2;
00358     }
00359     spc= space (first->b->w());
00360   }
00361 
00362   if ((pass>1) || (best[pos]->pen < HYPH_INVALID)) {
00363     // cout << "Process " << pos << ": " << first << "\n";
00364     for (i=pos->item; i<end; i++) {
00365       line_item item= a[i];
00366       if (i == pos->item) item= first;
00367       else spc= spc+ a[i-1]->spc+ space (item->b->w());
00368       if ((spc->max > line_width) &&
00369          (item->type == STRING_ITEM) &&
00370          (N(item->b->get_leaf_string ())>4))
00371        break_string (item, pos, i, spc+ space (-item->b->w()));
00372       if (item->penalty < HYPH_INVALID)
00373        if (propose_break (path (i+1), pos, item->penalty, spc))
00374          break;
00375       if ((item->type == CONTROL_ITEM) &&
00376          (item->t == LINE_BREAK) &&
00377          (spc->min < line_width))
00378        if (propose_break (path (i+1), pos, 0, space (line_width)))
00379          break;
00380     }
00381     if (i==end) {
00382       line_width -= last_spc;
00383       propose_break (path (i), pos, 0, spc);
00384       line_width += last_spc;
00385     }
00386   }
00387 
00388   if (first->type == STRING_ITEM) {
00389     string first_s= first->b->get_leaf_string ();
00390     int n= N(first_s);
00391     if (n>4)
00392       for (i=0; i<n-1; i++)
00393        if (best-> contains (pos * i))
00394          process (pos * i);
00395   }
00396 }
00397 
00398 /******************************************************************************
00399 * Hyphenate an array of line_items
00400 ******************************************************************************/
00401 
00402 void
00403 line_breaker_rep::get_breaks (array<path>& ap, path p) {
00404   if (is_nil (p)) return;
00405   lb_info cur= best[p];
00406   get_breaks (ap, cur->prev);
00407   ap << p;
00408 }
00409 
00410 array<path>
00411 line_breaker_rep::compute_breaks () {
00412   int i;
00413   test_better (path (start), path (), 0, 0);
00414 
00415   pass= 1;
00416   for (i=start; i<end; i++)
00417     process (path (i));
00418 
00419   pass= 2;
00420   if (best [path (end)]->pen == HYPH_INVALID)
00421     for (i=start; i<end; i++)
00422       process (path (i));
00423 
00424   test_better (path (end), path (start), HYPH_INVALID, (PEN) 999999999);
00425 
00426   array<path> ap (0);
00427   get_breaks (ap, path (end));
00428 
00429   // Finish with fix for disallowing last lines with only empty boxes
00430   if (N(ap) <= 2 || !is_atom (ap[N(ap)-2])) return ap;
00431   for (i= ap[N(ap)-2]->item; i<end; i++)
00432     if (a[i]->b->w() + a[i]->spc->def != 0)
00433       return ap;
00434   ap[N(ap)-2]= ap[N(ap)-1];
00435   ap->resize (N(ap)-1);
00436   return ap;
00437 }
00438 
00439 /******************************************************************************
00440 * The exported line breaking routine
00441 *******************************************************************************
00442 * Given an array of line_items on input,
00443 * we compute an array of best line breaks.
00444 * These breaks are represented as paths:
00445 * the first element indicates a line_item.
00446 * The other elements correspond to successive hyphenations of
00447 * the line_item, if it was a STRING_ITEM.
00448 ******************************************************************************/
00449 
00450 array<path>
00451 line_breaks (array<line_item> a, int start, int end,
00452             SI line_width, SI first_spc, SI last_spc, bool ragged)
00453 {
00454   int tol= 5;         // extra tolerance of 5tmpt avoid rounding errors when
00455   line_width += tol;  // the widths of the boxes sum up to precisely 1par
00456   line_breaker_rep* H=
00457     tm_new<line_breaker_rep> (a, start, end, line_width, first_spc, last_spc);
00458   array<path> ap= ragged? H->compute_ragged_breaks (): H->compute_breaks ();
00459   tm_delete (H);
00460   return ap;
00461 }