1/*
2 *  Rope.java
3 *  Copyright (C) 2007 Amin Ahmad.
4 *
5 *  This file is part of Java Ropes.
6 *
7 *  Java Ropes is free software: you can redistribute it and/or modify
8 *  it under the terms of the GNU General Public License as published by
9 *  the Free Software Foundation, either version 3 of the License, or
10 *  (at your option) any later version.
11 *
12 *  Java Ropes is distributed in the hope that it will be useful,
13 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *  GNU General Public License for more details.
16 *
17 *  You should have received a copy of the GNU General Public License
18 *  along with Java Ropes.  If not, see <http://www.gnu.org/licenses/>.
19 *
20 *  Amin Ahmad can be contacted at amin.ahmad@gmail.com or on the web at
21 *  www.ahmadsoft.org.
22 */
23package org.ahmadsoft.ropes;
24
25import java.io.IOException;
26import java.io.Serializable;
27import java.io.Writer;
28import java.util.Iterator;
29import java.util.regex.Matcher;
30import java.util.regex.Pattern;
31
32
33/**
34 * <p>
35 * A rope represents character strings. Ropes are immutable which
36 * means that once they are created, they cannot be changed. This
37 * makes them suitable for sharing in multi-threaded environments.
38 * </p><p>
39 * Rope operations, unlike string operations, scale well to very
40 * long character strings. Most mutation operations run in O(log n)
41 * time or better. However, random-access character retrieval is
42 * generally slower than for a String. By traversing consecutive
43 * characters with an iterator instead, performance improves to
44 * O(1).
45 * </p><p>
46 * This rope implementation implements all performance optimizations
47 * outlined in "<a href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">Ropes: an Alternative to Strings</a>"
48 * by Hans-J. Boehm, Russ Atkinson and Michael Plass, including,
49 * notably, deferred evaluation of long substrings and automatic
50 * rebalancing.
51 * </p>
52 * <h4>Immutability (a Caveat)</h4>
53 * A rope is immutable. Specifically, calling any mutator function
54 * on a rope always returns a modified copy; the original rope is
55 * left untouched. However, care must be taken to build ropes from
56 * immutable <code>CharSequences</code> such as <code>Strings</code>,
57 * or else from mutable <code>CharSequences</code> that your program
58 * <emph>guarantees will not change</emph>. Failure to do so will result in
59 * logic errors.
60 *
61 * @author Amin Ahmad
62 */
63/*@ pure @*/ public interface Rope extends CharSequence, Iterable<Character>, Comparable<CharSequence>, Serializable {
64
65	/**
66	 * A factory used for constructing ropes.
67	 */
68	RopeBuilder BUILDER = new RopeBuilder();
69
70	/**
71	 * Returns a new rope created by appending the specified character to
72	 * this rope.
73	 * @param c the specified character.
74	 * @return a new rope.
75	 */
76	//@ ensures \result.length() == length() + 1;
77	Rope append(char c);
78
79	/**
80	 * Returns a new rope created by appending the specified character sequence to
81	 * this rope.
82	 * @param suffix the specified suffix.
83	 * @return a new rope.
84	 */
85	//@ requires suffix != null;
86	//@ ensures \result.length() == length() + suffix.length();
87	Rope append(CharSequence suffix);
88
89	/**
90	 * Returns a new rope created by appending the specified character range to
91	 * this rope.
92	 * @param csq the specified character.
93	 * @param start the start index, inclusive.
94	 * @param end the end index, non-inclusive.
95	 * @return a new rope.
96	 */
97    //@ requires start <= end && start > -1 && end <= csq.length();
98	//@ ensures \result.length() == (length() + (end-start));
99	Rope append(CharSequence csq, int start, int end);
100
101	/**
102     * Creats a new rope by delete the specified character substring.
103     * The substring begins at the specified <code>start</code> and extends to
104     * the character at index <code>end - 1</code> or to the end of the
105     * sequence if no such character exists. If
106     * <code>start</code> is equal to <code>end</code>, no changes are made.
107     *
108     * @param      start  The beginning index, inclusive.
109     * @param      end    The ending index, exclusive.
110     * @return     This object.
111     * @throws     StringIndexOutOfBoundsException  if <code>start</code>
112     *             is negative, greater than <code>length()</code>, or
113     *		   greater than <code>end</code>.
114     */
115    //@ requires start <= end && start > -1 && end <= length();
116	//@ ensures \result.length() == (length() - (end-start));
117    Rope delete(int start, int end);
118
119	/**
120	 * Returns the index within this rope of the first occurrence of the
121	 * specified character. If a character with value <code>ch</code> occurs
122	 * in the character sequence represented by this <code>Rope</code>
123	 * object, then the index of the first such occurrence is returned --
124	 * that is, the smallest value k such that:
125	 * <p>
126	 * <code>this.charAt(k) == ch</code>
127	 * <p>
128	 * is <code>true</code>. If no such character occurs in this string, then
129	 * <code>-1</code> is returned.
130	 * @param ch a character.
131	 * @return the index of the first occurrence of the character in the character
132	 * sequence represented by this object, or <code>-1</code> if the character
133	 * does not occur.
134	 */
135    //@ ensures \result >= -1 && \result < length();
136	int indexOf(char ch);
137
138	/**
139	 * Returns the index within this rope of the first occurrence of the
140	 * specified character, beginning at the specified index. If a character
141	 * with value <code>ch</code> occurs in the character sequence
142	 * represented by this <code>Rope</code> object, then the index of the
143	 * first such occurrence is returned&#8212;that is, the smallest value k
144	 * such that:
145	 * <p>
146	 * <code>this.charAt(k) == ch</code>
147	 * <p>
148	 * is <code>true</code>. If no such character occurs in this string, then
149	 * <code>-1</code> is returned.
150	 * @param ch a character.
151	 * @param fromIndex the index to start searching from.
152	 * @return the index of the first occurrence of the character in the character
153	 * sequence represented by this object, or -1 if the character does not occur.
154	 */
155	//@ requires fromIndex > -1 && fromIndex < length();
156    //@ ensures \result >= -1 && \result < length();
157	int indexOf(char ch, int fromIndex);
158
159	/**
160	 * Returns the index within this rope of the first occurrence of the
161	 * specified string. The value returned is the smallest <i>k</i> such
162	 * that:
163	 * <pre>
164	 *     this.startsWith(str, k)
165	 * </pre>
166	 * If no such <i>k</i> exists, then -1 is returned.
167	 * @param sequence the string to find.
168	 * @return the index of the first occurrence of the specified string, or
169	 * -1 if the specified string does not occur.
170	 */
171	//@ requires sequence != null;
172    //@ ensures \result >= -1 && \result < length();
173	int indexOf(CharSequence sequence);
174
175	/**
176	 * Returns the index within this rope of the first occurrence of the
177	 * specified string, beginning at the specified index. The value returned
178	 * is the smallest <i>k</i> such that:
179	 * <pre>
180	 *     k >= fromIndex && this.startsWith(str, k)
181	 * </pre>
182	 * If no such <i>k</i> exists, then -1 is returned.
183	 * @param sequence the string to find.
184	 * @param fromIndex the index to start searching from.
185	 * @return the index of the first occurrence of the specified string, or
186	 * -1 if the specified string does not occur.
187	 */
188	//@ requires sequence != null && fromIndex > -1 && fromIndex < length();
189    //@ ensures \result >= -1 && \result < length();
190	int indexOf(CharSequence sequence, int fromIndex);
191
192	/**
193     * Creates a new rope by inserting the specified <code>CharSequence</code>
194     * into this rope.
195     * <p>
196     * The characters of the <code>CharSequence</code> argument are inserted,
197     * in order, into this rope at the indicated offset.
198     *
199     * <p>If <code>s</code> is <code>null</code>, then the four characters
200     * <code>"null"</code> are inserted into this sequence.
201     *
202     * @param      dstOffset the offset.
203     * @param      s the sequence to be inserted
204     * @return     a reference to the new Rope.
205     * @throws     IndexOutOfBoundsException  if the offset is invalid.
206     */
207	//@ requires dstOffset > -1 && dstOffset <= length();
208    Rope insert(int dstOffset, CharSequence s);
209
210	/**
211     * Returns an iterator positioned to start at the specified index.
212     * @param start the start position.
213     * @return an iterator positioned to start at the specified index.
214     */
215	//@ requires start > -1 && start < length();
216    Iterator<Character> iterator(int start);
217
218	/**
219	 * Trims all whitespace as well as characters less than 0x20 from
220	 * the beginning of this string.
221	 * @return a rope with all leading whitespace trimmed.
222	 */
223	//@ ensures \result.length() <= length();
224	Rope trimStart();
225
226	/**
227     * Creates a matcher that will match this rope against the
228     * specified pattern. This method produces a higher performance
229     * matcher than:
230     * <pre>
231     * Matcher m = pattern.matcher(this);
232     * </pre>
233     * The difference may be asymptotically better in some cases.
234     * @param pattern the pattern to match this rope against.
235     * @return a matcher.
236     */
237	//@ requires pattern != null;
238    Matcher matcher(Pattern pattern);
239
240    /**
241     * Returns <code>true</code> if this rope matches the specified
242     * <code>Pattern</code>, or <code>false</code> otherwise.
243     * @see java.util.regex.Pattern
244     * @param regex the specified regular expression.
245     * @return <code>true</code> if this rope matches the specified
246     * <code>Pattern</code>, or <code>false</code> otherwise.
247     */
248    public boolean matches(Pattern regex);
249
250    /**
251     * Returns <code>true</code> if this rope matches the specified
252     * regular expression, or <code>false</code> otherwise.
253     * @see java.util.regex.Pattern
254     * @param regex the specified regular expression.
255     * @return <code>true</code> if this rope matches the specified
256     * regular expression, or <code>false</code> otherwise.
257     */
258    public boolean matches(String regex);
259
260
261    /**
262     * Rebalances the current rope, returning the rebalanced rope. In general,
263     * rope rebalancing is handled automatically, but this method is provided
264     * to give users more control.
265     *
266     * @return a rebalanced rope.
267     */
268    public Rope rebalance();
269
270    /**
271     * Reverses this rope.
272     * @return a reversed copy of this rope.
273     */
274    public Rope reverse();
275
276    /**
277     * Returns a reverse iterator positioned to start at the end of this
278     * rope. A reverse iterator moves backwards instead of forwards through
279     * a rope.
280     * @return A reverse iterator positioned at the end of this rope.
281     * @see Rope#reverseIterator(int)
282     */
283    Iterator<Character> reverseIterator();
284
285    /**
286     * Returns a reverse iterator positioned to start at the specified index.
287     * A reverse iterator moves backwards instead of forwards through a rope.
288     * @param start the start position.
289     * @return a reverse iterator positioned to start at the specified index from
290     * the end of the rope. For example, a value of 1 indicates the iterator
291     * should start 1 character before the end of the rope.
292     * @see Rope#reverseIterator()
293     */
294    Iterator<Character> reverseIterator(int start);
295
296    /**
297	 * Trims all whitespace as well as characters less than <code>0x20</code> from
298	 * the end of this rope.
299	 * @return a rope with all trailing whitespace trimmed.
300	 */
301	//@ ensures \result.length() <= length();
302	Rope trimEnd();
303
304    @Override
305	Rope subSequence(int start, int end);
306
307    /**
308     * Trims all whitespace as well as characters less than <code>0x20</code> from
309     * the beginning and end of this string.
310     * @return a rope with all leading and trailing whitespace trimmed.
311     */
312	Rope trim();
313
314    /**
315     * Write this rope to a <code>Writer</code>.
316     * @param out the writer object.
317     */
318    public void write(Writer out) throws IOException;
319
320    /**
321     * Write a range of this rope to a <code>Writer</code>.
322     * @param out the writer object.
323     * @param offset the range offset.
324     * @param length the range length.
325     */
326    public void write(Writer out, int offset, int length) throws IOException;
327
328    /**
329     * Increase the length of this rope to the specified length by prepending
330     * spaces to this rope. If the specified length is less than or equal to
331     * the current length of the rope, the rope is returned unmodified.
332     * @param toLength the desired length.
333     * @return the padded rope.
334     * @see #padStart(int, char)
335     */
336    public Rope padStart(int toLength);
337
338    /**
339     * Increase the length of this rope to the specified length by repeatedly
340     * prepending the specified character to this rope. If the specified length
341     * is less than or equal to the current length of the rope, the rope is
342     * returned unmodified.
343     * @param toLength the desired length.
344     * @param padChar the character to use for padding.
345     * @return the padded rope.
346     * @see #padStart(int, char)
347     */
348    public Rope padStart(int toLength, char padChar);
349
350    /**
351     * Increase the length of this rope to the specified length by appending
352     * spaces to this rope. If the specified length is less than or equal to
353     * the current length of the rope, the rope is returned unmodified.
354     * @param toLength the desired length.
355     * @return the padded rope.
356     * @see #padStart(int, char)
357     */
358    public Rope padEnd(int toLength);
359
360    /**
361     * Increase the length of this rope to the specified length by repeatedly
362     * appending the specified character to this rope. If the specified length
363     * is less than or equal to the current length of the rope, the rope is
364     * returned unmodified.
365     * @param toLength the desired length.
366     * @param padChar the character to use for padding.
367     * @return the padded rope.
368     * @see #padStart(int, char)
369     */
370    public Rope padEnd(int toLength, char padChar);
371
372    /**
373     * Returns true if and only if the length of this rope is zero.
374     * @return <code>true</code> if and only if the length of this
375     * rope is zero, and <code>false</code> otherwise.
376     */
377    public boolean isEmpty();
378
379    /**
380     * Returns <code>true</code> if this rope starts with the specified
381     * prefix.
382     * @param prefix the prefix to test.
383     * @return <code>true</code> if this rope starts with the
384     * specified prefix and <code>false</code> otherwise.
385     * @see #startsWith(CharSequence, int)
386     */
387    public boolean startsWith(CharSequence prefix);
388    /**
389     * Returns <code>true</code> if this rope, beginning from a specified
390     * offset, starts with the specified prefix.
391     * @param prefix the prefix to test.
392     * @param offset the start offset.
393     * @return <code>true</code> if this rope starts with the
394     * specified prefix and <code>false</code> otherwise.
395     */
396    public boolean startsWith(CharSequence prefix, int offset);
397
398    /**
399     * Returns <code>true</code> if this rope ends with the specified
400     * suffix.
401     * @param suffix the suffix to test.
402     * @return <code>true</code> if this rope starts with the
403     * specified suffix and <code>false</code> otherwise.
404     * @see #endsWith(CharSequence, int)
405     */
406    public boolean endsWith(CharSequence suffix);
407    /**
408     * Returns <code>true</code> if this rope, terminated at a specified
409     * offset, ends with the specified suffix.
410     * @param suffix the suffix to test.
411     * @param offset the termination offset, counted from the end of the
412     * rope.
413     * @return <code>true</code> if this rope starts with the
414     * specified prefix and <code>false</code> otherwise.
415     */
416    public boolean endsWith(CharSequence suffix, int offset);
417}
418