View Javadoc

1   /*
2    * Copyright 2001-2005 The Apache Software Foundation
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.net.nntp;
18  
19  /**
20   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
21   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
22   * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
23   *  
24   * @author rwinston <rwinston@checkfree.com>
25   *
26   */
27  
28  import java.util.HashMap;
29  import java.util.Iterator;
30  
31  public class Threader {
32  	private ThreadContainer root;
33  	private HashMap idTable;
34  	private int bogusIdCount = 0;
35  
36  	/**
37  	 * The main threader entry point - The client passes in an array of Threadable objects, and 
38  	 * the Threader constructs a connected 'graph' of messages
39  	 * @param messages
40  	 * @return
41  	 */
42  	public Threadable thread(Threadable[] messages) {
43  		if (messages == null)
44  			return null;
45  
46  		idTable = new HashMap();
47  
48  		// walk through each Threadable element
49  		for (int i = 0; i < messages.length; ++i) {
50  			if (!messages[i].isDummy())
51  				buildContainer(messages[i]);
52  		}
53  
54  		root = findRootSet();
55  		idTable.clear();
56  		idTable = null;
57  
58  		pruneEmptyContainers(root);
59  
60  		root.reverseChildren();
61  		gatherSubjects();
62  
63  		if (root.next != null)
64  			throw new RuntimeException("root node has a next:" + root);
65  
66  		for (ThreadContainer r = root.child; r != null; r = r.next) {
67  			if (r.threadable == null)
68  				r.threadable = r.child.threadable.makeDummy();
69  		}
70  
71  		Threadable result = (root.child == null ? null : root.child.threadable);
72  		root.flush();
73  		root = null;
74  
75  		return result;
76  	}
77  
78  	/**
79  	 * 
80  	 * @param threadable
81  	 */
82  	private void buildContainer(Threadable threadable) {
83  		String id = threadable.messageThreadId();
84  		ThreadContainer container = (ThreadContainer) idTable.get(id);
85  
86  		// A ThreadContainer exists for this id already. This should be a forward reference, but may 
87  		// be a duplicate id, in which case we will need to generate a bogus placeholder id
88  		if (container != null) {
89  			if (container.threadable != null) { // oops! duplicate ids...
90  				id = "<Bogus-id:" + (bogusIdCount++) + ">";
91  				container = null;
92  			} else {
93  				// The container just contained a forward reference to this message, so let's
94  				// fill in the threadable field of the container with this message
95  				container.threadable = threadable;
96  			}
97  		}
98  
99  		// No container exists for that message Id. Create one and insert it into the hash table.
100 		if (container == null) {
101 			container = new ThreadContainer();
102 			container.threadable = threadable;
103 			idTable.put(id, container);
104 		}
105 
106 		// Iterate through all of the references and create ThreadContainers for any references that
107 		// don't have them.
108 		ThreadContainer parentRef = null;
109 		{
110 			String[] references = threadable.messageThreadReferences();
111 			for (int i = 0; i < references.length; ++i) {
112 				String refString = references[i];
113 				ThreadContainer ref = (ThreadContainer) idTable.get(refString);
114 
115 				// if this id doesnt have a container, create one
116 				if (ref == null) {
117 					ref = new ThreadContainer();
118 					idTable.put(refString, ref);
119 				}
120 
121 				// Link references together in the order they appear in the References: header,
122 				// IF they dont have a have a parent already &&
123 				// IF it will not cause a circular reference
124 				if ((parentRef != null)
125 					&& (ref.parent == null)
126 					&& (parentRef != ref)
127 					&& !(parentRef.findChild(ref))) {
128 					// Link ref into the parent's child list
129 					ref.parent = parentRef;
130 					ref.next = parentRef.child;
131 					parentRef.child = ref;
132 				}
133 				parentRef = ref;
134 			}
135 		}
136 
137 		// parentRef is now set to the container of the last element in the references field. make that 
138 		// be the parent of this container, unless doing so causes a circular reference
139 		if (parentRef != null
140 			&& (parentRef == container || container.findChild(parentRef)))
141 			parentRef = null;
142 
143 		// if it has a parent already, its because we saw this message in a References: field, and presumed
144 		// a parent based on the other entries in that field. Now that we have the actual message, we can
145 		// throw away the old parent and use this new one
146 		if (container.parent != null) {
147 			ThreadContainer rest, prev;
148 
149 			for (prev = null, rest = container.parent.child;
150 				rest != null;
151 				prev = rest, rest = rest.next) {
152 				if (rest == container)
153 					break;
154 			}
155 
156 			if (rest == null) {
157 				throw new RuntimeException(
158 					"Didnt find "
159 						+ container
160 						+ " in parent"
161 						+ container.parent);
162 			}
163 
164 			// Unlink this container from the parent's child list
165 			if (prev == null)
166 				container.parent.child = container.next;
167 			else
168 				prev.next = container.next;
169 
170 			container.next = null;
171 			container.parent = null;
172 		}
173 
174 		// If we have a parent, link container into the parents child list
175 		if (parentRef != null) {
176 			container.parent = parentRef;
177 			container.next = parentRef.child;
178 			parentRef.child = container;
179 		}
180 	}
181 
182 	/**
183 	 * Find the root set of all existing ThreadContainers
184 	 * @return root the ThreadContainer representing the root node
185 	 */
186 	private ThreadContainer findRootSet() {
187 		ThreadContainer root = new ThreadContainer();
188 		Iterator iter = idTable.keySet().iterator();
189 
190 		while (iter.hasNext()) {
191 			Object key = iter.next();
192 			ThreadContainer c = (ThreadContainer) idTable.get(key);
193 			if (c.parent == null) {
194 				if (c.next != null)
195 					throw new RuntimeException(
196 						"c.next is " + c.next.toString());
197 				c.next = root.child;
198 				root.child = c;
199 			}
200 		}
201 		return root;
202 	}
203 
204 	/**
205 	 * Delete any empty or dummy ThreadContainers
206 	 * @param parent
207 	 */
208 	private void pruneEmptyContainers(ThreadContainer parent) {
209 		ThreadContainer container, prev, next;
210 		for (prev = null, container = parent.child, next = container.next;
211 			container != null;
212 			prev = container,
213 				container = next,
214 				next = (container == null ? null : container.next)) {
215 
216 			// Is it empty and without any children? If so,delete it 
217 			if (container.threadable == null && container.child == null) {
218 				if (prev == null)
219 					parent.child = container.next;
220 				else
221 					prev.next = container.next;
222 
223 				// Set container to prev so that prev keeps its same value the next time through the loop	
224 				container = prev;
225 			}
226 
227 			// Else if empty, with kids, and (not at root or only one kid)
228 			else if (
229 				container.threadable == null
230 					&& container.child != null
231 					&& (container.parent != null
232 						|| container.child.next == null)) {
233 				// We have an invalid/expired message with kids. Promote the kids to this level. 
234 				ThreadContainer tail;
235 				ThreadContainer kids = container.child;
236 
237 				// Remove this container and replace with 'kids'.
238 				if (prev == null)
239 					parent.child = kids;
240 				else
241 					prev.next = kids;
242 
243 				// Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
244 				// i.e. splice kids into the list in place of container
245 				for (tail = kids; tail.next != null; tail = tail.next)
246 					tail.parent = container.parent;
247 
248 				tail.parent = container.parent;
249 				tail.next = container.next;
250 
251 				// next currently points to the item after the inserted items in the chain - reset that so we process the newly
252 				// promoted items next time round
253 				next = kids;
254 
255 				// Set container to prev so that prev keeps its same value the next time through the loop
256 				container = prev;
257 			} else if (container.child != null) {
258 				// A real message , with kids
259 				// Iterate over the children
260 				pruneEmptyContainers(container);
261 			}
262 		}
263 	}
264 
265 	/**
266 	 *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 
267 	 */
268 	private void gatherSubjects() {
269 
270 		int count = 0;
271 
272 		for (ThreadContainer c = root.child; c != null; c = c.next)
273 			count++;
274 
275 		// TODO verify this will avoid rehashing
276 		HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9);
277 		count = 0;
278 
279 		for (ThreadContainer c = root.child; c != null; c = c.next) {
280 			Threadable threadable = c.threadable;
281 
282 			// No threadable? If so, it is a dummy node in the root set.
283 			// Only root set members may be dummies, and they alway have at least 2 kids
284 			// Take the first kid as representative of the subject
285 			if (threadable == null)
286 				threadable = c.child.threadable;
287 
288 			String subj = threadable.simplifiedSubject();
289 
290 			if (subj == null || subj == "")
291 				continue;
292 
293 			ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
294 
295 			// Add this container to the table iff:
296 			// - There exists no container with this subject
297 			// - or this is a dummy container and the old one is not - the dummy one is
298 			// more interesting as a root, so put it in the table instead
299 			// - The container in the table has a "Re:" version of this subject, and 
300 			// this container has a non-"Re:" version of this subject. The non-"Re:" version
301 			// is the more interesting of the two.
302 			if (old == null
303 				|| (c.threadable == null && old.threadable != null)
304 				|| (old.threadable != null
305 					&& old.threadable.subjectIsReply()
306 					&& c.threadable != null
307 					&& !c.threadable.subjectIsReply())) {
308 				subjectTable.put(subj, c);
309 				count++;
310 			}
311 		}
312 
313 		// If the table is empty, we're done
314 		if (count == 0)
315 			return;
316 
317 		// subjectTable is now populated with one entry for each subject which occurs in the 
318 		// root set. Iterate over the root set, and gather together the difference.
319 		ThreadContainer prev, c, rest;
320 		for (prev = null, c = root.child, rest = c.next;
321 			c != null;
322 			prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
323 			Threadable threadable = c.threadable;
324 
325 			// is it a dummy node?
326 			if (threadable == null)
327 				threadable = c.child.threadable;
328 
329 			String subj = threadable.simplifiedSubject();
330 
331 			// Dont thread together all subjectless messages
332 			if (subj == null || subj == "")
333 				continue;
334 
335 			ThreadContainer old = (ThreadContainer) subjectTable.get(subj);
336 
337 			if (old == c) // That's us
338 				continue;
339 
340 			// We have now found another container in the root set with the same subject
341 			// Remove the "second" message from the root set
342 			if (prev == null)
343 				root.child = c.next;
344 			else
345 				prev.next = c.next;
346 			c.next = null;
347 
348 			if (old.threadable == null && c.threadable == null) {
349 				// both dummies - merge them
350 				ThreadContainer tail;
351 				for (tail = old.child;
352 					tail != null && tail.next != null;
353 					tail = tail.next);
354 
355 				tail.next = c.child;
356 
357 				for (tail = c.child; tail != null; tail = tail.next)
358 					tail.parent = old;
359 
360 				c.child = null;
361 			} else if (
362 				old.threadable == null
363 					|| (c.threadable != null
364 						&& c.threadable.subjectIsReply()
365 						&& !old.threadable.subjectIsReply())) {
366 				// Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
367 				c.parent = old;
368 				c.next = old.child;
369 				old.child = c;
370 			} else {
371 				//	else make the old and new messages be children of a new dummy container.
372 				// We create a new container object for old.msg and empty the old container
373 				ThreadContainer newc = new ThreadContainer();
374 				newc.threadable = old.threadable;
375 				newc.child = old.child;
376 
377 				for (ThreadContainer tail = newc.child;
378 					tail != null;
379 					tail = tail.next)
380 					tail.parent = newc;
381 
382 				old.threadable = null;
383 				old.child = null;
384 
385 				c.parent = old;
386 				newc.parent = old;
387 
388 				// Old is now a dummy- give it 2 kids , c and newc
389 				old.child = c;
390 				c.next = newc;
391 			}
392 			// We've done a merge, so keep the same prev
393 			c = prev;
394 		}
395 
396 		subjectTable.clear();
397 		subjectTable = null;
398 
399 	}
400 }
401 
402 /**
403  * A placeholder utility class, used for constructing a tree of Threadables
404  * Originall implementation by Jamie Zawinski. 
405  * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
406  * Threadable objects
407  * @author Rory Winston <rwinston@checkfree.com>
408  */
409 class ThreadContainer {
410 	Threadable threadable;
411 	ThreadContainer parent;
412 	ThreadContainer prev;
413 	ThreadContainer next;
414 	ThreadContainer child;
415 
416 	/**
417 	 * 
418 	 * @param container
419 	 * @return true if child is under self's tree. Detects circular references
420 	 */
421 	boolean findChild(ThreadContainer target) {
422 		if (child == null)
423 			return false;
424 
425 		else if (child == target)
426 			return true;
427 		else
428 			return child.findChild(target);
429 	}
430 
431 	// Copy the ThreadContainer tree structure down into the underlying Threadable objects
432 	// (Make the Threadable tree look like the ThreadContainer tree)
433 	void flush() {
434 		if (parent != null && threadable == null)
435 			throw new RuntimeException("no threadable in " + this.toString());
436 
437 		parent = null;
438 
439 		if (threadable != null)
440 			threadable.setChild(child == null ? null : child.threadable);
441 
442 		if (child != null) {
443 			child.flush();
444 			child = null;
445 		}
446 
447 		if (threadable != null)
448 			threadable.setNext(next == null ? null : next.threadable);
449 
450 		if (next != null) {
451 			next.flush();
452 			next = null;
453 		}
454 
455 		threadable = null;
456 	}
457 
458 	/**
459 	 * Reverse the entire set of children
460 	 *
461 	 */
462 	void reverseChildren() {
463 		if (child != null) {
464 			ThreadContainer kid, prev, rest;
465 			for (prev = null, kid = child, rest = kid.next;
466 				kid != null;
467 				prev = kid,
468 					kid = rest,
469 					rest = (rest == null ? null : rest.next))
470 				kid.next = prev;
471 
472 			child = prev;
473 
474 			// Do it for the kids 
475 			for (kid = child; kid != null; kid = kid.next)
476 				kid.reverseChildren();
477 		}
478 	}
479 }