View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  
19  package org.apache.commons.net.nntp;
20  
21  /**
22   * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
23   * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
24   * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
25   *  
26   * @author rwinston <rwinston@checkfree.com>
27   *
28   */
29  
30  import java.util.HashMap;
31  import java.util.Iterator;
32  
33  public class Threader {
34      private ThreadContainer root;
35      private HashMap<String,ThreadContainer> idTable;
36      private int bogusIdCount = 0;
37  
38      /**
39       * The main threader entry point - The client passes in an array of Threadable objects, and 
40       * the Threader constructs a connected 'graph' of messages
41       * @param messages
42       * @return
43       */
44      public Threadable thread(Threadable[] messages) {
45          if (messages == null)
46              return null;
47  
48          idTable = new HashMap<String,ThreadContainer>();
49  
50          // walk through each Threadable element
51          for (int i = 0; i < messages.length; ++i) {
52              if (!messages[i].isDummy())
53                  buildContainer(messages[i]);
54          }
55  
56          root = findRootSet();
57          idTable.clear();
58          idTable = null;
59  
60          pruneEmptyContainers(root);
61  
62          root.reverseChildren();
63          gatherSubjects();
64  
65          if (root.next != null)
66              throw new RuntimeException("root node has a next:" + root);
67  
68          for (ThreadContainer r = root.child; r != null; r = r.next) {
69              if (r.threadable == null)
70                  r.threadable = r.child.threadable.makeDummy();
71          }
72  
73          Threadable result = (root.child == null ? null : root.child.threadable);
74          root.flush();
75          root = null;
76  
77          return result;
78      }
79  
80      /**
81       * 
82       * @param threadable
83       */
84      private void buildContainer(Threadable threadable) {
85          String id = threadable.messageThreadId();
86          ThreadContainer container = idTable.get(id);
87  
88          // A ThreadContainer exists for this id already. This should be a forward reference, but may 
89          // be a duplicate id, in which case we will need to generate a bogus placeholder id
90          if (container != null) {
91              if (container.threadable != null) { // oops! duplicate ids...
92                  id = "<Bogus-id:" + (bogusIdCount++) + ">";
93                  container = null;
94              } else {
95                  // The container just contained a forward reference to this message, so let's
96                  // fill in the threadable field of the container with this message
97                  container.threadable = threadable;
98              }
99          }
100 
101         // No container exists for that message Id. Create one and insert it into the hash table.
102         if (container == null) {
103             container = new ThreadContainer();
104             container.threadable = threadable;
105             idTable.put(id, container);
106         }
107 
108         // Iterate through all of the references and create ThreadContainers for any references that
109         // don't have them.
110         ThreadContainer parentRef = null;
111         {
112             String[] references = threadable.messageThreadReferences();
113             for (int i = 0; i < references.length; ++i) {
114                 String refString = references[i];
115                 ThreadContainer ref = idTable.get(refString);
116 
117                 // if this id doesnt have a container, create one
118                 if (ref == null) {
119                     ref = new ThreadContainer();
120                     idTable.put(refString, ref);
121                 }
122 
123                 // Link references together in the order they appear in the References: header,
124                 // IF they dont have a have a parent already &&
125                 // IF it will not cause a circular reference
126                 if ((parentRef != null)
127                     && (ref.parent == null)
128                     && (parentRef != ref)
129                     && !(parentRef.findChild(ref))) {
130                     // Link ref into the parent's child list
131                     ref.parent = parentRef;
132                     ref.next = parentRef.child;
133                     parentRef.child = ref;
134                 }
135                 parentRef = ref;
136             }
137         }
138 
139         // parentRef is now set to the container of the last element in the references field. make that 
140         // be the parent of this container, unless doing so causes a circular reference
141         if (parentRef != null
142             && (parentRef == container || container.findChild(parentRef)))
143             parentRef = null;
144 
145         // if it has a parent already, its because we saw this message in a References: field, and presumed
146         // a parent based on the other entries in that field. Now that we have the actual message, we can
147         // throw away the old parent and use this new one
148         if (container.parent != null) {
149             ThreadContainer rest, prev;
150 
151             for (prev = null, rest = container.parent.child;
152                 rest != null;
153                 prev = rest, rest = rest.next) {
154                 if (rest == container)
155                     break;
156             }
157 
158             if (rest == null) {
159                 throw new RuntimeException(
160                     "Didnt find "
161                         + container
162                         + " in parent"
163                         + container.parent);
164             }
165 
166             // Unlink this container from the parent's child list
167             if (prev == null)
168                 container.parent.child = container.next;
169             else
170                 prev.next = container.next;
171 
172             container.next = null;
173             container.parent = null;
174         }
175 
176         // If we have a parent, link container into the parents child list
177         if (parentRef != null) {
178             container.parent = parentRef;
179             container.next = parentRef.child;
180             parentRef.child = container;
181         }
182     }
183 
184     /**
185      * Find the root set of all existing ThreadContainers
186      * @return root the ThreadContainer representing the root node
187      */
188     private ThreadContainer findRootSet() {
189         ThreadContainer root = new ThreadContainer();
190         Iterator<String> iter = idTable.keySet().iterator();
191 
192         while (iter.hasNext()) {
193             Object key = iter.next();
194             ThreadContainer c = idTable.get(key);
195             if (c.parent == null) {
196                 if (c.next != null)
197                     throw new RuntimeException(
198                         "c.next is " + c.next.toString());
199                 c.next = root.child;
200                 root.child = c;
201             }
202         }
203         return root;
204     }
205 
206     /**
207      * Delete any empty or dummy ThreadContainers
208      * @param parent
209      */
210     private void pruneEmptyContainers(ThreadContainer parent) {
211         ThreadContainer container, prev, next;
212         for (prev = null, container = parent.child, next = container.next;
213             container != null;
214             prev = container,
215                 container = next,
216                 next = (container == null ? null : container.next)) {
217 
218             // Is it empty and without any children? If so,delete it 
219             if (container.threadable == null && container.child == null) {
220                 if (prev == null)
221                     parent.child = container.next;
222                 else
223                     prev.next = container.next;
224 
225                 // Set container to prev so that prev keeps its same value the next time through the loop
226                 container = prev;
227             }
228 
229             // Else if empty, with kids, and (not at root or only one kid)
230             else if (
231                 container.threadable == null
232                     && container.child != null
233                     && (container.parent != null
234                         || container.child.next == null)) {
235                 // We have an invalid/expired message with kids. Promote the kids to this level. 
236                 ThreadContainer tail;
237                 ThreadContainer kids = container.child;
238 
239                 // Remove this container and replace with 'kids'.
240                 if (prev == null)
241                     parent.child = kids;
242                 else
243                     prev.next = kids;
244 
245                 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
246                 // i.e. splice kids into the list in place of container
247                 for (tail = kids; tail.next != null; tail = tail.next)
248                     tail.parent = container.parent;
249 
250                 tail.parent = container.parent;
251                 tail.next = container.next;
252 
253                 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
254                 // promoted items next time round
255                 next = kids;
256 
257                 // Set container to prev so that prev keeps its same value the next time through the loop
258                 container = prev;
259             } else if (container.child != null) {
260                 // A real message , with kids
261                 // Iterate over the children
262                 pruneEmptyContainers(container);
263             }
264         }
265     }
266 
267     /**
268      *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 
269      */
270     private void gatherSubjects() {
271 
272         int count = 0;
273 
274         for (ThreadContainer c = root.child; c != null; c = c.next)
275             count++;
276 
277         // TODO verify this will avoid rehashing
278         HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
279         count = 0;
280 
281         for (ThreadContainer c = root.child; c != null; c = c.next) {
282             Threadable threadable = c.threadable;
283 
284             // No threadable? If so, it is a dummy node in the root set.
285             // Only root set members may be dummies, and they alway have at least 2 kids
286             // Take the first kid as representative of the subject
287             if (threadable == null)
288                 threadable = c.child.threadable;
289 
290             String subj = threadable.simplifiedSubject();
291 
292             if (subj == null || subj == "")
293                 continue;
294 
295             ThreadContainer old = subjectTable.get(subj);
296 
297             // Add this container to the table iff:
298             // - There exists no container with this subject
299             // - or this is a dummy container and the old one is not - the dummy one is
300             // more interesting as a root, so put it in the table instead
301             // - The container in the table has a "Re:" version of this subject, and 
302             // this container has a non-"Re:" version of this subject. The non-"Re:" version
303             // is the more interesting of the two.
304             if (old == null
305                 || (c.threadable == null && old.threadable != null)
306                 || (old.threadable != null
307                     && old.threadable.subjectIsReply()
308                     && c.threadable != null
309                     && !c.threadable.subjectIsReply())) {
310                 subjectTable.put(subj, c);
311                 count++;
312             }
313         }
314 
315         // If the table is empty, we're done
316         if (count == 0)
317             return;
318 
319         // subjectTable is now populated with one entry for each subject which occurs in the 
320         // root set. Iterate over the root set, and gather together the difference.
321         ThreadContainer prev, c, rest;
322         for (prev = null, c = root.child, rest = c.next;
323             c != null;
324             prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
325             Threadable threadable = c.threadable;
326 
327             // is it a dummy node?
328             if (threadable == null)
329                 threadable = c.child.threadable;
330 
331             String subj = threadable.simplifiedSubject();
332 
333             // Dont thread together all subjectless messages
334             if (subj == null || subj == "")
335                 continue;
336 
337             ThreadContainer old = subjectTable.get(subj);
338 
339             if (old == c) // That's us
340                 continue;
341 
342             // We have now found another container in the root set with the same subject
343             // Remove the "second" message from the root set
344             if (prev == null)
345                 root.child = c.next;
346             else
347                 prev.next = c.next;
348             c.next = null;
349 
350             if (old.threadable == null && c.threadable == null) {
351                 // both dummies - merge them
352                 ThreadContainer tail;
353                 for (tail = old.child;
354                     tail != null && tail.next != null;
355                     tail = tail.next);
356 
357                 tail.next = c.child;
358 
359                 for (tail = c.child; tail != null; tail = tail.next)
360                     tail.parent = old;
361 
362                 c.child = null;
363             } else if (
364                 old.threadable == null
365                     || (c.threadable != null
366                         && c.threadable.subjectIsReply()
367                         && !old.threadable.subjectIsReply())) {
368                 // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
369                 c.parent = old;
370                 c.next = old.child;
371                 old.child = c;
372             } else {
373                 // else make the old and new messages be children of a new dummy container.
374                 // We create a new container object for old.msg and empty the old container
375                 ThreadContainer newc = new ThreadContainer();
376                 newc.threadable = old.threadable;
377                 newc.child = old.child;
378 
379                 for (ThreadContainer tail = newc.child;
380                     tail != null;
381                     tail = tail.next)
382                     tail.parent = newc;
383 
384                 old.threadable = null;
385                 old.child = null;
386 
387                 c.parent = old;
388                 newc.parent = old;
389 
390                 // Old is now a dummy- give it 2 kids , c and newc
391                 old.child = c;
392                 c.next = newc;
393             }
394             // We've done a merge, so keep the same prev
395             c = prev;
396         }
397 
398         subjectTable.clear();
399         subjectTable = null;
400 
401     }
402 }
403 
404 /**
405  * A placeholder utility class, used for constructing a tree of Threadables
406  * Originall implementation by Jamie Zawinski. 
407  * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
408  * Threadable objects
409  * @author Rory Winston <rwinston@checkfree.com>
410  */
411 class ThreadContainer {
412     Threadable threadable;
413     ThreadContainer parent;
414     ThreadContainer prev;
415     ThreadContainer next;
416     ThreadContainer child;
417 
418     /**
419      * 
420      * @param container
421      * @return true if child is under self's tree. Detects circular references
422      */
423     boolean findChild(ThreadContainer target) {
424         if (child == null)
425             return false;
426 
427         else if (child == target)
428             return true;
429         else
430             return child.findChild(target);
431     }
432 
433     // Copy the ThreadContainer tree structure down into the underlying Threadable objects
434     // (Make the Threadable tree look like the ThreadContainer tree)
435     void flush() {
436         if (parent != null && threadable == null)
437             throw new RuntimeException("no threadable in " + this.toString());
438 
439         parent = null;
440 
441         if (threadable != null)
442             threadable.setChild(child == null ? null : child.threadable);
443 
444         if (child != null) {
445             child.flush();
446             child = null;
447         }
448 
449         if (threadable != null)
450             threadable.setNext(next == null ? null : next.threadable);
451 
452         if (next != null) {
453             next.flush();
454             next = null;
455         }
456 
457         threadable = null;
458     }
459 
460     /**
461      * Reverse the entire set of children
462      *
463      */
464     void reverseChildren() {
465         if (child != null) {
466             ThreadContainer kid, prev, rest;
467             for (prev = null, kid = child, rest = kid.next;
468                 kid != null;
469                 prev = kid,
470                     kid = rest,
471                     rest = (rest == null ? null : rest.next))
472                 kid.next = prev;
473 
474             child = prev;
475 
476             // Do it for the kids 
477             for (kid = child; kid != null; kid = kid.next)
478                 kid.reverseChildren();
479         }
480     }
481 }