1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.tika;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.File;
21  import java.io.FileInputStream;
22  import java.io.InputStream;
23  
24  import org.apache.tika.io.IOUtils;
25  
26  public class TypeDetectionBenchmark {
27  
28      private static final Tika tika = new Tika();
29  
30      public static void main(String[] args) throws Exception {
31          long start = System.currentTimeMillis();
32          if (args.length > 0) {
33              for (String arg : args) {
34                  benchmark(new File(arg));
35              }
36          } else {
37              benchmark(new File(
38                      "../tika-parsers/src/test/resources/test-documents"));
39          }
40          System.out.println(
41                  "Total benchmark time: "
42                  + (System.currentTimeMillis() - start) + "ms");
43      }
44  
45      private static void benchmark(File file) throws Exception {
46          if (file.isHidden()) {
47              // ignore
48          } else if (file.isFile()) {
49              InputStream input = new FileInputStream(file);
50              try {
51                  byte[] content = IOUtils.toByteArray(input);
52                  String type =
53                      tika.detect(new ByteArrayInputStream(content));
54                  long start = System.currentTimeMillis();
55                  for (int i = 0; i < 1000; i++) {
56                      tika.detect(new ByteArrayInputStream(content));
57                  }
58                  System.out.printf(
59                          "%6dns per Tika.detect(%s) = %s%n",
60                          System.currentTimeMillis() - start, file, type);
61              } finally {
62                  input.close();
63              }
64          } else if (file.isDirectory()) {
65              for (File child : file.listFiles()) {
66                  benchmark(child);
67              }
68          }
69      }
70  
71  }