1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.detect;
18
19 import java.io.IOException;
20 import java.io.InputStream;
21
22 import org.apache.tika.metadata.Metadata;
23 import org.apache.tika.mime.MediaType;
24
25
26
27
28
29
30
31 public class MagicDetector implements Detector {
32
33
34
35
36
37 private final MediaType type;
38
39
40
41
42 private final int length;
43
44
45
46
47
48
49 private final byte[] pattern;
50
51
52
53
54 private final byte[] mask;
55
56
57
58
59
60 private final int offsetRangeBegin;
61
62
63
64
65
66
67
68
69
70
71 private final int offsetRangeEnd;
72
73
74
75
76
77
78
79
80 public MagicDetector(MediaType type, byte[] pattern) {
81 this(type, pattern, 0);
82 }
83
84
85
86
87
88
89
90
91
92 public MagicDetector(MediaType type, byte[] pattern, int offset) {
93 this(type, pattern, null, offset, offset);
94 }
95
96
97
98
99
100 public MagicDetector(
101 MediaType type, byte[] pattern, byte[] mask,
102 int offsetRangeBegin, int offsetRangeEnd) {
103 if (type == null) {
104 throw new IllegalArgumentException("Matching media type is null");
105 } else if (pattern == null) {
106 throw new IllegalArgumentException("Magic match pattern is null");
107 } else if (offsetRangeBegin < 0
108 || offsetRangeEnd < offsetRangeBegin) {
109 throw new IllegalArgumentException(
110 "Invalid offset range: ["
111 + offsetRangeBegin + "," + offsetRangeEnd + "]");
112 }
113
114 this.type = type;
115
116 this.length = Math.max(pattern.length, mask != null ? mask.length : 0);
117
118 this.mask = new byte[length];
119 this.pattern = new byte[length];
120
121 for (int i = 0; i < length; i++) {
122 if (mask != null && i < mask.length) {
123 this.mask[i] = mask[i];
124 } else {
125 this.mask[i] = -1;
126 }
127
128 if (i < pattern.length) {
129 this.pattern[i] = (byte) (pattern[i] & this.mask[i]);
130 } else {
131 this.pattern[i] = 0;
132 }
133 }
134
135 this.offsetRangeBegin = offsetRangeBegin;
136 this.offsetRangeEnd = offsetRangeEnd;
137 }
138
139
140
141
142
143
144 public MediaType detect(InputStream input, Metadata metadata)
145 throws IOException {
146 if (input == null) {
147 return MediaType.OCTET_STREAM;
148 }
149
150 input.mark(offsetRangeEnd + length);
151 try {
152 int offset = 0;
153
154
155 while (offset < offsetRangeBegin) {
156 long n = input.skip(offsetRangeBegin - offset);
157 if (n > 0) {
158 offset += n;
159 } else if (input.read() != -1) {
160 offset += 1;
161 } else {
162 return MediaType.OCTET_STREAM;
163 }
164 }
165
166
167 byte[] buffer =
168 new byte[length + (offsetRangeEnd - offsetRangeBegin)];
169 int n = input.read(buffer);
170 if (n > 0) {
171 offset += n;
172 }
173 while (n != -1 && offset < offsetRangeEnd + length) {
174 int bufferOffset = offset - offsetRangeBegin;
175 n = input.read(
176 buffer, bufferOffset, buffer.length - bufferOffset);
177 }
178 if (offset < offsetRangeBegin + length) {
179 return MediaType.OCTET_STREAM;
180 }
181
182
183 for (int i = 0; i <= offsetRangeEnd - offsetRangeBegin; i++) {
184 boolean match = true;
185 for (int j = 0; match && j < length; j++) {
186 match = (buffer[i + j] & mask[j]) == pattern[j];
187 }
188 if (match) {
189 return type;
190 }
191 }
192
193 return MediaType.OCTET_STREAM;
194 } finally {
195 input.reset();
196 }
197 }
198
199 }