1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.tika.parser.video;
18
19 import java.io.ByteArrayInputStream;
20 import java.io.DataInputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.util.ArrayList;
24 import java.util.Collections;
25 import java.util.Date;
26 import java.util.HashMap;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.Map.Entry;
30
31 import org.apache.tika.exception.TikaException;
32 import org.apache.tika.metadata.Metadata;
33 import org.apache.tika.mime.MediaType;
34 import org.apache.tika.parser.ParseContext;
35 import org.apache.tika.parser.Parser;
36 import org.apache.tika.sax.XHTMLContentHandler;
37 import org.xml.sax.ContentHandler;
38 import org.xml.sax.SAXException;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63 public class FLVParser implements Parser {
64
65 private static int TYPE_METADATA = 0x12;
66 private static byte MASK_AUDIO = 1;
67 private static byte MASK_VIDEO = 4;
68
69 private static final Set<MediaType> SUPPORTED_TYPES =
70 Collections.singleton(MediaType.video("x-flv"));
71
72 public Set<MediaType> getSupportedTypes(ParseContext context) {
73 return SUPPORTED_TYPES;
74 }
75
76 private long readUInt32(DataInputStream input) throws IOException {
77 return input.readInt() & 0xFFFFFFFFL;
78 }
79
80 private int readUInt24(DataInputStream input) throws IOException {
81 int uint = input.read()<<16;
82 uint += input.read()<<8;
83 uint += input.read();
84 return uint;
85 }
86
87 private Object readAMFData(DataInputStream input, int type)
88 throws IOException {
89 if (type == -1) {
90 type = input.readUnsignedByte();
91 }
92 switch (type) {
93 case 0:
94 return input.readDouble();
95 case 1:
96 return input.readUnsignedByte() == 1;
97 case 2:
98 return readAMFString(input);
99 case 3:
100 return readAMFObject(input);
101 case 8:
102 return readAMFEcmaArray(input);
103 case 10:
104 return readAMFStrictArray(input);
105 case 11:
106 final Date date = new Date((long) input.readDouble());
107 input.skip(2);
108 return date;
109 case 13:
110 return "UNDEFINED";
111 default:
112 return null;
113 }
114 }
115
116 private Object readAMFStrictArray(DataInputStream input) throws IOException {
117 long count = readUInt32(input);
118 ArrayList<Object> list = new ArrayList<Object>();
119 for (int i = 0; i < count; i++) {
120 list.add(readAMFData(input, -1));
121 }
122 return list;
123 }
124
125
126 private String readAMFString(DataInputStream input) throws IOException {
127 int size = input.readUnsignedShort();
128 byte[] chars = new byte[size];
129 input.readFully(chars);
130 return new String(chars);
131 }
132
133 private Object readAMFObject(DataInputStream input) throws IOException {
134 HashMap<String, Object> array = new HashMap<String, Object>();
135 while (true) {
136 String key = readAMFString(input);
137 int dataType = input.read();
138 if (dataType == 9) {
139 break;
140 }
141 array.put(key, readAMFData(input, dataType));
142 }
143 return array;
144 }
145
146 private Object readAMFEcmaArray(DataInputStream input) throws IOException {
147 long size = readUInt32(input);
148 HashMap<String, Object> array = new HashMap<String, Object>();
149 for (int i = 0; i < size; i++) {
150 String key = readAMFString(input);
151 int dataType = input.read();
152 array.put(key, readAMFData(input, dataType));
153 }
154 return array;
155 }
156
157 private boolean checkSignature(DataInputStream fis) throws IOException {
158 return fis.read() == 'F' && fis.read() == 'L' && fis.read() == 'V';
159 }
160
161 public void parse(
162 InputStream stream, ContentHandler handler,
163 Metadata metadata, ParseContext context)
164 throws IOException, SAXException, TikaException {
165 DataInputStream datainput = new DataInputStream(stream);
166 if (!checkSignature(datainput)) {
167 throw new TikaException("FLV signature not detected");
168 }
169
170
171 int version = datainput.readUnsignedByte();
172 if (version != 1) {
173
174 throw new TikaException("Unpexpected FLV version: " + version);
175 }
176
177 int typeFlags = datainput.readUnsignedByte();
178
179 long len = readUInt32(datainput);
180 if (len != 9) {
181
182 throw new TikaException("Unpexpected FLV header length: " + len);
183 }
184
185 long sizePrev = readUInt32(datainput);
186 if (sizePrev != 0) {
187
188 throw new TikaException(
189 "Unpexpected FLV first previous block size: " + sizePrev);
190 }
191
192 metadata.set(Metadata.CONTENT_TYPE, "video/x-flv");
193 metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0));
194 metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0));
195
196 XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
197 xhtml.startDocument();
198
199
200 while (true) {
201 int type = datainput.read();
202 if (type == -1) {
203
204 break;
205 }
206
207 int datalen = readUInt24(datainput);
208 stream.skip(4);
209 stream.skip(3);
210
211 if (type == TYPE_METADATA) {
212
213 byte[] metaBytes = new byte[datalen];
214 for (int readCount = 0; readCount < datalen;) {
215 int r = stream.read(metaBytes, readCount, datalen - readCount);
216 if(r!=-1) {
217 readCount += r;
218
219 } else {
220 break;
221 }
222 }
223
224 ByteArrayInputStream is = new ByteArrayInputStream(metaBytes);
225
226 DataInputStream dis = new DataInputStream(is);
227
228 Object data = null;
229
230 for (int i = 0; i < 2; i++) {
231 data = readAMFData(dis, -1);
232 }
233
234 if (data instanceof Map) {
235
236
237 Map<String, Object> extractedMetadata = (Map<String, Object>) data;
238 for (Entry<String, Object> entry : extractedMetadata.entrySet()) {
239 metadata.set(entry.getKey(), entry.getValue().toString());
240 }
241 }
242
243 } else {
244
245 for (int skiplen = 0; skiplen < datalen;) {
246 long currentSkipLen = datainput.skip(datalen - skiplen);
247 skiplen += currentSkipLen;
248 }
249 }
250
251 sizePrev = readUInt32(datainput);
252 if (sizePrev != datalen + 11) {
253
254 break;
255 }
256 }
257
258 xhtml.endDocument();
259 }
260
261 public void parse(InputStream stream, ContentHandler handler,
262 Metadata metadata) throws IOException, SAXException, TikaException {
263 parse(stream, handler, metadata, new ParseContext());
264 }
265
266 }