/* * Copyright (C) 2011, 2013 Google Inc. All rights reserved. * Copyright (C) 2013 Cable Television Labs, Inc. * Copyright (C) 2011-2020 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "WebVTTParser.h" #if ENABLE(VIDEO) #include "CommonAtomStrings.h" #include "Document.h" #include "HTMLParserIdioms.h" #include "ISOVTTCue.h" #include "ProcessingInstruction.h" #include "StyleRule.h" #include "StyleRuleImport.h" #include "StyleSheetContents.h" #include "Text.h" #include "VTTScanner.h" #include "WebVTTElement.h" #include "WebVTTTokenizer.h" namespace WebCore { constexpr double secondsPerHour = 3600; constexpr double secondsPerMinute = 60; constexpr double secondsPerMillisecond = 0.001; constexpr auto fileIdentifier = "WEBVTT"_s; constexpr unsigned fileIdentifierLength = 6; constexpr unsigned regionIdentifierLength = 6; constexpr unsigned styleIdentifierLength = 5; bool WebVTTParser::parseFloatPercentageValue(VTTScanner& valueScanner, float& percentage) { float number; if (!valueScanner.scanFloat(number)) return false; // '%' must be present and at the end of the setting value. if (!valueScanner.scan('%')) return false; if (number < 0 || number > 100) return false; percentage = number; return true; } bool WebVTTParser::parseFloatPercentageValuePair(VTTScanner& valueScanner, char delimiter, FloatPoint& valuePair) { float firstCoord; if (!parseFloatPercentageValue(valueScanner, firstCoord)) return false; if (!valueScanner.scan(delimiter)) return false; float secondCoord; if (!parseFloatPercentageValue(valueScanner, secondCoord)) return false; valuePair = FloatPoint(firstCoord, secondCoord); return true; } WebVTTParser::WebVTTParser(WebVTTParserClient& client, Document& document) : m_document(document) , m_decoder(TextResourceDecoder::create(textPlainContentTypeAtom(), PAL::UTF8Encoding())) , m_client(client) { } Vector> WebVTTParser::takeCues() { return WTFMove(m_cuelist); } Vector> WebVTTParser::takeRegions() { return WTFMove(m_regionList); } Vector WebVTTParser::takeStyleSheets() { return WTFMove(m_styleSheets); } void WebVTTParser::parseFileHeader(String&& data) { m_state = Initial; m_lineReader.reset(); m_lineReader.append(WTFMove(data)); parse(); } void WebVTTParser::parseBytes(const uint8_t* data, unsigned length) { m_lineReader.append(m_decoder->decode(data, length)); parse(); } void WebVTTParser::parseCueData(const ISOWebVTTCue& data) { auto cue = WebVTTCueData::create(); MediaTime startTime = data.presentationTime(); cue->setStartTime(startTime); cue->setEndTime(startTime + data.duration()); cue->setContent(data.cueText()); cue->setId(data.id()); cue->setSettings(data.settings()); MediaTime originalStartTime; if (WebVTTParser::collectTimeStamp(data.originalStartTime(), originalStartTime)) cue->setOriginalStartTime(originalStartTime); m_cuelist.append(WTFMove(cue)); m_client.newCuesParsed(); } void WebVTTParser::flush() { m_lineReader.append(m_decoder->flush()); m_lineReader.appendEndOfStream(); parse(); flushPendingCue(); } void WebVTTParser::parse() { // WebVTT parser algorithm. (5.1 WebVTT file parsing.) // Steps 1 - 3 - Initial setup. while (auto line = m_lineReader.nextLine()) { switch (m_state) { case Initial: // Steps 4 - 9 - Check for a valid WebVTT signature. if (!hasRequiredFileIdentifier(*line)) { m_client.fileFailedToParse(); return; } m_state = Header; break; case Header: // Steps 11 - 14 - Collect WebVTT block m_state = collectWebVTTBlock(*line); break; case Region: m_state = collectRegionSettings(*line); break; case Style: m_state = collectStyleSheet(*line); break; case Id: // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values. if (line->isEmpty()) break; // Step 21 - Cue creation (start a new cue). resetCueValues(); // Steps 22 - 25 - Check if this line contains an optional identifier or timing data. m_state = collectCueId(*line); break; case TimingsAndSettings: // Steps 26 - 27 - Discard current cue if the line is empty. if (line->isEmpty()) { m_state = Id; break; } // Steps 28 - 29 - Collect cue timings and settings. m_state = collectTimingsAndSettings(*line); break; case CueText: // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output. m_state = collectCueText(*line); break; case BadCue: // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen. m_state = ignoreBadCue(*line); break; case Finished: ASSERT_NOT_REACHED(); break; } } } void WebVTTParser::fileFinished() { ASSERT(m_state != Finished); constexpr uint8_t endLines[] = { '\n', '\n' }; parseBytes(endLines, 2); m_state = Finished; } void WebVTTParser::flushPendingCue() { ASSERT(m_lineReader.isAtEndOfStream()); // If we're in the CueText state when we run out of data, we emit the pending cue. if (m_state == CueText) createNewCue(); } bool WebVTTParser::hasRequiredFileIdentifier(const String& line) { // A WebVTT file identifier consists of an optional BOM character, // the string "WEBVTT" followed by an optional space or tab character, // and any number of characters that are not line terminators ... if (!line.startsWith(fileIdentifier)) return false; if (line.length() > fileIdentifierLength && !isHTMLSpace(line[fileIdentifierLength])) return false; return true; } WebVTTParser::ParseState WebVTTParser::collectRegionSettings(const String& line) { // End of region block if (checkAndStoreRegion(line)) return checkAndRecoverCue(line); m_currentRegion->setRegionSettings(line); return Region; } WebVTTParser::ParseState WebVTTParser::collectWebVTTBlock(const String& line) { // collect a WebVTT block parsing. (WebVTT parser algorithm step 14) if (checkAndCreateRegion(line)) return Region; if (checkStyleSheet(line)) return Style; // Handle cue block. ParseState state = checkAndRecoverCue(line); if (state != Header) { if (!m_regionList.isEmpty()) m_client.newRegionsParsed(); if (!m_styleSheets.isEmpty()) m_client.newStyleSheetsParsed(); if (!m_previousLine.isEmpty() && !m_previousLine.contains("-->"_s)) m_currentId = AtomString { m_previousLine }; return state; } // store previous line for cue id. // length is more than 1 line clear m_previousLine and ignore line. if (m_previousLine.isEmpty()) m_previousLine = line; else m_previousLine = emptyString(); return state; } WebVTTParser::ParseState WebVTTParser::checkAndRecoverCue(const String& line) { // parse cue timings and settings if (line.contains("-->"_s)) { ParseState state = recoverCue(line); if (state != BadCue) return state; } return Header; } WebVTTParser::ParseState WebVTTParser::collectStyleSheet(const String& line) { // End of style block if (checkAndStoreStyleSheet(line)) return checkAndRecoverCue(line); m_currentSourceStyleSheet.append(line); return Style; } bool WebVTTParser::checkAndCreateRegion(StringView line) { if (m_previousLine.contains("-->"_s)) return false; // line starts with the substring "REGION" and remaining characters // zero or more U+0020 SPACE characters or U+0009 CHARACTER TABULATION // (tab) characters expected other than these charecters it is invalid. if (line.startsWith("REGION"_s) && line.substring(regionIdentifierLength).isAllSpecialCharacters()) { m_currentRegion = VTTRegion::create(m_document); return true; } return false; } bool WebVTTParser::checkAndStoreRegion(StringView line) { if (!line.isEmpty() && !line.contains("-->"_s)) return false; if (!m_currentRegion->id().isEmpty()) { m_regionList.removeFirstMatching([this] (auto& region) { return region->id() == m_currentRegion->id(); }); m_regionList.append(m_currentRegion.releaseNonNull()); } m_currentRegion = nullptr; return true; } bool WebVTTParser::checkStyleSheet(StringView line) { if (m_previousLine.contains("-->"_s)) return false; // line starts with the substring "STYLE" and remaining characters // zero or more U+0020 SPACE characters or U+0009 CHARACTER TABULATION // (tab) characters expected other than these charecters it is invalid. if (line.startsWith("STYLE"_s) && line.substring(styleIdentifierLength).isAllSpecialCharacters()) return true; return false; } bool WebVTTParser::checkAndStoreStyleSheet(StringView line) { if (!line.isEmpty() && !line.contains("-->"_s)) return false; auto styleSheetText = m_currentSourceStyleSheet.toString(); m_currentSourceStyleSheet.clear(); // WebVTTMode disallows non-data URLs. auto contents = StyleSheetContents::create(CSSParserContext(WebVTTMode)); if (!contents->parseString(styleSheetText)) return true; auto& namespaceRules = contents->namespaceRules(); if (namespaceRules.size()) return true; auto& importRules = contents->importRules(); if (importRules.size()) return true; auto& childRules = contents->childRules(); if (!childRules.size()) return true; StringBuilder sanitizedStyleSheetBuilder; for (const auto& rule : childRules) { if (!rule->isStyleRule()) return true; const auto& styleRule = downcast(*rule); const auto& selectorList = styleRule.selectorList(); if (selectorList.listSize() != 1) return true; auto selector = selectorList.selectorAt(0); auto selectorText = selector->selectorText(); bool isCue = selectorText == "::cue"_s || selectorText.startsWith("::cue("_s); if (!isCue) return true; if (styleRule.properties().isEmpty()) continue; sanitizedStyleSheetBuilder.append(selectorText, " { ", styleRule.properties().asText(), " }\n"); } // It would be more stylish to parse the stylesheet only once instead of serializing a sanitized version. if (!sanitizedStyleSheetBuilder.isEmpty()) m_styleSheets.append(sanitizedStyleSheetBuilder.toString()); return true; } WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line) { if (line.contains("-->"_s)) return collectTimingsAndSettings(line); m_currentId = AtomString { line }; return TimingsAndSettings; } WebVTTParser::ParseState WebVTTParser::collectTimingsAndSettings(const String& line) { if (line.isEmpty()) return BadCue; VTTScanner input(line); // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.) // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input input.skipWhile>(); // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time. if (!collectTimeStamp(input, m_currentStartTime)) return BadCue; input.skipWhile>(); // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure. if (!input.scan("-->")) return BadCue; input.skipWhile>(); // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time. if (!collectTimeStamp(input, m_currentEndTime)) return BadCue; input.skipWhile>(); // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue). m_currentSettings = input.restOfInputAsString(); return CueText; } WebVTTParser::ParseState WebVTTParser::collectCueText(const String& line) { // Step 34. if (line.isEmpty()) { createNewCue(); return Id; } // Step 35. if (line.contains("-->"_s)) { // Step 39-40. createNewCue(); // Step 41 - New iteration of the cue loop. return recoverCue(line); } if (!m_currentContent.isEmpty()) m_currentContent.append('\n'); m_currentContent.append(line); return CueText; } WebVTTParser::ParseState WebVTTParser::recoverCue(const String& line) { // Step 17 and 21. resetCueValues(); // Step 22. return collectTimingsAndSettings(line); } WebVTTParser::ParseState WebVTTParser::ignoreBadCue(const String& line) { if (line.isEmpty()) return Id; if (line.contains("-->"_s)) return recoverCue(line); return BadCue; } // A helper class for the construction of a "cue fragment" from the cue text. class WebVTTTreeBuilder { public: WebVTTTreeBuilder(Document& document) : m_document(document) { } Ref buildFromString(const String& cueText); private: void constructTreeFromToken(Document&); WebVTTToken m_token; RefPtr m_currentNode; Vector m_languageStack; Document& m_document; }; Ref WebVTTTreeBuilder::buildFromString(const String& cueText) { // Cue text processing based on // 5.4 WebVTT cue text parsing rules, and // 5.5 WebVTT cue text DOM construction rules. auto fragment = DocumentFragment::create(m_document); if (cueText.isEmpty()) { fragment->parserAppendChild(Text::create(m_document, String { emptyString() })); return fragment; } m_currentNode = fragment.ptr(); WebVTTTokenizer tokenizer(cueText); m_languageStack.clear(); while (tokenizer.nextToken(m_token)) constructTreeFromToken(m_document); return fragment; } Ref WebVTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText) { WebVTTTreeBuilder treeBuilder(document); return treeBuilder.buildFromString(cueText); } void WebVTTParser::createNewCue() { auto cue = WebVTTCueData::create(); cue->setStartTime(m_currentStartTime); cue->setEndTime(m_currentEndTime); cue->setContent(m_currentContent.toString()); cue->setId(m_currentId); cue->setSettings(m_currentSettings); m_cuelist.append(WTFMove(cue)); m_client.newCuesParsed(); } void WebVTTParser::resetCueValues() { m_currentId = emptyAtom(); m_currentSettings = emptyString(); m_currentStartTime = MediaTime::zeroTime(); m_currentEndTime = MediaTime::zeroTime(); m_currentContent.clear(); } bool WebVTTParser::collectTimeStamp(const String& line, MediaTime& timeStamp) { if (line.isEmpty()) return false; VTTScanner input(line); return collectTimeStamp(input, timeStamp); } bool WebVTTParser::collectTimeStamp(VTTScanner& input, MediaTime& timeStamp) { // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.) // Steps 1 - 4 - Initial checks, let most significant units be minutes. enum Mode { minutes, hours }; Mode mode = minutes; // Steps 5 - 7 - Collect a sequence of characters that are 0-9. // If not 2 characters or value is greater than 59, interpret as hours. int value1; unsigned value1Digits = input.scanDigits(value1); if (!value1Digits) return false; if (value1Digits != 2 || value1 > 59) mode = hours; // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars). int value2; if (!input.scan(':') || input.scanDigits(value2) != 2) return false; // Step 12 - Detect whether this timestamp includes hours. int value3; if (mode == hours || input.match(':')) { if (!input.scan(':') || input.scanDigits(value3) != 2) return false; } else { value3 = value2; value2 = value1; value1 = 0; } // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars). int value4; if (!input.scan('.') || input.scanDigits(value4) != 3) return false; if (value2 > 59 || value3 > 59) return false; // Steps 18 - 19 - Calculate result. timeStamp = MediaTime::createWithDouble((value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond)); return true; } static WebVTTNodeType tokenToNodeType(WebVTTToken& token) { switch (token.name().length()) { case 1: if (token.name()[0] == 'c') return WebVTTNodeTypeClass; if (token.name()[0] == 'v') return WebVTTNodeTypeVoice; if (token.name()[0] == 'b') return WebVTTNodeTypeBold; if (token.name()[0] == 'i') return WebVTTNodeTypeItalic; if (token.name()[0] == 'u') return WebVTTNodeTypeUnderline; break; case 2: if (token.name()[0] == 'r' && token.name()[1] == 't') return WebVTTNodeTypeRubyText; break; case 4: if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y') return WebVTTNodeTypeRuby; if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g') return WebVTTNodeTypeLanguage; break; } return WebVTTNodeTypeNone; } void WebVTTTreeBuilder::constructTreeFromToken(Document& document) { // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules switch (m_token.type()) { case WebVTTTokenTypes::Character: { m_currentNode->parserAppendChild(Text::create(document, String { m_token.characters() })); break; } case WebVTTTokenTypes::StartTag: { WebVTTNodeType nodeType = tokenToNodeType(m_token); if (nodeType == WebVTTNodeTypeNone) break; WebVTTNodeType currentType = is(*m_currentNode) ? downcast(*m_currentNode).webVTTNodeType() : WebVTTNodeTypeNone; // is only allowed if the current node is . if (nodeType == WebVTTNodeTypeRubyText && currentType != WebVTTNodeTypeRuby) break; auto child = WebVTTElement::create(nodeType, document); if (!m_token.classes().isEmpty()) child->setAttributeWithoutSynchronization(classAttr, m_token.classes()); if (nodeType == WebVTTNodeTypeVoice) child->setAttributeWithoutSynchronization(WebVTTElement::voiceAttributeName(), m_token.annotation()); else if (nodeType == WebVTTNodeTypeLanguage) { m_languageStack.append(m_token.annotation()); child->setAttributeWithoutSynchronization(WebVTTElement::langAttributeName(), m_languageStack.last()); } if (!m_languageStack.isEmpty()) child->setLanguage(m_languageStack.last()); m_currentNode->parserAppendChild(child); m_currentNode = WTFMove(child); break; } case WebVTTTokenTypes::EndTag: { WebVTTNodeType nodeType = tokenToNodeType(m_token); if (nodeType == WebVTTNodeTypeNone) break; // The only non-VTTElement would be the DocumentFragment root. (Text // nodes and PIs will never appear as m_currentNode.) if (!is(*m_currentNode)) break; WebVTTNodeType currentType = downcast(*m_currentNode).webVTTNodeType(); bool matchesCurrent = nodeType == currentType; if (!matchesCurrent) { // auto-closes if (currentType == WebVTTNodeTypeRubyText && nodeType == WebVTTNodeTypeRuby) { if (m_currentNode->parentNode()) m_currentNode = m_currentNode->parentNode(); } else break; } if (nodeType == WebVTTNodeTypeLanguage) m_languageStack.removeLast(); if (m_currentNode->parentNode()) m_currentNode = m_currentNode->parentNode(); break; } case WebVTTTokenTypes::TimestampTag: { String charactersString = m_token.characters(); MediaTime parsedTimeStamp; if (WebVTTParser::collectTimeStamp(charactersString, parsedTimeStamp)) m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp"_s, WTFMove(charactersString))); break; } default: break; } } } #endif