TokenSequenceBuilder.java
- /*
- * (c) Copyright 2021 Hasan Selman Kara. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package li.selman.jpbe.dsl.token;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Optional;
- import java.util.function.BiFunction;
- /**
- * @author Hasan Selman Kara
- */
- public class TokenSequenceBuilder {
- private final int maxLength;
- private final BiFunction<Character, Token, Optional<Token>> computeTokenForCharHook;
- private final Tokens tokens;
- private static Optional<Token> defaultHook(Character character, Token token) {
- return Optional.empty();
- }
- public TokenSequenceBuilder(int maxLength, Tokens tokens) {
- this(maxLength, TokenSequenceBuilder::defaultHook, tokens);
- }
- public TokenSequenceBuilder(int maxLength, BiFunction<Character, Token, Optional<Token>> computeTokenForCharHook,
- Tokens tokens) {
- if (maxLength <= 0) {
- throw new IllegalArgumentException("MaxLength cannot be smaller than 1");
- }
- if (computeTokenForCharHook == null) {
- throw new IllegalArgumentException("Hook cannot be null. Use default hook!");
- }
- if (tokens == null) {
- throw new IllegalArgumentException("Tokens cannot be null");
- }
- this.maxLength = maxLength;
- this.computeTokenForCharHook = computeTokenForCharHook;
- this.tokens = tokens;
- }
- /**
- * @param input the whole input string provided by the data set
- * @param from start index for sub-string on {@code input}
- * @param to end index for sub-string on {@code input}
- * @return sequence of tokens representing the token structure of a substring on {@code input}
- */
- @SuppressWarnings("checkstyle:CyclomaticComplexity")
- public TokenSequence computeTokenSequence(String input, int from, int to) {
- if (to < 1 || to > input.length()) throw new IllegalArgumentException("'to' index is invalid.");
- if (from < 0 || from >= input.length()) throw new IllegalArgumentException("'from' index is invalid.");
- if (to <= from) throw new IllegalArgumentException("'from' index must be smaller than 'to' index.");
- List<Token> tmpTokens = new ArrayList<>();
- if (from == 0) {
- tmpTokens.add(Token.START);
- }
- String substr = input.substring(from, to);
- Token last = null;
- for (int i = 0; i < substr.length(); i++) {
- char character = substr.charAt(i);
- if (last == null) {
- // Handle first token
- last = computeTokenForChar(character, getLastOrNull(tmpTokens));
- tmpTokens.add(last);
- }
- Token next = computeTokenForChar(character, getLastOrNull(tmpTokens));
- if (!last.equals(next)) {
- last = next;
- tmpTokens.add(last);
- }
- if (tmpTokens.size() > maxLength) {
- // Already too long, preemptive cancellation with current tokens
- return TokenSequence.of(tmpTokens);
- }
- }
- if (to == input.length()) {
- tmpTokens.add(Token.END);
- }
- return TokenSequence.of(tmpTokens);
- }
- private <T> T getLastOrNull(List<T> list) {
- if (list.isEmpty()) {
- return null;
- } else {
- return list.get(list.size() - 1);
- }
- }
- Token computeTokenForChar(char c, Token lastToken) {
- Optional<Token> hookToken = computeTokenForCharHook.apply(c, lastToken);
- if (hookToken.isPresent()) {
- return hookToken.get();
- }
- return tokens.getTokens().stream()
- .filter(token -> token.matches(c, lastToken))
- .findFirst()
- .orElse(tokens.getElseToken());
- }
- }