TokenSequenceBuilder.java

  1. /*
  2.  * (c) Copyright 2021 Hasan Selman Kara. All rights reserved.
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *     http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package li.selman.jpbe.dsl.token;

  17. import java.util.ArrayList;
  18. import java.util.List;
  19. import java.util.Optional;
  20. import java.util.function.BiFunction;

  21. /**
  22.  * @author Hasan Selman Kara
  23.  */
  24. public class TokenSequenceBuilder {

  25.     private final int maxLength;
  26.     private final BiFunction<Character, Token, Optional<Token>> computeTokenForCharHook;
  27.     private final Tokens tokens;

  28.     private static Optional<Token> defaultHook(Character character, Token token) {
  29.         return Optional.empty();
  30.     }

  31.     public TokenSequenceBuilder(int maxLength, Tokens tokens) {
  32.         this(maxLength, TokenSequenceBuilder::defaultHook, tokens);
  33.     }

  34.     public TokenSequenceBuilder(int maxLength, BiFunction<Character, Token, Optional<Token>> computeTokenForCharHook,
  35.                                 Tokens tokens) {
  36.         if (maxLength <= 0) {
  37.             throw new IllegalArgumentException("MaxLength cannot be smaller than 1");
  38.         }
  39.         if (computeTokenForCharHook == null) {
  40.             throw new IllegalArgumentException("Hook cannot be null. Use default hook!");
  41.         }
  42.         if (tokens == null) {
  43.             throw new IllegalArgumentException("Tokens cannot be null");
  44.         }

  45.         this.maxLength = maxLength;
  46.         this.computeTokenForCharHook = computeTokenForCharHook;
  47.         this.tokens = tokens;
  48.     }

  49.     /**
  50.      * @param input the whole input string provided by the data set
  51.      * @param from  start index for sub-string on {@code input}
  52.      * @param to    end index for sub-string on {@code input}
  53.      * @return sequence of tokens representing the token structure of a substring on {@code input}
  54.      */
  55.     @SuppressWarnings("checkstyle:CyclomaticComplexity")
  56.     public TokenSequence computeTokenSequence(String input, int from, int to) {
  57.         if (to < 1 || to > input.length()) throw new IllegalArgumentException("'to' index is invalid.");
  58.         if (from < 0 || from >= input.length()) throw new IllegalArgumentException("'from' index is invalid.");
  59.         if (to <= from) throw new IllegalArgumentException("'from' index must be smaller than 'to' index.");

  60.         List<Token> tmpTokens = new ArrayList<>();

  61.         if (from == 0) {
  62.             tmpTokens.add(Token.START);
  63.         }

  64.         String substr = input.substring(from, to);
  65.         Token last = null;
  66.         for (int i = 0; i < substr.length(); i++) {
  67.             char character = substr.charAt(i);
  68.             if (last == null) {
  69.                 // Handle first token
  70.                 last = computeTokenForChar(character, getLastOrNull(tmpTokens));
  71.                 tmpTokens.add(last);
  72.             }

  73.             Token next = computeTokenForChar(character, getLastOrNull(tmpTokens));
  74.             if (!last.equals(next)) {
  75.                 last = next;
  76.                 tmpTokens.add(last);
  77.             }

  78.             if (tmpTokens.size() > maxLength) {
  79.                 // Already too long, preemptive cancellation with current tokens
  80.                 return TokenSequence.of(tmpTokens);
  81.             }
  82.         }

  83.         if (to == input.length()) {
  84.             tmpTokens.add(Token.END);
  85.         }

  86.         return TokenSequence.of(tmpTokens);
  87.     }

  88.     private <T> T getLastOrNull(List<T> list) {
  89.         if (list.isEmpty()) {
  90.             return null;
  91.         } else {
  92.             return list.get(list.size() - 1);
  93.         }
  94.     }

  95.     Token computeTokenForChar(char c, Token lastToken) {

  96.         Optional<Token> hookToken = computeTokenForCharHook.apply(c, lastToken);
  97.         if (hookToken.isPresent()) {
  98.             return hookToken.get();
  99.         }

  100.         return tokens.getTokens().stream()
  101.                 .filter(token -> token.matches(c, lastToken))
  102.                 .findFirst()
  103.                 .orElse(tokens.getElseToken());
  104.     }

  105. }