
# Helper: extract the contents of the outermost (top-level) parenthesis pairs.
#
# Scans `s` character by character while tracking the nesting depth and
# returns the text between each top-level "(" and its matching ")".
# Nested parentheses are kept verbatim inside the returned group.
#
# Args:
#   s: A single string (character vector of length 0 or 1).
#
# Returns:
#   A character vector with one element per top-level group, in order of
#   appearance; `character(0)` if there is no complete group, or if `s` is
#   empty or NA. A group that is opened but never closed is not returned.
#   A ")" without a matching "(" is ignored.
extract_top_level <- function(s) {
  # Position arithmetic below is only meaningful for a single string.
  stopifnot(is.character(s), length(s) <= 1L)
  if (length(s) == 0L || is.na(s)) {
    return(character(0))
  }

  chars <- strsplit(s, "")[[1]]
  # Only parentheses can change the state, so skip every other character.
  paren_pos <- which(chars == "(" | chars == ")")

  # Every group needs its own ")", so this is an upper bound on the result.
  groups <- character(sum(chars == ")"))
  n_groups <- 0L
  depth <- 0L
  start <- NA_integer_

  for (i in paren_pos) {
    if (chars[i] == "(") {
      if (depth == 0L) {
        start <- i
      }
      depth <- depth + 1L
    } else if (depth > 0L) {
      # The `depth > 0` guard drops unmatched ")" so that they cannot push
      # the depth below zero and corrupt the detection of later groups.
      depth <- depth - 1L
      if (depth == 0L) {
        n_groups <- n_groups + 1L
        groups[n_groups] <- substr(s, start + 1L, i - 1L)
      }
    }
  }

  groups[seq_len(n_groups)]
}

# Recursive worker: list every parenthesised group in `s`, at all nesting
# levels, together with its token position.
#
# Args:
#   s:           String to analyse.
#   depth:       Nesting depth of `s` itself; groups found directly in `s`
#                are reported with `depth + 1`.
#   token_start: Token index assigned to the first top-level group of `s`.
#
# Returns:
#   A data frame with one row per group and the columns
#     start    - token index at which the group begins,
#     n_tokens - length of `split_sumerian(group)$signs`,
#     depth    - nesting depth of the group,
#     expr     - text of the group without its enclosing parentheses.
#   Each group's row is followed directly by the rows of its nested groups.
#   A zero-row data frame with the same columns is returned when `s`
#   contains no complete group.
#
# `split_sumerian()` is defined outside this block; presumably it tokenises
# the expression into signs - confirm against its definition.
#
# NOTE(review): the token counter only advances by the sizes of the groups
# themselves. Text outside any parentheses does not advance it, and nested
# groups are numbered from their parent's start token regardless of what
# precedes them inside the parent. Confirm that this matches the expected
# skeleton format.
extract_skeleton_entries_recursive <- function(s, depth = 0, token_start = 1) {
  empty <- data.frame(
    start = integer(0),
    n_tokens = integer(0),
    depth = integer(0),
    expr = character(0)
  )

  groups <- extract_top_level(s)
  if (length(groups) == 0) {
    return(empty)
  }

  # Two slots per group: its own row, then (optionally) its nested rows.
  # Collecting the pieces and binding once avoids re-copying the growing
  # data frame on every iteration.
  pieces <- vector("list", 2 * length(groups))
  current_token <- token_start

  for (k in seq_along(groups)) {
    g <- groups[[k]]
    n <- length(split_sumerian(g)$signs)
    pieces[[2 * k - 1]] <- data.frame(
      start = current_token,
      n_tokens = n,
      depth = depth + 1,
      expr = g
    )
    # Descend only if the group still contains parentheses.
    if (grepl("\\(", g)) {
      pieces[[2 * k]] <- extract_skeleton_entries_recursive(
        g, depth + 1, current_token
      )
    }
    current_token <- current_token + n
  }

  # Drop the unused slots and keep the empty schema first, so that column
  # names and types are established the same way as with incremental binding.
  pieces <- Filter(Negate(is.null), pieces)
  do.call(rbind, c(list(empty), pieces))
}

# Entry point: collect all parenthesised groups of `x` at every nesting
# level, starting at depth 0 and token index 1. Returns the data frame
# produced by extract_skeleton_entries_recursive().
extract_skeleton_entries <- function(x) {
  extract_skeleton_entries_recursive(s = x, depth = 0, token_start = 1)
}
