#!/bin/bash
#
# Preprocess a text file into a text FSA by chaining external tools:
#   1. tokenize       TXT -> SEG
#   2. apply-dic      SEG -> FSA, using the dictionaries in $DICOS
#   3. transduct-fsa  (only if NORM_GRAMMAR is set) rewrites FSA in place
#   4. decore-fsa     (only if DECO_GRAMMAR is set) rewrites FSA in place
#
# Usage: <this script> TEXT_FILE
#
# Environment:
#   OUTILEXDIR    installation root; defaults to $HOME/outilex
#   DICO          dictionaries location; defaults to $OUTILEXDIR/dicos
#                 (note: the override variable is DICO, the internal one DICOS)
#   NORM_GRAMMAR  optional normalisation grammar
#   DECO_GRAMMAR  optional decoration grammar

# Abort as soon as any pipeline step fails.
set -e

# Without an input file, TXT would be empty and the derived outputs would be
# the bogus paths ".seg" / ".fsa"; fail early with a usage message instead.
if [ -z "${1:-}" ]; then
  printf 'usage: %s TEXT_FILE\n' "${0##*/}" >&2
  exit 2
fi

OUTILEXDIR=${OUTILEXDIR:-$HOME/outilex}
DICOS=${DICO:-$OUTILEXDIR/dicos}

# Output names are derived from the input: a trailing ".txt" (if present) is
# replaced by ".seg" for the segmented text and ".fsa" for the text automaton.
TXT=$1
SEG=${TXT%.txt}.seg
FSA=${SEG%.seg}.fsa

# Step 1: segment the input text; the result is written to $SEG.
printf '%s\n' "text segmentation ..."
tokenize -o "$SEG" "$TXT"

# Step 2: apply the dictionaries in $DICOS to the segmented text; the result
# is written to $FSA.
printf '\n%s\n' "dictionnary application ..."
apply-dic -o "$FSA" -dics "$DICOS" -imaj "$SEG"

# Optional step: when NORM_GRAMMAR is set and non-empty, run transduct-fsa with
# that grammar on the text FSA. The tool writes to a temporary "$FSA.out",
# which then replaces $FSA, so later steps keep reading the same path.
# All expansions are quoted so paths containing spaces or glob characters
# are passed as single arguments.
if [ -n "${NORM_GRAMMAR:-}" ]; then
  echo
  echo "applying normalisation grammar : $NORM_GRAMMAR"
  transduct-fsa -o "$FSA.out" -gram "$NORM_GRAMMAR" "$FSA"
  mv -- "$FSA.out" "$FSA"
fi

# Optional step: when DECO_GRAMMAR is set and non-empty, run decore-fsa with
# that grammar on the text FSA. The tool writes to a temporary "$FSA.out",
# which then replaces $FSA.
# All expansions are quoted so paths containing spaces or glob characters
# are passed as single arguments.
if [ -n "${DECO_GRAMMAR:-}" ]; then
  echo
  echo "applying decoration grammar : $DECO_GRAMMAR"
  decore-fsa -o "$FSA.out" -gram "$DECO_GRAMMAR" "$FSA"
  mv -- "$FSA.out" "$FSA"
fi

# Final report: a blank separator line, then the path of the resulting text FSA.
printf '\npreprocessing done. text fsa in %s.\n' "$FSA"

