ZINC15:Model building

From DISI
Revision as of 20:38, 29 June 2018 by Frodo (talk | contribs) (asdf)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

#!/bin/bash --norc
# Catalog loading driver: takes a supplier source file through the
# preprocessing, expansion, resolution, creation, loading and depletion
# steps described at the end of the help text below.
#
# USAGE is the help text printed (to stderr) for -h/--help. The option
# names listed here are the ones the argument parser actually matches.
USAGE="${0} [OPTIONS] <SOURCE_FILE>

  Options:
    -h, --help - Print this message and exit
    -a, --addition-items <ITEMS_FILE> - A tab-delimited file of additional
                                        supplier code to sub_id mappings to
                                        load
    -n, --name <CATALOG_NAME> - The catalog name to use
                                [default: basename of SOURCE_FILE]
    -f, --filter <MODE> - Filtering rule list to use
                          [default: preliminary]
    -t, --tautomerize-mode <MODE> - Tautomerization rules to use
                                    [default: default]
                                    (not accepted on the command line yet;
                                    set ZINC_TAUTOMERIZE to override)
    -v, --validate <MODE> - Validation rule list to use
                            [default: strict]
    --num-stereocenters <N> - Max number of ambiguous stereocenters to enumerate
                              [default: 2]
    -s, --stereo-templates <TEMPLATES> - Special stereoisomer templating rules to use
                                         [default: default]
                                         (not accepted on the command line yet;
                                         set ZINC_STEREO_DEFAULT_EXPAND to override)
    -P, --skip-preprocessing - Skip the preprocessing step. Raw source file
                               will be passed to expansion
    -E, --skip-expansion - Skip expansion step. Preprocessing result will be
                           passed to resolution
    -R, --skip-resolution - Skip resolution step. Expansion result will be
                            passed as new substances to creation
    -C, --skip-creation - Skip creation step. Previous resolutions will be
                          passed to loading step
    -L, --skip-loading - Skip loading step. Existing catalog items will be
                         passed to depletion
    -D, --skip-depletion - Skip depletion step.

    Loading pipeline:

    1. Preprocessing: Filtering and neutralization
    2. Expansion: Stereo (RS & EZ) expansion for up to 2 (default)
                  centers, then explicit assignment. Special handling of
                  sterols and attempted early resolution of compounds with
                  a high number of centers
    3. Resolution: Find and separate out existing substances in ZINC
    4. Creation: Add substances determined to be new into ZINC
    5. Loading: Add and update catalog item (catalog_content/catalog_substance)
                mappings in ZINC
    6. Depletion: Final pass of marking old catalog contents as depleted
"

# Stop at the first command failure that is not part of a condition or of
# an &&/|| guard.
set -o errexit

# Filled in by the command-line parser.
SOURCE_FILE=""
ADDITIONAL_MAPPINGS=""

# These four may be preset through the environment; a value that is set,
# even to the empty string, is kept as is.
CATALOG_NAME="${CATALOG_NAME-}"
: "${FILTER_MODE=preliminary}"
: "${VALIDATE_MODE=strict}"
: "${ZINC_MAX_ENUMERABLE_STEREO_CENTERS=2}"

# Every pipeline step is enabled until a --skip-* option turns it off.
RUN_PREPROCESSING="yes"
RUN_STEREO_EXPANSION="yes"
RUN_RESOLUTION="yes"
RUN_CREATION="yes"
RUN_LOADING="yes"
RUN_DEPLETION="yes"

# Command-line parsing.
#
# Each iteration takes the next word as ARG and peeks at the following one
# as VALUE; options that take a value consume it with a second shift. The
# single positional argument is stored in SOURCE_FILE. Errors are reported
# on stderr and exit with status 255 (what `exit -1` yields in bash).
while (( $# > 0 )) ; do
    ARG="${1}"
    VALUE="${2-}"
    shift 1

    # Options that need a value: report a missing one explicitly. Without
    # this the second `shift` fails and `set -e` ends the script silently.
    case "${ARG}" in
        -a|--addition-items|--adition-items|-n|--name|-f|--filter|--filter-mode|-v|--validate|--validate-mode|--num-stereocenters)
            if (( $# == 0 )) ; then
                echo "Option ${ARG} requires a value" 1>&2
                exit 255
            fi
            ;;
    esac

    case "${ARG}" in
        -h|--help)
            echo "${USAGE}" 1>&2
            exit 0
            ;;
        # --adition-items is the spelling the help text has historically
        # shown; accept it alongside the correct one.
        -a|--addition-items|--adition-items)
            ADDITIONAL_MAPPINGS="${VALUE}"
            if [[ -n "${VALUE}" ]] ; then
                # Resolve while still in the caller's directory: the file is
                # symlinked later from inside the catalog directory, where a
                # relative path would point at nothing.
                ADDITIONAL_MAPPINGS="$( readlink -f "${VALUE}" )" || {
                    echo "Cannot resolve additional items file: ${VALUE}" 1>&2
                    exit 255
                }
            fi
            shift 1
            ;;
        -n|--name)
            CATALOG_NAME="${VALUE}"
            shift 1
            ;;
        # --filter-mode / --validate-mode are the long names shown in the
        # help text; --filter / --validate are the original spellings.
        -f|--filter|--filter-mode)
            FILTER_MODE="${VALUE}"
            shift 1
            ;;
        -v|--validate|--validate-mode)
            VALIDATE_MODE="${VALUE}"
            shift 1
            ;;
        --num-stereocenters)
            ZINC_MAX_ENUMERABLE_STEREO_CENTERS="${VALUE}"
            shift 1
            ;;
        -P|--skip-preprocessing)
            RUN_PREPROCESSING="no"
            ;;
        -E|--skip-expansion)
            RUN_STEREO_EXPANSION="no"
            ;;
        -R|--skip-resolution)
            RUN_RESOLUTION="no"
            ;;
        -C|--skip-creation)
            RUN_CREATION="no"
            ;;
        -L|--skip-loading)
            RUN_LOADING="no"
            ;;
        -D|--skip-depletion)
            RUN_DEPLETION="no"
            ;;
        *)
            # Reject a second positional argument and anything that looks
            # like an option but matched no arm above. The check is on ARG:
            # testing SOURCE_FILE here could never detect an unknown option.
            if [[ -n "${SOURCE_FILE-}" || "${ARG:0:1}" == "-" ]] ; then
                echo "Unexpected argument: ${ARG}" 1>&2
                exit 255
            fi
            SOURCE_FILE="${ARG}"
            ;;
    esac
done

# A source file is mandatory. When one was given, turn it into an absolute,
# symlink-resolved path so it stays valid after changing directory.
if [ -n "${SOURCE_FILE}" ] ; then
    SOURCE_FILE="$( readlink -f "${SOURCE_FILE}" )"
else
    echo "Source file required. Exiting." 1>&2
    exit -1
fi

# Without an explicit catalog name, use the source file's base name with a
# trailing .ism and then a trailing .smi removed.
if [ -z "${CATALOG_NAME}" ] ; then
    CATALOG_NAME="$( basename "$( basename "${SOURCE_FILE}" .ism )" .smi )"
fi

# Provide and export defaults for the two ZINC_CONFIG_* settings when they
# are unset or empty; values already present are left untouched.
[ -n "${ZINC_CONFIG_ENV}" ] || export ZINC_CONFIG_ENV="admin"
[ -n "${ZINC_CONFIG_SETUP_SKIP}" ] || export ZINC_CONFIG_SETUP_SKIP="blueprints errorhandlers"

# Summarise the effective settings and which steps will run, on stderr.
{
    printf '%s\n' \
        "Catalog:      ${CATALOG_NAME}" \
        "Contents:     ${SOURCE_FILE}" \
        "Filtering:    ${FILTER_MODE}" \
        "Validation:   ${VALIDATE_MODE}" \
        "Steps:" \
        "  Preproccessing   - ${RUN_PREPROCESSING}" \
        "  Expansion        - ${RUN_STEREO_EXPANSION}" \
        "  Resolution       - ${RUN_RESOLUTION}" \
        "  Creation         - ${RUN_CREATION}" \
        "  Loading          - ${RUN_LOADING}" \
        "  Depletion        - ${RUN_DEPLETION}"
} 1>&2


# Setup commands
#
# Every stage command below is only a default: a ZINC_* variable that is
# already set (even to an empty string) wins. The values are expanded
# unquoted where they are run, so each one is split on whitespace into a
# program name followed by its arguments.
: "${ZINC_FILTER=zincload-filter ${FILTER_MODE}}"
: "${ZINC_VALIDATE=zincload-filter ${VALIDATE_MODE}}"
: "${ZINC_CANONICALIZE=zincload-inchi --standardize --inchi-options=/RecMet}"
: "${ZINC_NEUTRALIZE=neutralize.sh}"
: "${ZINC_TAUTOMERIZE=zincload-tautomerize --rules=default}"
: "${ZINC_STEREO_SEPARATE=zincload-ambiguitysplit}"
: "${ZINC_STEREO_EXPAND_COMMAND=zincload-expandcenters --headers}"
# Built on top of ZINC_STEREO_EXPAND_COMMAND, so it must come after it.
: "${ZINC_STEREO_DEFAULT_EXPAND=${ZINC_STEREO_EXPAND_COMMAND} --limit=${ZINC_MAX_ENUMERABLE_STEREO_CENTERS} --assign-with=RE --templates=default}"

: "${ZINC_ANNOTATE=zincload-inchi --header --inchikey}"
: "${ZINC_IDENTIFY=zinc-manage admin substances resolve --headers -f smiles,name,inchikey}"

: "${ZINC_CREATE=zinc-manage admin substances load -s SMILES -n Name -c supplier_code -z sub_id_fk -k inchikey --catalog=${CATALOG_NAME} --reactivity -C 100}"
: "${ZINC_LOAD=zinc-manage admin catalogs load --header -C 1000}"

# Skipping depletion is passed on to the loader as an extra flag; this is
# appended even when ZINC_LOAD came from the environment.
[ "${RUN_DEPLETION}" == 'yes' ] || ZINC_LOAD+=" --no-depletion"

# File and directory layout.
#
# Everything is created under CATALOG_DIR (a directory named after the
# catalog inside the current working directory). The script changes into
# CATALOG_DIR during Step 0, so the relative names below resolve there.
# The numeric prefixes order the files by step and stage.

# SOURCE_FILE has already been resolved once after argument parsing;
# resolving an absolute path again leaves it unchanged.
SOURCE_FILE="$( readlink -f "${SOURCE_FILE}" )"
CATALOG_DIR="$( pwd )/${CATALOG_NAME}"
LOG_DIR="${CATALOG_DIR}/logs"

# Copy of the source file made in Step 0.
RAW_FILE="00-${CATALOG_NAME}-raw.ism"
# NOTE(review): EXTRA_MAPPINGS is not referenced by any step in this file.
EXTRA_MAPPINGS="${CATALOG_DIR}/01-${CATALOG_NAME}-extra-mappings.tsv"


# Step 1) Initial Preprocessing
###############################
# One file per preprocessing stage, in the order the stages run.
STEP_1_INPUT="10-${CATALOG_NAME}-to-process.ism"
EXTRACTED_FILE="11-${CATALOG_NAME}-extracted.ism"
FILTERED_FILE="12-${CATALOG_NAME}-filtered.ism"
CANONICAL_FILE="13-${CATALOG_NAME}-canonical.ism"
NEUTRALIZED_FILE="14-${CATALOG_NAME}-neutralized.ism"
TAUTOMERIZED_FILE="15-${CATALOG_NAME}-tautomerized.ism"
VERIFY_FILE="16-${CATALOG_NAME}-verified.ism"
DISTINCT_FILE="17-${CATALOG_NAME}-distinct.ism"


# Handed to the filter, tautomerize and validate commands via --log=.
FILTER_LOG="${LOG_DIR}/${CATALOG_NAME}.filtered"
TAUTOMER_LOG="${LOG_DIR}/${CATALOG_NAME}.tautomerized"
VERIFY_LOG="${LOG_DIR}/${CATALOG_NAME}.verified"

STEP_1_OUTPUT="18-${CATALOG_NAME}-processed.ism"


# Step 2) Stereo Expansion (and some early substance resolution for combinatorial reduction)
#####################################################################################
STEP_2_INPUT="20-${CATALOG_NAME}-unexpanded.ism"

# Step 2 works inside STEREO_DIR; the STEREO_* names below are relative to it.
STEREO_DIR="21-${CATALOG_NAME}-stereo-processing"
STEREO_RAW="00-${CATALOG_NAME}-unprocessed.ism"
STEREO_SEPARATE_DIR="10-extractions"

# Suffix labels given to the separation command; its output files are then
# collected by matching "<label>-*.ism" in STEREO_SEPARATE_DIR.
UNAMBIGUOUS_LABEL="unambiguous"
ENUMERABLE_LABEL="enumerable"
AMBIGUOUS_LABEL="ambiguous"

# Only referenced from the commented-out parts of Step 2.
DEFAULT_CHEMOTYPE="default"
SPECIAL_CHEMOTYPE="special"

# NOTE(review): the three *_PREFIX values are not referenced by any step in
# this file.
STEREO_EXTRACTED_UNAMBIGUOUS_PREFIX="${STEREO_SEPARATE_DIR}/${UNAMBIGUOUS_LABEL}"
STEREO_EXTRACTED_ENUMERABLE_PREFIX="${STEREO_SEPARATE_DIR}/${ENUMERABLE_LABEL}"
STEREO_EXTRACTED_AMBIGUOUS_PREFIX="${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}"

STEREO_UNAMBIGUOUS="21-${UNAMBIGUOUS_LABEL}.ism"
STEREO_ENUMERABLE="22-${ENUMERABLE_LABEL}.ism"
STEREO_ENUMERATED="23-enumerated.ism"

# Of the ambiguous-compound files, only STEREO_AMBIGUOUS and
# STEREO_AMBIGUOUS_EXPANDED are written by the active code. *_CHECK is
# unused, *_NEW appears only in commented-out code, and *_FOUND is only the
# target of the STEP_2_OUTPUT_A symlink.
STEREO_AMBIGUOUS="30-${AMBIGUOUS_LABEL}.ism"
STEREO_AMBIGUOUS_CHECK="31-${AMBIGUOUS_LABEL}-to-check.ism"
STEREO_AMBIGUOUS_NEW="32-${AMBIGUOUS_LABEL}-new.ism"
STEREO_AMBIGUOUS_FOUND="33-${AMBIGUOUS_LABEL}-found.tsv"
STEREO_AMBIGUOUS_EXPANDED="34-expanded.ism"

# Consolidated result of Step 2.
STEREO_PICKED="50-picked.ism"

# Step 2 results as seen from CATALOG_DIR: A = mappings found early,
# B = compounds passed on to resolution.
STEP_2_OUTPUT_A="22-early-mappings.tsv"
STEP_2_OUTPUT_B="23-selected-compounds.ism"


# Step 3) Substance Resolutions
#######################
STEP_3_INPUT="30-to-resolve.ism"
RESOLUTION_ANNOTATED="31-annotated-to-resolve.ism"
RESOLUTION_NEW="32-new.ism"
RESOLUTION_FOUND="33-found.tsv"
# A = substances still to be created, B = mappings to existing substances.
STEP_3_OUTPUT_A="34-substances-to-create.ism"
STEP_3_OUTPUT_B="35-existing-mappings-to-load.tsv"


# Step 4) Substance Creation
############################
STEP_4_INPUT="40-substances-to-create.ism"
SUBSTANCES_ANNOTATED="41-substances-to-create-annotated.ism"
SUBSTANCES_ORGANIZED="42-substances-to-create-organized.ism"
# NOTE(review): CREATED_SUBSTANCES is not referenced by any step in this file.
CREATED_SUBSTANCES="43-new-substances.ism"
CREATION_MAPPINGS="44-created-mappings.tsv"
CREATION_FAILURES="45-creation-failures.ism"
STEP_4_OUTPUT="46-new-mappings-to-load.tsv"


# Step 5) Item Mapping Loading
##############################
# Inputs A-C are symlinks to the outputs of steps 2-4; D links to the file
# given with -a, when there is one.
MAPPINGS_DIR="50-${CATALOG_NAME}-items"
STEP_5_INPUT_A="${MAPPINGS_DIR}/10-22-stereo-ambiguous-found.tsv"
STEP_5_INPUT_B="${MAPPINGS_DIR}/20-02-existing-substances-found.tsv"
STEP_5_INPUT_C="${MAPPINGS_DIR}/30-created.tsv"
STEP_5_INPUT_D="${MAPPINGS_DIR}/00-provided-mappings.tsv"
STEP_5_OUTPUT="51-${CATALOG_NAME}-updated-contents-ids.tsv"


# Report, on stderr, the command configured for every stage of each step.
# Variable names are listed per step and printed through indirect
# expansion; `echo -e` is kept so the leading "\t" becomes a tab.
{
    echo "1) Active Preprocessing Stages"
    for STAGE_VAR in ZINC_FILTER ZINC_CANONICALIZE ZINC_NEUTRALIZE ZINC_TAUTOMERIZE ZINC_VALIDATE ; do
        echo -e "\t${STAGE_VAR}=${!STAGE_VAR}"
    done
    echo ""

    echo "2) Active Expansion Stages"
    for STAGE_VAR in ZINC_STEREO_SEPARATE ZINC_STEREO_EXPAND_COMMAND ZINC_STEREO_DEFAULT_EXPAND ; do
        echo -e "\t${STAGE_VAR}=${!STAGE_VAR}"
    done
    echo ""

    echo "3) Active Resolution Stages"
    for STAGE_VAR in ZINC_ANNOTATE ZINC_IDENTIFY ; do
        echo -e "\t${STAGE_VAR}=${!STAGE_VAR}"
    done

    echo "4) Active Creation Stages"
    echo -e "\tZINC_CREATE=${ZINC_CREATE}"

    echo "5) Active Loading Stages"
    echo -e "\tZINC_LOAD=${ZINC_LOAD}"
} 1>&2

# Step 0: create the catalog and log directories, enter the catalog
# directory (left again at the very end of the script) and take a working
# copy of the source file.
echo "Step 0: Setup" 1>&2
mkdir -pv "${CATALOG_DIR}" 1>&2
mkdir -pv "${LOG_DIR}" 1>&2
pushd "${CATALOG_DIR}" 1>&2

# The copy is made only once; an existing raw file is reused on reruns.
# The first run of whitespace on each line becomes a single tab. This
# needs an extended regex: in a basic regex the "+" is a literal plus
# sign, so the substitution would never match. The tab is inserted
# literally via $'...' rather than relying on sed's "\t" extension.
# Reading the source through a redirection means an unreadable source
# file fails the command before the raw file is created.
if [ ! -e "${RAW_FILE}" ] ; then
    sed -E $'s/[[:space:]]+/\t/' < "${SOURCE_FILE}" > "${RAW_FILE}"
fi
# `wc -l` on stdin prints only the count; tr removes any padding around it.
echo "$( wc -l < "${RAW_FILE}" | tr -d '[:space:]' ) entries in source catalog" 1>&2

# Step 1: preprocessing chain
#   extract -> filter -> canonicalize -> neutralize -> tautomerize
#   -> validate -> de-duplicate
# Each stage runs only when the previous stage's output file exists. A
# stage's own exit status is not checked: it is piped into tee, and the
# pipeline reports tee's status.
# The $ZINC_* commands are expanded unquoted on purpose so that they split
# into a program and its arguments.
# All stage output is copied to a log file and then sent to stderr, like
# the rest of the script's diagnostics; nothing is written to stdout.
echo "Step 1: Initial Preprocessing" 1>&2
if [ ! -e "${STEP_1_INPUT}" ] ; then
    ln -sv "${RAW_FILE}" "${STEP_1_INPUT}" 1>&2
fi

if [ ! -e "${STEP_1_INPUT}" ] ; then
    echo "Input does not exist. Skipping step 1" 1>&2
elif [ "${RUN_PREPROCESSING}" == 'yes' ] ; then
    # Keep only the first two whitespace-separated columns.
    awk '{print $1, $2}' \
        < "${STEP_1_INPUT}" \
        > "${EXTRACTED_FILE}"

    if [ -e "${EXTRACTED_FILE}" ] ; then
        $ZINC_FILTER \
            "${EXTRACTED_FILE}" \
            "${FILTERED_FILE}" \
            --log="${FILTER_LOG}" 2>&1 \
                | tee "${LOG_DIR}/12-filter.log" 1>&2
    fi

    if [ -e "${FILTERED_FILE}" ] ; then
        # stderr is merged in here as well so tool errors reach the log.
        $ZINC_CANONICALIZE \
            "${FILTERED_FILE}" \
            "${CANONICAL_FILE}" 2>&1 \
                | tee "${LOG_DIR}/13-canonical.log" 1>&2
    fi

    if [ -e "${CANONICAL_FILE}" ] ; then
        $ZINC_NEUTRALIZE \
            "${CANONICAL_FILE}" \
            "${NEUTRALIZED_FILE}" 2>&1 \
                | tee "${LOG_DIR}/14-neutralize.log" 1>&2
    fi

    if [ -e "${NEUTRALIZED_FILE}" ] ; then
        $ZINC_TAUTOMERIZE \
            "${NEUTRALIZED_FILE}" \
            "${TAUTOMERIZED_FILE}" \
            --log="${TAUTOMER_LOG}" 2>&1 \
                | tee "${LOG_DIR}/15-tautomerize.log" 1>&2
    fi

    if [ -e "${TAUTOMERIZED_FILE}" ] ; then
        $ZINC_VALIDATE \
            "${TAUTOMERIZED_FILE}" \
            "${VERIFY_FILE}" \
            --log="${VERIFY_LOG}" 2>&1 \
                | tee "${LOG_DIR}/16-verify.log" 1>&2
    fi

    # Order by the second column onwards and drop identical lines.
    # NOTE(review): this runs even when VERIFY_FILE is missing; sort's
    # failure is hidden by the pipeline and DISTINCT_FILE ends up empty.
    sort -k 2 "${VERIFY_FILE}" \
            | uniq > "${DISTINCT_FILE}"

    ln -svfn "${DISTINCT_FILE}" "${STEP_1_OUTPUT}" 1>&2
else
    echo "Skipping preprocessing by request" 1>&2
    # Pass the input straight through unless an output already exists.
    if [ ! -e "${STEP_1_OUTPUT}" ] ; then
        ln -sv "${STEP_1_INPUT}" "${STEP_1_OUTPUT}" 1>&2
    fi
fi

# Step 2: stereo handling. Works inside STEREO_DIR:
#   - split the input into unambiguous / enumerable / ambiguous files,
#   - merge each category into one file under a single header line,
#   - expand the enumerable and the ambiguous sets,
#   - consolidate everything into STEREO_PICKED.
# All progress output goes to stderr, including the directory-stack and
# symlink messages that pushd and `ln -v` print.
echo "Step 2: Stereo Ambiguity Separation" 1>&2
if [ ! -e "${STEP_2_INPUT}" ] ; then
    ln -sv "${STEP_1_OUTPUT}" "${STEP_2_INPUT}" 1>&2
fi

if [ ! -e "${STEP_2_INPUT}" ] ; then
    echo "No input available. Skipping step 2" 1>&2
elif [ "${RUN_STEREO_EXPANSION}" == 'yes' ] ; then
    mkdir -pv "${STEREO_DIR}" 1>&2
    pushd "${STEREO_DIR}" 1>&2
    ln -svfn "../${STEP_2_INPUT}" "${STEREO_RAW}" 1>&2
    mkdir -pv "${STEREO_SEPARATE_DIR}" 1>&2

    if [ -e "${STEREO_RAW}" ] ; then
        $ZINC_STEREO_SEPARATE \
            "${STEREO_RAW}" \
            --max-enumerable "${ZINC_MAX_ENUMERABLE_STEREO_CENTERS}" \
            --output-dir "${STEREO_SEPARATE_DIR}" \
            --extension ism \
            --prefix "" \
            --unambiguous-suffix "${UNAMBIGUOUS_LABEL}" \
            --enumerable-suffix "${ENUMERABLE_LABEL}" \
            --ambiguous-suffix "${AMBIGUOUS_LABEL}" 2>&1 \
                | tee "${LOG_DIR}/10-split-stereo.log" 1>&2
    fi

    echo "  Step 2.1) Handling all unambiguous and enumerable compounds" 1>&2
    # Each category file starts with one header line followed by the
    # contents of every "<label>-*.ism" file the separation produced.
    # find hands the files to cat directly, so odd characters in a file
    # name cannot split it, and cat is not run when nothing matches.
    # NOTE(review): the extracted files are appended whole. If the
    # separation command writes a header line into each of them, those
    # lines end up in the data; confirm its output format.
    echo -e "SMILES\tName" > "${STEREO_UNAMBIGUOUS}"
    find "${STEREO_SEPARATE_DIR}" -name "${UNAMBIGUOUS_LABEL}-*.ism" \
        -exec cat {} + >> "${STEREO_UNAMBIGUOUS}"

    echo -e "SMILES\tName" > "${STEREO_ENUMERABLE}"
    find "${STEREO_SEPARATE_DIR}" -name "${ENUMERABLE_LABEL}-*.ism" \
        -exec cat {} + >> "${STEREO_ENUMERABLE}"

    echo -e "SMILES\tName" > "${STEREO_AMBIGUOUS}"
    find "${STEREO_SEPARATE_DIR}" -name "${AMBIGUOUS_LABEL}-*.ism" \
        -exec cat {} + >> "${STEREO_AMBIGUOUS}"

    if [ -e "${STEREO_ENUMERABLE}" ] ; then
        $ZINC_STEREO_DEFAULT_EXPAND \
            "${STEREO_ENUMERABLE}" \
            "${STEREO_ENUMERATED}" 2>&1 \
                | tee "${LOG_DIR}/10-12-enumerable-expansion.log" 1>&2
    fi

    echo "  Step 2.2) Handling ambiguous compounds" 1>&2
    #[ -e "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${DEFAULT_CHEMOTYPE}.ism" ] && \
    #$ZINC_ANNOTATE \
    #    "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${DEFAULT_CHEMOTYPE}.ism" \
    #    "${STEREO_AMBIGUOUS}" 2>&1 \
    #        | tee "${LOG_DIR}/10-20-annotate-ambiguous-default.log"  1>&2
    #[ -e "${STEREO_AMBIGUOUS}" ] && \
    #$ZINC_IDENTIFY \
    #    "${STEREO_AMBIGUOUS}" \
    #    "${STEREO_AMBIGUOUS_NEW}" \
    #    "${STEREO_AMBIGUOUS_FOUND}" 2>&1 \
    #        | tee "${LOG_DIR}/10-21-resolve-ambiguous-stereo.log"  1>&2
    if [ -e "${STEREO_AMBIGUOUS}" ] ; then
        $ZINC_STEREO_DEFAULT_EXPAND \
            "${STEREO_AMBIGUOUS}" \
            "${STEREO_AMBIGUOUS_EXPANDED}" 2>&1 \
                | tee "${LOG_DIR}/10-23-ambiguous-expansion.log"  1>&2
    fi

    echo "  Step 2.3) Handling ambiguous sterols" 1>&2
    echo "Skipping: Sterols and Glucose now handled inline" 1>&2
    #[ -e "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${SPECIAL_CHEMOTYPE}.ism" ] && \
    #$ZINC_STEREO_STEROL_EXPAND \
    #    "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${SPECIAL_CHEMOTYPE}.ism" \
    #    "${STEREO_SPECIAL_EXPANSIONS}" 2>&1 \
    #        | tee "${LOG_DIR}/10-30-expand-ambiguous-sterols.log"  1>&2
    #[ -e "${STEREO_SPECIAL_EXPANSIONS}" ] && \
    #$ZINC_STEROL_PICK \
    #    "${STEREO_SPECIAL_EXPANSIONS}" \
    #    "${STEREO_SPECIAL_PICKED}" 2>&1 \
    #        | tee "${LOG_DIR}/10-31-pick-expanded-sterols.log"  1>&2

    echo "  Step 2.4) Consolidation of selected stereoisomers" 1>&2
    # One header, then every contributing file without its first line.
    echo -e "SMILES\tName" > "${STEREO_PICKED}"
    for PICKED in \
            "${STEREO_UNAMBIGUOUS}" \
            "${STEREO_ENUMERATED}" \
            "${STEREO_AMBIGUOUS_EXPANDED}" ; do
        if [ -e "${PICKED}" ] ; then
            tail -n +2 "${PICKED}" >> "${STEREO_PICKED}"
        fi
    done

    popd 1>&2
    # NOTE(review): STEREO_AMBIGUOUS_FOUND is only written by the
    # commented-out identify stage above, so this link normally points at
    # a file that does not exist.
    ln -svfn "${STEREO_DIR}/${STEREO_AMBIGUOUS_FOUND}" "${STEP_2_OUTPUT_A}" 1>&2
    ln -svfn "${STEREO_DIR}/${STEREO_PICKED}" "${STEP_2_OUTPUT_B}" 1>&2
else
    echo "Skipping stereo expansion by request" 1>&2
    if [ ! -e "${STEP_2_OUTPUT_A}" ] ; then
        touch "${STEP_2_OUTPUT_A}"  # Nothing found
    fi
    if [ ! -e "${STEP_2_OUTPUT_B}" ] ; then
        ln -sv "${STEP_2_INPUT}" "${STEP_2_OUTPUT_B}" 1>&2
    fi
fi

# Step 3: annotate the selected compounds, then split them into new
# substances (output A) and mappings to existing ones (output B).
echo "Step 3: Resolution" 1>&2
if [ ! -e "${STEP_3_INPUT}" ] ; then
    ln -sv "${STEP_2_OUTPUT_B}" "${STEP_3_INPUT}" 1>&2
fi

if [ ! -e "${STEP_3_INPUT}" ] ; then
    echo "No input available. Skipping step 3" 1>&2
elif [ "${RUN_RESOLUTION}" != 'yes' ] ; then
    echo "Skipping resolution by request" 1>&2
    # Treat the whole input as new and record no existing mappings,
    # unless outputs are already present.
    if [ ! -e "${STEP_3_OUTPUT_A}" ] ; then
        ln -sv "${STEP_3_INPUT}" "${STEP_3_OUTPUT_A}" 1>&2
    fi
    if [ ! -e "${STEP_3_OUTPUT_B}" ] ; then
        touch "${STEP_3_OUTPUT_B}"
    fi
else
    if [ -e "${STEP_3_INPUT}" ] ; then
        $ZINC_ANNOTATE \
            "${STEP_3_INPUT}" \
            "${RESOLUTION_ANNOTATED}" 2>&1 \
                | tee "${LOG_DIR}/30-resolution-annotation.log"  1>&2
    fi
    # Identification only runs when annotation produced its output file.
    if [ -e "${RESOLUTION_ANNOTATED}" ] ; then
        $ZINC_IDENTIFY \
            "${RESOLUTION_ANNOTATED}" \
            "${RESOLUTION_NEW}" \
            "${RESOLUTION_FOUND}" 2>&1 \
                | tee "${LOG_DIR}/31-resolution.log"  1>&2
    fi

    ln -svfn "${RESOLUTION_NEW}" "${STEP_3_OUTPUT_A}" 1>&2
    ln -svfn "${RESOLUTION_FOUND}" "${STEP_3_OUTPUT_B}" 1>&2
fi


# Step 4: annotate the substances to create, order them by their third
# column (header line kept on top), and hand them to the creation command.
echo "Step 4: Creating new substances" 1>&2
if [ ! -e "${STEP_4_INPUT}" ] ; then
    ln -sv "${STEP_3_OUTPUT_A}" "${STEP_4_INPUT}" 1>&2
fi

if [ ! -e "${STEP_4_INPUT}" ] ; then
    echo "No input available. Skipping step 4" 1>&2
elif [ "${RUN_CREATION}" != 'yes' ] ; then
    echo "Skipping substance creation by request" 1>&2
    # Leave an empty mappings file behind so later steps have an input.
    if [ ! -e "${CREATION_MAPPINGS}" ] ; then
        touch "${CREATION_MAPPINGS}" 1>&2
    fi
    if [ ! -e "${STEP_4_OUTPUT}" ] ; then
        ln -sv "${CREATION_MAPPINGS}" "${STEP_4_OUTPUT}" 1>&2
    fi
else
    if [ -e "${STEP_4_INPUT}" ] ; then
        $ZINC_ANNOTATE \
            "${STEP_4_INPUT}" \
            "${SUBSTANCES_ANNOTATED}" 2>&1 \
                | tee "${LOG_DIR}/41-creation-annotation.log" 1>&2
    fi

    # First line stays first; the remaining lines are sorted from the
    # third field onwards.
    {
        head -n 1 "${SUBSTANCES_ANNOTATED}"
        tail -n +2 "${SUBSTANCES_ANNOTATED}" | sort -k 3
    } > "${SUBSTANCES_ORGANIZED}"

    if [ -e "${SUBSTANCES_ORGANIZED}" ] ; then
        $ZINC_CREATE \
            "${SUBSTANCES_ORGANIZED}" \
            "${CREATION_MAPPINGS}" \
            "${CREATION_FAILURES}" 2>&1 \
                | tee "${LOG_DIR}/42-creation.log" 1>&2
    fi
    ln -svfn "${CREATION_MAPPINGS}" "${STEP_4_OUTPUT}" 1>&2
fi


# Step 5: gather the mapping files produced by steps 2-4 (plus the file
# given with -a, if any) under MAPPINGS_DIR and pass the ones that exist
# to the catalog loader.
echo "Step 5: Updating Catalog Mappings" 1>&2
mkdir -pv "${MAPPINGS_DIR}" 1>&2

# The links live one level down, hence the "../" in front of each target.
if [ ! -e "${STEP_5_INPUT_A}" ] ; then
    ln -svfn "../${STEP_2_OUTPUT_A}" "${STEP_5_INPUT_A}" 1>&2
fi
if [ ! -e "${STEP_5_INPUT_B}" ] ; then
    ln -svfn "../${STEP_3_OUTPUT_B}" "${STEP_5_INPUT_B}" 1>&2
fi
if [ ! -e "${STEP_5_INPUT_C}" ] ; then
    ln -svfn "../${STEP_4_OUTPUT}" "${STEP_5_INPUT_C}" 1>&2
fi
STEP_5_INPUTS=( "${STEP_5_INPUT_A}" "${STEP_5_INPUT_B}" "${STEP_5_INPUT_C}" )

if [ -n "${ADDITIONAL_MAPPINGS}" ] ; then
    ln -svfn "${ADDITIONAL_MAPPINGS}" "${STEP_5_INPUT_D}" 1>&2
    STEP_5_INPUTS+=( "${STEP_5_INPUT_D}" )
fi

if [ "${RUN_LOADING}" != 'yes' ] ; then
    echo "Skipping catalog item mapping loading by request" 1>&2
    if [ ! -e "${STEP_5_OUTPUT}" ] ; then
        touch "${STEP_5_OUTPUT}" 1>&2
    fi
else
    # Keep only inputs that exist; a link whose target is missing is
    # dropped here.
    CATALOG_LOADING_INPUTS=()
    for INPUT in "${STEP_5_INPUTS[@]}" ; do
        [ -e "${INPUT}" ] || continue
        CATALOG_LOADING_INPUTS+=( "${INPUT}" )
    done
    # The loader is not run at all when nothing is left to load.
    if [ "${#CATALOG_LOADING_INPUTS[@]}" -gt 0 ] ; then
        $ZINC_LOAD \
            "${CATALOG_NAME}" \
            "${CATALOG_LOADING_INPUTS[@]}" \
            -o "${STEP_5_OUTPUT}" 2>&1 \
                | tee "${LOG_DIR}/51-loading.log" 1>&2
    fi
fi

# Leave the catalog directory entered during setup.
popd 1>&2