ZINC15:Model building
Jump to navigation
Jump to search
#!/bin/bash --norc
#
# Catalog loading driver: takes a supplier source file (SMILES + supplier
# code) and pushes it through the six stages described in USAGE below.
#
# Every stage reads a numbered "input" symlink and leaves numbered outputs in
# ./<CATALOG_NAME>/, so a run can be resumed or individual stages skipped.
# Every external command is overridable through the ZINC_* environment
# variables listed in the "Setup commands" section.
#
# All diagnostics go to stderr; nothing meaningful is written to stdout.

USAGE="${0} [OPTIONS] <SOURCE_FILE>

Options:
    -a, --addition-items <ITEMS_FILE>    - A tab-delimited file of additional supplier code to sub_id mappigns to load
    -n, --name <CATALOG_NAME>            - The catalog name to use [default: basename of SOURCE_FILE]
    -f, --filter-mode <MODE>             - Filtering rule list to use [default: preliminary]
    -t, --tautomerize-mode <MODE>        - Tautomerization rules to use [default: default]
    -v, --validate-mode <MODE>           - Filtering rule list to use [default: strict]
    --num-stereocenters                  - Max number of ambiguous stereocenters to enumerate
    -s, --stereo-templates <TEMPLATES>   - Special stereoisomer temperating rules use [default: default]
    -P, --skip-preprocessing             - Skip the preprocessing step. Raw source file will be passed to expansion
    -E, --skip-expansion                 - Skip expansion step. Preprocessing result will be passed to resolution
    -R, --skip-resolution                - Skip resolution step. Expansion result will be passed as new substances to creation
    -C, --skip-creation                  - Skip creation step. Previous resolutions will be passed to loading step
    -L, --skip-loading                   - Skip loading step. Existing catalog items will be passed to depletion
    -D, --skip-depletion                 - Skip depletion step.

Loading pipeline:
    1. Preprocessing: Filtering and neutralization
    2. Expansion: Stereo (RS & EZ) expansion for up to 2 (default) centers, then explicit assignment.
                  Special handling of sterols and attempted early resolution of compounds with a high number of centers
    3. Resolution: Find and separate out existing substances in ZINC
    4. Creation: Add substances determined to be new into ZINC
    5. Loading: Add and update catalog item (catalog_content/catalog_substance) mappings in ZINC
    6. Depletion: Final pass of marking old catalog contents as depleted
"

# NOTE: pipefail is deliberately NOT enabled. Each stage is guarded by an
# existence test on the previous stage's output, which reads as a best-effort
# design; a failing tool skips downstream stages instead of aborting.
set -e

# Abort with a message when a value-taking option is the last argument.
#   $1 - option as typed by the user
#   $2 - number of arguments remaining after the option itself
need_value() {
    if (( $2 < 1 )) ; then
        echo "Option ${1} requires an argument" 1>&2
        exit 255
    fi
}

# Build one file for a stereo class: a header line followed by the contents
# of every split file the separator produced for that class.
#   $1 - class label (file-name prefix used by the separator)
#   $2 - output file
# NOTE(review): the split files are appended verbatim. If the separator also
# writes a header into each split file, those headers end up in the output as
# data rows; confirm against zincload-ambiguitysplit before changing.
gather_stereo_class() {
    local label="${1}"
    local output="${2}"
    local part
    echo -e "SMILES\tName" > "${output}"
    # NUL-delimited so file names containing whitespace survive intact.
    while IFS= read -r -d '' part ; do
        cat "${part}" >> "${output}"
    done < <( find "${STEREO_SEPARATE_DIR}" -name "${label}-*.ism" -print0 )
}

SOURCE_FILE=""
ADDITIONAL_MAPPINGS=""
CATALOG_NAME="${CATALOG_NAME}"
FILTER_MODE="${FILTER_MODE-preliminary}"
VALIDATE_MODE="${VALIDATE_MODE-strict}"
TAUTOMERIZE_MODE="${TAUTOMERIZE_MODE-default}"
STEREO_TEMPLATES="${STEREO_TEMPLATES-default}"
ZINC_MAX_ENUMERABLE_STEREO_CENTERS="${ZINC_MAX_ENUMERABLE_STEREO_CENTERS-2}"
RUN_PREPROCESSING='yes'
RUN_STEREO_EXPANSION='yes'
RUN_RESOLUTION='yes'
RUN_CREATION='yes'
RUN_LOADING='yes'
RUN_DEPLETION='yes'

while (( $# > 0 )) ; do
    ARG="${1}"
    VALUE="${2-}"
    shift 1
    case "${ARG}" in
        -h|--help)
            echo "${USAGE}" 1>&2
            exit 0
            ;;
        # Both the historically parsed spelling and the spelling that the
        # usage text used to advertise are accepted.
        -a|--addition-items|--adition-items)
            need_value "${ARG}" "$#"
            ADDITIONAL_MAPPINGS="${VALUE}"
            shift 1
            ;;
        -n|--name)
            need_value "${ARG}" "$#"
            CATALOG_NAME="${VALUE}"
            shift 1
            ;;
        -f|--filter|--filter-mode)
            need_value "${ARG}" "$#"
            FILTER_MODE="${VALUE}"
            shift 1
            ;;
        -t|--tautomerize-mode)
            need_value "${ARG}" "$#"
            TAUTOMERIZE_MODE="${VALUE}"
            shift 1
            ;;
        -v|--validate|--validate-mode)
            need_value "${ARG}" "$#"
            VALIDATE_MODE="${VALUE}"
            shift 1
            ;;
        --num-stereocenters)
            need_value "${ARG}" "$#"
            ZINC_MAX_ENUMERABLE_STEREO_CENTERS="${VALUE}"
            shift 1
            ;;
        -s|--stereo-templates)
            need_value "${ARG}" "$#"
            STEREO_TEMPLATES="${VALUE}"
            shift 1
            ;;
        -P|--skip-preprocessing)
            RUN_PREPROCESSING="no"
            ;;
        -E|--skip-expansion)
            RUN_STEREO_EXPANSION="no"
            ;;
        -R|--skip-resolution)
            RUN_RESOLUTION="no"
            ;;
        -C|--skip-creation)
            RUN_CREATION="no"
            ;;
        -L|--skip-loading)
            RUN_LOADING="no"
            ;;
        -D|--skip-depletion)
            RUN_DEPLETION="no"
            ;;
        *)
            # Only one positional argument is allowed, and anything that
            # still looks like an option at this point is a typo, not a file.
            if [[ -n "${SOURCE_FILE}" || "${ARG}" == -* ]] ; then
                echo "Unexpected argument: ${ARG}" 1>&2
                exit 255
            else
                SOURCE_FILE="${ARG}"
            fi
            ;;
    esac
done

if [ -z "${SOURCE_FILE}" ] ; then
    echo "Source file required. Exiting." 1>&2
    exit 255
else
    SOURCE_FILE="$( readlink -f "${SOURCE_FILE}" )"
fi

# The mappings file is symlinked from inside the catalog directory later on,
# so a path relative to the invocation directory must be made absolute now.
if [ -n "${ADDITIONAL_MAPPINGS}" ] ; then
    ADDITIONAL_MAPPINGS="$( readlink -f "${ADDITIONAL_MAPPINGS}" )"
fi

if [ -z "${CATALOG_NAME}" ] ; then
    CATALOG_NAME="${SOURCE_FILE}"
    CATALOG_NAME="$( basename "${CATALOG_NAME}" .ism )"
    CATALOG_NAME="$( basename "${CATALOG_NAME}" .smi )"
fi

if [ -z "${ZINC_CONFIG_ENV}" ] ; then
    export ZINC_CONFIG_ENV="admin"
fi
if [ -z "${ZINC_CONFIG_SETUP_SKIP}" ] ; then
    export ZINC_CONFIG_SETUP_SKIP="blueprints errorhandlers"
fi

echo "Catalog: ${CATALOG_NAME}" 1>&2
echo "Contents: ${SOURCE_FILE}" 1>&2
echo "Filtering: ${FILTER_MODE}" 1>&2
echo "Validation: ${VALIDATE_MODE}" 1>&2
echo "Steps:" 1>&2
echo " Preproccessing - ${RUN_PREPROCESSING}" 1>&2
echo " Expansion - ${RUN_STEREO_EXPANSION}" 1>&2
echo " Resolution - ${RUN_RESOLUTION}" 1>&2
echo " Creation - ${RUN_CREATION}" 1>&2
echo " Loading - ${RUN_LOADING}" 1>&2
echo " Depletion - ${RUN_DEPLETION}" 1>&2

# Setup commands
# Each command is a plain string (command plus options) so that it can be
# replaced from the environment. They are expanded UNQUOTED on purpose below
# to let the shell split them into words.
ZINC_FILTER="${ZINC_FILTER-zincload-filter ${FILTER_MODE}}"
ZINC_VALIDATE="${ZINC_VALIDATE-zincload-filter ${VALIDATE_MODE}}"
ZINC_CANONICALIZE="${ZINC_CANONICALIZE-zincload-inchi --standardize --inchi-options=/RecMet}"
ZINC_NEUTRALIZE="${ZINC_NEUTRALIZE-neutralize.sh}"
ZINC_TAUTOMERIZE="${ZINC_TAUTOMERIZE-zincload-tautomerize --rules=${TAUTOMERIZE_MODE}}"
ZINC_STEREO_SEPARATE="${ZINC_STEREO_SEPARATE-zincload-ambiguitysplit}"
ZINC_STEREO_EXPAND_COMMAND="${ZINC_STEREO_EXPAND_COMMAND-zincload-expandcenters --headers}"
ZINC_STEREO_DEFAULT_EXPAND="${ZINC_STEREO_DEFAULT_EXPAND-${ZINC_STEREO_EXPAND_COMMAND} --limit=${ZINC_MAX_ENUMERABLE_STEREO_CENTERS} --assign-with=RE --templates=${STEREO_TEMPLATES}}"
ZINC_ANNOTATE="${ZINC_ANNOTATE-zincload-inchi --header --inchikey}"
ZINC_IDENTIFY="${ZINC_IDENTIFY-zinc-manage admin substances resolve --headers -f smiles,name,inchikey}"
ZINC_CREATE="${ZINC_CREATE-zinc-manage admin substances load -s SMILES -n Name -c supplier_code -z sub_id_fk -k inchikey --catalog=${CATALOG_NAME} --reactivity -C 100}"
ZINC_LOAD="${ZINC_LOAD-zinc-manage admin catalogs load --header -C 1000}"
if [ "${RUN_DEPLETION}" != 'yes' ] ; then
    ZINC_LOAD="${ZINC_LOAD} --no-depletion"
fi

CATALOG_DIR="$( pwd )/${CATALOG_NAME}"
LOG_DIR="${CATALOG_DIR}/logs"
RAW_FILE="00-${CATALOG_NAME}-raw.ism"
EXTRA_MAPPINGS="${CATALOG_DIR}/01-${CATALOG_NAME}-extra-mappings.tsv"

# Step 1) Initial Preprocessing
###############################
STEP_1_INPUT="10-${CATALOG_NAME}-to-process.ism"
EXTRACTED_FILE="11-${CATALOG_NAME}-extracted.ism"
FILTERED_FILE="12-${CATALOG_NAME}-filtered.ism"
CANONICAL_FILE="13-${CATALOG_NAME}-canonical.ism"
NEUTRALIZED_FILE="14-${CATALOG_NAME}-neutralized.ism"
TAUTOMERIZED_FILE="15-${CATALOG_NAME}-tautomerized.ism"
VERIFY_FILE="16-${CATALOG_NAME}-verified.ism"
DISTINCT_FILE="17-${CATALOG_NAME}-distinct.ism"
FILTER_LOG="${LOG_DIR}/${CATALOG_NAME}.filtered"
TAUTOMER_LOG="${LOG_DIR}/${CATALOG_NAME}.tautomerized"
VERIFY_LOG="${LOG_DIR}/${CATALOG_NAME}.verified"
STEP_1_OUTPUT="18-${CATALOG_NAME}-processed.ism"

# Step 2) Stereo Expansion (and some early substance resolution for combinatorial reduction)
#####################################################################################
STEP_2_INPUT="20-${CATALOG_NAME}-unexpanded.ism"
STEREO_DIR="21-${CATALOG_NAME}-stereo-processing"
STEREO_RAW="00-${CATALOG_NAME}-unprocessed.ism"
STEREO_SEPARATE_DIR="10-extractions"
UNAMBIGUOUS_LABEL="unambiguous"
ENUMERABLE_LABEL="enumerable"
AMBIGUOUS_LABEL="ambiguous"
DEFAULT_CHEMOTYPE="default"
SPECIAL_CHEMOTYPE="special"
STEREO_EXTRACTED_UNAMBIGUOUS_PREFIX="${STEREO_SEPARATE_DIR}/${UNAMBIGUOUS_LABEL}"
STEREO_EXTRACTED_ENUMERABLE_PREFIX="${STEREO_SEPARATE_DIR}/${ENUMERABLE_LABEL}"
STEREO_EXTRACTED_AMBIGUOUS_PREFIX="${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}"
STEREO_UNAMBIGUOUS="21-${UNAMBIGUOUS_LABEL}.ism"
STEREO_ENUMERABLE="22-${ENUMERABLE_LABEL}.ism"
STEREO_ENUMERATED="23-enumerated.ism"
STEREO_AMBIGUOUS="30-${AMBIGUOUS_LABEL}.ism"
STEREO_AMBIGUOUS_CHECK="31-${AMBIGUOUS_LABEL}-to-check.ism"
STEREO_AMBIGUOUS_NEW="32-${AMBIGUOUS_LABEL}-new.ism"
STEREO_AMBIGUOUS_FOUND="33-${AMBIGUOUS_LABEL}-found.tsv"
STEREO_AMBIGUOUS_EXPANDED="34-expanded.ism"
STEREO_PICKED="50-picked.ism"
STEP_2_OUTPUT_A="22-early-mappings.tsv"
STEP_2_OUTPUT_B="23-selected-compounds.ism"

# Step 3) Substance Resolutions
#######################
STEP_3_INPUT="30-to-resolve.ism"
RESOLUTION_ANNOTATED="31-annotated-to-resolve.ism"
RESOLUTION_NEW="32-new.ism"
RESOLUTION_FOUND="33-found.tsv"
STEP_3_OUTPUT_A="34-substances-to-create.ism"
STEP_3_OUTPUT_B="35-existing-mappings-to-load.tsv"

# Step 4) Substance Creation
############################
STEP_4_INPUT="40-substances-to-create.ism"
SUBSTANCES_ANNOTATED="41-substances-to-create-annotated.ism"
SUBSTANCES_ORGANIZED="42-substances-to-create-organized.ism"
CREATED_SUBSTANCES="43-new-substances.ism"
CREATION_MAPPINGS="44-created-mappings.tsv"
CREATION_FAILURES="45-creation-failures.ism"
STEP_4_OUTPUT="46-new-mappings-to-load.tsv"

# Step 5) Item Mapping Loading
##############################
MAPPINGS_DIR="50-${CATALOG_NAME}-items"
STEP_5_INPUT_A="${MAPPINGS_DIR}/10-22-stereo-ambiguous-found.tsv"
STEP_5_INPUT_B="${MAPPINGS_DIR}/20-02-existing-substances-found.tsv"
STEP_5_INPUT_C="${MAPPINGS_DIR}/30-created.tsv"
STEP_5_INPUT_D="${MAPPINGS_DIR}/00-provided-mappings.tsv"
STEP_5_OUTPUT="51-${CATALOG_NAME}-updated-contents-ids.tsv"

echo "1) Active Preprocessing Stages" 1>&2
echo -e "\tZINC_FILTER=${ZINC_FILTER}" 1>&2
echo -e "\tZINC_CANONICALIZE=${ZINC_CANONICALIZE}" 1>&2
echo -e "\tZINC_NEUTRALIZE=${ZINC_NEUTRALIZE}" 1>&2
echo -e "\tZINC_TAUTOMERIZE=${ZINC_TAUTOMERIZE}" 1>&2
echo -e "\tZINC_VALIDATE=${ZINC_VALIDATE}" 1>&2
echo "" 1>&2
echo "2) Active Expansion Stages" 1>&2
echo -e "\tZINC_STEREO_SEPARATE=${ZINC_STEREO_SEPARATE}" 1>&2
echo -e "\tZINC_STEREO_EXPAND_COMMAND=${ZINC_STEREO_EXPAND_COMMAND}" 1>&2
echo -e "\tZINC_STEREO_DEFAULT_EXPAND=${ZINC_STEREO_DEFAULT_EXPAND}" 1>&2
echo "" 1>&2
echo "3) Active Resolution Stages" 1>&2
echo -e "\tZINC_ANNOTATE=${ZINC_ANNOTATE}" 1>&2
echo -e "\tZINC_IDENTIFY=${ZINC_IDENTIFY}" 1>&2
echo "4) Active Creation Stages" 1>&2
echo -e "\tZINC_CREATE=${ZINC_CREATE}" 1>&2
echo "5) Active Loading Stages" 1>&2
echo -e "\tZINC_LOAD=${ZINC_LOAD}" 1>&2

echo "Step 0: Setup" 1>&2
mkdir -pv "${CATALOG_DIR}" 1>&2
mkdir -pv "${LOG_DIR}" 1>&2
pushd "${CATALOG_DIR}" 1>&2

# Normalise the first run of whitespace on each line to a single tab.
# -E is required: in a basic regex "\s+" means "whitespace then a literal +".
if [ ! -e "${RAW_FILE}" ] ; then
    sed -E 's/\s+/\t/' "${SOURCE_FILE}" > "${RAW_FILE}"
fi
ENTRY_COUNT="$( wc -l < "${RAW_FILE}" )"
echo "${ENTRY_COUNT//[[:space:]]/} entries in source catalog" 1>&2

echo "Step 1: Initial Preprocessing" 1>&2
[ ! -e "${STEP_1_INPUT}" ] && \
    ln -sv "${RAW_FILE}" "${STEP_1_INPUT}" 1>&2
if [ ! -e "${STEP_1_INPUT}" ] ; then
    echo "Input does not exist. Skipping step 1" 1>&2
elif [ "${RUN_PREPROCESSING}" == 'yes' ] ; then
    # Keep only the first two whitespace-separated columns.
    [ -e "${STEP_1_INPUT}" ] && \
        awk '{print $1, $2}' \
            < "${STEP_1_INPUT}" \
            > "${EXTRACTED_FILE}"
    [ -e "${EXTRACTED_FILE}" ] && \
        $ZINC_FILTER \
            "${EXTRACTED_FILE}" \
            "${FILTERED_FILE}" \
            --log="${FILTER_LOG}" 2>&1 \
        | tee "${LOG_DIR}/12-filter.log" 1>&2
    # stderr of the canonicalizer is left unmerged, as in the original.
    [ -e "${FILTERED_FILE}" ] && \
        $ZINC_CANONICALIZE \
            "${FILTERED_FILE}" \
            "${CANONICAL_FILE}" \
        | tee "${LOG_DIR}/13-canonical.log" 1>&2
    [ -e "${CANONICAL_FILE}" ] && \
        $ZINC_NEUTRALIZE \
            "${CANONICAL_FILE}" \
            "${NEUTRALIZED_FILE}" 2>&1 \
        | tee "${LOG_DIR}/14-neutralize.log" 1>&2
    [ -e "${NEUTRALIZED_FILE}" ] && \
        $ZINC_TAUTOMERIZE \
            "${NEUTRALIZED_FILE}" \
            "${TAUTOMERIZED_FILE}" \
            --log="${TAUTOMER_LOG}" 2>&1 \
        | tee "${LOG_DIR}/15-tautomerize.log" 1>&2
    [ -e "${TAUTOMERIZED_FILE}" ] && \
        $ZINC_VALIDATE \
            "${TAUTOMERIZED_FILE}" \
            "${VERIFY_FILE}" \
            --log="${VERIFY_LOG}" 2>&1 \
        | tee "${LOG_DIR}/16-verify.log" 1>&2
    sort -k 2 "${VERIFY_FILE}" \
        | uniq > "${DISTINCT_FILE}"
    ln -svfn "${DISTINCT_FILE}" "${STEP_1_OUTPUT}" 1>&2
else
    echo "Skipping preprocessing by request" 1>&2
    [ ! -e "${STEP_1_OUTPUT}" ] && \
        ln -sv "${STEP_1_INPUT}" "${STEP_1_OUTPUT}" 1>&2
fi

echo "Step 2: Stereo Ambiguity Separation" 1>&2
[ ! -e "${STEP_2_INPUT}" ] && \
    ln -sv "${STEP_1_OUTPUT}" "${STEP_2_INPUT}" 1>&2
if [ ! -e "${STEP_2_INPUT}" ] ; then
    echo "No input available. Skipping step 2" 1>&2
elif [ "${RUN_STEREO_EXPANSION}" == 'yes' ] ; then
    mkdir -pv "${STEREO_DIR}" 1>&2
    pushd "${STEREO_DIR}" 1>&2
    ln -svfn "../${STEP_2_INPUT}" "${STEREO_RAW}" 1>&2
    mkdir -pv "${STEREO_SEPARATE_DIR}" 1>&2
    [ -e "${STEREO_RAW}" ] && \
        $ZINC_STEREO_SEPARATE \
            "${STEREO_RAW}" \
            --max-enumerable "${ZINC_MAX_ENUMERABLE_STEREO_CENTERS}" \
            --output-dir "${STEREO_SEPARATE_DIR}" \
            --extension ism \
            --prefix "" \
            --unambiguous-suffix "${UNAMBIGUOUS_LABEL}" \
            --enumerable-suffix "${ENUMERABLE_LABEL}" \
            --ambiguous-suffix "${AMBIGUOUS_LABEL}" 2>&1 \
        | tee "${LOG_DIR}/10-split-stereo.log" 1>&2

    echo " Step 2.1) Handling all unambiguous and enumerable compounds" 1>&2
    # Write one header and ignore others
    gather_stereo_class "${UNAMBIGUOUS_LABEL}" "${STEREO_UNAMBIGUOUS}"
    gather_stereo_class "${ENUMERABLE_LABEL}" "${STEREO_ENUMERABLE}"
    gather_stereo_class "${AMBIGUOUS_LABEL}" "${STEREO_AMBIGUOUS}"
    [ -e "${STEREO_ENUMERABLE}" ] && \
        $ZINC_STEREO_DEFAULT_EXPAND \
            "${STEREO_ENUMERABLE}" \
            "${STEREO_ENUMERATED}" 2>&1 \
        | tee "${LOG_DIR}/10-12-enumerable-expansion.log" 1>&2

    echo " Step 2.2) Handling ambiguous compounds" 1>&2
    #[ -e "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${DEFAULT_CHEMOTYPE}.ism" ] && \
    #$ZINC_ANNOTATE \
    #    "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${DEFAULT_CHEMOTYPE}.ism" \
    #    "${STEREO_AMBIGUOUS}" 2>&1 \
    #    | tee "${LOG_DIR}/10-20-annotate-ambiguous-default.log" 1>&2
    #[ -e "${STEREO_AMBIGUOUS}" ] && \
    #$ZINC_IDENTIFY \
    #    "${STEREO_AMBIGUOUS}" \
    #    "${STEREO_AMBIGUOUS_NEW}" \
    #    "${STEREO_AMBIGUOUS_FOUND}" 2>&1 \
    #    | tee "${LOG_DIR}/10-21-resolve-ambiguous-stereo.log" 1>&2
    [ -e "${STEREO_AMBIGUOUS}" ] && \
        $ZINC_STEREO_DEFAULT_EXPAND \
            "${STEREO_AMBIGUOUS}" \
            "${STEREO_AMBIGUOUS_EXPANDED}" 2>&1 \
        | tee "${LOG_DIR}/10-23-ambiguous-expansion.log" 1>&2

    echo " Step 2.3) Handling ambiguous sterols" 1>&2
    echo "Skipping: Sterols and Glucose now handled inline" 1>&2
    #[ -e "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${SPECIAL_CHEMOTYPE}.ism" ] && \
    #$ZINC_STEREO_STEROL_EXPAND \
    #    "${STEREO_SEPARATE_DIR}/${AMBIGUOUS_LABEL}-${SPECIAL_CHEMOTYPE}.ism" \
    #    "${STEREO_SPECIAL_EXPANSIONS}" 2>&1 \
    #    | tee "${LOG_DIR}/10-30-expand-ambiguous-sterols.log" 1>&2
    #[ -e "${STEREO_SPECIAL_EXPANSIONS}" ] && \
    #$ZINC_STEROL_PICK \
    #    "${STEREO_SPECIAL_EXPANSIONS}" \
    #    "${STEREO_SPECIAL_PICKED}" 2>&1 \
    #    | tee "${LOG_DIR}/10-31-pick-expanded-sterols.log" 1>&2

    echo " Step 2.4) Consolidation of selected stereoisomers" 1>&2
    echo -e "SMILES\tName" > "${STEREO_PICKED}"
    for PICKED in \
        "${STEREO_UNAMBIGUOUS}" \
        "${STEREO_ENUMERATED}" \
        "${STEREO_AMBIGUOUS_EXPANDED}" ; do
        if [ -e "${PICKED}" ] ; then
            # Drop each file's own header line; one was written above.
            tail -n +2 "${PICKED}" >> "${STEREO_PICKED}"
        fi
    done
    popd 1>&2
    # NOTE(review): the early-resolution stage that would create
    # STEREO_AMBIGUOUS_FOUND is commented out above, so this link dangles
    # unless the file is left over from an earlier run. Step 5 only loads
    # inputs that exist, so a dangling link is simply skipped there.
    ln -svfn "${STEREO_DIR}/${STEREO_AMBIGUOUS_FOUND}" "${STEP_2_OUTPUT_A}" 1>&2
    ln -svfn "${STEREO_DIR}/${STEREO_PICKED}" "${STEP_2_OUTPUT_B}" 1>&2
else
    echo "Skipping stereo expansion by request" 1>&2
    [ ! -e "${STEP_2_OUTPUT_A}" ] && \
        touch "${STEP_2_OUTPUT_A}" # Nothing found
    [ ! -e "${STEP_2_OUTPUT_B}" ] && \
        ln -sv "${STEP_2_INPUT}" "${STEP_2_OUTPUT_B}" 1>&2
fi

echo "Step 3: Resolution" 1>&2
[ ! -e "${STEP_3_INPUT}" ] && \
    ln -sv "${STEP_2_OUTPUT_B}" "${STEP_3_INPUT}" 1>&2
if [ ! -e "${STEP_3_INPUT}" ] ; then
    echo "No input available. Skipping step 3" 1>&2
elif [ "${RUN_RESOLUTION}" == 'yes' ] ; then
    [ -e "${STEP_3_INPUT}" ] && \
        $ZINC_ANNOTATE \
            "${STEP_3_INPUT}" \
            "${RESOLUTION_ANNOTATED}" 2>&1 \
        | tee "${LOG_DIR}/30-resolution-annotation.log" 1>&2
    [ -e "${RESOLUTION_ANNOTATED}" ] && \
        $ZINC_IDENTIFY \
            "${RESOLUTION_ANNOTATED}" \
            "${RESOLUTION_NEW}" \
            "${RESOLUTION_FOUND}" 2>&1 \
        | tee "${LOG_DIR}/31-resolution.log" 1>&2
    ln -svfn "${RESOLUTION_NEW}" "${STEP_3_OUTPUT_A}" 1>&2
    ln -svfn "${RESOLUTION_FOUND}" "${STEP_3_OUTPUT_B}" 1>&2
else
    echo "Skipping resolution by request" 1>&2
    [ ! -e "${STEP_3_OUTPUT_A}" ] && \
        ln -sv "${STEP_3_INPUT}" "${STEP_3_OUTPUT_A}" 1>&2
    [ ! -e "${STEP_3_OUTPUT_B}" ] && \
        touch "${STEP_3_OUTPUT_B}"
fi

echo "Step 4: Creating new substances" 1>&2
[ ! -e "${STEP_4_INPUT}" ] && \
    ln -sv "${STEP_3_OUTPUT_A}" "${STEP_4_INPUT}" 1>&2
if [ ! -e "${STEP_4_INPUT}" ] ; then
    echo "No input available. Skipping step 4" 1>&2
elif [ "${RUN_CREATION}" == 'yes' ] ; then
    [ -e "${STEP_4_INPUT}" ] && \
        $ZINC_ANNOTATE \
            "${STEP_4_INPUT}" \
            "${SUBSTANCES_ANNOTATED}" 2>&1 \
        | tee "${LOG_DIR}/41-creation-annotation.log" 1>&2
    # Keep the header row in place and order the data rows by column 3.
    head -n 1 "${SUBSTANCES_ANNOTATED}" > "${SUBSTANCES_ORGANIZED}"
    tail -n +2 "${SUBSTANCES_ANNOTATED}" | sort -k 3 >> "${SUBSTANCES_ORGANIZED}"
    [ -e "${SUBSTANCES_ORGANIZED}" ] && \
        $ZINC_CREATE \
            "${SUBSTANCES_ORGANIZED}" \
            "${CREATION_MAPPINGS}" \
            "${CREATION_FAILURES}" 2>&1 \
        | tee "${LOG_DIR}/42-creation.log" 1>&2
    ln -svfn "${CREATION_MAPPINGS}" "${STEP_4_OUTPUT}" 1>&2
else
    echo "Skipping substance creation by request" 1>&2
    [ ! -e "${CREATION_MAPPINGS}" ] && \
        touch "${CREATION_MAPPINGS}" 1>&2
    [ ! -e "${STEP_4_OUTPUT}" ] && \
        ln -sv "${CREATION_MAPPINGS}" "${STEP_4_OUTPUT}" 1>&2
fi

echo "Step 5: Updating Catalog Mappings" 1>&2
mkdir -pv "${MAPPINGS_DIR}" 1>&2
[ ! -e "${STEP_5_INPUT_A}" ] && \
    ln -svfn "../${STEP_2_OUTPUT_A}" "${STEP_5_INPUT_A}" 1>&2
[ ! -e "${STEP_5_INPUT_B}" ] && \
    ln -svfn "../${STEP_3_OUTPUT_B}" "${STEP_5_INPUT_B}" 1>&2
[ ! -e "${STEP_5_INPUT_C}" ] && \
    ln -svfn "../${STEP_4_OUTPUT}" "${STEP_5_INPUT_C}" 1>&2
STEP_5_INPUTS=( "${STEP_5_INPUT_A}" "${STEP_5_INPUT_B}" "${STEP_5_INPUT_C}" )
if [ -n "${ADDITIONAL_MAPPINGS}" ] ; then
    ln -svfn "${ADDITIONAL_MAPPINGS}" "${STEP_5_INPUT_D}" 1>&2
    STEP_5_INPUTS+=( "${STEP_5_INPUT_D}" )
fi
if [ "${RUN_LOADING}" == 'yes' ] ; then
    # Only hand the loader inputs that actually exist (dangling links from
    # skipped or failed stages are dropped here).
    CATALOG_LOADING_INPUTS=()
    for INPUT in "${STEP_5_INPUTS[@]}" ; do
        if [ -e "${INPUT}" ] ; then
            CATALOG_LOADING_INPUTS+=( "${INPUT}" )
        fi
    done
    [ "${#CATALOG_LOADING_INPUTS[@]}" -gt 0 ] && \
        $ZINC_LOAD \
            "${CATALOG_NAME}" \
            "${CATALOG_LOADING_INPUTS[@]}" \
            -o "${STEP_5_OUTPUT}" 2>&1 \
        | tee "${LOG_DIR}/51-loading.log" 1>&2
else
    echo "Skipping catalog item mapping loading by request" 1>&2
    [ ! -e "${STEP_5_OUTPUT}" ] && \
        touch "${STEP_5_OUTPUT}" 1>&2
fi
popd 1>&2