001/*
002 *    GeoAPI - Java interfaces for OGC/ISO standards
003 *    http://www.geoapi.org
004 *
005 *    Copyright (C) 2018-2019 Open Geospatial Consortium, Inc.
006 *    All Rights Reserved. http://www.opengeospatial.org/ogc/legal
007 *
008 *    Permission to use, copy, and modify this software and its documentation, with
009 *    or without modification, for any purpose and without fee or royalty is hereby
010 *    granted, provided that you include the following on ALL copies of the software
011 *    and documentation or portions thereof, including modifications, that you make:
012 *
013 *    1. The full text of this NOTICE in a location viewable to users of the
014 *       redistributed or derivative work.
015 *    2. Notice of any changes or modifications to the OGC files, including the
016 *       date changes were made.
017 *
018 *    THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE
019 *    NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
020 *    TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
021 *    THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
022 *    PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
023 *
024 *    COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR
025 *    CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION.
026 *
027 *    The name and trademarks of copyright holders may NOT be used in advertising or
028 *    publicity pertaining to the software without specific, written prior permission.
029 *    Title to copyright in this software and any associated documentation will at all
030 *    times remain with copyright holders.
031 */
032package org.opengis.geoapi;
033
034
/**
 * Style of the documentation to store. Documentation in XSD files are not sentences:
 * they begin with a lower-case letter instead of an upper-case one and do not finish
 * with a period. That documentation can be read verbatim, or transformed into sentences.
 *
 * @author  Martin Desruisseaux (Geomatys)
 * @since   3.1
 * @version 3.1
 */
public enum DocumentationStyle {
    /**
     * Skip documentation. The {@link SchemaInformation.Element#documentation} field will be {@code null}.
     * This style can be used when documentation is not needed.
     */
    NONE,

    /**
     * Store documentation verbatim, without transforming them into sentences.
     */
    VERBATIM,

    /**
     * Transform documentation to sentences.
     */
    SENTENCE;

    /**
     * Prefix to omit at the beginning of sentence. Some XSD files begin their documentation with
     * {@code "Description:"}, which is not necessary. The comparison is case-insensitive.
     */
    private static final String OMIT = "Description:";

    /**
     * Parts to ignore from the description given in XSD files. A line starting with one of those
     * headers (ignoring case and leading whitespaces) is considered as an "annex" and is dropped
     * when it appears after the main description.
     *
     * @todo store those information in a separated map.
     */
    private static final String[] IGNORE = {
        "FGDC:",
        "Position:",
        "Postion:",         // Typo found in some XSD.
        "shortName:",
        "Conditional",
        "NITF_ACFTA:",
        "Note in 19115-3"
    };

    /**
     * Known typos in XSD files. Values at even indexes are the typos
     * and values at odd indexes are the fixes. The search is case-sensitive.
     *
     * @see <a href="https://github.com/opengeospatial/geoapi/pull/42">Issue #42</a>
     */
    private static final String[] TYPOS = {
        "avaialble",    "available",
        "desimination", "dissemination",
        "identifer",    "identifier",
        "occurance",    "occurrence",
        "occurence",    "occurrence",
        "occured",      "occurred",
        "recieve",      "receive",
        "temportal",    "temporal"
    };

    /**
     * Returns the index {@literal >=} {@code from} of the first non-whitespace character,
     * or {@code doc.length()} if only whitespaces remain.
     *
     * @param  doc   the text to scan.
     * @param  from  index of the first character to examine.
     * @return index of the first non-whitespace character at or after {@code from}.
     */
    private static int skipLeadingWhitespaces(final String doc, int from) {
        while (from < doc.length()) {
            final int c = doc.codePointAt(from);
            if (!Character.isWhitespace(c)) break;
            from += Character.charCount(c);         // Step by code point for supplementary characters.
        }
        return from;
    }

    /**
     * Returns the index {@literal <=} {@code from} just after the last non-whitespace character
     * located before {@code from}, or 0 if only whitespaces precede {@code from}.
     *
     * @param  doc   the text to scan.
     * @param  from  index after the last character to examine.
     * @return index after the last non-whitespace character before {@code from}.
     */
    private static int skipTrailingWhitespaces(final String doc, int from) {
        while (from > 0) {
            final int c = doc.codePointBefore(from);
            if (!Character.isWhitespace(c)) break;
            from -= Character.charCount(c);         // Step by code point for supplementary characters.
        }
        return from;
    }

    /**
     * Transforms the given documentation from XSD file into a sentence:
     * the optional {@code "Description:"} prefix and the trailing annexes are removed,
     * the first letter is converted to upper-case, a final period is added if missing,
     * consecutive spaces are collapsed and known typos are fixed.
     *
     * @param  doc     documentation from XSD file.
     * @param  buffer  temporary buffer. Must be initially empty.
     * @return the sentence, or {@code null} if none.
     */
    static String sentence(final String doc, final StringBuilder buffer) {
        /*
         * Skip leading whitespaces and "Description:" prefix (if any),
         * then omit "annexes" on new lines after the main description.
         * If the result is an empty string, use null for "no documentation".
         */
        int startAt = skipLeadingWhitespaces(doc, 0);
        if (doc.regionMatches(true, startAt, OMIT, 0, OMIT.length())) {
            // The prefix begins at 'startAt', not necessarily at index 0.
            startAt = skipLeadingWhitespaces(doc, startAt + OMIT.length());
        }
        final int stopAt = beforeAnnexes(doc);
        if (startAt >= stopAt) {
            return null;
        }
        /*
         * At this point we know the sub-string to use for documentation.
         * Copy as a sentence (upper-case first letter, final period).
         */
        final int firstChar = doc.codePointAt(startAt);
        buffer.appendCodePoint(Character.toUpperCase(firstChar))
              .append(doc, startAt + Character.charCount(firstChar), stopAt);
        if (doc.charAt(stopAt - 1) != '.') {
            buffer.append('.');
        }
        // Replace multi-spaces by a single space. Searching again from the same
        // index handles runs of more than two spaces.
        for (int i=0; (i = buffer.indexOf("  ", i)) >= 0;) {
            buffer.deleteCharAt(i);
        }
        // Documentation in XSD are not sentences. Make it a sentence
        // by terminating the text which precedes the first note.
        final int note = buffer.indexOf(" NOTE: ");
        if (note > 0 && buffer.charAt(note - 1) != '.') {
            buffer.insert(note, '.');
        }
        /*
         * Fix typos. The array contains (typo, fix) pairs.
         */
        for (int t=0; t < TYPOS.length; t += 2) {
            final String typo = TYPOS[t];
            final String fix  = TYPOS[t + 1];
            int i = buffer.indexOf(typo);
            while (i >= 0) {
                buffer.replace(i, i + typo.length(), fix);
                i = buffer.indexOf(typo, i + fix.length());     // Resume after the replacement.
            }
        }
        return buffer.toString();
    }

    /**
     * Returns the index after the last character to keep in the given documentation.
     * This method cuts the documentation before trailing "shortName" and other annexes,
     * and before any trailing whitespaces. Lines are examined from the last one toward
     * the first one; the scan stops at the first line which is not an annex.
     *
     * @param  doc  documentation from XSD file.
     * @return index after the last character of the main description.
     */
    private static int beforeAnnexes(final String doc) {
        // Trim first, otherwise a trailing line break would be taken for an empty
        // (non-annex) last line and would stop the scan before any annex is removed.
        int stopAt = skipTrailingWhitespaces(doc, doc.length());
        int eol = stopAt;
        while (--eol >= 0) {
            final char c = doc.charAt(eol);
            if (c == '\r' || c == '\n') {
                if (!isAnnex(doc, skipLeadingWhitespaces(doc, eol))) {
                    break;                                  // Reached the main description.
                }
                // Drop the annex together with the line break(s) and spaces before it.
                stopAt = eol = skipTrailingWhitespaces(doc, eol);
            }
        }
        return stopAt;
    }

    /**
     * Returns whether the line starting at the given index begins with one of the
     * {@link #IGNORE} headers, ignoring case.
     */
    private static boolean isAnnex(final String doc, final int lineStart) {
        for (final String header : IGNORE) {
            if (doc.regionMatches(true, lineStart, header, 0, header.length())) {
                return true;
            }
        }
        return false;
    }
}