1 | // Jomic - a viewer for comic book archives. |
2 | // Copyright (C) 2004-2011 Thomas Aglassinger |
3 | // |
4 | // This program is free software: you can redistribute it and/or modify |
5 | // it under the terms of the GNU General Public License as published by |
6 | // the Free Software Foundation, either version 3 of the License, or |
7 | // (at your option) any later version. |
8 | // |
9 | // This program is distributed in the hope that it will be useful, |
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | // GNU General Public License for more details. |
13 | // |
14 | // You should have received a copy of the GNU General Public License |
15 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | package net.sf.jomic.tools; |
17 | |
18 | import java.util.Comparator; |
19 | |
20 | import org.apache.commons.logging.Log; |
21 | import org.apache.commons.logging.LogFactory; |
22 | |
23 | /** |
24 | * Comparator to perform a 'natural order' comparisons of strings. |
25 | * |
26 | * @author Thomas Aglassinger |
27 | */ |
28 | public class NaturalOrderComparator implements Comparator |
29 | { |
30 | private static final int DEFAULT_NUMBER_LENGTH = 16; |
31 | private static final char TOKEN_DIGIT = 'd'; |
32 | private static final char TOKEN_OTHER = 'o'; |
33 | private static final char TOKEN_SPACE = 's'; |
34 | |
35 | private Log logger; |
36 | |
37 | public NaturalOrderComparator() { |
38 | logger = LogFactory.getLog(NaturalOrderComparator.class); |
39 | } |
40 | |
41 | public int compare(Object o1, Object o2) { |
42 | int result = Integer.MAX_VALUE; |
43 | String a = o1.toString(); |
44 | String b = o2.toString(); |
45 | int ia = 0; |
46 | int ib = 0; |
47 | int aLength = a.length(); |
48 | int bLength = b.length(); |
49 | |
50 | // skip leading white space |
51 | while ((ia < aLength) && Character.isWhitespace(a.charAt(ia))) { |
52 | ia += 1; |
53 | } |
54 | while ((ib < bLength) && Character.isWhitespace(b.charAt(ib))) { |
55 | ib += 1; |
56 | } |
57 | |
58 | while (result == Integer.MAX_VALUE) { |
59 | // did one of the strings reach its end? |
60 | if (ia == aLength) { |
61 | if (ib == bLength) { |
62 | // both strings reached their end |
63 | result = 0; |
64 | } else { |
65 | // a reached its end, b goes on |
66 | result = -1; |
67 | } |
68 | } else if (ib == bLength) { |
69 | // b reached its end, a goes on |
70 | result = 1; |
71 | } else { |
72 | char aChar = a.charAt(ia); |
73 | char aType = tokenOf(aChar); |
74 | char bChar = b.charAt(ib); |
75 | char bType = tokenOf(bChar); |
76 | |
77 | if (aType != bType) { |
78 | result = sgn(aChar - bChar); |
79 | } else if (aType == TOKEN_SPACE) { |
80 | // seek until end of space |
81 | while (((ia + 1) < aLength) |
82 | && (tokenOf(a.charAt(ia + 1)) == TOKEN_SPACE)) { |
83 | ia += 1; |
84 | } |
85 | while (((ib + 1) < bLength) |
86 | && (tokenOf(b.charAt(ib + 1)) == TOKEN_SPACE)) { |
87 | ib += 1; |
88 | } |
89 | } else if (aType == TOKEN_DIGIT) { |
90 | StringBuffer aNumber = extractNumber(a, ia); |
91 | |
92 | if (logger.isDebugEnabled()) { |
93 | logger.debug("aNumber=" + aNumber); |
94 | } |
95 | StringBuffer bNumber = extractNumber(b, ib); |
96 | |
97 | if (logger.isDebugEnabled()) { |
98 | logger.debug("bNumber=" + bNumber); |
99 | } |
100 | ia += aNumber.length() - 1; |
101 | ib += bNumber.length() - 1; |
102 | stripLeadingZeros(aNumber); |
103 | if (logger.isDebugEnabled()) { |
104 | logger.debug("aNumber\\0=" + aNumber); |
105 | } |
106 | stripLeadingZeros(bNumber); |
107 | if (logger.isDebugEnabled()) { |
108 | logger.debug("bNumber\\0=" + bNumber); |
109 | } |
110 | int deltaLength = sgn(aNumber.length() - bNumber.length()); |
111 | |
112 | if (deltaLength == 0) { |
113 | // numbers have same length; compare them digit by digit |
114 | int i = 0; |
115 | int deltaDigit; |
116 | |
117 | do { |
118 | deltaDigit = aNumber.charAt(i) - bNumber.charAt(i); |
119 | if (deltaDigit == 0) { |
120 | i += 1; |
121 | } else { |
122 | // digits differ |
123 | result = sgn(deltaDigit); |
124 | } |
125 | } while ((deltaDigit == 0) && (i < aNumber.length())); |
126 | } else { |
127 | // numbers differ in length |
128 | result = deltaLength; |
129 | } |
130 | } else if (aType == TOKEN_OTHER) { |
131 | int cmp = sgn(aChar - bChar); |
132 | |
133 | if (cmp != 0) { |
134 | result = cmp; |
135 | } |
136 | } else { |
137 | assert false : "type = " + aType; |
138 | } |
139 | ia += 1; |
140 | ib += 1; |
141 | } |
142 | } |
143 | return result; |
144 | } |
145 | |
146 | private StringBuffer extractNumber(String some, int startIndex) { |
147 | assert some != null; |
148 | assert startIndex < some.length(); |
149 | StringBuffer result = new StringBuffer(DEFAULT_NUMBER_LENGTH); |
150 | int index = startIndex; |
151 | boolean foundNonDigit = false; |
152 | |
153 | if (logger.isDebugEnabled()) { |
154 | logger.debug("extractNumber(\"" + some + "\", " + startIndex + ")"); |
155 | } |
156 | do { |
157 | char c = some.charAt(index); |
158 | |
159 | if (logger.isDebugEnabled()) { |
160 | logger.debug(" " + c); |
161 | } |
162 | foundNonDigit = !Character.isDigit(c); |
163 | if (!foundNonDigit) { |
164 | result.append(c); |
165 | index += 1; |
166 | } |
167 | } while (!foundNonDigit && (index < some.length())); |
168 | return result; |
169 | } |
170 | |
171 | private int sgn(int x) { |
172 | int result; |
173 | |
174 | if (x > 0) { |
175 | result = 1; |
176 | } else if (x == 0) { |
177 | result = 0; |
178 | } else { |
179 | result = -1; |
180 | } |
181 | return result; |
182 | } |
183 | |
184 | private void stripLeadingZeros(StringBuffer some) { |
185 | int deleteCount = 0; |
186 | |
187 | while ((deleteCount < some.length() - 1) |
188 | && (some.charAt(deleteCount) == '0')) { |
189 | deleteCount += 1; |
190 | } |
191 | some.delete(0, deleteCount); |
192 | } |
193 | |
194 | private char tokenOf(char c) { |
195 | char result; |
196 | |
197 | if (Character.isDigit(c)) { |
198 | result = TOKEN_DIGIT; |
199 | } else if (Character.isWhitespace(c)) { |
200 | result = TOKEN_SPACE; |
201 | } else { |
202 | result = TOKEN_OTHER; |
203 | } |
204 | return result; |
205 | } |
206 | } |