Frames

On How to replace strings

0
1
1package de.otto.sluggify;
2
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import java.util.Locale;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.collect.Iterables.transform;
15
16public class Sluggify {
17
18 private static final LoadingCache<String, String> slugifyCache = CacheBuilder.<String,String> newBuilder()
19 .maximumSize(10000)
20 .expireAfterWrite(10, TimeUnit.MINUTES)
21 .build(new CacheLoader<String, String>() {
22 @Override
23 public String load(String key) throws Exception {
24 return doSlugify(key);
25 }
26 });
27
28 public static boolean isEmpty(String stringToCheck) {
29 return stringToCheck == null || stringToCheck.isEmpty();
30 }
31
32 private static final Pattern SPECIAL_CHARACTERS_REGEX = Pattern.compile("\\W");
33
34 public static String removeSpecialCharactersAndConvertToLowercase(String input) {
35 StringBuilder result = new StringBuilder();
36 Matcher matcher = SPECIAL_CHARACTERS_REGEX.matcher(input);
37 int lastIdx = 0;
38 while (matcher.find()) {
39 int startIdx = matcher.start();
40 if (startIdx > lastIdx) {
41 result.append(input.substring(lastIdx, startIdx).toLowerCase());
42 }
43 char umlaut = matcher.group().charAt(0);
44 String replacementString = replacementStringFor(umlaut);
45 if (result.length() == 0 || !("-".equals(replacementString) && result.charAt(result.length() - 1) == '-')) {
46 result.append(replacementString);
47 }
48 lastIdx = matcher.end();
49 }
50 if (lastIdx < input.length()) {
51 result.append(input.substring(lastIdx).toLowerCase());
52 }
53 return result.toString();
54 }
55
56 private static String replacementStringFor(char specialChar) {
57 switch (specialChar) {
58 case 'ä':
59 case 'Ä':
60 case 'æ':
61 return "ae";
62 case 'ö':
63 case 'Ö':
64 return "oe";
65 case 'Ü':
66 case 'ü':
67 return "ue";
68 case 'ß':
69 return "ss";
70 case 'À':
71 case 'Á':
72 case 'Â':
73 case 'Ã':
74 case 'Å':
75 case 'Ā':
76 case 'Ą':
77 case 'Ă':
78 case 'Ã&nbsp;':
79 case 'á':
80 case 'â':
81 case 'ã':
82 case 'Ã¥':
83 case 'ā':
84 case 'ą':
85 case 'ă':
86 return "a";
87 case 'ç':
88 case 'ć':
89 case 'č':
90 case 'ĉ':
91 case 'ċ':
92 case 'Ç':
93 case 'Ć':
94 case 'Č':
95 case 'Ĉ':
96 case 'Ċ':
97 return "c";
98 case 'ď':
99 case 'đ':
100 case 'ð':
101 case 'Ď':
102 case 'Đ':
103 case 'Ð':
104 return "d";
105 case 'È':
106 case 'É':
107 case 'Ê':
108 case 'Ë':
109 case 'Ē':
110 case 'Ę':
111 case 'Ě':
112 case 'Ĕ':
113 case 'Ė':
114 case 'è':
115 case 'é':
116 case 'ê':
117 case 'ë':
118 case 'ē':
119 case 'ę':
120 case 'ě':
121 case 'ĕ':
122 case 'ė':
123 return "e";
124 case 'ƒ':
125 case 'Å¿':
126 return "f";
127 case 'Ä&nbsp;':
128 case 'Ä¢':
129 case 'Ĝ':
130 case 'Ğ':
131 case 'ĝ':
132 case 'ğ':
133 case 'Ä¡':
134 case 'Ä£':
135 return "g";
136 case 'Ĥ':
137 case 'Ħ':
138 case 'Ä¥':
139 case 'ħ':
140 return "h";
141 case 'Ì':
142 case 'Í':
143 case 'Î':
144 case 'Ï':
145 case 'Ī':
146 case 'Ĩ':
147 case 'Ĭ':
148 case 'Ä®':
149 case 'Ä°':
150 case 'ì':
151 case 'í':
152 case 'î':
153 case 'ï':
154 case 'Ä«':
155 case 'Ä©':
156 case 'Ä­':
157 case 'į':
158 case 'ı':
159 return "i";
160 case 'ij':
161 return "ij";
162 case 'Ä´':
163 case 'ĵ':
164 return "j";
165 case 'Ķ':
166 case 'Ä·':
167 case 'ĸ':
168 return "k";
169 case 'ł':
170 case 'ľ':
171 case 'ĺ':
172 case 'ļ':
173 case 'ŀ':
174 case 'Ł':
175 case 'Ľ':
176 case 'Ĺ':
177 case 'Ä»':
178 case 'Ä¿':
179 return "l";
180 case 'Ñ':
181 case 'Ń':
182 case 'Ň':
183 case 'Ņ':
184 case 'Ŋ':
185 case 'ñ':
186 case 'ń':
187 case 'ň':
188 case 'ņ':
189 case 'ʼn':
190 case 'ŋ':
191 return "n";
192 case 'ò':
193 case 'ó':
194 case 'ô':
195 case 'õ':
196 case 'ø':
197 case 'ō':
198 case 'ő':
199 case 'ŏ':
200 case 'œ':
201 case 'Ò':
202 case 'Ó':
203 case 'Ô':
204 case 'Õ':
205 case 'Ø':
206 case 'Ō':
207 case 'Ő':
208 case 'Ŏ':
209 return "o";
210 case 'Þ':
211 case 'þ':
212 return "p";
213 case 'ŕ':
214 case 'ř':
215 case 'ŗ':
216 case 'Ŕ':
217 case 'Ř':
218 case 'Ŗ':
219 return "r";
220 case 'Ś':
221 case 'Å&nbsp;':
222 case 'Ş':
223 case 'Ŝ':
224 case 'Ș':
225 case 'ś':
226 case 'Å¡':
227 case 'ş':
228 case 'ŝ':
229 case 'ș':
230 return "s";
231 case 'Å¥':
232 case 'Å£':
233 case 'ŧ':
234 case 'ț':
235 return "t";
236 case 'Ù':
237 case 'Ú':
238 case 'Û':
239 case 'Ū':
240 case 'Å®':
241 case 'Å°':
242 case 'Ŭ':
243 case 'Ũ':
244 case 'Ų':
245 case 'ù':
246 case 'ú':
247 case 'û':
248 case 'Å«':
249 case 'ů':
250 case 'ű':
251 case 'Å­':
252 case 'Å©':
253 case 'ų':
254 return "u";
255 case 'Å´':
256 case 'ŵ':
257 return "w";
258 case 'Ý':
259 case 'Ŷ':
260 case 'Ÿ':
261 case 'ý':
262 case 'ÿ':
263 case 'Å·':
264 return "y";
265 case 'Ź':
266 case 'Ž':
267 case 'Å»':
268 case 'ž':
269 case 'ż':
270 case 'ź':
271 return "z";
272 case '+':
273 return "plus";
274 default:
275 return "-";
276 }
277 }
278
279 public static String sluggify(String string) {
280 if (isEmpty(string)) {
281 return string;
282 }
283
284 try {
285 return slugifyCache.get(string);
286 } catch (ExecutionException e) {
287 throw new RuntimeException(e);
288 }
289 }
290
291 /**
292 * Sluggify a path consisting of several path elements separated by a path separator.
293 * That is, split path by separator, sluggify all the elements and then join the path back together.
294 *
295 * @param path path to sluggify
296 * @param pathSeparator separator of path to sluggify
297 * @param resultPathSeparator new separator to be used for the returned path
298 *
299 * @return the new sluggified path
300 */
301 public static String sluggifyPath(final String path,
302 final String pathSeparator,
303 final String resultPathSeparator) {
304 final Iterable<String> parts = Splitter.on(pathSeparator).split(path);
305 return Joiner.on(resultPathSeparator).join(transform(parts, input -> Sluggify.sluggify(input)));
306 }
307
308 private static String doSlugify(String string) {
309 string = string.replaceAll("([a-z])'s([^a-z])", "$1s$2"); // WTF ?
310 string = removeSpecialCharactersAndConvertToLowercase(string);
311
312 string = string.replaceAll("-+$", "").replaceAll("^-+", "");
313
314 return string;
315 }
316}