/**
 * UTF8Magic is a simple class for parsing UTF-8 with fallback.
 *
 * <p>It is designed to be completely self contained, very simple to use, and fit
 * easily into any existing project.
 *
 * <p>This class is in public domain, yay!
 *
 * @author Vidar koala_man Holen
 */
public class UTF8Magic {
/**
 * Parse a String read as Latin1, and conditionally convert from UTF-8.
 *
 * <p>doMagic will parse a String that was read as Latin1 (ISO-8859-1) to see
 * if it's valid UTF-8. If it is, it will be converted to a proper Unicode
 * String and returned. If it's not valid, it will simply be returned as it
 * is.
 *
 * <p>Example:
 * <pre>
 * myString = myBufferedReader.readLine();
 * myString = UTF8Magic.doMagic(myString);
 * parse(myString);
 * </pre>
 * The parse-method will be called with a correct String, regardless of
 * whether the data is Latin1 or UTF-8.
 *
 * @param s The string to parse
 * @return the converted string, or s if it's not UTF-8.
 */
public static String doMagic(String s) {
if(s==null) return null;
char[] data=s.toCharArray();
int lpos, upos; /* Latin position, Unicode position. */
char t; /* For building chars. */
int count; /* Number of following bytes in character code. */
boolean changed=false; /* If the string was modified. */
for(lpos=upos=0; lpos
* This method is not required and can be edited out.
*/
/**
 * Self-check and rough benchmark for doMagic.
 *
 * Feeds doMagic the same six-character sample twice: once as its UTF-8 byte
 * sequence read as Latin1 characters, and once as plain Latin1. If both calls
 * yield equal strings, a success message and three timing lines (from time())
 * are printed; otherwise a single failure message is printed.
 *
 * This method is not required and can be edited out.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    final String fromUtf8Input = doMagic("\u00c3\u00a6\u00c3\u00b8\u00c3\u00a5\u00c3\u0086\u00c3\u0098\u00c3\u0085");
    final String fromLatin1Input = doMagic("\u00e6\u00f8\u00e5\u00c6\u00d8\u00c5");

    // Bail out early when the two inputs did not end up as the same string.
    if (!fromUtf8Input.equals(fromLatin1Input)) {
        System.out.println("It doesn't work, koala_man is a jackass :(");
        return;
    }

    System.out.println("The strings are equal, the doMagic did magic!");

    // Timings are taken one at a time, in the same order they are printed.
    final long latin1Millis = time(fromLatin1Input);
    System.out.println("1M doMagics(latin1) takes " + latin1Millis + "ms");

    final long utf8Millis = time(fromUtf8Input);
    System.out.println("1M doMagics(utf-8) takes " + utf8Millis + "ms");

    final long asciiMillis = time("aeoEOE");
    System.out.println("1M doMagics(ascii) takes " + asciiMillis + "ms");
}
/**
 * Return the time in milliseconds to convert the string a million times.
 *
 * The interval is measured with System.nanoTime(), which is meant for
 * elapsed-time measurement, rather than with the wall clock, so a system
 * clock adjustment during the run cannot produce a negative or inflated
 * result.
 *
 * This method is not required and can be edited out.
 *
 * @param s the string handed to doMagic on every iteration
 * @return the elapsed time for one million doMagic calls, in whole milliseconds
 */
public static long time(String s) {
    final long startNanos = System.nanoTime();
    for (int i = 0; i < 1000000; i++) {
        doMagic(s);
    }
    final long elapsedNanos = System.nanoTime() - startNanos;
    // 1,000,000 nanoseconds per millisecond; the fractional part is dropped.
    return elapsedNanos / 1000000L;
}
/* ^--- Cut here ---^ */
}