0%

XML 转义字符

xml 处理类

在xml对象属性上添加注解: @XmlJavaTypeAdapter(XMLUnescapeHtmlStringAdapter.class) 即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package com.enableets.edu.sdk.paper.util;

import org.apache.commons.lang.StringEscapeUtils;

import javax.xml.bind.annotation.adapters.XmlAdapter;

/**
* Html 转义字符处理, 并删除前后空格
*/

public class XMLUnescapeHtmlStringAdapter extends XmlAdapter<String,String> {

/**
* Removes leading and trailing whitespaces of the string
* given as the parameter, then truncate any
* sequnce of tab, CR, LF, and SP by a single whitespace character ' '.
*/
public String unmarshal(String text) {
if(text==null) return null; // be defensive

int len = text.length();

// most of the texts are already in the collapsed form.
// so look for the first whitespace in the hope that we will
// never see it.
int s=0;
while(s<len) {
if(isWhiteSpace(text.charAt(s)))
break;
s++;
}
if(s==len)
// the input happens to be already collapsed.
return text;

// we now know that the input contains spaces.
// let's sit down and do the collapsing normally.

StringBuilder result = new StringBuilder(len /*allocate enough size to avoid re-allocation*/ );

if(s!=0) {
for( int i=0; i<s; i++ )
result.append(text.charAt(i));
result.append(' ');
}

boolean inStripMode = true;
for (int i = s+1; i < len; i++) {
char ch = text.charAt(i);
boolean b = isWhiteSpace(ch);
if (inStripMode && b)
continue; // skip this character

inStripMode = b;
if (inStripMode)
result.append(' ');
else
result.append(ch);
}

// remove trailing whitespaces
len = result.length();
if (len > 0 && result.charAt(len - 1) == ' ')
result.setLength(len - 1);
// whitespaces are already collapsed,
// so all we have to do is to remove the last one character
// if it's a whitespace.

// tran html encoding
String s1 = result.toString();
return StringEscapeUtils.unescapeHtml(s1);
}

/**
* No-op.
*
* Just return the same string given as the parameter.
*/
public String marshal(String s) {
return s;
}


/** returns true if the specified char is a white space character. */
protected static boolean isWhiteSpace(char ch) {
// most of the characters are non-control characters.
// so check that first to quickly return false for most of the cases.
if( ch>0x20 ) return false;

// other than we have to do four comparisons.
return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
}
}