franta-hg@72
|
1 |
/*
|
franta-hg@72
|
2 |
* SONEWS News Server
|
franta-hg@72
|
3 |
* see AUTHORS for the list of contributors
|
franta-hg@72
|
4 |
*
|
franta-hg@72
|
5 |
* This program is free software: you can redistribute it and/or modify
|
franta-hg@72
|
6 |
* it under the terms of the GNU General Public License as published by
|
franta-hg@72
|
7 |
* the Free Software Foundation, either version 3 of the License, or
|
franta-hg@72
|
8 |
* (at your option) any later version.
|
franta-hg@72
|
9 |
*
|
franta-hg@72
|
10 |
* This program is distributed in the hope that it will be useful,
|
franta-hg@72
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
franta-hg@72
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
franta-hg@72
|
13 |
* GNU General Public License for more details.
|
franta-hg@72
|
14 |
*
|
franta-hg@72
|
15 |
* You should have received a copy of the GNU General Public License
|
franta-hg@72
|
16 |
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
franta-hg@72
|
17 |
*/
|
franta-hg@72
|
18 |
package org.sonews.storage;
|
franta-hg@72
|
19 |
|
franta-hg@75
|
20 |
import java.io.BufferedReader;
|
franta-hg@102
|
21 |
import java.io.ByteArrayInputStream;
|
franta-hg@72
|
22 |
import java.io.ByteArrayOutputStream;
|
franta-hg@72
|
23 |
import java.io.IOException;
|
franta-hg@75
|
24 |
import java.io.InputStream;
|
franta-hg@75
|
25 |
import java.io.InputStreamReader;
|
franta-hg@75
|
26 |
import java.io.PrintStream;
|
franta-hg@74
|
27 |
import java.io.StringReader;
|
franta-hg@74
|
28 |
import java.io.StringWriter;
|
franta-hg@72
|
29 |
import java.io.UnsupportedEncodingException;
|
franta-hg@72
|
30 |
import java.sql.ResultSet;
|
franta-hg@72
|
31 |
import java.sql.SQLException;
|
franta-hg@72
|
32 |
import java.util.ArrayList;
|
franta-hg@109
|
33 |
import java.util.Arrays;
|
franta-hg@72
|
34 |
import java.util.Date;
|
franta-hg@72
|
35 |
import java.util.Enumeration;
|
franta-hg@74
|
36 |
import java.util.logging.Level;
|
franta-hg@74
|
37 |
import java.util.logging.Logger;
|
franta-hg@109
|
38 |
import java.util.regex.Matcher;
|
franta-hg@109
|
39 |
import java.util.regex.Pattern;
|
franta-hg@72
|
40 |
import javax.mail.Header;
|
franta-hg@72
|
41 |
import javax.mail.MessagingException;
|
franta-hg@72
|
42 |
import javax.mail.Multipart;
|
franta-hg@72
|
43 |
import javax.mail.Session;
|
franta-hg@72
|
44 |
import javax.mail.internet.InternetAddress;
|
franta-hg@72
|
45 |
import javax.mail.internet.MimeBodyPart;
|
franta-hg@72
|
46 |
import javax.mail.internet.MimeMessage;
|
franta-hg@72
|
47 |
import javax.mail.internet.MimeMultipart;
|
franta-hg@104
|
48 |
import javax.xml.parsers.DocumentBuilder;
|
franta-hg@104
|
49 |
import javax.xml.parsers.DocumentBuilderFactory;
|
franta-hg@104
|
50 |
import javax.xml.parsers.ParserConfigurationException;
|
franta-hg@74
|
51 |
import javax.xml.transform.Transformer;
|
franta-hg@103
|
52 |
import javax.xml.transform.TransformerException;
|
franta-hg@74
|
53 |
import javax.xml.transform.TransformerFactory;
|
franta-hg@104
|
54 |
import javax.xml.transform.dom.DOMSource;
|
franta-hg@74
|
55 |
import javax.xml.transform.stream.StreamResult;
|
franta-hg@74
|
56 |
import javax.xml.transform.stream.StreamSource;
|
franta-hg@102
|
57 |
import org.sonews.daemon.NNTPConnection;
|
franta-hg@74
|
58 |
import org.sonews.util.io.Resource;
|
franta-hg@104
|
59 |
import org.w3c.dom.Document;
|
franta-hg@104
|
60 |
import org.xml.sax.SAXException;
|
franta-hg@72
|
61 |
|
franta-hg@72
|
62 |
/**
 * A MimeMessage that supports a custom Message-ID header
 * (the header is not overwritten by the default one, as it would be in a plain MimeMessage).
 *
 * It also adds separate serialization of the headers and of the body.
 *
 * It can be deserialized from an SQL ResultSet.
 *
 * @author František Kučera (frantovo.cz)
 */
|
franta-hg@72
|
72 |
public class DrupalMessage extends MimeMessage {
|
franta-hg@72
|
73 |
|
franta-hg@116
|
74 |
/**
|
franta-hg@116
|
75 |
* If body of message posted by user through NNTP starts with this text,
|
franta-hg@116
|
76 |
* it will be treated as formated text in Markdown syntax.
|
franta-hg@116
|
77 |
*/
|
franta-hg@116
|
78 |
private static final String MARKDOWN_HEADER = "#!markdown\r\n";
|
franta-hg@74
|
79 |
private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
|
franta-hg@72
|
80 |
private static final String MESSAGE_ID_HEADER = "Message-ID";
|
franta-hg@72
|
81 |
private static final String CRLF = "\r\n";
|
franta-hg@72
|
82 |
public static final String CHARSET = "UTF-8";
|
franta-hg@72
|
83 |
private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
|
franta-hg@100
|
84 |
private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
|
franta-hg@72
|
85 |
private String messageID;
|
franta-hg@102
|
86 |
private Long parentID;
|
franta-hg@102
|
87 |
private Long groupID;
|
franta-hg@104
|
88 |
private TransformerFactory transformerFactory;
|
franta-hg@104
|
89 |
private DocumentBuilderFactory documentBuilderFactory;
|
franta-hg@104
|
90 |
|
franta-hg@104
|
91 |
/**
|
franta-hg@104
|
92 |
* Initializes XML factories (Transformer, DocumentBuilder).
|
franta-hg@104
|
93 |
*/
|
franta-hg@104
|
94 |
private void initFactories() {
|
franta-hg@104
|
95 |
transformerFactory = TransformerFactory.newInstance();
|
franta-hg@104
|
96 |
documentBuilderFactory = DocumentBuilderFactory.newInstance();
|
franta-hg@104
|
97 |
/**
|
franta-hg@104
|
98 |
* Komentáře nás nepotřebujeme
|
franta-hg@104
|
99 |
* (a museli bychom je brát v úvahu při dělení odstavců:
|
franta-hg@104
|
100 |
* v současné verzi XSLT odstavcovače by nám případný komentář
|
franta-hg@104
|
101 |
* rozdělil text na dva odstavce, přestože to má být odstavec jede).
|
franta-hg@104
|
102 |
*/
|
franta-hg@104
|
103 |
documentBuilderFactory.setIgnoringComments(true);
|
franta-hg@104
|
104 |
}
|
franta-hg@72
|
105 |
|
franta-hg@72
|
106 |
/**
|
franta-hg@72
|
107 |
* Constructs MIME message from SQL result.
|
franta-hg@72
|
108 |
* @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
|
franta-hg@72
|
109 |
* @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
|
franta-hg@72
|
110 |
*/
|
franta-hg@104
|
111 |
public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {
|
franta-hg@72
|
112 |
super(Session.getDefaultInstance(System.getProperties()));
|
franta-hg@104
|
113 |
initFactories();
|
franta-hg@72
|
114 |
|
franta-hg@102
|
115 |
groupID = rs.getLong("group_id");
|
franta-hg@102
|
116 |
addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
|
franta-hg@72
|
117 |
addHeader("Newsgroups", rs.getString("group_name"));
|
franta-hg@74
|
118 |
setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
|
franta-hg@72
|
119 |
setSubject(rs.getString("subject"));
|
franta-hg@72
|
120 |
setSentDate(new Date(rs.getLong("created")));
|
franta-hg@74
|
121 |
|
franta-hg@102
|
122 |
parentID = rs.getLong("parent_id");
|
franta-hg@74
|
123 |
if (parentID > 0) {
|
franta-hg@72
|
124 |
String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
|
franta-hg@72
|
125 |
addHeader("In-Reply-To", parentMessageID);
|
franta-hg@72
|
126 |
addHeader("References", parentMessageID);
|
franta-hg@72
|
127 |
}
|
franta-hg@72
|
128 |
|
franta-hg@72
|
129 |
if (constructBody) {
|
franta-hg@72
|
130 |
Multipart multipart = new MimeMultipart("alternative");
|
franta-hg@72
|
131 |
setContent(multipart);
|
franta-hg@72
|
132 |
|
franta-hg@82
|
133 |
/** XHTML part */
|
franta-hg@82
|
134 |
MimeBodyPart htmlPart = new MimeBodyPart();
|
franta-hg@103
|
135 |
String xhtmlText = readXhtmlText(
|
franta-hg@103
|
136 |
rs.getString("text"),
|
franta-hg@103
|
137 |
rs.getString("subject"),
|
franta-hg@103
|
138 |
rs.getInt("parent_id"),
|
franta-hg@103
|
139 |
rs.getString("urlBase"),
|
franta-hg@103
|
140 |
rs.getString("wwwRead"),
|
franta-hg@103
|
141 |
rs.getString("wwwPost"));
|
franta-hg@82
|
142 |
htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
|
franta-hg@84
|
143 |
|
franta-hg@74
|
144 |
/** Plain text part */
|
franta-hg@72
|
145 |
MimeBodyPart textPart = new MimeBodyPart();
|
franta-hg@106
|
146 |
String plainText = formatedToPlainText(xhtmlText);
|
franta-hg@89
|
147 |
textPart.setText(plainText);
|
franta-hg@89
|
148 |
//addHeader("Lines", String.valueOf(plainText.split("\n").length));
|
franta-hg@87
|
149 |
|
franta-hg@87
|
150 |
/**
|
franta-hg@87
|
151 |
* Thunderbirdu záleží, v jakém pořadí části jsou
|
franta-hg@87
|
152 |
* (když je prostý text druhý, html se nezobrazí),
|
franta-hg@87
|
153 |
* KNode zobrazuje HTML správně, i když je na prvním místě.
|
franta-hg@87
|
154 |
*/
|
franta-hg@72
|
155 |
multipart.addBodyPart(textPart);
|
franta-hg@87
|
156 |
multipart.addBodyPart(htmlPart);
|
franta-hg@72
|
157 |
} else {
|
franta-hg@82
|
158 |
/** empty body, just headers */
|
franta-hg@72
|
159 |
setText("");
|
franta-hg@72
|
160 |
}
|
franta-hg@72
|
161 |
}
|
franta-hg@72
|
162 |
|
franta-hg@102
|
163 |
/**
|
franta-hg@102
|
164 |
* Constructs MIME message from article posted by user.
|
franta-hg@102
|
165 |
* @param article article that came through NNTP.
|
franta-hg@102
|
166 |
* @throws MessagingException
|
franta-hg@102
|
167 |
*/
|
franta-hg@102
|
168 |
public DrupalMessage(Article article) throws MessagingException {
|
franta-hg@102
|
169 |
super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
|
franta-hg@104
|
170 |
initFactories();
|
franta-hg@102
|
171 |
|
franta-hg@109
|
172 |
String[] replyToHeaders = getHeader("In-Reply-To");
|
franta-hg@109
|
173 |
String[] referencesHeaders = getHeader("References");
|
franta-hg@109
|
174 |
String parentMessageID;
|
franta-hg@109
|
175 |
if (replyToHeaders != null && replyToHeaders.length == 1) {
|
franta-hg@109
|
176 |
parentMessageID = replyToHeaders[0];
|
franta-hg@109
|
177 |
} else if (referencesHeaders != null && referencesHeaders.length == 1) {
|
franta-hg@109
|
178 |
Pattern p = Pattern.compile("(\\s*<.*>)*\\s*(<.*>)");
|
franta-hg@109
|
179 |
Matcher m = p.matcher(referencesHeaders[0]);
|
franta-hg@109
|
180 |
|
franta-hg@109
|
181 |
if (m.matches()) {
|
franta-hg@109
|
182 |
parentMessageID = m.group(2);
|
franta-hg@109
|
183 |
} else {
|
franta-hg@109
|
184 |
throw new MessagingException("Message posted by user had invalid References header: " + referencesHeaders[0]);
|
franta-hg@109
|
185 |
}
|
franta-hg@102
|
186 |
} else {
|
franta-hg@109
|
187 |
throw new MessagingException("Message posted by user must have exactly one In-Reply-To header. Reply-To headers: " + Arrays.toString(replyToHeaders) + " Referemces headers: " + Arrays.toString(referencesHeaders));
|
franta-hg@102
|
188 |
}
|
franta-hg@109
|
189 |
|
franta-hg@109
|
190 |
parentID = parseArticleID(parentMessageID);
|
franta-hg@109
|
191 |
groupID = parseGroupID(parentMessageID);
|
franta-hg@102
|
192 |
}
|
franta-hg@102
|
193 |
|
franta-hg@102
|
194 |
private static InputStream serializeArticle(Article a) {
|
franta-hg@102
|
195 |
byte articleHeaders[] = a.getHeaderSource().getBytes();
|
franta-hg@102
|
196 |
byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();
|
franta-hg@102
|
197 |
byte body[] = a.getBody();
|
franta-hg@102
|
198 |
|
franta-hg@102
|
199 |
byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];
|
franta-hg@102
|
200 |
|
franta-hg@102
|
201 |
System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);
|
franta-hg@102
|
202 |
System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);
|
franta-hg@102
|
203 |
System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);
|
franta-hg@102
|
204 |
|
franta-hg@102
|
205 |
return new ByteArrayInputStream(message);
|
franta-hg@102
|
206 |
}
|
franta-hg@102
|
207 |
|
franta-hg@106
|
208 |
/**
|
franta-hg@106
|
209 |
* @param xhtmlText well-formed XHTML
|
franta-hg@106
|
210 |
* @return plain text representation of this formated text
|
franta-hg@106
|
211 |
*/
|
franta-hg@106
|
212 |
private String formatedToPlainText(String xhtmlText) {
|
franta-hg@89
|
213 |
try {
|
franta-hg@104
|
214 |
Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
|
franta-hg@89
|
215 |
|
franta-hg@89
|
216 |
StringReader input = new StringReader(xhtmlText);
|
franta-hg@89
|
217 |
StringWriter output = new StringWriter(xhtmlText.length());
|
franta-hg@89
|
218 |
textTransformer.transform(new StreamSource(input), new StreamResult(output));
|
franta-hg@89
|
219 |
|
franta-hg@89
|
220 |
return output.toString();
|
franta-hg@89
|
221 |
} catch (Exception e) {
|
franta-hg@89
|
222 |
/**
|
franta-hg@89
|
223 |
* TODO: lepší ošetření chyby
|
franta-hg@89
|
224 |
*/
|
franta-hg@89
|
225 |
log.log(Level.WARNING, "Error while transforming article to plain text", e);
|
franta-hg@106
|
226 |
return "Při transformaci příspěvku bohužel došlo k chybě.";
|
franta-hg@89
|
227 |
}
|
franta-hg@72
|
228 |
}
|
franta-hg@72
|
229 |
|
franta-hg@104
|
230 |
private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {
|
franta-hg@104
|
231 |
DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();
|
franta-hg@104
|
232 |
Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
|
franta-hg@104
|
233 |
return new DOMSource(d);
|
franta-hg@104
|
234 |
}
|
franta-hg@104
|
235 |
|
franta-hg@105
|
236 |
private String readXhtmlText(String sourceText, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {
|
franta-hg@72
|
237 |
/**
|
franta-hg@82
|
238 |
* TODO:
|
franta-hg@104
|
239 |
* - znovupoužívat XSL transformér (nejen v instanci)
|
franta-hg@82
|
240 |
* - používat cache, ukládat si vygenerované články
|
franta-hg@72
|
241 |
*/
|
franta-hg@105
|
242 |
String wrappedText = makeSimpleXHTML(sourceText);
|
franta-hg@103
|
243 |
|
franta-hg@104
|
244 |
Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
|
franta-hg@103
|
245 |
String paragraphedText;
|
franta-hg@103
|
246 |
boolean tidyWasUsed = false;
|
franta-hg@74
|
247 |
try {
|
franta-hg@105
|
248 |
StringWriter output = new StringWriter(2 * wrappedText.length());
|
franta-hg@105
|
249 |
paragraphTransformer.transform(readDOM(wrappedText), new StreamResult(output));
|
franta-hg@103
|
250 |
paragraphedText = output.toString();
|
franta-hg@103
|
251 |
} catch (Exception e) {
|
franta-hg@103
|
252 |
log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
|
franta-hg@105
|
253 |
StringWriter output = new StringWriter(2 * wrappedText.length());
|
franta-hg@105
|
254 |
paragraphTransformer.transform(readDOM(tidyXhtml(wrappedText)), new StreamResult(output));
|
franta-hg@103
|
255 |
paragraphedText = output.toString();
|
franta-hg@103
|
256 |
tidyWasUsed = true;
|
franta-hg@103
|
257 |
}
|
franta-hg@75
|
258 |
|
franta-hg@104
|
259 |
Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
|
franta-hg@103
|
260 |
xhtmlTransformer.setParameter("isRoot", (parentId == 0));
|
franta-hg@103
|
261 |
xhtmlTransformer.setParameter("title", subject);
|
franta-hg@103
|
262 |
xhtmlTransformer.setParameter("urlBase", urlBase);
|
franta-hg@103
|
263 |
xhtmlTransformer.setParameter("wwwRead", wwwRead);
|
franta-hg@103
|
264 |
xhtmlTransformer.setParameter("wwwPost", wwwPost);
|
franta-hg@103
|
265 |
xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
|
franta-hg@105
|
266 |
StringReader paragraphedReader = new StringReader(paragraphedText);
|
franta-hg@105
|
267 |
StringWriter xhtmlWriter = new StringWriter(2 * paragraphedText.length());
|
franta-hg@105
|
268 |
xhtmlTransformer.transform(new StreamSource(paragraphedReader), new StreamResult(xhtmlWriter));
|
franta-hg@75
|
269 |
|
franta-hg@105
|
270 |
return xhtmlWriter.toString();
|
franta-hg@72
|
271 |
}
|
franta-hg@72
|
272 |
|
franta-hg@103
|
273 |
/**
|
franta-hg@116
|
274 |
* Converts markdown to XHTML.
|
franta-hg@116
|
275 |
* @param markdown text in Markdown syntax
|
franta-hg@116
|
276 |
* @return XHTML document (with html/body elements)
|
franta-hg@116
|
277 |
* @throws StorageBackendException when markdown proces returned any errors
|
franta-hg@116
|
278 |
* (other exceptions are thrown when afterwards XHTML validation fails).
|
franta-hg@116
|
279 |
*/
|
franta-hg@116
|
280 |
private String readXhtmlTextMarkdown(String markdown) throws TransformerException, IOException, ParserConfigurationException, SAXException, StorageBackendException {
|
franta-hg@116
|
281 |
Runtime r = Runtime.getRuntime();
|
franta-hg@116
|
282 |
Process p = r.exec(new String[]{"sudo", "-u", "markdown", "/usr/bin/markdown"});
|
franta-hg@116
|
283 |
|
franta-hg@116
|
284 |
PrintStream processInput = new PrintStream(p.getOutputStream());
|
franta-hg@116
|
285 |
processInput.print(markdown);
|
franta-hg@116
|
286 |
processInput.close();
|
franta-hg@116
|
287 |
|
franta-hg@116
|
288 |
String errors = streamToString(p.getErrorStream());
|
franta-hg@116
|
289 |
String htmlFragment = streamToString(p.getInputStream());
|
franta-hg@116
|
290 |
|
franta-hg@116
|
291 |
if (errors.length() == 0) {
|
franta-hg@116
|
292 |
String htmlDocument = makeSimpleXHTML(htmlFragment);
|
franta-hg@116
|
293 |
String xhtmlDocument = readXhtmlText(htmlDocument, null, -1, null, null, null);
|
franta-hg@116
|
294 |
return xhtmlDocument;
|
franta-hg@116
|
295 |
} else {
|
franta-hg@116
|
296 |
throw new StorageBackendException("Error while transforming Markdown to XHTML: " + errors);
|
franta-hg@116
|
297 |
}
|
franta-hg@116
|
298 |
}
|
franta-hg@116
|
299 |
|
franta-hg@116
|
300 |
/**
|
franta-hg@103
|
301 |
* Does not parse XML works just with text.
|
franta-hg@103
|
302 |
* @param body XHTML fragment that should be put between <body> and </body>
|
franta-hg@103
|
303 |
* @return simple XHTML document (body wrapped in html and body tags)
|
franta-hg@103
|
304 |
*/
|
franta-hg@84
|
305 |
private static String makeSimpleXHTML(String body) {
|
franta-hg@84
|
306 |
return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
|
franta-hg@84
|
307 |
}
|
franta-hg@84
|
308 |
|
franta-hg@75
|
309 |
/**
|
franta-hg@103
|
310 |
* Does not parse XML works just with text.
|
franta-hg@103
|
311 |
* @param xhtml whole XHTML page
|
franta-hg@103
|
312 |
* @return content between <body> and </body> tags.
|
franta-hg@103
|
313 |
*/
|
franta-hg@103
|
314 |
private static String makeFragmentXHTML(String xhtml) {
|
franta-hg@103
|
315 |
final String startTag = "<body>";
|
franta-hg@103
|
316 |
final String endTag = "</body>";
|
franta-hg@103
|
317 |
|
franta-hg@103
|
318 |
int start = xhtml.indexOf(startTag) + startTag.length();
|
franta-hg@103
|
319 |
int end = xhtml.lastIndexOf(endTag);
|
franta-hg@103
|
320 |
|
franta-hg@103
|
321 |
return xhtml.substring(start, end);
|
franta-hg@103
|
322 |
}
|
franta-hg@103
|
323 |
|
franta-hg@103
|
324 |
/**
|
franta-hg@75
|
325 |
* TODO: refaktorovat, přesunout
|
franta-hg@75
|
326 |
*/
|
franta-hg@75
|
327 |
private static String tidyXhtml(String inputText) throws IOException {
|
franta-hg@89
|
328 |
/*
|
franta-hg@89
|
329 |
* Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
|
franta-hg@89
|
330 |
*
|
franta-hg@89
|
331 |
* TODO:
|
franta-hg@89
|
332 |
* - použít delší zástupný řetězec, ne jen jeden znak
|
franta-hg@89
|
333 |
* - umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
|
franta-hg@89
|
334 |
*/
|
franta-hg@100
|
335 |
inputText = označKonceŘádků(inputText);
|
franta-hg@82
|
336 |
|
franta-hg@75
|
337 |
Runtime r = Runtime.getRuntime();
|
franta-hg@82
|
338 |
Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
|
franta-hg@82
|
339 |
"-asxml", // well formed XHTML
|
franta-hg@82
|
340 |
"-numeric", // číselné entity
|
franta-hg@82
|
341 |
"-utf8", // kódování
|
franta-hg@82
|
342 |
"--show-warnings", "false", // žádná varování nás nezajímají
|
franta-hg@82
|
343 |
"--show-errors", "0", // ani chyby
|
franta-hg@82
|
344 |
"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
|
franta-hg@82
|
345 |
"--logical-emphasis", "true", // em a strong místo i a b
|
franta-hg@82
|
346 |
"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
|
franta-hg@82
|
347 |
"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
|
franta-hg@82
|
348 |
});
|
franta-hg@75
|
349 |
|
franta-hg@75
|
350 |
PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
|
franta-hg@75
|
351 |
vstupProcesu.print(inputText);
|
franta-hg@75
|
352 |
vstupProcesu.close();
|
franta-hg@75
|
353 |
|
franta-hg@75
|
354 |
String outputText = streamToString(p.getInputStream());
|
franta-hg@75
|
355 |
|
franta-hg@100
|
356 |
outputText = vraťKonceŘádků(outputText);
|
franta-hg@82
|
357 |
|
franta-hg@75
|
358 |
return outputText;
|
franta-hg@75
|
359 |
}
|
franta-hg@75
|
360 |
|
franta-hg@100
|
361 |
private static String označKonceŘádků(String text) {
|
franta-hg@100
|
362 |
text = text.replaceAll(">\\s+<", "> <");
|
franta-hg@100
|
363 |
text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
|
franta-hg@100
|
364 |
return text;
|
franta-hg@100
|
365 |
}
|
franta-hg@100
|
366 |
|
franta-hg@100
|
367 |
private static String vraťKonceŘádků(String text) {
|
franta-hg@100
|
368 |
text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
|
franta-hg@100
|
369 |
text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
|
franta-hg@100
|
370 |
return text;
|
franta-hg@100
|
371 |
}
|
franta-hg@100
|
372 |
|
franta-hg@75
|
373 |
/**
|
franta-hg@75
|
374 |
* TODO: refaktorovat, přesunout
|
franta-hg@75
|
375 |
*/
|
franta-hg@75
|
376 |
private static String streamToString(InputStream proud) throws IOException {
|
franta-hg@75
|
377 |
StringBuilder výsledek = new StringBuilder();
|
franta-hg@75
|
378 |
BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
|
franta-hg@75
|
379 |
while (true) {
|
franta-hg@75
|
380 |
String radek = buf.readLine();
|
franta-hg@75
|
381 |
if (radek == null) {
|
franta-hg@75
|
382 |
break;
|
franta-hg@75
|
383 |
} else {
|
franta-hg@75
|
384 |
výsledek.append(radek);
|
franta-hg@75
|
385 |
výsledek.append("\n");
|
franta-hg@75
|
386 |
}
|
franta-hg@75
|
387 |
}
|
franta-hg@75
|
388 |
return výsledek.toString();
|
franta-hg@75
|
389 |
}
|
franta-hg@75
|
390 |
|
franta-hg@102
|
391 |
public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {
|
franta-hg@72
|
392 |
StringBuilder sb = new StringBuilder();
|
franta-hg@72
|
393 |
sb.append("<");
|
franta-hg@72
|
394 |
sb.append(articleID);
|
franta-hg@72
|
395 |
sb.append("-");
|
franta-hg@72
|
396 |
sb.append(groupID);
|
franta-hg@72
|
397 |
sb.append("-");
|
franta-hg@72
|
398 |
sb.append(groupName);
|
franta-hg@72
|
399 |
sb.append("@");
|
franta-hg@72
|
400 |
sb.append(domainName);
|
franta-hg@72
|
401 |
sb.append(">");
|
franta-hg@72
|
402 |
return sb.toString();
|
franta-hg@72
|
403 |
}
|
franta-hg@72
|
404 |
|
franta-hg@102
|
405 |
/**
|
franta-hg@102
|
406 |
* @return article ID of parent of this message | or null, if this is root article and not reply to another one
|
franta-hg@102
|
407 |
*/
|
franta-hg@102
|
408 |
public Long getParentID() {
|
franta-hg@102
|
409 |
return parentID;
|
franta-hg@102
|
410 |
}
|
franta-hg@102
|
411 |
|
franta-hg@102
|
412 |
/**
|
franta-hg@102
|
413 |
* @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group
|
franta-hg@102
|
414 |
*/
|
franta-hg@102
|
415 |
public Long getGroupID() {
|
franta-hg@102
|
416 |
return groupID;
|
franta-hg@102
|
417 |
}
|
franta-hg@102
|
418 |
|
franta-hg@102
|
419 |
/**
|
franta-hg@102
|
420 |
*
|
franta-hg@102
|
421 |
* @param messageID <{0}-{1}-{2}@domain.tld> where {0} is nntp_id and {1} is group_id and {2} is group_name
|
franta-hg@102
|
422 |
* @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid
|
franta-hg@102
|
423 |
*/
|
franta-hg@102
|
424 |
private static String[] parseMessageID(String messageID) {
|
franta-hg@102
|
425 |
if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {
|
franta-hg@102
|
426 |
return messageID.substring(1).split("@")[0].split("\\-");
|
franta-hg@102
|
427 |
} else {
|
franta-hg@102
|
428 |
return null;
|
franta-hg@102
|
429 |
}
|
franta-hg@102
|
430 |
}
|
franta-hg@102
|
431 |
|
franta-hg@102
|
432 |
public static Long parseArticleID(String messageID) {
|
franta-hg@102
|
433 |
String[] localPart = parseMessageID(messageID);
|
franta-hg@102
|
434 |
if (localPart == null) {
|
franta-hg@102
|
435 |
return null;
|
franta-hg@102
|
436 |
} else {
|
franta-hg@102
|
437 |
return Long.parseLong(localPart[0]);
|
franta-hg@102
|
438 |
}
|
franta-hg@102
|
439 |
}
|
franta-hg@102
|
440 |
|
franta-hg@102
|
441 |
public static Long parseGroupID(String messageID) {
|
franta-hg@102
|
442 |
String[] localPart = parseMessageID(messageID);
|
franta-hg@102
|
443 |
if (localPart == null) {
|
franta-hg@102
|
444 |
return null;
|
franta-hg@102
|
445 |
} else {
|
franta-hg@102
|
446 |
return Long.parseLong(localPart[1]);
|
franta-hg@102
|
447 |
// If needed:
|
franta-hg@102
|
448 |
// parseGroupName() will be same as this method, just with:
|
franta-hg@102
|
449 |
// return localPart[2];
|
franta-hg@102
|
450 |
}
|
franta-hg@102
|
451 |
}
|
franta-hg@102
|
452 |
|
franta-hg@72
|
453 |
@Override
|
franta-hg@72
|
454 |
public void setHeader(String name, String value) throws MessagingException {
|
franta-hg@72
|
455 |
super.setHeader(name, value);
|
franta-hg@72
|
456 |
|
franta-hg@72
|
457 |
if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
|
franta-hg@72
|
458 |
messageID = value;
|
franta-hg@72
|
459 |
}
|
franta-hg@72
|
460 |
}
|
franta-hg@72
|
461 |
|
franta-hg@72
|
462 |
@Override
|
franta-hg@72
|
463 |
public final void addHeader(String name, String value) throws MessagingException {
|
franta-hg@72
|
464 |
super.addHeader(name, value);
|
franta-hg@72
|
465 |
|
franta-hg@72
|
466 |
if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
|
franta-hg@72
|
467 |
messageID = value;
|
franta-hg@72
|
468 |
}
|
franta-hg@72
|
469 |
}
|
franta-hg@72
|
470 |
|
franta-hg@72
|
471 |
@Override
|
franta-hg@72
|
472 |
public void removeHeader(String name) throws MessagingException {
|
franta-hg@72
|
473 |
super.removeHeader(name);
|
franta-hg@72
|
474 |
|
franta-hg@72
|
475 |
if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
|
franta-hg@72
|
476 |
messageID = null;
|
franta-hg@72
|
477 |
}
|
franta-hg@72
|
478 |
}
|
franta-hg@72
|
479 |
|
franta-hg@72
|
480 |
public void setMessageID(String messageID) {
|
franta-hg@72
|
481 |
this.messageID = messageID;
|
franta-hg@72
|
482 |
}
|
franta-hg@72
|
483 |
|
franta-hg@72
|
484 |
@Override
|
franta-hg@72
|
485 |
protected void updateMessageID() throws MessagingException {
|
franta-hg@72
|
486 |
if (messageID == null) {
|
franta-hg@72
|
487 |
super.updateMessageID();
|
franta-hg@72
|
488 |
} else {
|
franta-hg@72
|
489 |
setHeader(MESSAGE_ID_HEADER, messageID);
|
franta-hg@72
|
490 |
}
|
franta-hg@72
|
491 |
}
|
franta-hg@72
|
492 |
|
franta-hg@72
|
493 |
/**
|
franta-hg@72
|
494 |
* Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
|
franta-hg@72
|
495 |
*
|
franta-hg@72
|
496 |
* <pre>MIME-Version: 1.0
|
franta-hg@72
|
497 |
*Content-Type: multipart/alternative;</pre>
|
franta-hg@72
|
498 |
*
|
franta-hg@72
|
499 |
* @return serialized headers
|
franta-hg@72
|
500 |
* @throws MessagingException if getAllHeaders() fails
|
franta-hg@72
|
501 |
*/
|
franta-hg@72
|
502 |
public String getHeaders() throws MessagingException {
|
franta-hg@72
|
503 |
StringBuilder sb = new StringBuilder();
|
franta-hg@72
|
504 |
for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
|
franta-hg@72
|
505 |
sb.append(eh.nextElement());
|
franta-hg@72
|
506 |
sb.append(CRLF);
|
franta-hg@72
|
507 |
}
|
franta-hg@72
|
508 |
return sb.toString();
|
franta-hg@72
|
509 |
}
|
franta-hg@72
|
510 |
|
franta-hg@72
|
511 |
public byte[] getBody() throws IOException, MessagingException {
|
franta-hg@72
|
512 |
saveChanges();
|
franta-hg@72
|
513 |
|
franta-hg@72
|
514 |
ArrayList<String> skipHeaders = new ArrayList<String>();
|
franta-hg@72
|
515 |
for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
|
franta-hg@72
|
516 |
Header h = (Header) eh.nextElement();
|
franta-hg@72
|
517 |
skipHeaders.add(h.getName());
|
franta-hg@72
|
518 |
}
|
franta-hg@72
|
519 |
|
franta-hg@72
|
520 |
ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
|
franta-hg@72
|
521 |
writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
|
franta-hg@72
|
522 |
return baos.toByteArray();
|
franta-hg@72
|
523 |
}
|
franta-hg@103
|
524 |
|
franta-hg@103
|
525 |
/**
|
franta-hg@103
|
526 |
* Transforms message content to valid XHTML and strips html and body tags.
|
franta-hg@103
|
527 |
* When receiving message from user through NNTP
|
franta-hg@103
|
528 |
* this method is used to get text that should be saved into databse.
|
franta-hg@103
|
529 |
* @return XHTML fragment – content between <body> and </body> tags.
|
franta-hg@103
|
530 |
*/
|
franta-hg@103
|
531 |
public String getBodyXhtmlFragment() throws StorageBackendException {
|
franta-hg@103
|
532 |
/**
|
franta-hg@103
|
533 |
* TODO: podporovat i zprávy přímo v HTML a multipart.
|
franta-hg@103
|
534 |
*/
|
franta-hg@103
|
535 |
try {
|
franta-hg@103
|
536 |
Object c = getContent();
|
franta-hg@103
|
537 |
if (isMimeType("text/plain") && c instanceof String) {
|
franta-hg@116
|
538 |
String inputText = (String) c;
|
franta-hg@116
|
539 |
String xhtml;
|
franta-hg@116
|
540 |
|
franta-hg@116
|
541 |
if (inputText.startsWith(MARKDOWN_HEADER)) {
|
franta-hg@116
|
542 |
xhtml = readXhtmlTextMarkdown(inputText.substring(MARKDOWN_HEADER.length()));
|
franta-hg@116
|
543 |
} else {
|
franta-hg@116
|
544 |
|
franta-hg@116
|
545 |
xhtml = readXhtmlText(
|
franta-hg@116
|
546 |
inputText,
|
franta-hg@116
|
547 |
getSubject(),
|
franta-hg@116
|
548 |
getParentID(),
|
franta-hg@116
|
549 |
null,
|
franta-hg@116
|
550 |
null,
|
franta-hg@116
|
551 |
null);
|
franta-hg@116
|
552 |
}
|
franta-hg@103
|
553 |
return makeFragmentXHTML(xhtml);
|
franta-hg@103
|
554 |
} else {
|
franta-hg@103
|
555 |
throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");
|
franta-hg@103
|
556 |
}
|
franta-hg@103
|
557 |
} catch (Exception e) {
|
franta-hg@103
|
558 |
throw new StorageBackendException(e);
|
franta-hg@103
|
559 |
}
|
franta-hg@103
|
560 |
}
|
franta-hg@109
|
561 |
|
franta-hg@106
|
562 |
public String getBodyPlainText() throws StorageBackendException {
|
franta-hg@106
|
563 |
/**
|
franta-hg@106
|
564 |
* TODO: netransformovat XHTML 2x
|
franta-hg@106
|
565 |
*/
|
franta-hg@106
|
566 |
return formatedToPlainText(makeSimpleXHTML(getBodyXhtmlFragment()));
|
franta-hg@106
|
567 |
}
|
franta-hg@72
|
568 |
}
|