Drupal: zpráva od uživatele se před uložením prožene přes XSLT případně Tidy.
3 * see AUTHORS for the list of contributors
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 package org.sonews.storage;
20 import java.io.BufferedReader;
21 import java.io.ByteArrayInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.PrintStream;
27 import java.io.StringReader;
28 import java.io.StringWriter;
29 import java.io.UnsupportedEncodingException;
30 import java.sql.ResultSet;
31 import java.sql.SQLException;
32 import java.util.ArrayList;
33 import java.util.Date;
34 import java.util.Enumeration;
35 import java.util.logging.Level;
36 import java.util.logging.Logger;
37 import javax.mail.Header;
38 import javax.mail.MessagingException;
39 import javax.mail.Multipart;
40 import javax.mail.Session;
41 import javax.mail.internet.InternetAddress;
42 import javax.mail.internet.MimeBodyPart;
43 import javax.mail.internet.MimeMessage;
44 import javax.mail.internet.MimeMultipart;
45 import javax.xml.transform.Transformer;
46 import javax.xml.transform.TransformerException;
47 import javax.xml.transform.TransformerFactory;
48 import javax.xml.transform.stream.StreamResult;
49 import javax.xml.transform.stream.StreamSource;
50 import org.sonews.daemon.NNTPConnection;
51 import org.sonews.util.io.Resource;
54 * This is MimeMessage which enables custom Message-ID header
55 * (this header will not be overwritten by the default one like in MimeMessage).
57 * Also add header and body separate serialization.
59 * And can be deserialized from SQL ResultSet
61 * @author František Kučera (frantovo.cz)
63 public class DrupalMessage extends MimeMessage {
65 private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
66 private static final String MESSAGE_ID_HEADER = "Message-ID";
67 private static final String CRLF = "\r\n";
68 public static final String CHARSET = "UTF-8";
69 private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
70 private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
71 private String messageID;
72 private Long parentID;
76 * Constructs MIME message from SQL result.
77 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
78 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
80 public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException {
81 super(Session.getDefaultInstance(System.getProperties()));
83 groupID = rs.getLong("group_id");
84 addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
85 addHeader("Newsgroups", rs.getString("group_name"));
86 setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
87 setSubject(rs.getString("subject"));
88 setSentDate(new Date(rs.getLong("created")));
90 parentID = rs.getLong("parent_id");
92 String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
93 addHeader("In-Reply-To", parentMessageID);
94 addHeader("References", parentMessageID);
98 Multipart multipart = new MimeMultipart("alternative");
99 setContent(multipart);
102 MimeBodyPart htmlPart = new MimeBodyPart();
103 String xhtmlText = readXhtmlText(
104 rs.getString("text"),
105 rs.getString("subject"),
106 rs.getInt("parent_id"),
107 rs.getString("urlBase"),
108 rs.getString("wwwRead"),
109 rs.getString("wwwPost"));
110 htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
112 /** Plain text part */
113 MimeBodyPart textPart = new MimeBodyPart();
114 String plainText = readPlainText(rs, xhtmlText);
115 textPart.setText(plainText);
116 //addHeader("Lines", String.valueOf(plainText.split("\n").length));
119 * Thunderbirdu záleží, v jakém pořadí části jsou
120 * (když je prostý text druhý, html se nezobrazí),
121 * KNode zobrazuje HTML správně, i když je na prvním místě.
123 multipart.addBodyPart(textPart);
124 multipart.addBodyPart(htmlPart);
126 /** empty body, just headers */
132 * Constructs MIME message from article posted by user.
133 * @param article article that came through NNTP.
134 * @throws MessagingException
136 public DrupalMessage(Article article) throws MessagingException {
137 super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
139 String[] parentHeaders = getHeader("In-Reply-To");
140 if (parentHeaders.length == 1) {
141 String parentMessageID = parentHeaders[0];
142 parentID = parseArticleID(parentMessageID);
143 groupID = parseGroupID(parentMessageID);
145 throw new MessagingException("Message posted by user must have exactly one In-Reply-To header.");
149 private static InputStream serializeArticle(Article a) {
150 byte articleHeaders[] = a.getHeaderSource().getBytes();
151 byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();
152 byte body[] = a.getBody();
154 byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];
156 System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);
157 System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);
158 System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);
160 return new ByteArrayInputStream(message);
163 private String readPlainText(ResultSet rs, String xhtmlText) {
165 TransformerFactory tf = TransformerFactory.newInstance();
166 Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
168 StringReader input = new StringReader(xhtmlText);
169 StringWriter output = new StringWriter(xhtmlText.length());
170 textTransformer.transform(new StreamSource(input), new StreamResult(output));
172 return output.toString();
173 } catch (Exception e) {
175 * TODO: lepší ošetření chyby
177 log.log(Level.WARNING, "Error while transforming article to plain text", e);
178 return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");
182 private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException {
185 * - znovupoužívat XSL transformér
186 * - používat cache, ukládat si vygenerované články
188 String inputText = makeSimpleXHTML(text);
190 TransformerFactory tf = TransformerFactory.newInstance();
191 Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
193 String paragraphedText;
194 boolean tidyWasUsed = false;
196 StringReader input = new StringReader(inputText);
197 StringWriter output = new StringWriter(2 * inputText.length());
198 paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
199 paragraphedText = output.toString();
200 } catch (Exception e) {
201 log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
202 StringReader input = new StringReader(tidyXhtml(inputText));
203 StringWriter output = new StringWriter(2 * inputText.length());
204 paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
205 paragraphedText = output.toString();
209 Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
210 xhtmlTransformer.setParameter("isRoot", (parentId == 0));
211 xhtmlTransformer.setParameter("title", subject);
212 xhtmlTransformer.setParameter("urlBase", urlBase);
213 xhtmlTransformer.setParameter("wwwRead", wwwRead);
214 xhtmlTransformer.setParameter("wwwPost", wwwPost);
215 xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
216 StringReader input = new StringReader(paragraphedText);
217 StringWriter output = new StringWriter(2 * paragraphedText.length());
218 xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));
220 return output.toString();
224 * Does not parse XML works just with text.
225 * @param body XHTML fragment that should be put between <body> and </body>
226 * @return simple XHTML document (body wrapped in html and body tags)
228 private static String makeSimpleXHTML(String body) {
229 return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
233 * Does not parse XML works just with text.
234 * @param xhtml whole XHTML page
235 * @return content between <body> and </body> tags.
237 private static String makeFragmentXHTML(String xhtml) {
238 final String startTag = "<body>";
239 final String endTag = "</body>";
241 int start = xhtml.indexOf(startTag) + startTag.length();
242 int end = xhtml.lastIndexOf(endTag);
244 return xhtml.substring(start, end);
248 * TODO: refaktorovat, přesunout
250 private static String tidyXhtml(String inputText) throws IOException {
252 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
255 * - použít delší zástupný řetězec, ne jen jeden znak
256 * - umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
258 inputText = označKonceŘádků(inputText);
260 Runtime r = Runtime.getRuntime();
261 Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
262 "-asxml", // well formed XHTML
263 "-numeric", // číselné entity
265 "--show-warnings", "false", // žádná varování nás nezajímají
266 "--show-errors", "0", // ani chyby
267 "--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
268 "--logical-emphasis", "true", // em a strong místo i a b
269 "--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
270 "--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
273 PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
274 vstupProcesu.print(inputText);
275 vstupProcesu.close();
277 String outputText = streamToString(p.getInputStream());
279 outputText = vraťKonceŘádků(outputText);
284 private static String označKonceŘádků(String text) {
285 text = text.replaceAll(">\\s+<", "> <");
286 text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
290 private static String vraťKonceŘádků(String text) {
291 text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
292 text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
297 * TODO: refaktorovat, přesunout
299 private static String streamToString(InputStream proud) throws IOException {
300 StringBuilder výsledek = new StringBuilder();
301 BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
303 String radek = buf.readLine();
307 výsledek.append(radek);
308 výsledek.append("\n");
311 return výsledek.toString();
314 public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {
315 StringBuilder sb = new StringBuilder();
317 sb.append(articleID);
321 sb.append(groupName);
323 sb.append(domainName);
325 return sb.toString();
329 * @return article ID of parent of this message | or null, if this is root article and not reply to another one
331 public Long getParentID() {
336 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group
338 public Long getGroupID() {
344 * @param messageID <{0}-{1}-{2}@domain.tld> where {0} is nntp_id and {1} is group_id and {2} is group_name
345 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid
347 private static String[] parseMessageID(String messageID) {
348 if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {
349 return messageID.substring(1).split("@")[0].split("\\-");
355 public static Long parseArticleID(String messageID) {
356 String[] localPart = parseMessageID(messageID);
357 if (localPart == null) {
360 return Long.parseLong(localPart[0]);
364 public static Long parseGroupID(String messageID) {
365 String[] localPart = parseMessageID(messageID);
366 if (localPart == null) {
369 return Long.parseLong(localPart[1]);
371 // parseGroupName() will be same as this method, just with:
372 // return localPart[2];
377 public void setHeader(String name, String value) throws MessagingException {
378 super.setHeader(name, value);
380 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
386 public final void addHeader(String name, String value) throws MessagingException {
387 super.addHeader(name, value);
389 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
395 public void removeHeader(String name) throws MessagingException {
396 super.removeHeader(name);
398 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
403 public void setMessageID(String messageID) {
404 this.messageID = messageID;
408 protected void updateMessageID() throws MessagingException {
409 if (messageID == null) {
410 super.updateMessageID();
412 setHeader(MESSAGE_ID_HEADER, messageID);
417 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
419 * <pre>MIME-Version: 1.0
420 *Content-Type: multipart/alternative;</pre>
422 * @return serialized headers
423 * @throws MessagingException if getAllHeaders() fails
425 public String getHeaders() throws MessagingException {
426 StringBuilder sb = new StringBuilder();
427 for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
428 sb.append(eh.nextElement());
431 return sb.toString();
434 public byte[] getBody() throws IOException, MessagingException {
437 ArrayList<String> skipHeaders = new ArrayList<String>();
438 for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
439 Header h = (Header) eh.nextElement();
440 skipHeaders.add(h.getName());
443 ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
444 writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
445 return baos.toByteArray();
449 * Transforms message content to valid XHTML and strips html and body tags.
450 * When receiving message from user through NNTP
451 * this method is used to get text that should be saved into databse.
452 * @return XHTML fragment – content between <body> and </body> tags.
454 public String getBodyXhtmlFragment() throws StorageBackendException {
456 * TODO: podporovat i zprávy přímo v HTML a multipart.
459 Object c = getContent();
460 if (isMimeType("text/plain") && c instanceof String) {
461 String xhtml = readXhtmlText(
468 return makeFragmentXHTML(xhtml);
470 throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");
472 } catch (Exception e) {
473 throw new StorageBackendException(e);