src/org/sonews/storage/DrupalMessage.java
author František Kučera <franta-hg@frantovo.cz>
Fri, 21 Oct 2011 18:21:30 +0200
changeset 105 d566d08c20d6
parent 104 b4c8a2760d6f
child 106 dc04a3c2c557
permissions -rw-r--r--
Drupal: lepší názvy proměnných.
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package org.sonews.storage;
    19 
    20 import java.io.BufferedReader;
    21 import java.io.ByteArrayInputStream;
    22 import java.io.ByteArrayOutputStream;
    23 import java.io.IOException;
    24 import java.io.InputStream;
    25 import java.io.InputStreamReader;
    26 import java.io.PrintStream;
    27 import java.io.StringReader;
    28 import java.io.StringWriter;
    29 import java.io.UnsupportedEncodingException;
    30 import java.sql.ResultSet;
    31 import java.sql.SQLException;
    32 import java.util.ArrayList;
    33 import java.util.Date;
    34 import java.util.Enumeration;
    35 import java.util.logging.Level;
    36 import java.util.logging.Logger;
    37 import javax.mail.Header;
    38 import javax.mail.MessagingException;
    39 import javax.mail.Multipart;
    40 import javax.mail.Session;
    41 import javax.mail.internet.InternetAddress;
    42 import javax.mail.internet.MimeBodyPart;
    43 import javax.mail.internet.MimeMessage;
    44 import javax.mail.internet.MimeMultipart;
    45 import javax.xml.parsers.DocumentBuilder;
    46 import javax.xml.parsers.DocumentBuilderFactory;
    47 import javax.xml.parsers.ParserConfigurationException;
    48 import javax.xml.transform.Transformer;
    49 import javax.xml.transform.TransformerException;
    50 import javax.xml.transform.TransformerFactory;
    51 import javax.xml.transform.dom.DOMSource;
    52 import javax.xml.transform.stream.StreamResult;
    53 import javax.xml.transform.stream.StreamSource;
    54 import org.sonews.daemon.NNTPConnection;
    55 import org.sonews.util.io.Resource;
    56 import org.w3c.dom.Document;
    57 import org.xml.sax.SAXException;
    58 
    59 /**
    60  * This is MimeMessage which enables custom Message-ID header
    61  * (this header will not be overwritten by the default one like in MimeMessage).
    62  * 
    63  * Also add header and body separate serialization.
    64  * 
    65  * And can be deserialized from SQL ResultSet
    66  * 
    67  * @author František Kučera (frantovo.cz)
    68  */
    69 public class DrupalMessage extends MimeMessage {
    70 
    71 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
    72 	private static final String MESSAGE_ID_HEADER = "Message-ID";
    73 	private static final String CRLF = "\r\n";
    74 	public static final String CHARSET = "UTF-8";
    75 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
    76 	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
    77 	private String messageID;
    78 	private Long parentID;
    79 	private Long groupID;
    80 	private TransformerFactory transformerFactory;
    81 	private DocumentBuilderFactory documentBuilderFactory;
    82 
    83 	/**
    84 	 * Initializes XML factories (Transformer, DocumentBuilder).
    85 	 */
    86 	private void initFactories() {
    87 		transformerFactory = TransformerFactory.newInstance();
    88 		documentBuilderFactory = DocumentBuilderFactory.newInstance();
    89 		/**
    90 		 * Komentáře nás nepotřebujeme 
    91 		 * (a museli bychom je brát v úvahu při dělení odstavců:
    92 		 * v současné verzi XSLT odstavcovače by nám případný komentář
    93 		 * rozdělil text na dva odstavce, přestože to má být odstavec jede).
    94 		 */
    95 		documentBuilderFactory.setIgnoringComments(true);
    96 	}
    97 
    98 	/**
    99 	 * Constructs MIME message from SQL result.
   100 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
   101 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
   102 	 */
   103 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {
   104 		super(Session.getDefaultInstance(System.getProperties()));
   105 		initFactories();
   106 
   107 		groupID = rs.getLong("group_id");
   108 		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
   109 		addHeader("Newsgroups", rs.getString("group_name"));
   110 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
   111 		setSubject(rs.getString("subject"));
   112 		setSentDate(new Date(rs.getLong("created")));
   113 
   114 		parentID = rs.getLong("parent_id");
   115 		if (parentID > 0) {
   116 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
   117 			addHeader("In-Reply-To", parentMessageID);
   118 			addHeader("References", parentMessageID);
   119 		}
   120 
   121 		if (constructBody) {
   122 			Multipart multipart = new MimeMultipart("alternative");
   123 			setContent(multipart);
   124 
   125 			/** XHTML part */
   126 			MimeBodyPart htmlPart = new MimeBodyPart();
   127 			String xhtmlText = readXhtmlText(
   128 					rs.getString("text"),
   129 					rs.getString("subject"),
   130 					rs.getInt("parent_id"),
   131 					rs.getString("urlBase"),
   132 					rs.getString("wwwRead"),
   133 					rs.getString("wwwPost"));
   134 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
   135 
   136 			/** Plain text part */
   137 			MimeBodyPart textPart = new MimeBodyPart();
   138 			String plainText = readPlainText(rs, xhtmlText);
   139 			textPart.setText(plainText);
   140 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));
   141 
   142 			/**
   143 			 * Thunderbirdu záleží, v jakém pořadí části jsou 
   144 			 * (když je prostý text druhý, html se nezobrazí),
   145 			 * KNode zobrazuje HTML správně, i když je na prvním místě.
   146 			 */
   147 			multipart.addBodyPart(textPart);
   148 			multipart.addBodyPart(htmlPart);
   149 		} else {
   150 			/** empty body, just headers */
   151 			setText("");
   152 		}
   153 	}
   154 
   155 	/**
   156 	 * Constructs MIME message from article posted by user.
   157 	 * @param article article that came through NNTP.
   158 	 * @throws MessagingException 
   159 	 */
   160 	public DrupalMessage(Article article) throws MessagingException {
   161 		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
   162 		initFactories();
   163 
   164 		String[] parentHeaders = getHeader("In-Reply-To");
   165 		if (parentHeaders != null && parentHeaders.length == 1) {
   166 			String parentMessageID = parentHeaders[0];
   167 			parentID = parseArticleID(parentMessageID);
   168 			groupID = parseGroupID(parentMessageID);
   169 		} else {
   170 			throw new MessagingException("Message posted by user must have exactly one In-Reply-To header.");
   171 		}
   172 	}
   173 
   174 	private static InputStream serializeArticle(Article a) {
   175 		byte articleHeaders[] = a.getHeaderSource().getBytes();
   176 		byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();
   177 		byte body[] = a.getBody();
   178 
   179 		byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];
   180 
   181 		System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);
   182 		System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);
   183 		System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);
   184 
   185 		return new ByteArrayInputStream(message);
   186 	}
   187 
   188 	private String readPlainText(ResultSet rs, String xhtmlText) {
   189 		try {
   190 			Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
   191 
   192 			StringReader input = new StringReader(xhtmlText);
   193 			StringWriter output = new StringWriter(xhtmlText.length());
   194 			textTransformer.transform(new StreamSource(input), new StreamResult(output));
   195 
   196 			return output.toString();
   197 		} catch (Exception e) {
   198 			/**
   199 			 * TODO: lepší ošetření chyby
   200 			 */
   201 			log.log(Level.WARNING, "Error while transforming article to plain text", e);
   202 			return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");
   203 		}
   204 	}
   205 
   206 	private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {
   207 		DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();
   208 		Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
   209 		return new DOMSource(d);
   210 	}
   211 
   212 	private String readXhtmlText(String sourceText, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {
   213 		/**
   214 		 * TODO: 
   215 		 *		- znovupoužívat XSL transformér (nejen v instanci)
   216 		 *		- používat cache, ukládat si vygenerované články
   217 		 */
   218 		String wrappedText = makeSimpleXHTML(sourceText);
   219 
   220 		Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
   221 		String paragraphedText;
   222 		boolean tidyWasUsed = false;
   223 		try {
   224 			StringWriter output = new StringWriter(2 * wrappedText.length());
   225 			paragraphTransformer.transform(readDOM(wrappedText), new StreamResult(output));
   226 			paragraphedText = output.toString();
   227 		} catch (Exception e) {
   228 			log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
   229 			StringWriter output = new StringWriter(2 * wrappedText.length());
   230 			paragraphTransformer.transform(readDOM(tidyXhtml(wrappedText)), new StreamResult(output));
   231 			paragraphedText = output.toString();
   232 			tidyWasUsed = true;
   233 		}
   234 
   235 		Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   236 		xhtmlTransformer.setParameter("isRoot", (parentId == 0));
   237 		xhtmlTransformer.setParameter("title", subject);
   238 		xhtmlTransformer.setParameter("urlBase", urlBase);
   239 		xhtmlTransformer.setParameter("wwwRead", wwwRead);
   240 		xhtmlTransformer.setParameter("wwwPost", wwwPost);
   241 		xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
   242 		StringReader paragraphedReader = new StringReader(paragraphedText);
   243 		StringWriter xhtmlWriter = new StringWriter(2 * paragraphedText.length());
   244 		xhtmlTransformer.transform(new StreamSource(paragraphedReader), new StreamResult(xhtmlWriter));
   245 
   246 		return xhtmlWriter.toString();
   247 	}
   248 
   249 	/**
   250 	 * Does not parse XML works just with text.
   251 	 * @param body XHTML fragment that should be put between &lt;body&gt; and &lt;/body&gt;
   252 	 * @return simple XHTML document (body wrapped in html and body tags)
   253 	 */
   254 	private static String makeSimpleXHTML(String body) {
   255 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
   256 	}
   257 
   258 	/**
   259 	 * Does not parse XML works just with text.
   260 	 * @param xhtml whole XHTML page
   261 	 * @return content between &lt;body&gt; and &lt;/body&gt; tags.
   262 	 */
   263 	private static String makeFragmentXHTML(String xhtml) {
   264 		final String startTag = "<body>";
   265 		final String endTag = "</body>";
   266 
   267 		int start = xhtml.indexOf(startTag) + startTag.length();
   268 		int end = xhtml.lastIndexOf(endTag);
   269 
   270 		return xhtml.substring(start, end);
   271 	}
   272 
   273 	/**
   274 	 * TODO: refaktorovat, přesunout
   275 	 */
   276 	private static String tidyXhtml(String inputText) throws IOException {
   277 		/*
   278 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
   279 		 *
   280 		 * TODO:
   281 		 *		- použít delší zástupný řetězec, ne jen jeden znak
   282 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
   283 		 */
   284 		inputText = označKonceŘádků(inputText);
   285 
   286 		Runtime r = Runtime.getRuntime();
   287 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
   288 					"-asxml", // well formed XHTML
   289 					"-numeric", // číselné entity
   290 					"-utf8", // kódování
   291 					"--show-warnings", "false", // žádná varování nás nezajímají
   292 					"--show-errors", "0", // ani chyby
   293 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
   294 					"--logical-emphasis", "true", // em a strong místo i a b
   295 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
   296 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
   297 				});
   298 
   299 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
   300 		vstupProcesu.print(inputText);
   301 		vstupProcesu.close();
   302 
   303 		String outputText = streamToString(p.getInputStream());
   304 
   305 		outputText = vraťKonceŘádků(outputText);
   306 
   307 		return outputText;
   308 	}
   309 
   310 	private static String označKonceŘádků(String text) {
   311 		text = text.replaceAll(">\\s+<", "> <");
   312 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
   313 		return text;
   314 	}
   315 
   316 	private static String vraťKonceŘádků(String text) {
   317 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
   318 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
   319 		return text;
   320 	}
   321 
   322 	/**
   323 	 * TODO: refaktorovat, přesunout
   324 	 */
   325 	private static String streamToString(InputStream proud) throws IOException {
   326 		StringBuilder výsledek = new StringBuilder();
   327 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
   328 		while (true) {
   329 			String radek = buf.readLine();
   330 			if (radek == null) {
   331 				break;
   332 			} else {
   333 				výsledek.append(radek);
   334 				výsledek.append("\n");
   335 			}
   336 		}
   337 		return výsledek.toString();
   338 	}
   339 
   340 	public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {
   341 		StringBuilder sb = new StringBuilder();
   342 		sb.append("<");
   343 		sb.append(articleID);
   344 		sb.append("-");
   345 		sb.append(groupID);
   346 		sb.append("-");
   347 		sb.append(groupName);
   348 		sb.append("@");
   349 		sb.append(domainName);
   350 		sb.append(">");
   351 		return sb.toString();
   352 	}
   353 
   354 	/**
   355 	 * @return article ID of parent of this message | or null, if this is root article and not reply to another one
   356 	 */
   357 	public Long getParentID() {
   358 		return parentID;
   359 	}
   360 
   361 	/**
   362 	 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group
   363 	 */
   364 	public Long getGroupID() {
   365 		return groupID;
   366 	}
   367 
   368 	/**
   369 	 * 
   370 	 * @param messageID &lt;{0}-{1}-{2}@domain.tld&gt; where {0} is nntp_id and {1} is group_id and {2} is group_name
   371 	 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid
   372 	 */
   373 	private static String[] parseMessageID(String messageID) {
   374 		if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {
   375 			return messageID.substring(1).split("@")[0].split("\\-");
   376 		} else {
   377 			return null;
   378 		}
   379 	}
   380 
   381 	public static Long parseArticleID(String messageID) {
   382 		String[] localPart = parseMessageID(messageID);
   383 		if (localPart == null) {
   384 			return null;
   385 		} else {
   386 			return Long.parseLong(localPart[0]);
   387 		}
   388 	}
   389 
   390 	public static Long parseGroupID(String messageID) {
   391 		String[] localPart = parseMessageID(messageID);
   392 		if (localPart == null) {
   393 			return null;
   394 		} else {
   395 			return Long.parseLong(localPart[1]);
   396 			// If needed:
   397 			// parseGroupName() will be same as this method, just with:
   398 			// return localPart[2];
   399 		}
   400 	}
   401 
   402 	@Override
   403 	public void setHeader(String name, String value) throws MessagingException {
   404 		super.setHeader(name, value);
   405 
   406 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   407 			messageID = value;
   408 		}
   409 	}
   410 
   411 	@Override
   412 	public final void addHeader(String name, String value) throws MessagingException {
   413 		super.addHeader(name, value);
   414 
   415 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   416 			messageID = value;
   417 		}
   418 	}
   419 
   420 	@Override
   421 	public void removeHeader(String name) throws MessagingException {
   422 		super.removeHeader(name);
   423 
   424 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   425 			messageID = null;
   426 		}
   427 	}
   428 
   429 	public void setMessageID(String messageID) {
   430 		this.messageID = messageID;
   431 	}
   432 
   433 	@Override
   434 	protected void updateMessageID() throws MessagingException {
   435 		if (messageID == null) {
   436 			super.updateMessageID();
   437 		} else {
   438 			setHeader(MESSAGE_ID_HEADER, messageID);
   439 		}
   440 	}
   441 
   442 	/**
   443 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
   444 	 * 
   445 	 * <pre>MIME-Version: 1.0
   446 	 *Content-Type: multipart/alternative;</pre>
   447 	 * 
   448 	 * @return serialized headers
   449 	 * @throws MessagingException if getAllHeaders() fails
   450 	 */
   451 	public String getHeaders() throws MessagingException {
   452 		StringBuilder sb = new StringBuilder();
   453 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
   454 			sb.append(eh.nextElement());
   455 			sb.append(CRLF);
   456 		}
   457 		return sb.toString();
   458 	}
   459 
   460 	public byte[] getBody() throws IOException, MessagingException {
   461 		saveChanges();
   462 
   463 		ArrayList<String> skipHeaders = new ArrayList<String>();
   464 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
   465 			Header h = (Header) eh.nextElement();
   466 			skipHeaders.add(h.getName());
   467 		}
   468 
   469 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
   470 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
   471 		return baos.toByteArray();
   472 	}
   473 
   474 	/**
   475 	 * Transforms message content to valid XHTML and strips html and body tags.
   476 	 * When receiving message from user through NNTP 
   477 	 * this method is used to get text that should be saved into databse.
   478 	 * @return XHTML fragment – content between &lt;body&gt; and &lt;/body&gt; tags.
   479 	 */
   480 	public String getBodyXhtmlFragment() throws StorageBackendException {
   481 		/**
   482 		 * TODO: podporovat i zprávy přímo v HTML a multipart.
   483 		 */
   484 		try {
   485 			Object c = getContent();
   486 			if (isMimeType("text/plain") && c instanceof String) {
   487 				String xhtml = readXhtmlText(
   488 						(String) c,
   489 						getSubject(),
   490 						getParentID(),
   491 						null,
   492 						null,
   493 						null);
   494 				return makeFragmentXHTML(xhtml);
   495 			} else {
   496 				throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");
   497 			}
   498 		} catch (Exception e) {
   499 			throw new StorageBackendException(e);
   500 		}
   501 	}
   502 }