sonews-drupal: src/org/sonews/storage/DrupalMessage.java@4ddc1020a154

     1 /*

     2  *   SONEWS News Server

     3  *   see AUTHORS for the list of contributors

     4  *

     5  *   This program is free software: you can redistribute it and/or modify

     6  *   it under the terms of the GNU General Public License as published by

     7  *   the Free Software Foundation, either version 3 of the License, or

     8  *   (at your option) any later version.

     9  *

    10  *   This program is distributed in the hope that it will be useful,

    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of

    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    13  *   GNU General Public License for more details.

    14  *

    15  *   You should have received a copy of the GNU General Public License

    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.

    17  */

    18 package org.sonews.storage;

    20 import java.io.BufferedReader;

    21 import java.io.ByteArrayInputStream;

    22 import java.io.ByteArrayOutputStream;

    23 import java.io.IOException;

    24 import java.io.InputStream;

    25 import java.io.InputStreamReader;

    26 import java.io.PrintStream;

    27 import java.io.StringReader;

    28 import java.io.StringWriter;

    29 import java.io.UnsupportedEncodingException;

    30 import java.sql.ResultSet;

    31 import java.sql.SQLException;

    32 import java.util.ArrayList;

    33 import java.util.Arrays;

    34 import java.util.Date;

    35 import java.util.Enumeration;

    36 import java.util.logging.Level;

    37 import java.util.logging.Logger;

    38 import java.util.regex.Matcher;

    39 import java.util.regex.Pattern;

    40 import javax.mail.Header;

    41 import javax.mail.MessagingException;

    42 import javax.mail.Multipart;

    43 import javax.mail.Session;

    44 import javax.mail.internet.InternetAddress;

    45 import javax.mail.internet.MimeBodyPart;

    46 import javax.mail.internet.MimeMessage;

    47 import javax.mail.internet.MimeMultipart;

    48 import javax.xml.parsers.DocumentBuilder;

    49 import javax.xml.parsers.DocumentBuilderFactory;

    50 import javax.xml.parsers.ParserConfigurationException;

    51 import javax.xml.transform.Transformer;

    52 import javax.xml.transform.TransformerException;

    53 import javax.xml.transform.TransformerFactory;

    54 import javax.xml.transform.dom.DOMSource;

    55 import javax.xml.transform.stream.StreamResult;

    56 import javax.xml.transform.stream.StreamSource;

    57 import org.sonews.daemon.NNTPConnection;

    58 import org.sonews.util.io.Resource;

    59 import org.w3c.dom.Document;

    60 import org.xml.sax.SAXException;

    62 /**

    63  * This is MimeMessage which enables custom Message-ID header

    64  * (this header will not be overwritten by the default one like in MimeMessage).

    65  *

    66  * Also add header and body separate serialization.

    67  *

    68  * And can be deserialized from SQL ResultSet

    69  *

    70  * @author František Kučera (frantovo.cz)

    71  */

    72 public class DrupalMessage extends MimeMessage {

	/**
	 * If the body of a message posted by a user through NNTP starts with this text,
	 * it will be treated as formatted text in Markdown syntax.
	 */
	private static final String MARKDOWN_HEADER = "#!markdown\r\n";
	/** Logger of this class. */
	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
	/** Name of the header whose value is mirrored in {@link #messageID} (compared case-insensitively). */
	private static final String MESSAGE_ID_HEADER = "Message-ID";
	/** Line terminator appended after every header line by {@link #getHeaders()}. */
	private static final String CRLF = "\r\n";
	/** Name of the charset used in the content type of the XHTML part. */
	public static final String CHARSET = "UTF-8";
	/** Content type of the XHTML body part. */
	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
	/**
	 * "End-of-line mark": placeholder put in front of every newline before the text
	 * is passed to Tidy and converted back to a newline afterwards.
	 */
	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
	/** Custom Message-ID; when not null, {@link #updateMessageID()} writes it instead of the default one. */
	private String messageID;
	/** ID of the parent article, set by the constructors. */
	private Long parentID;
	/** ID of the group, set by the constructors. */
	private Long groupID;
	/** Factory of XSLT transformers, created in {@link #initFactories()}. */
	private TransformerFactory transformerFactory;
	/** Factory of DOM parsers (configured to ignore comments), created in {@link #initFactories()}. */
	private DocumentBuilderFactory documentBuilderFactory;

    91 	/**

    92 	 * Initializes XML factories (Transformer, DocumentBuilder).

    93 	 */

    94 	private void initFactories() {

    95 		transformerFactory = TransformerFactory.newInstance();

    96 		documentBuilderFactory = DocumentBuilderFactory.newInstance();

    97 		/**

    98 		 * Komentáře nás nepotřebujeme

    99 		 * (a museli bychom je brát v úvahu při dělení odstavců:

   100 		 * v současné verzi XSLT odstavcovače by nám případný komentář

   101 		 * rozdělil text na dva odstavce, přestože to má být odstavec jede).

   102 		 */

   103 		documentBuilderFactory.setIgnoringComments(true);

   104 	}

   106 	/**

   107 	 * Constructs MIME message from SQL result.

   108 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.

   109 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).

   110 	 */

   111 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {

   112 		super(Session.getDefaultInstance(System.getProperties()));

   113 		initFactories();

   115 		groupID = rs.getLong("group_id");

   116 		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));

   117 		addHeader("Newsgroups", rs.getString("group_name"));

   118 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));

   119 		setSubject(rs.getString("subject"));

   120 		setSentDate(new Date(rs.getLong("created")));

   122 		parentID = rs.getLong("parent_id");

   123 		if (parentID > 0) {

   124 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);

   125 			addHeader("In-Reply-To", parentMessageID);

   126 			addHeader("References", parentMessageID);

   127 		}

   129 		if (constructBody) {

   130 			Multipart multipart = new MimeMultipart("alternative");

   131 			setContent(multipart);

   133 			/** XHTML part */

   134 			MimeBodyPart htmlPart = new MimeBodyPart();

   135 			String xhtmlText = readXhtmlText(

   136 					rs.getString("text"),

   137 					rs.getString("subject"),

   138 					rs.getInt("parent_id"),

   139 					rs.getString("urlBase"),

   140 					rs.getString("wwwRead"),

   141 					rs.getString("wwwPost"));

   142 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);

   144 			/** Plain text part */

   145 			MimeBodyPart textPart = new MimeBodyPart();

   146 			String plainText = formatedToPlainText(xhtmlText);

   147 			textPart.setText(plainText);

   148 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));

   150 			/**

   151 			 * Thunderbirdu záleží, v jakém pořadí části jsou

   152 			 * (když je prostý text druhý, html se nezobrazí),

   153 			 * KNode zobrazuje HTML správně, i když je na prvním místě.

   154 			 */

   155 			multipart.addBodyPart(textPart);

   156 			multipart.addBodyPart(htmlPart);

   157 		} else {

   158 			/** empty body, just headers */

   159 			setText("");

   160 		}

   161 	}

   163 	/**

   164 	 * Constructs MIME message from article posted by user.

   165 	 * @param article article that came through NNTP.

   166 	 * @throws MessagingException

   167 	 */

   168 	public DrupalMessage(Article article) throws MessagingException {

   169 		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));

   170 		initFactories();

   172 		String[] replyToHeaders = getHeader("In-Reply-To");

   173 		String[] referencesHeaders = getHeader("References");

   174 		String parentMessageID;

   175 		if (replyToHeaders != null && replyToHeaders.length == 1) {

   176 			parentMessageID = replyToHeaders[0];

   177 		} else if (referencesHeaders != null && referencesHeaders.length == 1) {

   178 			Pattern p = Pattern.compile("(\\s*<.*>)*\\s*(<.*>)");

   179 			Matcher m = p.matcher(referencesHeaders[0]);

   181 			if (m.matches()) {

   182 				parentMessageID = m.group(2);

   183 			} else {

   184 				throw new MessagingException("Message posted by user had invalid References header: " + referencesHeaders[0]);

   185 			}

   186 		} else {

   187 			throw new MessagingException("Message posted by user must have exactly one In-Reply-To header. Reply-To headers: " + Arrays.toString(replyToHeaders) + " Referemces headers: " + Arrays.toString(referencesHeaders));

   188 		}

   190 		parentID = parseArticleID(parentMessageID);

   191 		groupID = parseGroupID(parentMessageID);

   192 	}

   194 	private static InputStream serializeArticle(Article a) {

   195 		byte articleHeaders[] = a.getHeaderSource().getBytes();

   196 		byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();

   197 		byte body[] = a.getBody();

   199 		byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];

   201 		System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);

   202 		System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);

   203 		System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);

   205 		return new ByteArrayInputStream(message);

   206 	}

   208 	/**

   209 	 * @param xhtmlText well-formed XHTML

   210 	 * @return plain text representation of this formated text

   211 	 */

   212 	private String formatedToPlainText(String xhtmlText) {

   213 		try {

   214 			Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));

   216 			StringReader input = new StringReader(xhtmlText);

   217 			StringWriter output = new StringWriter(xhtmlText.length());

   218 			textTransformer.transform(new StreamSource(input), new StreamResult(output));

   220 			return output.toString();

   221 		} catch (Exception e) {

   222 			/**

   223 			 * TODO: lepší ošetření chyby

   224 			 */

   225 			log.log(Level.WARNING, "Error while transforming article to plain text", e);

   226 			return "Při transformaci příspěvku bohužel došlo k chybě.";

   227 		}

   228 	}

   230 	private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {

   231 		DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();

   232 		Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));

   233 		return new DOMSource(d);

   234 	}

   236 	private String readXhtmlText(String sourceText, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {

   237 		/**

   238 		 * TODO:

   239 		 *		- znovupoužívat XSL transformér (nejen v instanci)

   240 		 *		- používat cache, ukládat si vygenerované články

   241 		 */

   242 		String wrappedText = makeSimpleXHTML(sourceText);

   244 		Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));

   245 		String paragraphedText;

   246 		boolean tidyWasUsed = false;

   247 		try {

   248 			StringWriter output = new StringWriter(2 * wrappedText.length());

   249 			paragraphTransformer.transform(readDOM(wrappedText), new StreamResult(output));

   250 			paragraphedText = output.toString();

   251 		} catch (Exception e) {

   252 			log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);

   253 			StringWriter output = new StringWriter(2 * wrappedText.length());

   254 			paragraphTransformer.transform(readDOM(tidyXhtml(wrappedText)), new StreamResult(output));

   255 			paragraphedText = output.toString();

   256 			tidyWasUsed = true;

   257 		}

   259 		Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));

   260 		xhtmlTransformer.setParameter("isRoot", (parentId == 0));

   261 		xhtmlTransformer.setParameter("title", subject);

   262 		xhtmlTransformer.setParameter("urlBase", urlBase);

   263 		xhtmlTransformer.setParameter("wwwRead", wwwRead);

   264 		xhtmlTransformer.setParameter("wwwPost", wwwPost);

   265 		xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));

   266 		StringReader paragraphedReader = new StringReader(paragraphedText);

   267 		StringWriter xhtmlWriter = new StringWriter(2 * paragraphedText.length());

   268 		xhtmlTransformer.transform(new StreamSource(paragraphedReader), new StreamResult(xhtmlWriter));

   270 		return xhtmlWriter.toString();

   271 	}

   273 	/**

   274 	 * Converts markdown to XHTML.

   275 	 * @param markdown text in Markdown syntax

   276 	 * @return XHTML document (with html/body elements)

   277 	 * @throws StorageBackendException when markdown proces returned any errors

   278 	 * (other exceptions are thrown when afterwards XHTML validation fails).

   279 	 */

   280 	private String readXhtmlTextMarkdown(String markdown) throws TransformerException, IOException, ParserConfigurationException, SAXException, StorageBackendException {

   281 		Runtime r = Runtime.getRuntime();

   282 		Process p = r.exec(new String[]{"sudo", "-u", "markdown", "/usr/bin/markdown"});

   284 		PrintStream processInput = new PrintStream(p.getOutputStream());

   285 		processInput.print(markdown);

   286 		processInput.close();

   288 		String errors = streamToString(p.getErrorStream());

   289 		String htmlFragment = streamToString(p.getInputStream());

   291 		if (errors.length() == 0) {

   292 			String htmlDocument = makeSimpleXHTML(htmlFragment);

   293 			String xhtmlDocument = readXhtmlText(htmlDocument, null, -1, null, null, null);

   294 			return xhtmlDocument;

   295 		} else {

   296 			throw new StorageBackendException("Error while transforming Markdown to XHTML: " + errors);

   297 		}

   298 	}

   300 	/**

   301 	 * Does not parse XML works just with text.

   302 	 * @param body XHTML fragment that should be put between &lt;body&gt; and &lt;/body&gt;

   303 	 * @return simple XHTML document (body wrapped in html and body tags)

   304 	 */

   305 	private static String makeSimpleXHTML(String body) {

   306 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";

   307 	}

   309 	/**

   310 	 * Does not parse XML works just with text.

   311 	 * @param xhtml whole XHTML page

   312 	 * @return content between &lt;body&gt; and &lt;/body&gt; tags.

   313 	 */

   314 	private static String makeFragmentXHTML(String xhtml) {

   315 		final String startTag = "<body>";

   316 		final String endTag = "</body>";

   318 		int start = xhtml.indexOf(startTag) + startTag.length();

   319 		int end = xhtml.lastIndexOf(endTag);

   321 		return xhtml.substring(start, end);

   322 	}

   324 	/**

   325 	 * TODO: refaktorovat, přesunout

   326 	 */

   327 	private static String tidyXhtml(String inputText) throws IOException {

   328 		/*

   329 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966

   330 		 *

   331 		 * TODO:

   332 		 *		- použít delší zástupný řetězec, ne jen jeden znak

   333 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)

   334 		 */

   335 		inputText = označKonceŘádků(inputText);

   337 		Runtime r = Runtime.getRuntime();

   338 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net

   339 					"-asxml", // well formed XHTML

   340 					"-numeric", // číselné entity

   341 					"-utf8", // kódování

   342 					"--show-warnings", "false", // žádná varování nás nezajímají

   343 					"--show-errors", "0", // ani chyby

   344 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)

   345 					"--logical-emphasis", "true", // em a strong místo i a b

   346 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech

   347 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah

   348 				});

   350 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());

   351 		vstupProcesu.print(inputText);

   352 		vstupProcesu.close();

   354 		String outputText = streamToString(p.getInputStream());

   356 		outputText = vraťKonceŘádků(outputText);

   358 		return outputText;

   359 	}

   361 	private static String označKonceŘádků(String text) {

   362 		text = text.replaceAll(">\\s+<", "> <");

   363 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");

   364 		return text;

   365 	}

   367 	private static String vraťKonceŘádků(String text) {

   368 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");

   369 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");

   370 		return text;

   371 	}

   373 	/**

   374 	 * TODO: refaktorovat, přesunout

   375 	 */

   376 	private static String streamToString(InputStream proud) throws IOException {

   377 		StringBuilder výsledek = new StringBuilder();

   378 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));

   379 		while (true) {

   380 			String radek = buf.readLine();

   381 			if (radek == null) {

   382 				break;

   383 			} else {

   384 				výsledek.append(radek);

   385 				výsledek.append("\n");

   386 			}

   387 		}

   388 		return výsledek.toString();

   389 	}

   391 	public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {

   392 		StringBuilder sb = new StringBuilder();

   393 		sb.append("<");

   394 		sb.append(articleID);

   395 		sb.append("-");

   396 		sb.append(groupID);

   397 		sb.append("-");

   398 		sb.append(groupName);

   399 		sb.append("@");

   400 		sb.append(domainName);

   401 		sb.append(">");

   402 		return sb.toString();

   403 	}

   405 	/**

   406 	 * @return article ID of parent of this message | or null, if this is root article and not reply to another one

   407 	 */

   408 	public Long getParentID() {

   409 		return parentID;

   410 	}

   412 	/**

   413 	 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group

   414 	 */

   415 	public Long getGroupID() {

   416 		return groupID;

   417 	}

   419 	/**

   420 	 *

   421 	 * @param messageID &lt;{0}-{1}-{2}@domain.tld&gt; where {0} is nntp_id and {1} is group_id and {2} is group_name

   422 	 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid

   423 	 */

   424 	private static String[] parseMessageID(String messageID) {

   425 		if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {

   426 			return messageID.substring(1).split("@")[0].split("\\-");

   427 		} else {

   428 			return null;

   429 		}

   430 	}

   432 	public static Long parseArticleID(String messageID) {

   433 		String[] localPart = parseMessageID(messageID);

   434 		if (localPart == null) {

   435 			return null;

   436 		} else {

   437 			return Long.parseLong(localPart[0]);

   438 		}

   439 	}

   441 	public static Long parseGroupID(String messageID) {

   442 		String[] localPart = parseMessageID(messageID);

   443 		if (localPart == null) {

   444 			return null;

   445 		} else {

   446 			return Long.parseLong(localPart[1]);

   447 			// If needed:

   448 			// parseGroupName() will be same as this method, just with:

   449 			// return localPart[2];

   450 		}

   451 	}

   453 	@Override

   454 	public void setHeader(String name, String value) throws MessagingException {

   455 		super.setHeader(name, value);

   457 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   458 			messageID = value;

   459 		}

   460 	}

   462 	@Override

   463 	public final void addHeader(String name, String value) throws MessagingException {

   464 		super.addHeader(name, value);

   466 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   467 			messageID = value;

   468 		}

   469 	}

   471 	@Override

   472 	public void removeHeader(String name) throws MessagingException {

   473 		super.removeHeader(name);

   475 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   476 			messageID = null;

   477 		}

   478 	}

	/**
	 * Stores the custom Message-ID. Only the field is set here, no header is changed;
	 * the stored value is written to the Message-ID header by {@link #updateMessageID()}.
	 *
	 * @param messageID custom Message-ID, or null
	 */
	public void setMessageID(String messageID) {
		this.messageID = messageID;
	}

   484 	@Override

   485 	protected void updateMessageID() throws MessagingException {

   486 		if (messageID == null) {

   487 			super.updateMessageID();

   488 		} else {

   489 			setHeader(MESSAGE_ID_HEADER, messageID);

   490 		}

   491 	}

   493 	/**

   494 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:

   495 	 *

   496 	 * <pre>MIME-Version: 1.0

   497 	 *Content-Type: multipart/alternative;</pre>

   498 	 *

   499 	 * @return serialized headers

   500 	 * @throws MessagingException if getAllHeaders() fails

   501 	 */

   502 	public String getHeaders() throws MessagingException {

   503 		StringBuilder sb = new StringBuilder();

   504 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {

   505 			sb.append(eh.nextElement());

   506 			sb.append(CRLF);

   507 		}

   508 		return sb.toString();

   509 	}

   511 	public byte[] getBody() throws IOException, MessagingException {

   512 		saveChanges();

   514 		ArrayList<String> skipHeaders = new ArrayList<String>();

   515 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {

   516 			Header h = (Header) eh.nextElement();

   517 			skipHeaders.add(h.getName());

   518 		}

   520 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);

   521 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));

   522 		return baos.toByteArray();

   523 	}

   525 	/**

   526 	 * Transforms message content to valid XHTML and strips html and body tags.

   527 	 * When receiving message from user through NNTP

   528 	 * this method is used to get text that should be saved into databse.

   529 	 * @return XHTML fragment – content between &lt;body&gt; and &lt;/body&gt; tags.

   530 	 */

   531 	public String getBodyXhtmlFragment() throws StorageBackendException {

   532 		/**

   533 		 * TODO: podporovat i zprávy přímo v HTML a multipart.

   534 		 */

   535 		try {

   536 			Object c = getContent();

   537 			if (isMimeType("text/plain") && c instanceof String) {

   538 				String inputText = (String) c;

   539 				String xhtml;

   541 				if (inputText.startsWith(MARKDOWN_HEADER)) {

   542 					xhtml = readXhtmlTextMarkdown(inputText.substring(MARKDOWN_HEADER.length()));

   543 				} else {

   545 					xhtml = readXhtmlText(

   546 							inputText,

   547 							getSubject(),

   548 							getParentID(),

   549 							null,

   550 							null,

   551 							null);

   552 				}

   553 				return makeFragmentXHTML(xhtml);

   554 			} else {

   555 				throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");

   556 			}

   557 		} catch (Exception e) {

   558 			throw new StorageBackendException(e);

   559 		}

   560 	}

   562 	public String getBodyPlainText() throws StorageBackendException {

   563 		/**

   564 		 * TODO: netransformovat XHTML 2x

   565 		 */

   566 		return formatedToPlainText(makeSimpleXHTML(getBodyXhtmlFragment()));

   567 	}

   568 }

author	František Kučera <franta-hg@frantovo.cz>
	Sun, 06 Nov 2011 00:08:05 +0100
changeset 116	4ddc1020a154
parent 109	03cc47e9daee
child 118	ba7ea56fd672
permissions	-rw-r--r--