sonews-drupal: src/org/sonews/storage/DrupalMessage.java@03cc47e9daee

     1 /*

     2  *   SONEWS News Server

     3  *   see AUTHORS for the list of contributors

     4  *

     5  *   This program is free software: you can redistribute it and/or modify

     6  *   it under the terms of the GNU General Public License as published by

     7  *   the Free Software Foundation, either version 3 of the License, or

     8  *   (at your option) any later version.

     9  *

    10  *   This program is distributed in the hope that it will be useful,

    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of

    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    13  *   GNU General Public License for more details.

    14  *

    15  *   You should have received a copy of the GNU General Public License

    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.

    17  */

    18 package org.sonews.storage;

    20 import java.io.BufferedReader;

    21 import java.io.ByteArrayInputStream;

    22 import java.io.ByteArrayOutputStream;

    23 import java.io.IOException;

    24 import java.io.InputStream;

    25 import java.io.InputStreamReader;

    26 import java.io.PrintStream;

    27 import java.io.StringReader;

    28 import java.io.StringWriter;

    29 import java.io.UnsupportedEncodingException;

    30 import java.sql.ResultSet;

    31 import java.sql.SQLException;

    32 import java.util.ArrayList;

    33 import java.util.Arrays;

    34 import java.util.Date;

    35 import java.util.Enumeration;

    36 import java.util.logging.Level;

    37 import java.util.logging.Logger;

    38 import java.util.regex.Matcher;

    39 import java.util.regex.Pattern;

    40 import javax.mail.Header;

    41 import javax.mail.MessagingException;

    42 import javax.mail.Multipart;

    43 import javax.mail.Session;

    44 import javax.mail.internet.InternetAddress;

    45 import javax.mail.internet.MimeBodyPart;

    46 import javax.mail.internet.MimeMessage;

    47 import javax.mail.internet.MimeMultipart;

    48 import javax.xml.parsers.DocumentBuilder;

    49 import javax.xml.parsers.DocumentBuilderFactory;

    50 import javax.xml.parsers.ParserConfigurationException;

    51 import javax.xml.transform.Transformer;

    52 import javax.xml.transform.TransformerException;

    53 import javax.xml.transform.TransformerFactory;

    54 import javax.xml.transform.dom.DOMSource;

    55 import javax.xml.transform.stream.StreamResult;

    56 import javax.xml.transform.stream.StreamSource;

    57 import org.sonews.daemon.NNTPConnection;

    58 import org.sonews.util.io.Resource;

    59 import org.w3c.dom.Document;

    60 import org.xml.sax.SAXException;

    62 /**

    63  * This is MimeMessage which enables custom Message-ID header

    64  * (this header will not be overwritten by the default one like in MimeMessage).

    65  *

    66  * Also add header and body separate serialization.

    67  *

    68  * And can be deserialized from SQL ResultSet

    69  *

    70  * @author František Kučera (frantovo.cz)

    71  */

    72 public class DrupalMessage extends MimeMessage {

    74 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());

    75 	private static final String MESSAGE_ID_HEADER = "Message-ID";

    76 	private static final String CRLF = "\r\n";

    77 	public static final String CHARSET = "UTF-8";

    78 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;

    79 	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";

    80 	private String messageID;

    81 	private Long parentID;

    82 	private Long groupID;

    83 	private TransformerFactory transformerFactory;

    84 	private DocumentBuilderFactory documentBuilderFactory;

    86 	/**

    87 	 * Initializes XML factories (Transformer, DocumentBuilder).

    88 	 */

    89 	private void initFactories() {

    90 		transformerFactory = TransformerFactory.newInstance();

    91 		documentBuilderFactory = DocumentBuilderFactory.newInstance();

    92 		/**

    93 		 * Komentáře nás nepotřebujeme

    94 		 * (a museli bychom je brát v úvahu při dělení odstavců:

    95 		 * v současné verzi XSLT odstavcovače by nám případný komentář

    96 		 * rozdělil text na dva odstavce, přestože to má být odstavec jede).

    97 		 */

    98 		documentBuilderFactory.setIgnoringComments(true);

    99 	}

   101 	/**

   102 	 * Constructs MIME message from SQL result.

   103 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.

   104 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).

   105 	 */

   106 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {

   107 		super(Session.getDefaultInstance(System.getProperties()));

   108 		initFactories();

   110 		groupID = rs.getLong("group_id");

   111 		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));

   112 		addHeader("Newsgroups", rs.getString("group_name"));

   113 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));

   114 		setSubject(rs.getString("subject"));

   115 		setSentDate(new Date(rs.getLong("created")));

   117 		parentID = rs.getLong("parent_id");

   118 		if (parentID > 0) {

   119 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);

   120 			addHeader("In-Reply-To", parentMessageID);

   121 			addHeader("References", parentMessageID);

   122 		}

   124 		if (constructBody) {

   125 			Multipart multipart = new MimeMultipart("alternative");

   126 			setContent(multipart);

   128 			/** XHTML part */

   129 			MimeBodyPart htmlPart = new MimeBodyPart();

   130 			String xhtmlText = readXhtmlText(

   131 					rs.getString("text"),

   132 					rs.getString("subject"),

   133 					rs.getInt("parent_id"),

   134 					rs.getString("urlBase"),

   135 					rs.getString("wwwRead"),

   136 					rs.getString("wwwPost"));

   137 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);

   139 			/** Plain text part */

   140 			MimeBodyPart textPart = new MimeBodyPart();

   141 			String plainText = formatedToPlainText(xhtmlText);

   142 			textPart.setText(plainText);

   143 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));

   145 			/**

   146 			 * Thunderbirdu záleží, v jakém pořadí části jsou

   147 			 * (když je prostý text druhý, html se nezobrazí),

   148 			 * KNode zobrazuje HTML správně, i když je na prvním místě.

   149 			 */

   150 			multipart.addBodyPart(textPart);

   151 			multipart.addBodyPart(htmlPart);

   152 		} else {

   153 			/** empty body, just headers */

   154 			setText("");

   155 		}

   156 	}

   158 	/**

   159 	 * Constructs MIME message from article posted by user.

   160 	 * @param article article that came through NNTP.

   161 	 * @throws MessagingException

   162 	 */

   163 	public DrupalMessage(Article article) throws MessagingException {

   164 		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));

   165 		initFactories();

   167 		String[] replyToHeaders = getHeader("In-Reply-To");

   168 		String[] referencesHeaders = getHeader("References");

   169 		String parentMessageID;

   170 		if (replyToHeaders != null && replyToHeaders.length == 1) {

   171 			parentMessageID = replyToHeaders[0];

   172 		} else if (referencesHeaders != null && referencesHeaders.length == 1) {

   173 			Pattern p = Pattern.compile("(\\s*<.*>)*\\s*(<.*>)");

   174 			Matcher m = p.matcher(referencesHeaders[0]);

   176 			if (m.matches()) {

   177 				parentMessageID = m.group(2);

   178 			} else {

   179 				throw new MessagingException("Message posted by user had invalid References header: " + referencesHeaders[0]);

   180 			}

   181 		} else {

   182 			throw new MessagingException("Message posted by user must have exactly one In-Reply-To header. Reply-To headers: " + Arrays.toString(replyToHeaders) + " Referemces headers: " + Arrays.toString(referencesHeaders));

   183 		}

   185 		parentID = parseArticleID(parentMessageID);

   186 		groupID = parseGroupID(parentMessageID);

   187 	}

   189 	private static InputStream serializeArticle(Article a) {

   190 		byte articleHeaders[] = a.getHeaderSource().getBytes();

   191 		byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();

   192 		byte body[] = a.getBody();

   194 		byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];

   196 		System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);

   197 		System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);

   198 		System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);

   200 		return new ByteArrayInputStream(message);

   201 	}

   203 	/**

   204 	 * @param xhtmlText well-formed XHTML

   205 	 * @return plain text representation of this formated text

   206 	 */

   207 	private String formatedToPlainText(String xhtmlText) {

   208 		try {

   209 			Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));

   211 			StringReader input = new StringReader(xhtmlText);

   212 			StringWriter output = new StringWriter(xhtmlText.length());

   213 			textTransformer.transform(new StreamSource(input), new StreamResult(output));

   215 			return output.toString();

   216 		} catch (Exception e) {

   217 			/**

   218 			 * TODO: lepší ošetření chyby

   219 			 */

   220 			log.log(Level.WARNING, "Error while transforming article to plain text", e);

   221 			return "Při transformaci příspěvku bohužel došlo k chybě.";

   222 		}

   223 	}

   225 	private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {

   226 		DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();

   227 		Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));

   228 		return new DOMSource(d);

   229 	}

   231 	private String readXhtmlText(String sourceText, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {

   232 		/**

   233 		 * TODO:

   234 		 *		- znovupoužívat XSL transformér (nejen v instanci)

   235 		 *		- používat cache, ukládat si vygenerované články

   236 		 */

   237 		String wrappedText = makeSimpleXHTML(sourceText);

   239 		Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));

   240 		String paragraphedText;

   241 		boolean tidyWasUsed = false;

   242 		try {

   243 			StringWriter output = new StringWriter(2 * wrappedText.length());

   244 			paragraphTransformer.transform(readDOM(wrappedText), new StreamResult(output));

   245 			paragraphedText = output.toString();

   246 		} catch (Exception e) {

   247 			log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);

   248 			StringWriter output = new StringWriter(2 * wrappedText.length());

   249 			paragraphTransformer.transform(readDOM(tidyXhtml(wrappedText)), new StreamResult(output));

   250 			paragraphedText = output.toString();

   251 			tidyWasUsed = true;

   252 		}

   254 		Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));

   255 		xhtmlTransformer.setParameter("isRoot", (parentId == 0));

   256 		xhtmlTransformer.setParameter("title", subject);

   257 		xhtmlTransformer.setParameter("urlBase", urlBase);

   258 		xhtmlTransformer.setParameter("wwwRead", wwwRead);

   259 		xhtmlTransformer.setParameter("wwwPost", wwwPost);

   260 		xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));

   261 		StringReader paragraphedReader = new StringReader(paragraphedText);

   262 		StringWriter xhtmlWriter = new StringWriter(2 * paragraphedText.length());

   263 		xhtmlTransformer.transform(new StreamSource(paragraphedReader), new StreamResult(xhtmlWriter));

   265 		return xhtmlWriter.toString();

   266 	}

   268 	/**

   269 	 * Does not parse XML works just with text.

   270 	 * @param body XHTML fragment that should be put between &lt;body&gt; and &lt;/body&gt;

   271 	 * @return simple XHTML document (body wrapped in html and body tags)

   272 	 */

   273 	private static String makeSimpleXHTML(String body) {

   274 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";

   275 	}

   277 	/**

   278 	 * Does not parse XML works just with text.

   279 	 * @param xhtml whole XHTML page

   280 	 * @return content between &lt;body&gt; and &lt;/body&gt; tags.

   281 	 */

   282 	private static String makeFragmentXHTML(String xhtml) {

   283 		final String startTag = "<body>";

   284 		final String endTag = "</body>";

   286 		int start = xhtml.indexOf(startTag) + startTag.length();

   287 		int end = xhtml.lastIndexOf(endTag);

   289 		return xhtml.substring(start, end);

   290 	}

   292 	/**

   293 	 * TODO: refaktorovat, přesunout

   294 	 */

   295 	private static String tidyXhtml(String inputText) throws IOException {

   296 		/*

   297 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966

   298 		 *

   299 		 * TODO:

   300 		 *		- použít delší zástupný řetězec, ne jen jeden znak

   301 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)

   302 		 */

   303 		inputText = označKonceŘádků(inputText);

   305 		Runtime r = Runtime.getRuntime();

   306 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net

   307 					"-asxml", // well formed XHTML

   308 					"-numeric", // číselné entity

   309 					"-utf8", // kódování

   310 					"--show-warnings", "false", // žádná varování nás nezajímají

   311 					"--show-errors", "0", // ani chyby

   312 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)

   313 					"--logical-emphasis", "true", // em a strong místo i a b

   314 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech

   315 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah

   316 				});

   318 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());

   319 		vstupProcesu.print(inputText);

   320 		vstupProcesu.close();

   322 		String outputText = streamToString(p.getInputStream());

   324 		outputText = vraťKonceŘádků(outputText);

   326 		return outputText;

   327 	}

   329 	private static String označKonceŘádků(String text) {

   330 		text = text.replaceAll(">\\s+<", "> <");

   331 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");

   332 		return text;

   333 	}

   335 	private static String vraťKonceŘádků(String text) {

   336 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");

   337 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");

   338 		return text;

   339 	}

   341 	/**

   342 	 * TODO: refaktorovat, přesunout

   343 	 */

   344 	private static String streamToString(InputStream proud) throws IOException {

   345 		StringBuilder výsledek = new StringBuilder();

   346 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));

   347 		while (true) {

   348 			String radek = buf.readLine();

   349 			if (radek == null) {

   350 				break;

   351 			} else {

   352 				výsledek.append(radek);

   353 				výsledek.append("\n");

   354 			}

   355 		}

   356 		return výsledek.toString();

   357 	}

   359 	public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {

   360 		StringBuilder sb = new StringBuilder();

   361 		sb.append("<");

   362 		sb.append(articleID);

   363 		sb.append("-");

   364 		sb.append(groupID);

   365 		sb.append("-");

   366 		sb.append(groupName);

   367 		sb.append("@");

   368 		sb.append(domainName);

   369 		sb.append(">");

   370 		return sb.toString();

   371 	}

   373 	/**

   374 	 * @return article ID of parent of this message | or null, if this is root article and not reply to another one

   375 	 */

   376 	public Long getParentID() {

   377 		return parentID;

   378 	}

   380 	/**

   381 	 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group

   382 	 */

   383 	public Long getGroupID() {

   384 		return groupID;

   385 	}

   387 	/**

   388 	 *

   389 	 * @param messageID &lt;{0}-{1}-{2}@domain.tld&gt; where {0} is nntp_id and {1} is group_id and {2} is group_name

   390 	 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid

   391 	 */

   392 	private static String[] parseMessageID(String messageID) {

   393 		if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {

   394 			return messageID.substring(1).split("@")[0].split("\\-");

   395 		} else {

   396 			return null;

   397 		}

   398 	}

   400 	public static Long parseArticleID(String messageID) {

   401 		String[] localPart = parseMessageID(messageID);

   402 		if (localPart == null) {

   403 			return null;

   404 		} else {

   405 			return Long.parseLong(localPart[0]);

   406 		}

   407 	}

   409 	public static Long parseGroupID(String messageID) {

   410 		String[] localPart = parseMessageID(messageID);

   411 		if (localPart == null) {

   412 			return null;

   413 		} else {

   414 			return Long.parseLong(localPart[1]);

   415 			// If needed:

   416 			// parseGroupName() will be same as this method, just with:

   417 			// return localPart[2];

   418 		}

   419 	}

   421 	@Override

   422 	public void setHeader(String name, String value) throws MessagingException {

   423 		super.setHeader(name, value);

   425 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   426 			messageID = value;

   427 		}

   428 	}

   430 	@Override

   431 	public final void addHeader(String name, String value) throws MessagingException {

   432 		super.addHeader(name, value);

   434 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   435 			messageID = value;

   436 		}

   437 	}

   439 	@Override

   440 	public void removeHeader(String name) throws MessagingException {

   441 		super.removeHeader(name);

   443 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   444 			messageID = null;

   445 		}

   446 	}

   448 	public void setMessageID(String messageID) {

   449 		this.messageID = messageID;

   450 	}

   452 	@Override

   453 	protected void updateMessageID() throws MessagingException {

   454 		if (messageID == null) {

   455 			super.updateMessageID();

   456 		} else {

   457 			setHeader(MESSAGE_ID_HEADER, messageID);

   458 		}

   459 	}

   461 	/**

   462 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:

   463 	 *

   464 	 * <pre>MIME-Version: 1.0

   465 	 *Content-Type: multipart/alternative;</pre>

   466 	 *

   467 	 * @return serialized headers

   468 	 * @throws MessagingException if getAllHeaders() fails

   469 	 */

   470 	public String getHeaders() throws MessagingException {

   471 		StringBuilder sb = new StringBuilder();

   472 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {

   473 			sb.append(eh.nextElement());

   474 			sb.append(CRLF);

   475 		}

   476 		return sb.toString();

   477 	}

   479 	public byte[] getBody() throws IOException, MessagingException {

   480 		saveChanges();

   482 		ArrayList<String> skipHeaders = new ArrayList<String>();

   483 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {

   484 			Header h = (Header) eh.nextElement();

   485 			skipHeaders.add(h.getName());

   486 		}

   488 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);

   489 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));

   490 		return baos.toByteArray();

   491 	}

   493 	/**

   494 	 * Transforms message content to valid XHTML and strips html and body tags.

   495 	 * When receiving message from user through NNTP

   496 	 * this method is used to get text that should be saved into databse.

   497 	 * @return XHTML fragment – content between &lt;body&gt; and &lt;/body&gt; tags.

   498 	 */

   499 	public String getBodyXhtmlFragment() throws StorageBackendException {

   500 		/**

   501 		 * TODO: podporovat i zprávy přímo v HTML a multipart.

   502 		 */

   503 		try {

   504 			Object c = getContent();

   505 			if (isMimeType("text/plain") && c instanceof String) {

   506 				String xhtml = readXhtmlText(

   507 						(String) c,

   508 						getSubject(),

   509 						getParentID(),

   510 						null,

   511 						null,

   512 						null);

   513 				return makeFragmentXHTML(xhtml);

   514 			} else {

   515 				throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");

   516 			}

   517 		} catch (Exception e) {

   518 			throw new StorageBackendException(e);

   519 		}

   520 	}

   522 	public String getBodyPlainText() throws StorageBackendException {

   523 		/**

   524 		 * TODO: netransformovat XHTML 2x

   525 		 */

   526 		return formatedToPlainText(makeSimpleXHTML(getBodyXhtmlFragment()));

   527 	}

   528 }

author	František Kučera <franta-hg@frantovo.cz>
	Tue, 25 Oct 2011 14:10:55 +0200
changeset 109	03cc47e9daee
parent 106	dc04a3c2c557
child 116	4ddc1020a154
permissions	-rw-r--r--