sonews-drupal: src/org/sonews/storage/DrupalMessage.java@a788bf0e1080

     1 /*

     2  *   SONEWS News Server

     3  *   see AUTHORS for the list of contributors

     4  *

     5  *   This program is free software: you can redistribute it and/or modify

     6  *   it under the terms of the GNU General Public License as published by

     7  *   the Free Software Foundation, either version 3 of the License, or

     8  *   (at your option) any later version.

     9  *

    10  *   This program is distributed in the hope that it will be useful,

    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of

    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    13  *   GNU General Public License for more details.

    14  *

    15  *   You should have received a copy of the GNU General Public License

    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.

    17  */

    18 package org.sonews.storage;

    20 import java.io.BufferedReader;

    21 import java.io.ByteArrayInputStream;

    22 import java.io.ByteArrayOutputStream;

    23 import java.io.IOException;

    24 import java.io.InputStream;

    25 import java.io.InputStreamReader;

    26 import java.io.PrintStream;

    27 import java.io.StringReader;

    28 import java.io.StringWriter;

    29 import java.io.UnsupportedEncodingException;

    30 import java.sql.ResultSet;

    31 import java.sql.SQLException;

    32 import java.util.ArrayList;

    33 import java.util.Date;

    34 import java.util.Enumeration;

    35 import java.util.logging.Level;

    36 import java.util.logging.Logger;

    37 import javax.mail.Header;

    38 import javax.mail.MessagingException;

    39 import javax.mail.Multipart;

    40 import javax.mail.Session;

    41 import javax.mail.internet.InternetAddress;

    42 import javax.mail.internet.MimeBodyPart;

    43 import javax.mail.internet.MimeMessage;

    44 import javax.mail.internet.MimeMultipart;

    45 import javax.xml.transform.Transformer;

    46 import javax.xml.transform.TransformerException;

    47 import javax.xml.transform.TransformerFactory;

    48 import javax.xml.transform.stream.StreamResult;

    49 import javax.xml.transform.stream.StreamSource;

    50 import org.sonews.daemon.NNTPConnection;

    51 import org.sonews.util.io.Resource;

/**
 * A MimeMessage that supports a custom Message-ID header
 * (the header is not overwritten by the default generated one, as it would be in plain MimeMessage).
 *
 * It also adds separate serialization of the headers and of the body.
 *
 * It can be deserialized from an SQL ResultSet as well.
 *
 * @author František Kučera (frantovo.cz)
 */

    63 public class DrupalMessage extends MimeMessage {

    65 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());

    66 	private static final String MESSAGE_ID_HEADER = "Message-ID";

    67 	private static final String CRLF = "\r\n";

    68 	public static final String CHARSET = "UTF-8";

    69 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;

    70 	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";

    71 	private String messageID;

    72 	private Long parentID;

    73 	private Long groupID;

    75 	/**

    76 	 * Constructs MIME message from SQL result.

    77 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.

    78 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).

    79 	 */

    80 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException {

    81 		super(Session.getDefaultInstance(System.getProperties()));

    83 		groupID = rs.getLong("group_id");

    84 		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));

    85 		addHeader("Newsgroups", rs.getString("group_name"));

    86 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));

    87 		setSubject(rs.getString("subject"));

    88 		setSentDate(new Date(rs.getLong("created")));

    90 		parentID = rs.getLong("parent_id");

    91 		if (parentID > 0) {

    92 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);

    93 			addHeader("In-Reply-To", parentMessageID);

    94 			addHeader("References", parentMessageID);

    95 		}

    97 		if (constructBody) {

    98 			Multipart multipart = new MimeMultipart("alternative");

    99 			setContent(multipart);

   101 			/** XHTML part */

   102 			MimeBodyPart htmlPart = new MimeBodyPart();

   103 			String xhtmlText = readXhtmlText(

   104 					rs.getString("text"),

   105 					rs.getString("subject"),

   106 					rs.getInt("parent_id"),

   107 					rs.getString("urlBase"),

   108 					rs.getString("wwwRead"),

   109 					rs.getString("wwwPost"));

   110 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);

   112 			/** Plain text part */

   113 			MimeBodyPart textPart = new MimeBodyPart();

   114 			String plainText = readPlainText(rs, xhtmlText);

   115 			textPart.setText(plainText);

   116 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));

   118 			/**

   119 			 * Thunderbirdu záleží, v jakém pořadí části jsou

   120 			 * (když je prostý text druhý, html se nezobrazí),

   121 			 * KNode zobrazuje HTML správně, i když je na prvním místě.

   122 			 */

   123 			multipart.addBodyPart(textPart);

   124 			multipart.addBodyPart(htmlPart);

   125 		} else {

   126 			/** empty body, just headers */

   127 			setText("");

   128 		}

   129 	}

   131 	/**

   132 	 * Constructs MIME message from article posted by user.

   133 	 * @param article article that came through NNTP.

   134 	 * @throws MessagingException

   135 	 */

   136 	public DrupalMessage(Article article) throws MessagingException {

   137 		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));

   139 		String[] parentHeaders = getHeader("In-Reply-To");

   140 		if (parentHeaders.length == 1) {

   141 			String parentMessageID = parentHeaders[0];

   142 			parentID = parseArticleID(parentMessageID);

   143 			groupID = parseGroupID(parentMessageID);

   144 		} else {

   145 			throw new MessagingException("Message posted by user must have exactly one In-Reply-To header.");

   146 		}

   147 	}

   149 	private static InputStream serializeArticle(Article a) {

   150 		byte articleHeaders[] = a.getHeaderSource().getBytes();

   151 		byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();

   152 		byte body[] = a.getBody();

   154 		byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];

   156 		System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);

   157 		System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);

   158 		System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);

   160 		return new ByteArrayInputStream(message);

   161 	}

   163 	private String readPlainText(ResultSet rs, String xhtmlText) {

   164 		try {

   165 			TransformerFactory tf = TransformerFactory.newInstance();

   166 			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));

   168 			StringReader input = new StringReader(xhtmlText);

   169 			StringWriter output = new StringWriter(xhtmlText.length());

   170 			textTransformer.transform(new StreamSource(input), new StreamResult(output));

   172 			return output.toString();

   173 		} catch (Exception e) {

   174 			/**

   175 			 * TODO: lepší ošetření chyby

   176 			 */

   177 			log.log(Level.WARNING, "Error while transforming article to plain text", e);

   178 			return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");

   179 		}

   180 	}

   182 	private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException {

   183 		/**

   184 		 * TODO:

   185 		 *		- znovupoužívat XSL transformér

   186 		 *		- používat cache, ukládat si vygenerované články

   187 		 */

   188 		String inputText = makeSimpleXHTML(text);

   190 		TransformerFactory tf = TransformerFactory.newInstance();

   191 		Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));

   193 		String paragraphedText;

   194 		boolean tidyWasUsed = false;

   195 		try {

   196 			StringReader input = new StringReader(inputText);

   197 			StringWriter output = new StringWriter(2 * inputText.length());

   198 			paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));

   199 			paragraphedText = output.toString();

   200 		} catch (Exception e) {

   201 			log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);

   202 			StringReader input = new StringReader(tidyXhtml(inputText));

   203 			StringWriter output = new StringWriter(2 * inputText.length());

   204 			paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));

   205 			paragraphedText = output.toString();

   206 			tidyWasUsed = true;

   207 		}

   209 		Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));

   210 		xhtmlTransformer.setParameter("isRoot", (parentId == 0));

   211 		xhtmlTransformer.setParameter("title", subject);

   212 		xhtmlTransformer.setParameter("urlBase", urlBase);

   213 		xhtmlTransformer.setParameter("wwwRead", wwwRead);

   214 		xhtmlTransformer.setParameter("wwwPost", wwwPost);

   215 		xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));

   216 		StringReader input = new StringReader(paragraphedText);

   217 		StringWriter output = new StringWriter(2 * paragraphedText.length());

   218 		xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));

   220 		return output.toString();

   221 	}

   223 	/**

   224 	 * Does not parse XML works just with text.

   225 	 * @param body XHTML fragment that should be put between &lt;body&gt; and &lt;/body&gt;

   226 	 * @return simple XHTML document (body wrapped in html and body tags)

   227 	 */

   228 	private static String makeSimpleXHTML(String body) {

   229 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";

   230 	}

   232 	/**

   233 	 * Does not parse XML works just with text.

   234 	 * @param xhtml whole XHTML page

   235 	 * @return content between &lt;body&gt; and &lt;/body&gt; tags.

   236 	 */

   237 	private static String makeFragmentXHTML(String xhtml) {

   238 		final String startTag = "<body>";

   239 		final String endTag = "</body>";

   241 		int start = xhtml.indexOf(startTag) + startTag.length();

   242 		int end = xhtml.lastIndexOf(endTag);

   244 		return xhtml.substring(start, end);

   245 	}

   247 	/**

   248 	 * TODO: refaktorovat, přesunout

   249 	 */

   250 	private static String tidyXhtml(String inputText) throws IOException {

   251 		/*

   252 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966

   253 		 *

   254 		 * TODO:

   255 		 *		- použít delší zástupný řetězec, ne jen jeden znak

   256 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)

   257 		 */

   258 		inputText = označKonceŘádků(inputText);

   260 		Runtime r = Runtime.getRuntime();

   261 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net

   262 					"-asxml", // well formed XHTML

   263 					"-numeric", // číselné entity

   264 					"-utf8", // kódování

   265 					"--show-warnings", "false", // žádná varování nás nezajímají

   266 					"--show-errors", "0", // ani chyby

   267 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)

   268 					"--logical-emphasis", "true", // em a strong místo i a b

   269 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech

   270 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah

   271 				});

   273 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());

   274 		vstupProcesu.print(inputText);

   275 		vstupProcesu.close();

   277 		String outputText = streamToString(p.getInputStream());

   279 		outputText = vraťKonceŘádků(outputText);

   281 		return outputText;

   282 	}

   284 	private static String označKonceŘádků(String text) {

   285 		text = text.replaceAll(">\\s+<", "> <");

   286 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");

   287 		return text;

   288 	}

   290 	private static String vraťKonceŘádků(String text) {

   291 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");

   292 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");

   293 		return text;

   294 	}

   296 	/**

   297 	 * TODO: refaktorovat, přesunout

   298 	 */

   299 	private static String streamToString(InputStream proud) throws IOException {

   300 		StringBuilder výsledek = new StringBuilder();

   301 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));

   302 		while (true) {

   303 			String radek = buf.readLine();

   304 			if (radek == null) {

   305 				break;

   306 			} else {

   307 				výsledek.append(radek);

   308 				výsledek.append("\n");

   309 			}

   310 		}

   311 		return výsledek.toString();

   312 	}

   314 	public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {

   315 		StringBuilder sb = new StringBuilder();

   316 		sb.append("<");

   317 		sb.append(articleID);

   318 		sb.append("-");

   319 		sb.append(groupID);

   320 		sb.append("-");

   321 		sb.append(groupName);

   322 		sb.append("@");

   323 		sb.append(domainName);

   324 		sb.append(">");

   325 		return sb.toString();

   326 	}

   328 	/**

   329 	 * @return article ID of parent of this message | or null, if this is root article and not reply to another one

   330 	 */

   331 	public Long getParentID() {

   332 		return parentID;

   333 	}

   335 	/**

   336 	 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group

   337 	 */

   338 	public Long getGroupID() {

   339 		return groupID;

   340 	}

   342 	/**

   343 	 *

   344 	 * @param messageID &lt;{0}-{1}-{2}@domain.tld&gt; where {0} is nntp_id and {1} is group_id and {2} is group_name

   345 	 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid

   346 	 */

   347 	private static String[] parseMessageID(String messageID) {

   348 		if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {

   349 			return messageID.substring(1).split("@")[0].split("\\-");

   350 		} else {

   351 			return null;

   352 		}

   353 	}

   355 	public static Long parseArticleID(String messageID) {

   356 		String[] localPart = parseMessageID(messageID);

   357 		if (localPart == null) {

   358 			return null;

   359 		} else {

   360 			return Long.parseLong(localPart[0]);

   361 		}

   362 	}

   364 	public static Long parseGroupID(String messageID) {

   365 		String[] localPart = parseMessageID(messageID);

   366 		if (localPart == null) {

   367 			return null;

   368 		} else {

   369 			return Long.parseLong(localPart[1]);

   370 			// If needed:

   371 			// parseGroupName() will be same as this method, just with:

   372 			// return localPart[2];

   373 		}

   374 	}

   376 	@Override

   377 	public void setHeader(String name, String value) throws MessagingException {

   378 		super.setHeader(name, value);

   380 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   381 			messageID = value;

   382 		}

   383 	}

   385 	@Override

   386 	public final void addHeader(String name, String value) throws MessagingException {

   387 		super.addHeader(name, value);

   389 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   390 			messageID = value;

   391 		}

   392 	}

   394 	@Override

   395 	public void removeHeader(String name) throws MessagingException {

   396 		super.removeHeader(name);

   398 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {

   399 			messageID = null;

   400 		}

   401 	}

   403 	public void setMessageID(String messageID) {

   404 		this.messageID = messageID;

   405 	}

   407 	@Override

   408 	protected void updateMessageID() throws MessagingException {

   409 		if (messageID == null) {

   410 			super.updateMessageID();

   411 		} else {

   412 			setHeader(MESSAGE_ID_HEADER, messageID);

   413 		}

   414 	}

   416 	/**

   417 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:

   418 	 *

   419 	 * <pre>MIME-Version: 1.0

   420 	 *Content-Type: multipart/alternative;</pre>

   421 	 *

   422 	 * @return serialized headers

   423 	 * @throws MessagingException if getAllHeaders() fails

   424 	 */

   425 	public String getHeaders() throws MessagingException {

   426 		StringBuilder sb = new StringBuilder();

   427 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {

   428 			sb.append(eh.nextElement());

   429 			sb.append(CRLF);

   430 		}

   431 		return sb.toString();

   432 	}

   434 	public byte[] getBody() throws IOException, MessagingException {

   435 		saveChanges();

   437 		ArrayList<String> skipHeaders = new ArrayList<String>();

   438 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {

   439 			Header h = (Header) eh.nextElement();

   440 			skipHeaders.add(h.getName());

   441 		}

   443 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);

   444 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));

   445 		return baos.toByteArray();

   446 	}

   448 	/**

   449 	 * Transforms message content to valid XHTML and strips html and body tags.

   450 	 * When receiving message from user through NNTP

   451 	 * this method is used to get text that should be saved into databse.

   452 	 * @return XHTML fragment – content between &lt;body&gt; and &lt;/body&gt; tags.

   453 	 */

   454 	public String getBodyXhtmlFragment() throws StorageBackendException {

   455 		/**

   456 		 * TODO: podporovat i zprávy přímo v HTML a multipart.

   457 		 */

   458 		try {

   459 			Object c = getContent();

   460 			if (isMimeType("text/plain") && c instanceof String) {

   461 				String xhtml = readXhtmlText(

   462 						(String) c,

   463 						getSubject(),

   464 						getParentID(),

   465 						null,

   466 						null,

   467 						null);

   468 				return makeFragmentXHTML(xhtml);

   469 			} else {

   470 				throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");

   471 			}

   472 		} catch (Exception e) {

   473 			throw new StorageBackendException(e);

   474 		}

   475 	}

   476 }

author	František Kučera <franta-hg@frantovo.cz>
	Thu, 20 Oct 2011 10:50:58 +0200
changeset 103	a788bf0e1080
parent 102	d843b4fee5dc
child 104	b4c8a2760d6f
permissions	-rw-r--r--