1.1 --- a/org/sonews/storage/Article.java Wed May 12 11:18:02 2010 +0200
1.2 +++ b/org/sonews/storage/Article.java Sun Aug 29 17:03:21 2010 +0200
1.3 @@ -21,8 +21,6 @@
1.4 import java.io.ByteArrayInputStream;
1.5 import java.io.ByteArrayOutputStream;
1.6 import java.io.IOException;
1.7 -import java.io.InputStream;
1.8 -import java.nio.charset.Charset;
1.9 import java.security.MessageDigest;
1.10 import java.security.NoSuchAlgorithmException;
1.11 import java.util.UUID;
1.12 @@ -32,10 +30,8 @@
1.13 import javax.mail.Header;
1.14 import javax.mail.Message;
1.15 import javax.mail.MessagingException;
1.16 -import javax.mail.Multipart;
1.17 import javax.mail.internet.InternetHeaders;
1.18 import org.sonews.config.Config;
1.19 -import org.sonews.util.Log;
1.20
1.21 /**
1.22 * Represents a newsgroup article.
1.23 @@ -97,7 +93,7 @@
1.24
1.25 /**
1.26 * Creates an Article instance using the data from the javax.mail.Message
1.27 - * object.
1.28 + * object. This constructor is called by the mailing list gateway.
1.29 * @see javax.mail.Message
1.30 * @param msg
1.31 * @throws IOException
1.32 @@ -113,61 +109,25 @@
1.33 final Header header = (Header)e.nextElement();
1.34 this.headers.addHeader(header.getName(), header.getValue());
1.35 }
1.36 -
1.37 - // The "content" of the message can be a String if it's a simple text/plain
1.38 - // message, a Multipart object or an InputStream if the content is unknown.
1.39 - final Object content = msg.getContent();
1.40 - if(content instanceof String)
1.41 - {
1.42 - this.body = ((String)content).getBytes(getBodyCharset());
1.43 - }
1.44 - else if(content instanceof Multipart) // probably subclass MimeMultipart
1.45 - {
1.46 - // We're are not interested in the different parts of the MultipartMessage,
1.47 - // so we simply read in all data which *can* be huge.
1.48 - InputStream in = msg.getInputStream();
1.49 - this.body = readContent(in);
1.50 - }
1.51 - else if(content instanceof InputStream)
1.52 - {
1.53 - // The message format is unknown to the Message class, but we can
1.54 - // simply read in the whole message data.
1.55 - this.body = readContent((InputStream)content);
1.56 - }
1.57 - else
1.58 - {
1.59 - // Unknown content is probably a malformed mail we should skip.
1.60 - // On the other hand we produce an inconsistent mail mirror, but no
1.61 - // mail system must transport invalid content.
1.62 - Log.get().severe("Skipping message due to unknown content. Throwing exception...");
1.63 - MessagingException ex = new MessagingException("Unknown content: " + content);
1.64 - Log.get().throwing("Article.java", "<init>", ex);
1.65 - throw ex;
1.66 - }
1.67 +
1.68 + // Read the raw message bytes via Message.writeTo(OutputStream). NOTE(review): writeTo() may also emit the headers - confirm.
1.69 + this.body = readContent(msg);
1.70
1.71 // Validate headers
1.72 validateHeaders();
1.73 }
1.74
1.75 /**
1.76 - * Reads from the given InputString into a byte array.
1.77 - * TODO: Move this generalized method to org.sonews.util.io.Resource.
1.78 + * Writes the given Message to an in-memory buffer and returns its bytes.
1.79 * @param in
1.80 * @return
1.81 * @throws IOException
1.82 */
1.83 - private byte[] readContent(InputStream in)
1.84 - throws IOException
1.85 + private byte[] readContent(Message in)
1.86 + throws IOException, MessagingException
1.87 {
1.88 ByteArrayOutputStream out = new ByteArrayOutputStream();
1.89 -
1.90 - int b = in.read();
1.91 - while(b >= 0)
1.92 - {
1.93 - out.write(b);
1.94 - b = in.read();
1.95 - }
1.96 -
1.97 + in.writeTo(out);
1.98 return out.toByteArray();
1.99 }
1.100
1.101 @@ -226,51 +186,6 @@
1.102 {
1.103 return body;
1.104 }
1.105 -
1.106 - /**
1.107 - * @return Charset of the body text
1.108 - */
1.109 - private Charset getBodyCharset()
1.110 - {
1.111 - // We espect something like
1.112 - // Content-Type: text/plain; charset=ISO-8859-15
1.113 - String contentType = getHeader(Headers.CONTENT_TYPE)[0];
1.114 - int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
1.115 - int idxCharsetEnd = contentType.indexOf(";", idxCharsetStart);
1.116 -
1.117 - String charsetName = "UTF-8";
1.118 - if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
1.119 - {
1.120 - if(idxCharsetEnd < 0)
1.121 - {
1.122 - charsetName = contentType.substring(idxCharsetStart);
1.123 - }
1.124 - else
1.125 - {
1.126 - charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
1.127 - }
1.128 - }
1.129 -
1.130 - // Sometimes there are '"' around the name
1.131 - if(charsetName.length() > 2 &&
1.132 - charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
1.133 - {
1.134 - charsetName = charsetName.substring(1, charsetName.length() - 2);
1.135 - }
1.136 -
1.137 - // Create charset
1.138 - Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
1.139 - try
1.140 - {
1.141 - charset = Charset.forName(charsetName);
1.142 - }
1.143 - catch(Exception ex)
1.144 - {
1.145 - Log.get().severe(ex.getMessage());
1.146 - Log.get().severe("Article.getBodyCharset(): Unknown charset: " + charsetName);
1.147 - }
1.148 - return charset;
1.149 - }
1.150
1.151 /**
1.152 * @return Numerical IDs of the newsgroups this Article belongs to.