Skip to content

Commit

Permalink
Use raw message when manually parsing messages from topic storage (ap…
Browse files Browse the repository at this point in the history
…ache#3146)

* Use raw message when manually parsing messages from topic storage

* Added missing headers

* Fixed copy to byte[]
  • Loading branch information
merlimat authored Dec 10, 2018
1 parent a60fb3f commit bddfa2a
Show file tree
Hide file tree
Showing 12 changed files with 447 additions and 114 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import com.google.common.collect.Sets;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;

import java.util.List;
import java.util.concurrent.TimeUnit;
Expand All @@ -32,12 +32,10 @@
import org.apache.bookkeeper.mledger.impl.PositionImpl;
import org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest;
import org.apache.pulsar.broker.service.persistent.PersistentTopic;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.Producer;
import org.apache.pulsar.client.api.RawMessage;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.common.api.Commands;
import org.apache.pulsar.common.api.proto.PulsarApi.MessageMetadata;
import org.apache.pulsar.common.api.raw.MessageParser;
import org.apache.pulsar.common.api.raw.RawMessage;
import org.apache.pulsar.common.naming.TopicName;
import org.apache.pulsar.common.policies.data.ClusterData;
import org.apache.pulsar.common.policies.data.TenantInfo;
Expand Down Expand Up @@ -66,12 +64,6 @@ public void cleanup() throws Exception {
super.internalCleanup();
}

public static String extractKey(RawMessage m) throws Exception {
ByteBuf headersAndPayload = m.getHeadersAndPayload();
MessageMetadata msgMetadata = Commands.parseMessageMetadata(headersAndPayload);
return msgMetadata.getPartitionKey();
}

@Test
public void testWithoutBatches() throws Exception {
String topic = "persistent://my-tenant/my-ns/my-topic";
Expand All @@ -93,11 +85,11 @@ public void testWithoutBatches() throws Exception {
for (int i = 0; i < n; i++) {
Entry entry = cursor.readEntriesOrWait(1).get(0);

List<Message<?>> messages = Lists.newArrayList();
List<RawMessage> messages = Lists.newArrayList();

try {
MessageParser.parseMessage(topicName, entry.getLedgerId(), entry.getEntryId(), entry.getDataBuffer(),
(messageId, message, payload) -> {
(message) -> {
messages.add(message);
});
} finally {
Expand All @@ -106,7 +98,9 @@ public void testWithoutBatches() throws Exception {

assertEquals(messages.size(), 1);

assertEquals(messages.get(0).getData(), ("hello-" + i).getBytes());
assertEquals(messages.get(0).getData(), Unpooled.wrappedBuffer(("hello-" + i).getBytes()));

messages.forEach(RawMessage::release);
}
}

Expand All @@ -133,11 +127,11 @@ public void testWithBatches() throws Exception {
assertEquals(cursor.getNumberOfEntriesInBacklog(), 1);
Entry entry = cursor.readEntriesOrWait(1).get(0);

List<Message<?>> messages = Lists.newArrayList();
List<RawMessage> messages = Lists.newArrayList();

try {
MessageParser.parseMessage(topicName, entry.getLedgerId(), entry.getEntryId(), entry.getDataBuffer(),
(messageId, message, payload) -> {
(message) -> {
messages.add(message);
});
} finally {
Expand All @@ -147,9 +141,11 @@ public void testWithBatches() throws Exception {
assertEquals(messages.size(), 10);

for (int i = 0; i < n; i++) {
assertEquals(messages.get(i).getData(), ("hello-" + i).getBytes());
assertEquals(messages.get(i).getData(), Unpooled.wrappedBuffer(("hello-" + i).getBytes()));
}

messages.forEach(RawMessage::release);

producer.close();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.client.impl;
package org.apache.pulsar.common.api.raw;

import static com.scurrilous.circe.checksum.Crc32cIntChecksum.computeChecksum;
import static org.apache.pulsar.common.api.Commands.hasChecksum;
Expand All @@ -25,17 +25,13 @@
import io.netty.buffer.ByteBuf;

import java.io.IOException;
import java.util.Optional;

import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;

import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.common.api.Commands;
import org.apache.pulsar.common.api.PulsarDecoder;
import org.apache.pulsar.common.api.proto.PulsarApi;
import org.apache.pulsar.common.api.proto.PulsarApi.MessageIdData;
import org.apache.pulsar.common.api.proto.PulsarApi.MessageMetadata;
import org.apache.pulsar.common.compression.CompressionCodec;
import org.apache.pulsar.common.compression.CompressionCodecProvider;
Expand All @@ -45,7 +41,7 @@
@Slf4j
public class MessageParser {
public interface MessageProcessor {
void process(MessageId messageId, Message<?> message, ByteBuf payload);
void process(RawMessage message);
}

/**
Expand All @@ -54,36 +50,29 @@ public interface MessageProcessor {
*/
public static void parseMessage(TopicName topicName, long ledgerId, long entryId, ByteBuf headersAndPayload,
MessageProcessor processor) throws IOException {
MessageIdImpl msgId = new MessageIdImpl(ledgerId, entryId, -1);

MessageIdData.Builder messageIdBuilder = MessageIdData.newBuilder();
messageIdBuilder.setLedgerId(ledgerId);
messageIdBuilder.setEntryId(entryId);
MessageIdData messageId = messageIdBuilder.build();

MessageMetadata msgMetadata = null;
ByteBuf payload = headersAndPayload;
ByteBuf uncompressedPayload = null;

try {
if (!verifyChecksum(headersAndPayload, messageId, topicName.toString(), "reader")) {
if (!verifyChecksum(topicName, headersAndPayload, ledgerId, entryId)) {
// discard message with checksum error
return;
}

try {
msgMetadata = Commands.parseMessageMetadata(payload);
} catch (Throwable t) {
log.warn("[{}] Failed to deserialize metadata for message {} - Ignoring", topicName, messageId);
log.warn("[{}] Failed to deserialize metadata for message {}:{} - Ignoring", topicName, ledgerId, entryId);
return;
}

if (msgMetadata.getEncryptionKeysCount() > 0) {
throw new IOException("Cannot parse encrypted message " + msgMetadata + " on topic " + topicName);
}

uncompressedPayload = uncompressPayloadIfNeeded(messageId, msgMetadata, headersAndPayload,
topicName.toString(), "reader");
uncompressedPayload = uncompressPayloadIfNeeded(topicName, msgMetadata, headersAndPayload, ledgerId,
entryId);

if (uncompressedPayload == null) {
// Message was discarded on decompression error
Expand All @@ -93,67 +82,60 @@ public static void parseMessage(TopicName topicName, long ledgerId, long entryId
final int numMessages = msgMetadata.getNumMessagesInBatch();

if (numMessages == 1 && !msgMetadata.hasNumMessagesInBatch()) {
final MessageImpl<?> message = new MessageImpl<>(topicName.toString(),
msgId, msgMetadata, uncompressedPayload,
null, null);
processor.process(msgId, message, uncompressedPayload);

processor.process(RawMessageImpl.get(msgMetadata, null, uncompressedPayload, ledgerId, entryId, 0));
} else {
// handle batch message enqueuing; uncompressed payload has all messages in batch
receiveIndividualMessagesFromBatch(topicName.toString(), msgMetadata, uncompressedPayload, messageId, null, -1, processor);
receiveIndividualMessagesFromBatch(msgMetadata, uncompressedPayload, ledgerId, entryId, processor);
}
} finally {
if (uncompressedPayload != null) {
uncompressedPayload.release();
}

messageIdBuilder.recycle();
messageId.recycle();
msgMetadata.recycle();
}
}

public static boolean verifyChecksum(ByteBuf headersAndPayload, MessageIdData messageId, String topic,
String subscription) {
public static boolean verifyChecksum(TopicName topic, ByteBuf headersAndPayload, long ledgerId, long entryId) {
if (hasChecksum(headersAndPayload)) {
int checksum = readChecksum(headersAndPayload);
int computedChecksum = computeChecksum(headersAndPayload);
if (checksum != computedChecksum) {
log.error(
"[{}][{}] Checksum mismatch for message at {}:{}. Received checksum: 0x{}, Computed checksum: 0x{}",
topic, subscription, messageId.getLedgerId(), messageId.getEntryId(),
Long.toHexString(checksum), Integer.toHexString(computedChecksum));
"[{}] Checksum mismatch for message at {}:{}. Received checksum: 0x{}, Computed checksum: 0x{}",
topic, ledgerId, entryId, Long.toHexString(checksum), Integer.toHexString(computedChecksum));
return false;
}
}

return true;
}

public static ByteBuf uncompressPayloadIfNeeded(MessageIdData messageId, MessageMetadata msgMetadata,
ByteBuf payload, String topic, String subscription) {
public static ByteBuf uncompressPayloadIfNeeded(TopicName topic, MessageMetadata msgMetadata,
ByteBuf payload, long ledgerId, long entryId) {
CompressionCodec codec = CompressionCodecProvider.getCompressionCodec(msgMetadata.getCompression());
int uncompressedSize = msgMetadata.getUncompressedSize();
int payloadSize = payload.readableBytes();
if (payloadSize > PulsarDecoder.MaxMessageSize) {
// payload size is itself corrupted since it cannot be bigger than the MaxMessageSize
log.error("[{}][{}] Got corrupted payload message size {} at {}", topic, subscription, payloadSize,
messageId);
log.error("[{}] Got corrupted payload message size {} at {}:{}", topic, payloadSize,
ledgerId, entryId);
return null;
}

try {
ByteBuf uncompressedPayload = codec.decode(payload, uncompressedSize);
return uncompressedPayload;
} catch (IOException e) {
log.error("[{}][{}] Failed to decompress message with {} at {}: {}", topic, subscription,
msgMetadata.getCompression(), messageId, e.getMessage(), e);
log.error("[{}] Failed to decompress message with {} at {}:{} : {}", topic,
msgMetadata.getCompression(), ledgerId, entryId, e.getMessage(), e);
return null;
}
}

public static void receiveIndividualMessagesFromBatch(String topic, MessageMetadata msgMetadata,
ByteBuf uncompressedPayload, MessageIdData messageId, ClientCnx cnx,
int partitionIndex, MessageProcessor processor) {
private static void receiveIndividualMessagesFromBatch(MessageMetadata msgMetadata,
ByteBuf uncompressedPayload, long ledgerId, long entryId, MessageProcessor processor) {
int batchSize = msgMetadata.getNumMessagesInBatch();

try {
Expand All @@ -167,20 +149,11 @@ public static void receiveIndividualMessagesFromBatch(String topic, MessageMetad
// message has been compacted out, so don't send to the user
singleMessagePayload.release();
singleMessageMetadataBuilder.recycle();

continue;
}

BatchMessageIdImpl batchMessageIdImpl = new BatchMessageIdImpl(messageId.getLedgerId(),
messageId.getEntryId(), partitionIndex, i, null);
final MessageImpl<?> message = new MessageImpl<>(
topic, batchMessageIdImpl, msgMetadata,
singleMessageMetadataBuilder.build(), singleMessagePayload, Optional.empty(), cnx, null);

processor.process(batchMessageIdImpl, message, singleMessagePayload);

singleMessagePayload.release();
singleMessageMetadataBuilder.recycle();
processor.process(RawMessageImpl.get(msgMetadata, singleMessageMetadataBuilder, singleMessagePayload,
ledgerId, entryId, i));
}
} catch (IOException e) {
log.warn("Unable to obtain messages in batch", e);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.common.api.raw;

import io.netty.buffer.ByteBuf;

import java.util.Map;
import java.util.Optional;

/**
* View of a message that exposes the internal direct-memory buffer for more efficient processing.
*
* The message needs to be released when the processing is done.
*/
public interface RawMessage {

/**
* Release all the resources associated with this raw message
*/
void release();

/**
* Return the properties attached to the message.
*
* Properties are application defined key/value pairs that will be attached to the message
*
* @return an unmodifiable view of the properties map
*/
Map<String, String> getProperties();

/**
* Get the content of the message
*
* @return the byte array with the message payload
*/
ByteBuf getData();

/**
* Get the unique message ID associated with this message.
*
* The message id can be used to univocally refer to a message without having the keep the entire payload in memory.
*
* Only messages received from the consumer will have a message id assigned.
*
* @return the message id null if this message was not received by this client instance
*/
RawMessageId getMessageId();

/**
* Get the publish time of this message. The publish time is the timestamp that a client publish the message.
*
* @return publish time of this message.
* @see #getEventTime()
*/
long getPublishTime();

/**
* Get the event time associated with this message. It is typically set by the applications via
* {@link MessageBuilder#setEventTime(long)}.
*
* <p>
* If there isn't any event time associated with this event, it will return 0.
*/
long getEventTime();

/**
* Get the sequence id associated with this message. It is typically set by the applications via
* {@link MessageBuilder#setSequenceId(long)}.
*
* @return sequence id associated with this message.
* @see MessageBuilder#setEventTime(long)
*/
long getSequenceId();

/**
* Get the producer name who produced this message.
*
* @return producer name who produced this message, null if producer name is not set.
*/
String getProducerName();

/**
* Get the key of the message
*
* @return the key of the message
*/
Optional<String> getKey();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.common.api.raw;

public interface RawMessageId {

}
Loading

0 comments on commit bddfa2a

Please sign in to comment.