Skip to content

Commit

Permalink
javadoc; bug fixes; thrift support; refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
julienledem committed Mar 2, 2013
1 parent 6bf2661 commit af20471
Show file tree
Hide file tree
Showing 49 changed files with 1,450 additions and 1,183 deletions.
2 changes: 1 addition & 1 deletion parquet-column/src/main/java/parquet/Log.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class Log {
/**
* this is the compile time log level
*/
public static final Level LEVEL = Level.INFO;
public static final Level LEVEL = Level.FINE;

public static final boolean DEBUG = (LEVEL.intValue() <= Level.FINE.intValue());
public static final boolean INFO = (LEVEL.intValue() <= Level.INFO.intValue());
Expand Down
2 changes: 1 addition & 1 deletion parquet-column/src/main/java/parquet/bytes/BytesUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class BytesUtils {
* @return the number of bits required
*/
/**
 * Returns the number of bits required to represent any value in the range
 * {@code [0, bound]}, i.e. {@code ceil(log2(bound + 1))}.
 *
 * @param bound the maximum value that must be representable (assumed non-negative)
 * @return the number of bits required (0 when {@code bound} is 0)
 */
public static int getWidthFromMaxInt(int bound) {
  // Exact integer arithmetic: equivalent to ceil(log2(bound + 1)) but immune
  // to floating-point rounding near powers of two and to int overflow when
  // bound == Integer.MAX_VALUE.
  return 32 - Integer.numberOfLeadingZeros(bound);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,45 +20,78 @@
/**
 * Base class to implement the decoding of a column for a given encoding.
 *
 * Pages are homogeneous (store a single type).
 * Usually only one of the read*() methods is overridden.
*
* @author Julien Le Dem
*
*/
public abstract class PrimitiveColumnReader {

/**
* to initialize the column reader
* Called to initialize the column reader with a new page.
*
* The underlying implementation knows how much data to read
* @param in the array to read from
* @param offset where to start reading from
* @return the next offset to read from
* <ul>The page contains the bytes for:
* <li> repetition levels column
* <li> definition levels column
* <li> data column
* </ul>
* Each column reader knows how much data to read and returns the next offset for the next column
* The data column always reads to the end and returns the array size.
* @param valueCount count of values in this page
* @param page the array to read from containing the page data (repetition levels, definition levels, data)
* @param offset where to start reading from in the page
* @return the offset to read from the next column (the page length in the case of the data column which is last in the page)
* @throws IOException
*/
public abstract int initFromPage(long valueCount, byte[] in, int offset) throws IOException;
public abstract int initFromPage(long valueCount, byte[] page, int offset) throws IOException;

/**
 * Reads the next value as a boolean.
 *
 * @return the next boolean from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support booleans
 */
public boolean readBoolean() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as a byte (widened to an int).
 *
 * @return the next byte from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support bytes
 */
public int readByte() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as a float.
 *
 * @return the next float from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support floats
 */
public float readFloat() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as a byte array.
 *
 * @return the next byte array from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support byte arrays
 */
public byte[] readBytes() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as a double.
 *
 * @return the next double from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support doubles
 */
public double readDouble() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as an int.
 *
 * @return the next int from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support ints
 */
public int readInteger() {
throw new UnsupportedOperationException();
}

/**
 * Reads the next value as a long.
 *
 * @return the next long from the page
 * @throws UnsupportedOperationException if this reader's encoding does not support longs
 */
public long readLong() {
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,56 +15,90 @@
*/
package parquet.example;

import java.util.List;

import parquet.io.convert.Converter;
import parquet.io.convert.GroupConverter;
import parquet.io.convert.PrimitiveConverter;
import parquet.io.convert.RecordConverter;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.PrimitiveType;
import parquet.schema.TypeConverter;

/**
* Dummy implementation for perf tests
*
* @author Julien Le Dem
*
*/
public final class DummyRecordConverter extends
RecordConverter<Object> {
Object a;
public final class DummyRecordConverter extends RecordConverter<Object> {

public Object getCurrentRecord() {
return a;
}
private Object a;
private GroupConverter root;

public GroupConverter getGroupConverter(int fieldIndex) {
return new DummyRecordConverter();
}
public DummyRecordConverter(MessageType schema) {
this.root = (GroupConverter)schema.convertWith(new TypeConverter<Converter>() {

public PrimitiveConverter getPrimitiveConverter(int fieldIndex) {
return new PrimitiveConverter() {
public void addBinary(byte[] value) {
a = value;
}
public void addBoolean(boolean value) {
a = value;
}
public void addDouble(double value) {
a = value;
}
public void addFloat(float value) {
a = value;
@Override
public Converter convertPrimitiveType(List<GroupType> path, PrimitiveType primitiveType) {
return new PrimitiveConverter() {

public void addBinary(byte[] value) {
a = value;
}
public void addBoolean(boolean value) {
a = value;
}
public void addDouble(double value) {
a = value;
}
public void addFloat(float value) {
a = value;
}
public void addInt(int value) {
a = value;
}
public void addLong(long value) {
a = value;
}
};
}
public void addInt(int value) {
a = value;

@Override
public Converter convertGroupType(List<GroupType> path, GroupType groupType, final List<Converter> converters) {
return new GroupConverter() {

public Converter getConverter(int fieldIndex) {
return converters.get(fieldIndex);
}

public void start() {
a = "start()";
}

public void end() {
a = "end()";
}

};
}
public void addLong(long value) {
a = value;

@Override
public Converter convertMessageType(MessageType messageType, List<Converter> children) {
return convertGroupType(null, messageType, children);
}
};
});
}

public void start() {
a = "start()";
@Override
public Object getCurrentRecord() {
return a;
}

public void end() {
a = "end()";
@Override
public GroupConverter getRootConverter() {
return root;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,30 @@

import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.io.convert.GroupConverter;
import parquet.io.convert.RecordConverter;
import parquet.schema.MessageType;

public class GroupRecordConverter extends SimpleGroupConverter {
public class GroupRecordConverter extends RecordConverter<Group> {

private final SimpleGroupFactory simpleGroupFactory;

private Group current;

private GroupConverter root;

public GroupRecordConverter(MessageType schema) {
super(null, -1, schema);
this.simpleGroupFactory = new SimpleGroupFactory(schema);
this.root = new SimpleGroupConverter(null, 0, schema) {
@Override
public void start() {
current = simpleGroupFactory.newGroup();
}

@Override
public void end() {
}
};
}

@Override
Expand All @@ -36,12 +49,8 @@ public Group getCurrentRecord() {
}

@Override
public void start() {
current = simpleGroupFactory.newGroup();
}

@Override
public void end() {
public GroupConverter getRootConverter() {
return root;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,29 @@
package parquet.example.data.simple.convert;

import parquet.example.data.Group;
import parquet.io.convert.Converter;
import parquet.io.convert.GroupConverter;
import parquet.io.convert.PrimitiveConverter;
import parquet.io.convert.RecordConverter;
import parquet.schema.GroupType;
import parquet.schema.Type;

class SimpleGroupConverter extends RecordConverter<Group> {
class SimpleGroupConverter extends GroupConverter {
private final SimpleGroupConverter parent;
private final int index;
private Group current;
private GroupConverter[] groupConverters;
private PrimitiveConverter[] primitiveConverters;
private Converter[] converters;

SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
this.parent = parent;
this.index = index;

groupConverters = new GroupConverter[schema.getFieldCount()];
primitiveConverters = new PrimitiveConverter[schema.getFieldCount()];
converters = new Converter[schema.getFieldCount()];

for (int i = 0; i < groupConverters.length; i++) {
for (int i = 0; i < converters.length; i++) {
final Type type = schema.getType(i);
if (type.isPrimitive()) {
primitiveConverters[i] = new SimplePrimitiveConverter(this, i);
converters[i] = new SimplePrimitiveConverter(this, i);
} else {
groupConverters[i] = new SimpleGroupConverter(this, i, type.asGroupType());
converters[i] = new SimpleGroupConverter(this, i, type.asGroupType());
}

}
Expand All @@ -53,20 +50,14 @@ public void start() {
}

@Override
public PrimitiveConverter getPrimitiveConverter(int fieldIndex) {
return primitiveConverters[fieldIndex];
}

@Override
public GroupConverter getGroupConverter(int fieldIndex) {
return groupConverters[fieldIndex];
public Converter getConverter(int fieldIndex) {
return converters[fieldIndex];
}

@Override
public void end() {
}

@Override
public Group getCurrentRecord() {
return current;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.pig;
package parquet.io;

import java.util.ArrayDeque;
import java.util.Deque;

import parquet.io.RecordConsumer;
import parquet.io.convert.GroupConverter;
import parquet.io.convert.PrimitiveConverter;
import parquet.schema.MessageType;
Expand Down Expand Up @@ -58,9 +57,9 @@ public void startField(String field, int index) {
typePath.push(currentType);
currentType = currentType.asGroupType().getType(index);
if (currentType.isPrimitive()) {
currentPrimitive = current.getPrimitiveConverter(index);
currentPrimitive = current.getConverter(index).asPrimitiveConverter();
} else {
current = current.getGroupConverter(index);
current = current.getConverter(index).asGroupConverter();
}
}

Expand Down
Loading

0 comments on commit af20471

Please sign in to comment.