
Commit cafe76e

upgrade to use cassandra 1.x and latest hector
1 parent 95a7856 commit cafe76e

File tree

3 files changed (+347 / -111 lines)

config/src/main/resources/cassandra.yaml

+171 / -53
@@ -21,21 +21,13 @@ cluster_name: 'Test Cluster'
 # a random token, which will lead to hot spots.
 initial_token:

-# Set to true to make new [non-seed] nodes automatically migrate data
-# to themselves from the pre-existing nodes in the cluster. Defaults
-# to false because you can only bootstrap N machines at a time from
-# an existing cluster of N, so if you are bringing up a cluster of
-# 10 machines with 3 seeds you would have to do it in stages. Leaving
-# this off for the initial start simplifies that.
-auto_bootstrap: false
-
 # See http://wiki.apache.org/cassandra/HintedHandoff
 hinted_handoff_enabled: true
 # this defines the maximum amount of time a dead host will have hints
 # generated. After it has been dead this long, hints will be dropped.
 max_hint_window_in_ms: 3600000 # one hour
-# Sleep this long after delivering each row or row fragment
-hinted_handoff_throttle_delay_in_ms: 50
+# Sleep this long after delivering each hint
+hinted_handoff_throttle_delay_in_ms: 1

 # authentication backend, implementing IAuthenticator; used to identify users
 authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
@@ -74,26 +66,92 @@ data_file_directories:
 # commit log
 commitlog_directory: ./tmp/commitlog

+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must store the whole values of
+# its rows, so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is 2 (can hold > 200000 keys). Set to 0 to disable key cache.
+#key_cache_size_in_mb: 2
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+#key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Maximum size of the row cache in memory.
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is 0, to disable row caching.
+#row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should
+# save the row cache. Caches are saved to saved_caches_directory as specified
+# in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+#row_cache_save_period: 0
+
+# Number of keys from the row cache to save
+# Disabled by default, meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# The provider for the row cache to use.
+#
+# Supported values are: ConcurrentLinkedHashCacheProvider, SerializingCacheProvider
+#
+# SerializingCacheProvider serialises the contents of the row and stores
+# it in native memory, i.e., off the JVM Heap. Serialized rows take
+# significantly less memory than "live" rows in the JVM, so you can cache
+# more rows in a given memory footprint. And storing the cache off-heap
+# means you can use smaller heap sizes, reducing the impact of GC pauses.
+#
+# It is also valid to specify the fully-qualified class name of a class
+# that implements org.apache.cassandra.cache.IRowCacheProvider.
+#
+# Defaults to SerializingCacheProvider
+#row_cache_provider: SerializingCacheProvider
+
 # saved caches
 saved_caches_directory: ./tmp/saved_caches

-# Size to allow commitlog to grow to before creating a new segment
-commitlog_rotation_threshold_in_mb: 128
-
 # commitlog_sync may be either "periodic" or "batch."
 # When in batch mode, Cassandra won't ack writes until the commit log
 # has been fsynced to disk. It will wait up to
-# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
+# commitlog_sync_batch_window_in_ms milliseconds for other writes, before
 # performing the sync.
-commitlog_sync: periodic
-
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 50
+#
 # the other option is "periodic" where writes may be acked immediately
 # and the CommitLog is simply synced every commitlog_sync_period_in_ms
 # milliseconds.
+commitlog_sync: periodic
 commitlog_sync_period_in_ms: 10000

-# any class that implements the SeedProvider interface and has a constructor that takes a Map<String, String> of
-# parameters will do.
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
 seed_provider:
     # Addresses of hosts that are deemed contact points.
     # Cassandra nodes use this list of hosts to find each other and learn
@@ -102,6 +160,7 @@ seed_provider:
     - class_name: org.apache.cassandra.locator.SimpleSeedProvider
       parameters:
           # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
           - seeds: "127.0.0.1"

 # emergency pressure valve: each time heap usage after a full (CMS)
@@ -142,12 +201,16 @@ concurrent_reads: 32
 concurrent_writes: 32

 # Total memory to use for memtables. Cassandra will flush the largest
-# memtable when this much memory is used. Prefer using this to
-# the older, per-ColumnFamily memtable flush thresholds.
+# memtable when this much memory is used.
 # If omitted, Cassandra will set it to 1/3 of the heap.
-# If set to 0, only the old flush thresholds are used.
 # memtable_total_space_in_mb: 2048

+# Total space to use for commitlogs.
+# If space gets above this value (it will round up to the next nearest
+# segment multiple), Cassandra will flush every dirty CF in the oldest
+# segment and remove it.
+# commitlog_total_space_in_mb: 4096
+
 # This sets the amount of memtable flush writer threads. These will
 # be blocked by disk io, and each one will hold a memtable in memory
 # while blocked. If you have a large heap and many data directories,
@@ -167,6 +230,10 @@ sliced_buffer_size_in_kb: 64
 # TCP port, for commands and data
 storage_port: 7000

+# SSL port, for encrypted communication. Unused unless enabled in
+# encryption_options
+ssl_storage_port: 7001
+
 # Address to bind to and tell other Cassandra nodes to connect to. You
 # _must_ change this if you want multiple nodes to be able to
 # communicate!
@@ -179,29 +246,53 @@ storage_port: 7000
 # Setting this to 0.0.0.0 is always wrong.
 listen_address: localhost

+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
 # The address to bind the Thrift RPC service to -- clients connect
 # here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
 # you want Thrift to listen on all interfaces.
 #
 # Leaving this blank has the same effect it does for ListenAddress,
 # (i.e. it will be based on the configured hostname of the node).
-rpc_address: 0.0.0.0
+rpc_address: localhost
 # port for Thrift to listen for clients on
 rpc_port: 9160

 # enable or disable keepalive on rpc connections
 rpc_keepalive: true

-# Cassandra uses thread-per-client for client RPC. This can
-# be expensive in memory used for thread stack for a large
-# enough number of clients. (Hence, connection pooling is
-# very, very strongly recommended.)
-#
+# Cassandra provides three options for the RPC Server:
+#
+# sync  -> One connection per thread in the rpc pool (see below).
+#          For a very large number of clients, memory will be your limiting
+#          factor; on a 64 bit JVM, 128KB is the minimum stack size per thread.
+#          Connection pooling is very, very strongly recommended.
+#
+# async -> Nonblocking server implementation with one thread to serve
+#          rpc connections. This is not recommended for high throughput use
+#          cases. Async has been tested to be about 50% slower than sync
+#          or hsha and is deprecated: it will be removed in the next major release.
+#
+# hsha  -> Stands for "half synchronous, half asynchronous." The rpc thread pool
+#          (see below) is used to manage requests, but the threads are multiplexed
+#          across the different clients.
+#
+# The default is sync because on Windows hsha is about 30% slower. On Linux,
+# sync/hsha performance is about the same, with hsha of course using less memory.
+rpc_server_type: sync
+
 # Uncomment rpc_min|max|thread to set request pool size.
-# You would primarily set max as a safeguard against misbehaved
-# clients; if you do hit the max, Cassandra will block until
-# one disconnects before accepting more. The defaults are
-# min of 16 and max unlimited.
+# You would primarily set max for the sync server to safeguard against
+# misbehaved clients; if you do hit the max, Cassandra will block until one
+# disconnects before accepting more. The defaults for sync are min of 16 and max
+# unlimited.
+#
+# For the Hsha server, the min and max both default to quadruple the number of
+# CPU cores.
+#
+# This configuration is ignored by the async server.
 #
 # rpc_min_threads: 16
 # rpc_max_threads: 2048
@@ -231,10 +322,6 @@ incremental_backups: false
 # is a data format change.
 snapshot_before_compaction: false

-# change this to increase the compaction thread's priority. In java, 1 is the
-# lowest priority and that is our default.
-# compaction_thread_priority: 1
-
 # Add column indexes to a row after its contents reach this size.
 # Increase if your column values are large, or if you have a very large
 # number of columns. The competing causes are, Cassandra has to
@@ -249,28 +336,48 @@ column_index_size_in_kb: 64
 # will be logged specifying the row key.
 in_memory_compaction_limit_in_mb: 64

-# Number of compaction threads. This default to the number of processors,
-# enabling multiple compactions to execute at once. Using more than one
-# thread is highly recommended to preserve read performance in a mixed
-# read/write workload as this avoids sstables from accumulating during long
-# running compactions. The default is usually fine and if you experience
-# problems with compaction running too slowly or too fast, you should look at
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair. Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
 # compaction_throughput_mb_per_sec first.
-# Uncomment to make compaction mono-threaded.
+#
+# This setting has no effect on LeveledCompactionStrategy.
+#
+# concurrent_compactors defaults to the number of cores.
+# Uncomment to make compaction mono-threaded, the pre-0.8 default.
 #concurrent_compactors: 1

+# Multi-threaded compaction. When enabled, each compaction will use
+# up to one thread per core, plus one thread per sstable being merged.
+# This is usually only useful for SSD-based hardware: otherwise,
+# your concern is usually to get compaction to do LESS i/o (see:
+# compaction_throughput_mb_per_sec), not more.
+multithreaded_compaction: false
+
 # Throttles compaction to the given total throughput across the entire
 # system. The faster you insert data, the faster you need to compact in
 # order to keep the sstable count down, but in general, setting this to
 # 16 to 32 times the rate you are inserting data is more than sufficient.
-# Setting this to 0 disables throttling.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
 compaction_throughput_mb_per_sec: 16

 # Track cached row keys during compaction, and re-cache their new
 # positions in the compacted sstable. Disable if you use really large
 # key caches.
 compaction_preheat_key_cache: true

+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 400 Mbps or 50 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 400
+
 # Time to wait for a reply from other nodes before failing the command
 rpc_timeout_in_ms: 10000

@@ -294,11 +401,6 @@ rpc_timeout_in_ms: 10000
 # explicitly configured in cassandra-topology.properties.
 endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch

-# dynamic_snitch -- This boolean controls whether the above snitch is
-# wrapped with a dynamic snitch, which will monitor read latencies
-# and avoid reading from hosts that have slowed (due to compaction,
-# for instance)
-dynamic_snitch: true
 # controls how often to perform the more expensive part of host score
 # calculation
 dynamic_snitch_update_interval_in_ms: 100
@@ -312,7 +414,7 @@ dynamic_snitch_reset_interval_in_ms: 600000
 # expressed as a double which represents a percentage. Thus, a value of
 # 0.2 means Cassandra would continue to prefer the static snitch values
 # until the pinned host was 20% worse than the fastest.
-dynamic_snitch_badness_threshold: 0.0
+dynamic_snitch_badness_threshold: 0.1

 # request_scheduler -- Set this to a class that implements
 # RequestScheduler, which will schedule incoming client requests
@@ -354,24 +456,40 @@ request_scheduler: org.apache.cassandra.scheduler.NoScheduler
 # the request scheduling. Currently the only valid option is keyspace.
 # request_scheduler_id: keyspace

-# The Index Interval determines how large the sampling of row keys
-# is for a given SSTable. The larger the sampling, the more effective
-# the index is at the cost of space.
+# index_interval controls the sampling of entries from the primary
+# row index in terms of space versus time. The larger the interval,
+# the smaller and less effective the sampling will be. In technical
+# terms, the interval corresponds to the number of index entries that
+# are skipped between taking each sample. All the sampled entries
+# must fit in memory. Generally, a value between 128 and 512 here
+# coupled with a large key cache size on CFs results in the best trade
+# offs. This value is not often changed, however if you have many
+# very small rows (many to an OS page), then increasing this will
+# often lower memory usage without an impact on performance.
 index_interval: 128

 # Enable or disable inter-node encryption
 # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
 # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
 # suite for authentication, key exchange and encryption of the actual data transfers.
 # NOTE: No custom encryption options are enabled at the moment
-# The available internode options are : all, none
+# The available internode options are : all, none, dc, rack
+#
+# If set to dc cassandra will encrypt the traffic between the DCs
+# If set to rack cassandra will encrypt the traffic between the racks
 #
 # The passwords used in these options must match the passwords used when generating
 # the keystore and truststore. For instructions on generating these files, see:
 # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
+#
 encryption_options:
     internode_encryption: none
     keystore: conf/.keystore
     keystore_password: cassandra
     truststore: conf/.truststore
     truststore_password: cassandra
+    # More advanced defaults below:
+    # protocol: TLS
+    # algorithm: SunX509
+    # store_type: JKS
+    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA]
