# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Pig configuration file. All values can be overwritten by command line
# arguments; for a description of the properties, run
#
# pig -h properties
#
############################################################################
#
# == Logging properties
#
# Location of pig log file. If blank, a file with a timestamped slug
# ('pig_1399336559369.log') will be generated in the current working directory.
#
# pig.logfile=
# pig.logfile=/tmp/pig-err.log
# Log4j configuration file. Set at runtime with the -4 parameter. The source
# distribution has a ./conf/log4j.properties.template file you can rename and
# customize.
#
# log4jconf=./conf/log4j.properties
# Verbose Output.
# * false (default): print only INFO and above to screen
# * true: Print all log messages to screen
#
# verbose=false
# Omit timestamps on log messages. (default: false)
#
# brief=false
# Logging level. debug=OFF|ERROR|WARN|INFO|DEBUG (default: INFO)
#
# debug=INFO
# Roll up warnings across tasks, so that when millions of mappers suddenly cry
# out in error they are partially silenced. (default, recommended: true)
#
# aggregate.warning=true
# Should DESCRIBE pretty-print its schema?
# * false (default): print on a single line, suitable for pasting back into your script
# * true (recommended): prints on multiple lines with indentation, much more readable
#
# pig.pretty.print.schema=false
# === Profiling UDFs ===
# Turn on UDF timers? This will cause two counters to be
# tracked for every UDF and LoadFunc in your script: approx_microsecs measures
# the approximate time spent inside a UDF; approx_invocations reports the
# approximate number of times the UDF was invoked.
#
# * false (default): do not record timing information of UDFs.
# * true: report UDF performance. Uses more counters, but gives more insight
# into script operation
#
# pig.udf.profile=false
# Specify frequency of profiling (default: every 100th).
# pig.udf.profile.frequency=100
############################################################################
#
# == Site-specific Properties
#
# Execution Mode. Local mode is much faster, but only suitable for small amounts
# of data. Local mode interprets paths on the local file system; MapReduce mode
# interprets them on HDFS. Read more under 'Execution Modes' within the Getting Started
# documentation.
#
# * mapreduce (default): use the Hadoop cluster defined in your Hadoop config files
# * local: use local mode
# * tez: use Tez on Hadoop cluster
# * tez_local: use Tez local mode
#
# exectype=mapreduce
# Bootstrap file with default statements to execute in every Pig job, similar to
# .bashrc. If blank, uses the file '.pigbootup' from your home directory; if a
# value is supplied, '.pigbootup' is NOT loaded and the supplied file is used
# instead. This does not do tilde expansion -- you must supply the full path to
# the file.
#
# pig.load.default.statements=
# pig.load.default.statements=/home/bob/.pigrc
# Kill all waiting/running MR jobs upon an MR job failure? (default: false) If
# false, jobs that can proceed independently will do so unless a parent stage
# fails. If true, the failure of any stage in the script kills all jobs.
#
# stop.on.failure=false
# File containing the pig script to run. Rarely set in the properties file.
# Commandline: -f
#
# file=
# Jarfile to load, colon separated. Rarely used.
#
# jar=
# Register additional .jar files to use with your Pig script.
# Most typically used as a command line option (see http://pig.apache.org/docs/r0.12.0/basic.html#register):
#
# pig -Dpig.additional.jars=hdfs://nn.mydomain.com:9020/myjars/my.jar
#
# pig.additional.jars=<colon separated list of jars with optional wildcards>
# pig.additional.jars=/usr/local/share/pig/pig/contrib/piggybank/java/piggybank.jar:/usr/local/share/pig/datafu/datafu-pig/build/libs/datafu-pig-1.2.1.jar
# Specify potential packages to which a UDF or a group of UDFs belong,
# eliminating the need to qualify the UDF on every call. See
# http://pig.apache.org/docs/r0.12.0/udf.html#use-short-names
#
# Commandline use:
#
# pig \
# -Dpig.additional.jars=$PIG_HOME/contrib/piggybank/java/piggybank.jar:$PIG_HOME/../datafu/datafu-pig/build/libs/datafu-pig-1.2.1.jar \
# -Dudf.import.list=org.apache.pig.piggybank.evaluation:datafu.pig.util \
# happy_job.pig
#
# udf.import.list=<colon separated list of imports>
# udf.import.list=org.apache.pig.piggybank.evaluation:datafu.pig.bags:datafu.pig.hash:datafu.pig.stats:datafu.pig.util
#
# Reuse jars across jobs run by the same user? (default: false) If enabled, jars
# are placed in ${pig.user.cache.location}/${user.name}/.pigcache. Since most
# jars change infrequently, this gives a minor speedup.
#
# pig.user.cache.enabled=false
# Base path for storing jars cached by the pig.user.cache.enabled feature. (default: /tmp)
#
# pig.user.cache.location=/tmp
# Replication factor for cached jars. If not specified mapred.submit.replication
# is used, whose default is 10.
#
# pig.user.cache.replication=10
# Default UTC offset. (default: the host's current UTC offset) Supply a UTC
# offset in Java's timezone format: e.g., +08:00.
#
# pig.datetime.default.tz=
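# For example, using the +08:00 offset mentioned above (illustrative value):
# pig.datetime.default.tz=+08:00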
# Path to download the artifacts when registering ivy coordinates. This defaults
# to the directory grape uses for downloading libraries.
# (default: ~/.groovy/grapes)
#
# pig.artifacts.download.location=
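# For example, pointing at the default grapes directory of a hypothetical user
# 'bob' (illustrative path only):
# pig.artifacts.download.location=/home/bob/.groovy/grapes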
############################################################################
#
# Memory impacting properties
#
# Amount of memory (as fraction of heap) allocated to bags before a spill is
# forced. Default is 0.2, meaning 20% of available memory. Note that this memory
# is shared across all large bags used by the application. See
# http://pig.apache.org/docs/r0.12.0/perf.html#memory-management
#
# pig.cachedbag.memusage=0.2
# Don't spill bags smaller than this size (bytes). Default: 5000000, or about
# 5MB. Usually, more spilling means a longer runtime, so you might want to tune
# it according to the heap size of each task and so forth.
#
# pig.spill.size.threshold=5000000
# EXPERIMENTAL: If a file bigger than this size (bytes) is spilled -- thus
# freeing a bunch of RAM -- tell the JVM to perform garbage collection. This
# should help reduce the number of files being spilled, but causes more-frequent
# garbage collection. Default: 40000000 (about 40 MB)
#
# pig.spill.gc.activation.size=40000000
# Maximum amount of data to replicate using the distributed cache when doing
# fragment-replicated join. (default: 1000000000, about 1GB) Consider increasing
# this in a production environment, but carefully.
#
# pig.join.replicated.max.bytes=1000000000
# Fraction of heap available for the reducer to perform a skewed join. A low
# fraction forces Pig to use more reducers, but increases the copying cost. See
# http://pig.apache.org/docs/r0.12.0/perf.html#skewed-joins
#
# pig.skewedjoin.reduce.memusage=0.3
#
# === SchemaTuple ===
#
# The SchemaTuple feature (PIG-2632) uses a tuple's schema (when known) to
# generate a custom Java class to hold records. Otherwise, tuples are loaded as
# a plain list that is unaware of its contents' schema -- and so each element
# has to be wrapped as a Java object on its own. SchemaTuple can provide more
# efficient CPU utilization and serialization, and, most of all, lower memory usage.
#
# This feature is considered experimental and is off by default. You can
# selectively enable it for specific operations using pig.schematuple.udf,
# pig.schematuple.load, pig.schematuple.fr_join and pig.schematuple.merge_join
#
# Enable the SchemaTuple optimization in all available cases? (default: false; recommended: true)
#
# pig.schematuple=false
# EXPERIMENTAL: Use SchemaTuples with UDFs (default: value of pig.schematuple).
# pig.schematuple.udf=false
# EXPERIMENTAL, CURRENTLY NOT IMPLEMENTED, but in the future, LoadFuncs with
# known schemas should output SchemaTuples. (default: value of pig.schematuple)
# pig.schematuple.load=false
# EXPERIMENTAL: Use SchemaTuples in replicated joins. The potential memory
# saving here is significant. (default: value of pig.schematuple)
# pig.schematuple.fr_join=false
# EXPERIMENTAL: Use SchemaTuples in merge joins. (default: value of pig.schematuple).
# pig.schematuple.merge_join=false
############################################################################
#
# Serialization options
#
# Omit empty part files from the output? (default: false)
#
# * false (default): each reducer generates an output file, even if its output is empty
# * true (recommended): do not generate zero-byte part files
#
# The default behavior of MapReduce is to generate an empty file when there is
# no data, so Pig follows that. But many small files can cause annoying extra
# map tasks and put load on the HDFS, so consider setting this to 'true'.
#
# pig.output.lazy=false
#
# === Tempfile Handling
#
# EXPERIMENTAL: Storage format for temporary files generated by intermediate
# stages of Pig jobs. This can provide significant speed increases for certain
# codecs, as reducing the amount of data transferred to and from disk can more
# than make up for the cost of compression/decompression. We recommend that you
# set up LZO compression in Hadoop and specify tfile storage.
#
# Compress temporary files?
# * false (default): do not compress
# * true (recommended): compress temporary files.
#
# pig.tmpfilecompression=false
# pig.tmpfilecompression=true
# Tempfile storage container type.
#
# * tfile (default, recommended): more efficient, but only supports gz(gzip) and lzo compression.
# https://issues.apache.org/jira/secure/attachment/12396286/TFile%20Specification%2020081217.pdf
# * seqfile: only supports gz(gzip), lzo, snappy, and bzip2 compression
#
# pig.tmpfilecompression.storage=tfile
# Codec types for intermediate job files. tfile supports gz(gzip) and lzo;
# seqfile supports gz(gzip), lzo, snappy, bzip2
#
# * lzo (recommended with caveats): moderate compression, low cpu burden;
# typically leads to a noticeable speedup. Best default choice, but you must
# set up LZO independently due to license incompatibility
# * snappy: moderate compression, low cpu burden; typically leads to a noticeable speedup.
# * gz (default): higher compression, high CPU burden. Typically leads to a noticeable slowdown.
# * bzip2: most compression, major CPU burden. Typically leads to a noticeable slowdown.
#
# pig.tmpfilecompression.codec=gzip
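# For example, to follow the LZO recommendation above (a sketch; assumes LZO is
# already set up in your Hadoop installation):
# pig.tmpfilecompression=true
# pig.tmpfilecompression.storage=tfile
# pig.tmpfilecompression.codec=lzo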
#
# === Split Combining
#
#
# Should pig try to combine small files for fewer map tasks? This improves the
# efficiency of jobs with many small input files, reduces the overhead on the
# jobtracker, and reduces the number of output files a map-only job
# produces. However, it only works with certain loaders and increases non-local
# map tasks. See http://pig.apache.org/docs/r0.12.0/perf.html#combine-files
#
# * false (default, recommended): _do_ combine files
# * true: do not combine files
#
# pig.noSplitCombination=false
#
# Size, in bytes, of data to be processed by a single map. Smaller files are
# combined until this size is reached. If unset, defaults to the file system's
# default block size.
#
# pig.maxCombinedSplitSize=
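# For example, to combine small files into splits of roughly one 128MB block
# (illustrative value; 134217728 bytes = 128MB):
# pig.maxCombinedSplitSize=134217728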
# ###########################################################################
#
# Execution options
#
# Should pig omit combiners? (default, recommended: false -- meaning pig _will_
# use combiners)
#
# When combiners work well, they eliminate a significant amount of
# data. However, if they do not eliminate much data -- say, a DISTINCT operation
# that only eliminates 5% of the records -- they add a noticeable overhead to
# the job. So the recommended default is false (use combiners), selectively
# disabling them per-job:
#
# pig -Dpig.exec.nocombiner=true distinct_but_not_too_much.pig
#
# pig.exec.nocombiner=false
# EXPERIMENTAL: Aggregate records in map task before sending to the combiner?
# (default: false, 10; recommended: true, 10). In cases where there is a massive
# reduction of data in the aggregation step, pig can do a first pass of
# aggregation before the data even leaves the mapper, saving much serialization
# overhead. It's off by default but can give a major improvement to
# group-and-aggregate operations. Pig skips partial aggregation unless reduction
# is better than a factor of minReduction (default: 10). See
# http://pig.apache.org/docs/r0.12.0/perf.html#hash-based-aggregation
#
# pig.exec.mapPartAgg=false
# pig.exec.mapPartAgg.minReduction=10
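# For example, to enable map-side partial aggregation for a single run, with a
# lower reduction factor (illustrative values and script name):
#
# pig -Dpig.exec.mapPartAgg=true -Dpig.exec.mapPartAgg.minReduction=3 group_and_count.pig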
#
# === Control how many reducers are used.
#
# Estimate number of reducers naively using a fixed amount of data per
# reducer. Optimally, you have both fewer reducers than available reduce slots,
# and reducers that are neither getting too little data (less than a half-GB or
# so) nor too much data (more than 2-3 times the reducer child process max heap
# size). The default of 1000000000 (about 1GB) is probably low for a production
# cluster -- however it's much worse to set this too high (reducers spill many
# times over in group-sort) than too low (delay waiting for reduce slots).
#
# pig.exec.reducers.bytes.per.reducer=1000000000
#
# Don't ever use more than this many reducers. (default: 999)
#
# pig.exec.reducers.max=999
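# For example, to aim for roughly 2GB per reducer and cap the job at 400 reducers
# (illustrative values; tune to your cluster):
# pig.exec.reducers.bytes.per.reducer=2000000000
# pig.exec.reducers.max=400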
#
# === Local mode for small jobs
#
# EXPERIMENTAL: Use local mode for small jobs? If true, jobs with input data
# size smaller than pig.auto.local.input.maxbytes bytes and one or no reducers
# are run in local mode, which is much faster. Note that file paths are still
# interpreted as pig.exectype implies.
#
# * true (recommended): allow local mode for small jobs, which is much faster.
# * false (default): always use pig.exectype.
#
# pig.auto.local.enabled=false
#
# Definition of a small job for the pig.auto.local.enabled feature. Only jobs
# with less than this many bytes of input are candidates to run locally
# (default: 100000000 bytes, about 100MB).
#
# pig.auto.local.input.maxbytes=100000000
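# For example, to run jobs with less than about 500MB of input locally
# (illustrative values):
# pig.auto.local.enabled=true
# pig.auto.local.input.maxbytes=500000000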
#
# Should Pig use hadoop's BZipCodec for bzip2 input? (for PigStorage and TextLoader)
# Only available for hadoop 2.X and later; ignored for other versions. (Default: true)
#
# pig.bzip.use.hadoop.inputformat=true
############################################################################
#
# Security Features
#
# Comma-delimited list of commands/operators that are disallowed. This security
# feature can be used by administrators to block use of certain commands by
# users.
#
# * <blank> (default): all commands and operators are allowed.
# * fs,set (for example): block all filesystem commands and config changes from pig scripts.
#
# pig.blacklist=
# pig.blacklist=fs,set
# Comma-delimited list of the only commands/operators that are allowed. This
# security feature can be used by administrators to restrict users to an approved
# set of commands.
#
# * <blank> (default): all commands and operators not on the pig.blacklist are allowed.
# * load,store,filter,group (for example): only LOAD, STORE, FILTER, and GROUP are allowed
# from pig scripts. All other commands and operators will fail.
#
# pig.whitelist=
# pig.whitelist=load,store,filter,group
#####################################################################
#
# Advanced Site-specific Customizations
#
# Remove intermediate output files?
#
# * true (default, recommended): remove the files
# * false: do NOT remove the files. You must clean them up yourself.
#
# Keeping them is useful for advanced debugging, but can be dangerous -- you
# must clean them up yourself. Inspect the intermediate outputs with
#
# LOAD '/path/to/tmp/file' USING org.apache.pig.impl.io.TFileStorage();
#
# (Or ...SequenceFileInterStorage if pig.tmpfilecompression.storage is seqfile)
#
# pig.delete.temp.files=true
# EXPERIMENTAL: A Pig Progress Notification Listener (PPNL) lets you wire pig's
# progress into your visibility stack. To use a PPNL, supply the fully qualified
# class name of a PPNL implementation. Note that only one PPNL can be set up, so
# if you need several, write a PPNL that will chain them.
#
# See https://github.com/twitter/ambrose for a pretty awesome one of these
#
# pig.notification.listener=<fully qualified class name of a PPNL implementation>
# String argument to pass to your PPNL constructor (optional). Only a single
# string value is allowed. (default none)
#
# pig.notification.listener.arg=<somevalue>
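# For example, wiring in a hypothetical chaining listener with a single string
# argument (both the class and the argument are placeholders, not shipped with Pig):
# pig.notification.listener=com.example.pig.ChainingProgressListener
# pig.notification.listener.arg=job-dashboard-01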
# EXPERIMENTAL: Class invoked to estimate the number of reducers to use.
# (default: org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.InputSizeReducerEstimator)
#
# If you don't know how or why to write a PigReducerEstimator, you're unlikely
# to use this. By default, the naive mapReduceLayer.InputSizeReducerEstimator is
# used, but you can specify anything implementing the interface
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigReducerEstimator
#
# pig.exec.reducer.estimator=<fully qualified class name of a PigReducerEstimator implementation>
# Optional String argument to pass to your PigReducerEstimator. (default: none;
# a single String argument is allowed).
#
# pig.exec.reducer.estimator.arg=<somevalue>
# Class invoked to report the size of reducers output. By default, the reducers'
# output is computed as the total size of output files. But not every storage is
# file-based, and so this logic can be replaced by implementing the interface
# org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigStatsOutputSizeReader
# If you need to register more than one reader, you can register them as a comma
# separated list. Every reader implements a boolean supports(POStore sto) method.
# When more than one reader is registered, they are consulted in order, and the
# first one whose supports() method returns true will be used.
#
# pig.stats.output.size.reader=<fully qualified class name of a PigStatsOutputSizeReader implementation>
# pig.stats.output.size.reader.unsupported=<comma separated list of StoreFuncs that are not supported by this reader>
# By default, Pig retrieves TaskReports for every launched task to compute
# various job statistics. But this can cause OOM if the number of tasks is
# large. In such cases, you can disable it by setting this property to true.
# pig.stats.notaskreport=false
#
# Override hadoop configs programmatically
#
# By default, Pig expects hadoop configs (hadoop-site.xml and core-site.xml)
# to be present on the classpath. There are cases when these configs need to be
# passed programmatically, such as when using the PigServer API.
# In such cases, you can override hadoop configs by setting the property
# "pig.use.overriden.hadoop.configs".
#
# When this property is set to true, Pig does not look for hadoop configs on
# the classpath and instead picks them up from the Properties/Configuration
# object passed to it.
#
# pig.use.overriden.hadoop.configs=false
# Implied LoadFunc for the LOAD operation when no USING clause is
# present. Supply the fully qualified class name of a LoadFunc
# implementation. Note: setting this means you will have to modify most code
# brought in from elsewhere on the web, as people generally omit the USING
# clause for TSV files.
#
# * org.apache.pig.builtin.PigStorage (default): the traditional tab-separated-values LoadFunc
# * my.custom.udfcollection.MyCustomLoadFunc (for example): use MyCustomLoadFunc instead
#
# pig.default.load.func=<fully qualified class name of a LoadFunc implementation>
# The implied StoreFunc for STORE operations with no USING clause. Supply the
# fully qualified class name of a StoreFunc implementation.
#
# * org.apache.pig.builtin.PigStorage (default): the traditional tab-separated-values StoreFunc.
# * my.custom.udfcollection.MyCustomStoreFunc (for example): use MyCustomStoreFunc instead
#
# pig.default.store.func=<fully qualified class name of a StoreFunc implementation>
# Recover jobs when the application master is restarted? (default: false). This
# is a Hadoop 2 specific property; enable it to take advantage of AM recovery.
#
# pig.output.committer.recovery.support=true
# Should scripts check to prevent multiple stores writing to the same location?
# (default: false) When set to true, execution of the script stops right away.
#
pig.location.check.strict=false
# In addition to the fs-style commands (rm, ls, etc) Pig can now execute
# SQL-style DDL commands, e.g., "sql create table pig_test(name string, age int)".
# The only implemented backend is hcat, and luckily that's also the default.
#
# pig.sql.type=hcat
# Path to the hcat executable, for use with pig.sql.type=hcat (default: null)
#
hcat.bin=/usr/local/hcat/bin/hcat
###########################################################################
#
# Overrides for extreme environments
#
# (Most people won't have to adjust these parameters)
#
# Limit the pig script length placed in the jobconf xml. (default: 10240)
# Extremely long queries can waste space in the JobConf; since its contents are
# only advisory, the default is fine unless you are retaining it for forensics.
#
# pig.script.max.size=10240
# Disable use of counters by Pig. Note that the word 'counter' is singular here.
#
# * false (default, recommended): do NOT disable counters.
# * true: disable counters. Set this to true only when your Pig job will
# otherwise die because of using more counters than the hadoop-configured limit.
#
# pig.disable.counter=true
# Sample size (per-mapper, in number of rows) the ORDER..BY operation's
# RandomSampleLoader uses to estimate how your data should be
# partitioned. (default, recommended: 100 rows per task) Increase this if you
# have exceptionally large input splits and are unhappy with the reducer skew.
#
# pig.random.sampler.sample.size=100
# Process an entire script at once, reducing the amount of work and number of
# tasks? (default, recommended: true) See http://pig.apache.org/docs/r0.12.0/perf.html#multi-query-execution
#
# MultiQuery optimization is very useful, and so the recommended default is
# true. You may find that a script fails to compile under MultiQuery. If so,
# disable it at runtime:
#
# pig -no_multiquery script_that_makes_pig_sad.pig
#
# opt.multiquery=true
# For small queries, fetch data directly from the HDFS. (default, recommended:
# true). If you want to force Pig to launch a MR job, for example when you're
# testing a live cluster, disable with the -N option. See PIG-3642.
#
# opt.fetch=true
#########################################################################
#
# Error Handling Properties
#
# By default, a Pig job fails immediately on encountering an error while writing Tuples for a Store.
# If you want Pig to allow a certain number of errors before failing, you can set this property.
# If the property is set to true and the StoreFunc implements ErrorHandling, it will allow configurable errors
# based on the OutputErrorHandler implementation.
# pig.allow.store.errors = false
#
# Controls the minimum number of errors for a store
# pig.errors.min.records = 0
#
# Set the threshold for percentage of errors
# pig.error.threshold.percent = 0.0f
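# For example, to let a store tolerate a small fraction of write errors
# (illustrative values; requires a StoreFunc that implements ErrorHandling, and
# the exact threshold semantics depend on the OutputErrorHandler):
# pig.allow.store.errors=true
# pig.errors.min.records=100
# pig.error.threshold.percent=0.01f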
###########################################################################
#
# Streaming properties
#
# Define which properties will be set in the streaming environment. Set this
# property to a comma-delimited list of property names, and those properties
# will be set in the environment (see the combined example after these settings).
#
# pig.streaming.environment=<comma-delimited list of properties>
# Specify a comma-delimited list of local files to ship to the distributed cache
# for a streaming job.
#
# pig.streaming.ship.files=<comma-delimited list of local files>
# Specify a comma-delimited list of remote files to cache on the distributed
# cache for a streaming job.
#
# pig.streaming.cache.files=<comma-delimited list of remote files>
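# For example (hypothetical variable names and paths):
# pig.streaming.environment=JAVA_HOME,TZ
# pig.streaming.ship.files=/local/scripts/clean.py
# pig.streaming.cache.files=hdfs:///user/bob/lookup.dat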
# Specify the python command to be used for python streaming UDFs. By default,
# python is used, but you can override it with a non-default version such as
# python2.7.
#
# pig.streaming.udf.python.command=python
###########################################################################
#
# Tez specific properties
#
# Enable auto/grace parallelism in tez. Default is true, and these should be
# left enabled unless you encounter a bug in automatic parallelism.
# If pig.tez.auto.parallelism is set to false, 1 is used as the default parallelism.
#pig.tez.auto.parallelism=true
#pig.tez.grace.parallelism=true
# Union optimization (pig.tez.opt.union=true) in tez uses vertex groups to store
# output from different vertices into one final output location.
# If a StoreFunc's OutputCommitter does not work with multiple vertices
# writing to the same location, then you can disable union optimization just
# for that StoreFunc. Refer to PIG-4649. Instead of a blacklist, you can also specify
# a whitelist of StoreFuncs that are known to work with multiple vertices writing
# to the same location.
#pig.tez.opt.union.unsupported.storefuncs=org.apache.hcatalog.pig.HCatStorer,org.apache.hive.hcatalog.pig.HCatStorer
#pig.tez.opt.union.supported.storefuncs=
# During a sort, Pig reads from the data source only once for the LoadFuncs specified
# here, instead of loading once for sampling and again for partitioning.
# Used to avoid hitting external non-filesystem data sources like HBase and Accumulo twice.
pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage