-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmulti_report.txt
11608 lines (10506 loc) · 539 KB
/
multi_report.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
# shellcheck disable=SC1075,SC2027,SC2034,SC2128,SC2002,SC2004,SC2086,SC2162
LANG="en_US.UTF-8"
#
TrueNASConfigEmailEncryption="" # Set this to "" for no encryption, MUST REMAIN ON LINE 5.
# NOTE: Some email providers will not send some encrypted file types, such as GMAIL, but .zip files are okay.
# NOTE: 7zip is used for the compression/encryption and will not be installed unless encryption is enabled.
##### Version 3.12
###### Get Config File Name and Location
# Resolve the absolute directory that holds this script, independent of the caller's working directory.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# The external configuration file path is built from the script's own directory.
Config_File_Name="$SCRIPT_DIR/multi_report_config.txt"
# -E: shell functions, command substitutions and subshells inherit the ERR trap.
# functrace: they also inherit the DEBUG and RETURN traps.
set -E -o functrace
#######################################
# Error-report handler, written to be invoked from an ERR trap as
#   failure "LINENO" "BASH_LINENO" "${BASH_COMMAND}" "${?}"
# Builds a diagnostic record (line history, function trace, exit code,
# source trace, last command), prints it to stdout and appends it to
# /tmp/multi_report_errors.txt.
# Arguments:
#   $1 - NAME of the variable holding the current line number (default: LINENO)
#   $2 - NAME of the array holding the call-stack line numbers (default: BASH_LINENO)
#   $3 - text of the command being reported (default: ${BASH_COMMAND})
#   $4 - exit code being reported (default: 0)
# Outputs:
#   The record on stdout; a date line plus the same record appended to
#   /tmp/multi_report_errors.txt.
# Returns:
#   The given code immediately (no output) when it evaluates to zero;
#   otherwise the status of the final append to the error file.
#######################################
failure() {
  local -n _lineno="${1:-LINENO}"
  local -n _bash_lineno="${2:-BASH_LINENO}"
  local _last_command="${3:-${BASH_COMMAND}}"
  local _code="${4:-0}"
  # Loop variable used below; declared local so the handler does not leak or
  # clobber a global named _item each time it runs.
  local _item

  ## Workaround for read EOF combo tripping traps
  if ! ((_code)); then
    return "${_code}"
  fi

  # Declaration and command substitution are deliberately combined here: the
  # status of wc/date is masked so the handler itself cannot raise an error.
  local _last_command_height="$(wc -l <<<"${_last_command}")"
  local _date="$(date)"
  local -a _output_array=()

  _output_array+=(
    '-----------------------------------'
    "Start: ${_date}"
    ' '
    "lines_history: [${_lineno} ${_bash_lineno[*]}]"
    "function_trace: [${FUNCNAME[*]}]"
    "exit_code: ${_code}"
  )

  # More than one source frame: list one per line, otherwise keep it inline.
  if [[ "${#BASH_SOURCE[@]}" -gt '1' ]]; then
    _output_array+=('source_trace:')
    for _item in "${BASH_SOURCE[@]}"; do
      _output_array+=(" - ${_item}")
    done
  else
    _output_array+=("source_trace: [${BASH_SOURCE[*]}]")
  fi

  # A multi-line command is printed on its own lines below a marker.
  if [[ "${_last_command_height}" -gt '1' ]]; then
    _output_array+=(
      'last_command: ->'
      "${_last_command}"
    )
  else
    _output_array+=("last_command: ${_last_command}")
  fi

  # echo "End: $(date)"
  date >> /tmp/multi_report_errors.txt
  _output_array+=('---------------------------------')
  printf '%s\n' "${_output_array[@]}"
  printf '%s\n' "${_output_array[@]}" >> /tmp/multi_report_errors.txt
  # exit ${_code}
}
# Start each run without the error log left behind by a previous run.
if [[ -e "/tmp/multi_report_errors.txt" ]]; then
  rm /tmp/multi_report_errors.txt
  echo "Deleted"
fi
# trap 'failure "LINENO" "BASH_LINENO" "${BASH_COMMAND}" "${?}"' ERR
# Use [-help] to read the Help Section. For a short list of commands use [-h].
# Use [-config] to create a configuration file in the directory this script is run from.
###### ZPool & SMART status report with FreeNAS/TrueNAS config backup
### Original script by joeschmuck
### Modified by Bidule0hm, melp, toomuchdata
### Thanks goes out to Basil Hendroff who created the SMR Drive Check script.
### Currently maintained by joeschmuck ([email protected])
### Currently sendemail.py is maintained by Oxyde ([email protected])
# Version
### Changelog:
#
# V3.11
#
# Fixed Updating Script, for the most part. Automatic and `-update` , `-update_selftest` , and `-updated_sendemail` work.
#
# V3.11 (18 January 2025)
#
# Fixed gdisk and sgdisk installation on CORE, resolving drive errors on LINE 4434 error messages.
#
# V3.1 (17 January 2025)
#
# THIS WAS BETA 7, HOWEVER IT SEEMS TO WORK SO I REMOVED THE BETA.
#
# ANY PROBLEMS, PLEASE ADDRESS THEM TO [email protected]
# READ THE DOCUMENTS IF YOU JUST HAVE A QUESTION ABOUT SETUP AND IF THE DOCUMENTS ARE LACKING IN INFORMATION, PLEASE LET ME KNOW.
#
# V3.1 (continued) (17 January 2025)
#
# - Removed all S.M.A.R.T. testing from Multi-Report and created companion script Drive-Selftest.
# ---- Drive-Selftest script provides all the required functionality to run SMART tests on
# ---- all the system drives and report any issues. This is in preparation for when
# ---- TrueNAS has resolved the NVMe SMART testing issues, then the companion script
# ---- will no longer be required. Additionally it simplifies the Multi-Report script.
# - A Zpool error now flags the suspect drive as well in the text section of the output file.
# - Replacing switches '-dump emailextra' with '-dump emailall'. Either will function for now.
# - Added NVMe Temperature Sensors 1 and 2 to the Chart, if they exist.
# - Fixed sgdisk partition error for drives named 'sdp'.
# - Added Memory Total, Used, Available, and SWAP to report.
# - Replaced sendmail (iXsystems removed it) with sendemail.py courtesy of Oxyde.
# - -dump will generate a single .tar file vice many individual files in TrueNAS 24.10.1 or greater.
#
#
# V3.0.8 Beta (18 November 2024)
#
# - Fix for some NVMe drives may report self-test results with leading white space.
# - Fix for not checking if NVMe drives exist before attempting to run self-test.
# - Fix for a drive serial number with white space.
# - Added more data collection for NVMe drives (NVMe Self-Test Log and NVMe Error-Log).
# - REMOVED SMART Testing from within Multi-Report and created new separate companion script for SMART testing called Drive_Selftest.
# - Added Partition Backups generated and attached when TrueNAS Configuration is attached (by request).
# - Cleaned up the Text Section and added a little more data.
# - Added dumping of API drive data to aid in development efforts using the API.
# - Added automatic update to statistical data file (reformatting the file to fix 30 day Total Data Read/Written issue).
# - Updated SMR Drive Checking to report drives for 14 runs of the script.
# - Fix for NVMe errors messages for drive checks on NVMe drive which does not support Self-test.
# - Added message that if using TrueNAS 24.10 or greater, the Smartmontools Override is no longer "required". However people may still desire to use it.
# - Fixed some (null) error messages while collecting smart data from drive.
# - Added error message for statistical data file not containing a drive serial number.
# - Added ZFS/Pool ONLINE (green) or if other (red).
# - Added API data capture routine for -dump routine supporting troubleshooting.
# - Updated the JSON Error Log to remove some of the un-needed data.
# - Added option to Enable/Disable running external SMART test script. Internal testing removed from Multi-Report.
# - Added checking previous SMART check pass/fail. This will catch any SMART Long test failures from a previous run.
# - No longer download all github files, only downloads the files we need.
# V3.0.7 (08 June 2024)
#
# - Fixed for some NVMe drives may report self-test results with leading white space.
# - Fixed to actually generate an alarm for Media Errors.
# - Added Compensation (offset) for Media Errors.
# - Added more data collection for NVMe drives (NVMe Self-Test Log and NVMe Error-Log).
# V3.0.6 (02 June 2024)
#
# - (The push for this change) Fix for Zpool gptid listing in text section (listing cache, log, meta, spare, and dedup).
#
# - Added polling NVMe drives for self-test completion when 'waiting' for test complete. The default is now to wait for the test(s) to complete.
# ---- The SMART self-test will start on ALL NVMe drives at the same time, and each drive will be asked once a second whether the test completed or failed.
# ---- When the results are present the script will continue on to each successive NVMe drive, which if they were all identical, the tests
# ---- should be completing within a second of the first drive polled. This happens for both Short and Long tests.
# ---- A new pair of variables in the multi_report_config.txt file can be set to "false" to have the script not wait and just use the previous
# ---- results. By default the script will wait.
# ---- Now for a question to those who are reading this... How would you feel about checking the last test time for each NVMe and, if
# ---- the last test time was less than 18 hours old, then skipping the test? Of course it will end up being a variable so that the user could
# ---- change the time value. I just thought of this because when I test, I end up running a lot of NVMe self-tests.
# ---- Send me [email protected] an email or just message me on the forum if you have an opinion.
#
# - Changed using smartmontool if v7.4 is installed to "enable" as TrueNAS (no version) supports scheduled NVMe self-testing.
# V3.0.5 (25 May 2024)
#
# -Fix for Zpool Status error messages.
# -Added SMR drive background in yellow.
# V3.0.4 (20 May 2024)
#
# -Fix for abnormally high HE levels, if RAW Value is over 100, utilize Normalized Values.
# -Fix for 'cache' not being displayed in Text Section.
# -Fix for Spencer integration for Dragonfish (24.04.0).
# -Added '-disable_smr' and '-enable_smr' switches to modify the config file.
# -Added '-check_smr' switch for a One-Time Run to check SMR drives if normal SMR Checking has been disabled.
# -Fix for Pool Names which contain space characters.
# V3.0.3 (13 May 2024)
#
# -Fix for downloading SMR script.
# V3.0.2 (11 May 2024)
#
# -Fix for nvmecontrol for TrueNAS 13.3
# -Added sgdisk and gdisk to validate partitions. (Note: For CORE, will copy the files from GitHub)
# -Fixed NVMe simulation
# -Fixed Automatic Update so it runs the script immediately after the update.
# -Added Total Data Read/Total Data Written to Zpool Stats (supports up to 9.2 YB values)
# -Added "Total Data Written" per drive for "Last 30 Days" or "Current Month Actual" (Past 30 days is default).
# -Added SMR Drive Detection, which can be disabled.
# -Fixed MultiZone reporting Critical Error vice Warning Error.
# -Fixed to Ignore MultiZone errors for SSDs.
# V3.0.1 (08 April 2024)
#
# -Fixed NVMe Advanced Configuration question for NVMe Long Self-test.
# -Fixed Temperature reporting for 'HPE' SSD.
# -Added SCSI drive reporting when using SCSI to ATA Translation.
# -Added SCSI "Accurate" Power On Hours for all SCSI drives (I hope) - Disabled by default, and new '-scsismart' switch.
# -Added SCSI "Use Last Test Hours" for Power On Hours - Option if the previous option is not agreeable.
# -Added LastTestType (hours) for drives which stop reporting Self-tests at 65535 hours.
# -Updated User Guide to support new features.
# V3.0 (30 March 2024)
#
# Notable issues:
# NVMe - Last SMART Short and Long tests not displaying in text area. It will be solved when Smartmontools 7.4 is installed.
# -- This is not an issue in SCALE 23.10.2 as it has Smartmontools 7.4.
#
# - Fixed checking NVMe drives for if they support Self-tests.
# - Added NVME Short and Long Self-test for smartctl 7.3 and below. Monday through Saturday a Short Test, Sunday a Long Test.
# --- You may disable either or both options. Once TrueNAS can run NVMe SMART Tests expect this option to go away.
# - Updated to list Drive Idents for NVMe in the Text section.
# - Added NVME Ignore "Invalid Field in Command", disabled by default.
# - Added Wait for SMART Short/Long Self-test to complete before sending the report.
# - Added SMART Self-test Failure Recognition for NVMe.
# - Updated CORE ability to capture NVMe Last Test Age.
# - Updated NVMe routines to ignore real data gathering while in test mode.
# - Enhanced SCSI/SAS drive recognition and Power_On_Hours collection.
# - Fixed Zpool Reporting of 'Resilvering xx days' incorrectly reporting in SCALE.
# - Updated 7zip to only being installed if email is encrypted (See line 5 of this script).
# - Updated script for SCALE Dragonfish for installing 7zip if required.
# - Updated Configuration Questions to make configuration a little easier.
# - Removed 'Mouseover' option and hardcoded it.
# - Corrected 'Pool_Capacity_Type' variable missing in config file.
# - Added checking for all software commands to respond (thanks dak180 for the idea).
# - Added custom wear level alarm value 'i' to the group 'n' 'r' 'd'. 'i' = Ignore. This makes wearLevel="", non-exist.
# - Added Email Report ONLY on Alert (any Error Message).
# - Updated to send attachments when Email_On_Alarm_Only="true" and Email_On_Alarm_Only_And_Attachments="true".
# - Changed Non-Recognized drive power_on_hours from Warning to Caution.
# - Adjusted script for multiple LBA reporting on Yucun SSDs.
# - Updated script to work in a directory with a 'space character' in the path.
# - Removed variables (IncludedSSD and IncludeNVM).
#
###### EMAIL SECTION ######
###### Email Address(es)
# NOTE(review): the "[email protected]" values below look like redacted placeholders - confirm real addresses are supplied before use.
Email="[email protected]" # Normal email address to send report.
From="[email protected]" # From address (default works for many)
###### Alert Email Configuration - For Temperature and Critical Error monitoring when you suspect a problem.
### You must use the '-m' switch
AlertEmail="[email protected]" # Alert email address used with the '-m' switch.
AlertOnWarningTemp="true" # Send alert on Warning Temp. Default = "true"
AlertOnCriticalError="true" # Send alert on Critical Error. Default = "true"
###### EMAIL ON ALARM ONLY ######
Email_On_Alarm_Only="false" # When true, an email will only be sent if an alarm condition exists. Default = 'false'
Email_On_Alarm_Only_And_Attachments="true" # When true, email attachments will be sent even when no alarm condition exists. Default = 'true'
###### HDD/SSD/NVMe SMART Testing ######
### SMART Testing - SMART Testing is no longer an integral part of Multi-Report and you must use an add-on script to perform the testing.
External_SMART_Testing="true" # When set to "true" it will check if 'drive_selftest.sh' is present and run it.
External_Script_Name="$SCRIPT_DIR/drive_selftest.sh" # Default setting is "$SCRIPT_DIR/drive_selftest.sh"
###### IGNORE LOCK ######
# LOCATED ONLY IN THE SCRIPT, NOT THE CONFIG FILE BECAUSE THIS VALUE IS REQUIRED BEFORE READING THE EXTERNAL CONFIG FILE.
Ignore_Lock="disable" # Ignore_Lock when set to "enable" will ignore checking for multiple instances of multi_report.sh running. Default = "disable"
###### SCRIPT UPDATE ######
###### Script Update
### Ensure you understand these options. The Default values will only check for updates and not automatically update.
Check_For_Updates="true" # Will check GitHub for updates and include message in next email. Default = "true"
Automatic_MR_Update="false" # WARNING !!! This option will automatically update the Multi-Report script if a newer version exists on GitHub with no user interaction. Default = "false"
Automatic_Selftest_Update="false" # WARNING !!! This option will automatically update the Drive_Selftest script if a newer version exists on GitHub with no user interaction. Default = "false"
Automatic_Sendemail_Update="false" # WARNING !!! This option will automatically update the sendemail.py script if a newer version exists on GitHub with no user interaction. Default = "false"
###### SCRIPT ADD-ONS ######
SMR_Enable="true" # Will enable SMR operations if set to "true". Default = "true"
SMR_Update="true" # Will automatically download Basil Hendroff's smr-check.sh file from GitHub if the file does not exist. Default = "true"
SMR_Ignore_Alarm="false" # When "true" will not generate an alarm condition, however the Drive ID will still change the background color. Default = "false"
SMR_New_Drive_Det_Count=14 # The SMR script will check the statistical_data_file for how many times the drive serial number has been listed. Default = 14
# - If it is less than or equal to this value, then run the SMR script. 0=Disable (Run SMR Check every time).
### PARTITION CHECK AND BACKUP
Partition_Check="false" # Run sgdisk on each drive. Default = "false", this will install gdisk/sgdisk on TrueNAS CORE if not present.
# -- It is "false" because you should choose to control what is installed or not.
Partition_Backup="true" # Set to "true" to save each partition table with the TrueNAS configuration backup.
# NOTE: You need sgdisk installed, run the Partition Check once to install on CORE.
### Spencer Integration
# Warning Levels are: None, Warning, Critical -- This only affects the Email Subject Line; if any errors are present, an attachment will be included.
spencer_new_warning_level="Warning" # What to do if a "new" error occurs. Default = "Warning"
spencer_existing_warning_level="None" # What to do for an existing error. Default = "None"
spencer_enable="true" # Set to "true" to call the Spencer.py script, or "false" to not run it. Default = "true"
spencer_script_name="$SCRIPT_DIR/spencer.py" # The default is "spencer.py" located in the default script directory.
###### GENERAL THRESHOLDS ######
### Zpool Status Summary Table Settings
PoolUsedWarn=80 # Pool used percentage for CRITICAL color to be used. Default = 80
ScrubAgeWarn=37 # Maximum age (in days) of last pool scrub before CRITICAL color will be used. Default = 37.
ZpoolFragWarn=80 # Percent of fragmentation before a Warning message occurs.
### Temperature Settings
HDDtempWarn=45 # HDD Drive Warning Temp (in C) when a WARNING message will be used. Default = 45
HDDtempCrit=50 # HDD Drive Critical Temp (in C) when a CRITICAL message will be used. Default = 50
SSDtempWarn=50 # SSD Drive Warning Temp (in C) when a WARNING message will be used. Default = 50
SSDtempCrit=60 # SSD Drive Critical Temp (in C) when a CRITICAL message will be used. Default = 60
NVMtempWarn=55 # NVM Drive Warning Temp (in C) when a WARNING message will be used. Default = 55
NVMtempCrit=65 # NVM Drive Critical Temp (in C) when a CRITICAL message will be used. Default = 65
### Current Power Cycle Maximum Temperature Override
HDD_Cur_Pwr_Max_Temp_Ovrd="true" # HDD Max Drive Temp Override. This value when "true" will NOT alarm on any Current Power Cycle Max Temperature Limit.
SSD_Cur_Pwr_Max_Temp_Ovrd="true" # SSD Max Drive Temp Override. This value when "true" will NOT alarm on any Current Power Cycle Max Temperature Limit.
NVM_Cur_Pwr_Max_Temp_Ovrd="true" # NVM Max Drive Temp Override. This value when "true" will NOT alarm on any Current Power Cycle Max Temperature Limit.
### Media Alarms
SectorsWarn=0 # Number of sectors per drive when a WARNING message will be used, this value should be less than SectorsCrit.
SectorsCrit=9 # Number of sectors per drive when a CRITICAL message will be used.
ReAllocWarn=0 # Number of Reallocated sector events allowed.
MultiZoneWarn=0 # Number of MultiZone Errors to allow when a WARNING message will be used. Default is 0.
MultiZoneCrit=5 # Number of MultiZone Errors to allow when a CRITICAL message will be used. Default is 5.
DeviceRedFlag="true" # Set to "true" to have the Device Column indicate RED for ANY alarm condition. Default is true.
HeliumAlarm="true" # Set to "true" to set for a critical alarm any He value below "HeliumMin" value. Default is true.
HeliumMin=100 # Set to 100 for a zero leak helium result. An alert will occur below this value.
RawReadWarn=5 # Number of read errors to allow when a WARNING message will be used, this value should be less than RawReadCrit.
RawReadCrit=100 # Number of read errors to allow when a CRITICAL message will be used.
SeekErrorsWarn=5 # Number of seek errors to allow when a WARNING message will be used, this value should be less than SeekErrorsCrit.
SeekErrorsCrit=100 # Number of seek errors to allow when a CRITICAL message will be used.
NVM_Media_Errors=1 # Number of Media Errors to alarm with a CRITICAL message.
WearLevelCrit=9 # Wear Level Alarm Setpoint when a WARNING message. 9% is the default. NOTE(review): name says "Crit" but text says WARNING - confirm which severity is raised.
TestWarnAge=2 # Maximum age (in days) of last SMART test before CRITICAL color/message will be used.
### NVMe Low Power / Invalid Errors
NVM_Low_Power="true" # Set the NVMe power level to the minimum setting. This does not mean the NVMe will remain at this power level. Only works in CORE.
NVMe_Ignore_Invalid_Errors="disable" # Set to 'enable' to ignore 'Invalid Field in Command' messages. Google this message to see if you are comfortable ignoring it.
### Time-Limited Error Recovery (TLER)
SCT_Enable="false" # Set to "true" to send a command to enable SCT on your drives for user defined timeout.
SCT_Warning_Level="TLER_No_Msg" # Set to "all" will generate a Warning Message for all devices not reporting SCT enabled. "TLER" reports only drives which support TLER.
# "TLER_No_Msg" will only report for TLER drives and not report a Warning Message if the drive can set TLER on.
SCT_Read_Timeout=70 # Set to the read threshold. Default = 70 = 7.0 seconds.
SCT_Write_Timeout=70 # Set to the write threshold. Default = 70 = 7.0 seconds.
##### SCSI Specific Settings ######
Run_SMART_No_power_on_time="false" # Some SCSI drives do not report power_on_time, yet they report SMART Self-test times. This option will force
# a SMART Short Self-test, wait 2 minutes for the test to complete, and report the correct power_on_time.
# This is the same as using the '-scsismart' switch at the CLI.
###### General Settings ######
### Output Formats
PowerTimeFormat="h" # Format for power-on hours string, valid options are "ymdh", "ymd", "ym", "y", or "h" (year month day hour).
TempDisplay="*C" # The format you desire the temperature to be displayed. Common formats are: "*C", "^C", or "^c". Choose your own.
Non_Exist_Value="---" # How do you desire non-existent data to be displayed. The Default is "---", popular options are "N/A" or " ".
Pool_Capacity_Type="zfs" # Select "zfs" or "zpool" for Zpool Status Report - Pool Size and Free Space capacities. "zfs" is default.
Last_Test_Type_poh="true" # Include the Last Test Power On Hours.
lastTestTypeHoursIdent="hrs" # Text to follow power on hours numbers. Default = "hrs".
### Ignore or Activate Alarms
IgnoreUDMA="false" # Set to "true" to ignore all UltraDMA CRC Errors for the summary alarm (Email Header) only, errors will appear in the graphical chart. Default is "false".
IgnoreSeekError="true" # Set to "true" to ignore all Seek Error Rate/Health errors. Default is true.
IgnoreReadError="true" # Set to "true" to ignore all Read Error Rate/Health errors. Default is true.
IgnoreMultiZone="false" # Set to "true" to ignore all MultiZone Errors. Default is false.
DisableWarranty="true" # Set to "true" to disable Email Subject line alerts for any expired warranty alert. The Email body will still report the alert. Default is "true".
### Enable-Disable Text Portion
Enable_Text_Section="true" # This will display the Text Section below the CHART when "true". Default="true"
### Disable or Activate Input/Output File Settings
ReportNonSMART="true" # Will force even non-SMART devices to be reported, "true" = normal operation to report non-SMART devices.
DisableRAWdata="false" # Set to "true" to remove the smartctl -a data and non-smart data appended to the normal report. Default is false.
ATA_Auto_Enable="false" # Set to "true" to automatically update Log Error count to only display a log error when a new one occurs.
### Text Output Selection
Enable_Messages="true" # This will enable the Warning/Caution type messages. Default="true".
Enable_Zpool_Messages="true" # This will list all 'zpool -v status' and identify drives by gptid to drive ident. Default="true".
Enable_SMART_Messages="true" # This will output SMART data if available. Default="true".
### Total Data Written - 30 Day or Current Month
Total_Data_Written_Month="30Days" # Options are: "month" for Current Month, or "30Days" for the rolling previous 30 days.
###### Statistical Data File
statistical_data_file="$SCRIPT_DIR/statisticalsmartdata.csv" # Default location is where the script is located.
SDF_DataRecordEnable="true" # Set to "true" will save all drive data into a CSV file defined by "statistical_data_file" above.
SDF_DataEmail="true" # Set to "true" to have an attachment of the file emailed to you. Default is true.
SDF_DataPurgeDays=730 # Set to the number of days you wish to keep in the data. Older data will be purged. Default is 730 days (2 years). 0=Disable.
SDF_DataEmailDay="Mon" # Set to the day of the week the statistical report is emailed. (All, Mon, Tue, Wed, Thu, Fri, Sat, Sun, Month)
###### TrueNAS config backup settings
TrueNASConfigEmailEnable="true" # Set to "true" to save config backup (which renders next two options operational); "false" to disable config backups.
TrueNASConfigEmailDay="Mon" # Set to the day of the week the config is emailed. (All, Mon, Tue, Wed, Thu, Fri, Sat, Sun, Month)
TrueNASConfigBackupSave="false" # Set to "false" to delete TrueNAS config backup after mail is sent; "true" to keep it in dir below.
TrueNASConfigBackupLocation="/tmp/" # Directory in which to store the backup FreeNAS config files.
###### Attach multi_report_config.txt to email ######
MRConfigEmailEnable="true" # Set to "true" to enable periodic email (which renders next two options operational).
MRChangedEmailSend="true" # If "true" will attach the updated/changed file to the email.
MRConfigEmailDay="Mon" # Set to the day of the week the multi_report_config.txt is emailed. (All, Mon, Tue, Wed, Thu, Fri, Sat, Sun, Month, Never)
###### REPORT CHART CONFIGURATION
###### CUSTOM SUBJECT LINE
# Short host name (hostname -s) of this machine.
host=$(hostname -s)
# The host name is embedded in each of the subject lines below.
Subject_Line_Critical="*CRITICAL ERROR* SMART Testing Results for ${host} *CRITICAL ERROR*"
Subject_Line_Warning="*WARNING* SMART Testing Results for ${host} *WARNING*"
Subject_Line_Normal="SMART Testing Results for ${host} - All is Good"
###### REPORT HEADER TITLE
HDDreportTitle="Spinning Rust Summary Report" # This is the title of the HDD report, change as you desire.
SSDreportTitle="SSD Summary Report" # This is the title of the SSD report, change as you desire.
NVMreportTitle="NVMe Summary Report" # This is the title of the NVMe report, change as you desire.
###### CUSTOM REPORT CONFIGURATION
### By default most items are selected. Change the item to false to have it not displayed in the graph, true to have it displayed.
### NOTE: Alarm setpoints are not affected by these settings, this is only what columns of data are to be displayed on the graph.
### I would recommend that you remove columns of data that you don't really care about to make the graph less busy.
###### For Zpool Status Summary
### Title strings for the Zpool Status Summary; presumably used as the column headings - confirm against the chart-building code.
### NOTE(review): the Zfs_* titles carry a leading "^", presumably to mark values taken from "zfs" rather than "zpool" (see Pool_Capacity_Type) - confirm.
Zpool_Pool_Name_Title="Pool Name"
Zpool_Status_Title="Status"
Zpool_Pool_Size_Title="Pool Size"
Zpool_Free_Space_Title="Free Space"
Zpool_Used_Space_Title="Used Space"
Zfs_Pool_Size_Title="^Pool Size"
Zfs_Free_Space_Title="^Free Space"
Zfs_Used_Space_Title="^Used Space"
Zpool_Frag_Title="Frag"
Zpool_Read_Errors_Title="Read Errors"
Zpool_Write_Errors_Title="Write Errors"
Zpool_Checksum_Errors_Title="Cksum Errors"
Zpool_Scrub_Repaired_Title="Scrub Repaired Bytes"
Zpool_Scrub_Errors_Title="Scrub Errors"
Zpool_Scrub_Age_Title="Last Scrub Age"
Zpool_Scrub_Duration_Title="Last Scrub Duration"
# This title contains an HTML <br> tag to split it over two lines.
Zpool_Total_Data_Written_Title="Total Data Read /<br> Total Data Written"
###### For Hard Drive Section
### Each column has a pair of settings: NAME="true"/"false" selects whether the column is displayed,
### and NAME_Title holds the text for that column (presumably its heading - confirm against the chart-building code).
HDD_Device_ID="true"
HDD_Device_ID_Title="Device ID"
HDD_Serial_Number="true"
HDD_Serial_Number_Title="Serial Number"
HDD_Model_Number="true"
HDD_Model_Number_Title="Model Number"
HDD_Capacity="true"
HDD_Capacity_Title="HDD Capacity"
HDD_Rotational_Rate="true"
HDD_Rotational_Rate_Title="RPM"
HDD_SMART_Status="true"
HDD_SMART_Status_Title="SMART Status"
# The Warranty pair is listed title-first, unlike the other pairs in this section.
HDD_Warranty_Title="Warr- anty"
HDD_Warranty="true"
HDD_Raw_Read_Error_Rate="true"
HDD_Raw_Read_Error_Rate_Title="Raw Error Rate"
HDD_Drive_Temp="true"
HDD_Drive_Temp_Title="Curr Temp"
HDD_Drive_Temp_Min="true"
HDD_Drive_Temp_Min_Title="Temp Min"
HDD_Drive_Temp_Max="true"
HDD_Drive_Temp_Max_Title="Temp Max"
HDD_Power_On_Hours="true"
HDD_Power_On_Hours_Title="Power On Time"
HDD_Start_Stop_Count="true"
HDD_Start_Stop_Count_Title="Start Stop Count"
HDD_Load_Cycle="true"
HDD_Load_Cycle_Title="Load Cycle Count"
HDD_Spin_Retry="true"
HDD_Spin_Retry_Title="Spin Retry Count"
HDD_Reallocated_Sectors="true"
HDD_Reallocated_Sectors_Title="Re-alloc Sects"
HDD_Reallocated_Events="true"
HDD_Reallocated_Events_Title="Re-alloc Evnt"
HDD_Pending_Sectors="true"
HDD_Pending_Sectors_Title="Curr Pend Sects"
HDD_Offline_Uncorrectable="true"
HDD_Offline_Uncorrectable_Title="Offl Unc Sects"
HDD_UDMA_CRC_Errors_List="true"
HDD_UDMA_CRC_Errors_List_Title="UDMA CRC Error"
HDD_Seek_Error_Rate="true"
HDD_Seek_Error_Rate_Title="Seek Error Rate"
HDD_MultiZone_Errors="true"
HDD_MultiZone_Errors_Title="Multi Zone Error"
HDD_Helium_Level="true"
HDD_Helium_Level_Title="He Level"
HDD_Last_Test_Age="true"
HDD_Last_Test_Age_Title="Last Test Age"
HDD_Last_Test_Type="true"
HDD_Last_Test_Type_Title="Last Test Type (time conducted)"
HDD_Total_Data_Written="true"
HDD_Total_Data_Written_Title="Total Data Read<br>/ Written"
HDD_Total_Data_Written_Month="true"
HDD_Total_Data_Written_Month_Title="Total Data Written 30 Days"
###### For Solid State Drive Section
### Each column has a pair of settings: NAME="true"/"false" selects whether the column is displayed,
### and NAME_Title holds the text for that column (presumably its heading - confirm against the chart-building code).
SSD_Device_ID="true"
SSD_Device_ID_Title="Device ID"
SSD_Serial_Number="true"
SSD_Serial_Number_Title="Serial Number"
SSD_Model_Number="true"
SSD_Model_Number_Title="Model Number"
SSD_Capacity="true"
SSD_Capacity_Title="SSD Capacity"
SSD_SMART_Status="true"
SSD_SMART_Status_Title="SMART Status"
# The Warranty pair is listed title-first, unlike the other pairs in this section.
SSD_Warranty_Title="Warr- anty"
SSD_Warranty="true"
SSD_Drive_Temp="true"
SSD_Drive_Temp_Title="Curr Temp"
SSD_Drive_Temp_Min="true"
SSD_Drive_Temp_Min_Title="Temp Min"
SSD_Drive_Temp_Max="true"
SSD_Drive_Temp_Max_Title="Temp Max"
SSD_Power_On_Hours="true"
SSD_Power_On_Hours_Title="Power On Time"
SSD_Wear_Level="true"
SSD_Wear_Level_Title="Wear Level"
SSD_Reallocated_Sectors="true"
SSD_Reallocated_Sectors_Title="Re-alloc Sects"
SSD_Reallocated_Events="true"
SSD_Reallocated_Events_Title="Re-alloc Evnt"
SSD_Pending_Sectors="true"
SSD_Pending_Sectors_Title="Curr Pend Sects"
SSD_Offline_Uncorrectable="true"
SSD_Offline_Uncorrectable_Title="Offl Unc Sects"
SSD_UDMA_CRC_Errors_List="true"
SSD_UDMA_CRC_Errors_List_Title="UDMA CRC Error"
SSD_Last_Test_Age="true"
SSD_Last_Test_Age_Title="Last Test Age"
SSD_Last_Test_Type="true"
SSD_Last_Test_Type_Title="Last Test Type (time conducted)"
SSD_Total_Data_Written="true"
SSD_Total_Data_Written_Title="Total Data Read<br>/ Written"
SSD_Total_Data_Written_Month="true"
SSD_Total_Data_Written_Month_Title="Total Data Written 30 Days"
###### For NVMe Drive Section
### Column settings for the NVMe drive table. Every column is described by a
### pair of variables: <name> holds "true" or "false" and <name>_Title holds
### the heading text (may embed an HTML <br> line break).
### NOTE(review): presumably "true" shows the column and "false" hides it; the
### code that consumes these values is outside this section - confirm.
NVM_Device_ID="true"
NVM_Device_ID_Title="Device ID"
NVM_Serial_Number="true"
NVM_Serial_Number_Title="Serial Number"
NVM_Model_Number="true"
NVM_Model_Number_Title="Model Number"
NVM_Capacity="true"
NVM_Capacity_Title="NVMe Capacity"
NVM_SMART_Status="true"
NVM_SMART_Status_Title="SMART Status"
# The warranty pair is listed title first; the order of the two assignments
# makes no difference.
NVM_Warranty_Title="Warr- anty"
NVM_Warranty="true"
NVM_Critical_Warning="true"
NVM_Critical_Warning_Title="Critical Warning"
NVM_Drive_Temp="true"
NVM_Drive_Temp_Title="Curr Temp"
# Min/Max temperature are the only NVMe columns that default to "false".
NVM_Drive_Temp_Min="false" # Not usable on NVMe drive yet
NVM_Drive_Temp_Min_Title="Temp Min"
NVM_Drive_Temp_Max="false" # Not usable on NVMe drive yet
NVM_Drive_Temp_Max_Title="Temp Max"
NVM_Power_Level="true"
NVM_Power_Level_Title="Power State"
NVM_Power_On_Hours="true"
NVM_Power_On_Hours_Title="Power On Time"
NVM_Wear_Level="true"
NVM_Wear_Level_Title="Wear Level"
NVM_Media_Error="true"
NVM_Media_Error_Title="Media Errors"
NVM_Last_Test_Age="true"
NVM_Last_Test_Age_Title="Last Test Age"
NVM_Last_Test_Type="true"
NVM_Last_Test_Type_Title="Last Test Type (time conducted)"
NVM_Total_Data_Written="true"
NVM_Total_Data_Written_Title="Total Data Read<br>/ Written"
NVM_Total_Data_Written_Month="true"
NVM_Total_Data_Written_Month_Title="Total Data Written 30 Days"
###### Drive Ignore List
### What does it do:
### Use this to list any drives to ignore and remove from the report. This is
### very useful for ignoring USB Flash Drives or other drives for which good
### data cannot be collected (non-standard).
###
### How to use it:
### The drives are identified by a comma delimited list of serial numbers.
### You MUST use the exact and full serial number that smartctl reports; if
### there is no identical match then the drive will not be ignored.
###
### Format: Ignore_Drives_List="serial_number,serial_number,serial_number"
### Example: Ignore_Drives_List="VMWare,1JUMLBD,21HNSAFC21410E"
### The default (empty string) lists no drives.
Ignore_Drives_List=""
###### Drive UDMA CRC Error Count List, MultiZone List Errors List,
###### Reallocated Sectors Exceptions, Reallocated Sectors Events Exceptions
###
### What does it do:
### If you have a drive where one of the above error counters is not 0 (zero),
### this setting offsets the value back to zero so that only future increases
### of that specific error are monitored. The correction value is subtracted
### from the reported value to bring it to 0 (zero), and the result is
### highlighted in yellow to denote it was overridden. The Warning Title
### will not be flagged if the value is zeroed out in this manner.
###
### How to use it:
### List each drive by serial number and include the current error count value.
### The format is very specific and will not work if you improvise; follow the
### Example.
###
### Format: CRC_Errors_List="serial_number:current_udma_error_count,serial_number:current_udma_error_count"
###
### Example: CRC_Errors_List="WD-WMC4N2578099:1,S2X1J90CA48799:2,P02618119268:1"
###
### NOTE(review): only CRC_Errors_List is shown in the Format/Example above;
### the other lists below (including ATA_Errors_List and Media_Errors_List,
### which the heading does not name) presumably use the same
### serial_number:count format - confirm against the code that reads them.
ATA_Errors_List=""
CRC_Errors_List=""
MultiZone_List=""
ReAllocated_Sector_List=""
ReAllocated_Sector_Events_List=""
Media_Errors_List=""
###### Custom Drive Configuration
### Used to define specific alarm values for specific drives by serial number.
### This should only be used for drives where the default alarm settings
### are not appropriate, or where you need to reverse a value that is reported
### the opposite way round, for example WearLevel listed as 0% instead of 100%.
### Up to 24 unique drive values may be stored (tested).
###
### Use -config to set these values.
###
### THE BREAKDOWN OF THIS LINE FORMAT (one drive entry, shown here broken over two
### lines for viewing; fields are colon separated, drive entries are comma separated)
### serial:tempwarn:tempcrit:sectorswarn:sectorscrit:reallocwarn:multizonewarn:multizonecrit:rawreadwarn:
### rawreadcrit:seekerrorswarn:seekerrorscrit:testage:testAgeOvrd:heliummin:wearleveladj
### (testAgeOvrd '0'=Default, '1'=Ignore, wearleveladj 'd'=Default 'r'=Reverse value 'n'=Normalized 'i'=Ignore)
### The default (empty string) defines no custom drives.
Custom_Drives_List=""
###### Warranty Expiration Date
### What does it do:
### This section is used to add warranty expiration dates for designated drives
### and to create an alert when they expire. This gives you a heads-up on when
### you might need to start looking for a replacement drive.
###
### How to use it:
### List each drive as serial_number:expiration_date, separated by commas.
### Format: Drive_Warranty_List="serial_number:YYYY-MM-DD,serial_number:YYYY-MM-DD"
### Example: Drive_Warranty_List="K1JUMLBD:2020-09-30,K1JRSWLD:2020-09-30,K1JUMW4D:2020-09-30,K1GVD84B:2020-10-12"
### The default (empty string) lists no warranty dates.
Drive_Warranty_List=""
###### Expired Drive Warranty Colors
### Colors applied to the warranty cell of a drive whose warranty has expired.
expiredWarrantyBoxColor="#000000" # "#000000" = normal box perimeter color.
WarrantyBackgndColor="#f1ffad" # Hex code or "none" = normal background.
###### Global table of colors
### You can change the colors selected, but you will need to look up the proper
### HEX code for a color. All values are HTML style "#rrggbb" strings.
okColor="#b5fcb9" # Hex code for color to use in SMART Status column if drives pass (default is darker light green, #b5fcb9).
warnColor="#F38B16" # Hex code for WARN color (default is orange, #F38B16).
critColor="#f44336" # Hex code for CRITICAL color (default is red, #f44336).
altColor="#f4f4f4" # Table background alternates row colors between white and this color (default is light gray, #f4f4f4).
whtColor="#ffffff" # Hex for White background.
ovrdColor="#ffffe4" # Hex code for Override Yellow.
blueColor="#87ceeb" # Hex code for Sky Blue, used for the SCRUB/SMART Test In Progress/background.
yellowColor="#f1ffad" # Hex code for pale green-yellow.
pohColor="#ffffcc" # Hex code for pale yellow.
###### THIS SECTION FOR DRIVE_SELFTEST SCRIPT ONLY
###### SCRIPT UPDATES ---- NOT OPERATIONAL YET
Allow_Drive_Selftest_Script_Update="true" # When set to "true" then the script will automatically update itself if a new update is present.
###### HDD/SSD/NVMe SMART Testing
### SHORT SETTINGS
Short_Test_Mode=3 # 1 = Use Short_Drives_to_Test_Per_Day value, 2 = All Drives Tested (Ignores other options), 3 = No Drives Tested.
Short_Time_Delay_Between_Drives=1 # Tests will have a XX second delay between the drives starting testing. If drives are always spinning, this can be "0".
# NOTE(review): the comment on the next line names 'Serial' as the default, but the value set here is "DriveID".
Short_SMART_Testing_Order="DriveID" # Test order is for Test Mode 1 ONLY, select "Serial" or "DriveID" for sort order. Default = 'Serial'
Short_Drives_to_Test_Per_Day=1 # For Test_Mode 1) How many drives to run each day minimum?
Short_Drives_Test_Period="Week" # "Week" (7 days) or "Month" (28 days)
Short_Drives_Tested_Days_of_the_Week="1,2,3,4,5,6,7" # Days of the week to run, 1=Mon, 2=Tue, 3=Wed, 4=Thu, 5=Fri, 6=Sat, 7=Sun. # This takes over for number of days variable.
Short_Drives_Test_Delay=130 # How long to delay when running Short tests, before exiting to controlling procedure. Default is 130 seconds, which should allow
# Short tests to complete before continuing. If using without Multi-Report, set this value to 1.
### LONG SETTINGS
Long_Test_Mode=3 # 1 = Use Long_Drives_to_Test_Per_Day value, 2 = All Drives Tested (Ignores other options), 3 = No Drives Tested.
Long_Time_Delay_Between_Drives=1 # Tests will have a XX second delay between the drives starting the next test.
Long_SMART_Testing_Order="Serial" # Test order is either "Serial" or "DriveID". Default = 'Serial'
Long_Drives_to_Test_Per_Day=1 # For Test_Mode 1) How many drives to run each day minimum?
Long_Drives_Test_Period="Week" # "Week" (7 days) or "Month" (28 days)
Long_Drives_Tested_Days_of_the_Week="1,2,3,4,5,6,7" # Days of the week to run, 1=Mon, 2=Tue, 3=Wed, 4=Thu, 5=Fri, 6=Sat, 7=Sun. # This takes over for number of days variable.
### REPORT
Drive_List_Length=10 # This is how many drive IDs to list per line. Default is 10.
Enable_Logging="true" # This will create a text file named "drive_test_xx.txt". Run -clearlog
LOG_DIR=$SCRIPT_DIR"/DS_Logs" # The default log directory is the DS_Logs directory under SCRIPT_DIR.
### EXTERNAL CONFIGURATION FILE
Use_multi_report_config_values="true" # A "true" value here will use the $Config_File_Name file values to override the values defined above.
# This allows the values to be retained between versions. A "false" will utilize the values above.
##########################
##########################
### ###
### STOP EDITING THE ###
### SCRIPT HERE ###
### ###
##########################
##########################
###### Auto-generated Parameters
# Everything below is derived at start-up; none of it is a user setting.
softver="$(uname -s)"
#host=$(hostname -s)
truenas_ver="$(cat /etc/version)"
testdata_path="data"
Dump_Loop="0"
re='^[0-9]+$'
runfilename="multi_report.sh"
### temp files have been converted to variable stored, not stored in /tmp/ as a file. ###
# Random numeric prefix in the range 10..1009 shared by the three temp file names.
tempfilepath=$(( RANDOM % 1000 + 10 ))
logfile="/tmp/${tempfilepath}smart_report_body.tmp"
logfile_header="/tmp/${tempfilepath}smart_report_header.tmp"
logfile_temp="/tmp/${tempfilepath}smart_report_temp.tmp"
boundary="gc0p4Jq0M2Yt08jU534c0p"
CurrentFilename="multi_report_v3.12_2025_01_18.txt"
valid_config_version_date="2025-01-17" # Configuration file valid date
# The release date (fields 4-6 of the file name, joined with '-') and the
# version tag (field 3) are both cut out of CurrentFilename.
progverdate="$(printf '%s\n' "$CurrentFilename" | cut -d '_' -f4-6 | cut -d '.' -f1 | sed -r 's/[_]+/-/g')"
progname="Multi-Report $(printf '%s\n' "$CurrentFilename" | cut -d '_' -f3) dtd:"
# Physical memory as reported by the middleware API, scaled down by 1024 for
# as long as the value is above 1024; memunit records the last unit applied.
physmem="$(midclt call system.info | jq -Mre '.physmem')"
for physmem_unit in "Ki" "Mi" "Gi" "Ti" "Pi"; do
  if [[ $physmem -gt 1024 ]]; then
    physmem=$(( physmem / 1024 ))
    memunit="$physmem_unit"
  fi
done
physmem="${physmem}${memunit}"
# Collect the system uptime, memory figures and version strings that make up
# the report header variables:
#   programver  - program name/date plus the platform name and version
#   programver2 - short platform/version tag
#   programver3 - version text only
#   programver4 - memory summary and uptime (contains an HTML <br>)
#   truenas_ver - major number on FreeNAS/Core, dotted version on Scale
# A memory snapshot is also written to /tmp/memory_free.txt for the dump.
# Reads: softver, progname, progverdate, physmem (all set earlier in the file).
uptime=$(midclt call system.info | jq -Mre '.uptime')
# /etc/version is read once and reused for every string built below.
etc_version_text=$(cat /etc/version)
if [[ $softver != "Linux" ]]; then
  # Not Linux: FreeNAS or TrueNAS Core.
  top -d1 | head -n 7 > /tmp/memory_free.txt
  # The Mem:/Swap: rows are taken from the last 4 lines of the snapshot just
  # written, instead of running top again for every value.
  top_memory_rows=$(tail -n 4 /tmp/memory_free.txt)
  if [[ "$etc_version_text" == *"FreeNAS"* ]]; then
    programver3="$(cut -d " " -f1 <<< "$etc_version_text" | sed 's/FreeNAS-//')"
    # Fix: the original wrote 'progname' without the '$', which put the literal
    # word "progname" into the header instead of the program name.
    programver="${progname}${progverdate} (FreeNAS ${programver3})"
    programver2="$(cut -d"-" -f1 <<< "$etc_version_text")"
  else
    programver3="$(cut -d " " -f1 <<< "$etc_version_text" | sed 's/TrueNAS-//')"
    programver="${progname}${progverdate} (TrueNAS Core ${programver3})"
    programver2="$(cut -d"-" -f1 <<< "$etc_version_text")_Core"
  fi
  totalmem=$physmem
  # usedmem=$(top -d1 | head -n 7 | tail -4 | grep "Mem:" | awk '{printf $7}')
  # awk prints through an explicit "%s" so a field is never used as a format.
  freemem="$(grep "Mem:" <<< "$top_memory_rows" | awk '{printf "%s", $8}')i"
  swapused="Swap Total: $(grep "Swap:" <<< "$top_memory_rows" | awk '{printf "%s", $2}')i"
  swapused="${swapused}, Swap Free: $(grep "Swap:" <<< "$top_memory_rows" | awk '{printf "%s", $4}')i"
  programver4="Total Memory: ${totalmem}, Free Memory: ${freemem}, ${swapused}<br>System Uptime: ${uptime}"
  truenas_ver=$(cut -d"-" -f2 <<< "$etc_version_text" | cut -d"." -f1) # Provides a whole number like '13'
else
  # Linux: TrueNAS Scale.
  free > /tmp/memory_free.txt # Add this to the dump
  # One 'free -h' call supplies all four figures.
  free_human=$(free -h)
  totalmem=$(grep "Mem:" <<< "$free_human" | awk '{printf "%s", $2}')
  usedmem=$(grep "Mem:" <<< "$free_human" | awk '{printf "%s", $3}')
  freemem=$(grep "Mem:" <<< "$free_human" | awk '{printf "%s", $4}')
  swapused=$(grep "Swap:" <<< "$free_human" | awk '{printf "%s", $3}')
  programver="${progname}${progverdate} (TrueNAS Scale ${etc_version_text})"
  programver2="TrueNAS_Scale_$(cut -d" " -f1 <<< "$etc_version_text")"
  programver3="$etc_version_text"
  programver4="Total Memory: ${totalmem}, Used Memory: ${usedmem}, Free Memory: ${freemem}, Swap Used: ${swapused}<br>System Uptime: ${uptime}"
  truenas_ver=$(cut -d"-" -f1 <<< "$etc_version_text") # Provides the entire version number like '24.10.0.2'
fi
# Split the dotted version in truenas_ver into its (up to four) components.
truenas_ver_major=$(echo $truenas_ver | cut -d"." -f1)
truenas_ver_minor=$(echo $truenas_ver | cut -d"." -f2)
truenas_ver_low=$(echo $truenas_ver | cut -d"." -f3)
truenas_ver_sub=$(echo $truenas_ver | cut -d"." -f4)

# mr_version_is_newer VERSION_A VERSION_B
# Returns 0 when dotted version $1 is strictly newer than dotted version $2,
# otherwise 1. The versions are compared component by component as base-10
# numbers; a missing component, or one without any digit, counts as 0.
mr_version_is_newer () {
  local -a left_parts right_parts
  local part_idx part_count left_num right_num
  IFS='.' read -r -a left_parts <<< "$1"
  IFS='.' read -r -a right_parts <<< "$2"
  part_count=${#left_parts[@]}
  if (( ${#right_parts[@]} > part_count )); then part_count=${#right_parts[@]}; fi
  for (( part_idx = 0; part_idx < part_count; part_idx++ )); do
    left_num=${left_parts[part_idx]//[!0-9]/}
    right_num=${right_parts[part_idx]//[!0-9]/}
    # 10# forces base 10 so a component such as "08" is not read as octal.
    left_num=$(( 10#${left_num:-0} ))
    right_num=$(( 10#${right_num:-0} ))
    if (( left_num > right_num )); then return 0; fi
    if (( left_num < right_num )); then return 1; fi
  done
  return 1
}

# Versions newer than 24.10.0.2 are flagged as having no sendmail support.
# Fix: the original used [[ $truenas_ver > 24.10.0.2 ]], which compares the two
# strings lexicographically (so "9.10" sorted as newer and "100.1" as older);
# the versions are now compared numerically.
if mr_version_is_newer "$truenas_ver" "24.10.0.2"; then
  echo "TrueNAS does not support sendmail function, using sendemail.py to send you your email."
  truenas_sendmail_support="No"
else
  truenas_sendmail_support="Yes"
fi
# Append the API memory figure and the uptime to the /tmp/memory_free.txt dump.
# physmem and uptime are echoed unquoted on purpose, exactly as before, so the
# text written to the file is unchanged.
{
  printf '%s\n' " " "Pysical Memory from API"
  echo $physmem
  printf '%s\n' " " "Up Time"
  echo $uptime
} >> /tmp/memory_free.txt
# Global state initialised before any function runs.
nvme_supports_selftest="" # To stop nvme check if self-test not supported.
drive_name=""
# Indexed arrays, declared without any elements.
declare -a testfilenames
declare -a testfilenamesHDD
declare -a smartdrives
# Update flags and misc state, all starting out empty / 0 / "false".
UpdateAvailable=""
UpdateDriveAvailable=""
UpdateSendemailAvailable=""
smr_already_tested=0
No_External_File="false"
attachment=() # Empty array.
# Declaring the global drive arrays
# NOTE(review): despite the _Array suffix, the variables below are initialised
# as plain scalars ("" or 0), not with 'declare -a' or '=()'. Presumably they
# are filled per drive by code outside this section - confirm before relying
# on their element count.
# HDD/SSD/NVMe Common
Drives_ID_Array="" # Drive Obtained Value
Drives_SN_Array="" # Drive Obtained Value
Drives_Subsystem_Array="" # API
Drives_Model_Array="" # Drive Obtained Value
Drives_Capacity_Array=0 # Drive Obtained Value
Drives_Rotation_Array=0 # Drive Obtained Value
Drives_SMART_Status_Array="" # Drive Obtained Value
Drives_Type_Array="" # API Obtained Value
Drives_ZFSGUID_Array="" # API Obtained Value
Drives_Size_Array=0 # Drive Obtained Value
Drives_Multipath_Array="" # Drive Obtained Value
Drives_Description_Array="" # API Obtained Value
Drives_BUS_Array="" # API Obtained Value
Drives_Pool_Array="" # API Obtained Value
Drives_Current_Temp_Array=0 # Drive Obtained Value
Drives_Min_Temp=0 # Drive Obtained Value
Drives_Max_Temp=0 # Drive Obtained Value
Drives_Power_On_Time_Array=0 # Drive Obtained Value
Drives_Start_Stop_Array=0 # Drive Obtained Value
Drives_Load_Cycle_Array=0 # Drive Obtained Value
Drives_Spin_Retry_Array=0 # Drive Obtained Value
Drives_Reallocated_Sectors_Array=0 # Drive Obtained Value
Drives_Reallocated_Events_Array=0 # Drive Obtained Value
Drives_Current_Pending_Sectors_Array=0 # Drive Obtained Value
Drives_Offline_Uncorrectable_Sectors_Array=0 # Drive Obtained Value
Drives_UDMA_CRC_Errors_Array=0 # Drive Obtained Value
Drives_Raw_Read_Error_Rate_Array=0 # Drive Obtained Value
Drives_Seek_Error_Rate_Array=0 # Drive Obtained Value
Drives_MultiZone_Error_Array=0 # Drive Obtained Value
Drives_He_Level_Array=0 # Drive Obtained Value
Drives_Last_Test_Hours_Array=0 # Drive Obtained Value
Drives_Last_Test_Type_Array="" # Drive Obtained Value
Drives_Last_Test_Age_Array=0 # Calculated Value
# SSD/NVMe Unique
Drives_Critical_Warning_Array="" # Drive Obtained Value
Drives_Wear_Level_Array=0 # Drive Obtained Value
Drives_Wear_Level_Thresh_Array=0 # Drive Obtained Value
Drives_Media_Errors_Array=0 # Drive Obtained Value
##########################
##########################
### ###
### PROGRAMMING / ###
### TROUBLESHOOTING ###
### HACKS ###
### ###
##########################
##########################
# Unique programming hacks to properly emulate other hardware that is not actually on the system.
# Every switch below is in its inactive state by default ("off" / "false").
VMWareNVME="off" # Set to "off" normally, "on" to assist in incorrect VMWare fake drives reporting.
Joes_System="false" # Custom settings for my system and to remove these from your system.
Sample_Test="false" # Setup static test values for testing.
Develop="false" # Set to 'true' for development output.
GitHubSimulate="false" # Use test section of GitHub.
Debug_Steps="false" # Lists each process when it runs.
##########################
##########################
### ###
### DEFINE FUNCTIONS ###
### ###
##########################
##########################
########## EXIT IF MULTIPLE INSTANCES ARE RUNNING ##########
# A lock directory in /tmp marks a running instance; a second instance exits.
# The check is bypassed when "-ignore_lock" is one of the first four command
# line arguments, or when Ignore_Lock is set to "enable".
mefull=$(basename "$0")

lock_bypass="false"
if [[ "$Ignore_Lock" == "enable" ]]; then
  lock_bypass="true"
fi
for lock_arg in "${@:1:4}"; do
  if [[ "$lock_arg" == "-ignore_lock" ]]; then
    lock_bypass="true"
  fi
done

if [[ "$lock_bypass" == "true" ]]; then
  printf "Ignoring Multiple Instance Check\n" >&2
elif ! mkdir /tmp/multi_report.lock; then
  # mkdir fails when the directory already exists, i.e. the lock is held.
  printf "Script is already Running... Exiting\n" >&2
  printf "If this message is in error, remove '/tmp/multi_report.lock' directory or just reboot TrueNAS to recover.\n" >&2
  exit 1
fi
trap 'rm -rf /tmp/multi_report.lock' EXIT # Remove the lock directory on exit
#################### FUNCTIONS ####################
########## CREATE ATTACHMENT FILE ##########
# Adds one file, base64 encoded, to the JSON attachment list kept in
# /tmp/attachment.json. The list is created on the first call and extended on
# later calls; a name that is already listed is not added a second time.
# Arguments: $1 - path of the file to attach
#            $2 - attachment name stored in the entry's "name" parameter
# Globals written: updating_attachment, encoded_content
# Example: create_attachment_file /tmp/temp_body_report.txt report.txt
create_attachment_file () {
  if [[ $Debug_Steps == "true" ]]; then echo "create_attachment"; fi
  if test -e "/tmp/attachment.json"; then # If the file exists then we need to add on to the end, but need to delete the last ']'.
    # LETS SEE IF THE LIST ALREADY CONTAINS THIS FILE
    # Fix: the name is matched as a quoted, fixed string against the complete
    # "name" value. The original 'grep -q $2' was unquoted and matched the name
    # as a regular expression anywhere in the file, so "report.txt" was
    # treated as already present once "my_report.txt" had been added.
    if grep -qF -- "\"name\": \"$2\"" "/tmp/attachment.json"; then
      return
    fi
    # Read the file back, remove the last 2 lines (' }' and ']'), add the line ' },',
    # then the new entry is appended below.
    updating_attachment=$(head -n -2 /tmp/attachment.json) # This removes the last lines.
    (
      echo "${updating_attachment}"
      echo ' },'
    ) > /tmp/attachment.json
  else
    (
      echo '['
    ) > /tmp/attachment.json
  fi
  # Quoted so a path containing spaces is passed as one argument.
  encoded_content=$(base64 --wrap=0 "$1")
  # add header 'new' information
  (
    echo ' {'
    echo ' "headers": ['
    echo ' {'
    echo ' "name": "Content-Transfer-Encoding",'
    echo ' "value": "base64"'
    echo ' },'
    echo ' {'
    echo ' "name": "Content-Type",'
    echo ' "value": "application/octet-stream",'
    echo ' "params": {'
    echo ' "name": "'"$2"'"'
    echo ' }'
    echo ' }'
    echo ' ],'
    echo ' "content": "'"$encoded_content"'"'
    echo ' }'
    echo ']'
  ) >> /tmp/attachment.json
}
########## SPENCER INTEGRATION - CHECK MESSAGES FILE FOR ERROR MESSAGES ##########
# Evaluates the Spencer report file "/tmp/spencer_report.txt" and raises the
# configured alarm level for new and for previously seen error messages.
#
# Design notes carried over from the original header:
# - The /var/log/messages file is checked for iscsi, cam, ctl, or cdb error messages.
# - The CSV used in the data recording records each instance of an alarm.
# - Each instance is searched for an exact match for repeat offenders.
# - Entries are removed if they no longer exist in the messages log file.
# - Duplicate errors are counted and shown as "6x error message" for example.
# - A Message file is created to add to the Warning list.
#
# Reads:  spencer_new_warning_level, spencer_existing_warning_level
#         (Error Levels are: None, Warning, Critical), spencer_script_name
# Writes: spencer_error ("false", "true" or "notinstalled"),
#         logfile_warning / logfile_critical (text is appended)
spencer () {
  if [[ $Debug_Steps == "true" ]]; then echo "spencer"; fi
  spencer_error="false"

  # No report file: the only thing left to tell is whether Spencer is installed.
  if [ ! -f "/tmp/spencer_report.txt" ]; then
    if [ ! -e "$spencer_script_name" ]; then
      spencer_error="notinstalled"
    fi
    return 0
  fi

  # New error messages present in the report?
  if grep -qwi "New Error Messages" "/tmp/spencer_report.txt"; then
    case "$spencer_new_warning_level" in
      "Warning")
        logfile_warning="${logfile_warning}Spencer New Error Data - See Attachment"
        ;;
      "Critical")
        logfile_critical="${logfile_critical}Spencer New Error Data - See Attachment"
        ;;
    esac
    spencer_error="true"
  fi

  # Previously seen error messages present in the report?
  if grep -qwi "Previous" "/tmp/spencer_report.txt"; then
    case "$spencer_existing_warning_level" in
      "Warning")
        logfile_warning="${logfile_warning}Spencer Existing Error Data - See Attachment"
        ;;
      "Critical")
        logfile_critical="${logfile_critical}Spencer Existing Error Data - See Attachment"
        ;;
    esac
    spencer_error="true"
  fi
}
### Using drive_id_data_temp (the list of drive IDs); returns both the serial numbers and the IDs.
### This routine uses the TrueNAS API. Called: get_drive_serial_numbers_api "serials"
get_drive_serial_numbers_api () {
if [[ $Debug_Steps == "true" ]]; then echo "get_drive_serial_numbers_api"; fi
api_x=0
api_drive_name=""
api_drive_serial_number=""
echo -n "Scanning Drives"
while [[ "$(midclt call disk.query | jq -r '.['$api_x'].name')" != "null" ]]; do
for api_temp_loop in $drive_id_data_temp; do
api_drive_name_test=$(midclt call disk.query | jq -r '.['$api_x'].name' | sed 's/nvd/nvme/g')
if [[ "$api_drive_name_test" == *"$api_temp_loop"* ]]; then
echo -n "." # $api_drive_name_test
if [[ "$api_drive_name_test" == *"nvme"* ]]; then
api_drive_name=$api_drive_name" "$(echo "nvme"$(echo $api_drive_name_test | sed -r 's#^nvme##' | cut -d 'n' -f 1)" ")