Skip to content

Commit

Permalink
keep genome.fa.masked
Browse files Browse the repository at this point in the history
  • Loading branch information
CSU-KangHu committed Aug 12, 2024
1 parent 18c4877 commit ed23ef8
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 31 deletions.
16 changes: 15 additions & 1 deletion .idea/deployment.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@
if not os.path.isabs(output_dir):
output_dir = os.path.abspath(output_dir)

if not os.path.exists(reference):
log.logger.error('\nCannot find input genome assembly: ' + str(reference))
parser.print_help()
exit(-1)

if threads is None:
threads = int(default_threads)
else:
Expand Down
52 changes: 22 additions & 30 deletions module/HiTE_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3683,38 +3683,30 @@ def get_logo_seq(ltr_copies):
# is_TE, info, cons_seq = judge_boundary_v9(cur_seq, align_file, debug, TE_type, plant, result_type)
# print(is_TE, info, cons_seq)

# work_dir = '/homeb/hukang/KmerRepFinder_test/library/HiTE_lib/mouse_bak'
# candidate_non_ltr_path = '/homeb/hukang/KmerRepFinder_test/library/curated_lib/Repbase_28.06/mouse/mouse_non_ltr.lib'
# names, contigs = read_fasta(candidate_non_ltr_path)
# non_ltr_contigs = {}
# for name in names:
# non_ltr_contigs[name.split('#')[0]] = contigs[name]
# non_ltr_path = work_dir + '/mouse_non_ltr.lib'
# store_fasta(non_ltr_contigs, non_ltr_path)
#
#
# confident_non_ltr_path = work_dir + '/test_non_ltr.fa'
# flanking_len = 50
# reference = '/homeb/hukang/KmerRepFinder_test/library/earlgrey_lib/mouse/mm39.fa'
# test_home = '/home/hukang/test/HiTE/module'
# tmp_output_dir = work_dir
# chrom_seg_length = 100000
# chunk_size = 400
work_dir = '/home/hukang/test1/HiTE/demo/David_suggestion'
candidate_tir_path = work_dir + '/mAntPal2.1.pri_TIR_373-_rep.fa'
confident_tir_path = work_dir + '/test_tir.fa'
flanking_len = 50
reference = work_dir + '/GCA_027563665.1_DD_ASM_mAP_20210503_genomic.fna'
test_home = '/home/hukang/test1/HiTE/module'
tmp_output_dir = work_dir + '/outdir'
chrom_seg_length = 100000
chunk_size = 400
# split_genome_command = 'cd ' + test_home + ' && python3 ' + test_home + '/split_genome_chunks.py -g ' \
# + reference + ' --tmp_output_dir ' + tmp_output_dir \
# + ' --chrom_seg_length ' + str(chrom_seg_length) + ' --chunk_size ' + str(chunk_size)
# #os.system(split_genome_command)
#
# TE_type = 'non_ltr'
# split_ref_dir = tmp_output_dir + '/ref_chr'
# threads = 40
# ref_index = 0
# subset_script_path = '/home/hukang/test/HiTE/tools/ready_for_MSA.sh'
# plant = 0
# debug = 1
# flank_region_align_v5(non_ltr_path, confident_non_ltr_path, flanking_len, reference, split_ref_dir,
# TE_type, work_dir, threads, ref_index, log, subset_script_path,
# plant, debug, 0, result_type='cons')
#os.system(split_genome_command)

TE_type = 'tir'
split_ref_dir = tmp_output_dir + '/ref_chr'
threads = 40
ref_index = 0
subset_script_path = '/home/hukang/test1/HiTE/tools/ready_for_MSA.sh'
plant = 0
debug = 1
flank_region_align_v5(candidate_tir_path, confident_tir_path, flanking_len, reference, split_ref_dir,
TE_type, work_dir, threads, ref_index, log, subset_script_path,
plant, debug, 0, result_type='cons')


# # Classify candidate_non_ltr.fa and extract the LINE/SINE labels
Expand Down Expand Up @@ -4126,7 +4118,7 @@ def get_logo_seq(ltr_copies):
#
# column_data.to_csv('/homeb/hukang/KmerRepFinder_test/library/nextflow_test2/rice/novel_tir/data.csv', index=False)
#
draw_dist('/homeb/hukang/KmerRepFinder_test/library/nextflow_test2/rice/novel_tir/data.csv')
# draw_dist('/homeb/hukang/KmerRepFinder_test/library/nextflow_test2/rice/novel_tir/data.csv')

# Get the novel TIR transposons, then obtain their multiple sequence alignments and protein structure information
# tmp_output_dir = '/homeb/hukang/KmerRepFinder_test/library/nextflow_test4/rice'
Expand Down
1 change: 1 addition & 0 deletions module/clean_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
keep_files_temp = []
keep_files = ['chr_name\.map',
'genome\.rename\.fa',
'genome\.fa\.masked',
'genome\.rename\.fa\.pass\.list',
'.*\.scn',
'genome\.rename\.fa\.LTRlib\.fa',
Expand Down

0 comments on commit ed23ef8

Please sign in to comment.