Skip to content

Commit 20fcd39

Browse files
committed
Added support for building a metrics report with Illumina data.
1 parent 4b68324 commit 20fcd39

2 files changed

Lines changed: 52 additions & 30 deletions

File tree

4Pipe4.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def StartUp():
105105
quit(basefile + " already exists. Please deal with it before \
106106
proceeding.")
107107
else:
108-
os.symlink(input_file, outfile + ".fastq")
108+
os.symlink(input_file, arg.outfile + ".fastq")
109109

110110

111111
if arg.configFile is not None:
@@ -314,13 +314,19 @@ def ORFliner(basefile):
314314
print("\nRunning the metrics calculator module...")
315315
seqclean_log_path = "%s/seqcl_%s.fasta.log" % (os.path.split(basefile)[0],
316316
miraproject)
317-
Metrics.Run_module(seqclean_log_path, basefile + '.fasta',
318-
basefile + '.clean.fasta', basefile + '.fasta.qual',
319-
basefile + '.clean.fasta.qual',
320-
basefile + '_assembly/' + miraproject + '_d_info/'
321-
+ miraproject + '_info_assembly.txt', basefile
322-
+ '.SNPs.fasta', basefile + '.BestORF.fasta',
323-
basefile + '.Metrics.html')
317+
if arg.datatype == "454":
318+
Metrics.Run_module(seqclean_log_path, basefile + '.fasta',
319+
basefile + '.clean.fasta', basefile + '.fasta.qual',
320+
basefile + '.clean.fasta.qual',
321+
basefile + '_assembly/' + miraproject + '_d_info/'
322+
+ miraproject + '_info_assembly.txt', basefile
323+
+ '.SNPs.fasta', basefile + '.BestORF.fasta',
324+
basefile + '.Metrics.html')
325+
else:
326+
Metrics.Run_as_solexa(basefile + '_assembly/' + miraproject + '_d_info/'
327+
+ miraproject + '_info_assembly.txt', basefile
328+
+ '.SNPs.fasta', basefile + '.BestORF.fasta',
329+
basefile + '.Metrics.html')
324330
# Finally we write down our report using the data gathered so far:
325331
print("\nRunning Reporter module...")
326332
Reporter.RunModule(basefile + '.BestORF.fasta', basefile + '.SNPs.fasta',

Metrics.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def Read_qual_metrics(qual_file):
4444
quals += lines
4545
qual.close()
4646
qual_avg = "%.2f" % (sum(quals)/len(quals))
47-
47+
4848
return(qual_avg)
4949

5050

@@ -159,26 +159,28 @@ def Metrics_writer(dataset_info, contig_info, snp_info, metrics_file):
159159
TABLE,THEAD,TBODY,TFOOT,TR,TH,TD,P { font-family:"Arial"; font-size:small }\
160160
\n -->\n </STYLE>\n </HEAD>\n<BODY>\n')
161161
metrics_file.write("<H1>4Pipe4 metrics report:</H1>\n")
162-
metrics_file.write("<H2>Dataset metrics:</H2>\n")
163-
metrics_file.write("<p>Average read length (before cleaning): "
164-
+ str(dataset_info[1][0]) + "</p>\n")
165-
metrics_file.write("<p>Maximum read length (before cleaning): "
166-
+ str(dataset_info[1][1]) + "</p>\n")
167-
metrics_file.write("<p>Median of read length (before cleaning): "
168-
+ str(dataset_info[1][2]) + "</p>\n")
169-
metrics_file.write("<p>Average base quality (before cleaning): "
170-
+ str(dataset_info[3]) + "</p>\n")
171-
metrics_file.write("<H3>SeqClean report:</H3>")
172-
for lines in dataset_info[0]:
173-
metrics_file.write("<p>" + lines + "</p>")
174-
metrics_file.write("<p>Average read length (after cleaning): "
175-
+ str(dataset_info[2][0]) + "</p>\n")
176-
metrics_file.write("<p>Maximum read length (after cleaning): "
177-
+ str(dataset_info[2][1]) + "</p>\n")
178-
metrics_file.write("<p>Median of read length (after cleaning): "
179-
+ str(dataset_info[2][2]) + "</p>\n")
180-
metrics_file.write("<p>Average base quality (after cleaning): "
181-
+ str(dataset_info[4]) + "</p>\n")
162+
# Write these metrics for 454 only.
163+
if dataset_info != "solexa":
164+
metrics_file.write("<H2>Dataset metrics:</H2>\n")
165+
metrics_file.write("<p>Average read length (before cleaning): "
166+
+ str(dataset_info[1][0]) + "</p>\n")
167+
metrics_file.write("<p>Maximum read length (before cleaning): "
168+
+ str(dataset_info[1][1]) + "</p>\n")
169+
metrics_file.write("<p>Median of read length (before cleaning): "
170+
+ str(dataset_info[1][2]) + "</p>\n")
171+
metrics_file.write("<p>Average base quality (before cleaning): "
172+
+ str(dataset_info[3]) + "</p>\n")
173+
metrics_file.write("<H3>SeqClean report:</H3>")
174+
for lines in dataset_info[0]:
175+
metrics_file.write("<p>" + lines + "</p>")
176+
metrics_file.write("<p>Average read length (after cleaning): "
177+
+ str(dataset_info[2][0]) + "</p>\n")
178+
metrics_file.write("<p>Maximum read length (after cleaning): "
179+
+ str(dataset_info[2][1]) + "</p>\n")
180+
metrics_file.write("<p>Median of read length (after cleaning): "
181+
+ str(dataset_info[2][2]) + "</p>\n")
182+
metrics_file.write("<p>Average base quality (after cleaning): "
183+
+ str(dataset_info[4]) + "</p>\n")
182184

183185
metrics_file.write("<H2>Contig metrics:</H2>\n")
184186
metrics_file.write("<p>Number of reads assembled: "
@@ -234,14 +236,28 @@ def Run_module(seqclean_log_file, original_fasta_file, clean_fasta_file,
234236
original_fasta_qual_file, clean_fasta_qual_file,
235237
info_assembly_file, snps_fasta_file, bestorf_fasta_file,
236238
metrics_file):
237-
'''Run the module'''
239+
"""
240+
Run the module
241+
"""
238242
dataset_info = Dataset_gather(seqclean_log_file, original_fasta_file,
239243
clean_fasta_file, original_fasta_qual_file,
240244
clean_fasta_qual_file)
241245
contig_info = Contig_gather(info_assembly_file)
242246
snp_info = SNP_gather(snps_fasta_file, bestorf_fasta_file)
243247
Metrics_writer(dataset_info, contig_info, snp_info, metrics_file)
244248

249+
250+
def Run_as_solexa(info_assembly_file, snps_fasta_file, bestorf_fasta_file,
                  metrics_file):
    """
    Build the metrics report for solexa data.

    Unlike Run_module(), no dataset metrics are gathered here: the literal
    string "solexa" is handed to Metrics_writer() in the dataset_info slot
    instead.

    Arguments:
    info_assembly_file -- passed to Contig_gather()
    snps_fasta_file    -- passed to SNP_gather() (first argument)
    bestorf_fasta_file -- passed to SNP_gather() (second argument)
    metrics_file       -- passed through to Metrics_writer()
    """
    # Arguments are evaluated left to right, so the contig data is still
    # gathered before the SNP data.
    Metrics_writer("solexa",
                   Contig_gather(info_assembly_file),
                   SNP_gather(snps_fasta_file, bestorf_fasta_file),
                   metrics_file)
259+
260+
245261
if __name__ == "__main__":
246262
# Usage: python3 Metrics.py (view Run_module() for a list of arguments)
247263
from sys import argv

0 commit comments

Comments
 (0)