QUAST

Quality Assessment Tool for Genome Assemblies by Center for Algorithmic Biotechnology

Contigs are ordered from largest (contig #1) to smallest.

Contigs are broken into nonoverlapping 100 bp windows. Plot shows numbers of windows for each GC percentage.

{"minContig":500,"report":[["Statistics without reference",[{"values":[11],"quality":"Less is better","isMain":true,"metricName":"# contigs"},{"values":[17],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 0 bp)"},{"values":[8],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 1000 bp)"},{"values":[7],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 5000 bp)"},{"values":[6],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 10000 bp)"},{"values":[5],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 25000 bp)"},{"values":[2],"quality":"Less is better","isMain":false,"metricName":"# contigs (>= 50000 bp)"},{"values":[368542],"quality":"More is better","isMain":true,"metricName":"Largest contig"},{"values":[582257],"quality":"More is better","isMain":true,"metricName":"Total length"},{"values":[584267],"quality":"More is better","isMain":false,"metricName":"Total length (>= 0 bp)"},{"values":[580160],"quality":"More is better","isMain":true,"metricName":"Total length (>= 1000 bp)"},{"values":[577000],"quality":"More is better","isMain":false,"metricName":"Total length (>= 5000 bp)"},{"values":[570240],"quality":"More is better","isMain":true,"metricName":"Total length (>= 10000 bp)"},{"values":[554043],"quality":"More is better","isMain":false,"metricName":"Total length (>= 25000 bp)"},{"values":[446481],"quality":"More is better","isMain":true,"metricName":"Total length (>= 50000 bp)"},{"values":[368542],"quality":"More is better","isMain":false,"metricName":"N50"},{"values":[77939],"quality":"More is better","isMain":false,"metricName":"N75"},{"values":[1],"quality":"Less is better","isMain":false,"metricName":"L50"},{"values":[2],"quality":"Less is better","isMain":false,"metricName":"L75"},{"values":["31.71"],"quality":"Equal","isMain":false,"metricName":"GC (%)"}]],["Misassemblies",[]],["Unaligned",[]],["Mismatches",[{"values":[0],"quality":"Less is better","isMain":false,"metricName":"# N's"},{"values":["0.00"],"quality":"Less is better","isMain":true,"metricName":"# N's per 100 kbp"}]],["Genome statistics",[]],["Predicted genes",[]],["Reference statistics",[]]],"referenceName":"not_aligned","date":"21 November 2017, Tuesday, 13:00:23","order":[0],"assembliesNames":["m_genitalium"]}
{{ qualities }}
{{ mainMetrics }}
{"lists_of_lengths":[[368542,77939,38235,38171,31156,16197,6760,3160,819,717,561]],"filenames":["m_genitalium"]}
{{ assembliesLengths }}
{{ referenceLength }}
{"tickX":1}
{"coord_y":[[368542,368542,368542,77939,77939,38235,38235,38171,38171,31156,31156,16197,16197,6760,6760,3160,3160,819,819,717,717,561,561]],"coord_x":[[0.0,1e-10,63.29541765921234,63.29541765931234,76.6810875609911,76.6810875610911,83.24777546684712,83.24777546694712,89.80347166285678,89.80347166295678,95.15437341242784,95.15437341252785,97.93613473088344,97.93613473098344,99.09713408340303,99.09713408350304,99.63984975706603,99.63984975716603,99.78050929400591,99.78050929410591,99.90365079337818,99.90365079347818,100.0]],"filenames":["m_genitalium"]}
{{ coordNGx }}
{{ coordNAx }}
{{ coordNGAx }}
{{ genesInContigs }}
{{ operonsInContigs }}
[{{ num_contigs }}, {{ Largest_contig }}, {{ Total_length }}, {{ num_misassemblies }}, {{ Misassembled_contigs_length }}, {{ num_mismatches_per_100_kbp }}, {{ num_indels_per_100_kbp }}, {{ num_N's_per_100_kbp }}, {{ Genome_fraction }}, {{ Duplication_ratio }}, {{ NGA50 }}]
{{ allMisassemblies }}
{{ krona }}
{"list_of_GC_distributions":[[[0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0,75.0,76.0,77.0,78.0,79.0,80.0,81.0,82.0,83.0,84.0,85.0,86.0,87.0,88.0,89.0,90.0,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0],[0,0,0,0,0,0,0,0,0,0,0,0,0,2,6,3,8,14,23,21,44,70,91,140,152,229,241,312,379,375,437,407,423,401,357,289,268,229,178,156,137,101,83,46,54,35,26,26,18,13,10,7,6,2,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]],"lists_of_gc_info":null,"filenames":["m_genitalium"]}
{"links_names":["Icarus: contig browser"],"links":["icarus_viewers/contig_size_viewer.html"]}
{ "# contigs" : "is the total number of contigs in the assembly.", "Largest contig" : "is the length of the longest contig in the assembly.", "Total length" : "is the total number of bases in the assembly.", "Reference length" : "is the total number of bases in the reference.", "# contigs (>= 0 bp)" : "is the total number of contigs in the assembly that have size greater than or equal to 0 bp.", "Total length (>= 0 bp)" : "is the total number of bases in the contigs having size greater than or equal to 0 bp.", "N50" : "is the contig length such that using longer or equal length contigs produces half (50%) of the bases of the assembly. Usually there is no value that produces exactly 50%, so the technical definition is the maximum length x such that using contigs of length at least x accounts for at least 50% of the total assembly length.", "NG50" : "is the contig length such that using longer or equal length contigs produces half (50%) of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "N75" : "is the contig length such that using longer or equal length contigs produces 75% of the bases of the assembly. Usually there is no value that produces exactly 75%, so the technical definition is the maximum length x such that using contigs of length at least x accounts for at least 75% of the total assembly length.", "NG75" : "is the contig length such that using longer or equal length contigs produces 75% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "L50" : "is the minimum number of contigs that produce half (50%) of the bases of the assembly. In other words, it's the number of contigs of length at least N50.", "LG50" : "is the minimum number of contigs that produce half (50%) of the bases of the reference genome. In other words, it's the number of contigs of length at least NG50. 
This metric is computed only if a reference genome is provided.", "L75" : "is the minimum number of contigs that produce 75% of the bases of the assembly. In other words, it's the number of contigs of length at least N75.", "LG75" : "is the minimum number of contigs that produce 75% of the bases of the reference genome. In other words, it's the number of contigs of length at least NG75. This metric is computed only if a reference genome is provided.", "NA50" : "is N50 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NGA50" : "is NG50 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NA75" : "is N75 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "NGA75" : "is NG75 where the lengths of aligned blocks are counted instead of contig lengths. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces. This metric is computed only if a reference genome is provided.", "LA50" : "is L50 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LGA50" : "is LG50 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LA75" : "is L75 where aligned blocks are counted instead of contigs. 
I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "LGA75" : "is LG75 where aligned blocks are counted instead of contigs. I.e., if a contig has a misassembly with respect to the reference, the contig is broken into smaller pieces.", "Average %IDY" : "is the average of alignment identity percent (Nucmer measure of alignment accuracy) among all contigs.", "# misassemblies" : "is the number of positions in the assembled contigs where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference (relocation) or they overlap by more than 1 kbp (relocation) or flanking sequences align on different strands (inversion) or different chromosomes (translocation).", "# misassembled contigs" : "is the number of contigs that contain misassembly events.", "Misassembled contigs length" : "is the total number of bases contained in all contigs that have one or more misassemblies.", "# relocations" : "is the number of relocation events among all misassembly events. Relocation is a misassembly where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference, or they overlap by more than 1 kbp and both flanking sequences align on the same chromosome.", "# translocations" : "is the number of translocation events among all misassembly events. Translocation is a misassembly where the flanking sequences align on different chromosomes.", "# interspecies translocations" : "is the number of interspecies translocation events among all misassembly events. Interspecies translocation is a misassembly where the flanking sequences align on different references (--meta only).", "# inversions" : "is the number of inversion events among all misassembly events. Inversion is a misassembly that is not a relocation and where the flanking sequences align on opposite strands of the same chromosome.", "# local misassemblies" : "is the number of local misassemblies. 
We define a local misassembly breakpoint as a breakpoint that satisfies these conditions:
  1. Two or more distinct alignments cover the breakpoint.
  2. The gap between left and right flanking sequences is less than 1 kbp.
  3. The left and right flanking sequences both are on the same strand of the same chromosome of the reference genome.
", "# scaffold gap size misassemblies" : "is the number of scaffold gap size misassemblies. We define a scaffold gap size misassembly as a breakpoint where the flanking sequences are combined in a scaffold at the wrong distance. These misassemblies are not included in the total number of misassemblies.", "# possibly misassembled contigs": "is the number of contigs that contain a large unaligned fragment and thus could possibly contain an interspecies translocation with an unknown reference.", "# structural variations" : "is the number of misassemblies matched with structural variations.", "# fully unaligned contigs" : "is the number of contigs that have no alignment to the reference sequence.", "Fully unaligned length" : "is the total number of bases contained in all fully unaligned contigs.", "# partially unaligned contigs" : "is the number of contigs that are not fully unaligned ones but have fragments with no alignment to the reference.", "# with misassembly" : "is the number of partially unaligned contigs that contain misassembly events in their aligned fragment. Note that such misassemblies are not counted in # misassemblies and other misassemblies statistics.", "# both parts are significant" : "is the number of partially unaligned contigs that contain both aligned and unaligned fragments of length ≥ min-contig threshold.", "Partially unaligned length" : "is the total number of unaligned bases in all partially unaligned contigs.", "# ambiguous contigs" : "is the number of contigs that have reference alignments of equal quality in multiple locations on the reference.", "Ambiguous contigs length" : "is the total number of bases contained in all ambiguous contigs.", "Genome fraction (%)" : "is the total number of aligned bases in the reference, divided by the genome size. A base in the reference genome is counted as aligned if there is at least one contig with at least one alignment to this base. 
Contigs from repeat regions may map to multiple places, and thus may be counted multiple times in this quantity.", "GC (%)" : "is the total number of G and C nucleotides in the assembly, divided by the total length of the assembly.", "Reference GC (%)" : "is the total number of G and C nucleotides in the reference, divided by the total length of the reference.", "# mismatches per 100 kbp" : "is the average number of mismatches per 100000 aligned bases.", "# mismatches" : "is the number of mismatches in all aligned bases.", "# indels per 100 kbp" : "is the average number of indels per 100000 aligned bases.", "# indels" : "is the number of indels in all aligned bases.", "# short indels" : "is the number of indels of length less than or equal to 5 bp.", "# long indels" : "is the number of indels of length greater than 5 bp.", "Indels length" : "is the total number of bases contained in all indels.", "# genes" : "is the number of genes in the assembly (complete and partial), based on a user-provided annotated list of gene positions in the reference genome. A gene counts as 'partially covered' if the assembly contains at least 100 bp of this gene but not the whole gene.", "# operons" : "is the number of operons in the assembly (complete and partial), based on a user-provided annotated list of operon positions in the reference genome. An operon counts as 'partially covered' if the assembly contains at least 100 bp of this operon but not the whole operon.", "# predicted genes (unique)" : "is the number of unique genes in the assembly found by a gene prediction tool.", "# predicted genes (>= 0 bp)" : "is the number of found genes having length greater than or equal to 0 bp.", "Cumulative length" : "plot shows the growth of assembly contig lengths. On the x-axis, contigs are ordered from largest (contig #1) to smallest. The y-axis gives the size of the x largest contigs in the assembly.", "Nx" : "plot shows the Nx metric value as x varies from 0 to 100. 
Nx is the maximum contig length y such that using contigs of length at least y accounts for at least x% of the total assembly length.", "NGx" : "plot shows the NGx metric value as x varies from 0 to 100. NGx is the maximum contig length y such that using contigs of length at least y accounts for at least x% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "NAx" : "plot shows the NAx metric value as x varies from 0 to 100. NAx is computed similarly to Nx, but based on lengths of aligned blocks instead of contig lengths. Contigs are broken into aligned blocks at misassembly breakpoints. NAx is the maximum block length y such that using blocks of length at least y accounts for at least x% of the bases of the assembly. This metric is computed only if a reference genome is provided.", "NGAx" : "plot shows the NGAx metric value as x varies from 0 to 100. NGAx is computed similarly to NGx, but based on lengths of aligned blocks instead of contig lengths. Contigs are broken at misassembly breakpoints. NGAx is the maximum block length y such that using blocks of length at least y accounts for at least x% of the bases of the reference genome. This metric is computed only if a reference genome is provided.", "GC content" : "plot shows the distribution of GC percentage among the contigs, i.e., the total number of bases in contigs with such GC content. Typically, the distribution is approximately Gaussian. However, for some genomes it is not Gaussian. For assembly projects with contaminants, the GC distribution of the contaminants often differs from the reference genome and may give a superposition of multiple curves with different peaks.", "Duplication ratio" : "is the total number of aligned bases in the assembly (i.e. Total length - Fully unaligned length - Partially unaligned length), divided by the total number of aligned bases in the reference (see the Genome fraction (%) metric). 
If the assembly contains many contigs that cover the same regions of the reference, its Duplication ratio may be much larger than 1. This may occur due to overestimating repeat multiplicities and due to small overlaps between contigs, among other reasons.", "Largest alignment" : "is the length of the largest continuous alignment in the assembly. This metric is usually equal to the Largest contig metric, but it can be smaller if the largest contig of the assembly contains a misassembly event.", "# N's" : "is the total number of uncalled bases (N's) in the assembly.", "# N's per 100 kbp" : "is the average number of uncalled bases (N's) per 100000 assembly bases." }