-
Notifications
You must be signed in to change notification settings - Fork 0
/
mtfilter_vcf.py
70 lines (51 loc) · 1.58 KB
/
mtfilter_vcf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
'''
count_mismatches.py - count the number of mismatches per gene
====================================================
:Author: Ian Sudbery
:Release: $Id$
:Date: |today|
:Tags: Python
Purpose
-------
Count the number of high quality mismatches per gene per base sequenced. Will discard reads marked as duplicate
Usage
-----
.. Example use case
Example::
python cgat_script_template.py
Type::
python cgat_script_template.py --help
for command line help.
Command line options
--------------------
'''
import sys
from collections import defaultdict
from CGAT import Experiment as E
from CGAT import IOTools
import CGAT.Bed as Bed
import textwrap
import pysam
import vcf
import re
def main(argv=None):
"""script main.
parses command line options in sys.argv, unless *argv* is given.
"""
if argv is None:
argv = sys.argv
# setup command line parser
parser = E.OptionParser(version="%prog version: $Id$",
usage=globals()["__doc__"])
parser.add_option("-p", "--vcf-path", dest="vcfpath", type="string",
help="path to indexed vcf file for dataset of choice")
parser.add_option("-o", "--out", dest="out",
help="name of output file")
(options, args) = E.Start(parser, argv=argv)
vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
vcffile = vcf.Reader(open(options.vcfpath,"r"))
vcfregion = vcffile.fetch("chrM")
vcf_writer = vcf.Writer(open('%s'%(out),'w'), vcfregion)
for record in vcfregion:
vcf_writer.write_record(record)
vcf_writer.flush()