Anna Syme

Click name ↑ to return to homepage

Basic python script structure

Create file

#!/usr/bin/env python
# Minimal script: this single top-level statement runs as soon as the file
# is executed.
print("this is script.py")

Make it executable (e.g. run `chmod +x script.py`)

Main function

#!/usr/bin/env python
print("this is script.py")

def main():
    """Entry point of the script; for now it only announces itself."""
    print("this is the main function")


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Read in the R1.fastq read file

#!/usr/bin/env python
import argparse
import sys
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace whose ``R1_file`` attribute holds the single
        positional argument as a string.
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    return parser.parse_args(argv)

def main():
    """Entry point of the script; at this stage it only announces itself."""
    print("this is the main function")


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Read the inputs in the main function

#!/usr/bin/env python
import argparse
import sys
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace whose ``R1_file`` attribute holds the single
        positional argument as a string.
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    return parser.parse_args(argv)

def main():
    """Entry point: parse the command line and echo the R1 file argument."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Make a separate function to process files

#!/usr/bin/env python
import argparse
import sys
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace whose ``R1_file`` attribute holds the single
        positional argument as a string.
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    return parser.parse_args(argv)

def process_files(things):
    """Announce the processing step and echo the R1 file it was given.

    Args:
        things: Parsed arguments object; only its ``R1_file`` attribute
            is read.
    """
    print("this is the process files function")
    r1_path = things.R1_file
    print(r1_path)

def main():
    """Entry point: parse the command line, echo the R1 file, process it."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)
    process_files(args)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Run a tool to get stats on your input R1 file

#!/usr/bin/env python
import argparse
import sys
import subprocess
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace whose ``R1_file`` attribute holds the single
        positional argument as a string.
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    return parser.parse_args(argv)

def process_files(things):
    """Echo the R1 file and run ``seqkit stats`` on it.

    The seqkit output is not captured; it goes to the streams inherited
    from this script.

    Args:
        things: Parsed arguments object; only its ``R1_file`` attribute
            is read.

    Raises:
        subprocess.CalledProcessError: If seqkit exits with a non-zero
            status.
    """
    print("this is the process files function")
    print(things.R1_file)
    # Argument list (no shell): the path is passed to seqkit as one
    # argument even if it contains spaces.
    cmd = ["seqkit", "stats", things.R1_file]
    # check=True turns a failed seqkit run into an exception instead of
    # letting the script carry on as if it had succeeded.
    subprocess.run(cmd, check=True)

def main():
    """Entry point: parse the command line, echo the R1 file, process it."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)
    process_files(args)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Put this stats step into its own function

#!/usr/bin/env python
import argparse
import sys
import subprocess
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace whose ``R1_file`` attribute holds the single
        positional argument as a string.
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    return parser.parse_args(argv)

def stats(file):
    """Run ``seqkit stats`` on one file.

    The seqkit output is not captured; it goes to the streams inherited
    from this script.

    Args:
        file: Path handed to seqkit as its only input.

    Raises:
        subprocess.CalledProcessError: If seqkit exits with a non-zero
            status.
    """
    # Argument list (no shell): the path is passed to seqkit as one
    # argument even if it contains spaces.
    cmd = ["seqkit", "stats", file]
    # check=True turns a failed seqkit run into an exception instead of
    # letting the script carry on as if it had succeeded.
    subprocess.run(cmd, check=True)

def process_files(things):
    """Echo the R1 file and hand it to stats().

    Args:
        things: Parsed arguments object; only its ``R1_file`` attribute
            is read.
    """
    print("this is the process files function")
    r1_path = things.R1_file
    print(r1_path)
    # The seqkit command that used to be built and run here now lives in
    # stats().
    stats(things.R1_file)

def main():
    """Entry point: parse the command line, echo the R1 file, process it."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)
    process_files(args)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Add in an optional input

#!/usr/bin/env python
import argparse
import sys
import subprocess
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace with ``R1_file`` (the positional argument, a
        string) and ``threads`` (int taken from ``--threads``; 16 when the
        option is omitted).
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    parser.add_argument("--threads", type=int, default=16)
    return parser.parse_args(argv)

def stats(file):
    """Run ``seqkit stats`` on one file.

    The seqkit output is not captured; it goes to the streams inherited
    from this script.

    Args:
        file: Path handed to seqkit as its only input.

    Raises:
        subprocess.CalledProcessError: If seqkit exits with a non-zero
            status.
    """
    # Argument list (no shell): the path is passed to seqkit as one
    # argument even if it contains spaces.
    cmd = ["seqkit", "stats", file]
    # check=True turns a failed seqkit run into an exception instead of
    # letting the script carry on as if it had succeeded.
    subprocess.run(cmd, check=True)

def process_files(things):
    """Echo the R1 file and hand it to stats().

    Args:
        things: Parsed arguments object; only its ``R1_file`` attribute
            is read.
    """
    print("this is the process files function")
    r1_path = things.R1_file
    print(r1_path)
    stats(things.R1_file)

def main():
    """Entry point: parse the command line, echo both inputs, process them."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)
    print(args.threads)
    process_files(args)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Print out the inputs after parsing them

#!/usr/bin/env python
import argparse
import sys
import subprocess
print("this is script.py")

def parse_args(argv=None):
    """Parse the command-line arguments and print each parsed value.

    Args:
        argv: Optional list of argument strings to parse instead of the
            real command line. With the default (None) argparse reads
            sys.argv[1:], so a plain ``parse_args()`` call behaves as before.

    Returns:
        argparse.Namespace with ``R1_file`` (the positional argument, a
        string) and ``threads`` (int taken from ``--threads``; 16 when the
        option is omitted).
    """
    parser = argparse.ArgumentParser(description="a script to do stuff")
    parser.add_argument("R1_file")
    parser.add_argument("--threads", type=int, default=16)
    args = parser.parse_args(argv)

    # Echo every parsed name/value pair so the run records which inputs
    # it actually used.
    print("the inputs are:")
    for name, value in vars(args).items():
        print("{} is {}".format(name, value))

    return args

def stats(file):
    """Run ``seqkit stats`` on one file.

    The seqkit output is not captured; it goes to the streams inherited
    from this script.

    Args:
        file: Path handed to seqkit as its only input.

    Raises:
        subprocess.CalledProcessError: If seqkit exits with a non-zero
            status.
    """
    # Argument list (no shell): the path is passed to seqkit as one
    # argument even if it contains spaces.
    cmd = ["seqkit", "stats", file]
    # check=True turns a failed seqkit run into an exception instead of
    # letting the script carry on as if it had succeeded.
    subprocess.run(cmd, check=True)

def process_files(things):
    """Echo the R1 file and hand it to stats().

    Args:
        things: Parsed arguments object; only its ``R1_file`` attribute
            is read.
    """
    print("this is the process files function")
    r1_path = things.R1_file
    print(r1_path)
    stats(things.R1_file)

def main():
    """Entry point: parse the command line, echo both inputs, process them."""
    print("this is the main function")
    args = parse_args()
    print(args.R1_file)
    print(args.threads)
    process_files(args)


# Call main() only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()