-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathplot_tagDensity.py
More file actions
executable file
·87 lines (67 loc) · 3.43 KB
/
Copy pathplot_tagDensity.py
File metadata and controls
executable file
·87 lines (67 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# R-script to process and plot tag usage data world-wide
# usage: ./plot_tagDensity.py [-i input.csv] [-o output.png|.jpg|.pdf] [--tag tag_name] [-w <num>] [-b <lat,lon,lat,lon>] [-c <yes|no>]
# -i|--input - input file in csv format with header and two columns containing lat, lon coordinates [default or -: stdin]
# -o|--output - output picture file formats: png, jpg, pdf (default: .png with autogenerated name)
# -t|--tag - name of tag to displey in plot title
# -w|--binwidth - size of square for object counting in degrees. 1 means 1˚x1˚ square (defalut: 1)
# -b|--bbox - plot only area within bbox. Four coordinates seprated by , [lat,lon,lat,lon] (defalut: -55,-180,90,180)
# -c|--countries - plot countries' borders [yes|no] (default: no)
# binning idea from: https://stackoverflow.com/questions/11507575/basemap-and-density-plots
import datetime
import sys
import csv
import argparse
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from matplotlib import rc
from datetime import date
parser = argparse.ArgumentParser(description='Script for creating files contating names of reads that bear given number of mutations')
parser.add_argument('-i', '--input', type=str,
help='Input CSV file lat, lon coordinates')
parser.add_argument('-o', '--output', type=str,
help='Outputfile for plot (.png, ,jpg, .pdf)',
default="plot_" + str(datetime.datetime.now()) + ".png")
parser.add_argument('-t', '--tag', type=str,
help='tag name',
default="tag density")
parser.add_argument('-w', '--binwidth', type=float,
help='size of square for object counting in degrees',
default=1)
parser.add_argument('-b', '--bbox', type=str,
help='four coordinates separeatd by comma',
default="-55,-180,90,180")
parser.add_argument('-c', '--countries', type=str, choices=['yes', 'no'],
help='whether to plot borders (default: no)',
default='no')
args = parser.parse_args()
bbox = args.bbox.split(",")
bbox = [float(x) for x in bbox]
if args.input == '-':
overpass = np.genfromtxt(sys.stdin, delimiter=',', skip_header=1)
else:
overpass = np.genfromtxt(args.input, delimiter=',', skip_header=1)
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
if args.countries == 'no':
world = world.dissolve()
# Calculate how many bins do we need to split the world
nx = int(360 / args.binwidth)
ny = int(180 / args.binwidth)
# Create bin edges
lon_bins = np.linspace(-180, 180, nx+1)
lat_bins = np.linspace(-90, 90, ny+1)
# Calculate frequency in each square bin
density, _, _ = np.histogram2d(overpass[:, 0], overpass[:, 1], [lat_bins, lon_bins])
# Turn the lon/lat bins into 2 dimensional arrays
lon_bins_2d, lat_bins_2d = np.meshgrid(lon_bins, lat_bins)
fig, ax = plt.subplots()
# ax.set_aspect('equal')
ax.set_facecolor('black')
ax.axis([bbox[1], bbox[3], bbox[0], bbox[2]])
ax.set_title(args.tag, fontsize=15)
plt.pcolormesh(lon_bins_2d, lat_bins_2d, np.log10(density), cmap='plasma', zorder=1)
world.boundary.plot(ax=ax, edgecolor='grey', linewidth=0.25, zorder=2)
plt.colorbar(label=r'$log_{10}$', fraction=0.04, aspect=6)
fig.text(.77, .22, str(date.today()), ha='center')
plt.savefig(args.output, dpi = 300, bbox_inches='tight')