Data conversion¶
In [1]:
Copied!
import gwaslab as gl
import gwaslab as gl
In [2]:
Copied!
# Print the installed GWASLab version banner (v3.5.4 in the recorded output)
# so readers know which release produced the results below.
gl.show_version()
gl.show_version()
2024/12/22 22:33:17 GWASLab v3.5.4 https://cloufield.github.io/gwaslab/ 2024/12/22 22:33:17 (C) 2022-2024, Yunye He, Kamatani Lab, MIT License, gwaslab@gmail.com
Loading sample data¶
In [3]:
Copied!
# Load only the first 5 rows (nrows=5) of the sample T2D summary statistics,
# mapping the file's headers (SNP, CHR, POS, ALT, REF, Frq, BETA, SE) onto
# GWASLab's arguments. basic_check() is then run and the table is displayed;
# in the output the columns appear as SNPID/CHR/POS/EA/NEA/EAF/BETA/SE/STATUS.
# NOTE(review): Frq is passed as neaf (presumably the non-effect-allele
# frequency) while the resulting table reports EAF - confirm this mapping
# matches the input file.
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
ea="ALT",
nea="REF",
neaf="Frq",
beta="BETA",
se="SE",nrows=5,verbose=False)
mysumstats.basic_check(verbose=False)
mysumstats.data
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
ea="ALT",
nea="REF",
neaf="Frq",
beta="BETA",
se="SE",nrows=5,verbose=False)
mysumstats.basic_check(verbose=False)
mysumstats.data
Out[3]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | STATUS | |
---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 9960099 |
BETA -> OR¶
In [4]:
Copied!
# Request the OR column. Per the log below, OR is filled from BETA and the
# confidence bounds OR_95L/OR_95U are filled from BETA/SE in the same call.
mysumstats.fill_data(to_fill=["OR"])
mysumstats.fill_data(to_fill=["OR"])
2024/12/22 22:33:32 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:32 -Column : SNPID CHR POS EA NEA EAF BETA SE STATUS 2024/12/22 22:33:32 -DType : string Int64 Int64 category category float32 float64 float64 category 2024/12/22 22:33:32 -Verified: T T T T T T T T T 2024/12/22 22:33:32 -Overwrite mode: False 2024/12/22 22:33:32 -Skipping columns: [] 2024/12/22 22:33:32 -Filling columns: ['OR'] 2024/12/22 22:33:32 - Filling Columns iteratively... 2024/12/22 22:33:32 - Filling OR using BETA column... 2024/12/22 22:33:32 - Filling OR_95L/OR_95U using BETA/SE columns... 2024/12/22 22:33:32 Finished filling data using existing columns. 2024/12/22 22:33:32 Start to reorder the columns...v3.5.4 2024/12/22 22:33:32 -Current Dataframe shape : 5 x 12 ; Memory usage: 21.47 MB 2024/12/22 22:33:32 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:32 Finished reordering the columns.
In [5]:
Copied!
mysumstats.data
mysumstats.data
Out[5]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
OR -> BETA¶
In [6]:
Copied!
# Remove BETA and SE so that the following fill_data() call has to rebuild
# them from the OR columns. The frame is reassigned rather than mutated with
# inplace=True, and errors="ignore" keeps the cell idempotent: running it
# again after the columns are already gone is a no-op instead of a KeyError.
mysumstats.data = mysumstats.data.drop(columns=["BETA","SE"], errors="ignore")
mysumstats.data = mysumstats.data.drop(columns=["BETA","SE"], errors="ignore")
In [7]:
Copied!
mysumstats.data
mysumstats.data
Out[7]:
SNPID | CHR | POS | EA | NEA | EAF | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
In [8]:
Copied!
# Rebuild the columns dropped above. Per the log below, BETA is filled from
# OR and SE is filled from OR together with OR_95U.
mysumstats.fill_data(to_fill=["BETA","SE"])
mysumstats.fill_data(to_fill=["BETA","SE"])
2024/12/22 22:33:32 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:32 -Column : SNPID CHR POS EA NEA EAF OR OR_95L OR_95U STATUS 2024/12/22 22:33:32 -DType : string Int64 Int64 category category float32 float64 float64 float64 category 2024/12/22 22:33:32 -Verified: T T T T T T T T T T 2024/12/22 22:33:32 -Overwrite mode: False 2024/12/22 22:33:32 -Skipping columns: [] 2024/12/22 22:33:32 -Filling columns: ['BETA', 'SE'] 2024/12/22 22:33:32 - Filling Columns iteratively... 2024/12/22 22:33:32 - Filling BETA value using OR column... 2024/12/22 22:33:32 - Filling SE value using OR/OR_95U column... 2024/12/22 22:33:32 Finished filling data using existing columns. 2024/12/22 22:33:32 Start to reorder the columns...v3.5.4 2024/12/22 22:33:32 -Current Dataframe shape : 5 x 12 ; Memory usage: 21.47 MB 2024/12/22 22:33:32 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:32 Finished reordering the columns.
In [9]:
Copied!
mysumstats.data
mysumstats.data
Out[9]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
BETA/SE -> Z¶
In [10]:
Copied!
# Request the Z column; the log below reports it is filled from BETA/SE.
mysumstats.fill_data(to_fill=["Z"])
mysumstats.fill_data(to_fill=["Z"])
2024/12/22 22:33:32 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:32 -Column : SNPID CHR POS EA NEA EAF BETA SE OR OR_95L OR_95U STATUS 2024/12/22 22:33:32 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 category 2024/12/22 22:33:32 -Verified: T T T T T T T T T T T T 2024/12/22 22:33:32 -Overwrite mode: False 2024/12/22 22:33:32 -Skipping columns: [] 2024/12/22 22:33:32 -Filling columns: ['Z'] 2024/12/22 22:33:32 - Filling Columns iteratively... 2024/12/22 22:33:32 - Filling Z using BETA/SE column... 2024/12/22 22:33:32 Finished filling data using existing columns. 2024/12/22 22:33:32 Start to reorder the columns...v3.5.4 2024/12/22 22:33:32 -Current Dataframe shape : 5 x 13 ; Memory usage: 21.47 MB 2024/12/22 22:33:32 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:32 Finished reordering the columns.
In [11]:
Copied!
mysumstats.data
mysumstats.data
Out[11]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | Z | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | -0.528694 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.070352 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.102564 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
P -> MLOG10P¶
In [12]:
Copied!
# Request MLOG10P. No P column exists at this point, so (per the log below)
# fill_data first fills P from Z and then fills MLOG10P from that P; both
# columns are present afterwards.
mysumstats.fill_data(to_fill=["MLOG10P"])
mysumstats.fill_data(to_fill=["MLOG10P"])
2024/12/22 22:33:32 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:32 -Column : SNPID CHR POS EA NEA EAF BETA SE Z OR OR_95L OR_95U STATUS 2024/12/22 22:33:32 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 category 2024/12/22 22:33:32 -Verified: T T T T T T T T T T T T T 2024/12/22 22:33:32 -Overwrite mode: False 2024/12/22 22:33:32 -Skipping columns: [] 2024/12/22 22:33:32 -Filling columns: ['MLOG10P'] 2024/12/22 22:33:32 - Filling Columns iteratively... 2024/12/22 22:33:32 - Filling P value using Z column... 2024/12/22 22:33:32 - Filling MLOG10P using P column... 2024/12/22 22:33:32 Finished filling data using existing columns. 2024/12/22 22:33:32 Start to reorder the columns...v3.5.4 2024/12/22 22:33:32 -Current Dataframe shape : 5 x 15 ; Memory usage: 21.47 MB 2024/12/22 22:33:32 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:32 Finished reordering the columns.
MLOG10P -> P¶
In [13]:
Copied!
# Request the P column.
# NOTE(review): P was already created by the previous MLOG10P step, and the
# log below shows "Overwrite mode: False", "Skipping columns: ['P']" and
# "No available columns to fill", i.e. this call does not actually convert
# MLOG10P to P. To demonstrate the conversion, P would have to be absent
# first (or overwriting enabled - presumably via fill_data's overwrite
# option; confirm against the GWASLab docs).
mysumstats.fill_data(to_fill=["P"])
mysumstats.fill_data(to_fill=["P"])
2024/12/22 22:33:33 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:33 -Column : SNPID CHR POS EA NEA EAF BETA SE Z P MLOG10P OR OR_95L OR_95U STATUS 2024/12/22 22:33:33 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 float64 float64 category 2024/12/22 22:33:33 -Verified: T T T T T T T T T T T T T T T 2024/12/22 22:33:33 -Overwrite mode: False 2024/12/22 22:33:33 -Skipping columns: ['P'] 2024/12/22 22:33:33 -No available columns to fill. Skipping. 2024/12/22 22:33:33 Finished filling data using existing columns. 2024/12/22 22:33:33 Start to reorder the columns...v3.5.4 2024/12/22 22:33:33 -Current Dataframe shape : 5 x 15 ; Memory usage: 21.47 MB 2024/12/22 22:33:33 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:33 Finished reordering the columns.
In [14]:
Copied!
mysumstats.data
mysumstats.data
Out[14]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | Z | P | MLOG10P | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | -0.528694 | 0.597017 | 0.224013 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 0.597017 | 0.224013 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 0.690593 | 0.160778 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.070352 | 0.284461 | 0.545977 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.102564 | 0.270217 | 0.568288 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
EAF -> MAF¶
In [15]:
Copied!
# Request the MAF column; the log below reports it is filled from EAF. In the
# displayed rows MAF is the smaller of EAF and 1-EAF (e.g. 0.9960 -> 0.0040).
mysumstats.fill_data(to_fill=["MAF"])
mysumstats.fill_data(to_fill=["MAF"])
2024/12/22 22:33:33 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:33 -Column : SNPID CHR POS EA NEA EAF BETA SE Z P MLOG10P OR OR_95L OR_95U STATUS 2024/12/22 22:33:33 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 float64 float64 category 2024/12/22 22:33:33 -Verified: T T T T T T T T T T T T T T T 2024/12/22 22:33:33 -Overwrite mode: False 2024/12/22 22:33:33 -Skipping columns: [] 2024/12/22 22:33:33 -Filling columns: ['MAF'] 2024/12/22 22:33:33 - Filling Columns iteratively... 2024/12/22 22:33:33 - Filling MAF using EAF column... 2024/12/22 22:33:33 Finished filling data using existing columns. 2024/12/22 22:33:33 Start to reorder the columns...v3.5.4 2024/12/22 22:33:33 -Current Dataframe shape : 5 x 16 ; Memory usage: 21.47 MB 2024/12/22 22:33:33 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,MAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS 2024/12/22 22:33:33 Finished reordering the columns.
In [16]:
Copied!
mysumstats.data
mysumstats.data
Out[16]:
SNPID | CHR | POS | EA | NEA | EAF | MAF | BETA | SE | Z | P | MLOG10P | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | 0.0040 | -0.0737 | 0.1394 | -0.528694 | 0.597017 | 0.224013 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 0.597017 | 0.224013 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 0.690593 | 0.160778 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.1626 | 0.0213 | 0.0199 | 1.070352 | 0.284461 | 0.545977 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.1407 | 0.0172 | 0.0156 | 1.102564 | 0.270217 | 0.568288 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
Simulation of extreme P values¶
In [17]:
Copied!
# Start over from the raw file with only SNPID/CHR/POS/BETA/SE (5 rows),
# replacing the previous mysumstats object. basic_check() is not run here;
# the later fill_data logs warn that CHR has a possibly incompatible dtype
# (string), presumably for that reason.
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
beta="BETA",
se="SE",nrows=5, verbose=False)
# simulate some extreme P values by shrinking the SE
# (SE is divided by 100, so the Z scores later filled from BETA/SE are 100x larger)
mysumstats.data["SE"] = mysumstats.data["SE"]/100
mysumstats.data
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
beta="BETA",
se="SE",nrows=5, verbose=False)
# simulate some extreme P values by shrinking the SE
mysumstats.data["SE"] = mysumstats.data["SE"]/100
mysumstats.data
Out[17]:
SNPID | CHR | POS | BETA | SE | STATUS | |
---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | 9999999 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 9999999 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 9999999 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 9999999 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 9999999 |
Limited precision of float64¶
P values smaller than about 1e-308 underflow to 0 because of the limited precision of float64.
In [18]:
Copied!
# Request Z and P in the default (non-extreme) mode. Per the log below, Z is
# filled from BETA/SE and P from Z; with the shrunken SE every P in the
# displayed table comes out as 0.0.
mysumstats.fill_data(to_fill=["Z","P"])
mysumstats.fill_data(to_fill=["Z","P"])
2024/12/22 22:33:34 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:34 -Column : SNPID CHR POS BETA SE STATUS 2024/12/22 22:33:34 -DType : object string int64 float64 float64 category 2024/12/22 22:33:34 -Verified: T F T T T T 2024/12/22 22:33:34 #WARNING! Columns with possibly incompatible dtypes: CHR 2024/12/22 22:33:34 -Overwrite mode: False 2024/12/22 22:33:34 -Skipping columns: [] 2024/12/22 22:33:34 -Filling columns: ['Z', 'P'] 2024/12/22 22:33:34 - Filling Columns iteratively... 2024/12/22 22:33:34 - Filling Z using BETA/SE column... 2024/12/22 22:33:34 - Filling P value using Z column... 2024/12/22 22:33:34 Finished filling data using existing columns. 2024/12/22 22:33:34 Start to reorder the columns...v3.5.4 2024/12/22 22:33:34 -Current Dataframe shape : 5 x 8 ; Memory usage: 21.47 MB 2024/12/22 22:33:34 -Reordering columns to : SNPID,CHR,POS,BETA,SE,Z,P,STATUS 2024/12/22 22:33:34 Finished reordering the columns.
In [19]:
Copied!
mysumstats.data
mysumstats.data
Out[19]:
SNPID | CHR | POS | BETA | SE | Z | P | STATUS | |
---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | -52.869440 | 0.0 | 9999999 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 52.869440 | 0.0 | 9999999 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 39.805037 | 0.0 | 9999999 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 107.035176 | 0.0 | 9999999 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 110.256410 | 0.0 | 9999999 |
Recalculate MLOG10P with extreme P value mode¶
In [20]:
Copied!
# Request MLOG10P with extreme=True. Per the log below, MLOG10P is then
# filled from Z (not from the P column, which is 0.0 here), and the output
# table additionally contains P_MANTISSA and P_EXPONENT columns.
mysumstats.fill_data(to_fill=["MLOG10P"],extreme=True)
mysumstats.fill_data(to_fill=["MLOG10P"],extreme=True)
2024/12/22 22:33:34 Start filling data using existing columns...v3.5.4 2024/12/22 22:33:34 -Column : SNPID CHR POS BETA SE Z P STATUS 2024/12/22 22:33:34 -DType : object string int64 float64 float64 float64 float64 category 2024/12/22 22:33:34 -Verified: T F T T T T T T 2024/12/22 22:33:34 #WARNING! Columns with possibly incompatible dtypes: CHR 2024/12/22 22:33:34 -Overwrite mode: False 2024/12/22 22:33:34 -Skipping columns: [] 2024/12/22 22:33:34 -Filling columns: ['MLOG10P'] 2024/12/22 22:33:34 - Filling Columns iteratively... 2024/12/22 22:33:34 - Filling MLOG10P using Z column... 2024/12/22 22:33:34 Finished filling data using existing columns. 2024/12/22 22:33:34 Start to reorder the columns...v3.5.4 2024/12/22 22:33:34 -Current Dataframe shape : 5 x 11 ; Memory usage: 21.47 MB 2024/12/22 22:33:34 -Reordering columns to : SNPID,CHR,POS,BETA,SE,Z,P,MLOG10P,STATUS,P_MANTISSA,P_EXPONENT 2024/12/22 22:33:34 Finished reordering the columns.
In [21]:
Copied!
mysumstats.data
mysumstats.data
Out[21]:
SNPID | CHR | POS | BETA | SE | Z | P | MLOG10P | STATUS | P_MANTISSA | P_EXPONENT | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | -52.869440 | 0.0 | 608.786553 | 9999999 | 1.634734 | -609.0 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 52.869440 | 0.0 | 608.786553 | 9999999 | 1.634734 | -609.0 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 39.805037 | 0.0 | 345.755249 | 9999999 | 1.756915 | -346.0 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 107.035176 | 0.0 | 2489.881261 | 9999999 | 1.314436 | -2490.0 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 110.256410 | 0.0 | 2641.885723 | 9999999 | 1.300999 | -2642.0 |
Calculate Per-SNP r2¶
In [22]:
Copied!
# Reload the same 5 rows with the full column mapping, this time also passing
# n=170000; in the displayed table this appears as a constant N column.
# basic_check() is run before the table is shown.
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
ea="ALT",
nea="REF",
neaf="Frq",
beta="BETA",n=170000,
se="SE",nrows=5,verbose=False)
mysumstats.basic_check(verbose=False)
mysumstats.data
mysumstats = gl.Sumstats("../0_sample_data/t2d_bbj.txt.gz",
snpid="SNP",
chrom="CHR",
pos="POS",
ea="ALT",
nea="REF",
neaf="Frq",
beta="BETA",n=170000,
se="SE",nrows=5,verbose=False)
mysumstats.basic_check(verbose=False)
mysumstats.data
Out[22]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | N | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 170000 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 170000 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 170000 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 170000 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 170000 | 9960099 |
In [23]:
Copied!
# Compute per-SNP variance explained with default settings. Per the log
# below: r2 = 2 * BETA**2 * AF * (1-AF) / Var(y) with Var(y) = 1, and an
# F-statistic F = [(N-k-1)/k] * (r2/(1-r2)) with k = 1. The output table
# gains the columns _VAR(BETAX), SNPR2 and F.
mysumstats.get_per_snp_r2()
mysumstats.get_per_snp_r2()
2024/12/22 22:33:49 Start to calculate per-SNP heritibility... 2024/12/22 22:33:49 -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)... 2024/12/22 22:33:49 -Var(y) is provided: 1... 2024/12/22 22:33:49 -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = 1 2024/12/22 22:33:49 -For r2, SNPR2 is used. 2024/12/22 22:33:49 Finished calculating per-SNP heritability!
In [24]:
Copied!
mysumstats.data
mysumstats.data
Out[24]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | N | STATUS | _VAR(BETAX) | SNPR2 | F | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 170000 | 9960099 | 0.000043 | 0.000043 | 7.357797 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 170000 | 9960099 | 0.000043 | 0.000043 | 7.357782 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 170000 | 9960099 | 0.000024 | 0.000024 | 4.142153 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 170000 | 9960399 | 0.000124 | 0.000124 | 21.005844 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 170000 | 9960099 | 0.000072 | 0.000072 | 12.161878 |