Data conversion¶
In [1]:
Copied!
import sys
# Put a specific gwaslab source directory first on the import path.
# NOTE(review): this absolute path is machine-specific (presumably a local
# checkout); it will not exist on other machines — confirm whether it can be dropped.
sys.path.insert(0,"/home/yunye/work/gwaslab/src")
import gwaslab as gl
import sys
# Put a specific gwaslab source directory first on the import path.
# NOTE(review): this absolute path is machine-specific (presumably a local
# checkout); it will not exist on other machines — confirm whether it can be dropped.
sys.path.insert(0,"/home/yunye/work/gwaslab/src")
import gwaslab as gl
Loading sample data¶
In [2]:
Copied!
# Load the first five rows of the example file, naming which input header
# supplies each Sumstats column.
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    ea="ALT",
    nea="REF",
    neaf="Frq",
    beta="BETA",
    se="SE",
    nrows=5,
    verbose=False,
)
# Run the basic checks without printing the log, then show the table.
mysumstats.basic_check(verbose=False)
mysumstats.data
# Load the first five rows of the example file, naming which input header
# supplies each Sumstats column.
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    ea="ALT",
    nea="REF",
    neaf="Frq",
    beta="BETA",
    se="SE",
    nrows=5,
    verbose=False,
)
# Run the basic checks without printing the log, then show the table.
mysumstats.basic_check(verbose=False)
mysumstats.data
Out[2]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | STATUS | |
---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 9960099 |
BETA -> OR¶
In [3]:
Copied!
# Derive OR from BETA; per the log, OR_95L/OR_95U are also filled from BETA/SE.
mysumstats.fill_data(to_fill=["OR"])
# Derive OR from BETA; per the log, OR_95L/OR_95U are also filled from BETA/SE.
mysumstats.fill_data(to_fill=["OR"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF BETA SE STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: [] Sat Feb 3 20:36:12 2024 -Filling columns: ['OR'] Sat Feb 3 20:36:12 2024 - Filling Columns iteratively... Sat Feb 3 20:36:12 2024 - Filling OR using BETA column... Sat Feb 3 20:36:12 2024 - Filling OR_95L/OR_95U using BETA/SE columns... Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 12 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
In [4]:
Copied!
# Show the table: OR, OR_95L and OR_95U have been added.
mysumstats.data
# Show the table: OR, OR_95L and OR_95U have been added.
mysumstats.data
Out[4]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
OR -> BETA¶
In [5]:
Copied!
# Remove BETA and SE in place so the next step has to rebuild them from the OR columns.
mysumstats.data.drop(columns=["BETA", "SE"], inplace=True)
# Remove BETA and SE in place so the next step has to rebuild them from the OR columns.
mysumstats.data.drop(columns=["BETA", "SE"], inplace=True)
In [6]:
Copied!
# Show the table after the drop: BETA and SE are gone, the OR columns remain.
mysumstats.data
# Show the table after the drop: BETA and SE are gone, the OR columns remain.
mysumstats.data
Out[6]:
SNPID | CHR | POS | EA | NEA | EAF | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
In [7]:
Copied!
# Rebuild BETA and SE; the log reports BETA from OR and SE from OR/OR_95U.
mysumstats.fill_data(to_fill=["BETA","SE"])
# Rebuild BETA and SE; the log reports BETA from OR and SE from OR/OR_95U.
mysumstats.fill_data(to_fill=["BETA","SE"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF OR OR_95L OR_95U STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: [] Sat Feb 3 20:36:12 2024 -Filling columns: ['BETA', 'SE'] Sat Feb 3 20:36:12 2024 - Filling Columns iteratively... Sat Feb 3 20:36:12 2024 - Filling BETA value using OR column... Sat Feb 3 20:36:12 2024 - Filling SE value using OR/OR_95U column... Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 12 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
In [8]:
Copied!
# Show the table: BETA and SE are back, matching the values loaded originally.
mysumstats.data
# Show the table: BETA and SE are back, matching the values loaded originally.
mysumstats.data
Out[8]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
BETA/SE -> Z¶
In [9]:
Copied!
# Add a Z column; the log reports it is filled from BETA/SE.
mysumstats.fill_data(to_fill=["Z"])
# Add a Z column; the log reports it is filled from BETA/SE.
mysumstats.fill_data(to_fill=["Z"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF BETA SE OR OR_95L OR_95U STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: [] Sat Feb 3 20:36:12 2024 -Filling columns: ['Z'] Sat Feb 3 20:36:12 2024 - Filling Columns iteratively... Sat Feb 3 20:36:12 2024 - Filling Z using BETA/SE column... Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 13 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
In [10]:
Copied!
# Show the table with the new Z column.
mysumstats.data
# Show the table with the new Z column.
mysumstats.data
Out[10]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | Z | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | -0.528694 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.070352 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.102564 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
P -> MLOG10P¶
In [11]:
Copied!
# Add MLOG10P. No P column exists yet, so per the log P is first filled
# from Z and MLOG10P is then filled from P.
mysumstats.fill_data(to_fill=["MLOG10P"])
# Add MLOG10P. No P column exists yet, so per the log P is first filled
# from Z and MLOG10P is then filled from P.
mysumstats.fill_data(to_fill=["MLOG10P"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF BETA SE Z OR OR_95L OR_95U STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: [] Sat Feb 3 20:36:12 2024 -Filling columns: ['MLOG10P'] Sat Feb 3 20:36:12 2024 - Filling Columns iteratively... Sat Feb 3 20:36:12 2024 - Filling P value using Z column... Sat Feb 3 20:36:12 2024 - Filling MLOG10P using P column... Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 15 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
MLOG10P -> P¶
In [12]:
Copied!
# NOTE(review): P already exists here (it was filled from Z in the previous
# step) and the log reports overwrite mode False, so this call skips P
# ("No available columns to fill") and nothing is recomputed from MLOG10P.
# Presumably overwrite=True is needed to demonstrate MLOG10P -> P — TODO
# confirm against the gwaslab fill_data documentation.
mysumstats.fill_data(to_fill=["P"])
# NOTE(review): P already exists here (it was filled from Z in the previous
# step) and the log reports overwrite mode False, so this call skips P
# ("No available columns to fill") and nothing is recomputed from MLOG10P.
# Presumably overwrite=True is needed to demonstrate MLOG10P -> P — TODO
# confirm against the gwaslab fill_data documentation.
mysumstats.fill_data(to_fill=["P"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF BETA SE Z P MLOG10P OR OR_95L OR_95U STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: ['P'] Sat Feb 3 20:36:12 2024 -No available columns to fill. Skipping. Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 15 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
In [13]:
Copied!
# Show the table with both P and MLOG10P present.
mysumstats.data
# Show the table with both P and MLOG10P present.
mysumstats.data
Out[13]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | Z | P | MLOG10P | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | -0.528694 | 0.597017 | 0.224013 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 0.597017 | 0.224013 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 0.690593 | 0.160778 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 1.070352 | 0.284461 | 0.545977 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 1.102564 | 0.270217 | 0.568288 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
EAF -> MAF¶
In [14]:
Copied!
# Add a MAF column; the log reports it is filled from EAF.
mysumstats.fill_data(to_fill=["MAF"])
# Add a MAF column; the log reports it is filled from EAF.
mysumstats.fill_data(to_fill=["MAF"])
Sat Feb 3 20:36:12 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Column : SNPID CHR POS EA NEA EAF BETA SE Z P MLOG10P OR OR_95L OR_95U STATUS Sat Feb 3 20:36:12 2024 -DType : string Int64 Int64 category category float32 float64 float64 float64 float64 float64 float64 float64 float64 category Sat Feb 3 20:36:12 2024 -Verified: T T T T T T T T T T T T T T T Sat Feb 3 20:36:12 2024 -Overwrite mode: False Sat Feb 3 20:36:12 2024 -Skipping columns: [] Sat Feb 3 20:36:12 2024 -Filling columns: ['MAF'] Sat Feb 3 20:36:12 2024 - Filling Columns iteratively... Sat Feb 3 20:36:12 2024 - Filling MAF using EAF column... Sat Feb 3 20:36:12 2024 Finished filling data using existing columns. Sat Feb 3 20:36:12 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:12 2024 -Current Dataframe shape : 5 x 16 ; Memory usage: 19.94 MB Sat Feb 3 20:36:12 2024 -Reordering columns to : SNPID,CHR,POS,EA,NEA,EAF,MAF,BETA,SE,Z,P,MLOG10P,OR,OR_95L,OR_95U,STATUS Sat Feb 3 20:36:12 2024 Finished reordering the columns.
In [15]:
Copied!
# Show the table with the new MAF column; in these rows MAF is the smaller
# of EAF and 1 - EAF.
mysumstats.data
# Show the table with the new MAF column; in these rows MAF is the smaller
# of EAF and 1 - EAF.
mysumstats.data
Out[15]:
SNPID | CHR | POS | EA | NEA | EAF | MAF | BETA | SE | Z | P | MLOG10P | OR | OR_95L | OR_95U | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | 0.0040 | -0.0737 | 0.1394 | -0.528694 | 0.597017 | 0.224013 | 0.928950 | 0.706863 | 1.220815 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0040 | 0.0737 | 0.1394 | 0.528694 | 0.597017 | 0.224013 | 1.076484 | 0.819125 | 1.414702 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0051 | 0.0490 | 0.1231 | 0.398050 | 0.690593 | 0.160778 | 1.050220 | 0.825083 | 1.336790 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.1626 | 0.0213 | 0.0199 | 1.070352 | 0.284461 | 0.545977 | 1.021528 | 0.982452 | 1.062159 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.1407 | 0.0172 | 0.0156 | 1.102564 | 0.270217 | 0.568288 | 1.017349 | 0.986714 | 1.048935 | 9960099 |
Simulation of extreme P values¶
In [16]:
Copied!
# Reload the first five rows, this time keeping only the identifier,
# position and effect-size columns.
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    beta="BETA",
    se="SE",
    nrows=5,
    verbose=False,
)
# Divide every SE by 100 so that the P values derived later become extreme.
mysumstats.data["SE"] = mysumstats.data["SE"] / 100
mysumstats.data
# Reload the first five rows, this time keeping only the identifier,
# position and effect-size columns.
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    beta="BETA",
    se="SE",
    nrows=5,
    verbose=False,
)
# Divide every SE by 100 so that the P values derived later become extreme.
mysumstats.data["SE"] = mysumstats.data["SE"] / 100
mysumstats.data
Out[16]:
SNPID | CHR | POS | BETA | SE | STATUS | |
---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | 9999999 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 9999999 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 9999999 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 9999999 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 9999999 |
Limited precision of float64¶
P values smaller than about 1e-308 underflow to 0 because of the limited precision of float64.
In [17]:
Copied!
# Fill Z from BETA/SE and then P from Z (see the log). With the shrunken SE
# the resulting P values are displayed as 0.0 in the next output.
mysumstats.fill_data(to_fill=["Z","P"])
# Fill Z from BETA/SE and then P from Z (see the log). With the shrunken SE
# the resulting P values are displayed as 0.0 in the next output.
mysumstats.fill_data(to_fill=["Z","P"])
Sat Feb 3 20:36:13 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:13 2024 -Column : SNPID CHR POS BETA SE STATUS Sat Feb 3 20:36:13 2024 -DType : object string int64 float64 float64 category Sat Feb 3 20:36:13 2024 -Verified: T F T T T T Sat Feb 3 20:36:13 2024 #WARNING! Columns with possibly incompatable dtypes: CHR Sat Feb 3 20:36:13 2024 -Overwrite mode: False Sat Feb 3 20:36:13 2024 -Skipping columns: [] Sat Feb 3 20:36:13 2024 -Filling columns: ['Z', 'P'] Sat Feb 3 20:36:13 2024 - Filling Columns iteratively... Sat Feb 3 20:36:13 2024 - Filling Z using BETA/SE column... Sat Feb 3 20:36:13 2024 - Filling P value using Z column... Sat Feb 3 20:36:13 2024 Finished filling data using existing columns. Sat Feb 3 20:36:13 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:13 2024 -Current Dataframe shape : 5 x 8 ; Memory usage: 19.94 MB Sat Feb 3 20:36:13 2024 -Reordering columns to : SNPID,CHR,POS,BETA,SE,Z,P,STATUS Sat Feb 3 20:36:13 2024 Finished reordering the columns.
In [18]:
Copied!
# Show the table: Z is very large in magnitude and every P is shown as 0.0.
mysumstats.data
# Show the table: Z is very large in magnitude and every P is shown as 0.0.
mysumstats.data
Out[18]:
SNPID | CHR | POS | BETA | SE | Z | P | STATUS | |
---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | -52.869440 | 0.0 | 9999999 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 52.869440 | 0.0 | 9999999 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 39.805037 | 0.0 | 9999999 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 107.035176 | 0.0 | 9999999 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 110.256410 | 0.0 | 9999999 |
Recalculate MLOG10P with extreme P value mode¶
In [19]:
Copied!
# With extreme=True the log reports MLOG10P being filled from Z rather than
# from P; P_MANTISSA and P_EXPONENT columns also appear in the result.
mysumstats.fill_data(to_fill=["MLOG10P"],extreme=True)
# With extreme=True the log reports MLOG10P being filled from Z rather than
# from P; P_MANTISSA and P_EXPONENT columns also appear in the result.
mysumstats.fill_data(to_fill=["MLOG10P"],extreme=True)
Sat Feb 3 20:36:13 2024 Start filling data using existing columns...v3.4.38 Sat Feb 3 20:36:13 2024 -Column : SNPID CHR POS BETA SE Z P STATUS Sat Feb 3 20:36:13 2024 -DType : object string int64 float64 float64 float64 float64 category Sat Feb 3 20:36:13 2024 -Verified: T F T T T T T T Sat Feb 3 20:36:13 2024 #WARNING! Columns with possibly incompatable dtypes: CHR Sat Feb 3 20:36:13 2024 -Overwrite mode: False Sat Feb 3 20:36:13 2024 -Skipping columns: [] Sat Feb 3 20:36:13 2024 -Filling columns: ['MLOG10P'] Sat Feb 3 20:36:13 2024 - Filling Columns iteratively... Sat Feb 3 20:36:13 2024 - Filling MLOG10P using Z column... Sat Feb 3 20:36:13 2024 Finished filling data using existing columns. Sat Feb 3 20:36:13 2024 Start to reorder the columns...v3.4.38 Sat Feb 3 20:36:13 2024 -Current Dataframe shape : 5 x 11 ; Memory usage: 19.94 MB Sat Feb 3 20:36:13 2024 -Reordering columns to : SNPID,CHR,POS,BETA,SE,Z,P,MLOG10P,STATUS,P_MANTISSA,P_EXPONENT Sat Feb 3 20:36:13 2024 Finished reordering the columns.
In [20]:
Copied!
# Show the table: MLOG10P is finite even though P is 0.0, and the
# P_MANTISSA / P_EXPONENT columns have been added.
mysumstats.data
# Show the table: MLOG10P is finite even though P is 0.0, and the
# P_MANTISSA / P_EXPONENT columns have been added.
mysumstats.data
Out[20]:
SNPID | CHR | POS | BETA | SE | Z | P | MLOG10P | STATUS | P_MANTISSA | P_EXPONENT | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | -0.0737 | 0.001394 | -52.869440 | 0.0 | 608.786553 | 9999999 | 1.634734 | -609.0 |
1 | 1:725933_A_G | 1 | 725933 | 0.0737 | 0.001394 | 52.869440 | 0.0 | 608.786553 | 9999999 | 1.634734 | -609.0 |
2 | 1:737801_T_C | 1 | 737801 | 0.0490 | 0.001231 | 39.805037 | 0.0 | 345.755249 | 9999999 | 1.756915 | -346.0 |
3 | 1:749963_T_TAA | 1 | 749963 | 0.0213 | 0.000199 | 107.035176 | 0.0 | 2489.881261 | 9999999 | 1.314436 | -2490.0 |
4 | 1:751343_T_A | 1 | 751343 | 0.0172 | 0.000156 | 110.256410 | 0.0 | 2641.885723 | 9999999 | 1.300999 | -2642.0 |
Calculate Per-SNP r2¶
In [21]:
Copied!
# Reload the first five rows with allele and frequency columns.
# n=170000 gives every variant the same sample size (see the N column below).
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    ea="ALT",
    nea="REF",
    neaf="Frq",
    beta="BETA",
    n=170000,
    se="SE",
    nrows=5,
    verbose=False,
)
# Run the basic checks without printing the log, then show the table.
mysumstats.basic_check(verbose=False)
mysumstats.data
# Reload the first five rows with allele and frequency columns.
# n=170000 gives every variant the same sample size (see the N column below).
mysumstats = gl.Sumstats(
    "t2d_bbj.txt.gz",
    snpid="SNP",
    chrom="CHR",
    pos="POS",
    ea="ALT",
    nea="REF",
    neaf="Frq",
    beta="BETA",
    n=170000,
    se="SE",
    nrows=5,
    verbose=False,
)
# Run the basic checks without printing the log, then show the table.
mysumstats.basic_check(verbose=False)
mysumstats.data
Out[21]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | N | STATUS | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 170000 | 9960099 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 170000 | 9960099 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 170000 | 9960099 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 170000 | 9960399 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 170000 | 9960099 |
In [22]:
Copied!
# Compute per-SNP r2 and an F-statistic. Per the log, rsq is
# 2 * (BETA**2) * AF * (1-AF) / Var(y) with Var(y) = 1, and F uses N with k = 1.
mysumstats.get_per_snp_r2()
# Compute per-SNP r2 and an F-statistic. Per the log, rsq is
# 2 * (BETA**2) * AF * (1-AF) / Var(y) with Var(y) = 1, and F uses N with k = 1.
mysumstats.get_per_snp_r2()
Sat Feb 3 20:36:19 2024 Start to calculate per-SNP heritibility... Sat Feb 3 20:36:19 2024 -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)... Sat Feb 3 20:36:19 2024 -Var(y) is provided: 1... Sat Feb 3 20:36:19 2024 -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = 1 Sat Feb 3 20:36:19 2024 -For r2, SNPR2 is used. Sat Feb 3 20:36:19 2024 Finished calculating per-SNP heritability!
In [23]:
Copied!
# Show the table with the added _VAR(BETAX), SNPR2 and F columns.
mysumstats.data
# Show the table with the added _VAR(BETAX), SNPR2 and F columns.
mysumstats.data
Out[23]:
SNPID | CHR | POS | EA | NEA | EAF | BETA | SE | N | STATUS | _VAR(BETAX) | SNPR2 | F | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1:725932_G_A | 1 | 725932 | G | A | 0.9960 | -0.0737 | 0.1394 | 170000 | 9960099 | 0.000043 | 0.000043 | 7.357797 |
1 | 1:725933_A_G | 1 | 725933 | G | A | 0.0040 | 0.0737 | 0.1394 | 170000 | 9960099 | 0.000043 | 0.000043 | 7.357782 |
2 | 1:737801_T_C | 1 | 737801 | C | T | 0.0051 | 0.0490 | 0.1231 | 170000 | 9960099 | 0.000024 | 0.000024 | 4.142153 |
3 | 1:749963_T_TAA | 1 | 749963 | TAA | T | 0.8374 | 0.0213 | 0.0199 | 170000 | 9960399 | 0.000124 | 0.000124 | 21.005844 |
4 | 1:751343_T_A | 1 | 751343 | T | A | 0.8593 | 0.0172 | 0.0156 | 170000 | 9960099 | 0.000072 | 0.000072 | 12.161878 |