r – y-axis breaks with ggplot2 for a manhattan plot

I have a manhattan plot of genetic information:

enter image description here

It was generated using the qqman package (cran.r-project.org/web/packages/qqman/vignettes/qqman.html) in R, which takes a data frame of P-values, chromosomes and gene positions (for any biologists reading, this is a per-gene Manhattan plot, hence the sparsity of signal). The data looks like this (with an example dataset below):

SNP P           CHR BP 
ABC 1.1e-300    16  875849
AAS 1.2e-150    4   2343
JTL 4.2e-07     3   436544
LKS 4.1e-06     2   23565
JKSA 0.000432   1   98043
LKF 0.0032      22  387235
A20 0.0054      10  3252
AKLF 0.0235     4   4543543


structure(list(Gene = c("ABC1", "HGT2", "SLC34A3_ENSG00000198569", 
"OR9K2_ENSG00000170605", "NFKB2_ENSG00000077150", "EFR3A_ENSG00000132294", 
"SLC7A9_ENSG00000021488", "SEMG1_ENSG00000124233", "EWSR1_ENSG00000182944", 
"ATP5PD_ENSG00000167863", "MAST3_ENSG00000099308", "KRT31_ENSG00000094796", 
"FOXI1_ENSG00000168269", "CHCHD7_ENSG00000170791", "MAPK6_ENSG00000069956", 
"SPRYD3_ENSG00000167778", "HOXB13_ENSG00000159184", "SLC12A9_ENSG00000146828", 
"EXOC2_ENSG00000112685", "KCNJ15_ENSG00000157551", "SLC22A18_ENSG00000110628", 
"ARID4A_ENSG00000032219", "SKP2_ENSG00000145604", "ZNF831_ENSG00000124203", 
"ZNF275_ENSG00000063587", "SLC16A2_ENSG00000147100", "ADRB1_ENSG00000043591", 
"DSCAM_ENSG00000171587", "PPM1H_ENSG00000111110", "IFNA14_ENSG00000228083", 
"STX2_ENSG00000111450", "VPS54_ENSG00000143952", "ANXA7_ENSG00000138279", 
"MAP3K12_ENSG00000139625", "MED13L_ENSG00000123066", "CHRM2_ENSG00000181072", 
"RBP7_ENSG00000162444", "DRD1_ENSG00000184845", "CCDC121_ENSG00000176714", 
"HMG20B_ENSG00000064961", "POU5F1B_ENSG00000212993", "SESN1_ENSG00000080546", 
"DNASE1_ENSG00000213918", "FBXO24_ENSG00000106336", "RAG2_ENSG00000175097", 
"UTS2_ENSG00000049247", "KMT2B_ENSG00000272333", "RBM33_ENSG00000184863", 
"SNRPB2_ENSG00000125870", "FOXO4_ENSG00000184481", "NBPF3_ENSG00000142794", 
"PPL_ENSG00000118898", "LYPD6B_ENSG00000150556", "POLD3_ENSG00000077514", 
"PIK3CB_ENSG00000051382", "BCL2L12_ENSG00000126453", "CDC45_ENSG00000093009", 
"DUXA_ENSG00000258873", "MCM3_ENSG00000112118", "CAPN3_ENSG00000092529", 
"FMO4_ENSG00000076258", "B3GALT2_ENSG00000162630", "MICB_ENSG00000204516", 
"CCL22_ENSG00000102962", "JKAMP_ENSG00000050130", "GSDME_ENSG00000105928", 
"IZUMO4_ENSG00000099840", "NCKAP5L_ENSG00000167566", "ZRANB1_ENSG00000019995", 
"TAL1_ENSG00000162367", "SLTM_ENSG00000137776", "SPC25_ENSG00000152253", 
"GAP43_ENSG00000172020", "FGD3_ENSG00000127084", "PTCD3_ENSG00000132300", 
"PAH_ENSG00000171759", "MMP8_ENSG00000118113", "RSBN1L_ENSG00000187257", 
"AC026740.3_ENSG00000286094", "FAM189A2_ENSG00000135063", "TMEM245_ENSG00000106771", 
"DDX50_ENSG00000107625", "SP140_ENSG00000079263", "C21orf91_ENSG00000154642", 
"MEIKIN_ENSG00000239642", "TNFRSF8_ENSG00000120949", "RNF24_ENSG00000101236", 
"CDK5_ENSG00000164885", "HINT1_ENSG00000169567", "TYRO3_ENSG00000092445", 
"KRT75_ENSG00000170454", "RBM44_ENSG00000177483", "MYH8_ENSG00000133020", 
"UBXN11_ENSG00000158062", "APOL3_ENSG00000128284", "NRXN3_ENSG00000021645", 
"PRSS16_ENSG00000112812", "BST1_ENSG00000109743", "FAM49A_ENSG00000197872", 
"SLC3A2_ENSG00000168003", "OR1C1_ENSG00000221888", "MYMK_ENSG00000187616", 
"RASSF1_ENSG00000068028", "ARID5A_ENSG00000196843", "UAP1L1_ENSG00000197355", 
"DPH2_ENSG00000132768", "G6PC_ENSG00000131482", "SH2B1_ENSG00000178188", 
"RELL1_ENSG00000181826", "ABCC5_ENSG00000114770", "ZNF333_ENSG00000160961", 
"NIF3L1_ENSG00000196290", "COMMD2_ENSG00000114744", "ZCCHC14_ENSG00000140948", 
"P3H1_ENSG00000117385", "KRT14_ENSG00000186847", "SPG7_ENSG00000197912", 
"ERCC6L_ENSG00000186871", "UPF1_ENSG00000005007", "FCGR3A_ENSG00000203747", 
"SLC39A13_ENSG00000165915", "ACYP2_ENSG00000170634", "AL162596.1_ENSG00000285946", 
"MEF2D_ENSG00000116604", "ATPAF1_ENSG00000123472", "DNAL4_ENSG00000100246", 
"ADRA2A_ENSG00000150594", "ALDH3B2_ENSG00000132746", "L3MBTL3_ENSG00000198945", 
"NR2E1_ENSG00000112333", "OTUD1_ENSG00000165312", "MCMDC2_ENSG00000178460", 
"TXNL1_ENSG00000091164", "CES5A_ENSG00000159398", "CCL16_ENSG00000275152", 
"ZBTB12_ENSG00000204366", "OGDHL_ENSG00000197444", "ARHGEF7_ENSG00000102606", 
"RBM20_ENSG00000203867", "SELENOK_ENSG00000113811", "HBB_ENSG00000244734", 
"WDR3_ENSG00000065183", "MAPKBP1_ENSG00000137802", "LTB4R2_ENSG00000213906", 
"SLC25A15_ENSG00000102743", "ZBTB26_ENSG00000171448", "FDX2_ENSG00000267673", 
"HSD3B7_ENSG00000099377", "RBFOX3_ENSG00000167281"), Pvalue = c(1.4e-300, 
2.4e-150, 2.6089114579797e-07, 2.0296620694138e-06, 0.000147497259292417, 
0.000229023886289315, 0.000245084674285079, 0.000256308708221289, 
0.000261527824152563, 0.000288694716678695, 0.000290173032394758, 
0.000320594572326915, 0.000346135729902497, 0.000355400110852, 
0.000365256352980237, 0.000409731023356175, 0.000434204786603609, 
0.000439775242591978, 0.000489192731765176, 0.000496753250110893, 
0.00049911036273298, 0.000570787086811797, 0.000817460863988795, 
0.000909350865229142, 0.000939159281654778, 0.00101875263711804, 
0.00104161722087825, 0.00104642519111031, 0.0011025121215934, 
0.00110797190460954, 0.00115516532029414, 0.00119237737210043, 
0.00122886113380205, 0.00123316670384388, 0.00126924175390097, 
0.00133083135434398, 0.00135900612361495, 0.00139601886941515, 
0.00140034988031684, 0.00144667154281775, 0.00152488013161856, 
0.00163920217629621, 0.00165121328565765, 0.00174281606991877, 
0.00177541992540164, 0.00190567015024483, 0.00197012178338563, 
0.00201154365191081, 0.00217761616500045, 0.00218849598206619, 
0.00219107805420338, 0.00219952638949095, 0.0022100400174857, 
0.00224988976742913, 0.00227842036080439, 0.00231351589815465, 
0.00233840710255306, 0.00239368490047076, 0.00240800589782486, 
0.00243072813003242, 0.00244930354205075, 0.00250643393459327, 
0.00251262640919065, 0.00251308387281417, 0.00263512458389692, 
0.00278748971622167, 0.00285692531240396, 0.00294631292976411, 
0.0029855292366705, 0.00300042887433971, 0.00303321747691876, 
0.00303431537337207, 0.00303655747990805, 0.00305247991142066, 
0.00305779719421262, 0.0030773769185013, 0.00309595279588104, 
0.00320602521859303, 0.00332374190234568, 0.00335845666631385, 
0.00343476781423846, 0.00352132856036713, 0.0035370791144882, 
0.00361921945446442, 0.00362829729460107, 0.00362925899436917, 
0.00371857751928739, 0.00379170913533391, 0.00381786051662956, 
0.00384603142808415, 0.0040621114920355, 0.00409131954647834, 
0.00421076475281379, 0.00426968726537658, 0.00434706101829539, 
0.00440972006588558, 0.00441860470852284, 0.00442578968523244, 
0.00442716922579578, 0.00452215526426547, 0.00455658711791962, 
0.00456768818316559, 0.00459525378983388, 0.00470562811526665, 
0.00479427416502232, 0.00480697291736709, 0.00487609777383424, 
0.00487626066774249, 0.0048982035968409, 0.00495106368869058, 
0.00495974901689888, 0.0051182254688722, 0.00511868853158659, 
0.00517459699358158, 0.0051863728177568, 0.0052533748441207, 
0.0053048513357663, 0.00535144603215779, 0.00536294574878726, 
0.00551084451782391, 0.00554884846488313, 0.0057184975334863, 
0.00579274777888456, 0.00589230566622367, 0.00598698264647979, 
0.00611781183554826, 0.00620691435617104, 0.00623285869674561, 
0.00627192651777919, 0.00631120768525961, 0.00638288332792991, 
0.00640000445930411, 0.00640676243762089, 0.00651734394089964, 
0.0065624463096069, 0.00663922011120555, 0.00664879787639161, 
0.00670461778135323, 0.00687266504207529, 0.00695679654393111, 
0.00703352727799, 0.0070826001238915, 0.00709135444023445, 0.007142701991454, 
0.00715597471729579, 0.00717318609326256, 0.00717726401691021, 
0.00723420182380741, 0.00734437099984853), CHR = c(16L, 4L, 4L, 
1L, 14L, 16L, 5L, 6L, 20L, 9L, 9L, 7L, 22L, 3L, 14L, 3L, 8L, 
8L, 21L, 16L, 4L, 16L, 12L, 14L, 4L, 1L, 12L, 15L, 5L, 4L, 21L, 
22L, 1L, 1L, 14L, 6L, 15L, 9L, 20L, 20L, 17L, 7L, 15L, 6L, 20L, 
7L, 8L, 9L, 1L, 13L, 11L, 12L, 4L, 7L, 20L, 12L, 7L, 5L, 12L, 
21L, 5L, 8L, 14L, 9L, 10L, 17L, 21L, 19L, 4L, 21L, 18L, 21L, 
7L, 12L, 21L, 2L, 15L, 7L, 14L, 15L, 4L, 12L, 5L, 14L, 21L, 8L, 
21L, 15L, 18L, 12L, 11L, 20L, 2L, 22L, 14L, 17L, 3L, 4L, 14L, 
15L, 9L, 7L, 20L, 15L, 18L, 15L, 19L, 13L, 15L, 6L, 7L, 8L, 3L, 
4L, 21L, 7L, 18L, 4L, 13L, 16L, 14L, 22L, 2L, 2L, 6L, 16L, 15L, 
8L, 7L, 19L, 13L, 6L, 21L, 8L, 18L, 22L, 19L, 21L, 16L, 2L, 4L, 
5L, 15L, 6L, 3L, 21L, 15L, 4L, 11L), POS = c(40665L, 197088L, 
107291L, 210681L, 43546L, 79324L, 84342L, 184478L, 153093L, 180926L, 
186110L, 117933L, 40682L, 54752L, 42758L, 61354L, 60378L, 157811L, 
154466L, 126398L, 31037L, 115113L, 151914L, 10177L, 149587L, 
79681L, 199754L, 129963L, 127032L, 175940L, 213708L, 51165L, 
2584L, 166487L, 56259L, 130923L, 89219L, 170034L, 178967L, 102826L, 
16982L, 188528L, 185007L, 6373L, 23298L, 199514L, 10429L, 58720L, 
124518L, 210323L, 52212L, 186662L, 166963L, 58802L, 97157L, 14448L, 
205795L, 70401L, 41824L, 93825L, 107954L, 207638L, 58648L, 64942L, 
184005L, 19239L, 326L, 167713L, 106774L, 9145L, 174348L, 116079L, 
38916L, 561L, 140433L, 123765L, 92497L, 187902L, 32027L, 63696L, 
141286L, 67825L, 131698L, 120443L, 72621L, 165143L, 188862L, 
52376L, 16769L, 77430L, 38655L, 145317L, 188469L, 113143L, 198322L, 
26732L, 165043L, 25287L, 72392L, 12505L, 134208L, 126649L, 86308L, 
199525L, 204348L, 103538L, 78610L, 176290L, 175950L, 73590L, 
148494L, 151769L, 135252L, 141200L, 73351L, 45244L, 136493L, 
33343L, 11165L, 915L, 80714L, 164700L, 142935L, 137224L, 554L, 
92823L, 143083L, 166581L, 121459L, 19037L, 325L, 59959L, 155468L, 
20896L, 33721L, 4468L, 113639L, 17103L, 184481L, 164337L, 174760L, 
96405L, 207423L, 46590L, 168811L, 205743L, 74180L, 178456L, 126892L
)), row.names = c(NA, -149L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x55a80de817a0>)

In reality there are around 20,000 rows, one for each gene in the human genome.

Using qqman, one uses:

# Draw the Manhattan plot with qqman. The column arguments are spelled out
# to match the table above (SNP, P, CHR, BP).
manhattan(gwas_data, chr = "CHR", bp = "BP", p = "P", snp = "SNP")

To get the plot.

I would like the same plot but with the y-axis broken between 8-149 and then again from 149-300 so that the bottom part isn’t all compressed. qqman is unable to do this.

I have tried modifying the script from this website: danielroelfs.com/blog/how-i-create-manhattan-plots-using-ggplot/

And my code looks like this:

table above: gwas_data

# Build a ggplot2 Manhattan plot from `gwas_data`.
# Expects the columns shown in the table above: SNP, P, CHR and BP.
# Uses dplyr, ggplot2, forcats (as_factor) and ggtext (element_markdown).

# Cumulative x-offset of each chromosome, so that chromosomes are laid out
# side by side. BP is converted to double first: cumsum() over integer
# positions overflows to NA once the running total exceeds
# .Machine$integer.max, which happens with genome-scale coordinates.
data_cum <- gwas_data %>%
  group_by(CHR) %>%
  summarise(max_bp = max(as.numeric(BP))) %>%
  mutate(bp_add = lag(cumsum(max_bp), default = 0)) %>%
  select(CHR, bp_add)

# Genome-wide x coordinate of every gene. The position column is `BP`
# (upper case), the same column summarised above.
gwas_data <- gwas_data %>%
  inner_join(data_cum, by = "CHR") %>%
  mutate(bp_cum = BP + bp_add)

# One tick per chromosome, placed at the mean cumulative position of its genes.
axis_set <- gwas_data %>%
  group_by(CHR) %>%
  summarise(center = mean(bp_cum))

# Upper y limit: order of magnitude of the smallest P-value plus some headroom.
# Computed from min(P) directly so it is always a single number, even when
# several rows share the minimum.
ylim <- abs(floor(log10(min(gwas_data$P)))) + 2

sig <- 0.05 / nrow(gwas_data) # Bonferroni correction

manhplot <- ggplot(
  gwas_data,
  aes(x = bp_cum, y = -log10(P), color = as_factor(CHR), size = -log10(P))
) +
  geom_hline(
    yintercept = -log10(sig), color = "grey40", linetype = "dashed"
  ) +
  geom_point(alpha = 0.75) +
  # The chromosome column is `CHR`; `axis_set$chr` does not exist, which
  # left the x-axis without chromosome labels.
  scale_x_continuous(labels = axis_set$CHR, breaks = axis_set$center) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, ylim)) +
  # Alternate two colours across chromosomes; the pair is repeated once per
  # chromosome so there are always enough values.
  scale_color_manual(
    values = rep(c("#276FBF", "#183059"), length(unique(axis_set$CHR)))
  ) +
  scale_size_continuous(range = c(0.5, 3)) +
  labs(
    x = NULL,
    y = "-log<sub>10</sub>(p)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.title.y = element_markdown(),
    axis.text.x = element_text(angle = 60, size = 8, vjust = 0.5)
  )

This gives me:

Which is wrong. However, if I then try to cut the axis using the ggbreak package with:

# Cut the y-axis into separate panels with ggbreak::scale_y_cut().
# The argument is `breaks`; `break` is a reserved word in R and cannot be
# used as a bare argument name, so `break = ...` fails to parse.
# NOTE(review): `t` masks base::t(); kept because the next line uses it.
t <- manhplot + scale_y_cut(breaks = c(10, 140))
t + scale_y_cut(breaks = c(140, 300))

Which gives me:

enter image description here

How would I sort the chromosome x-axis and the breaks out so it looks like the qqman plot but with the y-axis compressed?

Many thanks

Read more here: Source link