root.dir <- here::here()
knitr::opts_chunk$set(echo = TRUE, root.dir=root.dir, dpi = 400)
knitr::opts_knit$set(root.dir = root.dir)

source(here::here("code","utils.R"))

Import data

Color dictionaries

keys <- create_keys()

ChIP-seq data

Data previously processed in data/enrichment.Rmd script.

gr.chip <- readRDS(here::here("data","gr.chip.hg38_cutoff5_standardised.rds")) |> 
  GenomicRanges::GRangesList()
## Loading required package: rtracklayer
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
viewpoint <- 166169213 
gr.span <- GenomicRanges::GRanges(
    seqnames = "chr6",
    ranges = IRanges::IRanges(
        start = viewpoint - 1000000,
        end = viewpoint + 1000000
    )
)

Create metadata

Create a table of metadata from the peakfile names, and ensure standardised nomenclature (i.e. ).

meta <- create_metadata(nms = names(gr.chip))
names(gr.chip) <- meta$name2

Subset samples

Filter to only ranges within the predefinedTBXT window. Only include samples with >1 ranges.

#### Filter to just the target region ####
gr.chip_sub <- GenomicRanges::pintersect(
    x = GenomicRanges::GRangesList(gr.chip),
    y = gr.span,
    drop.nohit.ranges=TRUE) 
lengths <- sapply(gr.chip_sub, length)
print(lengths)
##  A549_H3K27ac_R1  A549_H3K27ac_R2 A549_H3K27me3_R1 A549_H3K27me3_R2 
##               26               31               10               20 
##  A549_H3K4me3_R1  A549_H3K4me3_R2  A549_H3K4me3_R3  A549_H3K9me3_R1 
##               24               19               27               67 
##  H358_H3K27ac_R1 H358_H3K27me3_R1  H358_H3K4me3_R1  H358_H3K9me3_R1 
##               19                1               15                0 
##  H460_H3K4me3_R1  H460_H3K4me3_R2  H460_H3K27ac_R1  H460_H3K27ac_R2 
##               43               36               56               57 
##  H460_H3K9me3_R1 H460_H3K27me3_R1 
##               14               25
gr.chip_sub <- gr.chip_sub[lengths>0]

Locus subset

Rebin peak data

bins <- EpiCompare::rebin_peaks(peakfiles = gr.chip_sub,
                                genome_build = "hg38",
                                intensity_cols = "score",
                                bin_size = 1000,
                                keep_chr = "chr6", 
                                workers = 10)
## 
## Warning: replacing previous import 'Biostrings::pattern' by 'grid::pattern'
## when loading 'genomation'
## Standardising peak files in 170,806 bins of 1,000 bp.
## Merging data into matrix.
## Converting to sparse matrix.
## Binned matrix size: 170,806 x 17
## Matrix sparsity: 0.9997

Postprocess binned matrix

bins <- prepare_bins(bins = bins) 
## 170806 empty bins dropped.
## Filtered matrix sparsity: 0.881

Compute correlation matrix

cor_mat <- cor(as.matrix(bins))
utils::write.csv(cor_mat,
                 file = here::here("results","rebinned_correlations.csv"), 
                 row.names = TRUE)

Plot heatmap

file <- here::here("results",paste0("heatmap_locus",c(".png",".pdf")))
hm <- heatmaply::heatmaply(x = cor_mat,
                     row_side_colors = meta[colnames(cor_mat),
                                            c("Cell","Histone")],
                     row_side_palette = c(keys$cell_key,keys$color_key),
                     plot_method = "plotly",
                     file = file,
                     key.title = "Pearson r",
                     height = 600
                     ) 
## Warning: 'plotly::orca' is deprecated.
## Use 'kaleido' instead.
## See help("Deprecated")
## Warning in value[[3L]](cond): plotly::orca failed:
##  Error: The orca command-line utility is required for this functionality.
## 
## Please follow the installation instructions here -- https://github.com/plotly/orca#installation
## Warning: 'plotly::export' is deprecated.
## Use 'orca' instead.
## See help("Deprecated")
## Warning: 'plotly::orca' is deprecated.
## Use 'kaleido' instead.
## See help("Deprecated")
## Warning in value[[3L]](cond): plotly::orca failed:
##  Error: The orca command-line utility is required for this functionality.
## 
## Please follow the installation instructions here -- https://github.com/plotly/orca#installation
## Warning: 'plotly::export' is deprecated.
## Use 'orca' instead.
## See help("Deprecated")
# plotly::export(p = hm, 
#                file = file[1],
#                vwidth = 500*3,
#                vheight = 500*3,
#                zoom = 1) 
hm

Chromosome-wide

Compute correlations across all of Chromosome 6.

Rebin peak data

bins <- EpiCompare::rebin_peaks(peakfiles = gr.chip,
                                genome_build = "hg38",
                                intensity_cols = "score",
                                bin_size = 1000,
                                keep_chr = "chr6", 
                                workers = 10)
## Standardising peak files in 170,806 bins of 1,000 bp.
## Merging data into matrix.
## Converting to sparse matrix.
## Binned matrix size: 170,806 x 18
## Matrix sparsity: 0.9819

Postprocess binned matrix

bins <- prepare_bins(bins = bins) 
## 170806 empty bins dropped.
## Filtered matrix sparsity: 0.899

Compute correlation matrix

cor_mat <- cor(as.matrix(bins))
utils::write.csv(cor_mat,
                 file = here::here("results","rebinned_correlations.chr6.csv"), 
                 row.names = TRUE) 

Plot heatmap

file <- here::here("results",paste0("heatmap_chr6",c(".png",".pdf")))
hm <- heatmaply::heatmaply(x = cor_mat,
                     row_side_colors = meta[colnames(cor_mat),
                                            c("Cell","Histone")],
                     row_side_palette = c(keys$cell_key,keys$color_key),
                     plot_method = "plotly",
                     file = file,
                     key.title = "Pearson r",
                     height = 600
                     ) 
## Warning: 'plotly::orca' is deprecated.
## Use 'kaleido' instead.
## See help("Deprecated")
## Warning in value[[3L]](cond): plotly::orca failed:
##  Error: The orca command-line utility is required for this functionality.
## 
## Please follow the installation instructions here -- https://github.com/plotly/orca#installation
## Warning: 'plotly::export' is deprecated.
## Use 'orca' instead.
## See help("Deprecated")
## Warning: 'plotly::orca' is deprecated.
## Use 'kaleido' instead.
## See help("Deprecated")
## Warning in value[[3L]](cond): plotly::orca failed:
##  Error: The orca command-line utility is required for this functionality.
## 
## Please follow the installation instructions here -- https://github.com/plotly/orca#installation
## Warning: 'plotly::export' is deprecated.
## Use 'orca' instead.
## See help("Deprecated")
hm

Session info

utils::sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Rome
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] rtracklayer_1.61.1   GenomicRanges_1.53.2 GenomeInfoDb_1.37.6 
## [4] IRanges_2.35.2       S4Vectors_0.39.3     BiocGenerics_0.47.0 
## 
## loaded via a namespace (and not attached):
##   [1] splines_4.3.1                          
##   [2] later_1.3.1                            
##   [3] BiocIO_1.11.0                          
##   [4] bitops_1.0-7                           
##   [5] ggplotify_0.1.2                        
##   [6] filelock_1.0.2                         
##   [7] tibble_3.2.1                           
##   [8] polyclip_1.10-6                        
##   [9] XML_3.99-0.14                          
##  [10] lifecycle_1.0.3                        
##  [11] rprojroot_2.0.3                        
##  [12] processx_3.8.2                         
##  [13] lattice_0.21-9                         
##  [14] pals_1.8                               
##  [15] MASS_7.3-60                            
##  [16] crosstalk_1.2.0                        
##  [17] dendextend_1.17.1                      
##  [18] magrittr_2.0.3                         
##  [19] plotly_4.10.2                          
##  [20] sass_0.4.7                             
##  [21] rmarkdown_2.25                         
##  [22] BSgenome.Hsapiens.UCSC.hg38_1.4.5      
##  [23] plotrix_3.8-2                          
##  [24] jquerylib_0.1.4                        
##  [25] yaml_2.3.7                             
##  [26] BRGenomics_1.12.0                      
##  [27] httpuv_1.6.11                          
##  [28] cowplot_1.1.1                          
##  [29] mapproj_1.2.11                         
##  [30] DBI_1.1.3                              
##  [31] RColorBrewer_1.1-3                     
##  [32] maps_3.4.1                             
##  [33] abind_1.4-5                            
##  [34] zlibbioc_1.47.0                        
##  [35] purrr_1.0.2                            
##  [36] ggraph_2.1.0                           
##  [37] RCurl_1.98-1.12                        
##  [38] yulab.utils_0.1.0                      
##  [39] tweenr_2.0.2                           
##  [40] rappdirs_0.3.3                         
##  [41] seriation_1.5.1                        
##  [42] GenomeInfoDbData_1.2.10                
##  [43] enrichplot_1.20.3                      
##  [44] ggrepel_0.9.4                          
##  [45] tidytree_0.4.5                         
##  [46] ChIPseeker_1.36.0                      
##  [47] codetools_0.2-19                       
##  [48] DelayedArray_0.27.10                   
##  [49] DOSE_3.26.2                            
##  [50] xml2_1.3.5                             
##  [51] ggforce_0.4.1                          
##  [52] tidyselect_1.2.0                       
##  [53] aplot_0.2.2                            
##  [54] farver_2.1.1                           
##  [55] viridis_0.6.4                          
##  [56] TSP_1.2-4                              
##  [57] matrixStats_1.0.0                      
##  [58] BiocFileCache_2.9.1                    
##  [59] webshot_0.5.5                          
##  [60] GenomicAlignments_1.37.0               
##  [61] jsonlite_1.8.7                         
##  [62] ellipsis_0.3.2                         
##  [63] tidygraph_1.2.3                        
##  [64] iterators_1.0.14                       
##  [65] foreach_1.5.2                          
##  [66] tools_4.3.1                            
##  [67] progress_1.2.2                         
##  [68] treeio_1.25.4                          
##  [69] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
##  [70] Rcpp_1.0.11                            
##  [71] glue_1.6.2                             
##  [72] gridExtra_2.3                          
##  [73] SparseArray_1.1.12                     
##  [74] xfun_0.40                              
##  [75] here_1.0.1                             
##  [76] DESeq2_1.40.2                          
##  [77] qvalue_2.32.0                          
##  [78] MatrixGenerics_1.13.1                  
##  [79] ca_0.71.1                              
##  [80] dplyr_1.1.3                            
##  [81] withr_2.5.1                            
##  [82] BiocManager_1.30.22                    
##  [83] fastmap_1.1.1                          
##  [84] boot_1.3-28.1                          
##  [85] fansi_1.0.5                            
##  [86] callr_3.7.3                            
##  [87] caTools_1.18.2                         
##  [88] digest_0.6.33                          
##  [89] R6_2.5.1                               
##  [90] mime_0.12                              
##  [91] gridGraphics_0.5-1                     
##  [92] seqPattern_1.32.0                      
##  [93] colorspace_2.1-0                       
##  [94] GO.db_3.18.0                           
##  [95] gtools_3.9.4                           
##  [96] dichromat_2.0-0.1                      
##  [97] biomaRt_2.57.1                         
##  [98] RSQLite_2.3.1                          
##  [99] utf8_1.2.3                             
## [100] tidyr_1.3.0                            
## [101] generics_0.1.3                         
## [102] data.table_1.14.8                      
## [103] htmlwidgets_1.6.2                      
## [104] prettyunits_1.2.0                      
## [105] graphlayouts_1.0.1                     
## [106] httr_1.4.7                             
## [107] S4Arrays_1.1.6                         
## [108] downloadthis_0.3.2                     
## [109] scatterpie_0.2.1                       
## [110] pkgconfig_2.0.3                        
## [111] gtable_0.3.4                           
## [112] registry_0.5-1                         
## [113] blob_1.2.4                             
## [114] impute_1.75.1                          
## [115] XVector_0.41.1                         
## [116] shadowtext_0.1.2                       
## [117] htmltools_0.5.6.1                      
## [118] fgsea_1.26.0                           
## [119] scales_1.2.1                           
## [120] Biobase_2.61.0                         
## [121] png_0.1-8                              
## [122] EpiCompare_1.3.4                       
## [123] ggfun_0.1.3                            
## [124] knitr_1.44                             
## [125] rstudioapi_0.15.0                      
## [126] tzdb_0.4.0                             
## [127] reshape2_1.4.4                         
## [128] rjson_0.2.21                           
## [129] nlme_3.1-163                           
## [130] curl_5.1.0                             
## [131] cachem_1.0.8                           
## [132] stringr_1.5.0                          
## [133] KernSmooth_2.23-22                     
## [134] BiocVersion_3.18.0                     
## [135] parallel_4.3.1                         
## [136] HDO.db_0.99.1                          
## [137] AnnotationDbi_1.63.2                   
## [138] restfulr_0.0.15                        
## [139] pillar_1.9.0                           
## [140] grid_4.3.1                             
## [141] vctrs_0.6.4                            
## [142] gplots_3.1.3                           
## [143] promises_1.2.1                         
## [144] dbplyr_2.3.4                           
## [145] xtable_1.8-4                           
## [146] evaluate_0.22                          
## [147] readr_2.1.4                            
## [148] GenomicFeatures_1.53.2                 
## [149] cli_3.6.1                              
## [150] locfit_1.5-9.8                         
## [151] compiler_4.3.1                         
## [152] Rsamtools_2.17.0                       
## [153] rlang_1.1.1                            
## [154] crayon_1.5.2                           
## [155] heatmaply_1.5.0                        
## [156] ps_1.7.5                               
## [157] plyr_1.8.9                             
## [158] fs_1.6.3                               
## [159] stringi_1.7.12                         
## [160] gridBase_0.4-7                         
## [161] genomation_1.32.0                      
## [162] viridisLite_0.4.2                      
## [163] BiocParallel_1.35.4                    
## [164] assertthat_0.2.1                       
## [165] munsell_0.5.0                          
## [166] Biostrings_2.69.2                      
## [167] lazyeval_0.2.2                         
## [168] GOSemSim_2.26.1                        
## [169] Matrix_1.6-1.1                         
## [170] BSgenome_1.69.0                        
## [171] hms_1.1.3                              
## [172] patchwork_1.1.3                        
## [173] bit64_4.0.5                            
## [174] ggplot2_3.4.4                          
## [175] KEGGREST_1.41.4                        
## [176] shiny_1.7.5                            
## [177] SummarizedExperiment_1.31.1            
## [178] interactiveDisplayBase_1.39.0          
## [179] AnnotationHub_3.9.2                    
## [180] igraph_1.5.1                           
## [181] memoise_2.0.1.9000                     
## [182] bslib_0.5.1                            
## [183] ggtree_3.9.1                           
## [184] fastmatch_1.1-4                        
## [185] bit_4.0.5                              
## [186] ape_5.7-1