Correlation of PARP1 with CTCF ChIP-Seq

We will analyze the correlation of nucleosome-associated PARP1 reads with CTCF ChIP-seq signal. The CTCF ChIP-seq data are from the UCSC/ENCODE data portal. The PARP1 reads and CTCF ChIP-seq data have already been processed and are available as part of the fmdatabreastcaparp1 package.


Load the PARP1 data.


Calculate the weighted coverage of the ln4 (MCF-7) and ln5 (MDA-MB-231) sample reads, and then sum the reads in each TSS window.

# MCF-7 (ln4): coverage weighted by the "n_count" column, then summed per
# TSS window and stored under "parp1_mcf7".
mcf7_cov <- coverage(parp1_ln4_unique, weight = "n_count")
tss_windows <- binned_function(
  tss_windows, mcf7_cov, "sum", "parp1_mcf7"
)

# MDA-MB-231 (ln5): same procedure, stored under "parp1_mdamb231".
mdamb231_cov <- coverage(parp1_ln5_unique, weight = "n_count")
tss_windows <- binned_function(
  tss_windows, mdamb231_cov, "sum", "parp1_mdamb231"
)

Load the CTCF ChIP-seq data, and average the ChIP-seq peaks in each TSS window.

# CTCF replicate 1: coverage weighted by "mcols.signal", summarised per TSS
# window with the "mean_nozero" statistic and stored under "ctcf_r1".
ctcf_r1_cov <- coverage(ctcf_rep1, weight = "mcols.signal")
tss_windows <- binned_function(
  tss_windows, ctcf_r1_cov, "mean_nozero", "ctcf_r1"
)

# CTCF replicate 2: same procedure, stored under "ctcf_r2".
ctcf_r2_cov <- coverage(ctcf_rep2, weight = "mcols.signal")
tss_windows <- binned_function(
  tss_windows, ctcf_r2_cov, "mean_nozero", "ctcf_r2"
)

Now with the PARP1 reads and CTCF ChIP-seq data, we can start doing some correlations.

# Zero-handling mode, passed as the `non_zero` argument to every
# subsample_nonzeros() and correlate_non_zero() call below.
# NOTE(review): "both" presumably keeps only windows where both variables are
# non-zero -- confirm against the helper documentation.
non_zero <- "both"

Start with a subsample of points: plot them, then compute a correlation.

# CTCF replicate 1 vs. PARP1 (MCF-7) over the TSS windows, requesting
# n_points = 10000 from subsample_nonzeros().
r1_v_mcf7 <- subsample_nonzeros(
  mcols(tss_windows),
  c("ctcf_r1", "parp1_mcf7"),
  non_zero = non_zero,
  n_points = 10000
)
ggplot(r1_v_mcf7, aes(x = ctcf_r1, y = parp1_mcf7)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Correlation of the first two columns on a log10(x + 1) scale.
cor(
  log10(r1_v_mcf7[, 1] + 1),
  log10(r1_v_mcf7[, 2] + 1)
)
## [1] 0.1219387
# CTCF replicate 2 vs. PARP1 (MCF-7) over the TSS windows, requesting
# n_points = 10000 from subsample_nonzeros().
r2_v_mcf7 <- subsample_nonzeros(
  mcols(tss_windows),
  c("ctcf_r2", "parp1_mcf7"),
  non_zero = non_zero,
  n_points = 10000
)
ggplot(r2_v_mcf7, aes(x = ctcf_r2, y = parp1_mcf7)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Correlation of the first two columns on a log10(x + 1) scale.
cor(
  log10(r2_v_mcf7[, 1] + 1),
  log10(r2_v_mcf7[, 2] + 1)
)
## [1] 0.2111723

Now do them all.

# Every pairing of a CTCF replicate (column 1) with a PARP1 sample (column 2).
all_comb <- expand.grid(c("ctcf_r1", "ctcf_r2"), c("parp1_mcf7", "parp1_mdamb231"), stringsAsFactors = FALSE)

# Correlate each pair over the TSS windows; the result is one list element
# per row of all_comb.
out_cor <- lapply(seq_len(nrow(all_comb)), function(i_row) {
  correlate_non_zero(mcols(tss_windows), as.character(all_comb[i_row, ]), log_transform = TRUE, non_zero = non_zero, test = TRUE)
})

# Stack the per-pair results into a single table, one row per pair, labelled
# "<ctcf>_v_<parp1>".
all_comb_names <- paste(all_comb[, 1], all_comb[, 2], sep = "_v_")
out_cor <- do.call(rbind, out_cor)
rownames(out_cor) <- all_comb_names

TSS correlations:

corr_value p_value
ctcf_r1_v_parp1_mcf7 0.1138285 0
ctcf_r2_v_parp1_mcf7 0.1959455 0
ctcf_r1_v_parp1_mdamb231 0.0446777 0
ctcf_r2_v_parp1_mdamb231 0.0830321 0
# One log-log scatter plot per CTCF/PARP1 pair in all_comb, each built from a
# fresh subsample of the TSS windows (n_points = 10000 requested).
out_graphs <- lapply(seq_len(nrow(all_comb)), function(i_row) {
  use_vars <- as.character(all_comb[i_row, ])
  subpoints <- subsample_nonzeros(mcols(tss_windows), use_vars, non_zero = non_zero, n_points = 10000)
  # aes_string() because the column names are only known as strings here.
  ggplot(subpoints, aes_string(x = use_vars[1], y = use_vars[2])) + geom_point() + scale_y_log10() + scale_x_log10()
})
# Print the list so that each plot is rendered.
out_graphs
## [[1]]

## [[2]]

## [[3]]

## [[4]]

Genome Wide Comparison

Are these correlations a result of association with the TSSs? One way to test this is to repeat the calculation genome-wide.

# Tile the whole genome into 2000 bp windows. cut.last.tile.in.chrom = TRUE
# makes tileGenome() return a GRanges whose tiles never span two chromosomes
# (the last tile of each chromosome may be shorter).
genome_tiles <- tileGenome(seqinfo(Hsapiens), tilewidth = 2000, cut.last.tile.in.chrom = TRUE)

# Summarise the same four coverage tracks over the genome-wide tiles, using
# the same statistics and column names as for the TSS windows.
genome_tiles <- binned_function(
  genome_tiles, mcf7_cov, "sum", "parp1_mcf7"
)
genome_tiles <- binned_function(
  genome_tiles, mdamb231_cov, "sum", "parp1_mdamb231"
)
genome_tiles <- binned_function(
  genome_tiles, ctcf_r1_cov, "mean_nozero", "ctcf_r1"
)
genome_tiles <- binned_function(
  genome_tiles, ctcf_r2_cov, "mean_nozero", "ctcf_r2"
)
# CTCF replicate 1 vs. PARP1 (MCF-7) over the genome-wide tiles, requesting
# n_points = 10000 from subsample_nonzeros().
genome_r1_v_mcf7 <- subsample_nonzeros(
  mcols(genome_tiles),
  c("ctcf_r1", "parp1_mcf7"),
  non_zero = non_zero,
  n_points = 10000
)
ggplot(genome_r1_v_mcf7, aes(x = ctcf_r1, y = parp1_mcf7)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Natural log here rather than log10; the Pearson correlation is unaffected
# by the choice of log base.
cor(
  log(genome_r1_v_mcf7[, 1] + 1),
  log(genome_r1_v_mcf7[, 2] + 1)
)
## [1] 0.181062
# Correlate each CTCF/PARP1 pair in all_comb over the genome-wide tiles; the
# result is one list element per row of all_comb.
genome_cor <- lapply(seq_len(nrow(all_comb)), function(i_row) {
  correlate_non_zero(mcols(genome_tiles), as.character(all_comb[i_row, ]), log_transform = TRUE, non_zero = non_zero, test = TRUE)
})

# Stack the per-pair results into a single table, one row per pair, labelled
# "<ctcf>_v_<parp1>".
all_comb_names <- paste(all_comb[, 1], all_comb[, 2], sep = "_v_")
genome_cor <- do.call(rbind, genome_cor)
rownames(genome_cor) <- all_comb_names

Genome wide correlations:

corr_value p_value
ctcf_r1_v_parp1_mcf7 0.1854415 0
ctcf_r2_v_parp1_mcf7 0.2390277 0
ctcf_r1_v_parp1_mdamb231 0.1608382 0
ctcf_r2_v_parp1_mdamb231 0.1793969 0

Save the correlation results in some plain text files.

# Write both correlation tables as tab-separated text under saveloc
# (a path relative to the current working directory).
saveloc <- "../inst/correlation_tables"
tss_file <- file.path(saveloc, "ctcf_tss.txt")
genome_file <- file.path(saveloc, "ctcf_genome.txt")
write.table(out_cor, file = tss_file, sep = "\t")
write.table(genome_cor, file = genome_file, sep = "\t")

