久久国产成人av_抖音国产毛片_a片网站免费观看_A片无码播放手机在线观看,色五月在线观看,亚洲精品m在线观看,女人自慰的免费网址,悠悠在线观看精品视频,一级日本片免费的,亚洲精品久,国产精品成人久久久久久久

分享

TCGA的臨床數據變生信文章系列(1)

 醫學院的石頭 2018-04-25

感謝有邑同學的來稿,有邑同學已獲贈免編程生信文章Genespring+Cytoscape還原操作詳解|TCGA,GEO,SEER數據庫挖掘系列的免費線下學習名額

我是從cBioPortal數據庫下載的臨床數據,下載使用臨床數據還是沒問題的。RNAseq counts數據推薦用生信人TCGA小工具下載,這里放一個傳送門:https://www./article/95。言歸正傳,講cBioPortal。主頁面是這樣的:

選擇我們需要的腫瘤類型和數據集,可以看到最新的TCGA數據庫里面有頭頸部腫瘤530例:

我們可以點擊download,切換到下載頁面

點擊綠色框內的圖標進入概覽頁面

點擊Download,就可以在瀏覽器里面下載數據了

下載好之后解壓就行了,里面有多個文件也有說明。這里不再贅述了。
以下就是代碼,讀取數據生成對象:

# ================ Clinical data preparation ================
# 1.1 Read the data
# Load the patient-level clinical table. Lines starting with '#' are treated
# as comments and skipped, the first column supplies the row names, and the
# literal '[Not Available]' is read as NA.
# NOTE(review): the reader call and the separator were lost when this page was
# extracted (the source shows only `<>'path'` and sep = ' '). read.table() with
# a tab separator is assumed because this is a tab-delimited export -- confirm
# against the downloaded file.
patientMatch <- read.table(
  'hnsc_tcga/hnsc_tcga/data_bcr_clinical_data_patient.txt',
  header = TRUE, row.names = 1, comment.char = '#', sep = '\t',
  na.strings = '[Not Available]'
)
# Inspect the imported table; the expected console output is kept as comments.
colnames(patientMatch)
#  [1] 'PATIENT_ID'                              'FORM_COMPLETION_DATE'
#  [3] 'HISTOLOGICAL_DIAGNOSIS'                  'PRIMARY_SITE'
#  [5] 'LATERALITY'                              'PROSPECTIVE_COLLECTION'
#  [7] 'RETROSPECTIVE_COLLECTION'                'SEX'
#  [9] 'DAYS_TO_BIRTH'                           'RACE'
# [11] 'ETHNICITY'                               'HISTORY_OTHER_MALIGNANCY'
# [13] 'HISTORY_NEOADJUVANT_TRTYN'               'INITIAL_PATHOLOGIC_DX_YEAR'
# [15] 'LYMPH_NODE_NECK_DISSECTION_INDICATOR'    'LYMPH_NODE_DISSECTION_METHOD'
# [17] 'LYMPH_NODES_EXAMINED'                    'LYMPH_NODE_EXAMINED_COUNT'
# [19] 'LYMPH_NODES_EXAMINED_HE_COUNT'           'LYMPH_NODES_EXAMINED_IHC_COUNT'
# [21] 'PATH_MARGIN'                             'P53_GENE_ANALYSIS'
# [23] 'AMPLIFICATION_STATUS'                    'VITAL_STATUS'
# [25] 'DAYS_TO_LAST_FOLLOWUP'                   'DAYS_TO_DEATH'
# [27] 'TUMOR_STATUS'                            'AJCC_STAGING_EDITION'
# [29] 'AJCC_TUMOR_PATHOLOGIC_PT'                'AJCC_NODES_PATHOLOGIC_PN'
# [31] 'AJCC_METASTASIS_PATHOLOGIC_PM'           'AJCC_PATHOLOGIC_TUMOR_STAGE'
# [33] 'EXTRACAPSULAR_SPREAD_PATHOLOGIC'         'GRADE'
# [35] 'ANGIOLYMPHATIC_INVASION'                 'PERINEURAL_INVASION'
# [37] 'HPV_STATUS_P16'                          'HPV_STATUS_ISH'
# [39] 'TOBACCO_SMOKING_HISTORY_INDICATOR'       'SMOKING_YEAR_STARTED'
# [41] 'SMOKING_YEAR_STOPPED'                    'SMOKING_PACK_YEARS'
# [43] 'ALCOHOL_HISTORY_DOCUMENTED'              'ALCOHOL_CONSUMPTION_FREQUENCY'
# [45] 'DAILY_ALCOHOL'                           'RADIATION_TREATMENT_ADJUVANT'
# [47] 'PHARMACEUTICAL_TX_ADJUVANT'              'TREATMENT_OUTCOME_FIRST_COURSE'
# [49] 'NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT' 'AGE'
# [51] 'CLIN_M_STAGE'                            'CLIN_N_STAGE'
# [53] 'CLIN_T_STAGE'                            'CLINICAL_STAGE'
# [55] 'DAYS_TO_INITIAL_PATHOLOGIC_DIAGNOSIS'    'DISEASE_CODE'
# [57] 'EXTRANODAL_INVOLVEMENT'                  'ICD_10'
# [59] 'ICD_O_3_HISTOLOGY'                       'ICD_O_3_SITE'
# [61] 'INFORMED_CONSENT_VERIFIED'               'PROJECT_CODE'
# [63] 'STAGE_OTHER'                             'TISSUE_SOURCE_SITE'
# [65] 'TUMOR_TISSUE_SITE'                       'OS_STATUS'
# [67] 'OS_MONTHS'                               'DFS_STATUS'
# [69] 'DFS_MONTHS'
dim(patientMatch)
# [1] 527  69

一共有69列,即69個變量,527個樣本。這里我們需要的變量有:
“PATIENT_ID”,做行名

# Use PATIENT_ID as the row names, then drop the first column.
# NOTE(review): both right-hand sides were lost in page extraction and are
# reconstructed from the surviving fragments (`$PATIENT_ID` and `1]`) -- confirm.
rownames(patientMatch) <- patientMatch$PATIENT_ID
patientMatch <- patientMatch[, -1]

其它的變量,這里用了一個正則表達式:

'PRIMARY_SITE','LATERALITY'                             
 'SEX'                                                               
 'RACE'                                                                
 'HISTORY_OTHER_MALIGNANCY'                             
             
'LYMPH_NODE_NECK_DISSECTION_INDICATOR'   
 'LYMPH_NODE_DISSECTION_METHOD'                              
 'LYMPH_NODE_EXAMINED_COUNT'
,'LYMPH_NODES_EXAMINED_HE_COUNT'          
 'LYMPH_NODES_EXAMINED_IHC_COUNT'
,'PATH_MARGIN'                            
                                           
 'VITAL_STATUS'
,'DAYS_TO_LAST_FOLLOWUP'                  
 'DAYS_TO_DEATH'
,'TUMOR_STATUS'                           
                  'AJCC_TUMOR_PATHOLOGIC_PT'     
          
 'AJCC_NODES_PATHOLOGIC_PN','AJCC_METASTASIS_PATHOLOGIC_PM'          
 'AJCC_PATHOLOGIC_TUMOR_STAGE'
,'EXTRACAPSULAR_SPREAD_PATHOLOGIC'        
 'GRADE'
,'ANGIOLYMPHATIC_INVASION'                
 'PERINEURAL_INVASION'
,'HPV_STATUS_P16'                         
 'HPV_STATUS_ISH'
,'TOBACCO_SMOKING_HISTORY_INDICATOR'      
                     'ALCOHOL_HISTORY_DOCUMENTED'             
           'PHARMACEUTICAL_TX_ADJUVANT'    
         
 
'TREATMENT_OUTCOME_FIRST_COURSE','NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT'
 'AGE'
,'CLIN_M_STAGE'                           
 'CLIN_N_STAGE'
,'CLIN_T_STAGE'                           
 'CLINICAL_STAGE'                                                  
 'TISSUE_SOURCE_SITE'
,'TUMOR_TISSUE_SITE'                      
 'OS_STATUS'
,'OS_MONTHS'                              
 'DFS_STATUS'
     

原文本在這里,需要把換行符、回車符去掉,并在兩個中間沒有字母或數字的引號之間插入一個逗號「,」。用的正則替換式如下圖:


完成之后變成這樣子:

'PRIMARY_SITE','LATERALITY','SEX','RACE','HISTORY_OTHER_MALIGNANCY','LYMPH_NODE_NECK_DISSECTION_INDICATOR','LYMPH_NODE_DISSECTION_METHOD','LYMPH_NODE_EXAMINED_COUNT','LYMPH_NODES_EXAMINED_HE_COUNT','LYMPH_NODES_EXAMINED_IHC_COUNT','PATH_MARGIN','VITAL_STATUS','DAYS_TO_LAST_FOLLOWUP','DAYS_TO_DEATH','TUMOR_STATUS','AJCC_TUMOR_PATHOLOGIC_PT','AJCC_NODES_PATHOLOGIC_PN','AJCC_METASTASIS_PATHOLOGIC_PM','AJCC_PATHOLOGIC_TUMOR_STAGE','EXTRACAPSULAR_SPREAD_PATHOLOGIC','GRADE','ANGIOLYMPHATIC_INVASION','PERINEURAL_INVASION','HPV_STATUS_P16','HPV_STATUS_ISH','TOBACCO_SMOKING_HISTORY_INDICATOR','ALCOHOL_HISTORY_DOCUMENTED','PHARMACEUTICAL_TX_ADJUVANT','TREATMENT_OUTCOME_FIRST_COURSE','NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT'
'AGE'
,'CLIN_M_STAGE','CLIN_N_STAGE','CLIN_T_STAGE','CLINICAL_STAGE','TISSUE_SOURCE_SITE','TUMOR_TISSUE_SITE','OS_STATUS','OS_MONTHS','DFS_STATUS'


這一步也可以在RStudio中完成。我們選出這些列:

# 1.2 Select the required variables
# Keep only the 40 clinical columns used downstream, in this order.
patientMatch <- patientMatch[, c(
  'PRIMARY_SITE', 'LATERALITY', 'SEX', 'RACE', 'HISTORY_OTHER_MALIGNANCY',
  'LYMPH_NODE_NECK_DISSECTION_INDICATOR', 'LYMPH_NODE_DISSECTION_METHOD',
  'LYMPH_NODE_EXAMINED_COUNT', 'LYMPH_NODES_EXAMINED_HE_COUNT',
  'LYMPH_NODES_EXAMINED_IHC_COUNT', 'PATH_MARGIN', 'VITAL_STATUS',
  'DAYS_TO_LAST_FOLLOWUP', 'DAYS_TO_DEATH', 'TUMOR_STATUS',
  'AJCC_TUMOR_PATHOLOGIC_PT', 'AJCC_NODES_PATHOLOGIC_PN',
  'AJCC_METASTASIS_PATHOLOGIC_PM', 'AJCC_PATHOLOGIC_TUMOR_STAGE',
  'EXTRACAPSULAR_SPREAD_PATHOLOGIC', 'GRADE', 'ANGIOLYMPHATIC_INVASION',
  'PERINEURAL_INVASION', 'HPV_STATUS_P16', 'HPV_STATUS_ISH',
  'TOBACCO_SMOKING_HISTORY_INDICATOR', 'ALCOHOL_HISTORY_DOCUMENTED',
  'PHARMACEUTICAL_TX_ADJUVANT', 'TREATMENT_OUTCOME_FIRST_COURSE',
  'NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT', 'AGE', 'CLIN_M_STAGE',
  'CLIN_N_STAGE', 'CLIN_T_STAGE', 'CLINICAL_STAGE', 'TISSUE_SOURCE_SITE',
  'TUMOR_TISSUE_SITE', 'OS_STATUS', 'OS_MONTHS', 'DFS_STATUS'
)]

colnames(patientMatch)  # names of the selected variables
#  [1] 'PRIMARY_SITE'                            'LATERALITY'
#  [3] 'SEX'                                     'RACE'
#  [5] 'HISTORY_OTHER_MALIGNANCY'                'LYMPH_NODE_NECK_DISSECTION_INDICATOR'
#  [7] 'LYMPH_NODE_DISSECTION_METHOD'            'LYMPH_NODE_EXAMINED_COUNT'
#  [9] 'LYMPH_NODES_EXAMINED_HE_COUNT'           'LYMPH_NODES_EXAMINED_IHC_COUNT'
# [11] 'PATH_MARGIN'                             'VITAL_STATUS'
# [13] 'DAYS_TO_LAST_FOLLOWUP'                   'DAYS_TO_DEATH'
# [15] 'TUMOR_STATUS'                            'AJCC_TUMOR_PATHOLOGIC_PT'
# [17] 'AJCC_NODES_PATHOLOGIC_PN'                'AJCC_METASTASIS_PATHOLOGIC_PM'
# [19] 'AJCC_PATHOLOGIC_TUMOR_STAGE'             'EXTRACAPSULAR_SPREAD_PATHOLOGIC'
# [21] 'GRADE'                                   'ANGIOLYMPHATIC_INVASION'
# [23] 'PERINEURAL_INVASION'                     'HPV_STATUS_P16'
# [25] 'HPV_STATUS_ISH'                          'TOBACCO_SMOKING_HISTORY_INDICATOR'
# [27] 'ALCOHOL_HISTORY_DOCUMENTED'              'PHARMACEUTICAL_TX_ADJUVANT'
# [29] 'TREATMENT_OUTCOME_FIRST_COURSE'          'NEW_TUMOR_EVENT_AFTER_INITIAL_TREATMENT'
# [31] 'AGE'                                     'CLIN_M_STAGE'
# [33] 'CLIN_N_STAGE'                            'CLIN_T_STAGE'
# [35] 'CLINICAL_STAGE'                          'TISSUE_SOURCE_SITE'
# [37] 'TUMOR_TISSUE_SITE'                       'OS_STATUS'
# [39] 'OS_MONTHS'                               'DFS_STATUS'
dim(patientMatch)
# [1] 527  40

下面我們生成一個新的對象來儲存整理我們需要的變量和變量值:

# 1.3 New object baseLine holds the variables for the baseline table
# NOTE(review): the column selection on this line was lost in page extraction
# (only `patientMatch[,` survives). Taking the first four columns is an
# assumption -- confirm against the original script.
baseLine <- patientMatch[, 1:4]
# baseLine$Race[baseLine$Race %in% c('Not Evaluated', 'Unknown')] <- NA
# rownames(baseLine) <- patientMatch$patientID

# 1.4 Group / recode values and store them in baseLine
# NOTE(review): the lower age label was eaten in extraction; '<=55' is assumed
# as the complement of '>55'.
baseLine$Age <- ifelse(as.numeric(patientMatch$AGE) > 55, '>55', '<=55')
baseLine$Smoke <- ifelse(
  patientMatch$TOBACCO_SMOKING_HISTORY_INDICATOR == '1', 'NO', 'YES'
)
baseLine$Acohol <- patientMatch$ALCOHOL_HISTORY_DOCUMENTED
baseLine$pMargin <- patientMatch$PATH_MARGIN

# Treat '[Discrepancy]' stages as missing. %in% never yields NA, so the
# logical index stays well defined when the column already contains NA.
isDiscrepant <- patientMatch$AJCC_PATHOLOGIC_TUMOR_STAGE %in% '[Discrepancy]'
patientMatch$AJCC_PATHOLOGIC_TUMOR_STAGE[isDiscrepant] <- NA

# Collapse sub-stages by removing a trailing a/b/c letter. The pattern is
# anchored with `$` because the unanchored '[a-cA-C]' would also match the
# "a" inside the word "Stage".
stripSubstage <- function(x) sub('[a-cA-C]$', '', x)
baseLine$pathoStage <- stripSubstage(patientMatch$AJCC_PATHOLOGIC_TUMOR_STAGE)
baseLine$PathoT <- stripSubstage(patientMatch$AJCC_TUMOR_PATHOLOGIC_PT)
baseLine$PathoN <- stripSubstage(patientMatch$AJCC_NODES_PATHOLOGIC_PN)
baseLine$PathoM <- stripSubstage(patientMatch$AJCC_METASTASIS_PATHOLOGIC_PM)
baseLine$ClinStage <- stripSubstage(patientMatch$CLINICAL_STAGE)
baseLine$ClinT <- stripSubstage(patientMatch$CLIN_T_STAGE)
baseLine$ClinN <- stripSubstage(patientMatch$CLIN_N_STAGE)
baseLine$ClinM <- stripSubstage(patientMatch$CLIN_M_STAGE)

baseLine$HistoGrade <- patientMatch$GRADE
baseLine$LymphVasInvas <- patientMatch$ANGIOLYMPHATIC_INVASION
baseLine$PeriInvas <- patientMatch$PERINEURAL_INVASION
# baseLine$MarginStatus <- factor(baseLine$MarginStatus,
#                                 levels = c('Negative', 'Close', 'Positive'))
baseLine$NodeExtraCapsu <- patientMatch$EXTRACAPSULAR_SPREAD_PATHOLOGIC

后面HPV的感染有兩個檢測方法,我們選其中任意一個陽性的患者判定其HPV感染狀態為陽性,通過創建一個函數來判定:

# 1.5 Determine HPV infection status
# Combine two HPV test results for one patient into a single call.
#   x, y  One result each (character, factor or NA).
# Returns 'Positive' if either result is 'Positive'; otherwise 'Negative' if
# either is 'Negative'; otherwise NA_character_, so the return type is always
# character.
HPVlog <- function(x, y) {
  # as.character() makes the membership test compare labels even when the
  # inputs are factors.
  results <- c(as.character(x), as.character(y))
  if ('Positive' %in% results) {
    return('Positive')
  }
  if ('Negative' %in% results) {
    return('Negative')
  }
  NA_character_
}
# Apply HPVlog() element-wise to the two HPV test columns.
# USE.NAMES = FALSE stops mapply() from labelling the result with the values
# of the first character argument.
baseLine$HPV <- mapply(
  HPVlog,
  as.character(patientMatch$HPV_STATUS_ISH),
  as.character(patientMatch$HPV_STATUS_P16),
  USE.NAMES = FALSE
)

后面是結局事件和時間:

# 1.6 Survival time and events
# NOTE(review): right-hand sides were eaten in page extraction and are
# reconstructed as `patientMatch$<name>`. 'Mons2End' and 'VitalStatus' are not
# among the column names printed for patientMatch earlier in this script; if
# those columns are absent, the first two assignments add nothing to baseLine.
# Confirm whether OS_MONTHS / VITAL_STATUS were intended.
baseLine$Mons2End <- patientMatch$Mons2End
baseLine$VitalStatus <- patientMatch$VitalStatus
baseLine$DisFreeStatus <- patientMatch$DFS_STATUS
baseLine$OSMonths <- patientMatch$OS_MONTHS

導出整理出來的臨床資料:

# 1.7 Export the results
write.csv(baseLine, 'baseLine.csv')
dim(baseLine)
# [1] 527  23

# Frequency tables for the first 22 columns, counting NA as its own category
# where present, plus the matching proportions.
tableNA <- table(baseLine[, 2], useNA = 'ifany')
Freq <- lapply(baseLine[, 1:22], function(x) table(x, useNA = 'ifany'))
prop <- lapply(Freq[1:22], prop.table)

建立表格并導出:

# 1.8 Build the summary table
# For each variable, emit a header row (variable name, NA count cell) followed
# by one row per category formatted as "count(percent)".
# NOTE(review): the calls eaten in extraction are reconstructed as names(),
# paste0() and data.frame() -- confirm against the original script.
library(xlsx)

charBlocks <- lapply(seq_along(Freq), function(i) {
  data.frame(
    'Characteristics' = c(names(Freq[i]), names(Freq[[i]])),
    'Number of Case' = c(
      NA,
      paste0(Freq[[i]], '(', round(prop[[i]] * 100, 2), ')')
    ),
    check.names = FALSE,  # keep the header "Number of Case" as written
    stringsAsFactors = FALSE
  )
})
# Bind once at the end instead of growing the data frame inside a loop.
Char <- do.call(rbind, charBlocks)

write.xlsx(Char, 'BaseLineIntergrated.xls', row.names = FALSE, showNA = FALSE)

這是最終導出的表格:

    本站是提供個人知識管理的網絡存儲空間,所有內容均由用戶發布,不代表本站觀點。請注意甄別內容中的聯系方式、誘導購買等信息,謹防詐騙。如發現有害或侵權內容,請點擊一鍵舉報。
    轉藏 分享 獻花(0

    0條評論

    發表

    請遵守用戶 評論公約

    類似文章 更多