【发布时间】:2020-10-18 03:21:14
【问题描述】:
我在一个基于不同村庄名称的文件夹中有 300 个结构相同的 CSV 文件。我需要单独读取每个文件,处理这些文件,并将输出文件导出到另一个文件夹中,并使用各自的村庄名称(例如,'村庄名称'_score)。
这是一个示例村庄文件的数据...
structure(list(ID_GC = structure(1:51, .Label = c("492K", "494K",
"497K", "498K", "499K", "500K", "501K", "502K", "503K", "504K",
"506K", "507K", "508K", "509K", "510K", "511K", "512K", "513K",
"514K", "516K", "517K", "518K", "519K", "522K", "523K", "524K",
"526K", "527K", "528K", "530K", "531K", "532K", "533K", "534K",
"535K", "536K", "537K", "538K", "539K", "540K", "541K", "542K",
"543K", "544K", "545K", "546K", "547K", "548K", "550K", "551K",
"552K"), class = "factor"), Lat = c(23.78107, 23.78115, 23.78122,
23.78123, 23.78125, 23.78081, 23.78096, 23.78062, 23.78068, 23.78071,
23.78075, 23.78043, 23.78021, 23.77937, 23.77985, 23.77981, 23.77995,
23.77987, 23.7799, 23.7796, 23.77944, 23.77934, 23.77937, 23.77906,
23.77899, 23.77907, 23.77889, 23.77898, 23.77863, 23.77865, 23.77855,
23.77852, 23.77843, 23.77806, 23.77824, 23.77809, 23.7781, 23.77797,
23.77788, 23.77786, 23.77809, 23.77815, 23.77771, 23.77757, 23.77772,
23.77752, 23.7774, 23.7772, 23.77869, 23.78084, 23.78178), Long = c(90.65016,
90.64968, 90.6497, 90.64969, 90.64972, 90.64996, 90.64987, 90.64989,
90.64924, 90.64921, 90.65, 90.64998, 90.6494, 90.64989, 90.64978,
90.64973, 90.64952, 90.64958, 90.64925, 90.64935, 90.6492, 90.64922,
90.64919, 90.64928, 90.64937, 90.64887, 90.64919, 90.64891, 90.64914,
90.64903, 90.64907, 90.6491, 90.64868, 90.6491, 90.64853, 90.64862,
90.64851, 90.64852, 90.64865, 90.64865, 90.64878, 90.64878, 90.64866,
90.64859, 90.64844, 90.64839, 90.64858, 90.64861, 90.64922, 90.64994,
90.64925), Village = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Abdullapur", class = "factor"),
Depth_m = c(18, 18, 18, 210, 18, 31.5, 13.5, 15, 13.5, 21,
13.5, 18, 15, 240, 24, 13.5, 19.5, 33, 156, 14.4, 18, 21,
13.5, 18, 18, 51, 48, 54, 67.5, 69, 69, 66, 66, 21, 60, 66,
54, 31.5, 21, 210, 66, 12, 54, 27, 219, 18, 18, 18, 18, 18,
21), As_ug_L = c(68L, 68L, 68L, 2L, 68L, 306L, 129L, 129L,
20L, 68L, 188L, 129L, 68L, 2L, 68L, 68L, 129L, 188L, 2L,
2L, 68L, 37L, 20L, 306L, 306L, 20L, 306L, 20L, 2L, 2L, 2L,
2L, 2L, 306L, 2L, 2L, 2L, 306L, 306L, 2L, 2L, 306L, 2L, 306L,
20L, 306L, 68L, 68L, 306L, 68L, 20L)), class = "data.frame", row.names = c(NA,
-51L))
以及计算所有村庄所需的另一个数据集(“dtw_BG”)......
structure(list(ID_GC = structure(c(10L, 11L, 12L, 13L, 14L, 8L,
9L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 1L, 2L, 3L, 4L, 5L, 6L,
7L), .Label = c("1002F", "1008F", "1016F", "1029F", "1051F",
"1053F", "1058F", "1548D", "1561D", "498K", "509K", "514K", "540K",
"545K", "559K", "560K", "569K", "571K", "597K", "601K", "614K",
"819F", "829F", "933F", "934F", "951F", "957F", "958F", "959F",
"960F", "964F", "973F", "982F", "998F"), class = "factor"), Lat = c(23.78123,
23.77937, 23.7799, 23.77786, 23.77772, 23.77439336, 23.77204886,
23.77484, 23.775, 23.77528, 23.77492, 23.77521, 23.77593, 23.7757,
23.78494, 23.78473, 23.78385611, 23.78395451, 23.78426992, 23.78374538,
23.78377154, 23.78360725, 23.78340944, 23.78362259, 23.78272036,
23.78307399, 23.78269739, 23.78252464, 23.78279102, 23.78131262,
23.78149057, 23.77867098, 23.77828323, 23.78592929), Long = c(90.64969,
90.64989, 90.64925, 90.64865, 90.64844, 90.65543457, 90.65292302,
90.65158, 90.65192, 90.65219, 90.65232, 90.65363, 90.65356, 90.65483,
90.65025, 90.65238, 90.64900976, 90.64933908, 90.65082989, 90.64891814,
90.64902199, 90.64910447, 90.64933699, 90.6488857, 90.64921562,
90.64848103, 90.64799873, 90.64826494, 90.64738669, 90.64781684,
90.64612672, 90.64499055, 90.64476985, 90.6499865), Village = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("Abdullapur", "Chauthar Kanda", "Nagra Para Faitadi",
"Nowa Para"), class = "factor"), Depth_m = c(210, 240, 156, 210,
219, 225, 195, 299.7, 299.7, 240, 240, 234, 240, 105, 165, 180,
180, 225, 180, 210, 195, 201, 180, 195, 210, 210, 195, 180, 225,
180, 108, 210, 225, 240), As_ug_L = c(2L, 2L, 2L, 2L, 20L, 2L,
2L, 2L, 20L, 2L, 2L, 7L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA,
-34L))
我需要处理所有村庄,但我不确定如何循环它们。到目前为止,我能够使用“readr”包读取所有单独的村庄文件。
library(readr)
a <- list.files(path = "/Users/......",
pattern = "*.csv", full.names = T)
这是我用于单个村庄的代码:
dtw_BG<- read.csv('/Users/...../dtw_BG.csv',header=TRUE)
gw<-read.csv('/Users/....../Abdullapur.csv',header=TRUE)
stw = gw[gw$Depth_m <= 90,]
stw_R = gw[gw$Depth_m <= 90 & gw$As_ug_L > 50,]
itw = gw[gw$Depth_m >= 45 & gw$Depth_m <= 90,]
itw_10 = gw[gw$Depth_m >= 45 & gw$Depth_m <= 90 & gw$As_ug_L <= 10,]
p<-stw [,c(3,2)]
R<-stw_R[,c(3,2)]
ITW<-itw[,c(3,2)]
ITW_10<- itw_10[,c(3,2)]
BG<-dtw_BG[,c(3,2)]
dist_R<- lapply(1:length(p[[1]]), function (i) distGeo (R, p[i,]))
dist_R<-lapply(1:length(p[[1]]), function (i) data.frame(R, dist_R[[i]]))
dist_R100<-lapply(1:length(p[[1]]),function (i) dist_R[[i]][dist_R[[i]][,3] <= 100,])
maxscore<- lapply(1:length(p[[1]]), function(i) nrow (dist_R100[[i]]))
maxscore<-unlist(maxscore)
dist_ITW<- lapply(1:length(p[[1]]), function (i) distGeo (ITW, p[i,]))
dist_ITW<-lapply(1:length(p[[1]]), function (i) data.frame(ITW, dist_ITW[[i]]))
dist_ITW100<-lapply(1:length(p[[1]]),function (i) dist_ITW[[i]][dist_ITW[[i]][,3] <= 100,])
count_itw<- lapply(1:length(p[[1]]), function(i) nrow (dist_ITW100[[i]]))
count_itw<-unlist(count_itw)
if (nrow(ITW_10)==0) {
count_itw10<- rep(0, length(maxscore))
} else {
dist_ITW10<- lapply(1:length(p[[1]]), function (i) distGeo (ITW_10, p[i,]))
dist_ITW10<-lapply(1:length(p[[1]]), function (i) data.frame(ITW_10, dist_ITW10[[i]]))
dist_ITW10_100<-lapply(1:length(p[[1]]),function (i) dist_ITW10[[i]][dist_ITW10[[i]][,3] <= 100,])
count_itw10<- lapply(1:length(p[[1]]), function(i) nrow (dist_ITW10_100[[i]]))
count_itw10<-unlist(count_itw10)
}
dist_BG<- lapply(1:length(p[[1]]), function (i) distGeo (BG, p[i,]))
dist_BG<-lapply(1:length(p[[1]]), function (i) data.frame(BG, dist_BG[[i]]))
dtw<-lapply(1:length(p[[1]]), function(i) {
lapply(1: length(maxscore), function(j) {
min(distGeo( c(dist_R100[[i]][j,1], dist_R100[[i]][j,2]), dist_BG[[i]]))
}
)
}
)
dtw<-unlist(dtw)
dtw<-split(dtw, (0:length(dtw) %/% length(p[[1]])))
dtw <- dtw[-length (dtw)]
count<-lapply(1:length(dtw), function(i) length(subset(dtw[[i]], dtw[[i]]<=100)))
count<-unlist(count)
score<-maxscore-count
abc<-cbind (stw, maxscore, count, score, count_itw, count_itw10)
abc<- data.frame (abc)
write.csv (abc, "/Users/..../Output/Abdullapur_score.csv", row.names = F)
所提供村庄的输出应如下所示
structure(list(ID_GC = structure(c(1L, 2L, 3L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 20L, 21L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L,
37L, 38L, 39L, 41L, 42L, 43L, 44L, 46L, 47L, 48L, 49L, 50L, 51L
), .Label = c("492K", "494K", "497K", "498K", "499K", "500K",
"501K", "502K", "503K", "504K", "506K", "507K", "508K", "509K",
"510K", "511K", "512K", "513K", "514K", "516K", "517K", "518K",
"519K", "522K", "523K", "524K", "526K", "527K", "528K", "530K",
"531K", "532K", "533K", "534K", "535K", "536K", "537K", "538K",
"539K", "540K", "541K", "542K", "543K", "544K", "545K", "546K",
"547K", "548K", "550K", "551K", "552K"), class = "factor"), Lat = c(23.78107,
23.78115, 23.78122, 23.78125, 23.78081, 23.78096, 23.78062, 23.78068,
23.78071, 23.78075, 23.78043, 23.78021, 23.77985, 23.77981, 23.77995,
23.77987, 23.7796, 23.77944, 23.77934, 23.77937, 23.77906, 23.77899,
23.77907, 23.77889, 23.77898, 23.77863, 23.77865, 23.77855, 23.77852,
23.77843, 23.77806, 23.77824, 23.77809, 23.7781, 23.77797, 23.77788,
23.77809, 23.77815, 23.77771, 23.77757, 23.77752, 23.7774, 23.7772,
23.77869, 23.78084, 23.78178), Long = c(90.65016, 90.64968, 90.6497,
90.64972, 90.64996, 90.64987, 90.64989, 90.64924, 90.64921, 90.65,
90.64998, 90.6494, 90.64978, 90.64973, 90.64952, 90.64958, 90.64935,
90.6492, 90.64922, 90.64919, 90.64928, 90.64937, 90.64887, 90.64919,
90.64891, 90.64914, 90.64903, 90.64907, 90.6491, 90.64868, 90.6491,
90.64853, 90.64862, 90.64851, 90.64852, 90.64865, 90.64878, 90.64878,
90.64866, 90.64859, 90.64839, 90.64858, 90.64861, 90.64922, 90.64994,
90.64925), Village = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Abdullapur", class = "factor"),
Depth_m = c(18, 18, 18, 18, 31.5, 13.5, 15, 13.5, 21, 13.5,
18, 15, 24, 13.5, 19.5, 33, 14.4, 18, 21, 13.5, 18, 18, 51,
48, 54, 67.5, 69, 69, 66, 66, 21, 60, 66, 54, 31.5, 21, 66,
12, 54, 27, 18, 18, 18, 18, 18, 21), As_ug_L = c(68L, 68L,
68L, 68L, 306L, 129L, 129L, 20L, 68L, 188L, 129L, 68L, 68L,
68L, 129L, 188L, 2L, 68L, 37L, 20L, 306L, 306L, 20L, 306L,
20L, 2L, 2L, 2L, 2L, 2L, 306L, 2L, 2L, 2L, 306L, 306L, 2L,
306L, 2L, 306L, 306L, 68L, 68L, 306L, 68L, 20L), maxscore = c(10L,
11L, 11L, 11L, 12L, 12L, 16L, 13L, 12L, 12L, 16L, 13L, 8L,
10L, 9L, 10L, 9L, 10L, 10L, 10L, 7L, 7L, 5L, 7L, 6L, 9L,
9L, 9L, 8L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 9L, 9L, 8L, 8L, 8L,
8L, 6L, 7L, 12L, 3L), count = c(10L, 11L, 11L, 11L, 12L,
12L, 16L, 13L, 12L, 12L, 16L, 13L, 8L, 10L, 9L, 10L, 9L,
9L, 9L, 9L, 6L, 6L, 4L, 6L, 5L, 8L, 8L, 8L, 7L, 8L, 8L, 8L,
8L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 6L, 6L, 12L, 3L),
score = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L), count_itw = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 6L, 7L, 7L, 8L, 8L,
9L, 10L, 10L, 12L, 12L, 12L, 12L, 13L, 11L, 13L, 10L, 10L,
10L, 10L, 12L, 12L, 6L, 6L, 5L, 5L, 2L, 12L, 0L, 0L), count_itw10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 3L, 4L, 4L, 5L, 5L, 6L, 7L, 7L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 6L, 6L, 5L, 5L, 2L,
9L, 0L, 0L)), class = "data.frame", row.names = c(1L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L, 16L, 17L, 18L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L,
33L, 34L, 35L, 36L, 37L, 38L, 39L, 41L, 42L, 43L, 44L, 46L, 47L,
48L, 49L, 50L, 51L))
如何导出每个村庄文件的名称?
提前谢谢你:)
【问题讨论】:
-
您可以列出目录中的 .csv 文件并使用您为单个村庄制作的代码循环浏览它们
标签: r loops batch-processing