【发布时间】:2022-01-20 05:01:09
【问题描述】:
对于我正在处理的项目,我们在 Excel 工作表中提供了一些数据,我通过 Excel 将其转换为 CSV。 这些文件包含具有不同类别但 ID 相同的测量值。
例子
readingId; category; result;
1 ; cat 1 ; A
1 ; cat 2 ; B
2 ; cat1 ; C
然后我将 CSV 转换为 JSON 并编写了一个函数来将数据输出到不同的对象中
const fs = require('fs');
const path = require('path');
exports.convertJson = (file) => {
let rawData = fs.readFileSync(file);
let jsonData = JSON.parse(rawData);
let rawOutput = [];
for (output of jsonData) {
rawOutput.push({
locationId: output.Meetlocatienummer,
date: output.Aanmaakdatum_score,
subCategorie: output.Bestekspost,
score: output.Score,
scoreNumber: output.Cijfer,
categories: output.Categorie,
coordinates: output.Coordinaten,
neighbourhoodIndex: output.BUURTCODE,
quality: output.KWALITEIT,
district: output.STADSDEEL,
distrcitIndex: output.STADSDLCD,
street: output.STRAATNAAM,
neighbourhood: output.WIJK,
cluster: output.Cluster,
});
}
return rawOutput;
};
输出如下结果
[
{
locationId: 10215,
date: undefined,
subCategorie: 'Meubilair-afvalbak-vullingsgraad',
score: '',
scoreNumber: 8,
categories: 'Meubilair',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Meubilair-container-bijgeplaatst afval rondom container',
score: 'A+',
scoreNumber: 10,
categories: 'Meubilair',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Riolering-kolk-belemmering inlaat',
score: 'A+',
scoreNumber: 10,
categories: 'Riolering',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-open verharding-elementenverharding-onkruid',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-natuurlijk afval',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-uitwerpselen',
score: 'A+',
scoreNumber: 10,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-zwerfafval grof',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-veegvuil goten',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-onkruid rondom obstakels',
score: 'B',
scoreNumber: 6,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-grof vuil',
score: 'A+',
scoreNumber: 10,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-zwerfafval fijn',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 7466,
date: undefined,
subCategorie: 'Meubilair-afvalbak-vullingsgraad',
score: 'B',
scoreNumber: 6,
categories: 'Meubilair',
coordinates: '52.072647, 4.288656',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Jan Krosstraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
}
]
最后我想将此信息写入 MongoDB,并且我想到了以下方案来减少重复数据的负载
{
locationId: output.Meetlocatienummer,
date: output.Aanmaakdatum_score,
subCategories: [
{
subCategory: output.Bestekspost,
score: output.Score,
scoreNumber: output.Cijfer,
},
],
categories: [{ category: output.Categorie }],
coordinates: output.Coordinaten,
neighbourhoodIndex: output.BUURTCODE,
quality: output.KWALITEIT,
district: output.STADSDEEL,
distrcitIndex: output.STADSDLCD,
street: output.STRAATNAAM,
neighbourhood: output.WIJK,
cluster: output.Cluster,
}
这个项目是学习NodeJS时的一个爱好项目。实际数据是读取我为被污染的垃圾工作的城市街道的数量。在Excel中阅读数千行以查找城市的热点有点无聊,因为仅阅读一些分数和图表有点无聊,所以我认为通过NodeJS将其导入Leaflet会很好。
随着我学习 Node 并可能在未来 React 中,实际的后端将包含更多功能,这就是为什么我尝试自己编写它而不是将数据导入到谷歌地图中,这可以正常工作,但缺乏详细的类别过滤。
我希望我的想法有点清晰,有人可以指出我正确的方向。
编辑 1 我对 lodash 的了解更进一步。
return _(rawOutput)
.groupBy('locationId')
.map((obj) => _.assignWith({}, ...obj, (val1, val2) => val1 || val2))
.value();
我找到了上面的 sn-p,现在每个唯一 locationId 只得到 1 个输出,但现在我坚持使用子类别构建最终输出。
我还使用 csv-parser 直接从 csv 转换为正确的 json 输出,这将是理想的,因为那时我不必手动转换它。
我明天再回复它:-)
【问题讨论】:
-
嗯,MongoDB 会很好地保存与“父”项相关的信息数组,并且它可以对坐标执行地理查找(您必须使用 GeoJSON 模式,但对于上面的点)这并不繁重)。你最初想要做什么?创建将骗子聚集在一起的逻辑?在MongoDB之外?里面?
-
好的,我会调查的!我试图将大量重复数据放入一个对象中。在不同类别的同一坐标上大约有 10 个测量值。我希望将这些结果合并到一个对象中,并将不同的类别合并到主对象内的一个新对象中。 id:1 类别:- 类别:1 - 类别:2 坐标:0、0 而不是 id:1 类别:1 坐标:0、0 id:1 类别:2 坐标:0,0 等
-
categories和subCategories是什么关系?或者说“按locationId分组,只捕获10 个左右的测量值(坐标、街道、集群等)一次并构建一个类别字符串数组和一个包含subCategory、score 和scoreNumber 的subCategory 对象数组就足够了“? -
每个类别都有一些子类别,它们之间没有直接关系。例如,有一个“垃圾”类别,其中包含“小垃圾”、“大垃圾”、“自然垃圾”等子类别。
标签: node.js json mongodb express csv