last update:
2024-01-03
# Figure size options (repr.* options; presumably read by the notebook display -- confirm)
options(repr.plot.width=12, repr.plot.height=6)
# Packages used below, loaded without start-up messages:
# dplyr (full_join), xts (xts, endpoints, period.apply), lubridate (interval, %within%)
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(xts))
suppressPackageStartupMessages(library(lubridate))
# Use the "C" locale for the date/time category
Sys.setlocale("LC_TIME", "C")
# ---- 2015, 15-min raw record ----
# The raw file has no header line, so columns are named V1, V2, ... by read.table
dat <- read.table(
  file = "datafiles/rawDatafile_2015_15min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# V1 is parsed as a POSIXct timestamp in UTC
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
# V5 is multiplied by 100 and divided by 15 (15-min total -> counts per minute, cpm).
# NOTE(review): the original note calls this "the 0.01 multiplier" while the code
# multiplies by 100 -- confirm the intended scaling direction.
counts <- dat$V5 * 100 / 15
# Raw variables (an older version also kept TdegC = dat$V6)
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check: consecutive timestamps should be one 15-min step (900 s) apart.
# gaps  = positions whose distance to the previous record is not exactly one step
# ngaps = number of such positions (0 --> continuous record)
gaps <- which(c(1, round(diff(unclass(new.date.time) / 900))) != 1)
ngaps <- length(gaps)
ngaps
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 15-min axis from 2015-05-07 11:30 to 2015-12-31 23:45 (UTC)
all.times <- seq.POSIXt(
  from = as.POSIXct("2015-05-07 11:30:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2015-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
# Round-trip through text (presumably so the axis is built like the parsed raw timestamps)
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Full join on the shared timestamp column: times missing from the record get gamma = NA
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled gamma series, drawn as points joined by lines
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
# (disabled) temperature plot from the older version:
# plot(x = data.all.times$timestamp,y=data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
Pre-processing: outlier removal
The outlier is a single value (the first measurement after the gap on 2015-12-29 04:30)
# The outlier is taken to be the smallest gamma value of the series; blank it
data.all.times$gamma[which.min(data.all.times$gamma)] <- NA
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
# Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  data.all.times,
  file = "datafiles/preprocessedDatafile_2015_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2016, 15-min raw record ----
# Header-less comma-separated file; columns are named V1, V2, ...
dat <- read.table(
  file = "datafiles/rawDatafile_2016_15min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# Timestamp strings in V1 -> POSIXct (UTC)
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
#summary(new.date.time)
# V5 * 100 / 15: 15-min total expressed as counts per minute (cpm).
# NOTE(review): the original note speaks of a 0.01 multiplier, the code multiplies by 100 -- confirm.
counts <- dat$V5 * 100 / 15
# Raw variables (the older version also stored TdegC = dat$V6)
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 900 s (15 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 900))) != 1)
ngaps <- length(gaps)
ngaps
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 15-min axis from 2016-01-01 00:00 to 2016-04-27 08:15 (UTC)
all.times <- seq.POSIXt(
  from = as.POSIXct("2016-01-01 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2016-04-27 08:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Times without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
# (disabled) temperature plot from the older version:
#plot(x = data.all.times$timestamp,y=data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
Pre-processing: outlier removal
The outlier is a single value on 2016-01-13, 12:30 (the first measurement after the gap on 2016-01-12 03:00)
# Blank the single smallest gamma value (the outlier)
data.all.times$gamma[which.min(data.all.times$gamma)] <- NA
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  data.all.times,
  file = "datafiles/preprocessedDatafile_2016_15min_1.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2016, 1-min raw record ----
# Header-less comma-separated file; columns are named V1, V2, ...
dat <- read.table(
  file = "datafiles/rawDatafile_2016_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# Timestamp strings in V1 -> POSIXct (UTC)
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables (the older version also stored TdegC = dat$V6)
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
ngaps
#data.only$timestamp[502]
plot(
  x = data.only$timestamp,
  y = data.only$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.only$timestamp,y = data.only$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - since 2016-05-05):
## rows 502 to the end of the record are written
write.table(
  data.only[502:nrow(data.only), ],
  file = "datafiles/preprocessedDatafile_2016_1min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Aggregate the 1-min record to 15 min, using rows 502 onwards (complete days only)
complete.rows <- data.only[502:nrow(data.only), ]
xts.gamma.1min <- xts(complete.rows$gamma, order.by = complete.rows$timestamp, tzone = "UTC")
# data.only no longer has a TdegC column, so the temperature series is not built;
# it was only consumed by the disabled temperature code further down.
#xts.temp.1min=xts(data.only[(502:dim(data.only)[1]),]$TdegC, order.by=data.only[(502:dim(data.only)[1]),]$timestamp,tzone="UTC")
# Quarter-hour stamps from 2016-05-05 00:15 to 2016-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2016-05-05 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2016-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA.
# NOTE(review): unlike later years, this record is not gap-filled before aggregation;
# bins only line up with times.15min if the record from row 502 on is continuous -- confirm ngaps.
xts.gamma.15min <- period.apply(
  x = xts.gamma.1min / 15,
  INDEX = endpoints(xts.gamma.1min, on = "mins", k = 15),
  FUN = sum
)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
#dat.2016.15min=data.frame(timestamp=times.15min, gamma=head(as.vector(xts.gamma.15min),-1),TdegC=head(as.vector(xts.temp.15min),-1))
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2016.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid in colour 4
plot(
  x = data.only$timestamp,
  y = data.only$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(x = dat.2016.15min$timestamp, y = dat.2016.15min$gamma, col = 4)
#plot(x = data.only$timestamp,y = data.only$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
#lines(x=dat.2016.15min$timestamp,y=dat.2016.15min$TdegC,col=4)
## Pre-processed datafile (2016, from direct 15min measurements and from 1-min aggregation)
write.table(
  dat.2016.15min,
  file = "datafiles/preprocessedDatafile_2016_15min_2.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2017, 1-min raw record ----
# Header-less comma-separated file; columns are named V1, V2, ...
dat <- read.table(
  file = "datafiles/rawDatafile_2017_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# Timestamp strings in V1 -> POSIXct (UTC), followed by a printed summary of the time range
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
summary(new.date.time)
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables (the older version also stored TdegC = dat$V6)
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
ngaps
Min. 1st Qu. Median "2017-01-01 00:00:00" "2017-04-02 06:00:15" "2017-07-02 12:00:30" Mean 3rd Qu. Max. "2017-07-02 12:01:07" "2017-10-01 18:02:45" "2017-12-31 23:59:00"
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis covering the whole of 2017 (UTC)
all.times <- seq.POSIXt(
  from = as.POSIXct("2017-01-01 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2017-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 60
)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
# (disabled) temperature plot from the older version:
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
Pre-processing: outlier removal
Outliers (set as NA):
The data from 5 July 2017 [10:00 to 17:50] are also set as NA, since they were affected by tests and by the displacement of the sensor during field work
library(lubridate)
# Testing period on 5 July 2017: every gamma value inside the interval is set to NA.
# NOTE(review): the interval ends at 17:59 here while the accompanying text says 17:50 -- confirm.
date1 <- as.POSIXct("2017-07-05 10:00:00", tz = "UTC")
date2 <- as.POSIXct("2017-07-05 17:59:00", tz = "UTC")
test.dates <- interval(date1, date2)
data.all.times$gamma[data.all.times$timestamp %within% test.dates] <- NA
#data.all.times[data.all.times$timestamp %within% test.dates,3]=NA
# Three isolated outliers, each blanked by exact timestamp match
out1 <- as.POSIXct("2017-05-23 14:01:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
out2 <- as.POSIXct("2017-05-27 10:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
out3 <- as.POSIXct("2017-07-19 07:48:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
data.all.times$gamma[which(data.all.times$timestamp == out1)] <- NA
data.all.times$gamma[which(data.all.times$timestamp == out2)] <- NA
data.all.times$gamma[which(data.all.times$timestamp == out3)] <- NA
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- as.POSIXct("2017-01-01 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
last.time <- as.POSIXct("2017-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2017 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2017, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2017_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2017-01-01 00:15 to 2017-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2017-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2017-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
library(xts)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2017.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid in colour 4
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(x = dat.2017.15min$timestamp, y = dat.2017.15min$gamma, col = 4)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
#lines(x=dat.2017.15min$timestamp,y=dat.2017.15min$TdegC,col=4)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2017.15min,
  file = "datafiles/preprocessedDatafile_2017_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2018, 1-min raw record ----
# Header-less comma-separated file; columns are named V1, V2, ...
dat <- read.table(
  file = "datafiles/rawDatafile_2018_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables (the older version also stored TdegC = dat$V6)
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
ngaps
[1] "2018-01-01 UTC"
[1] "2018-12-31 23:59:00 UTC"
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
# (disabled) temperature plot from the older version:
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
Pre-processing: outlier removal
# Every gamma value below 6500 is treated as an outlier and set to NA
data.all.times$gamma[which(data.all.times$gamma < 6500)] <- NA
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2018-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2018 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2018, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2018_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2018-01-01 00:15 to 2018-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2018-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2018-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
library(xts)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2018.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid in colour 4
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(x = dat.2018.15min$timestamp, y = dat.2018.15min$gamma, col = 4)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2018.15min,
  file = "datafiles/preprocessedDatafile_2018_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
Note: all NAN values in the file rawDatafile_2019_1min.txt have been replaced by NA
# ---- 2019, 1-min raw record ----
# Header-less comma-separated file; columns are named V1, V2, ...
dat <- read.table(
  file = "datafiles/rawDatafile_2019_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
ngaps
[1] "2019-01-01 UTC"
[1] "2019-12-31 23:59:00 UTC"
## The smallest count of the record is set to NA: per the original note this is the
## outlier on "2019-10-30 13:54:00", likely the result of a short power outage
counts[which.min(counts)] <- NA
# Rebuild the raw data frame so it carries the cleaned counts
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2019-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2019 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2019, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2019_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2019-01-01 00:15 to 2019-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2019-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2019-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# Five consecutive 15-min bins are blanked by position.
# NOTE(review): assuming the gap-filled record starts at 2019-01-01 00:00, positions
# 29049-29053 are the bins covering 14:00-15:14 on 2019-10-30, i.e. right after the
# outlier noted at 13:54 -- confirm before reusing these indices.
xts.gamma.15min[29049:29053] <- NA
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2019.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid in colour 4
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(x = dat.2019.15min$timestamp, y = dat.2019.15min$gamma, col = 4)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2019.15min,
  file = "datafiles/preprocessedDatafile_2019_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2020, 1-min raw record ----
# Header-less comma-separated file; fill = TRUE pads short rows instead of failing on them
dat <- read.table(
  file = "datafiles/rawDatafile_2020_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE, fill = TRUE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
ngaps
[1] "2020-01-01 UTC"
[1] "2020-12-31 23:59:00 UTC"
## Counts below 6500 are set to NA. Per the original note these are the outliers
## after '2020-09-23 13:53:00', likely the result of a short power outage.
## The threshold is applied to the whole record, not only after that time.
counts[which(counts < 6500)] <- NA
# Rebuild the raw data frame so it carries the cleaned counts
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2020-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2020 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2020, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2020_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2020-01-01 00:15 to 2020-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2020-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2020-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2020.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid in colour 4
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(x = dat.2020.15min$timestamp, y = dat.2020.15min$gamma, col = 4)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2020.15min,
  file = "datafiles/preprocessedDatafile_2020_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2021, 1-min raw record ----
# Header-less comma-separated file; fill = TRUE pads short rows instead of failing on them
dat <- read.table(
  file = "datafiles/rawDatafile_2021_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE, fill = TRUE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
ngaps
[1] "2021-01-01 UTC"
[1] "2021-12-31 23:59:00 UTC"
## NOTE(review): disabled block, apparently carried over from the 2020 section
## (its original heading read: Outlier values below 6500 after '2020-09-23 13:53:00' set as NA,
## likely as a result of a short power outage). No outlier removal is applied here for 2021.
#counts[which(counts < 6500)]=NA
#data.only=data.frame(timestamp=new.date.time, gamma=counts,TdegC=dat$V6)
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2021-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2021 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2021, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2021_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2021-01-01 00:15 to 2021-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2021-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2021-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2021.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid (colour 4, points and lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(
  x = dat.2021.15min$timestamp, y = dat.2021.15min$gamma,
  col = 4, type = "b", pch = 20
)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2021.15min,
  file = "datafiles/preprocessedDatafile_2021_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2022, 1-min raw record ----
# Header-less comma-separated file; fill = TRUE pads short rows instead of failing on them
dat <- read.table(
  file = "datafiles/rawDatafile_2022_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE, fill = TRUE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step (computed but not printed in this section):
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
[1] "2022-01-01 UTC"
[1] "2022-12-31 23:59:00 UTC"
Pre-processing: add missing times as NA values
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled series as an xts object, indexed by timestamp
gamma.xts <- xts(data.all.times$gamma, order.by = data.all.times$timestamp)
# Whole year first, then a close-up from 2022-05-17 onwards
plot(gamma.xts, main = "")
plot(gamma.xts["2022-05-17/"], type = "b", pch = 20, main = "")
#plot(x = data.all.times$timestamp,y = data.all.times$gamma,type="b",pch=20,xlab="time",ylab="counts (cpm)")
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
Pre-processing: set suspicious values as NA
Gamma values from 2022-05-17 to 2022-07-18 are suspicious and set as missing (NA).
Feedback from the station technicians: there was unusual human activity in the container where the sensor is located, and the sensor was not in its usual upright position for some time
# Blank the suspicious period 2022-05-17 00:00 to 2022-07-18 23:59
gamma.xts["2022-05-17 00:00:00/2022-07-18 23:59:00"] <- NA
# Rebuild the data frame from the cleaned xts series
data.all.times <- data.frame(
  timestamp = index(gamma.xts),
  gamma = as.vector(gamma.xts)
)
plot(as.zoo(gamma.xts), main = "")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2022-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2022 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2022, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2022_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2022-01-01 00:15 to 2022-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2022-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2022-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
# Sum of (1-min value / 15) per 15-min bin, this time with na.rm = TRUE:
# NA minutes are skipped, so a bin made only of NA sums to 0 and a partly
# missing bin gives a reduced sum.
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(
  x = xts.gamma.1min / 15, INDEX = bin.ends,
  FUN = sum, na.rm = TRUE
)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2022.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# Before thresholding: 1-min series with the 15-min aggregate overlaid
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(
  x = dat.2022.15min$timestamp, y = dat.2022.15min$gamma,
  col = 4, type = "b", pch = 20
)
# Aggregates below 6500 (including the zero sums of empty bins) are set to NA.
# NOTE(review): a partly missing bin whose reduced sum stays at or above 6500 is kept -- confirm this is intended.
dat.2022.15min$gamma[which(dat.2022.15min$gamma < 6500)] <- NA
# After thresholding: same figure again
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(
  x = dat.2022.15min$timestamp, y = dat.2022.15min$gamma,
  col = 4, type = "b", pch = 20
)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2022.15min,
  file = "datafiles/preprocessedDatafile_2022_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace
# ---- 2023, 1-min raw record ----
# Header-less comma-separated file; fill = TRUE pads short rows instead of failing on them
dat <- read.table(
  file = "datafiles/rawDatafile_2023_1min.txt",
  header = FALSE, sep = ",", stringsAsFactors = FALSE, fill = TRUE
)
# Timestamp strings in V1 -> POSIXct (UTC); keep the first and last timestamp of the record
new.date.time <- as.POSIXct(dat$V1, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
start.time <- new.date.time[1]
end.time <- new.date.time[length(new.date.time)]
# V5 multiplied by 100 (the original note calls this the 0.01 multiplier -- TODO confirm direction)
counts <- dat$V5 * 100
# Raw variables
data.only <- data.frame(timestamp = new.date.time, gamma = counts)
# Continuity check on a 60 s (1 min) step:
# gaps holds the positions not exactly one step after their predecessor, ngaps their count
gaps <- which(c(1, round(diff(unclass(new.date.time) / 60))) != 1)
ngaps <- length(gaps)
start.time
end.time
ngaps
[1] "2023-01-01 UTC"
[1] "2023-12-31 23:59:00 UTC"
Pre-processing: add missing times as NA values
library(dplyr)
# Regular 1-min axis between the first and last timestamp of the record
all.times <- seq.POSIXt(from = start.time, to = end.time, by = 60)
# Axis re-parsed from its text form with the same format string as the raw timestamps
continuous.time <- as.POSIXct(
  format.POSIXct(all.times),
  format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
)
df <- data.frame(timestamp = continuous.time, stringsAsFactors = FALSE)
# Minutes without a measurement end up with gamma = NA after the full join
data.all.times <- full_join(df, data.only)
Joining with `by = join_by(timestamp)`
# Gap-filled 1-min gamma series (points joined by lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
#plot(x = data.all.times$timestamp,y = data.all.times$TdegC,type="b",pch=20,xlab="time",ylab="T (degC)")
## Pre-processed datafile (complete days only - up to last.time )
first.time <- start.time
last.time <- as.POSIXct("2023-12-31 23:59:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
complete.2023 <- interval(first.time, last.time)
dat1min <- data.all.times[data.all.times$timestamp %within% complete.2023, ]
# NOTE(review): no sep is given, so fields are space-separated and the unquoted
# timestamp itself contains a space -- confirm downstream readers expect this layout.
write.table(
  dat1min,
  file = "datafiles/preprocessedDatafile_2023_1min.txt",
  quote = FALSE, row.names = FALSE
)
Pre-processing: aggregation to 15 min
# Quarter-hour stamps from 2023-01-01 00:15 to 2023-12-31 23:45 used to label the bins
times.15min <- seq.POSIXt(
  from = as.POSIXct("2023-01-01 00:15:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  to = as.POSIXct("2023-12-31 23:45:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
  by = 900
)
xts.gamma.1min <- xts(dat1min$gamma, order.by = dat1min$timestamp, tzone = "UTC")
#xts.temp.1min=xts(dat1min$TdegC , order.by=dat1min$timestamp,tzone="UTC")
# Sum of (1-min value / 15) within each 15-min bin; sum() has no na.rm, so a bin with any NA is NA
bin.ends <- endpoints(xts.gamma.1min, on = "mins", k = 15)
xts.gamma.15min <- period.apply(x = xts.gamma.1min / 15, INDEX = bin.ends, FUN = sum)
#xts.temp.15min=period.apply(x=xts.temp.1min, INDEX=endpoints(xts.temp.1min, on="mins", k=15), FUN=mean)
# head(, -1) drops the last bin before pairing the values with times.15min
dat.2023.15min <- data.frame(
  timestamp = times.15min,
  gamma = head(as.vector(xts.gamma.15min), -1)
)
# 1-min series with the 15-min aggregate overlaid (colour 4, points and lines)
plot(
  x = data.all.times$timestamp,
  y = data.all.times$gamma,
  type = "b", pch = 20,
  xlab = "time", ylab = "counts (cpm)"
)
lines(
  x = dat.2023.15min$timestamp, y = dat.2023.15min$gamma,
  col = 4, type = "b", pch = 20
)
## Pre-processed datafile: comma-separated, unquoted, without row names
write.table(
  dat.2023.15min,
  file = "datafiles/preprocessedDatafile_2023_15min.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)
rm(list = ls()) # clear workspace